From 73dbd0d0cc4ea52ac07fa9cd6a0c310a05f5f136 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 27 Jun 2023 11:39:07 -0400
Subject: [PATCH 001/257] add a field to map_router_lookahead to store the cost
 of crossing layers for the same block types

---
 .../route/router_lookahead_extended_map.cpp   |  8 ++-
 vpr/src/route/router_lookahead_extended_map.h |  3 +
 vpr/src/route/router_lookahead_map.cpp        |  7 +-
 vpr/src/route/router_lookahead_map.h          |  2 +
 vpr/src/route/router_lookahead_map_utils.cpp  | 66 ++++++++++++++-----
 vpr/src/route/router_lookahead_map_utils.h    |  6 +-
 6 files changed, 70 insertions(+), 22 deletions(-)

diff --git a/vpr/src/route/router_lookahead_extended_map.cpp b/vpr/src/route/router_lookahead_extended_map.cpp
index 375b1127177..90ec255a7ca 100644
--- a/vpr/src/route/router_lookahead_extended_map.cpp
+++ b/vpr/src/route/router_lookahead_extended_map.cpp
@@ -419,7 +419,9 @@ std::pair<float, int> ExtendedMapLookahead::run_dijkstra(RRNodeId start_node,
 
 // compute the cost maps for lookahead
 void ExtendedMapLookahead::compute(const std::vector<t_segment_inf>& segment_inf) {
-    this->src_opin_delays = util::compute_router_src_opin_lookahead(is_flat_);
+    std::tie(this->src_opin_delays, this->src_opin_inter_layer_delays) =
+        util::compute_router_src_opin_lookahead(is_flat_);
+
     this->chan_ipins_delays = util::compute_router_chan_ipin_lookahead();
 
     vtr::ScopedStartFinishTimer timer("Computing connection box lookahead map");
@@ -614,7 +616,9 @@ void ExtendedMapLookahead::write(const std::string& file) const {
 void ExtendedMapLookahead::read(const std::string& file) {
     cost_map_.read(file);
 
-    this->src_opin_delays = util::compute_router_src_opin_lookahead(is_flat_);
+    std::tie(this->src_opin_delays, this->src_opin_inter_layer_delays) =
+        util::compute_router_src_opin_lookahead(is_flat_);
+
     this->chan_ipins_delays = util::compute_router_chan_ipin_lookahead();
 }
 void ExtendedMapLookahead::write(const std::string& file) const {
diff --git a/vpr/src/route/router_lookahead_extended_map.h b/vpr/src/route/router_lookahead_extended_map.h
index 424a1dfa23d..ccd3faaa2ad 100644
--- a/vpr/src/route/router_lookahead_extended_map.h
+++ b/vpr/src/route/router_lookahead_extended_map.h
@@ -19,6 +19,9 @@ class ExtendedMapLookahead : public RouterLookahead {
     ///<Look-up table from SOURCE/OPIN to CHANX/CHANY of various types
     util::t_src_opin_delays src_opin_delays;
 
+    ///< Lookup table from SOURCE/OPIN to CHANX/CHANY of the another layer
+    util::t_src_opin_inter_layer_delays src_opin_inter_layer_delays;
+
     ///<Look-up table from CHANX/CHANY to SINK/IPIN of various types
     util::t_chan_ipins_delays chan_ipins_delays;
 
diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp
index bc9e57a10b1..4c10e9cc706 100644
--- a/vpr/src/route/router_lookahead_map.cpp
+++ b/vpr/src/route/router_lookahead_map.cpp
@@ -539,7 +539,9 @@ void MapLookahead::compute(const std::vector<t_segment_inf>& segment_inf) {
 
     //Next, compute which wire types are accessible (and the cost to reach them)
     //from the different physical tile type's SOURCEs & OPINs
-    this->src_opin_delays = util::compute_router_src_opin_lookahead(is_flat_);
+    std::tie(this->src_opin_delays, this->src_opin_inter_layer_delays) =
+        util::compute_router_src_opin_lookahead(is_flat_);
+
 }
 
 void MapLookahead::compute_intra_tile() {
@@ -562,7 +564,8 @@ void MapLookahead::read(const std::string& file) {
 
     //Next, compute which wire types are accessible (and the cost to reach them)
     //from the different physical tile type's SOURCEs & OPINs
-    this->src_opin_delays = util::compute_router_src_opin_lookahead(is_flat_);
+    std::tie(this->src_opin_delays, this->src_opin_inter_layer_delays) =
+        util::compute_router_src_opin_lookahead(is_flat_);
 }
 
 void MapLookahead::read_intra_cluster(const std::string& file) {
diff --git a/vpr/src/route/router_lookahead_map.h b/vpr/src/route/router_lookahead_map.h
index 00dc5bf62ad..8e1f4e7ee5d 100644
--- a/vpr/src/route/router_lookahead_map.h
+++ b/vpr/src/route/router_lookahead_map.h
@@ -21,6 +21,8 @@ class MapLookahead : public RouterLookahead {
     std::unordered_map<t_physical_tile_type_ptr, std::unordered_map<int, util::Cost_Entry>> tile_min_cost; // [physical_tile_type][sink_physical_num] -> cost
     // Lookup table to store the minimum cost for each dx and dy
     vtr::NdMatrix<util::Cost_Entry, 3> distance_based_min_cost; // [layer_num][dx][dy] -> cost
+    // [tile_index][from_layer_num][to_layer_num] -> pair<seg_index, t_reachable_wire_inf>
+    util::t_src_opin_inter_layer_delays src_opin_inter_layer_delays;
     const t_det_routing_arch& det_routing_arch_;
     bool is_flat_;
 
diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp
index 5ec27a15cc8..100f56d7644 100644
--- a/vpr/src/route/router_lookahead_map_utils.cpp
+++ b/vpr/src/route/router_lookahead_map_utils.cpp
@@ -19,7 +19,11 @@
 #include "route_common.h"
 #include "route_timing.h"
 
-static void dijkstra_flood_to_wires(int itile, RRNodeId inode, util::t_src_opin_delays& src_opin_delays);
+static void dijkstra_flood_to_wires(int itile, RRNodeId inode,
+                                    util::t_src_opin_delays& src_opin_delays,
+                                    util::t_src_opin_inter_layer_delays& src_opin_inter_layer_delays,
+                                    bool is_multi_layer);
+
 static void dijkstra_flood_to_ipins(RRNodeId node, util::t_chan_ipins_delays& chan_ipins_delays);
 
 static t_physical_tile_loc pick_sample_tile(int layer_num, t_physical_tile_type_ptr tile_type, t_physical_tile_loc prev);
@@ -305,21 +309,33 @@ template void expand_dijkstra_neighbours(const RRGraphView& rr_graph,
                                                              std::vector<PQ_Entry_Base_Cost>,
                                                              std::greater<PQ_Entry_Base_Cost>>* pq);
 
-t_src_opin_delays compute_router_src_opin_lookahead(bool is_flat) {
+std::pair<t_src_opin_delays, t_src_opin_inter_layer_delays> compute_router_src_opin_lookahead(bool is_flat) {
     vtr::ScopedStartFinishTimer timer("Computing src/opin lookahead");
     auto& device_ctx = g_vpr_ctx.device();
     auto& rr_graph = device_ctx.rr_graph;
 
-    t_src_opin_delays src_opin_delays;
+    int num_layers = device_ctx.grid.get_num_layers();
+    bool is_multi_layer = (num_layers > 1);
 
-    src_opin_delays.resize(device_ctx.grid.get_num_layers());
-    for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) {
+    t_src_opin_delays src_opin_delays;
+    src_opin_delays.resize(num_layers);
+    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
         src_opin_delays[layer_num].resize(device_ctx.physical_tile_types.size());
     }
 
+    t_src_opin_inter_layer_delays src_opin_inter_layer_delays;
+    if(is_multi_layer) {
+        src_opin_inter_layer_delays.resize(num_layers);
+        for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+            int num_physical_tiles = (int)device_ctx.physical_tile_types.size();
+            src_opin_inter_layer_delays[layer_num].resize(num_physical_tiles);
+        }
+    }
+
+
     //We assume that the routing connectivity of each instance of a physical tile is the same,
     //and so only measure one instance of each type
-    for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) {
+    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
         for (size_t itile = 0; itile < device_ctx.physical_tile_types.size(); ++itile) {
             if (device_ctx.grid.num_instances(&device_ctx.physical_tile_types[itile], layer_num) == 0) {
                 continue;
@@ -356,11 +372,22 @@ t_src_opin_delays compute_router_src_opin_lookahead(bool is_flat) {
 
                         if (ptc >= int(src_opin_delays[layer_num][itile].size())) {
                             src_opin_delays[layer_num][itile].resize(ptc + 1); //Inefficient but functional...
+                            if(is_multi_layer) {
+                                size_t old_size = src_opin_inter_layer_delays[layer_num][itile].size();
+                                src_opin_inter_layer_delays[layer_num][itile].resize(ptc + 1);
+                                for (size_t i = old_size; i < src_opin_inter_layer_delays[layer_num][itile].size(); ++i) {
+                                    src_opin_inter_layer_delays[layer_num][itile][i].resize(num_layers);
+                                }
+                            }
                         }
 
                         //Find the wire types which are reachable from inode and record them and
                         //the cost to reach them
-                        dijkstra_flood_to_wires(itile, node_id, src_opin_delays);
+                        dijkstra_flood_to_wires(itile,
+                                                node_id,
+                                                src_opin_delays,
+                                                src_opin_inter_layer_delays,
+                                                is_multi_layer);
 
                         if (src_opin_delays[layer_num][itile][ptc].empty()) {
                             VTR_LOGV_DEBUG(f_router_debug, "Found no reachable wires from %s (%s) at (%d,%d)\n",
@@ -383,7 +410,7 @@ t_src_opin_delays compute_router_src_opin_lookahead(bool is_flat) {
         }
     }
 
-    return src_opin_delays;
+    return std::make_pair(src_opin_delays, src_opin_inter_layer_delays);
 }
 
 t_chan_ipins_delays compute_router_chan_ipin_lookahead() {
@@ -466,7 +493,10 @@ t_ipin_primitive_sink_delays compute_intra_tile_dijkstra(const RRGraphView& rr_g
 
 } // namespace util
 
-static void dijkstra_flood_to_wires(int itile, RRNodeId node, util::t_src_opin_delays& src_opin_delays) {
+static void dijkstra_flood_to_wires(int itile,
+                                    RRNodeId node, util::t_src_opin_delays& src_opin_delays,
+                                    util::t_src_opin_inter_layer_delays& src_opin_inter_layer_delays,
+                                    bool is_multi_layer) {
     auto& device_ctx = g_vpr_ctx.device();
     const auto& rr_graph = device_ctx.rr_graph;
 
@@ -516,6 +546,7 @@ static void dijkstra_flood_to_wires(int itile, RRNodeId node, util::t_src_opin_d
         pq.pop();
 
         e_rr_type curr_rr_type = rr_graph.node_type(curr.node);
+        int curr_layer_num = rr_graph.node_layer(curr.node);
         if (curr_rr_type == CHANX || curr_rr_type == CHANY || curr_rr_type == SINK) {
             //We stop expansion at any CHANX/CHANY/SINK
             int seg_index;
@@ -535,12 +566,20 @@ static void dijkstra_flood_to_wires(int itile, RRNodeId node, util::t_src_opin_d
             }
 
             //Keep costs of the best path to reach each wire type
-            if (!src_opin_delays[node_layer_num][itile][ptc].count(seg_index)
-                || curr.delay < src_opin_delays[node_layer_num][itile][ptc][seg_index].delay) {
+            if ((!src_opin_delays[node_layer_num][itile][ptc].count(seg_index)
+                || curr.delay < src_opin_delays[node_layer_num][itile][ptc][seg_index].delay)
+                && curr_layer_num == node_layer_num) {
                 src_opin_delays[node_layer_num][itile][ptc][seg_index].wire_rr_type = curr_rr_type;
                 src_opin_delays[node_layer_num][itile][ptc][seg_index].wire_seg_index = seg_index;
                 src_opin_delays[node_layer_num][itile][ptc][seg_index].delay = curr.delay;
                 src_opin_delays[node_layer_num][itile][ptc][seg_index].congestion = curr.congestion;
+            } else if (is_multi_layer && (!src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num].count(seg_index)
+                        || src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].delay)
+                       && curr_layer_num != node_layer_num) {
+                src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].wire_rr_type = curr_rr_type;
+                src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].wire_seg_index = seg_index;
+                src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].delay = curr.delay;
+                src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].congestion = curr.congestion;
             }
 
         } else if (curr_rr_type == SOURCE || curr_rr_type == OPIN || curr_rr_type == IPIN) {
@@ -564,11 +603,6 @@ static void dijkstra_flood_to_wires(int itile, RRNodeId node, util::t_src_opin_d
                     continue;
                 }
 
-                if (rr_graph.node_layer(curr.node) != node_layer_num) {
-                    //Don't change the layer
-                    continue;
-                }
-
                 t_pq_entry next;
                 next.congestion = curr.congestion + incr_cong; //Of current node
                 next.delay = curr.delay + incr_delay;          //To reach next node
diff --git a/vpr/src/route/router_lookahead_map_utils.h b/vpr/src/route/router_lookahead_map_utils.h
index f3a3d43249a..6728fae48d7 100644
--- a/vpr/src/route/router_lookahead_map_utils.h
+++ b/vpr/src/route/router_lookahead_map_utils.h
@@ -267,8 +267,10 @@ struct t_reachable_wire_inf {
 // SOURCE/OPIN of a given tile type.
 //
 // When querying this data structure, the minimum cost is computed for each delay/congestion pair, and returned
-// as the lookahead expected cost.
+// as the lookahead expected cost. [opin/src layer_num][tile_index][opin/src ptc_number] -> pair<seg_index, t_reachable_wire_inf>
 typedef std::vector<std::vector<std::vector<std::map<int, t_reachable_wire_inf>>>> t_src_opin_delays;
+// [from_layer_num][tile_index][from opin/src ptc num][to_layer_num] -> pair<seg_index, t_reachable_wire_inf>
+typedef std::vector<std::vector<std::vector<std::vector<std::map<int, util::t_reachable_wire_inf>>>>> t_src_opin_inter_layer_delays;
 
 //[from pin ptc num][target src ptc num]->cost
 typedef std::vector<std::unordered_map<int, Cost_Entry>> t_ipin_primitive_sink_delays;
@@ -284,7 +286,7 @@ typedef std::vector<std::unordered_map<int, Cost_Entry>> t_ipin_primitive_sink_d
 // and the tile's IPIN. If there are many connections to the same IPIN, the one with the minimum delay is selected.
 typedef std::vector<std::vector<std::vector<t_reachable_wire_inf>>> t_chan_ipins_delays;
 
-t_src_opin_delays compute_router_src_opin_lookahead(bool is_flat);
+std::pair<t_src_opin_delays, t_src_opin_inter_layer_delays> compute_router_src_opin_lookahead(bool is_flat);
 t_chan_ipins_delays compute_router_chan_ipin_lookahead();
 
 t_ipin_primitive_sink_delays compute_intra_tile_dijkstra(const RRGraphView& rr_graph,

From 00145104ced2a682997fb62749b668ec2df8fef9 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 3 Jul 2023 09:06:43 -0400
Subject: [PATCH 002/257] fix the signature of is_pin_conencted_to_layer

---
 libs/libarchfpga/src/physical_types_util.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libs/libarchfpga/src/physical_types_util.h b/libs/libarchfpga/src/physical_types_util.h
index 2051ae043c8..e27ba096b54 100644
--- a/libs/libarchfpga/src/physical_types_util.h
+++ b/libs/libarchfpga/src/physical_types_util.h
@@ -118,7 +118,7 @@
 bool is_opin(int ipin, t_physical_tile_type_ptr type);
 
 ///@brief Returns true if the specified pin is located at "from_layer" and it is connected to "to_layer"
-bool is_pin_conencted_to_layer(t_physical_tile_type_ptr type, int ipin, int from_layer, int to_layer);
+bool is_pin_conencted_to_layer(t_physical_tile_type_ptr type, int ipin, int from_layer, int to_layer, int num_of_avail_layer);
 
 ///@brief Returns true if the given physical tile type can implement a .input block type
 bool is_input_type(t_physical_tile_type_ptr type);

From 1d6dd13c3cd5cabc01a144bb4d53ba414016b940 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 3 Jul 2023 09:19:17 -0400
Subject: [PATCH 003/257] record, for each tile type, which pins have
 connections to other layers

---
 vpr/src/route/router_lookahead_map_utils.cpp | 51 ++++++++++++++++++++
 vpr/src/route/router_lookahead_map_utils.h   |  5 ++
 2 files changed, 56 insertions(+)

diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp
index 100f56d7644..e11e0cc88ca 100644
--- a/vpr/src/route/router_lookahead_map_utils.cpp
+++ b/vpr/src/route/router_lookahead_map_utils.cpp
@@ -413,6 +413,57 @@ std::pair<t_src_opin_delays, t_src_opin_inter_layer_delays> compute_router_src_o
     return std::make_pair(src_opin_delays, src_opin_inter_layer_delays);
 }
 
+t_sink_inter_layer_connection register_tiles_with_inter_layer_connection_block(bool is_flat) {
+    vtr::ScopedStartFinishTimer timer("Computing sink inter layer lookahead");
+    auto& device_ctx = g_vpr_ctx.device();
+    auto& rr_graph = device_ctx.rr_graph;
+
+    int num_layers = device_ctx.grid.get_num_layers();
+    bool is_multi_layer = (num_layers > 1);
+    if(!is_multi_layer) {
+        return t_sink_inter_layer_connection();
+    }
+    // AM: Currently, for 3D stuff, I am only focusing on the case that flat-router is not enabled. If flat_router is on, I am not sure whether it works.
+    VTR_ASSERT(!is_flat);
+
+    t_sink_inter_layer_connection sink_inter_layer_conn;
+    sink_inter_layer_conn.resize(num_layers);
+    for (int from_layer_num = 0; from_layer_num < num_layers; from_layer_num++) {
+        int num_physical_tiles = (int)device_ctx.physical_tile_types.size();
+        sink_inter_layer_conn[from_layer_num].resize(num_physical_tiles);
+        for (int itile = 0; itile < num_physical_tiles; itile++) {
+            const auto& physical_tile = device_ctx.physical_tile_types[itile];
+            int num_pins = physical_tile.num_pins;
+            sink_inter_layer_conn[from_layer_num][itile].resize(num_pins);
+            for(int pin_number = 0; pin_number < num_pins; pin_number++) {
+                sink_inter_layer_conn[from_layer_num][itile][pin_number].resize(num_layers);
+            }
+        }
+    }
+
+    for (int from_layer_num = 0; from_layer_num < num_layers; from_layer_num++) {
+        for (int itile = 0; itile < (int)device_ctx.physical_tile_types.size(); itile++) {
+            const auto& physical_tile = device_ctx.physical_tile_types[itile];
+            int num_pins = physical_tile.num_pins;
+            for (int pin_num = 0; pin_num < num_pins; pin_num++) {
+                for (int to_layer_num = 0; to_layer_num < num_layers; to_layer_num++) {
+                    if (from_layer_num == to_layer_num) {
+                        sink_inter_layer_conn[from_layer_num][itile][pin_num][to_layer_num] = true;
+                    } else {
+                        sink_inter_layer_conn[from_layer_num][itile][pin_num][to_layer_num] = is_pin_conencted_to_layer(&device_ctx.physical_tile_types[itile],
+                                                                                                                        pin_num,
+                                                                                                                        from_layer_num,
+                                                                                                                        to_layer_num,
+                                                                                                                        device_ctx.grid.get_num_layers());
+                    }
+                }
+            }
+        }
+    }
+
+    return sink_inter_layer_conn;
+}
+
 t_chan_ipins_delays compute_router_chan_ipin_lookahead() {
     vtr::ScopedStartFinishTimer timer("Computing chan/ipin lookahead");
     auto& device_ctx = g_vpr_ctx.device();
diff --git a/vpr/src/route/router_lookahead_map_utils.h b/vpr/src/route/router_lookahead_map_utils.h
index 6728fae48d7..b9ec7f67ffa 100644
--- a/vpr/src/route/router_lookahead_map_utils.h
+++ b/vpr/src/route/router_lookahead_map_utils.h
@@ -271,6 +271,8 @@ struct t_reachable_wire_inf {
 typedef std::vector<std::vector<std::vector<std::map<int, t_reachable_wire_inf>>>> t_src_opin_delays;
 // [from_layer_num][tile_index][from opin/src ptc num][to_layer_num] -> pair<seg_index, t_reachable_wire_inf>
 typedef std::vector<std::vector<std::vector<std::vector<std::map<int, util::t_reachable_wire_inf>>>>> t_src_opin_inter_layer_delays;
+// [source_layer_num][tile_index][pin number][to_layer_num] -> cost
+typedef std::vector<std::vector<std::vector<std::vector<bool>>>> t_sink_inter_layer_connection;
 
 //[from pin ptc num][target src ptc num]->cost
 typedef std::vector<std::unordered_map<int, Cost_Entry>> t_ipin_primitive_sink_delays;
@@ -287,6 +289,9 @@ typedef std::vector<std::unordered_map<int, Cost_Entry>> t_ipin_primitive_sink_d
 typedef std::vector<std::vector<std::vector<t_reachable_wire_inf>>> t_chan_ipins_delays;
 
 std::pair<t_src_opin_delays, t_src_opin_inter_layer_delays> compute_router_src_opin_lookahead(bool is_flat);
+
+t_sink_inter_layer_connection register_tiles_with_inter_layer_connection_block(bool is_flat);
+
 t_chan_ipins_delays compute_router_chan_ipin_lookahead();
 
 t_ipin_primitive_sink_delays compute_intra_tile_dijkstra(const RRGraphView& rr_graph,

From e2042fba04c20038f6ecd40f337ae20bf1a28152 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 3 Jul 2023 09:21:31 -0400
Subject: [PATCH 004/257] change t_sink_inter_layer_connection to
 t_inter_layer_connection

---
 vpr/src/route/router_lookahead_map_utils.cpp | 20 ++++++++++----------
 vpr/src/route/router_lookahead_map_utils.h   |  4 ++--
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp
index e11e0cc88ca..f4681345569 100644
--- a/vpr/src/route/router_lookahead_map_utils.cpp
+++ b/vpr/src/route/router_lookahead_map_utils.cpp
@@ -413,7 +413,7 @@ std::pair<t_src_opin_delays, t_src_opin_inter_layer_delays> compute_router_src_o
     return std::make_pair(src_opin_delays, src_opin_inter_layer_delays);
 }
 
-t_sink_inter_layer_connection register_tiles_with_inter_layer_connection_block(bool is_flat) {
+t_inter_layer_connection register_tiles_with_inter_layer_connection_block(bool is_flat) {
     vtr::ScopedStartFinishTimer timer("Computing sink inter layer lookahead");
     auto& device_ctx = g_vpr_ctx.device();
     auto& rr_graph = device_ctx.rr_graph;
@@ -421,22 +421,22 @@ t_sink_inter_layer_connection register_tiles_with_inter_layer_connection_block(b
     int num_layers = device_ctx.grid.get_num_layers();
     bool is_multi_layer = (num_layers > 1);
     if(!is_multi_layer) {
-        return t_sink_inter_layer_connection();
+        return t_inter_layer_connection();
     }
     // AM: Currently, for 3D stuff, I am only focusing on the case that flat-router is not enabled. If flat_router is on, I am not sure whether it works.
     VTR_ASSERT(!is_flat);
 
-    t_sink_inter_layer_connection sink_inter_layer_conn;
-    sink_inter_layer_conn.resize(num_layers);
+    t_inter_layer_connection inter_layer_conn;
+    inter_layer_conn.resize(num_layers);
     for (int from_layer_num = 0; from_layer_num < num_layers; from_layer_num++) {
         int num_physical_tiles = (int)device_ctx.physical_tile_types.size();
-        sink_inter_layer_conn[from_layer_num].resize(num_physical_tiles);
+        inter_layer_conn[from_layer_num].resize(num_physical_tiles);
         for (int itile = 0; itile < num_physical_tiles; itile++) {
             const auto& physical_tile = device_ctx.physical_tile_types[itile];
             int num_pins = physical_tile.num_pins;
-            sink_inter_layer_conn[from_layer_num][itile].resize(num_pins);
+            inter_layer_conn[from_layer_num][itile].resize(num_pins);
             for(int pin_number = 0; pin_number < num_pins; pin_number++) {
-                sink_inter_layer_conn[from_layer_num][itile][pin_number].resize(num_layers);
+                inter_layer_conn[from_layer_num][itile][pin_number].resize(num_layers);
             }
         }
     }
@@ -448,9 +448,9 @@ t_sink_inter_layer_connection register_tiles_with_inter_layer_connection_block(b
             for (int pin_num = 0; pin_num < num_pins; pin_num++) {
                 for (int to_layer_num = 0; to_layer_num < num_layers; to_layer_num++) {
                     if (from_layer_num == to_layer_num) {
-                        sink_inter_layer_conn[from_layer_num][itile][pin_num][to_layer_num] = true;
+                        inter_layer_conn[from_layer_num][itile][pin_num][to_layer_num] = true;
                     } else {
-                        sink_inter_layer_conn[from_layer_num][itile][pin_num][to_layer_num] = is_pin_conencted_to_layer(&device_ctx.physical_tile_types[itile],
+                        inter_layer_conn[from_layer_num][itile][pin_num][to_layer_num] = is_pin_conencted_to_layer(&device_ctx.physical_tile_types[itile],
                                                                                                                         pin_num,
                                                                                                                         from_layer_num,
                                                                                                                         to_layer_num,
@@ -461,7 +461,7 @@ t_sink_inter_layer_connection register_tiles_with_inter_layer_connection_block(b
         }
     }
 
-    return sink_inter_layer_conn;
+    return inter_layer_conn;
 }
 
 t_chan_ipins_delays compute_router_chan_ipin_lookahead() {
diff --git a/vpr/src/route/router_lookahead_map_utils.h b/vpr/src/route/router_lookahead_map_utils.h
index b9ec7f67ffa..a260cf230ce 100644
--- a/vpr/src/route/router_lookahead_map_utils.h
+++ b/vpr/src/route/router_lookahead_map_utils.h
@@ -272,7 +272,7 @@ typedef std::vector<std::vector<std::vector<std::map<int, t_reachable_wire_inf>>
 // [from_layer_num][tile_index][from opin/src ptc num][to_layer_num] -> pair<seg_index, t_reachable_wire_inf>
 typedef std::vector<std::vector<std::vector<std::vector<std::map<int, util::t_reachable_wire_inf>>>>> t_src_opin_inter_layer_delays;
 // [source_layer_num][tile_index][pin number][to_layer_num] -> cost
-typedef std::vector<std::vector<std::vector<std::vector<bool>>>> t_sink_inter_layer_connection;
+typedef std::vector<std::vector<std::vector<std::vector<bool>>>> t_inter_layer_connection;
 
 //[from pin ptc num][target src ptc num]->cost
 typedef std::vector<std::unordered_map<int, Cost_Entry>> t_ipin_primitive_sink_delays;
@@ -290,7 +290,7 @@ typedef std::vector<std::vector<std::vector<t_reachable_wire_inf>>> t_chan_ipins
 
 std::pair<t_src_opin_delays, t_src_opin_inter_layer_delays> compute_router_src_opin_lookahead(bool is_flat);
 
-t_sink_inter_layer_connection register_tiles_with_inter_layer_connection_block(bool is_flat);
+t_inter_layer_connection register_tiles_with_inter_layer_connection_block(bool is_flat);
 
 t_chan_ipins_delays compute_router_chan_ipin_lookahead();
 

From 563f9dbdc2ce75a0ba708939697f00ff5d65ca56 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 3 Jul 2023 11:14:45 -0400
Subject: [PATCH 005/257] change
 register_tiles_with_inter_layer_connection_block implementation to only record
 whether sinks have connections to other layers

---
 vpr/src/route/router_lookahead_map_utils.cpp | 49 +++++++++++---------
 vpr/src/route/router_lookahead_map_utils.h   |  6 +--
 2 files changed, 29 insertions(+), 26 deletions(-)

diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp
index f4681345569..bbb96821cb1 100644
--- a/vpr/src/route/router_lookahead_map_utils.cpp
+++ b/vpr/src/route/router_lookahead_map_utils.cpp
@@ -413,7 +413,7 @@ std::pair<t_src_opin_delays, t_src_opin_inter_layer_delays> compute_router_src_o
     return std::make_pair(src_opin_delays, src_opin_inter_layer_delays);
 }
 
-t_inter_layer_connection register_tiles_with_inter_layer_connection_block(bool is_flat) {
+t_sink_inter_layer_connection register_tiles_with_inter_layer_connection_block(bool is_flat) {
     vtr::ScopedStartFinishTimer timer("Computing sink inter layer lookahead");
     auto& device_ctx = g_vpr_ctx.device();
     auto& rr_graph = device_ctx.rr_graph;
@@ -421,40 +421,43 @@ t_inter_layer_connection register_tiles_with_inter_layer_connection_block(bool i
     int num_layers = device_ctx.grid.get_num_layers();
     bool is_multi_layer = (num_layers > 1);
     if(!is_multi_layer) {
-        return t_inter_layer_connection();
+        return t_sink_inter_layer_connection();
     }
     // AM: Currently, for 3D stuff, I am only focusing on the case that flat-router is not enabled. If flat_router is on, I am not sure whether it works.
     VTR_ASSERT(!is_flat);
 
-    t_inter_layer_connection inter_layer_conn;
+    t_sink_inter_layer_connection inter_layer_conn;
     inter_layer_conn.resize(num_layers);
     for (int from_layer_num = 0; from_layer_num < num_layers; from_layer_num++) {
-        int num_physical_tiles = (int)device_ctx.physical_tile_types.size();
-        inter_layer_conn[from_layer_num].resize(num_physical_tiles);
-        for (int itile = 0; itile < num_physical_tiles; itile++) {
-            const auto& physical_tile = device_ctx.physical_tile_types[itile];
-            int num_pins = physical_tile.num_pins;
-            inter_layer_conn[from_layer_num][itile].resize(num_pins);
-            for(int pin_number = 0; pin_number < num_pins; pin_number++) {
-                inter_layer_conn[from_layer_num][itile][pin_number].resize(num_layers);
+        const auto& physical_tiles = device_ctx.physical_tile_types;
+        int num_physical_tile_types = (int)device_ctx.physical_tile_types.size();
+
+        inter_layer_conn[from_layer_num].resize(num_physical_tile_types);
+        for (int itile = 0; itile < num_physical_tile_types; itile++) {
+            if (device_ctx.grid.num_instances(&physical_tiles[itile], from_layer_num) == 0) {
+                continue;
             }
+            int num_classes = (int)physical_tiles[itile].class_inf.size();
+            inter_layer_conn[from_layer_num][itile].resize(num_classes);
         }
     }
 
     for (int from_layer_num = 0; from_layer_num < num_layers; from_layer_num++) {
         for (int itile = 0; itile < (int)device_ctx.physical_tile_types.size(); itile++) {
-            const auto& physical_tile = device_ctx.physical_tile_types[itile];
-            int num_pins = physical_tile.num_pins;
-            for (int pin_num = 0; pin_num < num_pins; pin_num++) {
-                for (int to_layer_num = 0; to_layer_num < num_layers; to_layer_num++) {
-                    if (from_layer_num == to_layer_num) {
-                        inter_layer_conn[from_layer_num][itile][pin_num][to_layer_num] = true;
-                    } else {
-                        inter_layer_conn[from_layer_num][itile][pin_num][to_layer_num] = is_pin_conencted_to_layer(&device_ctx.physical_tile_types[itile],
-                                                                                                                        pin_num,
-                                                                                                                        from_layer_num,
-                                                                                                                        to_layer_num,
-                                                                                                                        device_ctx.grid.get_num_layers());
+            for (int class_num = 0; class_num < (int)inter_layer_conn[from_layer_num][itile].size(); class_num++) {
+                const auto& physical_tile = device_ctx.physical_tile_types[itile];
+                if (get_class_type_from_class_physical_num(&physical_tile, class_num) == e_pin_type::RECEIVER) {
+                    for (int to_layer_num = 0; to_layer_num < num_layers; to_layer_num++) {
+                        if (from_layer_num == to_layer_num) {
+                            continue ;
+                        } else {
+                            for (int pin_num : get_pin_list_from_class_physical_num(&physical_tile, class_num)) {
+                                if (is_pin_conencted_to_layer(&physical_tile, pin_num, from_layer_num, to_layer_num, num_layers)) {
+                                    inter_layer_conn[from_layer_num][itile][class_num].insert(to_layer_num);
+                                    break;
+                                }
+                            }
+                        }
                     }
                 }
             }
diff --git a/vpr/src/route/router_lookahead_map_utils.h b/vpr/src/route/router_lookahead_map_utils.h
index a260cf230ce..4dc3e7ed2b9 100644
--- a/vpr/src/route/router_lookahead_map_utils.h
+++ b/vpr/src/route/router_lookahead_map_utils.h
@@ -271,8 +271,8 @@ struct t_reachable_wire_inf {
 typedef std::vector<std::vector<std::vector<std::map<int, t_reachable_wire_inf>>>> t_src_opin_delays;
 // [from_layer_num][tile_index][from opin/src ptc num][to_layer_num] -> pair<seg_index, t_reachable_wire_inf>
 typedef std::vector<std::vector<std::vector<std::vector<std::map<int, util::t_reachable_wire_inf>>>>> t_src_opin_inter_layer_delays;
-// [source_layer_num][tile_index][pin number][to_layer_num] -> cost
-typedef std::vector<std::vector<std::vector<std::vector<bool>>>> t_inter_layer_connection;
+// [from layer num][tile_index][sink ptc number] -> set of layers that have connections to the given sink
+typedef std::vector<std::vector<std::vector<std::unordered_set<int>>>> t_sink_inter_layer_connection;
 
 //[from pin ptc num][target src ptc num]->cost
 typedef std::vector<std::unordered_map<int, Cost_Entry>> t_ipin_primitive_sink_delays;
@@ -290,7 +290,7 @@ typedef std::vector<std::vector<std::vector<t_reachable_wire_inf>>> t_chan_ipins
 
 std::pair<t_src_opin_delays, t_src_opin_inter_layer_delays> compute_router_src_opin_lookahead(bool is_flat);
 
-t_inter_layer_connection register_tiles_with_inter_layer_connection_block(bool is_flat);
+t_sink_inter_layer_connection register_tiles_with_inter_layer_connection_block(bool is_flat);
 
 t_chan_ipins_delays compute_router_chan_ipin_lookahead();
 

From c746a3438893fcfc62c5600a64f32ec67acfdfb9 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 3 Jul 2023 14:21:43 -0400
Subject: [PATCH 006/257] add the delay of intra-cluster connection to router
 lookahead

---
 vpr/src/route/router_lookahead_map.cpp       | 208 +++++++++++++------
 vpr/src/route/router_lookahead_map.h         |   8 +-
 vpr/src/route/router_lookahead_map_utils.cpp |   2 +-
 3 files changed, 152 insertions(+), 66 deletions(-)

diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp
index 5e11e68f97a..719e3dc4da4 100644
--- a/vpr/src/route/router_lookahead_map.cpp
+++ b/vpr/src/route/router_lookahead_map.cpp
@@ -227,6 +227,11 @@ static void store_min_cost_to_sinks(std::unordered_map<t_physical_tile_type_ptr,
 
 static void min_global_cost_map(vtr::NdMatrix<util::Cost_Entry, 3>& internal_opin_global_cost_map);
 
+static std::pair<float, float> get_cost_from_src_opin(const std::map<int, util::t_reachable_wire_inf>& src_opin_delay_map,
+                                                      int layer_num,
+                                                      int delta_x,
+                                                      int delta_y);
+
 // Read the file and fill inter_tile_pin_primitive_pin_delay and tile_min_cost
 static void read_intra_cluster_router_lookahead(std::unordered_map<t_physical_tile_type_ptr, util::t_ipin_primitive_sink_delays>& inter_tile_pin_primitive_pin_delay,
                                                 std::unordered_map<t_physical_tile_type_ptr, std::unordered_map<int, util::Cost_Entry>>& tile_min_cost,
@@ -269,6 +274,21 @@ static void print_wire_cost_map(int layer_num, const std::vector<t_segment_inf>&
 static void print_router_cost_map(const t_routing_cost_map& router_cost_map);
 
 /******** Interface class member function definitions ********/
+MapLookahead::MapLookahead(const t_det_routing_arch& det_routing_arch, bool is_flat)
+    : det_routing_arch_(det_routing_arch) , is_flat_(is_flat) {
+
+    int num_layers = g_vpr_ctx.device().grid.get_num_layers();
+    if (num_layers > 1) {
+        const auto& sw_inf = g_vpr_ctx.device().all_sw_inf;
+        int inter_layer_sw_id = det_routing_arch_.wire_to_rr_ipin_switch_between_dice;
+        VTR_ASSERT(inter_layer_sw_id >= 0);
+        inter_layer_connection_box_sw_delay = sw_inf.at(inter_layer_sw_id).Tdel();
+    } else {
+        VTR_ASSERT(num_layers == 1);
+        inter_layer_connection_box_sw_delay = 0.;
+    }
+}
+
 float MapLookahead::get_expected_cost(RRNodeId current_node, RRNodeId target_node, const t_conn_cost_params& params, float R_upstream) const {
     auto& device_ctx = g_vpr_ctx.device();
     const auto& rr_graph = device_ctx.rr_graph;
@@ -404,6 +424,7 @@ std::pair<float, float> MapLookahead::get_expected_delay_and_cong(RRNodeId from_
 
     int delta_x, delta_y;
     int from_layer_num = rr_graph.node_layer(from_node);
+    int to_layer_num = rr_graph.node_layer(to_node);
     get_xy_deltas(from_node, to_node, &delta_x, &delta_y);
     delta_x = abs(delta_x);
     delta_y = abs(delta_y);
@@ -418,66 +439,44 @@ std::pair<float, float> MapLookahead::get_expected_delay_and_cong(RRNodeId from_
         //reachable, we query the f_wire_cost_map (i.e. the wire lookahead) to get the final
         //delay to reach the sink.
 
-        t_physical_tile_type_ptr tile_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(from_node),
+        t_physical_tile_type_ptr from_tile_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(from_node),
                                                                                 rr_graph.node_ylow(from_node),
                                                                                 from_layer_num});
+        t_physical_tile_type_ptr to_tile_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(to_node),
+                                                                                   rr_graph.node_ylow(to_node),
+                                                                                   to_layer_num});
 
-        auto tile_index = std::distance(&device_ctx.physical_tile_types[0], tile_type);
+        auto from_tile_index = std::distance(&device_ctx.physical_tile_types[0], from_tile_type);
+        auto to_tile_index = std::distance(&device_ctx.physical_tile_types[0], to_tile_type);
 
         auto from_ptc = rr_graph.node_ptc_num(from_node);
+        auto to_ptc = rr_graph.node_ptc_num(to_node);
+
+        if (from_layer_num == to_layer_num ||
+            inter_layer_connection[to_layer_num][to_tile_index][to_ptc].find(from_layer_num) !=
+                inter_layer_connection[to_layer_num][to_tile_index][to_ptc].end()) {
+            std::tie(expected_delay_cost, expected_cong_cost) = get_cost_from_src_opin(src_opin_delays[from_layer_num][from_tile_index][from_ptc],
+                                                                                       from_layer_num,
+                                                                                       delta_x,
+                                                                                       delta_y);
+        }
 
-        if (this->src_opin_delays[from_layer_num][tile_index][from_ptc].empty()) {
-            //During lookahead profiling we were unable to find any wires which connected
-            //to this PTC.
-            //
-            //This can sometimes occur at very low channel widths (e.g. during min W search on
-            //small designs) where W discretization combined with fraction Fc may cause some
-            //pins/sources to be left disconnected.
-            //
-            //Such RR graphs are of course unroutable, but that should be determined by the
-            //router. So just return an arbitrary value here rather than error.
-
-            //We choose to return the largest (non-infinite) value possible, but scaled
-            //down by a large factor to maintain some dynaimc range in case this value ends
-            //up being processed (e.g. by the timing analyzer).
-            //
-            //The cost estimate should still be *extremely* large compared to a typical delay, and
-            //so should ensure that the router de-prioritizes exploring this path, but does not
-            //forbid the router from trying.
-            expected_delay_cost = std::numeric_limits<float>::max() / 1e12;
-            expected_cong_cost = std::numeric_limits<float>::max() / 1e12;
-        } else {
-            //From the current SOURCE/OPIN we look-up the wiretypes which are reachable
-            //and then add the estimates from those wire types for the distance of interest.
-            //If there are multiple options we use the minimum value.
-            for (const auto& kv : this->src_opin_delays[from_layer_num][tile_index][from_ptc]) {
-                const util::t_reachable_wire_inf& reachable_wire_inf = kv.second;
-
-                Cost_Entry wire_cost_entry;
-                if (reachable_wire_inf.wire_rr_type == SINK) {
-                    //Some pins maybe reachable via a direct (OPIN -> IPIN) connection.
-                    //In the lookahead, we treat such connections as 'special' wire types
-                    //with no delay/congestion cost
-                    wire_cost_entry.delay = 0;
-                    wire_cost_entry.congestion = 0;
-                } else {
-                    //For an actual accessible wire, we query the wire look-up to get it's
-                    //delay and congestion cost estimates
-                    wire_cost_entry = get_wire_cost_entry(reachable_wire_inf.wire_rr_type,
-                                                          reachable_wire_inf.wire_seg_index,
-                                                          from_layer_num,
-                                                          delta_x,
-                                                          delta_y);
+        if (from_layer_num != to_layer_num) {
+                float tmp_expected_delay_cost, tmp_expected_cong_cost;
+                std::tie(tmp_expected_delay_cost, tmp_expected_cong_cost) =
+                    get_cost_from_src_opin(src_opin_inter_layer_delays[from_layer_num][from_tile_index][from_ptc][to_layer_num],
+                                           to_layer_num,
+                                           delta_x,
+                                           delta_y);
+                if (tmp_expected_delay_cost < expected_delay_cost) {
+                    expected_delay_cost = tmp_expected_delay_cost;
+                    expected_cong_cost = tmp_expected_cong_cost;
                 }
-
-                float this_delay_cost = (params.criticality) * (reachable_wire_inf.delay + wire_cost_entry.delay);
-                float this_cong_cost = (1. - params.criticality) * (reachable_wire_inf.congestion + wire_cost_entry.congestion);
-
-                expected_delay_cost = std::min(expected_delay_cost, this_delay_cost);
-                expected_cong_cost = std::min(expected_cong_cost, this_cong_cost);
-            }
         }
 
+        expected_delay_cost *= params.criticality;
+        expected_cong_cost *= (1 - params.criticality);
+
         VTR_ASSERT_SAFE_MSG(std::isfinite(expected_delay_cost),
                             vtr::string_fmt("Lookahead failed to estimate cost from %s: %s",
                                             rr_node_arch_name(size_t(from_node), is_flat_).c_str(),
@@ -490,7 +489,6 @@ std::pair<float, float> MapLookahead::get_expected_delay_and_cong(RRNodeId from_
                                 .c_str());
 
     } else if (from_type == CHANX || from_type == CHANY) {
-        VTR_ASSERT_SAFE(from_type == CHANX || from_type == CHANY);
         //When estimating costs from a wire, we directly look-up the result in the wire lookahead (f_wire_cost_map)
 
         auto from_cost_index = rr_graph.node_cost_index(from_node);
@@ -498,18 +496,38 @@ std::pair<float, float> MapLookahead::get_expected_delay_and_cong(RRNodeId from_
 
         VTR_ASSERT(from_seg_index >= 0);
 
-        /* now get the expected cost from our lookahead map */
-        Cost_Entry cost_entry = get_wire_cost_entry(from_type,
-                                                    from_seg_index,
-                                                    from_layer_num,
-                                                    delta_x,
-                                                    delta_y);
+        bool get_cost_entry = true;
+        if (from_layer_num != to_layer_num) {
+                t_physical_tile_type_ptr to_tile_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(to_node),
+                                                                                           rr_graph.node_ylow(to_node),
+                                                                                           to_layer_num});
+                auto to_tile_index = std::distance(&device_ctx.physical_tile_types[0], to_tile_type);
+                auto to_ptc = rr_graph.node_ptc_num(to_node);
+                if(inter_layer_connection[to_layer_num][to_tile_index][to_ptc].find(from_layer_num) ==
+                    inter_layer_connection[to_layer_num][to_tile_index][to_ptc].end()) {
+                    get_cost_entry = false;
+                    expected_delay_cost = std::numeric_limits<float>::max() / 1e12;
+                    expected_cong_cost = std::numeric_limits<float>::max() / 1e12;
+                }
+        }
+
+        if (get_cost_entry) {
+            /* now get the expected cost from our lookahead map */
+            Cost_Entry cost_entry = get_wire_cost_entry(from_type,
+                                                        from_seg_index,
+                                                        from_layer_num,
+                                                        delta_x,
+                                                        delta_y);
+            expected_delay_cost = cost_entry.delay;
+            expected_cong_cost = cost_entry.congestion;
+            if(from_layer_num != to_layer_num) {
+                    expected_delay_cost += inter_layer_connection_box_sw_delay;
+            }
 
-        float expected_delay = cost_entry.delay;
-        float expected_cong = cost_entry.congestion;
+            expected_delay_cost *= params.criticality;
+            expected_cong_cost *= (1 - params.criticality);
 
-        expected_delay_cost = params.criticality * expected_delay;
-        expected_cong_cost = (1.0 - params.criticality) * expected_cong;
+        }
 
         VTR_ASSERT_SAFE_MSG(std::isfinite(expected_delay_cost),
                             vtr::string_fmt("Lookahead failed to estimate cost from %s: %s",
@@ -542,6 +560,9 @@ void MapLookahead::compute(const std::vector<t_segment_inf>& segment_inf) {
     std::tie(this->src_opin_delays, this->src_opin_inter_layer_delays) =
         util::compute_router_src_opin_lookahead(is_flat_);
 
+    // Store the sinks that have connections to other layers
+    this->inter_layer_connection = util::register_tiles_with_inter_layer_connection_block(is_flat_);
+
 }
 
 void MapLookahead::compute_intra_tile() {
@@ -566,6 +587,8 @@ void MapLookahead::read(const std::string& file) {
     //from the different physical tile type's SOURCEs & OPINs
     std::tie(this->src_opin_delays, this->src_opin_inter_layer_delays) =
         util::compute_router_src_opin_lookahead(is_flat_);
+
+    this->inter_layer_connection = util::register_tiles_with_inter_layer_connection_block(is_flat_);
 }
 
 void MapLookahead::read_intra_cluster(const std::string& file) {
@@ -1467,6 +1490,67 @@ static void min_global_cost_map(vtr::NdMatrix<util::Cost_Entry, 3>& internal_opi
     }
 }
 
+static std::pair<float, float> get_cost_from_src_opin(const std::map<int, util::t_reachable_wire_inf>& src_opin_delay_map,
+                                                      int layer_num,
+                                                      int delta_x,
+                                                      int delta_y) {
+    float expected_delay_cost = std::numeric_limits<float>::infinity();
+    float expected_cong_cost = std::numeric_limits<float>::infinity();
+    if (src_opin_delay_map.empty()) {
+        //During lookahead profiling we were unable to find any wires which connected
+        //to this PTC.
+        //
+        //This can sometimes occur at very low channel widths (e.g. during min W search on
+        //small designs) where W discretization combined with fraction Fc may cause some
+        //pins/sources to be left disconnected.
+        //
+        //Such RR graphs are of course unroutable, but that should be determined by the
+        //router. So just return an arbitrary value here rather than error.
+
+        //We choose to return the largest (non-infinite) value possible, but scaled
+        //down by a large factor to maintain some dynamic range in case this value ends
+        //up being processed (e.g. by the timing analyzer).
+        //
+        //The cost estimate should still be *extremely* large compared to a typical delay, and
+        //so should ensure that the router de-prioritizes exploring this path, but does not
+        //forbid the router from trying.
+        expected_delay_cost = std::numeric_limits<float>::max() / 1e12;
+        expected_cong_cost = std::numeric_limits<float>::max() / 1e12;
+    } else {
+        //From the current SOURCE/OPIN we look-up the wiretypes which are reachable
+        //and then add the estimates from those wire types for the distance of interest.
+        //If there are multiple options we use the minimum value.
+        for (const auto& kv : src_opin_delay_map) {
+            const util::t_reachable_wire_inf& reachable_wire_inf = kv.second;
+
+            Cost_Entry wire_cost_entry;
+            if (reachable_wire_inf.wire_rr_type == SINK) {
+                //Some pins may be reachable via a direct (OPIN -> IPIN) connection.
+                //In the lookahead, we treat such connections as 'special' wire types
+                //with no delay/congestion cost
+                wire_cost_entry.delay = 0;
+                wire_cost_entry.congestion = 0;
+            } else {
+                //For an actual accessible wire, we query the wire look-up to get its
+                //delay and congestion cost estimates
+                wire_cost_entry = get_wire_cost_entry(reachable_wire_inf.wire_rr_type,
+                                                      reachable_wire_inf.wire_seg_index,
+                                                      layer_num,
+                                                      delta_x,
+                                                      delta_y);
+            }
+
+            float this_delay_cost = reachable_wire_inf.delay + wire_cost_entry.delay;
+            float this_cong_cost = reachable_wire_inf.congestion + wire_cost_entry.congestion;
+
+            expected_delay_cost = std::min(expected_delay_cost, this_delay_cost);
+            expected_cong_cost = std::min(expected_cong_cost, this_cong_cost);
+        }
+    }
+
+    return std::make_pair(expected_delay_cost, expected_cong_cost);
+}
+
 //
 // When writing capnp targetted serialization, always allow compilation when
 // VTR_ENABLE_CAPNPROTO=OFF.  Generally this means throwing an exception
diff --git a/vpr/src/route/router_lookahead_map.h b/vpr/src/route/router_lookahead_map.h
index 8e1f4e7ee5d..6706cd3cd4e 100644
--- a/vpr/src/route/router_lookahead_map.h
+++ b/vpr/src/route/router_lookahead_map.h
@@ -8,9 +8,7 @@
 
 class MapLookahead : public RouterLookahead {
   public:
-    explicit MapLookahead(const t_det_routing_arch& det_routing_arch, bool is_flat)
-        : det_routing_arch_(det_routing_arch)
-        , is_flat_(is_flat) {}
+    explicit MapLookahead(const t_det_routing_arch& det_routing_arch, bool is_flat);
 
   private:
     //Look-up table from SOURCE/OPIN to CHANX/CHANY of various types
@@ -23,6 +21,10 @@ class MapLookahead : public RouterLookahead {
     vtr::NdMatrix<util::Cost_Entry, 3> distance_based_min_cost; // [layer_num][dx][dy] -> cost
     // [tile_index][from_layer_num][to_layer_num] -> pair<seg_index, t_reachable_wire_inf>
     util::t_src_opin_inter_layer_delays src_opin_inter_layer_delays;
+    // [sink_layer_num][tile_index][sink ptc num] -> set of layers that have connections to the given sink
+    util::t_sink_inter_layer_connection inter_layer_connection;
+
+    float inter_layer_connection_box_sw_delay;
     const t_det_routing_arch& det_routing_arch_;
     bool is_flat_;
 
diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp
index bbb96821cb1..3fbfd87d28a 100644
--- a/vpr/src/route/router_lookahead_map_utils.cpp
+++ b/vpr/src/route/router_lookahead_map_utils.cpp
@@ -628,7 +628,7 @@ static void dijkstra_flood_to_wires(int itile,
                 src_opin_delays[node_layer_num][itile][ptc][seg_index].delay = curr.delay;
                 src_opin_delays[node_layer_num][itile][ptc][seg_index].congestion = curr.congestion;
             } else if (is_multi_layer && (!src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num].count(seg_index)
-                        || src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].delay)
+                        || curr.delay < src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].delay)
                        && curr_layer_num != node_layer_num) {
                 src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].wire_rr_type = curr_rr_type;
                 src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].wire_seg_index = seg_index;

From 877bc765009e4ba70651a7db3679e9fafc27cc93 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 4 Jul 2023 10:09:14 -0400
Subject: [PATCH 007/257] get arch_sw_id instead of rr_sw_id of inter-die
 switch

---
 vpr/src/base/SetupVPR.cpp                    | 2 +-
 vpr/src/route/router_lookahead_map.cpp       | 2 +-
 vpr/src/route/router_lookahead_map_utils.cpp | 1 -
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp
index 8a122fb7b3c..18306db87a7 100644
--- a/vpr/src/base/SetupVPR.cpp
+++ b/vpr/src/base/SetupVPR.cpp
@@ -758,7 +758,7 @@ static void find_ipin_cblock_switch_index(const t_arch& Arch, int& wire_to_arch_
             wire_to_arch_ipin_switch = ipin_cblock_switch_index;
         } else {
             wire_to_arch_ipin_switch_between_dice = ipin_cblock_switch_index;
-        };
+        }
     }
 }
 
diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp
index 719e3dc4da4..5c3e82bbecb 100644
--- a/vpr/src/route/router_lookahead_map.cpp
+++ b/vpr/src/route/router_lookahead_map.cpp
@@ -280,7 +280,7 @@ MapLookahead::MapLookahead(const t_det_routing_arch& det_routing_arch, bool is_f
     int num_layers = g_vpr_ctx.device().grid.get_num_layers();
     if (num_layers > 1) {
         const auto& sw_inf = g_vpr_ctx.device().all_sw_inf;
-        int inter_layer_sw_id = det_routing_arch_.wire_to_rr_ipin_switch_between_dice;
+        int inter_layer_sw_id = det_routing_arch_.wire_to_arch_ipin_switch_between_dice;
         VTR_ASSERT(inter_layer_sw_id >= 0);
         inter_layer_connection_box_sw_delay = sw_inf.at(inter_layer_sw_id).Tdel();
     } else {
diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp
index 3fbfd87d28a..f7186ef70d6 100644
--- a/vpr/src/route/router_lookahead_map_utils.cpp
+++ b/vpr/src/route/router_lookahead_map_utils.cpp
@@ -416,7 +416,6 @@ std::pair<t_src_opin_delays, t_src_opin_inter_layer_delays> compute_router_src_o
 t_sink_inter_layer_connection register_tiles_with_inter_layer_connection_block(bool is_flat) {
     vtr::ScopedStartFinishTimer timer("Computing sink inter layer lookahead");
     auto& device_ctx = g_vpr_ctx.device();
-    auto& rr_graph = device_ctx.rr_graph;
 
     int num_layers = device_ctx.grid.get_num_layers();
     bool is_multi_layer = (num_layers > 1);

From 39f985cc787097746f3bed0215203743bd0a99c5 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 4 Jul 2023 11:45:31 -0400
Subject: [PATCH 008/257] add layer_min/max to t_bb

---
 utils/route_diag/src/main.cpp                    |  2 ++
 vpr/src/base/vpr_types.h                         |  9 +++++++--
 vpr/src/place/feasible_region_move_generator.cpp |  3 +++
 vpr/src/place/initial_placement.cpp              |  6 ++++--
 vpr/src/place/move_utils.cpp                     |  6 ++++--
 vpr/src/route/connection_router.cpp              |  8 ++++++++
 vpr/src/route/route_common.cpp                   |  8 ++++++++
 vpr/src/route/route_timing.cpp                   |  4 ++++
 vpr/src/route/router_delay_profiling.cpp         | 10 +++++++++-
 vpr/src/route/router_delay_profiling.h           |  5 ++++-
 vpr/test/test_connection_router.cpp              |  2 ++
 11 files changed, 55 insertions(+), 8 deletions(-)

diff --git a/utils/route_diag/src/main.cpp b/utils/route_diag/src/main.cpp
index 892674cc43b..ee993d3f453 100644
--- a/utils/route_diag/src/main.cpp
+++ b/utils/route_diag/src/main.cpp
@@ -84,6 +84,8 @@ static void do_one_route(const Netlist<>& net_list,
     bounding_box.xmax = device_ctx.grid.width() + 1;
     bounding_box.ymin = 0;
     bounding_box.ymax = device_ctx.grid.height() + 1;
+    bounding_box.layer_min = 0;
+    bounding_box.layer_max = device_ctx.grid.get_num_layers() + 1;
 
     t_conn_cost_params cost_params;
     cost_params.criticality = router_opts.max_criticality;
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 1f47c1030d5..5b7ce78151e 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -576,18 +576,23 @@ struct t_net_power {
  */
 struct t_bb {
     t_bb() = default;
-    t_bb(int xmin_, int xmax_, int ymin_, int ymax_)
+    t_bb(int xmin_, int xmax_, int ymin_, int ymax_, int layer_min_, int layer_max_)
         : xmin(xmin_)
         , xmax(xmax_)
         , ymin(ymin_)
-        , ymax(ymax_) {
+        , ymax(ymax_)
+        , layer_min(layer_min_)
+        , layer_max(layer_max_) {
         VTR_ASSERT(xmax_ >= xmin_);
         VTR_ASSERT(ymax_ >= ymin_);
+        VTR_ASSERT(layer_max_ >= layer_min_);
     }
     int xmin = OPEN;
     int xmax = OPEN;
     int ymin = OPEN;
     int ymax = OPEN;
+    int layer_min = OPEN;
+    int layer_max = OPEN;
 };
 
 /**
diff --git a/vpr/src/place/feasible_region_move_generator.cpp b/vpr/src/place/feasible_region_move_generator.cpp
index ee69aeda5f0..45c3f09093f 100644
--- a/vpr/src/place/feasible_region_move_generator.cpp
+++ b/vpr/src/place/feasible_region_move_generator.cpp
@@ -98,6 +98,9 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
         FR_coords.ymin = std::min(from.y, max_y);
         FR_coords.ymax = std::max(from.y, yt);
     }
+
+    FR_coords.layer_min = from.layer;
+    FR_coords.layer_max = from.layer;
     VTR_ASSERT(FR_coords.ymin <= FR_coords.ymax);
 
     t_range_limiters range_limiters;
diff --git a/vpr/src/place/initial_placement.cpp b/vpr/src/place/initial_placement.cpp
index 38bc772fefc..50c667d82d4 100644
--- a/vpr/src/place/initial_placement.cpp
+++ b/vpr/src/place/initial_placement.cpp
@@ -634,11 +634,13 @@ static bool try_random_placement(t_pl_macro pl_macro, PartitionRegion& pr, t_log
     t_physical_tile_loc to_compressed_loc;
 
     bool legal;
+    t_bb place_bb(min_compressed_loc.x, max_compressed_loc.x,
+                  min_compressed_loc.y, max_compressed_loc.y,
+                  reg_coord.layer_num, reg_coord.layer_num);
     legal = find_compatible_compressed_loc_in_range(block_type,
                                                     delta_cx,
                                                     {cx_from, cy_from, reg_coord.layer_num},
-                                                    {min_compressed_loc.x, max_compressed_loc.x,
-                                                     min_compressed_loc.y, max_compressed_loc.y},
+                                                    place_bb,
                                                     to_compressed_loc,
                                                     false,
                                                     reg_coord.layer_num);
diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index 5e1188db6c3..5d4b945b87a 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -814,7 +814,9 @@ bool find_to_loc_median(t_logical_block_type_ptr blk_type,
     t_bb search_range(min_compressed_loc[from_layer_num].x,
                       max_compressed_loc[from_layer_num].x,
                       min_compressed_loc[from_layer_num].y,
-                      max_compressed_loc[from_layer_num].y);
+                      max_compressed_loc[from_layer_num].y,
+                      from_layer_num,
+                      from_layer_num);
 
     t_physical_tile_loc to_compressed_loc;
     bool legal = false;
@@ -1171,7 +1173,7 @@ std::vector<t_bb> get_compressed_grid_bounded_search_range(const t_compressed_bl
             max_cy = std::min<int>(compressed_block_grid.get_num_rows(layer_num) - 1, cy_from + rlim_y_max_range);
         }
 
-        search_range[layer_num] = t_bb(min_cx, max_cx, min_cy, max_cy);
+        search_range[layer_num] = t_bb(min_cx, max_cx, min_cy, max_cy, layer_num, layer_num);
     }
 
     return search_range;
diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp
index 4d0c0f96f05..da8405d058f 100644
--- a/vpr/src/route/connection_router.cpp
+++ b/vpr/src/route/connection_router.cpp
@@ -124,6 +124,8 @@ t_heap* ConnectionRouter<Heap>::timing_driven_route_connection_common_setup(
         full_device_bounding_box.ymin = 0;
         full_device_bounding_box.xmax = grid_.width() - 1;
         full_device_bounding_box.ymax = grid_.height() - 1;
+        full_device_bounding_box.layer_min = 0;
+        full_device_bounding_box.layer_max = grid_.get_num_layers() - 1;
 
         //
         //TODO: potential future optimization
@@ -443,6 +445,8 @@ void ConnectionRouter<Heap>::timing_driven_expand_neighbours(t_heap* current,
         target_bb.ymin = rr_graph_->node_ylow(RRNodeId(target_node));
         target_bb.xmax = rr_graph_->node_xhigh(RRNodeId(target_node));
         target_bb.ymax = rr_graph_->node_yhigh(RRNodeId(target_node));
+        target_bb.layer_min = rr_graph_->node_layer(RRNodeId(target_node));
+        target_bb.layer_max = rr_graph_->node_layer(RRNodeId(target_node));
     }
 
     // For each node associated with the current heap element, expand all of it's neighbors
@@ -1023,6 +1027,8 @@ t_bb ConnectionRouter<Heap>::add_high_fanout_route_tree_to_heap(
     highfanout_bb.xmax = rr_graph_->node_xhigh(target_node_id);
     highfanout_bb.ymin = rr_graph_->node_ylow(target_node_id);
     highfanout_bb.ymax = rr_graph_->node_yhigh(target_node_id);
+    highfanout_bb.layer_min = rr_graph_->node_layer(target_node_id);
+    highfanout_bb.layer_max = rr_graph_->node_layer(target_node_id);
 
     //Add existing routing starting from the target bin.
     //If the target's bin has insufficient existing routing add from the surrounding bins
@@ -1054,6 +1060,8 @@ t_bb ConnectionRouter<Heap>::add_high_fanout_route_tree_to_heap(
                 highfanout_bb.ymin = std::min<int>(highfanout_bb.ymin, rr_graph_->node_ylow(rr_node_to_add));
                 highfanout_bb.xmax = std::max<int>(highfanout_bb.xmax, rr_graph_->node_xhigh(rr_node_to_add));
                 highfanout_bb.ymax = std::max<int>(highfanout_bb.ymax, rr_graph_->node_yhigh(rr_node_to_add));
+                highfanout_bb.layer_min = std::min<int>(highfanout_bb.layer_min, rr_graph_->node_layer(rr_node_to_add));
+                highfanout_bb.layer_max = std::max<int>(highfanout_bb.layer_max, rr_graph_->node_layer(rr_node_to_add));
                 if (is_flat_) {
                     if (rr_graph_->node_type(rr_node_to_add) == CHANY || rr_graph_->node_type(rr_node_to_add) == CHANX) {
                         chan_nodes_added++;
diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp
index 466608319fb..62faa9f9ab7 100644
--- a/vpr/src/route/route_common.cpp
+++ b/vpr/src/route/route_common.cpp
@@ -809,6 +809,8 @@ vtr::vector<ParentNetId, t_bb> load_route_bb(const Netlist<>& net_list,
         full_device_bounding_box.ymin = 0;
         full_device_bounding_box.xmax = device_ctx.grid.width() - 1;
         full_device_bounding_box.ymax = device_ctx.grid.height() - 1;
+        full_device_bounding_box.layer_min = 0;
+        full_device_bounding_box.layer_max = device_ctx.grid.get_num_layers() - 1;
     }
 
     auto nets = net_list.nets();
@@ -879,6 +881,8 @@ t_bb load_net_route_bb(const Netlist<>& net_list,
     int ymin = rr_graph.node_ylow(driver_rr);
     int xmax = rr_graph.node_xhigh(driver_rr);
     int ymax = rr_graph.node_yhigh(driver_rr);
+    int layer_min = rr_graph.node_layer(driver_rr);
+    int layer_max = rr_graph.node_layer(driver_rr);
 
     auto net_sinks = net_list.net_sinks(net_id);
     for (size_t ipin = 1; ipin < net_sinks.size() + 1; ++ipin) { //Start at 1 since looping through sinks
@@ -892,6 +896,8 @@ t_bb load_net_route_bb(const Netlist<>& net_list,
         xmax = std::max<int>(xmax, rr_graph.node_xhigh(sink_rr));
         ymin = std::min<int>(ymin, rr_graph.node_ylow(sink_rr));
         ymax = std::max<int>(ymax, rr_graph.node_yhigh(sink_rr));
+        layer_min = std::min<int>(layer_min, rr_graph.node_layer(sink_rr));
+        layer_max = std::max<int>(layer_max, rr_graph.node_layer(sink_rr));
     }
 
     /* Want the channels on all 4 sides to be usuable, even if bb_factor = 0. */
@@ -907,6 +913,8 @@ t_bb load_net_route_bb(const Netlist<>& net_list,
     bb.xmax = std::min<int>(xmax + bb_factor, device_ctx.grid.width() - 1);
     bb.ymin = std::max<int>(ymin - bb_factor, 0);
     bb.ymax = std::min<int>(ymax + bb_factor, device_ctx.grid.height() - 1);
+    bb.layer_min = layer_min;
+    bb.layer_max = layer_max;
 
     return bb;
 }
diff --git a/vpr/src/route/route_timing.cpp b/vpr/src/route/route_timing.cpp
index 48074f717cb..b8f71835626 100644
--- a/vpr/src/route/route_timing.cpp
+++ b/vpr/src/route/route_timing.cpp
@@ -2005,8 +2005,10 @@ static t_bb calc_current_bb(const RouteTree& tree) {
     t_bb bb;
     bb.xmin = grid.width() - 1;
     bb.ymin = grid.height() - 1;
+    bb.layer_min = grid.get_num_layers() - 1;
     bb.xmax = 0;
     bb.ymax = 0;
+    bb.layer_max = 0;
 
     for (auto& rt_node : tree.all_nodes()) {
         //The router interprets RR nodes which cross the boundary as being
@@ -2015,8 +2017,10 @@ static t_bb calc_current_bb(const RouteTree& tree) {
         //and xlow/ylow for xmax/ymax calculations
         bb.xmin = std::min<int>(bb.xmin, rr_graph.node_xhigh(rt_node.inode));
         bb.ymin = std::min<int>(bb.ymin, rr_graph.node_yhigh(rt_node.inode));
+        bb.layer_min = std::min<int>(bb.layer_min, rr_graph.node_layer(rt_node.inode));
         bb.xmax = std::max<int>(bb.xmax, rr_graph.node_xlow(rt_node.inode));
         bb.ymax = std::max<int>(bb.ymax, rr_graph.node_ylow(rt_node.inode));
+        bb.layer_max = std::max<int>(bb.layer_max, rr_graph.node_layer(rt_node.inode));
     }
 
     VTR_ASSERT(bb.xmin <= bb.xmax);
diff --git a/vpr/src/route/router_delay_profiling.cpp b/vpr/src/route/router_delay_profiling.cpp
index a2b5faa4b75..32c1bc65928 100644
--- a/vpr/src/route/router_delay_profiling.cpp
+++ b/vpr/src/route/router_delay_profiling.cpp
@@ -23,7 +23,10 @@ RouterDelayProfiler::RouterDelayProfiler(const Netlist<>& net_list,
           is_flat)
     , is_flat_(is_flat) {}
 
-bool RouterDelayProfiler::calculate_delay(int source_node, int sink_node, const t_router_opts& router_opts, float* net_delay) {
+bool RouterDelayProfiler::calculate_delay(int source_node,
+                                          int sink_node,
+                                          const t_router_opts& router_opts,
+                                          float* net_delay) {
     /* Returns true as long as found some way to hook up this net, even if that *
      * way resulted in overuse of resources (congestion).  If there is no way   *
      * to route this net, even ignoring congestion, it returns false.  In this  *
@@ -54,6 +57,9 @@ bool RouterDelayProfiler::calculate_delay(int source_node, int sink_node, const
     bounding_box.xmax = device_ctx.grid.width() + 1;
     bounding_box.ymin = 0;
     bounding_box.ymax = device_ctx.grid.height() + 1;
+    bounding_box.layer_min = 0;
+    bounding_box.layer_max = device_ctx.grid.get_num_layers() + 1;
+
 
     t_conn_cost_params cost_params;
     cost_params.criticality = 1.;
@@ -117,6 +123,8 @@ std::vector<float> calculate_all_path_delays_from_rr_node(int src_rr_node,
     bounding_box.xmax = device_ctx.grid.width() + 1;
     bounding_box.ymin = 0;
     bounding_box.ymax = device_ctx.grid.height() + 1;
+    bounding_box.layer_min = 0;
+    bounding_box.layer_max = device_ctx.grid.get_num_layers() + 1;
 
     t_conn_cost_params cost_params;
     cost_params.criticality = 1.;
diff --git a/vpr/src/route/router_delay_profiling.h b/vpr/src/route/router_delay_profiling.h
index ac2b507094b..5dd929013d9 100644
--- a/vpr/src/route/router_delay_profiling.h
+++ b/vpr/src/route/router_delay_profiling.h
@@ -13,7 +13,10 @@ class RouterDelayProfiler {
     RouterDelayProfiler(const Netlist<>& net_list,
                         const RouterLookahead* lookahead,
                         bool is_flat);
-    bool calculate_delay(int source_node, int sink_node, const t_router_opts& router_opts, float* net_delay);
+    bool calculate_delay(int source_node,
+                         int sink_node,
+                         const t_router_opts& router_opts,
+                         float* net_delay);
 
   private:
     const Netlist<>& net_list_;
diff --git a/vpr/test/test_connection_router.cpp b/vpr/test/test_connection_router.cpp
index 82109c9b45b..84893424f18 100644
--- a/vpr/test/test_connection_router.cpp
+++ b/vpr/test/test_connection_router.cpp
@@ -33,6 +33,8 @@ static float do_one_route(int source_node,
     bounding_box.xmax = device_ctx.grid.width() + 1;
     bounding_box.ymin = 0;
     bounding_box.ymax = device_ctx.grid.height() + 1;
+    bounding_box.layer_min = 0;
+    bounding_box.layer_max = device_ctx.grid.get_num_layers() + 1;
 
     t_conn_cost_params cost_params;
     cost_params.criticality = router_opts.max_criticality;

From ea24804701271907a907af903f38dec532204cc1 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 4 Jul 2023 11:46:35 -0400
Subject: [PATCH 009/257] limit the router search to the same layer when
 building place delta delay

---
 utils/route_diag/src/main.cpp            |  8 +++++---
 vpr/src/place/timing_place_lookup.cpp    |  5 +++--
 vpr/src/route/router_delay_profiling.cpp | 12 +++++++++---
 vpr/src/route/router_delay_profiling.h   |  3 ++-
 4 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/utils/route_diag/src/main.cpp b/utils/route_diag/src/main.cpp
index ee993d3f453..9e6f58fc43d 100644
--- a/utils/route_diag/src/main.cpp
+++ b/utils/route_diag/src/main.cpp
@@ -203,9 +203,11 @@ static void profile_source(const Netlist<>& net_list,
                     vtr::ScopedStartFinishTimer delay_timer(vtr::string_fmt(
                         "Routing Src: %d Sink: %d", source_rr_node,
                         sink_rr_node));
-                    successfully_routed = profiler.calculate_delay(source_rr_node, sink_rr_node,
-                                                        router_opts,
-                                                        &delays[sink_x][sink_y]);
+                    successfully_routed = profiler.calculate_delay(source_rr_node,
+                                                                   sink_rr_node,
+                                                                   router_opts,
+                                                                   &delays[sink_x][sink_y],
+                                                                   OPEN);
                 }
 
                 if (successfully_routed) {
diff --git a/vpr/src/place/timing_place_lookup.cpp b/vpr/src/place/timing_place_lookup.cpp
index 74682d220f3..ad75484ffe0 100644
--- a/vpr/src/place/timing_place_lookup.cpp
+++ b/vpr/src/place/timing_place_lookup.cpp
@@ -388,7 +388,8 @@ static float route_connection_delay(
                 successfully_routed = route_profiler.calculate_delay(
                     size_t(source_rr_node), size_t(sink_rr_node),
                     router_opts,
-                    &net_delay_value);
+                    &net_delay_value,
+                    layer_num);
             }
 
             if (successfully_routed) break;
@@ -1196,7 +1197,7 @@ void OverrideDelayModel::compute_override_delay_model(
             VTR_ASSERT(sink_rr != OPEN);
 
             float direct_connect_delay = std::numeric_limits<float>::quiet_NaN();
-            bool found_routing_path = route_profiler.calculate_delay(src_rr, sink_rr, router_opts2, &direct_connect_delay);
+            bool found_routing_path = route_profiler.calculate_delay(src_rr, sink_rr, router_opts2, &direct_connect_delay, OPEN);
 
             if (found_routing_path) {
                 set_delay_override(from_type->index, from_pin_class, to_type->index, to_pin_class, direct->x_offset, direct->y_offset, direct_connect_delay);
diff --git a/vpr/src/route/router_delay_profiling.cpp b/vpr/src/route/router_delay_profiling.cpp
index 32c1bc65928..ab8ea943f93 100644
--- a/vpr/src/route/router_delay_profiling.cpp
+++ b/vpr/src/route/router_delay_profiling.cpp
@@ -26,7 +26,8 @@ RouterDelayProfiler::RouterDelayProfiler(const Netlist<>& net_list,
 bool RouterDelayProfiler::calculate_delay(int source_node,
                                           int sink_node,
                                           const t_router_opts& router_opts,
-                                          float* net_delay) {
+                                          float* net_delay,
+                                          int layer_num) {
     /* Returns true as long as found some way to hook up this net, even if that *
      * way resulted in overuse of resources (congestion).  If there is no way   *
      * to route this net, even ignoring congestion, it returns false.  In this  *
@@ -57,8 +58,13 @@ bool RouterDelayProfiler::calculate_delay(int source_node,
     bounding_box.xmax = device_ctx.grid.width() + 1;
     bounding_box.ymin = 0;
     bounding_box.ymax = device_ctx.grid.height() + 1;
-    bounding_box.layer_min = 0;
-    bounding_box.layer_max = device_ctx.grid.get_num_layers() + 1;
+    if (layer_num == OPEN) {
+        bounding_box.layer_min = 0;
+        bounding_box.layer_max = device_ctx.grid.get_num_layers() + 1;
+    } else {
+        bounding_box.layer_min = layer_num;
+        bounding_box.layer_max = layer_num;
+    }
 
 
     t_conn_cost_params cost_params;
diff --git a/vpr/src/route/router_delay_profiling.h b/vpr/src/route/router_delay_profiling.h
index 5dd929013d9..a3943246acb 100644
--- a/vpr/src/route/router_delay_profiling.h
+++ b/vpr/src/route/router_delay_profiling.h
@@ -16,7 +16,8 @@ class RouterDelayProfiler {
     bool calculate_delay(int source_node,
                          int sink_node,
                          const t_router_opts& router_opts,
-                         float* net_delay);
+                         float* net_delay,
+                         int layer_num);
 
   private:
     const Netlist<>& net_list_;

From 1865f35f4de2c492277863cd75446bd9a74e1ec7 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 4 Jul 2023 11:49:27 -0400
Subject: [PATCH 010/257] prune the node if its layer is not in the bounding
 box layer range

---
 vpr/src/route/connection_router.cpp | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp
index da8405d058f..fd85738ad2d 100644
--- a/vpr/src/route/connection_router.cpp
+++ b/vpr/src/route/connection_router.cpp
@@ -509,6 +509,7 @@ void ConnectionRouter<Heap>::timing_driven_expand_neighbour(t_heap* current,
     int to_ylow = rr_graph_->node_ylow(to_node);
     int to_xhigh = rr_graph_->node_xhigh(to_node);
     int to_yhigh = rr_graph_->node_yhigh(to_node);
+    int to_layer = rr_graph_->node_layer(to_node);
 
     // BB-pruning
     // Disable BB-pruning if RCV is enabled, as this can make it harder for circuits with high negative hold slack to resolve this
@@ -516,7 +517,9 @@ void ConnectionRouter<Heap>::timing_driven_expand_neighbour(t_heap* current,
     if ((to_xhigh < bounding_box.xmin    // Strictly left of BB left-edge
          || to_xlow > bounding_box.xmax  // Strictly right of BB right-edge
          || to_yhigh < bounding_box.ymin // Strictly below BB bottom-edge
-         || to_ylow > bounding_box.ymax) // Strictly above BB top-edge
+         || to_ylow > bounding_box.ymax // Strictly above BB top-edge
+         || to_layer < bounding_box.layer_min // Strictly below BB lowest layer
+         || to_layer > bounding_box.layer_max) // Strictly above BB highest layer
         && !rcv_path_manager.is_enabled()) {
         VTR_LOGV_DEBUG(router_debug_,
                        "      Pruned expansion of node %d edge %zu -> %d"
@@ -540,7 +543,9 @@ void ConnectionRouter<Heap>::timing_driven_expand_neighbour(t_heap* current,
             if (to_xlow < target_bb.xmin
                 || to_ylow < target_bb.ymin
                 || to_xhigh > target_bb.xmax
-                || to_yhigh > target_bb.ymax) {
+                || to_yhigh > target_bb.ymax
+                || to_layer < target_bb.layer_min
+                || to_layer > target_bb.layer_max) {
                 VTR_LOGV_DEBUG(router_debug_,
                                "      Pruned expansion of node %d edge %zu -> %d"
                                " (to node is IPIN at %d,%dx%d,%d which does not"

From cfbacd4f3f01a5b2068d89702b271a470df02158 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 4 Jul 2023 12:40:25 -0400
Subject: [PATCH 011/257] print node layer in node_coordinate_to_string

---
 libs/librrgraph/src/base/rr_graph_view.h | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/libs/librrgraph/src/base/rr_graph_view.h b/libs/librrgraph/src/base/rr_graph_view.h
index 3d808b23c71..7ff4d5912b1 100644
--- a/libs/librrgraph/src/base/rr_graph_view.h
+++ b/libs/librrgraph/src/base/rr_graph_view.h
@@ -233,6 +233,7 @@ class RRGraphView {
      * This function is inlined for runtime optimization.
      */
     inline const std::string node_coordinate_to_string(RRNodeId node) const {
+        std::string layer_num;
         std::string start_x;                                           //start x-coordinate
         std::string start_y;                                           //start y-coordinate
         std::string end_x;                                             //end x-coordinate
@@ -251,11 +252,13 @@ class RRGraphView {
             coordinate_string += ")"; //add the side of the routing resource node
             // For OPINs and IPINs the starting and ending coordinate are identical, so we can just arbitrarily assign the start to larger values
             // and the end to the lower coordinate
-            start_x = " (" + std::to_string(node_xhigh(node)) + ","; //start and end coordinates are the same for OPINs and IPINs
+            layer_num = " (" + std::to_string(node_layer(node)) + ",";
+            start_x =  std::to_string(node_xhigh(node)) + ","; //start and end coordinates are the same for OPINs and IPINs
             start_y = std::to_string(node_yhigh(node)) + ")";
         } else if (node_type(node) == SOURCE || node_type(node) == SINK) {
             // For SOURCE and SINK the starting and ending coordinate are identical, so just use start
-            start_x = "(" + std::to_string(node_xhigh(node)) + ",";
+            layer_num = " (" + std::to_string(node_layer(node)) + ",";
+            start_x = std::to_string(node_xhigh(node)) + ",";
             start_y = std::to_string(node_yhigh(node)) + ")";
         } else if (node_type(node) == CHANX || node_type(node) == CHANY) { //for channels, we would like to describe the component with segment specific information
             RRIndexedDataId cost_index = node_cost_index(node);
@@ -267,14 +270,16 @@ class RRGraphView {
             arrow = "->"; //we will point the coordinates from start to finish, left to right
 
             if (node_direction(node) == Direction::DEC) {                //signal travels along decreasing direction
-                start_x = " (" + std::to_string(node_xhigh(node)) + ","; //start coordinates have large value
+                layer_num = " (" + std::to_string(node_layer(node)) + ","; //layer number
+                start_x = std::to_string(node_xhigh(node)) + ","; //start coordinates have large value
                 start_y = std::to_string(node_yhigh(node)) + ")";
                 end_x = "(" + std::to_string(node_xlow(node)) + ","; //end coordinates have smaller value
                 end_y = std::to_string(node_ylow(node)) + ")";
             }
 
             else {                                                      // signal travels in increasing direction, stays at same point, or can travel both directions
-                start_x = " (" + std::to_string(node_xlow(node)) + ","; //start coordinates have smaller value
+                layer_num = " (" + std::to_string(node_layer(node)) + ","; //layer number
+                start_x = std::to_string(node_xlow(node)) + ","; //start coordinates have smaller value
                 start_y = std::to_string(node_ylow(node)) + ")";
                 end_x = "(" + std::to_string(node_xhigh(node)) + ","; //end coordinates have larger value
                 end_y = std::to_string(node_yhigh(node)) + ")";
@@ -284,9 +289,9 @@ class RRGraphView {
             }
         }
 
-        coordinate_string += start_x + start_y; //Write the starting coordinates
+        coordinate_string += layer_num + start_x + start_y; //Write the starting coordinates
         coordinate_string += arrow;             //Indicate the direction
-        coordinate_string += end_x + end_y;     //Write the end coordinates
+        coordinate_string += layer_num + end_x + end_y;     //Write the end coordinates
         return coordinate_string;
     }
 

From c78aad798db6b56583c345bcd5d69ae82609db28 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 4 Jul 2023 13:05:34 -0400
Subject: [PATCH 012/257] add end layer_num

---
 libs/librrgraph/src/base/rr_graph_view.h | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/libs/librrgraph/src/base/rr_graph_view.h b/libs/librrgraph/src/base/rr_graph_view.h
index 7ff4d5912b1..e3972c574d2 100644
--- a/libs/librrgraph/src/base/rr_graph_view.h
+++ b/libs/librrgraph/src/base/rr_graph_view.h
@@ -233,7 +233,8 @@ class RRGraphView {
      * This function is inlined for runtime optimization.
      */
     inline const std::string node_coordinate_to_string(RRNodeId node) const {
-        std::string layer_num;
+        std::string start_layer_num;
+        std::string end_layer_num;
         std::string start_x;                                           //start x-coordinate
         std::string start_y;                                           //start y-coordinate
         std::string end_x;                                             //end x-coordinate
@@ -252,12 +253,12 @@ class RRGraphView {
             coordinate_string += ")"; //add the side of the routing resource node
             // For OPINs and IPINs the starting and ending coordinate are identical, so we can just arbitrarily assign the start to larger values
             // and the end to the lower coordinate
-            layer_num = " (" + std::to_string(node_layer(node)) + ",";
+            start_layer_num = " (" + std::to_string(node_layer(node)) + ",";
             start_x =  std::to_string(node_xhigh(node)) + ","; //start and end coordinates are the same for OPINs and IPINs
             start_y = std::to_string(node_yhigh(node)) + ")";
         } else if (node_type(node) == SOURCE || node_type(node) == SINK) {
             // For SOURCE and SINK the starting and ending coordinate are identical, so just use start
-            layer_num = " (" + std::to_string(node_layer(node)) + ",";
+            start_layer_num = " (" + std::to_string(node_layer(node)) + ",";
             start_x = std::to_string(node_xhigh(node)) + ",";
             start_y = std::to_string(node_yhigh(node)) + ")";
         } else if (node_type(node) == CHANX || node_type(node) == CHANY) { //for channels, we would like to describe the component with segment specific information
@@ -270,18 +271,20 @@ class RRGraphView {
             arrow = "->"; //we will point the coordinates from start to finish, left to right
 
             if (node_direction(node) == Direction::DEC) {                //signal travels along decreasing direction
-                layer_num = " (" + std::to_string(node_layer(node)) + ","; //layer number
+                start_layer_num = " (" + std::to_string(node_layer(node)) + ","; //layer number
                 start_x = std::to_string(node_xhigh(node)) + ","; //start coordinates have large value
                 start_y = std::to_string(node_yhigh(node)) + ")";
-                end_x = "(" + std::to_string(node_xlow(node)) + ","; //end coordinates have smaller value
+                end_layer_num = " (" + std::to_string(node_layer(node)) + ",";
+                end_x = std::to_string(node_xlow(node)) + ","; //end coordinates have smaller value
                 end_y = std::to_string(node_ylow(node)) + ")";
             }
 
             else {                                                      // signal travels in increasing direction, stays at same point, or can travel both directions
-                layer_num = " (" + std::to_string(node_layer(node)) + ","; //layer number
+                start_layer_num = " (" + std::to_string(node_layer(node)) + ","; //layer number
                 start_x = std::to_string(node_xlow(node)) + ","; //start coordinates have smaller value
                 start_y = std::to_string(node_ylow(node)) + ")";
-                end_x = "(" + std::to_string(node_xhigh(node)) + ","; //end coordinates have larger value
+                end_layer_num = " (" + std::to_string(node_layer(node)) + ",";
+                end_x = std::to_string(node_xhigh(node)) + ","; //end coordinates have larger value
                 end_y = std::to_string(node_yhigh(node)) + ")";
                 if (node_direction(node) == Direction::BIDIR) {
                     arrow = "<->"; //indicate that signal can travel both direction
@@ -289,9 +292,9 @@ class RRGraphView {
             }
         }
 
-        coordinate_string += layer_num + start_x + start_y; //Write the starting coordinates
+        coordinate_string += start_layer_num + start_x + start_y; //Write the starting coordinates
         coordinate_string += arrow;             //Indicate the direction
-        coordinate_string += layer_num + end_x + end_y;     //Write the end coordinates
+        coordinate_string += end_layer_num + end_x + end_y;     //Write the end coordinates
         return coordinate_string;
     }
 

From eddd07c569d59b74f70b827b29b25c24a0c6f0d3 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 4 Jul 2023 18:06:53 -0400
Subject: [PATCH 013/257] add layer num to connection router debugger

---
 vpr/src/route/connection_router.cpp | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp
index fd85738ad2d..b15e11ecb4d 100644
--- a/vpr/src/route/connection_router.cpp
+++ b/vpr/src/route/connection_router.cpp
@@ -102,9 +102,9 @@ t_heap* ConnectionRouter<Heap>::timing_driven_route_connection_common_setup(
         return nullptr;
     }
 
-    VTR_LOGV_DEBUG(router_debug_, "  Routing to %d as normal net (BB: %d,%d x %d,%d)\n", sink_node,
-                   bounding_box.xmin, bounding_box.ymin,
-                   bounding_box.xmax, bounding_box.ymax);
+    VTR_LOGV_DEBUG(router_debug_, "  Routing to %d as normal net (BB: %d,%d,%d x %d,%d,%d)\n", sink_node,
+                   bounding_box.layer_min, bounding_box.xmin, bounding_box.ymin,
+                   bounding_box.layer_max, bounding_box.xmax, bounding_box.ymax);
 
     t_heap* cheapest = timing_driven_route_connection_from_heap(sink_node,
                                                                 cost_params,
@@ -192,9 +192,9 @@ std::pair<bool, t_heap> ConnectionRouter<Heap>::timing_driven_route_connection_f
         return std::make_pair(false, t_heap());
     }
 
-    VTR_LOGV_DEBUG(router_debug_, "  Routing to %d as high fanout net (BB: %d,%d x %d,%d)\n", sink_node,
-                   high_fanout_bb.xmin, high_fanout_bb.ymin,
-                   high_fanout_bb.xmax, high_fanout_bb.ymax);
+    VTR_LOGV_DEBUG(router_debug_, "  Routing to %d as high fanout net (BB: %d,%d,%d x %d,%d,%d)\n", sink_node,
+                   high_fanout_bb.layer_min, high_fanout_bb.xmin, high_fanout_bb.ymin,
+                   high_fanout_bb.layer_max, high_fanout_bb.xmax, high_fanout_bb.ymax);
 
     t_heap* cheapest = timing_driven_route_connection_from_heap(sink_node,
                                                                 cost_params,
@@ -523,11 +523,13 @@ void ConnectionRouter<Heap>::timing_driven_expand_neighbour(t_heap* current,
         && !rcv_path_manager.is_enabled()) {
         VTR_LOGV_DEBUG(router_debug_,
                        "      Pruned expansion of node %d edge %zu -> %d"
-                       " (to node location %d,%dx%d,%d outside of expanded"
-                       " net bounding box %d,%dx%d,%d)\n",
+                       " (to node location %d,%d,%d x %d,%d,%d outside of expanded"
+                       " net bounding box %d,%d,%d x %d,%d,%d)\n",
                        from_node, size_t(from_edge), to_node_int,
-                       to_xlow, to_ylow, to_xhigh, to_yhigh,
-                       bounding_box.xmin, bounding_box.ymin, bounding_box.xmax, bounding_box.ymax);
+                       to_layer, to_xlow, to_ylow,
+                       to_layer, to_xhigh, to_yhigh,
+                       bounding_box.layer_min, bounding_box.xmin, bounding_box.ymin,
+                       bounding_box.layer_max, bounding_box.xmax, bounding_box.ymax);
         return; /* Node is outside (expanded) bounding box. */
     }
 
@@ -548,10 +550,12 @@ void ConnectionRouter<Heap>::timing_driven_expand_neighbour(t_heap* current,
                 || to_layer > target_bb.layer_max) {
                 VTR_LOGV_DEBUG(router_debug_,
                                "      Pruned expansion of node %d edge %zu -> %d"
-                               " (to node is IPIN at %d,%dx%d,%d which does not"
-                               " lead to target block %d,%dx%d,%d)\n",
+                               " (to node is IPIN at %d,%d,%d x %d,%d,%d which does not"
+                               " lead to target block %d,%d,%d x %d,%d,%d)\n",
                                from_node, size_t(from_edge), to_node_int,
-                               to_xlow, to_ylow, to_xhigh, to_yhigh,
+                               to_layer, to_xlow, to_ylow,
+                               to_layer, to_xhigh, to_yhigh,
-                               target_bb.xmin, target_bb.ymin, target_bb.xmax, target_bb.ymax);
+                               target_bb.layer_min, target_bb.xmin, target_bb.ymin,
+                               target_bb.layer_max, target_bb.xmax, target_bb.ymax);
                 return;
             }

From 00ad779fdb4f4edaab90ed0414cb035fd7c190d8 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 4 Jul 2023 19:04:11 -0400
Subject: [PATCH 014/257] change delay function parameters to get
 t_physical_tile_loc instead of getting location components separately - Add
 an offset to delay if layer is crossed

---
 vpr/src/place/place_delay_model.cpp | 43 +++++++++++++++++------------
 vpr/src/place/place_delay_model.h   |  8 ++++--
 2 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/vpr/src/place/place_delay_model.cpp b/vpr/src/place/place_delay_model.cpp
index 44d8c4a0b49..0d79270582a 100644
--- a/vpr/src/place/place_delay_model.cpp
+++ b/vpr/src/place/place_delay_model.cpp
@@ -27,11 +27,20 @@
 #endif /* VTR_ENABLE_CAPNPROTO */
 
 ///@brief DeltaDelayModel methods.
-float DeltaDelayModel::delay(int from_x, int from_y, int /*from_pin*/, int to_x, int to_y, int /*to_pin*/, int layer_num) const {
-    int delta_x = std::abs(from_x - to_x);
-    int delta_y = std::abs(from_y - to_y);
-
-    return delays_[layer_num][delta_x][delta_y];
+float DeltaDelayModel::delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const {
+    int delta_x = std::abs(from_loc.x - to_loc.x);
+    int delta_y = std::abs(from_loc.y - to_loc.y);
+
+
+    // TODO: This is compatible with the case that only OPINs are connected to other layers.
+    // Ideally, I should check whether OPINs or IPINs are connected and use the correct layer.
+    // If both are connected, minimum should be taken. In the case that channels are also connected,
+    // I haven't thought about what to do.
+    float cross_layer_td = 0;
+    if (from_loc.layer_num != to_loc.layer_num) {
+        cross_layer_td = cross_layer_delay_;
+    }
+    return delays_[to_loc.layer_num][delta_x][delta_y] + cross_layer_td;
 }
 
 void DeltaDelayModel::dump_echo(std::string filepath) const {
@@ -60,13 +69,13 @@ const DeltaDelayModel* OverrideDelayModel::base_delay_model() const {
 }
 
 ///@brief OverrideDelayModel methods.
-float OverrideDelayModel::delay(int from_x, int from_y, int from_pin, int to_x, int to_y, int to_pin, int layer_num) const {
+float OverrideDelayModel::delay(const t_physical_tile_loc& from_loc, int from_pin, const t_physical_tile_loc& to_loc, int to_pin) const {
     //First check to if there is an override delay value
     auto& device_ctx = g_vpr_ctx.device();
     auto& grid = device_ctx.grid;
 
-    t_physical_tile_type_ptr from_type_ptr = grid.get_physical_type({from_x, from_y, layer_num});
-    t_physical_tile_type_ptr to_type_ptr = grid.get_physical_type({to_x, to_y, layer_num});
+    t_physical_tile_type_ptr from_type_ptr = grid.get_physical_type(from_loc);
+    t_physical_tile_type_ptr to_type_ptr = grid.get_physical_type(to_loc);
 
     t_override override_key;
     override_key.from_type = from_type_ptr->index;
@@ -76,8 +85,8 @@ float OverrideDelayModel::delay(int from_x, int from_y, int from_pin, int to_x,
 
     //Delay overrides may be different for +/- delta so do not use
     //an absolute delta for the look-up
-    override_key.delta_x = to_x - from_x;
-    override_key.delta_y = to_y - from_y;
+    override_key.delta_x = to_loc.x - from_loc.x;
+    override_key.delta_y = to_loc.y - from_loc.y;
 
     float delay_val = std::numeric_limits<float>::quiet_NaN();
     auto override_iter = delay_overrides_.find(override_key);
@@ -86,7 +95,7 @@ float OverrideDelayModel::delay(int from_x, int from_y, int from_pin, int to_x,
         delay_val = override_iter->second;
     } else {
         //Fall back to the base delay model if no override was found
-        delay_val = base_delay_model_->delay(from_x, from_y, from_pin, to_x, to_y, to_pin, layer_num);
+        delay_val = base_delay_model_->delay(from_loc, from_pin, to_loc, to_pin);
     }
 
     return delay_val;
@@ -346,9 +355,10 @@ float comp_td_single_connection_delay(const PlaceDelayModel* delay_model, Cluste
 
         int source_x = place_ctx.block_locs[source_block].loc.x;
         int source_y = place_ctx.block_locs[source_block].loc.y;
+        int source_layer = place_ctx.block_locs[source_block].loc.layer;
         int sink_x = place_ctx.block_locs[sink_block].loc.x;
         int sink_y = place_ctx.block_locs[sink_block].loc.y;
-        int sink_layer_num = place_ctx.block_locs[sink_block].loc.layer;
+        int sink_layer = place_ctx.block_locs[sink_block].loc.layer;
 
         /**
          * This heuristic only considers delta_x and delta_y, a much better
@@ -357,13 +367,10 @@ float comp_td_single_connection_delay(const PlaceDelayModel* delay_model, Cluste
          * In particular this approach does not accurately capture the effect
          * of fast carry-chain connections.
          */
-        delay_source_to_sink = delay_model->delay(source_x,
-                                                  source_y,
+        delay_source_to_sink = delay_model->delay({source_x, source_y, source_layer},
                                                   source_block_ipin,
-                                                  sink_x,
-                                                  sink_y,
-                                                  sink_block_ipin,
-                                                  sink_layer_num);
+                                                  {sink_x, sink_y, sink_layer},
+                                                  sink_block_ipin);
         if (delay_source_to_sink < 0) {
             VPR_ERROR(VPR_ERROR_PLACE,
                       "in comp_td_single_connection_delay: Bad delay_source_to_sink value %g from %s (at %d,%d) to %s (at %d,%d)\n"
diff --git a/vpr/src/place/place_delay_model.h b/vpr/src/place/place_delay_model.h
index 09b6969c011..ad0d2baf031 100644
--- a/vpr/src/place/place_delay_model.h
+++ b/vpr/src/place/place_delay_model.h
@@ -62,7 +62,7 @@ class PlaceDelayModel {
      *
      * Either compute or read methods must be invoked before invoking delay.
      */
-    virtual float delay(int from_x, int from_y, int from_pin, int to_x, int to_y, int to_pin, int layer_num) const = 0;
+    virtual float delay(const t_physical_tile_loc& from_loc, int from_pin, const t_physical_tile_loc& to_loc, int to_pin) const = 0;
 
     ///@brief Dumps the delay model to an echo file.
     virtual void dump_echo(std::string filename) const = 0;
@@ -96,7 +96,7 @@ class DeltaDelayModel : public PlaceDelayModel {
         const t_placer_opts& placer_opts,
         const t_router_opts& router_opts,
         int longest_length) override;
-    float delay(int from_x, int from_y, int /*from_pin*/, int to_x, int to_y, int /*to_pin*/, int layer_num) const override;
+    float delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const override;
     void dump_echo(std::string filepath) const override;
 
     void read(const std::string& file) override;
@@ -107,6 +107,7 @@ class DeltaDelayModel : public PlaceDelayModel {
 
   private:
     vtr::NdMatrix<float, 3> delays_; // [0..num_layers-1][0..max_dx][0..max_dy]
+    float cross_layer_delay_;
     bool is_flat_;
 };
 
@@ -121,7 +122,7 @@ class OverrideDelayModel : public PlaceDelayModel {
         int longest_length) override;
     // returns delay from the specified (x,y) to the specified (x,y) with both endpoints on layer_num and the
     // specified from and to pins
-    float delay(int from_x, int from_y, int from_pin, int to_x, int to_y, int to_pin, int layer_num) const override;
+    float delay(const t_physical_tile_loc& from_loc, int from_pin, const t_physical_tile_loc& to_loc, int to_pin) const override;
     void dump_echo(std::string filepath) const override;
 
     void read(const std::string& file) override;
@@ -135,6 +136,7 @@ class OverrideDelayModel : public PlaceDelayModel {
 
   private:
     std::unique_ptr<DeltaDelayModel> base_delay_model_;
+    float cross_layer_delay_;
     bool is_flat_;
 
     void compute_override_delay_model(RouterDelayProfiler& router,

From c53d7b7c7e0958c51870173d92a7abe0911b8824 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 5 Jul 2023 11:17:58 -0400
Subject: [PATCH 015/257] write a function to return the minimum delay of
 switches that cross the layers

---
 vpr/src/place/place_delay_model.cpp           |  1 -
 vpr/src/route/router_delay_profiling.cpp      |  1 -
 .../route/router_lookahead_extended_map.cpp   |  6 +-
 vpr/src/route/router_lookahead_map.cpp        | 64 ++++++++-----------
 vpr/src/route/router_lookahead_map_utils.cpp  | 22 +++----
 vpr/src/route/rr_graph.cpp                    | 11 ++--
 vpr/src/route/rr_graph.h                      |  2 -
 vpr/src/route/rr_graph2.cpp                   |  2 +-
 vpr/src/util/vpr_utils.cpp                    | 18 ++++++
 vpr/src/util/vpr_utils.h                      |  4 ++
 10 files changed, 67 insertions(+), 64 deletions(-)

diff --git a/vpr/src/place/place_delay_model.cpp b/vpr/src/place/place_delay_model.cpp
index 0d79270582a..96fdef81175 100644
--- a/vpr/src/place/place_delay_model.cpp
+++ b/vpr/src/place/place_delay_model.cpp
@@ -31,7 +31,6 @@ float DeltaDelayModel::delay(const t_physical_tile_loc& from_loc, int /*from_pin
     int delta_x = std::abs(from_loc.x - to_loc.x);
     int delta_y = std::abs(from_loc.y - to_loc.y);
 
-
     // TODO: This is compatible with the case that only OPINs are connected to other layers.
     // Ideally, I should check whether OPINs are conneced or IPINs and use the correct layer.
     // If both are connected, minimum should be taken. In the case that channels are also connected,
diff --git a/vpr/src/route/router_delay_profiling.cpp b/vpr/src/route/router_delay_profiling.cpp
index ab8ea943f93..ca34f4e4ccb 100644
--- a/vpr/src/route/router_delay_profiling.cpp
+++ b/vpr/src/route/router_delay_profiling.cpp
@@ -66,7 +66,6 @@ bool RouterDelayProfiler::calculate_delay(int source_node,
         bounding_box.layer_max = layer_num;
     }
 
-
     t_conn_cost_params cost_params;
     cost_params.criticality = 1.;
     cost_params.astar_fac = router_opts.router_profiler_astar_fac;
diff --git a/vpr/src/route/router_lookahead_extended_map.cpp b/vpr/src/route/router_lookahead_extended_map.cpp
index 90ec255a7ca..e49dc2cca01 100644
--- a/vpr/src/route/router_lookahead_extended_map.cpp
+++ b/vpr/src/route/router_lookahead_extended_map.cpp
@@ -419,8 +419,7 @@ std::pair<float, int> ExtendedMapLookahead::run_dijkstra(RRNodeId start_node,
 
 // compute the cost maps for lookahead
 void ExtendedMapLookahead::compute(const std::vector<t_segment_inf>& segment_inf) {
-    std::tie(this->src_opin_delays, this->src_opin_inter_layer_delays) =
-        util::compute_router_src_opin_lookahead(is_flat_);
+    std::tie(this->src_opin_delays, this->src_opin_inter_layer_delays) = util::compute_router_src_opin_lookahead(is_flat_);
 
     this->chan_ipins_delays = util::compute_router_chan_ipin_lookahead();
 
@@ -616,8 +615,7 @@ void ExtendedMapLookahead::write(const std::string& file) const {
 void ExtendedMapLookahead::read(const std::string& file) {
     cost_map_.read(file);
 
-    std::tie(this->src_opin_delays, this->src_opin_inter_layer_delays) =
-        util::compute_router_src_opin_lookahead(is_flat_);
+    std::tie(this->src_opin_delays, this->src_opin_inter_layer_delays) = util::compute_router_src_opin_lookahead(is_flat_);
 
     this->chan_ipins_delays = util::compute_router_chan_ipin_lookahead();
 }
diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp
index d2191669212..05dbb9efefe 100644
--- a/vpr/src/route/router_lookahead_map.cpp
+++ b/vpr/src/route/router_lookahead_map.cpp
@@ -275,8 +275,8 @@ static void print_router_cost_map(const t_routing_cost_map& router_cost_map);
 
 /******** Interface class member function definitions ********/
 MapLookahead::MapLookahead(const t_det_routing_arch& det_routing_arch, bool is_flat)
-    : det_routing_arch_(det_routing_arch) , is_flat_(is_flat) {
-
+    : det_routing_arch_(det_routing_arch)
+    , is_flat_(is_flat) {
     int num_layers = g_vpr_ctx.device().grid.get_num_layers();
     if (num_layers > 1) {
         const auto& sw_inf = g_vpr_ctx.device().all_sw_inf;
@@ -440,8 +440,8 @@ std::pair<float, float> MapLookahead::get_expected_delay_and_cong(RRNodeId from_
         //delay to reach the sink.
 
         t_physical_tile_type_ptr from_tile_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(from_node),
-                                                                                rr_graph.node_ylow(from_node),
-                                                                                from_layer_num});
+                                                                                     rr_graph.node_ylow(from_node),
+                                                                                     from_layer_num});
         t_physical_tile_type_ptr to_tile_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(to_node),
                                                                                    rr_graph.node_ylow(to_node),
                                                                                    to_layer_num});
@@ -452,9 +452,7 @@ std::pair<float, float> MapLookahead::get_expected_delay_and_cong(RRNodeId from_
         auto from_ptc = rr_graph.node_ptc_num(from_node);
         auto to_ptc = rr_graph.node_ptc_num(to_node);
 
-        if (from_layer_num == to_layer_num ||
-            inter_layer_connection[to_layer_num][to_tile_index][to_ptc].find(from_layer_num) !=
-                inter_layer_connection[to_layer_num][to_tile_index][to_ptc].end()) {
+        if (from_layer_num == to_layer_num || inter_layer_connection[to_layer_num][to_tile_index][to_ptc].find(from_layer_num) != inter_layer_connection[to_layer_num][to_tile_index][to_ptc].end()) {
             std::tie(expected_delay_cost, expected_cong_cost) = get_cost_from_src_opin(src_opin_delays[from_layer_num][from_tile_index][from_ptc],
                                                                                        from_layer_num,
                                                                                        delta_x,
@@ -462,16 +460,15 @@ std::pair<float, float> MapLookahead::get_expected_delay_and_cong(RRNodeId from_
         }
 
         if (from_layer_num != to_layer_num) {
-                float tmp_expected_delay_cost, tmp_expected_cong_cost;
-                std::tie(tmp_expected_delay_cost, tmp_expected_cong_cost) =
-                    get_cost_from_src_opin(src_opin_inter_layer_delays[from_layer_num][from_tile_index][from_ptc][to_layer_num],
-                                           to_layer_num,
-                                           delta_x,
-                                           delta_y);
-                if (tmp_expected_delay_cost < expected_delay_cost) {
-                    expected_delay_cost = tmp_expected_delay_cost;
-                    expected_cong_cost = tmp_expected_cong_cost;
-                }
+            float tmp_expected_delay_cost, tmp_expected_cong_cost;
+            std::tie(tmp_expected_delay_cost, tmp_expected_cong_cost) = get_cost_from_src_opin(src_opin_inter_layer_delays[from_layer_num][from_tile_index][from_ptc][to_layer_num],
+                                                                                               to_layer_num,
+                                                                                               delta_x,
+                                                                                               delta_y);
+            if (tmp_expected_delay_cost < expected_delay_cost) {
+                expected_delay_cost = tmp_expected_delay_cost;
+                expected_cong_cost = tmp_expected_cong_cost;
+            }
         }
 
         expected_delay_cost *= params.criticality;
@@ -498,17 +495,16 @@ std::pair<float, float> MapLookahead::get_expected_delay_and_cong(RRNodeId from_
 
         bool get_cost_entry = true;
         if (from_layer_num != to_layer_num) {
-                t_physical_tile_type_ptr to_tile_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(to_node),
-                                                                                           rr_graph.node_ylow(to_node),
-                                                                                           to_layer_num});
-                auto to_tile_index = std::distance(&device_ctx.physical_tile_types[0], to_tile_type);
-                auto to_ptc = rr_graph.node_ptc_num(to_node);
-                if(inter_layer_connection[to_layer_num][to_tile_index][to_ptc].find(from_layer_num) ==
-                    inter_layer_connection[to_layer_num][to_tile_index][to_ptc].end()) {
-                    get_cost_entry = false;
-                    expected_delay_cost = std::numeric_limits<float>::max() / 1e12;
-                    expected_cong_cost = std::numeric_limits<float>::max() / 1e12;
-                }
+            t_physical_tile_type_ptr to_tile_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(to_node),
+                                                                                       rr_graph.node_ylow(to_node),
+                                                                                       to_layer_num});
+            auto to_tile_index = std::distance(&device_ctx.physical_tile_types[0], to_tile_type);
+            auto to_ptc = rr_graph.node_ptc_num(to_node);
+            if (inter_layer_connection[to_layer_num][to_tile_index][to_ptc].find(from_layer_num) == inter_layer_connection[to_layer_num][to_tile_index][to_ptc].end()) {
+                get_cost_entry = false;
+                expected_delay_cost = std::numeric_limits<float>::max() / 1e12;
+                expected_cong_cost = std::numeric_limits<float>::max() / 1e12;
+            }
         }
 
         if (get_cost_entry) {
@@ -520,13 +516,12 @@ std::pair<float, float> MapLookahead::get_expected_delay_and_cong(RRNodeId from_
                                                         delta_y);
             expected_delay_cost = cost_entry.delay;
             expected_cong_cost = cost_entry.congestion;
-            if(from_layer_num != to_layer_num) {
-                    expected_delay_cost += inter_layer_connection_box_sw_delay;
+            if (from_layer_num != to_layer_num) {
+                expected_delay_cost += inter_layer_connection_box_sw_delay;
             }
 
             expected_delay_cost *= params.criticality;
             expected_cong_cost *= (1 - params.criticality);
-
         }
 
         VTR_ASSERT_SAFE_MSG(std::isfinite(expected_delay_cost),
@@ -557,12 +552,10 @@ void MapLookahead::compute(const std::vector<t_segment_inf>& segment_inf) {
 
     //Next, compute which wire types are accessible (and the cost to reach them)
     //from the different physical tile type's SOURCEs & OPINs
-    std::tie(this->src_opin_delays, this->src_opin_inter_layer_delays) =
-        util::compute_router_src_opin_lookahead(is_flat_);
+    std::tie(this->src_opin_delays, this->src_opin_inter_layer_delays) = util::compute_router_src_opin_lookahead(is_flat_);
 
     // Store the sinks that have connections to other layers
     this->inter_layer_connection = util::register_tiles_with_inter_layer_connection_block(is_flat_);
-
 }
 
 void MapLookahead::compute_intra_tile() {
@@ -585,8 +578,7 @@ void MapLookahead::read(const std::string& file) {
 
     //Next, compute which wire types are accessible (and the cost to reach them)
     //from the different physical tile type's SOURCEs & OPINs
-    std::tie(this->src_opin_delays, this->src_opin_inter_layer_delays) =
-        util::compute_router_src_opin_lookahead(is_flat_);
+    std::tie(this->src_opin_delays, this->src_opin_inter_layer_delays) = util::compute_router_src_opin_lookahead(is_flat_);
 
     this->inter_layer_connection = util::register_tiles_with_inter_layer_connection_block(is_flat_);
 }
diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp
index f7186ef70d6..b3eb93dbd77 100644
--- a/vpr/src/route/router_lookahead_map_utils.cpp
+++ b/vpr/src/route/router_lookahead_map_utils.cpp
@@ -19,10 +19,7 @@
 #include "route_common.h"
 #include "route_timing.h"
 
-static void dijkstra_flood_to_wires(int itile, RRNodeId inode,
-                                    util::t_src_opin_delays& src_opin_delays,
-                                    util::t_src_opin_inter_layer_delays& src_opin_inter_layer_delays,
-                                    bool is_multi_layer);
+static void dijkstra_flood_to_wires(int itile, RRNodeId inode, util::t_src_opin_delays& src_opin_delays, util::t_src_opin_inter_layer_delays& src_opin_inter_layer_delays, bool is_multi_layer);
 
 static void dijkstra_flood_to_ipins(RRNodeId node, util::t_chan_ipins_delays& chan_ipins_delays);
 
@@ -324,7 +321,7 @@ std::pair<t_src_opin_delays, t_src_opin_inter_layer_delays> compute_router_src_o
     }
 
     t_src_opin_inter_layer_delays src_opin_inter_layer_delays;
-    if(is_multi_layer) {
+    if (is_multi_layer) {
         src_opin_inter_layer_delays.resize(num_layers);
         for (int layer_num = 0; layer_num < num_layers; layer_num++) {
             int num_physical_tiles = (int)device_ctx.physical_tile_types.size();
@@ -332,7 +329,6 @@ std::pair<t_src_opin_delays, t_src_opin_inter_layer_delays> compute_router_src_o
         }
     }
 
-
     //We assume that the routing connectivity of each instance of a physical tile is the same,
     //and so only measure one instance of each type
     for (int layer_num = 0; layer_num < num_layers; layer_num++) {
@@ -372,7 +368,7 @@ std::pair<t_src_opin_delays, t_src_opin_inter_layer_delays> compute_router_src_o
 
                         if (ptc >= int(src_opin_delays[layer_num][itile].size())) {
                             src_opin_delays[layer_num][itile].resize(ptc + 1); //Inefficient but functional...
-                            if(is_multi_layer) {
+                            if (is_multi_layer) {
                                 size_t old_size = src_opin_inter_layer_delays[layer_num][itile].size();
                                 src_opin_inter_layer_delays[layer_num][itile].resize(ptc + 1);
                                 for (size_t i = old_size; i < src_opin_inter_layer_delays[layer_num][itile].size(); ++i) {
@@ -419,7 +415,7 @@ t_sink_inter_layer_connection register_tiles_with_inter_layer_connection_block(b
 
     int num_layers = device_ctx.grid.get_num_layers();
     bool is_multi_layer = (num_layers > 1);
-    if(!is_multi_layer) {
+    if (!is_multi_layer) {
         return t_sink_inter_layer_connection();
     }
     // AM: Currently, for 3D stuff, I am only focusing on the case that flat-router is not enabled. If flat_router is on, I am not sure whether it works.
@@ -448,7 +444,7 @@ t_sink_inter_layer_connection register_tiles_with_inter_layer_connection_block(b
                 if (get_class_type_from_class_physical_num(&physical_tile, class_num) == e_pin_type::RECEIVER) {
                     for (int to_layer_num = 0; to_layer_num < num_layers; to_layer_num++) {
                         if (from_layer_num == to_layer_num) {
-                            continue ;
+                            continue;
                         } else {
                             for (int pin_num : get_pin_list_from_class_physical_num(&physical_tile, class_num)) {
                                 if (is_pin_conencted_to_layer(&physical_tile, pin_num, from_layer_num, to_layer_num, num_layers)) {
@@ -547,7 +543,8 @@ t_ipin_primitive_sink_delays compute_intra_tile_dijkstra(const RRGraphView& rr_g
 } // namespace util
 
 static void dijkstra_flood_to_wires(int itile,
-                                    RRNodeId node, util::t_src_opin_delays& src_opin_delays,
+                                    RRNodeId node,
+                                    util::t_src_opin_delays& src_opin_delays,
                                     util::t_src_opin_inter_layer_delays& src_opin_inter_layer_delays,
                                     bool is_multi_layer) {
     auto& device_ctx = g_vpr_ctx.device();
@@ -620,14 +617,13 @@ static void dijkstra_flood_to_wires(int itile,
 
             //Keep costs of the best path to reach each wire type
             if ((!src_opin_delays[node_layer_num][itile][ptc].count(seg_index)
-                || curr.delay < src_opin_delays[node_layer_num][itile][ptc][seg_index].delay)
+                 || curr.delay < src_opin_delays[node_layer_num][itile][ptc][seg_index].delay)
                 && curr_layer_num == node_layer_num) {
                 src_opin_delays[node_layer_num][itile][ptc][seg_index].wire_rr_type = curr_rr_type;
                 src_opin_delays[node_layer_num][itile][ptc][seg_index].wire_seg_index = seg_index;
                 src_opin_delays[node_layer_num][itile][ptc][seg_index].delay = curr.delay;
                 src_opin_delays[node_layer_num][itile][ptc][seg_index].congestion = curr.congestion;
-            } else if (is_multi_layer && (!src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num].count(seg_index)
-                        || curr.delay < src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].delay)
+            } else if (is_multi_layer && (!src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num].count(seg_index) || curr.delay < src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].delay)
                        && curr_layer_num != node_layer_num) {
                 src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].wire_rr_type = curr_rr_type;
                 src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].wire_seg_index = seg_index;
diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp
index 193f0ef459c..260060b08b0 100644
--- a/vpr/src/route/rr_graph.cpp
+++ b/vpr/src/route/rr_graph.cpp
@@ -880,18 +880,17 @@ std::set<int> get_layers_pin_is_connected_to(const t_physical_tile_type_ptr type
     return layer_pin_index_is_connected_to;
 }
 
-std::set<int> get_layers_connected_to_pin(const t_physical_tile_type_ptr type, int to_layer, int pin_index){
+std::set<int> get_layers_connected_to_pin(const t_physical_tile_type_ptr type, int to_layer, int pin_index) {
     const auto& device_ctx = g_vpr_ctx.device();
     std::set<int> layers_connected_to_pin;
-    for(int layer = 0; layer < device_ctx.grid.get_num_layers(); layer++){
-        if(is_pin_conencted_to_layer(type, pin_index, layer, to_layer, device_ctx.grid.get_num_layers())){
+    for (int layer = 0; layer < device_ctx.grid.get_num_layers(); layer++) {
+        if (is_pin_conencted_to_layer(type, pin_index, layer, to_layer, device_ctx.grid.get_num_layers())) {
             layers_connected_to_pin.insert(layer);
         }
     }
     return layers_connected_to_pin;
 }
 
-
 bool channel_widths_unchanged(const t_chan_width& current, const t_chan_width& proposed) {
     if (current.max != proposed.max
         || current.x_max != proposed.x_max
@@ -3276,9 +3275,9 @@ static vtr::NdMatrix<int, 6> alloc_and_load_pin_to_seg_type(const e_pin_type pin
         for (auto type_layer_index : type_layer) {
             for (int width = 0; width < Type->width; ++width) {
                 for (int height = 0; height < Type->height; ++height) {
-                    for (e_side side: SIDES) {
+                    for (e_side side : SIDES) {
                         if (Type->pinloc[width][height][side][pin] == 1) {
-                            for (auto i = 0; i < (int) get_layers_connected_to_pin(Type, type_layer_index, pin).size(); i++) {
+                            for (auto i = 0; i < (int)get_layers_connected_to_pin(Type, type_layer_index, pin).size(); i++) {
                                 dir_list[width][height][type_layer_index][side][num_dir[width][height][type_layer_index][side]] = pin;
                                 num_dir[width][height][type_layer_index][side]++;
                             }
diff --git a/vpr/src/route/rr_graph.h b/vpr/src/route/rr_graph.h
index 26873e715de..af06257d98e 100644
--- a/vpr/src/route/rr_graph.h
+++ b/vpr/src/route/rr_graph.h
@@ -64,6 +64,4 @@ bool pins_connected(t_block_loc cluster_loc,
                     int from_pin_logical_num,
                     int to_pin_logical_num);
 
-
-
 #endif
diff --git a/vpr/src/route/rr_graph2.cpp b/vpr/src/route/rr_graph2.cpp
index 686aa6d50ed..99444a9ed50 100644
--- a/vpr/src/route/rr_graph2.cpp
+++ b/vpr/src/route/rr_graph2.cpp
@@ -1645,7 +1645,7 @@ int get_track_to_pins(RRGraphBuilder& rr_graph_builder,
                     for (iconn = 0; iconn < max_conn; iconn++) {
                         ipin = track_to_pin_lookup[type->index][phy_track][width_offset][height_offset][layer][side][iconn];
 
-                        if(!is_pin_conencted_to_layer(type,ipin,layer_index,layer,device_ctx.grid.get_num_layers())){
+                        if (!is_pin_conencted_to_layer(type, ipin, layer_index, layer, device_ctx.grid.get_num_layers())) {
                             continue;
                         }
 
diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp
index e3815e4e006..1263146fa77 100644
--- a/vpr/src/util/vpr_utils.cpp
+++ b/vpr/src/util/vpr_utils.cpp
@@ -2508,3 +2508,21 @@ void add_pb_child_to_list(std::list<const t_pb*>& pb_list, const t_pb* parent_pb
         }
     }
 }
+
+float get_min_cross_layer_delay(std::vector<t_arch_switch_inf> arch_switch_inf,
+                                const std::vector<t_segment_inf>& segment_inf,
+                                const int wire_to_ipin_arch_sw_id) {
+    float min_delay = std::numeric_limits<float>::max();
+    // Returns the smallest Tdel() among the wire-to-IPIN switch and each segment's arch_opin_between_dice_switch.
+    if (wire_to_ipin_arch_sw_id >= 0) {
+        min_delay = std::min(min_delay, arch_switch_inf[wire_to_ipin_arch_sw_id].Tdel());
+    }
+    for (const auto& seg_inf : segment_inf) {
+        int cross_layer_sw_arch_id = seg_inf.arch_opin_between_dice_switch;
+        if (cross_layer_sw_arch_id >= 0) { // ids below zero are not valid indices into arch_switch_inf; skip them
+            min_delay = std::min(min_delay, arch_switch_inf[cross_layer_sw_arch_id].Tdel());
+        }
+    }
+    // min_delay is still std::numeric_limits<float>::max() here if no non-negative switch id was seen.
+    return min_delay;
+}
diff --git a/vpr/src/util/vpr_utils.h b/vpr/src/util/vpr_utils.h
index f3a8f8917e7..d04fea1630e 100644
--- a/vpr/src/util/vpr_utils.h
+++ b/vpr/src/util/vpr_utils.h
@@ -311,4 +311,8 @@ t_arch_switch_inf create_internal_arch_sw(float delay);
 
 void add_pb_child_to_list(std::list<const t_pb*>& pb_list, const t_pb* parent_pb);
 
+float get_min_cross_layer_delay(std::vector<t_arch_switch_inf> arch_switch_inf,
+                                const std::vector<t_segment_inf>& segment_inf,
+                                const int wire_to_ipin_arch_sw_id);
+
 #endif

From 505d0f68b2af52ef82b5868db5584b03b55a35f6 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 5 Jul 2023 15:10:57 -0400
Subject: [PATCH 016/257] add min_cross_layer_cost to place delay model
 constructor

---
 vpr/src/place/place.cpp                    |  1 +
 vpr/src/place/place_delay_model.cpp        | 16 +++++++++++++---
 vpr/src/place/place_delay_model.h          | 18 +++++++++++++-----
 vpr/src/place/timing_place_lookup.cpp      | 10 +++++++---
 vpr/src/place/timing_place_lookup.h        |  1 +
 vpr/src/util/vpr_utils.cpp                 |  2 +-
 vpr/src/util/vpr_utils.h                   |  2 +-
 vpr/test/test_place_delay_model_serdes.cpp | 19 ++++++++++++++-----
 8 files changed, 51 insertions(+), 18 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index b6696bdb8ac..d6f425ca73f 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -506,6 +506,7 @@ void try_place(const Netlist<>& net_list,
     if (placer_opts.place_algorithm.is_timing_driven()) {
         /*do this before the initial placement to avoid messing up the initial placement */
         place_delay_model = alloc_lookups_and_delay_model(net_list,
+                                                          device_ctx.arch_switch_inf,
                                                           chan_width_dist,
                                                           placer_opts,
                                                           router_opts,
diff --git a/vpr/src/place/place_delay_model.cpp b/vpr/src/place/place_delay_model.cpp
index 96fdef81175..bfdd15707f4 100644
--- a/vpr/src/place/place_delay_model.cpp
+++ b/vpr/src/place/place_delay_model.cpp
@@ -37,6 +37,7 @@ float DeltaDelayModel::delay(const t_physical_tile_loc& from_loc, int /*from_pin
     // I haven't thought about what to do.
     float cross_layer_td = 0;
     if (from_loc.layer_num != to_loc.layer_num) {
+        VTR_ASSERT(std::isfinite(cross_layer_delay_));
         cross_layer_td = cross_layer_delay_;
     }
     return delays_[to_loc.layer_num][delta_x][delta_y] + cross_layer_td;
@@ -266,7 +267,7 @@ void OverrideDelayModel::read(const std::string& file) {
     auto model = reader.getRoot<VprOverrideDelayModel>();
     ToNdMatrix<3, VprFloatEntry, float>(&delays, model.getDelays(), ToFloat);
 
-    base_delay_model_ = std::make_unique<DeltaDelayModel>(delays, is_flat_);
+    base_delay_model_ = std::make_unique<DeltaDelayModel>(cross_layer_delay_, delays, is_flat_);
 
     // Reading non-scalar capnproto fields is roughly equivilant to using
     // a std::vector of the field type.  Actual type is capnp::List<X>::Reader.
@@ -318,6 +319,7 @@ void OverrideDelayModel::write(const std::string& file) const {
 
 ///@brief Initialize the placer delay model.
 std::unique_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(const Netlist<>& net_list,
+                                                               const std::vector<t_arch_switch_inf>& arch_switch_inf,
                                                                t_chan_width_dist chan_width_dist,
                                                                const t_placer_opts& placer_opts,
                                                                const t_router_opts& router_opts,
@@ -326,8 +328,16 @@ std::unique_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(const Netlist<>&
                                                                const t_direct_inf* directs,
                                                                const int num_directs,
                                                                bool is_flat) {
-    return compute_place_delay_model(placer_opts, router_opts, net_list, det_routing_arch, segment_inf,
-                                     chan_width_dist, directs, num_directs, is_flat);
+    return compute_place_delay_model(placer_opts,
+                                     router_opts,
+                                     net_list,
+                                     arch_switch_inf,
+                                     det_routing_arch,
+                                     segment_inf,
+                                     chan_width_dist,
+                                     directs,
+                                     num_directs,
+                                     is_flat);
 }
 
 /**
diff --git a/vpr/src/place/place_delay_model.h b/vpr/src/place/place_delay_model.h
index ad0d2baf031..2e2574904bc 100644
--- a/vpr/src/place/place_delay_model.h
+++ b/vpr/src/place/place_delay_model.h
@@ -29,6 +29,7 @@ class PlaceDelayModel;
 
 ///@brief Initialize the placer delay model.
 std::unique_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(const Netlist<>& net_list,
+                                                               const std::vector<t_arch_switch_inf>& arch_switch_inf,
                                                                t_chan_width_dist chan_width_dist,
                                                                const t_placer_opts& place_opts,
                                                                const t_router_opts& router_opts,
@@ -85,10 +86,15 @@ class PlaceDelayModel {
 ///@brief A simple delay model based on the distance (delta) between block locations.
 class DeltaDelayModel : public PlaceDelayModel {
   public:
-    DeltaDelayModel(bool is_flat)
-        : is_flat_(is_flat) {}
-    DeltaDelayModel(vtr::NdMatrix<float, 3> delta_delays, bool is_flat)
+    DeltaDelayModel(float min_cross_layer_delay,
+                    bool is_flat)
+        : cross_layer_delay_(min_cross_layer_delay)
+        , is_flat_(is_flat) {}
+    DeltaDelayModel(float min_cross_layer_delay,
+                    vtr::NdMatrix<float, 3> delta_delays,
+                    bool is_flat)
         : delays_(std::move(delta_delays))
+        , cross_layer_delay_(min_cross_layer_delay)
         , is_flat_(is_flat) {}
 
     void compute(
@@ -113,8 +119,10 @@ class DeltaDelayModel : public PlaceDelayModel {
 
 class OverrideDelayModel : public PlaceDelayModel {
   public:
-    OverrideDelayModel(bool is_flat)
-        : is_flat_(is_flat) {}
+    OverrideDelayModel(float min_cross_layer_delay,
+                       bool is_flat)
+        : cross_layer_delay_(min_cross_layer_delay)
+        , is_flat_(is_flat) {}
     void compute(
         RouterDelayProfiler& route_profiler,
         const t_placer_opts& placer_opts,
diff --git a/vpr/src/place/timing_place_lookup.cpp b/vpr/src/place/timing_place_lookup.cpp
index ad75484ffe0..d3508789539 100644
--- a/vpr/src/place/timing_place_lookup.cpp
+++ b/vpr/src/place/timing_place_lookup.cpp
@@ -166,6 +166,7 @@ static float find_neightboring_average(vtr::NdMatrix<float, 3>& matrix, t_physic
 std::unique_ptr<PlaceDelayModel> compute_place_delay_model(const t_placer_opts& placer_opts,
                                                            const t_router_opts& router_opts,
                                                            const Netlist<>& net_list,
+                                                           const std::vector<t_arch_switch_inf>& arch_switch_inf,
                                                            t_det_routing_arch* det_routing_arch,
                                                            std::vector<t_segment_inf>& segment_inf,
                                                            t_chan_width_dist chan_width_dist,
@@ -193,10 +194,13 @@ std::unique_ptr<PlaceDelayModel> compute_place_delay_model(const t_placer_opts&
 
     /*now setup and compute the actual arrays */
     std::unique_ptr<PlaceDelayModel> place_delay_model;
+    float min_cross_layer_delay = get_min_cross_layer_delay(arch_switch_inf,
+                                                            segment_inf,
+                                                            det_routing_arch->wire_to_arch_ipin_switch_between_dice);
     if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA) {
-        place_delay_model = std::make_unique<DeltaDelayModel>(is_flat);
+        place_delay_model = std::make_unique<DeltaDelayModel>(min_cross_layer_delay, is_flat);
     } else if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA_OVERRIDE) {
-        place_delay_model = std::make_unique<OverrideDelayModel>(is_flat);
+        place_delay_model = std::make_unique<OverrideDelayModel>(min_cross_layer_delay, is_flat);
     } else {
         VTR_ASSERT_MSG(false, "Invalid placer delay model");
     }
@@ -240,7 +244,7 @@ void OverrideDelayModel::compute(
         longest_length,
         is_flat_);
 
-    base_delay_model_ = std::make_unique<DeltaDelayModel>(delays, false);
+    base_delay_model_ = std::make_unique<DeltaDelayModel>(cross_layer_delay_, delays, false);
 
     compute_override_delay_model(route_profiler, router_opts);
 }
diff --git a/vpr/src/place/timing_place_lookup.h b/vpr/src/place/timing_place_lookup.h
index ac3e7f4df1e..e1a93afbf11 100644
--- a/vpr/src/place/timing_place_lookup.h
+++ b/vpr/src/place/timing_place_lookup.h
@@ -5,6 +5,7 @@
 std::unique_ptr<PlaceDelayModel> compute_place_delay_model(const t_placer_opts& placer_opts,
                                                            const t_router_opts& router_opts,
                                                            const Netlist<>& net_list,
+                                                           const std::vector<t_arch_switch_inf>& arch_switch_inf,
                                                            t_det_routing_arch* det_routing_arch,
                                                            std::vector<t_segment_inf>& segment_inf,
                                                            t_chan_width_dist chan_width_dist,
diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp
index 1263146fa77..40ea8d57668 100644
--- a/vpr/src/util/vpr_utils.cpp
+++ b/vpr/src/util/vpr_utils.cpp
@@ -2509,7 +2509,7 @@ void add_pb_child_to_list(std::list<const t_pb*>& pb_list, const t_pb* parent_pb
     }
 }
 
-float get_min_cross_layer_delay(std::vector<t_arch_switch_inf> arch_switch_inf,
+float get_min_cross_layer_delay(const std::vector<t_arch_switch_inf>& arch_switch_inf,
                                 const std::vector<t_segment_inf>& segment_inf,
                                 const int wire_to_ipin_arch_sw_id) {
     float min_delay = std::numeric_limits<float>::max();
diff --git a/vpr/src/util/vpr_utils.h b/vpr/src/util/vpr_utils.h
index d04fea1630e..16425e4983c 100644
--- a/vpr/src/util/vpr_utils.h
+++ b/vpr/src/util/vpr_utils.h
@@ -311,7 +311,7 @@ t_arch_switch_inf create_internal_arch_sw(float delay);
 
 void add_pb_child_to_list(std::list<const t_pb*>& pb_list, const t_pb* parent_pb);
 
-float get_min_cross_layer_delay(std::vector<t_arch_switch_inf> arch_switch_inf,
+float get_min_cross_layer_delay(const std::vector<t_arch_switch_inf>& arch_switch_inf,
                                 const std::vector<t_segment_inf>& segment_inf,
                                 const int wire_to_ipin_arch_sw_id);
 
diff --git a/vpr/test/test_place_delay_model_serdes.cpp b/vpr/test/test_place_delay_model_serdes.cpp
index 818b5cc3dfe..988b3e255b4 100644
--- a/vpr/test/test_place_delay_model_serdes.cpp
+++ b/vpr/test/test_place_delay_model_serdes.cpp
@@ -23,12 +23,17 @@ TEST_CASE("round_trip_delta_delay_model", "[vpr]") {
         }
     }
 
-    DeltaDelayModel model(std::move(delays), false);
+    float min_cross_layer_delay = 0.;
+
+    DeltaDelayModel model(min_cross_layer_delay,
+                          std::move(delays),
+                          false);
     const auto& delays1 = model.delays();
 
     model.write(kDeltaDelayBin);
 
-    DeltaDelayModel model2(false);
+    DeltaDelayModel model2(min_cross_layer_delay,
+                           false);
     model2.read(kDeltaDelayBin);
 
     const auto& delays2 = model2.delays();
@@ -61,15 +66,19 @@ TEST_CASE("round_trip_override_delay_model", "[vpr]") {
             }
         }
     }
-    OverrideDelayModel model(false);
-    auto base_model = std::make_unique<DeltaDelayModel>(delays, false);
+    float min_cross_layer_delay = 0.;
+    OverrideDelayModel model(min_cross_layer_delay, false);
+    auto base_model = std::make_unique<DeltaDelayModel>(min_cross_layer_delay,
+                                                        delays,
+                                                        false);
     model.set_base_delay_model(std::move(base_model));
     model.set_delay_override(1, 2, 3, 4, 5, 6, -1);
     model.set_delay_override(2, 2, 3, 4, 5, 6, -2);
 
     model.write(kOverrideDelayBin);
 
-    OverrideDelayModel model2(false);
+    OverrideDelayModel model2(min_cross_layer_delay,
+                              false);
     model2.read(kOverrideDelayBin);
 
     const auto& delays1 = model.base_delay_model()->delays();

From 3a5e2c661ce03b9c7a7e8d1916137ca7d9cf28ae Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 5 Jul 2023 19:17:14 -0400
Subject: [PATCH 017/257] add uniform_inter_layer_move_generator

---
 .../uniform_inter_layer_move_generator.cpp    | 56 +++++++++++++++++++
 .../uniform_inter_layer_move_generator.h      | 18 ++++++
 2 files changed, 74 insertions(+)
 create mode 100644 vpr/src/place/uniform_inter_layer_move_generator.cpp
 create mode 100644 vpr/src/place/uniform_inter_layer_move_generator.h

diff --git a/vpr/src/place/uniform_inter_layer_move_generator.cpp b/vpr/src/place/uniform_inter_layer_move_generator.cpp
new file mode 100644
index 00000000000..099967bffe5
--- /dev/null
+++ b/vpr/src/place/uniform_inter_layer_move_generator.cpp
@@ -0,0 +1,56 @@
+#include "uniform_inter_layer_move_generator.h"
+#include "globals.h"
+#include "place_constraints.h"
+#include "move_utils.h"
+
+e_create_move UniformInterLayerMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, t_logical_block_type& blk_type, float rlim, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/) {
+    //Find a movable block based on blk_type; the rest of this routine tries to move it to a different layer
+    ClusterBlockId b_from = propose_block_to_move(blk_type, false, nullptr, nullptr);
+
+    if (!b_from) { //No movable block found
+        return e_create_move::ABORT;
+    }
+
+    auto& place_ctx = g_vpr_ctx.placement();
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+
+    t_pl_loc from = place_ctx.block_locs[b_from].loc;
+    auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from);
+    auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer});
+    VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type));
+
+    const auto& block_compressed_grid = g_vpr_ctx.placement().compressed_block_grids[cluster_from_type->index];
+    //Layers listed by the compressed grid of this block type
+    const auto& compatible_layers = block_compressed_grid.get_layer_nums();
+
+    if (compatible_layers.size() < 2) { //No other layer to move to
+        return e_create_move::ABORT;
+    }
+
+    //Candidate destinations: every compatible layer except the block's current one
+    std::vector<int> candidate_layers;
+    candidate_layers.reserve(compatible_layers.size() - 1);
+    for (const auto& layer_num : compatible_layers) { //Read from the source list, not from the (empty) output list
+        if (layer_num != from.layer) {
+            candidate_layers.push_back(layer_num);
+        }
+    }
+    if (candidate_layers.empty()) { //Nothing to index into below
+        return e_create_move::ABORT;
+    }
+
+    //Pick a destination layer at random; assumes vtr::irand(imax) returns a value in [0, imax] - TODO confirm
+    int to_layer = candidate_layers[vtr::irand(static_cast<int>(candidate_layers.size()) - 1)];
+    t_pl_loc to;
+    to.layer = to_layer;
+
+    if (!find_to_loc_uniform(cluster_from_type, rlim, from, to, b_from)) {
+        return e_create_move::ABORT;
+    }
+    e_create_move create_move = ::create_move(blocks_affected, b_from, to);
+    //Check that all the blocks affected by the move would still be in a legal floorplan region after the swap
+    if (!floorplan_legal(blocks_affected)) {
+        return e_create_move::ABORT;
+    }
+    return create_move;
+}
\ No newline at end of file
diff --git a/vpr/src/place/uniform_inter_layer_move_generator.h b/vpr/src/place/uniform_inter_layer_move_generator.h
new file mode 100644
index 00000000000..36a71d4e18c
--- /dev/null
+++ b/vpr/src/place/uniform_inter_layer_move_generator.h
@@ -0,0 +1,18 @@
+#ifndef VTR_UNIFORM_INTER_LAYER_MOVE_GENERATOR_H
+#define VTR_UNIFORM_INTER_LAYER_MOVE_GENERATOR_H
+
+#include "move_generator.h"
+#include "timing_place.h"
+
+/**
+ * @brief Uniform inter-layer move generator
+ *
+ * Picks a movable block of the requested type and proposes relocating it to a different layer that its
+ * block type is compatible with, using the uniform search bounded by rlim (see the .cpp for details).
+ */
+// NOTE(review): propose_move presumably overrides MoveGenerator::propose_move; confirm and consider marking it 'override'.
+class UniformInterLayerMoveGenerator : public MoveGenerator {
+    e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, t_logical_block_type& blk_type, float rlim, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/);
+};
+
+#endif //VTR_UNIFORM_INTER_LAYER_MOVE_GENERATOR_H

From 6819b9a930f0e4776ffdefdc24588374db794300 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 5 Jul 2023 19:18:32 -0400
Subject: [PATCH 018/257] change find_loc_centroid to take the layer from the
 to location instead of from layer

---
 vpr/src/place/centroid_move_generator.cpp     |   1 +
 .../place/feasible_region_move_generator.cpp  |   1 +
 vpr/src/place/initial_placement.cpp           |   9 +-
 vpr/src/place/median_move_generator.cpp       |   1 +
 vpr/src/place/move_utils.cpp                  | 162 ++++++++----------
 vpr/src/place/move_utils.h                    |  16 +-
 .../weighted_centroid_move_generator.cpp      |   1 +
 .../place/weighted_median_move_generator.cpp  |   1 +
 8 files changed, 90 insertions(+), 102 deletions(-)

diff --git a/vpr/src/place/centroid_move_generator.cpp b/vpr/src/place/centroid_move_generator.cpp
index 22e2a4ed6a9..e9225bab091 100644
--- a/vpr/src/place/centroid_move_generator.cpp
+++ b/vpr/src/place/centroid_move_generator.cpp
@@ -29,6 +29,7 @@ e_create_move CentroidMoveGenerator::propose_move(t_pl_blocks_to_be_moved& block
     range_limiters.first_rlim = place_move_ctx.first_rlim;
 
     t_pl_loc to, centroid;
+    to.layer = from.layer;
 
     /* Calculate the centroid location*/
     calculate_centroid_loc(b_from, false, centroid, NULL);
diff --git a/vpr/src/place/feasible_region_move_generator.cpp b/vpr/src/place/feasible_region_move_generator.cpp
index 45c3f09093f..f3bd73dc7f1 100644
--- a/vpr/src/place/feasible_region_move_generator.cpp
+++ b/vpr/src/place/feasible_region_move_generator.cpp
@@ -118,6 +118,7 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
         center.y = (FR_coords.ymin + FR_coords.ymax) / 2;
         // TODO: Currently, we don't move blocks between different types of layers
         center.layer = from.layer;
+        to.layer = from.layer;
         if (!find_to_loc_centroid(cluster_from_type, from, center, range_limiters, to, b_from))
             return e_create_move::ABORT;
     }
diff --git a/vpr/src/place/initial_placement.cpp b/vpr/src/place/initial_placement.cpp
index 50c667d82d4..a017da13ab0 100644
--- a/vpr/src/place/initial_placement.cpp
+++ b/vpr/src/place/initial_placement.cpp
@@ -307,11 +307,10 @@ static bool find_centroid_neighbor(t_pl_loc& centroid_loc, t_logical_block_type_
     int first_rlim = 15;
 
     auto search_range = get_compressed_grid_target_search_range(compressed_block_grid,
-                                                                compressed_centroid_loc,
-                                                                first_rlim,
-                                                                num_layers);
+                                                                compressed_centroid_loc[centroid_loc_layer_num],
+                                                                first_rlim);
 
-    int delta_cx = search_range[centroid_loc_layer_num].xmax - search_range[centroid_loc_layer_num].xmin;
+    int delta_cx = search_range.xmax - search_range.xmin;
 
     //Block has not been placed yet, so the "from" coords will be (-1, -1)
     int cx_from = OPEN;
@@ -323,7 +322,7 @@ static bool find_centroid_neighbor(t_pl_loc& centroid_loc, t_logical_block_type_
     bool legal = find_compatible_compressed_loc_in_range(block_type,
                                                          delta_cx,
                                                          {cx_from, cy_from, layer_from},
-                                                         search_range[centroid_loc_layer_num],
+                                                         search_range,
                                                          to_compressed_loc,
                                                          false,
                                                          centroid_loc_layer_num);
diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index 9dae21bca5b..b5e209013ed 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -120,6 +120,7 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
     median_point.y = (limit_coords.ymin + limit_coords.ymax) / 2;
     // TODO: When placer is updated to support moving blocks between dice, this needs to be changed. Currently, we only move blocks within a die.
     median_point.layer = from.layer;
+    to.layer = from.layer;
     if (!find_to_loc_centroid(cluster_from_type, from, median_point, range_limiters, to, b_from))
         return e_create_move::ABORT;
 
diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index 5d4b945b87a..f83538c6462 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -712,6 +712,8 @@ bool find_to_loc_uniform(t_logical_block_type_ptr type,
     const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[type->index];
     const int num_layers = g_vpr_ctx.device().grid.get_num_layers();
     const int from_layer_num = from.layer;
+    const int to_layer_num = to.layer;
+    VTR_ASSERT(to.layer != OPEN);
 
     //Determine the coordinates in the compressed grid space of the current block
     std::vector<t_physical_tile_loc> compressed_locs = get_compressed_loc(compressed_block_grid,
@@ -719,11 +721,10 @@ bool find_to_loc_uniform(t_logical_block_type_ptr type,
                                                                           num_layers);
 
     //Determine the valid compressed grid location ranges
-    std::vector<t_bb> search_range = get_compressed_grid_target_search_range(compressed_block_grid,
-                                                                             compressed_locs,
-                                                                             rlim,
-                                                                             num_layers);
-    int delta_cx = search_range[from_layer_num].xmax - search_range[from_layer_num].xmin;
+    t_bb search_range = get_compressed_grid_target_search_range(compressed_block_grid,
+                                                                compressed_locs[from_layer_num],
+                                                                rlim);
+    int delta_cx = search_range.xmax - search_range.xmin;
 
     t_physical_tile_loc to_compressed_loc;
     bool legal = false;
@@ -732,7 +733,7 @@ bool find_to_loc_uniform(t_logical_block_type_ptr type,
     if (is_cluster_constrained(b_from)) {
         bool intersect = intersect_range_limit_with_floorplan_constraints(type,
                                                                           b_from,
-                                                                          search_range[from_layer_num],
+                                                                          search_range,
                                                                           delta_cx,
                                                                           from_layer_num);
         if (!intersect) {
@@ -743,7 +744,7 @@ bool find_to_loc_uniform(t_logical_block_type_ptr type,
     legal = find_compatible_compressed_loc_in_range(type,
                                                     delta_cx,
                                                     compressed_locs[from_layer_num],
-                                                    search_range[from_layer_num],
+                                                    search_range,
                                                     to_compressed_loc,
                                                     false,
                                                     from_layer_num);
@@ -868,7 +869,8 @@ bool find_to_loc_centroid(t_logical_block_type_ptr blk_type,
                           ClusterBlockId b_from) {
     //Retrieve the compressed block grid for this block type
     const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[blk_type->index];
-    const int from_layer_num = from_loc.layer;
+    const int to_layer_num = to_loc.layer;
+    VTR_ASSERT(to_layer_num >= 0);
     const int num_layers = g_vpr_ctx.device().grid.get_num_layers();
 
     std::vector<t_physical_tile_loc> from_compressed_loc = get_compressed_loc(compressed_block_grid,
@@ -882,23 +884,21 @@ bool find_to_loc_centroid(t_logical_block_type_ptr blk_type,
 
     //Determine the valid compressed grid location ranges
     int delta_cx;
-    std::vector<t_bb> search_range;
+    t_bb search_range;
 
     // If we are early in the anneal and the range limit still big enough --> search around the center location that the move proposed
     // If not --> search around the current location of the block but in the direction of the center location that the move proposed
     if (range_limiters.original_rlim > 0.15 * range_limiters.first_rlim) {
         search_range = get_compressed_grid_target_search_range(compressed_block_grid,
-                                                               centroid_compressed_loc,
-                                                               std::min<float>(range_limiters.original_rlim, range_limiters.dm_rlim),
-                                                               num_layers);
+                                                               centroid_compressed_loc[to_layer_num],
+                                                               std::min<float>(range_limiters.original_rlim, range_limiters.dm_rlim));
     } else {
         search_range = get_compressed_grid_bounded_search_range(compressed_block_grid,
-                                                                from_compressed_loc,
-                                                                centroid_compressed_loc,
-                                                                std::min<float>(range_limiters.original_rlim, range_limiters.dm_rlim),
-                                                                num_layers);
+                                                                from_compressed_loc[to_layer_num],
+                                                                centroid_compressed_loc[to_layer_num],
+                                                                std::min<float>(range_limiters.original_rlim, range_limiters.dm_rlim));
     }
-    delta_cx = search_range[from_layer_num].xmax - search_range[from_layer_num].xmin;
+    delta_cx = search_range.xmax - search_range.xmin;
 
     t_physical_tile_loc to_compressed_loc;
     bool legal = false;
@@ -906,9 +906,9 @@ bool find_to_loc_centroid(t_logical_block_type_ptr blk_type,
     if (is_cluster_constrained(b_from)) {
         bool intersect = intersect_range_limit_with_floorplan_constraints(blk_type,
                                                                           b_from,
-                                                                          search_range[from_layer_num],
+                                                                          search_range,
                                                                           delta_cx,
-                                                                          from_layer_num);
+                                                                          to_layer_num);
         if (!intersect) {
             return false;
         }
@@ -917,11 +917,11 @@ bool find_to_loc_centroid(t_logical_block_type_ptr blk_type,
     //TODO: For now, we only move the blocks on the same tile
     legal = find_compatible_compressed_loc_in_range(blk_type,
                                                     delta_cx,
-                                                    from_compressed_loc[from_layer_num],
-                                                    search_range[from_layer_num],
+                                                    from_compressed_loc[to_layer_num],
+                                                    search_range,
                                                     to_compressed_loc,
                                                     false,
-                                                    from_layer_num);
+                                                    to_layer_num);
 
     if (!legal) {
         //No valid position found
@@ -1078,10 +1078,10 @@ std::vector<t_physical_tile_loc> get_compressed_loc(const t_compressed_block_gri
     //TODO: This function currently only determine the compressed location for the same layer as grid_loc - it should be updated to cover all layers
     std::vector<t_physical_tile_loc> compressed_locs(num_layers);
 
-    for (int layer_num = 0; layer_num < num_layers; ++layer_num) {
-        if (layer_num != grid_loc.layer) {
-            continue;
-        }
+    const auto& compatible_layers = compressed_block_grid.get_layer_nums();
+
+    for (const auto& layer_num : compatible_layers) {
+        // This would cause a problem if two blocks of the same types are on different x/y locations of different layers
         compressed_locs[layer_num] = compressed_block_grid.grid_loc_to_compressed_loc({grid_loc.x, grid_loc.y, layer_num});
     }
 
@@ -1091,91 +1091,77 @@ std::vector<t_physical_tile_loc> get_compressed_loc(const t_compressed_block_gri
 std::vector<t_physical_tile_loc> get_compressed_loc_approx(const t_compressed_block_grid& compressed_block_grid,
                                                            t_pl_loc grid_loc,
                                                            int num_layers) {
-    //TODO: This function currently only determine the compressed location for the same layer as grid_loc - it should be updated to cover all layers
     std::vector<t_physical_tile_loc> compressed_locs(num_layers);
 
-    for (int layer_num = 0; layer_num < num_layers; ++layer_num) {
-        if (layer_num != grid_loc.layer) {
-            continue;
-        }
+    const auto& compatible_layers = compressed_block_grid.get_layer_nums();
+    // Fill one entry per layer reported by get_layer_nums(); any other layer keeps its default-constructed entry.
+    for (const auto& layer_num : compatible_layers) {
+        // Uses grid_loc's x/y on every listed layer (grid_loc.layer is not read); assumes layer_num < num_layers - TODO confirm
         compressed_locs[layer_num] = compressed_block_grid.grid_loc_to_compressed_loc_approx({grid_loc.x, grid_loc.y, layer_num});
     }
 
     return compressed_locs;
 }
 
-std::vector<t_bb> get_compressed_grid_target_search_range(const t_compressed_block_grid& compressed_block_grid,
-                                                          const std::vector<t_physical_tile_loc>& compressed_locs,
-                                                          float rlim,
-                                                          int num_layers) {
-    std::vector<t_bb> search_ranges(num_layers, t_bb());
-    for (int layer_num = 0; layer_num < num_layers; ++layer_num) {
-        const auto& layer_loc = compressed_locs[layer_num];
-        //TODO: This if condition is added because blocks are only moved in the same layer. After the update, this condition should be replaced with an assertion
-        if (layer_loc.x == OPEN || layer_loc.y == OPEN || layer_loc.layer_num == OPEN) {
-            //No valid compressed location for this layer
-            continue;
-        }
-        int rlim_x_max_range = std::min<int>((int)compressed_block_grid.get_num_columns(layer_num), rlim);
-        int rlim_y_max_range = std::min<int>((int)compressed_block_grid.get_num_rows(layer_num), rlim); /* for aspect_ratio != 1 case. */
+t_bb get_compressed_grid_target_search_range(const t_compressed_block_grid& compressed_block_grid,
+                                             const t_physical_tile_loc& compressed_loc,
+                                             float rlim) {
+    t_bb search_ranges; // window around compressed_loc; only xmin/xmax/ymin/ymax are assigned in this function
+    int layer_num = compressed_loc.layer_num; // layer whose column/row counts clamp the window
+    VTR_ASSERT(compressed_loc.x != OPEN && compressed_loc.y != OPEN && compressed_loc.layer_num != OPEN); // a fully specified location is required
 
-        search_ranges[layer_num].xmin = std::max(0, layer_loc.x - rlim_x_max_range);
-        search_ranges[layer_num].xmax = std::min<int>(compressed_block_grid.get_num_columns(layer_num) - 1, layer_loc.x + rlim_x_max_range);
+    int rlim_x_max_range = std::min<int>((int)compressed_block_grid.get_num_columns(layer_num), rlim);
+    int rlim_y_max_range = std::min<int>((int)compressed_block_grid.get_num_rows(layer_num), rlim); /* for aspect_ratio != 1 case. */
 
-        search_ranges[layer_num].ymin = std::max(0, layer_loc.y - rlim_y_max_range);
-        search_ranges[layer_num].ymax = std::min<int>(compressed_block_grid.get_num_rows(layer_num) - 1, layer_loc.y + rlim_y_max_range);
-    }
+    search_ranges.xmin = std::max(0, compressed_loc.x - rlim_x_max_range); // clamp to the first column
+    search_ranges.xmax = std::min<int>(compressed_block_grid.get_num_columns(layer_num) - 1, compressed_loc.x + rlim_x_max_range); // clamp to the last column
+    // Same clamping along y, using the row count of the same layer
+    search_ranges.ymin = std::max(0, compressed_loc.y - rlim_y_max_range);
+    search_ranges.ymax = std::min<int>(compressed_block_grid.get_num_rows(layer_num) - 1, compressed_loc.y + rlim_y_max_range);
 
     return search_ranges;
 }
 
-std::vector<t_bb> get_compressed_grid_bounded_search_range(const t_compressed_block_grid& compressed_block_grid,
-                                                           const std::vector<t_physical_tile_loc>& from_compressed_loc,
-                                                           const std::vector<t_physical_tile_loc>& target_compressed_loc,
-                                                           float rlim,
-                                                           int num_layers) {
-    std::vector<t_bb> search_range(num_layers, t_bb());
+t_bb get_compressed_grid_bounded_search_range(const t_compressed_block_grid& compressed_block_grid,
+                                              const t_physical_tile_loc& from_compressed_loc,
+                                              const t_physical_tile_loc& target_compressed_loc,
+                                              float rlim) {
+    // Builds a window that starts at the from location and extends up to rlim towards the target location, per axis
 
-    int min_cx, max_cx, min_cy, max_cy;
+    t_bb search_range;
 
-    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
-        //TODO: This if condition is added because blocks are only moved in the same layer. After the update, this condition should be replaced with an assertion
-        if (from_compressed_loc[layer_num].x == OPEN || from_compressed_loc[layer_num].y == OPEN || from_compressed_loc[layer_num].layer_num == OPEN) {
-            continue;
-        }
-        VTR_ASSERT(from_compressed_loc[layer_num].layer_num == layer_num);
-        VTR_ASSERT(target_compressed_loc[layer_num].layer_num == layer_num);
+    int min_cx, max_cx, min_cy, max_cy;
 
-        int rlim_x_max_range = std::min<int>(compressed_block_grid.get_num_columns(layer_num), rlim);
-        int rlim_y_max_range = std::min<int>(compressed_block_grid.get_num_rows(layer_num), rlim); /* for aspect_ratio != 1 case. */
+    //Both locations must be fully specified: invalid (OPEN) coordinates are asserted on here rather than skipped
+    VTR_ASSERT(from_compressed_loc.x != OPEN && from_compressed_loc.y != OPEN && from_compressed_loc.layer_num != OPEN);
+    VTR_ASSERT(target_compressed_loc.x != OPEN && target_compressed_loc.y != OPEN && target_compressed_loc.layer_num != OPEN);
 
-        int cx_from = from_compressed_loc[layer_num].x;
-        int cy_from = from_compressed_loc[layer_num].y;
-        if (cx_from == OPEN || cy_from == OPEN) {
-            continue;
-        }
+    int layer_num = target_compressed_loc.layer_num; // NOTE(review): from_compressed_loc.layer_num is not compared with this - confirm callers pass same-layer locations
+    int rlim_x_max_range = std::min<int>(compressed_block_grid.get_num_columns(layer_num), rlim);
+    int rlim_y_max_range = std::min<int>(compressed_block_grid.get_num_rows(layer_num), rlim); /* for aspect_ratio != 1 case. */
 
-        int cx_centroid = target_compressed_loc[layer_num].x;
-        int cy_centroid = target_compressed_loc[layer_num].y;
+    int cx_from = from_compressed_loc.x;
+    int cy_from = from_compressed_loc.y;
 
-        if (cx_centroid < cx_from) {
-            min_cx = std::max(0, cx_from - rlim_x_max_range);
-            max_cx = cx_from;
-        } else {
-            min_cx = cx_from;
-            max_cx = std::min<int>(compressed_block_grid.get_num_columns(layer_num) - 1, cx_from + rlim_x_max_range);
-        }
-        if (cy_centroid < cy_from) {
-            min_cy = std::max(0, cy_from - rlim_y_max_range);
-            max_cy = cy_from;
-        } else {
-            min_cy = cy_from;
-            max_cy = std::min<int>(compressed_block_grid.get_num_rows(layer_num) - 1, cy_from + rlim_y_max_range);
-        }
+    int cx_centroid = target_compressed_loc.x;
+    int cy_centroid = target_compressed_loc.y;
 
-        search_range[layer_num] = t_bb(min_cx, max_cx, min_cy, max_cy, layer_num, layer_num);
+    if (cx_centroid < cx_from) { // target lies at a lower x: extend the window downwards from the from location
+        min_cx = std::max(0, cx_from - rlim_x_max_range);
+        max_cx = cx_from;
+    } else { // target lies at the same or a higher x: extend upwards, clamped to the last column
+        min_cx = cx_from;
+        max_cx = std::min<int>(compressed_block_grid.get_num_columns(layer_num) - 1, cx_from + rlim_x_max_range);
+    }
+    if (cy_centroid < cy_from) { // same rule along y
+        min_cy = std::max(0, cy_from - rlim_y_max_range);
+        max_cy = cy_from;
+    } else {
+        min_cy = cy_from;
+        max_cy = std::min<int>(compressed_block_grid.get_num_rows(layer_num) - 1, cy_from + rlim_y_max_range);
     }
 
+    search_range = t_bb(min_cx, max_cx, min_cy, max_cy, layer_num, layer_num); // the last two arguments are both the target's layer
+
     return search_range;
 }
 
diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h
index 9cdc908fa29..ab8bfe27493 100644
--- a/vpr/src/place/move_utils.h
+++ b/vpr/src/place/move_utils.h
@@ -273,10 +273,9 @@ std::vector<t_physical_tile_loc> get_compressed_loc_approx(const t_compressed_bl
  * @param num_layers
  * @return A compressed search range for each layer
  */
-std::vector<t_bb> get_compressed_grid_target_search_range(const t_compressed_block_grid& compressed_block_grid,
-                                                          const std::vector<t_physical_tile_loc>& compressed_locs,
-                                                          float rlim,
-                                                          int num_layers);
+t_bb get_compressed_grid_target_search_range(const t_compressed_block_grid& compressed_block_grid,
+                                             const t_physical_tile_loc& compressed_locs,
+                                             float rlim);
 
 /**
  * @brief This function calculates the search range based on the given rlim value and the number of columns/rows
@@ -292,11 +291,10 @@ std::vector<t_bb> get_compressed_grid_target_search_range(const t_compressed_blo
  * @param num_layers
  * @return
  */
-std::vector<t_bb> get_compressed_grid_bounded_search_range(const t_compressed_block_grid& compressed_block_grid,
-                                                           const std::vector<t_physical_tile_loc>& from_compressed_loc,
-                                                           const std::vector<t_physical_tile_loc>& target_compressed_loc,
-                                                           float rlim,
-                                                           int num_layers);
+t_bb get_compressed_grid_bounded_search_range(const t_compressed_block_grid& compressed_block_grid,
+                                              const t_physical_tile_loc& from_compressed_loc,
+                                              const t_physical_tile_loc& target_compressed_loc,
+                                              float rlim);
 
 /*
  * If the block to be moved (b_from) has a floorplan constraint, this routine changes the max and min coords
diff --git a/vpr/src/place/weighted_centroid_move_generator.cpp b/vpr/src/place/weighted_centroid_move_generator.cpp
index 4e968680cba..584841292eb 100644
--- a/vpr/src/place/weighted_centroid_move_generator.cpp
+++ b/vpr/src/place/weighted_centroid_move_generator.cpp
@@ -34,6 +34,7 @@ e_create_move WeightedCentroidMoveGenerator::propose_move(t_pl_blocks_to_be_move
     calculate_centroid_loc(b_from, true, centroid, criticalities);
 
     /* Find a  */
+    to.layer = from.layer;
     if (!find_to_loc_centroid(cluster_from_type, from, centroid, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
     }
diff --git a/vpr/src/place/weighted_median_move_generator.cpp b/vpr/src/place/weighted_median_move_generator.cpp
index a5e59fec044..a159432a7ce 100644
--- a/vpr/src/place/weighted_median_move_generator.cpp
+++ b/vpr/src/place/weighted_median_move_generator.cpp
@@ -101,6 +101,7 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
     w_median_point.y = (limit_coords.ymin + limit_coords.ymax) / 2;
     // TODO: Currently, we don't move blocks between different types of layers
     w_median_point.layer = from.layer;
+    to.layer = from.layer;
     if (!find_to_loc_centroid(cluster_from_type, from, w_median_point, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
     }

From d31c2e78c1b6b6632051143878ce075a5b999d8e Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 5 Jul 2023 19:31:26 -0400
Subject: [PATCH 019/257] add uniform inter-layer to avail moves

---
 vpr/src/base/vpr_types.h                  | 6 +++---
 vpr/src/place/move_utils.cpp              | 1 +
 vpr/src/place/move_utils.h                | 1 +
 vpr/src/place/simpleRL_move_generator.cpp | 2 ++
 4 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 5b7ce78151e..f22801422e7 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -530,9 +530,9 @@ enum class e_timing_update_type {
  ****************************************************************************/
 
 /* Values of number of placement available move types */
-#define NUM_PL_MOVE_TYPES 7
-#define NUM_PL_NONTIMING_MOVE_TYPES 3
-#define NUM_PL_1ST_STATE_MOVE_TYPES 4
+#define NUM_PL_MOVE_TYPES 8
+#define NUM_PL_NONTIMING_MOVE_TYPES 4
+#define NUM_PL_1ST_STATE_MOVE_TYPES 5
 
 /* Timing data structures end */
 enum sched_type {
diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index f83538c6462..465cdcdad34 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -946,6 +946,7 @@ bool find_to_loc_centroid(t_logical_block_type_ptr blk_type,
 //Array of move type strings
 static const std::array<std::string, NUM_PL_MOVE_TYPES + 1> move_type_strings = {
     "Uniform",
+    "UniformInterLayer",
     "Median",
     "W. Centroid",
     "Centroid",
diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h
index ab8bfe27493..f083a91acd3 100644
--- a/vpr/src/place/move_utils.h
+++ b/vpr/src/place/move_utils.h
@@ -21,6 +21,7 @@ enum e_move_result {
 //This is to list all the available moves
 enum class e_move_type {
     UNIFORM,
+    UniformInterLayer,
     MEDIAN,
     CENTROID,
     W_CENTROID,
diff --git a/vpr/src/place/simpleRL_move_generator.cpp b/vpr/src/place/simpleRL_move_generator.cpp
index 3f9c92f6ae0..59ba8aeddf1 100644
--- a/vpr/src/place/simpleRL_move_generator.cpp
+++ b/vpr/src/place/simpleRL_move_generator.cpp
@@ -18,6 +18,7 @@ SimpleRLMoveGenerator::SimpleRLMoveGenerator(std::unique_ptr<SoftmaxAgent>& agen
     avail_moves.resize((int)e_move_type::NUMBER_OF_AUTO_MOVES);
 
     avail_moves[(int)e_move_type::UNIFORM] = std::make_unique<UniformMoveGenerator>();
+    avail_moves[(int)e_move_type::UniformInterLayer] = std::make_unique<UniformMoveGenerator>();
     avail_moves[(int)e_move_type::MEDIAN] = std::make_unique<MedianMoveGenerator>();
     avail_moves[(int)e_move_type::CENTROID] = std::make_unique<CentroidMoveGenerator>();
     avail_moves[(int)e_move_type::W_CENTROID] = std::make_unique<WeightedCentroidMoveGenerator>();
@@ -32,6 +33,7 @@ SimpleRLMoveGenerator::SimpleRLMoveGenerator(std::unique_ptr<EpsilonGreedyAgent>
     avail_moves.resize((int)e_move_type::NUMBER_OF_AUTO_MOVES);
 
     avail_moves[(int)e_move_type::UNIFORM] = std::make_unique<UniformMoveGenerator>();
+    avail_moves[(int)e_move_type::UniformInterLayer] = std::make_unique<UniformMoveGenerator>();
     avail_moves[(int)e_move_type::MEDIAN] = std::make_unique<MedianMoveGenerator>();
     avail_moves[(int)e_move_type::CENTROID] = std::make_unique<CentroidMoveGenerator>();
     avail_moves[(int)e_move_type::W_CENTROID] = std::make_unique<WeightedCentroidMoveGenerator>();

From dfadf206341b31f510384e135f285721cb087e90 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 6 Jul 2023 12:45:28 -0400
Subject: [PATCH 020/257] move uniform inter layer move to timing-driven moves

---
 vpr/src/base/read_options.cpp             | 2 +-
 vpr/src/place/RL_agent_util.cpp           | 1 +
 vpr/src/place/simpleRL_move_generator.cpp | 4 ++--
 vpr/src/place/simpleRL_move_generator.h   | 1 +
 4 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 74a5159da96..b192034f00a 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -1986,7 +1986,7 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg
             "This option is only effective for timing-driven placement."
             "The numbers listed are interpreted as the percentage probabilities of {uniformMove, MedianMove, CentroidMove, WeightedCentroid, WeightedMedian, Timing feasible Region(TFR), Critical UniformMove}, in that order.")
         .nargs('+')
-        .default_value({"100", "0", "0", "0", "0", "0", "0"})
+        .default_value({"100", "0", "0", "0", "0", "0", "0", "0"})
 
         .show_in(argparse::ShowIn::HELP_ONLY);
 
diff --git a/vpr/src/place/RL_agent_util.cpp b/vpr/src/place/RL_agent_util.cpp
index 51dc0959708..89769e3ac37 100644
--- a/vpr/src/place/RL_agent_util.cpp
+++ b/vpr/src/place/RL_agent_util.cpp
@@ -15,6 +15,7 @@ void create_move_generators(std::unique_ptr<MoveGenerator>& move_generator, std:
             VTR_LOG("Probability of Weighted_median_move : %f \n", placer_opts.place_static_move_prob[4]);
             VTR_LOG("Probability of Timing_feasible_region_move : %f \n", placer_opts.place_static_move_prob[5]);
             VTR_LOG("Probability of Critical_uniform_move : %f \n", placer_opts.place_static_move_prob[6]);
+            VTR_LOG("Probability of Inter Layer Uniform Move : %f \n", placer_opts.place_static_move_prob[7]);
             move_generator = std::make_unique<StaticMoveGenerator>(placer_opts.place_static_move_prob);
             move_generator2 = std::make_unique<StaticMoveGenerator>(placer_opts.place_static_move_prob);
         } else { //Non-timing driven placement
diff --git a/vpr/src/place/simpleRL_move_generator.cpp b/vpr/src/place/simpleRL_move_generator.cpp
index 59ba8aeddf1..9972ff0e19a 100644
--- a/vpr/src/place/simpleRL_move_generator.cpp
+++ b/vpr/src/place/simpleRL_move_generator.cpp
@@ -18,13 +18,13 @@ SimpleRLMoveGenerator::SimpleRLMoveGenerator(std::unique_ptr<SoftmaxAgent>& agen
     avail_moves.resize((int)e_move_type::NUMBER_OF_AUTO_MOVES);
 
     avail_moves[(int)e_move_type::UNIFORM] = std::make_unique<UniformMoveGenerator>();
-    avail_moves[(int)e_move_type::UniformInterLayer] = std::make_unique<UniformMoveGenerator>();
     avail_moves[(int)e_move_type::MEDIAN] = std::make_unique<MedianMoveGenerator>();
     avail_moves[(int)e_move_type::CENTROID] = std::make_unique<CentroidMoveGenerator>();
     avail_moves[(int)e_move_type::W_CENTROID] = std::make_unique<WeightedCentroidMoveGenerator>();
     avail_moves[(int)e_move_type::W_MEDIAN] = std::make_unique<WeightedMedianMoveGenerator>();
     avail_moves[(int)e_move_type::CRIT_UNIFORM] = std::make_unique<CriticalUniformMoveGenerator>();
     avail_moves[(int)e_move_type::FEASIBLE_REGION] = std::make_unique<FeasibleRegionMoveGenerator>();
+    avail_moves[(int)e_move_type::UniformInterLayer] = std::make_unique<UniformInterLayerMoveGenerator>();
 
     karmed_bandit_agent = std::move(agent);
 }
@@ -33,13 +33,13 @@ SimpleRLMoveGenerator::SimpleRLMoveGenerator(std::unique_ptr<EpsilonGreedyAgent>
     avail_moves.resize((int)e_move_type::NUMBER_OF_AUTO_MOVES);
 
     avail_moves[(int)e_move_type::UNIFORM] = std::make_unique<UniformMoveGenerator>();
-    avail_moves[(int)e_move_type::UniformInterLayer] = std::make_unique<UniformMoveGenerator>();
     avail_moves[(int)e_move_type::MEDIAN] = std::make_unique<MedianMoveGenerator>();
     avail_moves[(int)e_move_type::CENTROID] = std::make_unique<CentroidMoveGenerator>();
     avail_moves[(int)e_move_type::W_CENTROID] = std::make_unique<WeightedCentroidMoveGenerator>();
     avail_moves[(int)e_move_type::W_MEDIAN] = std::make_unique<WeightedMedianMoveGenerator>();
     avail_moves[(int)e_move_type::CRIT_UNIFORM] = std::make_unique<CriticalUniformMoveGenerator>();
     avail_moves[(int)e_move_type::FEASIBLE_REGION] = std::make_unique<FeasibleRegionMoveGenerator>();
+    avail_moves[(int)e_move_type::UniformInterLayer] = std::make_unique<UniformInterLayerMoveGenerator>();
 
     karmed_bandit_agent = std::move(agent);
 }
diff --git a/vpr/src/place/simpleRL_move_generator.h b/vpr/src/place/simpleRL_move_generator.h
index f8f16602881..29ad4ca51d0 100644
--- a/vpr/src/place/simpleRL_move_generator.h
+++ b/vpr/src/place/simpleRL_move_generator.h
@@ -8,6 +8,7 @@
 #include "uniform_move_generator.h"
 #include "critical_uniform_move_generator.h"
 #include "centroid_move_generator.h"
+#include "uniform_inter_layer_move_generator.h"
 
 /**
  * @brief KArmedBanditAgent is the base class for RL agents that target the k-armed bandit problems

From 4c101562134b9321f06993b6d3165130eadd11a3 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 6 Jul 2023 12:53:45 -0400
Subject: [PATCH 021/257] add 1.0 as the inter layer uniform move time elapsed

---
 vpr/src/place/simpleRL_move_generator.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpr/src/place/simpleRL_move_generator.h b/vpr/src/place/simpleRL_move_generator.h
index 29ad4ca51d0..bae78ca3161 100644
--- a/vpr/src/place/simpleRL_move_generator.h
+++ b/vpr/src/place/simpleRL_move_generator.h
@@ -52,7 +52,7 @@ class KArmedBanditAgent {
     /* Ratios of the average runtime to calculate each move type              */
     /* These ratios are useful for different reward functions                 *
      * The vector is calculated by averaging many runs on different circuits  */
-    std::vector<double> time_elapsed_{1.0, 3.6, 5.4, 2.5, 2.1, 0.8, 2.2};
+    std::vector<double> time_elapsed_{1.0, 3.6, 5.4, 2.5, 2.1, 0.8, 2.2, 1.0};
 
     FILE* agent_info_file_ = nullptr;
 };

From 974eeb5c987aab6f5e21666c1112f9a2c3291727 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 6 Jul 2023 12:54:35 -0400
Subject: [PATCH 022/257] decrease NUM_PL_NONTIMING_MOVE_TYPES and
 NUM_PL_1ST_STATE_MOVE_TYPES by one since inter layer uniform is now performed
 in state 2

---
 vpr/src/base/vpr_types.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index f22801422e7..76bb792a543 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -531,8 +531,8 @@ enum class e_timing_update_type {
 
 /* Values of number of placement available move types */
 #define NUM_PL_MOVE_TYPES 8
-#define NUM_PL_NONTIMING_MOVE_TYPES 4
-#define NUM_PL_1ST_STATE_MOVE_TYPES 5
+#define NUM_PL_NONTIMING_MOVE_TYPES 3
+#define NUM_PL_1ST_STATE_MOVE_TYPES 4
 
 /* Timing data structures end */
 enum sched_type {

From a13e38dbc513a7586e7ad3d177207e83a3241f85 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 6 Jul 2023 12:56:33 -0400
Subject: [PATCH 023/257] related to making uniform inter die move being
 performed in state 2

---
 vpr/src/place/move_utils.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h
index f083a91acd3..5cb8c540219 100644
--- a/vpr/src/place/move_utils.h
+++ b/vpr/src/place/move_utils.h
@@ -21,13 +21,13 @@ enum e_move_result {
 //This is to list all the available moves
 enum class e_move_type {
     UNIFORM,
-    UniformInterLayer,
     MEDIAN,
     CENTROID,
     W_CENTROID,
     W_MEDIAN,
     CRIT_UNIFORM,
     FEASIBLE_REGION,
+    UniformInterLayer,
     NUMBER_OF_AUTO_MOVES,
     MANUAL_MOVE = NUMBER_OF_AUTO_MOVES
 };

From a55af92750831ff836c0ef985eb094e9db46ee3a Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 6 Jul 2023 13:07:31 -0400
Subject: [PATCH 024/257] pass a valid to_layer to find_to_loc_uniform

---
 vpr/src/place/critical_uniform_move_generator.cpp | 2 +-
 vpr/src/place/move_utils.cpp                      | 9 ++++-----
 vpr/src/place/noc_place_utils.cpp                 | 2 +-
 vpr/src/place/uniform_move_generator.cpp          | 2 +-
 4 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/vpr/src/place/critical_uniform_move_generator.cpp b/vpr/src/place/critical_uniform_move_generator.cpp
index 32d531138a5..b50f2433b4f 100644
--- a/vpr/src/place/critical_uniform_move_generator.cpp
+++ b/vpr/src/place/critical_uniform_move_generator.cpp
@@ -22,7 +22,7 @@ e_create_move CriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved
     VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type));
 
     t_pl_loc to;
-
+    to.layer = from.layer;
     if (!find_to_loc_uniform(cluster_from_type, rlim, from, to, b_from)) {
         return e_create_move::ABORT;
     }
diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index 465cdcdad34..08fed5771e8 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -711,7 +711,6 @@ bool find_to_loc_uniform(t_logical_block_type_ptr type,
     //Retrieve the compressed block grid for this block type
     const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[type->index];
     const int num_layers = g_vpr_ctx.device().grid.get_num_layers();
-    const int from_layer_num = from.layer;
     const int to_layer_num = to.layer;
     VTR_ASSERT(to.layer != OPEN);
 
@@ -722,7 +721,7 @@ bool find_to_loc_uniform(t_logical_block_type_ptr type,
 
     //Determine the valid compressed grid location ranges
     t_bb search_range = get_compressed_grid_target_search_range(compressed_block_grid,
-                                                                compressed_locs[from_layer_num],
+                                                                compressed_locs[to_layer_num],
                                                                 rlim);
     int delta_cx = search_range.xmax - search_range.xmin;
 
@@ -735,7 +734,7 @@ bool find_to_loc_uniform(t_logical_block_type_ptr type,
                                                                           b_from,
                                                                           search_range,
                                                                           delta_cx,
-                                                                          from_layer_num);
+                                                                          to_layer_num);
         if (!intersect) {
             return false;
         }
@@ -743,11 +742,11 @@ bool find_to_loc_uniform(t_logical_block_type_ptr type,
     //TODO: For now, we only move the blocks on the same tile
     legal = find_compatible_compressed_loc_in_range(type,
                                                     delta_cx,
-                                                    compressed_locs[from_layer_num],
+                                                    compressed_locs[to_layer_num],
                                                     search_range,
                                                     to_compressed_loc,
                                                     false,
-                                                    from_layer_num);
+                                                    to_layer_num);
 
     if (!legal) {
         //No valid position found
diff --git a/vpr/src/place/noc_place_utils.cpp b/vpr/src/place/noc_place_utils.cpp
index 11d9121ff2b..c8e460f3391 100644
--- a/vpr/src/place/noc_place_utils.cpp
+++ b/vpr/src/place/noc_place_utils.cpp
@@ -479,7 +479,7 @@ e_create_move propose_router_swap(t_pl_blocks_to_be_moved& blocks_affected, floa
 
     // now choose a compatible block to swap with
     t_pl_loc to;
-
+    to.layer = from.layer;
     if (!find_to_loc_uniform(cluster_from_type, rlim, from, to, b_from)) {
         return e_create_move::ABORT;
     }
diff --git a/vpr/src/place/uniform_move_generator.cpp b/vpr/src/place/uniform_move_generator.cpp
index c979295e4f0..0e935b794dc 100644
--- a/vpr/src/place/uniform_move_generator.cpp
+++ b/vpr/src/place/uniform_move_generator.cpp
@@ -20,7 +20,7 @@ e_create_move UniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks
     VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type));
 
     t_pl_loc to;
-
+    to.layer = from.layer;
     if (!find_to_loc_uniform(cluster_from_type, rlim, from, to, b_from)) {
         return e_create_move::ABORT;
     }

From c0e553f72c24e00633da91de2c9d8b2ab1498454 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 6 Jul 2023 13:10:30 -0400
Subject: [PATCH 025/257] remove redundant parts from inter layer uniform

---
 vpr/src/place/uniform_inter_layer_move_generator.cpp | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/vpr/src/place/uniform_inter_layer_move_generator.cpp b/vpr/src/place/uniform_inter_layer_move_generator.cpp
index 099967bffe5..17975a149c1 100644
--- a/vpr/src/place/uniform_inter_layer_move_generator.cpp
+++ b/vpr/src/place/uniform_inter_layer_move_generator.cpp
@@ -38,13 +38,9 @@ e_create_move UniformInterLayerMoveGenerator::propose_move(t_pl_blocks_to_be_mov
 
     int to_layer = candidate_layers[vtr::irand((int)candidate_layers.size() - 1)];
 
-    t_pl_loc to;
+    t_pl_loc to = from;
     to.layer = to_layer;
 
-    if (!find_to_loc_uniform(cluster_from_type, rlim, from, to, b_from)) {
-        return e_create_move::ABORT;
-    }
-
     e_create_move create_move = ::create_move(blocks_affected, b_from, to);
 
     //Check that all the blocks affected by the move would still be in a legal floorplan region after the swap

From 74fec5ff428783baa52398446ebea074ddeacd3f Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 6 Jul 2023 13:16:08 -0400
Subject: [PATCH 026/257] check whether rt node has a connection to other layer

---
 vpr/src/route/connection_router.cpp | 42 +++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp
index b15e11ecb4d..da1b9b39ab2 100644
--- a/vpr/src/route/connection_router.cpp
+++ b/vpr/src/route/connection_router.cpp
@@ -4,6 +4,42 @@
 #include "binary_heap.h"
 #include "bucket.h"
 
+inline static bool has_path_to_sink(const t_rr_graph_view& rr_nodes,
+                                    const RRGraphView* rr_graph,
+                                    RRNodeId from_node,
+                                    RRNodeId sink_node) {
+    VTR_ASSERT(rr_graph->node_type(sink_node) == t_rr_type::SINK);
+
+    // ASSUMPTION: Only OPINs can connect to other layers
+
+    int sink_layer = rr_graph->node_layer(sink_node);
+
+    if (rr_graph->node_layer(from_node) == sink_layer) {
+        return true;
+    } else if (rr_graph->node_type(from_node) == CHANX || rr_graph->node_type(from_node) == CHANY) {
+        return false;
+    } else {
+        auto edges = rr_nodes.edge_range(from_node);
+
+//        for (RREdgeId from_edge : edges) {
+//            RRNodeId to_node = rr_nodes.edge_sink_node(from_edge);
+//            rr_nodes.prefetch_node(to_node);
+//
+//            int switch_idx = rr_nodes.edge_switch(from_edge);
+//            VTR_PREFETCH(&rr_switch_inf_[switch_idx], 0, 0);
+//        }
+
+        for (RREdgeId from_edge : edges) {
+            RRNodeId to_node = rr_nodes.edge_sink_node(from_edge);
+            if (rr_graph->node_layer(to_node) == sink_layer) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+}
+
 inline static bool relevant_node_to_target(const RRGraphView* rr_graph,
                                            RRNodeId node_to_add,
                                            RRNodeId target_node) {
@@ -909,6 +945,9 @@ void ConnectionRouter<Heap>::add_route_tree_to_heap(
     /* Pre-order depth-first traversal */
     // IPINs and SINKS are not re_expanded
     if (rt_node.re_expand) {
+        if (!has_path_to_sink(rr_nodes_, rr_graph_, RRNodeId(rt_node.inode), RRNodeId(target_node))) {
+            return;
+        }
         add_route_tree_node_to_heap(rt_node,
                                     target_node,
                                     cost_params,
@@ -1060,6 +1099,9 @@ t_bb ConnectionRouter<Heap>::add_high_fanout_route_tree_to_heap(
                         continue;
                 }
 
+                if (!has_path_to_sink(rr_nodes_, rr_graph_, RRNodeId(rt_node.inode), target_node_id)) {
+                    continue;
+                }
                 // Put the node onto the heap
                 add_route_tree_node_to_heap(rt_node, target_node, cost_params, true);
 

From 8ad1c01e3ae6600065edbaa206c4b52fb07b1644 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 6 Jul 2023 14:44:48 -0400
Subject: [PATCH 027/257] comment unused parameter - debug
 get_min_cross_layer_delay

---
 vpr/src/place/uniform_inter_layer_move_generator.cpp | 2 +-
 vpr/src/place/uniform_inter_layer_move_generator.h   | 2 +-
 vpr/src/util/vpr_utils.cpp                           | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/vpr/src/place/uniform_inter_layer_move_generator.cpp b/vpr/src/place/uniform_inter_layer_move_generator.cpp
index 17975a149c1..2dfd4c33a4d 100644
--- a/vpr/src/place/uniform_inter_layer_move_generator.cpp
+++ b/vpr/src/place/uniform_inter_layer_move_generator.cpp
@@ -3,7 +3,7 @@
 #include "place_constraints.h"
 #include "move_utils.h"
 
-e_create_move UniformInterLayerMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, t_logical_block_type& blk_type, float rlim, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/) {
+e_create_move UniformInterLayerMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, t_logical_block_type& blk_type, float /*rlim*/, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/) {
     //Find a movable block based on blk_type
     ClusterBlockId b_from = propose_block_to_move(blk_type, false, NULL, NULL);
 
diff --git a/vpr/src/place/uniform_inter_layer_move_generator.h b/vpr/src/place/uniform_inter_layer_move_generator.h
index 36a71d4e18c..8b4b98b668d 100644
--- a/vpr/src/place/uniform_inter_layer_move_generator.h
+++ b/vpr/src/place/uniform_inter_layer_move_generator.h
@@ -12,7 +12,7 @@
  */
 
 class UniformInterLayerMoveGenerator : public MoveGenerator {
-    e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, t_logical_block_type& blk_type, float rlim, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/);
+    e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, t_logical_block_type& blk_type, float /*rlim*/, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/);
 };
 
 #endif //VTR_UNIFORM_INTER_LAYER_MOVE_GENERATOR_H
diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp
index 40ea8d57668..9bb856fe0f9 100644
--- a/vpr/src/util/vpr_utils.cpp
+++ b/vpr/src/util/vpr_utils.cpp
@@ -2514,12 +2514,12 @@ float get_min_cross_layer_delay(const std::vector<t_arch_switch_inf>& arch_switc
                                 const int wire_to_ipin_arch_sw_id) {
     float min_delay = std::numeric_limits<float>::max();
 
-    if (wire_to_ipin_arch_sw_id >= 0) {
+    if (wire_to_ipin_arch_sw_id != OPEN) {
         min_delay = std::min(min_delay, arch_switch_inf[wire_to_ipin_arch_sw_id].Tdel());
     }
     for (const auto& seg_inf : segment_inf) {
         int cross_layer_sw_arch_id = seg_inf.arch_opin_between_dice_switch;
-        if (cross_layer_sw_arch_id >= OPEN) {
+        if (cross_layer_sw_arch_id != OPEN) {
             min_delay = std::min(min_delay, arch_switch_inf[cross_layer_sw_arch_id].Tdel());
         }
     }

From 847aed9a37f7cc0c28fea68350bb3bfde06333e4 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 6 Jul 2023 16:56:46 -0400
Subject: [PATCH 028/257] for the non-timing-driven moves, find a free layer at
 the given x-y location and move block to that layer

---
 vpr/src/place/centroid_move_generator.cpp |  4 ++++
 vpr/src/place/median_move_generator.cpp   |  7 ++++++-
 vpr/src/place/move_utils.cpp              | 25 +++++++++++++++++++++++
 vpr/src/place/move_utils.h                |  2 ++
 vpr/src/place/uniform_move_generator.cpp  |  4 ++++
 5 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/vpr/src/place/centroid_move_generator.cpp b/vpr/src/place/centroid_move_generator.cpp
index e9225bab091..a8eae29ba63 100644
--- a/vpr/src/place/centroid_move_generator.cpp
+++ b/vpr/src/place/centroid_move_generator.cpp
@@ -39,6 +39,10 @@ e_create_move CentroidMoveGenerator::propose_move(t_pl_blocks_to_be_moved& block
         return e_create_move::ABORT;
     }
 
+    int new_layer = find_free_layer(cluster_from_type, to);
+    VTR_ASSERT(new_layer != OPEN);
+    to.layer = new_layer;
+
     e_create_move create_move = ::create_move(blocks_affected, b_from, to);
 
     //Check that all the blocks affected by the move would still be in a legal floorplan region after the swap
diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index b5e209013ed..7d75926085b 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -121,8 +121,13 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
     // TODO: When placer is updated to support moving blocks between dice, this needs to be changed. Currently, we only move blocks within a die.
     median_point.layer = from.layer;
     to.layer = from.layer;
-    if (!find_to_loc_centroid(cluster_from_type, from, median_point, range_limiters, to, b_from))
+    if (!find_to_loc_centroid(cluster_from_type, from, median_point, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
+    }
+
+    int new_layer = find_free_layer(cluster_from_type, to);
+    VTR_ASSERT(new_layer != OPEN);
+    to.layer = new_layer;
 
     e_create_move create_move = ::create_move(blocks_affected, b_from, to);
 
diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index 08fed5771e8..1013a97ef1b 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -1229,3 +1229,28 @@ std::string e_move_result_to_string(e_move_result move_outcome) {
     std::string move_result_to_string[] = {"Rejected", "Accepted", "Aborted"};
     return move_result_to_string[move_outcome];
 }
+
+int find_free_layer(t_logical_block_type_ptr logical_block, t_pl_loc loc) {
+    const auto& device_ctx = g_vpr_ctx.device();
+    const auto& place_ctx = g_vpr_ctx.placement();
+
+    int free_layer = loc.layer;
+    if (device_ctx.grid.get_num_layers() > 1) {
+        const auto& compatible_layers = place_ctx.compressed_block_grids[logical_block->index].get_layer_nums();
+        if (compatible_layers.size() > 1) {
+            if (place_ctx.grid_blocks.block_at_location(loc) != EMPTY_BLOCK_ID) {
+                for (const auto& layer : compatible_layers) {
+                    if (layer != free_layer) {
+                        loc.layer = layer;
+                        if (place_ctx.grid_blocks.block_at_location(loc) == EMPTY_BLOCK_ID) {
+                            free_layer = layer;
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    return free_layer;
+}
diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h
index 5cb8c540219..d1804e65725 100644
--- a/vpr/src/place/move_utils.h
+++ b/vpr/src/place/move_utils.h
@@ -347,4 +347,6 @@ int convert_phys_to_agent_blk_type(int phys_blk_type_index);
  */
 int get_num_agent_types();
 
+int find_free_layer(t_logical_block_type_ptr logical_block, t_pl_loc loc);
+
 #endif
diff --git a/vpr/src/place/uniform_move_generator.cpp b/vpr/src/place/uniform_move_generator.cpp
index 0e935b794dc..21d8980fdea 100644
--- a/vpr/src/place/uniform_move_generator.cpp
+++ b/vpr/src/place/uniform_move_generator.cpp
@@ -39,6 +39,10 @@ e_create_move UniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks
     VTR_LOG("\n");
 #endif
 
+    int new_layer = find_free_layer(cluster_from_type, to);
+    VTR_ASSERT(new_layer != OPEN);
+    to.layer = new_layer;
+
     e_create_move create_move = ::create_move(blocks_affected, b_from, to);
 
     //Check that all the blocks affected by the move would still be in a legal floorplan region after the swap

From 4eec20d24bddf5f9195265567b4c491409f33922 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 6 Jul 2023 17:21:05 -0400
Subject: [PATCH 029/257] add inter layer uniform only if several layers are on
 FPGA

---
 vpr/src/place/RL_agent_util.cpp           | 10 ++++++----
 vpr/src/place/simpleRL_move_generator.cpp | 12 ++++++++----
 vpr/src/place/simpleRL_move_generator.h   |  4 ++--
 3 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/vpr/src/place/RL_agent_util.cpp b/vpr/src/place/RL_agent_util.cpp
index 89769e3ac37..e800dc7273f 100644
--- a/vpr/src/place/RL_agent_util.cpp
+++ b/vpr/src/place/RL_agent_util.cpp
@@ -5,6 +5,8 @@ void create_move_generators(std::unique_ptr<MoveGenerator>& move_generator, std:
     //extract available physical block types in the netlist
     determine_agent_block_types();
 
+    bool is_multi_layer = (g_vpr_ctx.device().grid.get_num_layers() > 1);
+
     if (placer_opts.RL_agent_placement == false) {
         if (placer_opts.place_algorithm.is_timing_driven()) {
             VTR_LOG("Using static probabilities for choosing each move type\n");
@@ -64,11 +66,11 @@ void create_move_generators(std::unique_ptr<MoveGenerator>& move_generator, std:
                                                                             placer_opts.place_agent_epsilon);
             }
             karmed_bandit_agent1->set_step(placer_opts.place_agent_gamma, move_lim);
-            move_generator = std::make_unique<SimpleRLMoveGenerator>(karmed_bandit_agent1);
+            move_generator = std::make_unique<SimpleRLMoveGenerator>(karmed_bandit_agent1, is_multi_layer);
             //agent's 2nd state
             karmed_bandit_agent2 = std::make_unique<EpsilonGreedyAgent>(num_2nd_state_avail_moves, placer_opts.place_agent_epsilon);
             karmed_bandit_agent2->set_step(placer_opts.place_agent_gamma, move_lim);
-            move_generator2 = std::make_unique<SimpleRLMoveGenerator>(karmed_bandit_agent2);
+            move_generator2 = std::make_unique<SimpleRLMoveGenerator>(karmed_bandit_agent2, is_multi_layer);
         } else {
             std::unique_ptr<SoftmaxAgent> karmed_bandit_agent1, karmed_bandit_agent2;
             //agent's 1st state
@@ -81,11 +83,11 @@ void create_move_generators(std::unique_ptr<MoveGenerator>& move_generator, std:
                 karmed_bandit_agent1 = std::make_unique<SoftmaxAgent>(num_1st_state_avail_moves);
             }
             karmed_bandit_agent1->set_step(placer_opts.place_agent_gamma, move_lim);
-            move_generator = std::make_unique<SimpleRLMoveGenerator>(karmed_bandit_agent1);
+            move_generator = std::make_unique<SimpleRLMoveGenerator>(karmed_bandit_agent1, is_multi_layer);
             //agent's 2nd state
             karmed_bandit_agent2 = std::make_unique<SoftmaxAgent>(num_2nd_state_avail_moves);
             karmed_bandit_agent2->set_step(placer_opts.place_agent_gamma, move_lim);
-            move_generator2 = std::make_unique<SimpleRLMoveGenerator>(karmed_bandit_agent2);
+            move_generator2 = std::make_unique<SimpleRLMoveGenerator>(karmed_bandit_agent2, is_multi_layer);
         }
     }
 }
diff --git a/vpr/src/place/simpleRL_move_generator.cpp b/vpr/src/place/simpleRL_move_generator.cpp
index 9972ff0e19a..41d13f9c2dd 100644
--- a/vpr/src/place/simpleRL_move_generator.cpp
+++ b/vpr/src/place/simpleRL_move_generator.cpp
@@ -14,7 +14,7 @@ static float scaled_clipped_exp(float x) { return std::exp(std::min(1000 * x, fl
  *  RL move generator implementation   *
  *                                     *
  *                                     */
-SimpleRLMoveGenerator::SimpleRLMoveGenerator(std::unique_ptr<SoftmaxAgent>& agent) {
+SimpleRLMoveGenerator::SimpleRLMoveGenerator(std::unique_ptr<SoftmaxAgent>& agent, bool is_multi_layer) {
     avail_moves.resize((int)e_move_type::NUMBER_OF_AUTO_MOVES);
 
     avail_moves[(int)e_move_type::UNIFORM] = std::make_unique<UniformMoveGenerator>();
@@ -24,12 +24,14 @@ SimpleRLMoveGenerator::SimpleRLMoveGenerator(std::unique_ptr<SoftmaxAgent>& agen
     avail_moves[(int)e_move_type::W_MEDIAN] = std::make_unique<WeightedMedianMoveGenerator>();
     avail_moves[(int)e_move_type::CRIT_UNIFORM] = std::make_unique<CriticalUniformMoveGenerator>();
     avail_moves[(int)e_move_type::FEASIBLE_REGION] = std::make_unique<FeasibleRegionMoveGenerator>();
-    avail_moves[(int)e_move_type::UniformInterLayer] = std::make_unique<UniformInterLayerMoveGenerator>();
+    if (is_multi_layer) {
+        avail_moves[(int)e_move_type::UniformInterLayer] = std::make_unique<UniformInterLayerMoveGenerator>();
+    }
 
     karmed_bandit_agent = std::move(agent);
 }
 
-SimpleRLMoveGenerator::SimpleRLMoveGenerator(std::unique_ptr<EpsilonGreedyAgent>& agent) {
+SimpleRLMoveGenerator::SimpleRLMoveGenerator(std::unique_ptr<EpsilonGreedyAgent>& agent, bool is_multi_layer) {
     avail_moves.resize((int)e_move_type::NUMBER_OF_AUTO_MOVES);
 
     avail_moves[(int)e_move_type::UNIFORM] = std::make_unique<UniformMoveGenerator>();
@@ -39,7 +41,9 @@ SimpleRLMoveGenerator::SimpleRLMoveGenerator(std::unique_ptr<EpsilonGreedyAgent>
     avail_moves[(int)e_move_type::W_MEDIAN] = std::make_unique<WeightedMedianMoveGenerator>();
     avail_moves[(int)e_move_type::CRIT_UNIFORM] = std::make_unique<CriticalUniformMoveGenerator>();
     avail_moves[(int)e_move_type::FEASIBLE_REGION] = std::make_unique<FeasibleRegionMoveGenerator>();
-    avail_moves[(int)e_move_type::UniformInterLayer] = std::make_unique<UniformInterLayerMoveGenerator>();
+    if (is_multi_layer) {
+        avail_moves[(int)e_move_type::UniformInterLayer] = std::make_unique<UniformInterLayerMoveGenerator>();
+    }
 
     karmed_bandit_agent = std::move(agent);
 }
diff --git a/vpr/src/place/simpleRL_move_generator.h b/vpr/src/place/simpleRL_move_generator.h
index bae78ca3161..2a0d0ba78a8 100644
--- a/vpr/src/place/simpleRL_move_generator.h
+++ b/vpr/src/place/simpleRL_move_generator.h
@@ -169,8 +169,8 @@ class SimpleRLMoveGenerator : public MoveGenerator {
 
   public:
     // constructors using a pointer to the agent used
-    SimpleRLMoveGenerator(std::unique_ptr<EpsilonGreedyAgent>& agent);
-    SimpleRLMoveGenerator(std::unique_ptr<SoftmaxAgent>& agent);
+    SimpleRLMoveGenerator(std::unique_ptr<EpsilonGreedyAgent>& agent, bool is_multi_layer);
+    SimpleRLMoveGenerator(std::unique_ptr<SoftmaxAgent>& agent, bool is_multi_layer);
 
     // Updates affected_blocks with the proposed move, while respecting the current rlim
     e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& move_type, t_logical_block_type& blk_type, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities);

From 53193f3634cf587689a9ea18953ede12e34127d9 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 7 Jul 2023 09:29:47 -0400
Subject: [PATCH 030/257] if multiple layers are not available set
 num_2nd_state_avail_moves to NUM_PL_MOVE_TYPES - 1

---
 vpr/src/place/RL_agent_util.cpp | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/vpr/src/place/RL_agent_util.cpp b/vpr/src/place/RL_agent_util.cpp
index e800dc7273f..51c1b0826d9 100644
--- a/vpr/src/place/RL_agent_util.cpp
+++ b/vpr/src/place/RL_agent_util.cpp
@@ -50,7 +50,17 @@ void create_move_generators(std::unique_ptr<MoveGenerator>& move_generator, std:
 
         auto& place_ctx = g_vpr_ctx.placement();
         int num_1st_state_avail_moves = placer_opts.place_algorithm.is_timing_driven() ? NUM_PL_1ST_STATE_MOVE_TYPES : NUM_PL_NONTIMING_MOVE_TYPES;
-        int num_2nd_state_avail_moves = placer_opts.place_algorithm.is_timing_driven() ? NUM_PL_MOVE_TYPES : NUM_PL_NONTIMING_MOVE_TYPES;
+        int num_2nd_state_avail_moves;
+        if (placer_opts.place_algorithm.is_timing_driven()) {
+            if (is_multi_layer) {
+                num_2nd_state_avail_moves = NUM_PL_MOVE_TYPES;
+            } else {
+                num_2nd_state_avail_moves = NUM_PL_MOVE_TYPES - 1;
+            }
+        } else {
+            num_2nd_state_avail_moves = NUM_PL_NONTIMING_MOVE_TYPES;
+        }
+
 
         if (placer_opts.place_agent_algorithm == E_GREEDY) {
             std::unique_ptr<EpsilonGreedyAgent> karmed_bandit_agent1, karmed_bandit_agent2;

From f313ded9c05332985887df33b67a8a726a34ece1 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 7 Jul 2023 11:16:57 -0400
Subject: [PATCH 031/257] add layer_num to t_pl_offset

---
 vpr/src/base/vpr_types.h            | 25 ++++++++++++++++---------
 vpr/src/place/initial_placement.cpp |  2 +-
 vpr/test/test_vpr_constraints.cpp   |  2 +-
 3 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 76bb792a543..b7606adb936 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -604,16 +604,19 @@ struct t_bb {
  */
 struct t_pl_offset {
     t_pl_offset() = default;
-    t_pl_offset(int xoffset, int yoffset, int sub_tile_offset)
-        : x(xoffset)
+    t_pl_offset(int layer_offset, int xoffset, int yoffset, int sub_tile_offset)
+        : layer(layer_offset)
+        , x(xoffset)
         , y(yoffset)
         , sub_tile(sub_tile_offset) {}
 
+    int layer = 0;
     int x = 0;
     int y = 0;
     int sub_tile = 0;
 
     t_pl_offset& operator+=(const t_pl_offset& rhs) {
+        layer += rhs.layer;
         x += rhs.x;
         y += rhs.y;
         sub_tile += rhs.sub_tile;
@@ -621,6 +624,7 @@ struct t_pl_offset {
     }
 
     t_pl_offset& operator-=(const t_pl_offset& rhs) {
+        layer -= rhs.layer;
         x -= rhs.x;
         y -= rhs.y;
         sub_tile -= rhs.sub_tile;
@@ -638,18 +642,19 @@ struct t_pl_offset {
     }
 
     friend t_pl_offset operator-(const t_pl_offset& other) {
-        return t_pl_offset(-other.x, -other.y, -other.sub_tile);
+        return t_pl_offset(-other.layer, -other.x, -other.y, -other.sub_tile);
     }
     friend t_pl_offset operator+(const t_pl_offset& other) {
-        return t_pl_offset(+other.x, +other.y, +other.sub_tile);
+        return t_pl_offset(+other.layer, +other.x, +other.y, +other.sub_tile);
     }
 
     friend bool operator<(const t_pl_offset& lhs, const t_pl_offset& rhs) {
+        VTR_ASSERT(lhs.layer == rhs.layer);
         return std::tie(lhs.x, lhs.y, lhs.sub_tile) < std::tie(rhs.x, rhs.y, rhs.sub_tile);
     }
 
     friend bool operator==(const t_pl_offset& lhs, const t_pl_offset& rhs) {
-        return std::tie(lhs.x, lhs.y, lhs.sub_tile) == std::tie(rhs.x, rhs.y, rhs.sub_tile);
+        return std::tie(lhs.layer, lhs.x, lhs.y, lhs.sub_tile) == std::tie(rhs.layer, rhs.x, rhs.y, rhs.sub_tile);
     }
 
     friend bool operator!=(const t_pl_offset& lhs, const t_pl_offset& rhs) {
@@ -693,7 +698,7 @@ struct t_pl_loc {
     int layer = OPEN;
 
     t_pl_loc& operator+=(const t_pl_offset& rhs) {
-        VTR_ASSERT(this->layer != OPEN);
+        layer += rhs.layer;
         x += rhs.x;
         y += rhs.y;
         sub_tile += rhs.sub_tile;
@@ -701,7 +706,7 @@ struct t_pl_loc {
     }
 
     t_pl_loc& operator-=(const t_pl_offset& rhs) {
-        VTR_ASSERT(this->layer != OPEN);
+        layer -= rhs.layer;
         x -= rhs.x;
         y -= rhs.y;
         sub_tile -= rhs.sub_tile;
@@ -725,8 +730,10 @@ struct t_pl_loc {
     }
 
     friend t_pl_offset operator-(const t_pl_loc& lhs, const t_pl_loc& rhs) {
-        VTR_ASSERT(lhs.layer == rhs.layer);
-        return {lhs.x - rhs.x, lhs.y - rhs.y, lhs.sub_tile - rhs.sub_tile};
+        return {lhs.layer - rhs.layer,
+                lhs.x - rhs.x,
+                lhs.y - rhs.y,
+                lhs.sub_tile - rhs.sub_tile};
     }
 
     friend bool operator<(const t_pl_loc& lhs, const t_pl_loc& rhs) {
diff --git a/vpr/src/place/initial_placement.cpp b/vpr/src/place/initial_placement.cpp
index a017da13ab0..4229dad95f0 100644
--- a/vpr/src/place/initial_placement.cpp
+++ b/vpr/src/place/initial_placement.cpp
@@ -1072,7 +1072,7 @@ bool place_one_block(const ClusterBlockId& blk_id,
         //If it does not belong to a macro, create a macro with the one block and then pass to the placement routines
         //This is done so that the initial placement flow can be the same whether the block belongs to a macro or not
         t_pl_macro_member macro_member;
-        t_pl_offset block_offset(0, 0, 0);
+        t_pl_offset block_offset(0, 0, 0, 0);
 
         macro_member.blk_index = blk_id;
         macro_member.offset = block_offset;
diff --git a/vpr/test/test_vpr_constraints.cpp b/vpr/test/test_vpr_constraints.cpp
index 30772950e19..baee4101136 100644
--- a/vpr/test/test_vpr_constraints.cpp
+++ b/vpr/test/test_vpr_constraints.cpp
@@ -441,7 +441,7 @@ TEST_CASE("PartRegionIntersect6", "[vpr]") {
 TEST_CASE("MacroConstraints", "[vpr]") {
     t_pl_macro pl_macro;
     PartitionRegion head_pr;
-    t_pl_offset offset(2, 1, 0);
+    t_pl_offset offset(0, 2, 1, 0);
 
     Region reg;
     reg.set_region_rect({5, 2, 9, 6, 0});

From 6b83d54f5047e53b0dd0ccaa095f2ece3b944edc Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 7 Jul 2023 17:50:10 -0400
Subject: [PATCH 032/257] delete the assertion in has_path_to_sink that checks
 sink type

---
 vpr/src/route/connection_router.cpp | 5 +++--
 vpr/src/route/route_common.cpp      | 4 ++++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp
index da1b9b39ab2..52927de98bb 100644
--- a/vpr/src/route/connection_router.cpp
+++ b/vpr/src/route/connection_router.cpp
@@ -8,7 +8,6 @@ inline static bool has_path_to_sink(const t_rr_graph_view& rr_nodes,
                                     const RRGraphView* rr_graph,
                                     RRNodeId from_node,
                                     RRNodeId sink_node) {
-    VTR_ASSERT(rr_graph->node_type(sink_node) == t_rr_type::SINK);
 
     // ASSUMPTION: Only OPINs can connect to other layers
 
@@ -16,7 +15,9 @@ inline static bool has_path_to_sink(const t_rr_graph_view& rr_nodes,
 
     if (rr_graph->node_layer(from_node) == sink_layer) {
         return true;
-    } else if (rr_graph->node_type(from_node) == CHANX || rr_graph->node_type(from_node) == CHANY) {
+    } else if (rr_graph->node_type(from_node) == CHANX ||
+               rr_graph->node_type(from_node) == CHANY ||
+               rr_graph->node_type(from_node) == IPIN) {
         return false;
     } else {
         auto edges = rr_nodes.edge_range(from_node);
diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp
index 62faa9f9ab7..e1f52d4d07f 100644
--- a/vpr/src/route/route_common.cpp
+++ b/vpr/src/route/route_common.cpp
@@ -892,6 +892,10 @@ t_bb load_net_route_bb(const Netlist<>& net_list,
         VTR_ASSERT(rr_graph.node_xlow(sink_rr) <= rr_graph.node_xhigh(sink_rr));
         VTR_ASSERT(rr_graph.node_ylow(sink_rr) <= rr_graph.node_yhigh(sink_rr));
 
+        VTR_ASSERT(rr_graph.node_layer(sink_rr) >= 0);
+        VTR_ASSERT(rr_graph.node_layer(sink_rr) <= device_ctx.grid.get_num_layers() - 1);
+
+
         xmin = std::min<int>(xmin, rr_graph.node_xlow(sink_rr));
         xmax = std::max<int>(xmax, rr_graph.node_xhigh(sink_rr));
         ymin = std::min<int>(ymin, rr_graph.node_ylow(sink_rr));

From 7f25d9851ed7498fc8a8b010d86f737261c7f4cb Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 7 Jul 2023 18:16:57 -0400
Subject: [PATCH 033/257] if node is of type sink, add it to heap even if it is
 not on the same layer as sink

---
 vpr/src/route/connection_router.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp
index 52927de98bb..fb85bce8234 100644
--- a/vpr/src/route/connection_router.cpp
+++ b/vpr/src/route/connection_router.cpp
@@ -13,13 +13,15 @@ inline static bool has_path_to_sink(const t_rr_graph_view& rr_nodes,
 
     int sink_layer = rr_graph->node_layer(sink_node);
 
-    if (rr_graph->node_layer(from_node) == sink_layer) {
+    if (rr_graph->node_layer(from_node) == sink_layer ||
+        rr_graph->node_type(from_node) == SOURCE) {
         return true;
     } else if (rr_graph->node_type(from_node) == CHANX ||
                rr_graph->node_type(from_node) == CHANY ||
                rr_graph->node_type(from_node) == IPIN) {
         return false;
     } else {
+        VTR_ASSERT(rr_graph->node_type(from_node) == OPIN);
         auto edges = rr_nodes.edge_range(from_node);
 
 //        for (RREdgeId from_edge : edges) {

From 0a7d67ab46373b7dbd9b1200a96b26a53183c762 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gamil.com>
Date: Sun, 9 Jul 2023 11:16:01 -0400
Subject: [PATCH 034/257] set layer num when high fanout bb is set

---
 vpr/src/route/connection_router.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp
index fb85bce8234..cd1476b98cd 100644
--- a/vpr/src/route/connection_router.cpp
+++ b/vpr/src/route/connection_router.cpp
@@ -550,6 +550,8 @@ void ConnectionRouter<Heap>::timing_driven_expand_neighbour(t_heap* current,
     int to_yhigh = rr_graph_->node_yhigh(to_node);
     int to_layer = rr_graph_->node_layer(to_node);
 
+    VTR_ASSERT(bounding_box.layer_max < g_vpr_ctx.device().grid.get_num_layers());
+
     // BB-pruning
     // Disable BB-pruning if RCV is enabled, as this can make it harder for circuits with high negative hold slack to resolve this
     // TODO: Only disable pruning if the net has negative hold slack, maybe go off budgets
@@ -1043,6 +1045,9 @@ static t_bb adjust_highfanout_bounding_box(t_bb highfanout_bb) {
     bb.xmax += HIGH_FANOUT_BB_FAC;
     bb.ymax += HIGH_FANOUT_BB_FAC;
 
+    bb.layer_min = highfanout_bb.layer_min;
+    bb.layer_max = highfanout_bb.layer_max;
+
     return bb;
 }
 

From b270a0165493bc7aaf391a2fd0d1319d41f98c57 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gamil.com>
Date: Sun, 9 Jul 2023 11:23:39 -0400
Subject: [PATCH 035/257] fix move idx to move name index for uniform inter
 layer move

---
 vpr/src/place/move_utils.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index 1013a97ef1b..d7f5962701d 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -945,13 +945,13 @@ bool find_to_loc_centroid(t_logical_block_type_ptr blk_type,
 //Array of move type strings
 static const std::array<std::string, NUM_PL_MOVE_TYPES + 1> move_type_strings = {
     "Uniform",
-    "UniformInterLayer",
     "Median",
     "W. Centroid",
     "Centroid",
     "W. Median",
     "Crit. Uniform",
     "Feasible Region",
+    "UniformInterLayer",
     "Manual Move"};
 
 //To convert enum move type to string

From 8e0e147bd4b726efe95611bf7f7e5553a64e60bb Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 10 Jul 2023 08:55:15 -0400
Subject: [PATCH 036/257] if there is no switch assigned for inter-die
 connection for connection box, assign an infinite delay in router lookahead

---
 vpr/src/route/router_lookahead_map.cpp | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp
index 05dbb9efefe..638528881a5 100644
--- a/vpr/src/route/router_lookahead_map.cpp
+++ b/vpr/src/route/router_lookahead_map.cpp
@@ -281,8 +281,11 @@ MapLookahead::MapLookahead(const t_det_routing_arch& det_routing_arch, bool is_f
     if (num_layers > 1) {
         const auto& sw_inf = g_vpr_ctx.device().all_sw_inf;
         int inter_layer_sw_id = det_routing_arch_.wire_to_arch_ipin_switch_between_dice;
-        VTR_ASSERT(inter_layer_sw_id >= 0);
-        inter_layer_connection_box_sw_delay = sw_inf.at(inter_layer_sw_id).Tdel();
+        if (inter_layer_sw_id != OPEN) {
+            inter_layer_connection_box_sw_delay = sw_inf.at(inter_layer_sw_id).Tdel();
+        } else {
+            inter_layer_connection_box_sw_delay = std::numeric_limits<float>::max();
+        }
     } else {
         VTR_ASSERT(num_layers == 1);
         inter_layer_connection_box_sw_delay = 0.;
@@ -517,6 +520,7 @@ std::pair<float, float> MapLookahead::get_expected_delay_and_cong(RRNodeId from_
             expected_delay_cost = cost_entry.delay;
             expected_cong_cost = cost_entry.congestion;
             if (from_layer_num != to_layer_num) {
+                VTR_ASSERT(std::isfinite(inter_layer_connection_box_sw_delay));
                 expected_delay_cost += inter_layer_connection_box_sw_delay;
             }
 

From cacd06cdaf43281d25582643c3d607e806d05554 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 10 Jul 2023 08:59:48 -0400
Subject: [PATCH 037/257] debug: node type was used instead of node layer

---
 vpr/src/route/connection_router.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp
index cd1476b98cd..225f4694bc8 100644
--- a/vpr/src/route/connection_router.cpp
+++ b/vpr/src/route/connection_router.cpp
@@ -1118,8 +1118,8 @@ t_bb ConnectionRouter<Heap>::add_high_fanout_route_tree_to_heap(
                 highfanout_bb.ymin = std::min<int>(highfanout_bb.ymin, rr_graph_->node_ylow(rr_node_to_add));
                 highfanout_bb.xmax = std::max<int>(highfanout_bb.xmax, rr_graph_->node_xhigh(rr_node_to_add));
                 highfanout_bb.ymax = std::max<int>(highfanout_bb.ymax, rr_graph_->node_yhigh(rr_node_to_add));
-                highfanout_bb.layer_min = std::min<int>(highfanout_bb.layer_min, rr_graph_->node_type(rr_node_to_add));
-                highfanout_bb.layer_max = std::max<int>(highfanout_bb.layer_max, rr_graph_->node_type(rr_node_to_add));
+                highfanout_bb.layer_min = std::min<int>(highfanout_bb.layer_min, rr_graph_->node_layer(rr_node_to_add));
+                highfanout_bb.layer_max = std::max<int>(highfanout_bb.layer_max, rr_graph_->node_layer(rr_node_to_add));
                 if (is_flat_) {
                     if (rr_graph_->node_type(rr_node_to_add) == CHANY || rr_graph_->node_type(rr_node_to_add) == CHANX) {
                         chan_nodes_added++;

From 690c41e256d7b985078e0f40aa008c38949776fb Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 10 Jul 2023 15:08:31 -0400
Subject: [PATCH 038/257] multiply the area of fpga die by the number of layers
 to get the correct router area per tile

---
 vpr/src/route/rr_graph_area.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpr/src/route/rr_graph_area.cpp b/vpr/src/route/rr_graph_area.cpp
index a32b7e41a0d..8b1f66030fe 100644
--- a/vpr/src/route/rr_graph_area.cpp
+++ b/vpr/src/route/rr_graph_area.cpp
@@ -492,7 +492,7 @@ void count_unidir_routing_transistors(std::vector<t_segment_inf>& /*segment_inf*
 
     VTR_LOG("\n");
     VTR_LOG("Routing area (in minimum width transistor areas)...\n");
-    VTR_LOG("\tTotal routing area: %#g, per logic tile: %#g\n", ntrans, ntrans / (float)(device_ctx.grid.width() * device_ctx.grid.height()));
+    VTR_LOG("\tTotal routing area: %#g, per logic tile: %#g\n", ntrans, ntrans / (float)(device_ctx.grid.get_num_layers() * device_ctx.grid.width() * device_ctx.grid.height()));
 }
 
 static float get_cblock_trans(int* num_inputs_to_cblock, int wire_to_ipin_switch, int max_inputs_to_cblock, float trans_sram_bit) {

From 74c61d18089498cf671d0f83e859fc7d5220adc5 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 10 Jul 2023 16:03:01 -0400
Subject: [PATCH 039/257] add swap op numbers to parsed file

---
 vtr_flow/parse/parse_config/common/vpr.place.txt | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/vtr_flow/parse/parse_config/common/vpr.place.txt b/vtr_flow/parse/parse_config/common/vpr.place.txt
index 8713e8fe51f..64da113842b 100644
--- a/vtr_flow/parse/parse_config/common/vpr.place.txt
+++ b/vtr_flow/parse/parse_config/common/vpr.place.txt
@@ -1,6 +1,13 @@
 #VPR Place Metrics
 placed_wirelength_est;vpr.out;BB estimate of min-dist \(placement\) wire length: (\d+)
 
+#VPR Number of heap operations
+total_swap;vpr.out;Swaps called\s*:\s*(\d+)
+accepted_swap;vpr.out;\s*Swaps accepted\s*:\s*(\d+).*
+rejected_swap;vpr.out;\s*Swaps rejected\s*:\s*(\d+).*
+aborted_swap;vpr.out;\s*Swaps aborted\s*:\s*(\d+).*
+
+
 #VPR Run-time Metrics
 place_mem;vpr.out;.*Placement took.*\(max_rss (.*), .*\)
 place_time;vpr.out;\s*Placement took (.*) seconds

From ad11a177f8cf0ee40e9f85710973170576ff5676 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 10 Jul 2023 18:04:09 -0400
Subject: [PATCH 040/257] put assertion to assert if uniform_inter_layer is
 called when there aren't multiple layers

---
 vpr/src/place/uniform_inter_layer_move_generator.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/vpr/src/place/uniform_inter_layer_move_generator.cpp b/vpr/src/place/uniform_inter_layer_move_generator.cpp
index 2dfd4c33a4d..99a7e778e7c 100644
--- a/vpr/src/place/uniform_inter_layer_move_generator.cpp
+++ b/vpr/src/place/uniform_inter_layer_move_generator.cpp
@@ -4,6 +4,8 @@
 #include "move_utils.h"
 
 e_create_move UniformInterLayerMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, t_logical_block_type& blk_type, float /*rlim*/, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/) {
+    // If this moved is called, we know that there are at least two layers.
+    VTR_ASSERT(g_vpr_ctx.device().grid.get_num_layers() > 1);
     //Find a movable block based on blk_type
     ClusterBlockId b_from = propose_block_to_move(blk_type, false, NULL, NULL);
 

From 915be195680cd778fa3ffd4648256c627ecf3575 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 10 Jul 2023 18:05:24 -0400
Subject: [PATCH 041/257] fix the white space for place stats

---
 vpr/src/place/place.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index d6f425ca73f..e7992ae68f6 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -3232,7 +3232,7 @@ static void print_placement_swaps_stats(const t_annealing_state& state) {
             num_swap_accepted, 100 * accept_rate);
     VTR_LOG("\tSwaps rejected: %*d (%4.1f %%)\n", num_swap_print_digits,
             num_swap_rejected, 100 * reject_rate);
-    VTR_LOG("\tSwaps aborted : %*d (%4.1f %%)\n", num_swap_print_digits,
+    VTR_LOG("\tSwaps aborted: %*d (%4.1f %%)\n", num_swap_print_digits,
             num_swap_aborted, 100 * abort_rate);
 }
 

From 3e6a734fc9464938d7f5257681dc8fc518e5ac90 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 10 Jul 2023 18:07:05 -0400
Subject: [PATCH 042/257] add layer check to is_legal_swap_to_location

---
 vpr/src/place/move_utils.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index d7f5962701d..d63c42ce772 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -454,7 +454,9 @@ bool is_legal_swap_to_location(ClusterBlockId blk, t_pl_loc to) {
     auto& place_ctx = g_vpr_ctx.placement();
 
     if (to.x < 0 || to.x >= int(device_ctx.grid.width())
-        || to.y < 0 || to.y >= int(device_ctx.grid.height())) {
+        || to.y < 0 || to.y >= int(device_ctx.grid.height())
+        || to.layer < 0
+        || to.layer >= int(device_ctx.grid.get_num_layers())) {
         return false;
     }
 
@@ -1234,6 +1236,7 @@ int find_free_layer(t_logical_block_type_ptr logical_block, t_pl_loc loc) {
     const auto& device_ctx = g_vpr_ctx.device();
     const auto& place_ctx = g_vpr_ctx.placement();
 
+    // TODO: Compatible layer vector should be shuffled first, and then iterated through
     int free_layer = loc.layer;
     if (device_ctx.grid.get_num_layers() > 1) {
         const auto& compatible_layers = place_ctx.compressed_block_grids[logical_block->index].get_layer_nums();

From e8b593ba5fec5f7b1d3f3abe55a4be5d8ff3084b Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 10 Jul 2023 18:20:22 -0400
Subject: [PATCH 043/257] get ref const

---
 vpr/src/place/move_utils.cpp | 3 +--
 vpr/src/place/move_utils.h   | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index d63c42ce772..22586831c13 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -1232,7 +1232,7 @@ std::string e_move_result_to_string(e_move_result move_outcome) {
     return move_result_to_string[move_outcome];
 }
 
-int find_free_layer(t_logical_block_type_ptr logical_block, t_pl_loc loc) {
+int find_free_layer(t_logical_block_type_ptr logical_block, const t_pl_loc& loc) {
     const auto& device_ctx = g_vpr_ctx.device();
     const auto& place_ctx = g_vpr_ctx.placement();
 
@@ -1244,7 +1244,6 @@ int find_free_layer(t_logical_block_type_ptr logical_block, t_pl_loc loc) {
             if (place_ctx.grid_blocks.block_at_location(loc) != EMPTY_BLOCK_ID) {
                 for (const auto& layer : compatible_layers) {
                     if (layer != free_layer) {
-                        loc.layer = layer;
                         if (place_ctx.grid_blocks.block_at_location(loc) == EMPTY_BLOCK_ID) {
                             free_layer = layer;
                             break;
diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h
index d1804e65725..f8d69fc072a 100644
--- a/vpr/src/place/move_utils.h
+++ b/vpr/src/place/move_utils.h
@@ -347,6 +347,6 @@ int convert_phys_to_agent_blk_type(int phys_blk_type_index);
  */
 int get_num_agent_types();
 
-int find_free_layer(t_logical_block_type_ptr logical_block, t_pl_loc loc);
+int find_free_layer(t_logical_block_type_ptr logical_block, const t_pl_loc& loc);
 
 #endif

From 335d3bd1bf33fb826ce9ea1b3abb11cbc583755e Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 10 Jul 2023 19:08:31 -0400
Subject: [PATCH 044/257] incorporate layer num in hashing t_pl_loc

---
 vpr/src/base/vpr_types.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index b7606adb936..c1c44f065e3 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -757,6 +757,7 @@ struct hash<t_pl_loc> {
         std::size_t seed = std::hash<int>{}(v.x);
         vtr::hash_combine(seed, v.y);
         vtr::hash_combine(seed, v.sub_tile);
+        vtr::hash_combine(seed, v.layer);
         return seed;
     }
 };

From 476b879c0ff29b2284687b978674028c3c2b9b36 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 11 Jul 2023 08:01:20 -0400
Subject: [PATCH 045/257] fix the bug in UniformInterLayerMoveGenerator

---
 vpr/src/place/uniform_inter_layer_move_generator.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vpr/src/place/uniform_inter_layer_move_generator.cpp b/vpr/src/place/uniform_inter_layer_move_generator.cpp
index 99a7e778e7c..76e533b7612 100644
--- a/vpr/src/place/uniform_inter_layer_move_generator.cpp
+++ b/vpr/src/place/uniform_inter_layer_move_generator.cpp
@@ -32,12 +32,13 @@ e_create_move UniformInterLayerMoveGenerator::propose_move(t_pl_blocks_to_be_mov
     std::vector<int> candidate_layers;
     candidate_layers.reserve(compatible_layers.size() - 1);
 
-    for(const auto& layer_num : candidate_layers) {
+    for(const auto& layer_num : compatible_layers) {
         if(layer_num != from.layer) {
             candidate_layers.push_back(layer_num);
         }
     }
 
+    VTR_ASSERT(!candidate_layers.empty());
     int to_layer = candidate_layers[vtr::irand((int)candidate_layers.size() - 1)];
 
     t_pl_loc to = from;

From 667169c72bb35f190eb4d101c677626390654008 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 12 Jul 2023 17:37:56 -0400
Subject: [PATCH 046/257] add guard to make sure that if capnproto is not
 supported, it doesn't raise any bug

---
 vpr/src/route/router_lookahead_cost_map.cpp | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/vpr/src/route/router_lookahead_cost_map.cpp b/vpr/src/route/router_lookahead_cost_map.cpp
index 9e3de711d9d..95b7554176e 100644
--- a/vpr/src/route/router_lookahead_cost_map.cpp
+++ b/vpr/src/route/router_lookahead_cost_map.cpp
@@ -397,6 +397,8 @@ std::pair<util::Cost_Entry, int> CostMap::get_nearby_cost_entry(const vtr::NdMat
  * the cost map data structures, exploiting the capnp serialization.
  */
 
+#ifdef VTR_ENABLE_CAPNPROTO
+
 static void ToCostEntry(util::Cost_Entry* out, const VprCostEntry::Reader& in) {
     out->delay = in.getDelay();
     out->congestion = in.getCongestion();
@@ -491,3 +493,15 @@ void CostMap::write(const std::string& file) const {
 
     writeMessageToFile(file, &builder);
 }
+
+#else
+
+void CostMap::read(const std::string& /*file*/) {
+    VPR_FATAL_ERROR("Read CostMap requires the support of capnp");
+}
+
+void CostMap::write(const std::string& /*file*/) const {
+    VPR_FATAL_ERROR("Write CostMap requires the support of capnp");
+}
+
+#endif

From 52ed8bacb95407a6e229634c2ed4cc4942459ea8 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 12 Jul 2023 18:27:40 -0400
Subject: [PATCH 047/257] fix the layer num in
 add_high_fanout_route_tree_to_heap

---
 vpr/src/route/connection_router.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp
index 225f4694bc8..29612e906a5 100644
--- a/vpr/src/route/connection_router.cpp
+++ b/vpr/src/route/connection_router.cpp
@@ -1082,8 +1082,8 @@ t_bb ConnectionRouter<Heap>::add_high_fanout_route_tree_to_heap(
     highfanout_bb.xmax = rr_graph_->node_xhigh(target_node_id);
     highfanout_bb.ymin = rr_graph_->node_ylow(target_node_id);
     highfanout_bb.ymax = rr_graph_->node_yhigh(target_node_id);
-    highfanout_bb.layer_min = rr_graph_->node_type(target_node_id);
-    highfanout_bb.layer_max = rr_graph_->node_type(target_node_id);
+    highfanout_bb.layer_min = rr_graph_->node_layer(target_node_id);
+    highfanout_bb.layer_max = rr_graph_->node_layer(target_node_id);
 
     //Add existing routing starting from the target bin.
     //If the target's bin has insufficient existing routing add from the surrounding bins

From 451f11aacc33b2c7b6c074408c65dbf411de6d98 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 12 Jul 2023 18:28:51 -0400
Subject: [PATCH 048/257] fix the router lookahead functions when capnp is not
 enabled

---
 vpr/src/route/router_lookahead_cost_map.cpp | 4 ++--
 vpr/src/route/router_lookahead_map.cpp      | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/vpr/src/route/router_lookahead_cost_map.cpp b/vpr/src/route/router_lookahead_cost_map.cpp
index 95b7554176e..c00257a1ce4 100644
--- a/vpr/src/route/router_lookahead_cost_map.cpp
+++ b/vpr/src/route/router_lookahead_cost_map.cpp
@@ -497,11 +497,11 @@ void CostMap::write(const std::string& file) const {
 #else
 
 void CostMap::read(const std::string& /*file*/) {
-    VPR_FATAL_ERROR("Read CostMap requires the support of capnp");
+    VPR_FATAL_ERROR(VPR_ERROR_OTHER, "Read CostMap requires the support of capnp");
 }
 
 void CostMap::write(const std::string& /*file*/) const {
-    VPR_FATAL_ERROR("Write CostMap requires the support of capnp");
+    VPR_FATAL_ERROR(VPR_ERROR_OTHER, "Write CostMap requires the support of capnp");
 }
 
 #endif
diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp
index 638528881a5..e0119fe1cb6 100644
--- a/vpr/src/route/router_lookahead_map.cpp
+++ b/vpr/src/route/router_lookahead_map.cpp
@@ -1566,7 +1566,7 @@ void read_router_lookahead(const std::string& /*file*/) {
     VPR_THROW(VPR_ERROR_PLACE, "MapLookahead::read " DISABLE_ERROR);
 }
 
-void DeltaDelayModel::write(const std::string& /*file*/) const {
+void write_router_lookahead(const std::string& file) {
     VPR_THROW(VPR_ERROR_PLACE, "MapLookahead::write " DISABLE_ERROR);
 }
 

From d58c5539843d05791c4c0cb5ef036f5c23388284 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 13 Jul 2023 08:34:13 -0400
Subject: [PATCH 049/257] fix bounding box layer num in test_connection_router

---
 vpr/test/test_connection_router.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpr/test/test_connection_router.cpp b/vpr/test/test_connection_router.cpp
index 84893424f18..90f6379e4d4 100644
--- a/vpr/test/test_connection_router.cpp
+++ b/vpr/test/test_connection_router.cpp
@@ -34,7 +34,7 @@ static float do_one_route(int source_node,
     bounding_box.ymin = 0;
     bounding_box.ymax = device_ctx.grid.height() + 1;
     bounding_box.layer_min = 0;
-    bounding_box.layer_max = device_ctx.grid.get_num_layers() + 1;
+    bounding_box.layer_max = device_ctx.grid.get_num_layers() - 1;
 
     t_conn_cost_params cost_params;
     cost_params.criticality = router_opts.max_criticality;

From abae71b9eeebcba2e322dc9a51519860c13db06b Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 13 Jul 2023 08:51:44 -0400
Subject: [PATCH 050/257] change a field name in noc_storage

---
 vpr/src/noc/noc_storage.cpp | 4 ++--
 vpr/src/noc/noc_storage.h   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/vpr/src/noc/noc_storage.cpp b/vpr/src/noc/noc_storage.cpp
index 5c08c79ef2b..0703bdd0e05 100644
--- a/vpr/src/noc/noc_storage.cpp
+++ b/vpr/src/noc/noc_storage.cpp
@@ -127,7 +127,7 @@ void NocStorage::set_device_grid_width(int grid_width) {
 
 void NocStorage::set_device_grid_spec(int grid_width, int grid_height) {
     device_grid_width = grid_width;
-    num_layer_blocks = grid_width * grid_height;
+    layer_num_blocks = grid_width * grid_height;
     return;
 }
 
@@ -231,7 +231,7 @@ NocLinkId NocStorage::get_parallel_link(NocLinkId current_link) const {
 
 int NocStorage::generate_router_key_from_grid_location(int grid_position_x, int grid_position_y, int layer_position) const {
     // calculate the key value
-    return (num_layer_blocks * layer_position + device_grid_width * grid_position_y + grid_position_x);
+    return (layer_num_blocks * layer_position + device_grid_width * grid_position_y + grid_position_x);
 }
 
 void NocStorage::echo_noc(char* file_name) const {
diff --git a/vpr/src/noc/noc_storage.h b/vpr/src/noc/noc_storage.h
index 71e95b8f838..294c00ba313 100644
--- a/vpr/src/noc/noc_storage.h
+++ b/vpr/src/noc/noc_storage.h
@@ -138,7 +138,7 @@ class NocStorage {
      * 
      */
     int device_grid_width;
-    int num_layer_blocks;
+    int layer_num_blocks;
 
     // prevent "copying" of this object
     NocStorage(const NocStorage&) = delete;

From fffb10d928e36b7e97fca6d3bee9c4855fcba6c0 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 13 Jul 2023 09:19:39 -0400
Subject: [PATCH 051/257] sometimes the sink_node passed to connection router
 is equal to -1 - consider that case in add_route_tree_to_heap

---
 vpr/src/route/connection_router.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp
index 29612e906a5..61572f5045c 100644
--- a/vpr/src/route/connection_router.cpp
+++ b/vpr/src/route/connection_router.cpp
@@ -950,7 +950,7 @@ void ConnectionRouter<Heap>::add_route_tree_to_heap(
     /* Pre-order depth-first traversal */
     // IPINs and SINKS are not re_expanded
     if (rt_node.re_expand) {
-        if (!has_path_to_sink(rr_nodes_, rr_graph_, RRNodeId(rt_node.inode), RRNodeId(target_node))) {
+        if ((target_node != OPEN) && !has_path_to_sink(rr_nodes_, rr_graph_, RRNodeId(rt_node.inode), RRNodeId(target_node))) {
             return;
         }
         add_route_tree_node_to_heap(rt_node,

From 1a30f13d3484cf2d0291ea7e80a70996f75827f1 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 13 Jul 2023 09:23:26 -0400
Subject: [PATCH 052/257] fix calculate_all_path_delays_from_rr_node bounding
 box layer num - make format

---
 vpr/src/place/RL_agent_util.cpp               |  1 -
 vpr/src/place/move_utils.cpp                  |  2 --
 .../uniform_inter_layer_move_generator.cpp    |  6 ++---
 vpr/src/route/connection_router.cpp           | 23 ++++++++-----------
 vpr/src/route/route_common.cpp                |  1 -
 vpr/src/route/router_delay_profiling.cpp      |  2 +-
 6 files changed, 13 insertions(+), 22 deletions(-)

diff --git a/vpr/src/place/RL_agent_util.cpp b/vpr/src/place/RL_agent_util.cpp
index 51c1b0826d9..ea7f7fd701b 100644
--- a/vpr/src/place/RL_agent_util.cpp
+++ b/vpr/src/place/RL_agent_util.cpp
@@ -61,7 +61,6 @@ void create_move_generators(std::unique_ptr<MoveGenerator>& move_generator, std:
             num_2nd_state_avail_moves = NUM_PL_NONTIMING_MOVE_TYPES;
         }
 
-
         if (placer_opts.place_agent_algorithm == E_GREEDY) {
             std::unique_ptr<EpsilonGreedyAgent> karmed_bandit_agent1, karmed_bandit_agent2;
             //agent's 1st state
diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index 22586831c13..c808387d51a 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -1098,7 +1098,6 @@ std::vector<t_physical_tile_loc> get_compressed_loc_approx(const t_compressed_bl
     const auto& compatible_layers = compressed_block_grid.get_layer_nums();
 
     for (const auto& layer_num : compatible_layers) {
-
         compressed_locs[layer_num] = compressed_block_grid.grid_loc_to_compressed_loc_approx({grid_loc.x, grid_loc.y, layer_num});
     }
 
@@ -1128,7 +1127,6 @@ t_bb get_compressed_grid_bounded_search_range(const t_compressed_block_grid& com
                                               const t_physical_tile_loc& from_compressed_loc,
                                               const t_physical_tile_loc& target_compressed_loc,
                                               float rlim) {
-
     t_bb search_range;
 
     int min_cx, max_cx, min_cy, max_cy;
diff --git a/vpr/src/place/uniform_inter_layer_move_generator.cpp b/vpr/src/place/uniform_inter_layer_move_generator.cpp
index 76e533b7612..87204616908 100644
--- a/vpr/src/place/uniform_inter_layer_move_generator.cpp
+++ b/vpr/src/place/uniform_inter_layer_move_generator.cpp
@@ -25,15 +25,15 @@ e_create_move UniformInterLayerMoveGenerator::propose_move(t_pl_blocks_to_be_mov
 
     const auto& compatible_layers = block_compressed_grid.get_layer_nums();
 
-    if(compatible_layers.size() < 2) {
+    if (compatible_layers.size() < 2) {
         return e_create_move::ABORT;
     }
 
     std::vector<int> candidate_layers;
     candidate_layers.reserve(compatible_layers.size() - 1);
 
-    for(const auto& layer_num : compatible_layers) {
-        if(layer_num != from.layer) {
+    for (const auto& layer_num : compatible_layers) {
+        if (layer_num != from.layer) {
             candidate_layers.push_back(layer_num);
         }
     }
diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp
index 61572f5045c..822a24292fe 100644
--- a/vpr/src/route/connection_router.cpp
+++ b/vpr/src/route/connection_router.cpp
@@ -8,29 +8,25 @@ inline static bool has_path_to_sink(const t_rr_graph_view& rr_nodes,
                                     const RRGraphView* rr_graph,
                                     RRNodeId from_node,
                                     RRNodeId sink_node) {
-
     // ASSUMPTION: Only OPINs can connect to other layers
 
     int sink_layer = rr_graph->node_layer(sink_node);
 
-    if (rr_graph->node_layer(from_node) == sink_layer ||
-        rr_graph->node_type(from_node) == SOURCE) {
+    if (rr_graph->node_layer(from_node) == sink_layer || rr_graph->node_type(from_node) == SOURCE) {
         return true;
-    } else if (rr_graph->node_type(from_node) == CHANX ||
-               rr_graph->node_type(from_node) == CHANY ||
-               rr_graph->node_type(from_node) == IPIN) {
+    } else if (rr_graph->node_type(from_node) == CHANX || rr_graph->node_type(from_node) == CHANY || rr_graph->node_type(from_node) == IPIN) {
         return false;
     } else {
         VTR_ASSERT(rr_graph->node_type(from_node) == OPIN);
         auto edges = rr_nodes.edge_range(from_node);
 
-//        for (RREdgeId from_edge : edges) {
-//            RRNodeId to_node = rr_nodes.edge_sink_node(from_edge);
-//            rr_nodes.prefetch_node(to_node);
-//
-//            int switch_idx = rr_nodes.edge_switch(from_edge);
-//            VTR_PREFETCH(&rr_switch_inf_[switch_idx], 0, 0);
-//        }
+        //        for (RREdgeId from_edge : edges) {
+        //            RRNodeId to_node = rr_nodes.edge_sink_node(from_edge);
+        //            rr_nodes.prefetch_node(to_node);
+        //
+        //            int switch_idx = rr_nodes.edge_switch(from_edge);
+        //            VTR_PREFETCH(&rr_switch_inf_[switch_idx], 0, 0);
+        //        }
 
         for (RREdgeId from_edge : edges) {
             RRNodeId to_node = rr_nodes.edge_sink_node(from_edge);
@@ -40,7 +36,6 @@ inline static bool has_path_to_sink(const t_rr_graph_view& rr_nodes,
         }
         return false;
     }
-
 }
 
 inline static bool relevant_node_to_target(const RRGraphView* rr_graph,
diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp
index e1f52d4d07f..246f5ee5131 100644
--- a/vpr/src/route/route_common.cpp
+++ b/vpr/src/route/route_common.cpp
@@ -895,7 +895,6 @@ t_bb load_net_route_bb(const Netlist<>& net_list,
         VTR_ASSERT(rr_graph.node_layer(sink_rr) >= 0);
         VTR_ASSERT(rr_graph.node_layer(sink_rr) <= device_ctx.grid.get_num_layers() - 1);
 
-
         xmin = std::min<int>(xmin, rr_graph.node_xlow(sink_rr));
         xmax = std::max<int>(xmax, rr_graph.node_xhigh(sink_rr));
         ymin = std::min<int>(ymin, rr_graph.node_ylow(sink_rr));
diff --git a/vpr/src/route/router_delay_profiling.cpp b/vpr/src/route/router_delay_profiling.cpp
index ca34f4e4ccb..ca69d83fe1f 100644
--- a/vpr/src/route/router_delay_profiling.cpp
+++ b/vpr/src/route/router_delay_profiling.cpp
@@ -129,7 +129,7 @@ std::vector<float> calculate_all_path_delays_from_rr_node(int src_rr_node,
     bounding_box.ymin = 0;
     bounding_box.ymax = device_ctx.grid.height() + 1;
     bounding_box.layer_min = 0;
-    bounding_box.layer_max = device_ctx.grid.get_num_layers() + 1;
+    bounding_box.layer_max = device_ctx.grid.get_num_layers() - 1;
 
     t_conn_cost_params cost_params;
     cost_params.criticality = 1.;

From 25278dd4a0232418b6aa1a2cc0d657ddf4b34964 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 18 Jul 2023 10:59:48 -0400
Subject: [PATCH 053/257] instead of choosing only among layers other than the
 current layer, choose layer among all compatible layers

---
 vpr/src/place/uniform_inter_layer_move_generator.cpp | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/vpr/src/place/uniform_inter_layer_move_generator.cpp b/vpr/src/place/uniform_inter_layer_move_generator.cpp
index 87204616908..ab2c73820a3 100644
--- a/vpr/src/place/uniform_inter_layer_move_generator.cpp
+++ b/vpr/src/place/uniform_inter_layer_move_generator.cpp
@@ -29,17 +29,7 @@ e_create_move UniformInterLayerMoveGenerator::propose_move(t_pl_blocks_to_be_mov
         return e_create_move::ABORT;
     }
 
-    std::vector<int> candidate_layers;
-    candidate_layers.reserve(compatible_layers.size() - 1);
-
-    for (const auto& layer_num : compatible_layers) {
-        if (layer_num != from.layer) {
-            candidate_layers.push_back(layer_num);
-        }
-    }
-
-    VTR_ASSERT(!candidate_layers.empty());
-    int to_layer = candidate_layers[vtr::irand((int)candidate_layers.size() - 1)];
+    int to_layer = compatible_layers[vtr::irand((int)compatible_layers.size() - 1)];
 
     t_pl_loc to = from;
     to.layer = to_layer;

From e340ead1473e083feccad03fb7c21bb363142a75 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 24 Jul 2023 11:46:42 -0400
Subject: [PATCH 054/257] prevent loading RR graph again if it is already
 loaded

---
 vpr/src/route/rr_graph.cpp | 75 +++++++++++++++++++-------------------
 1 file changed, 38 insertions(+), 37 deletions(-)

diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp
index d200404d14a..e0924cc26ac 100644
--- a/vpr/src/route/rr_graph.cpp
+++ b/vpr/src/route/rr_graph.cpp
@@ -673,46 +673,47 @@ void create_rr_graph(const t_graph_type graph_type,
     bool echo_enabled = getEchoEnabled() && isEchoFileEnabled(E_ECHO_RR_GRAPH_INDEXED_DATA);
     const char* echo_file_name = getEchoFileName(E_ECHO_RR_GRAPH_INDEXED_DATA);
     bool load_rr_graph = !det_routing_arch->read_rr_graph_filename.empty();
-    if (load_rr_graph) {
-        if (device_ctx.read_rr_graph_filename != det_routing_arch->read_rr_graph_filename) {
-            free_rr_graph();
 
-            load_rr_file(&mutable_device_ctx.rr_graph_builder,
-                         &mutable_device_ctx.rr_graph,
-                         device_ctx.physical_tile_types,
-                         segment_inf,
-                         &mutable_device_ctx.rr_indexed_data,
-                         &mutable_device_ctx.rr_rc_data,
-                         grid,
-                         device_ctx.arch_switch_inf,
-                         graph_type,
-                         device_ctx.arch,
-                         &mutable_device_ctx.chan_width,
-                         router_opts.base_cost_type,
-                         device_ctx.virtual_clock_network_root_idx,
-                         &det_routing_arch->wire_to_rr_ipin_switch,
-                         &det_routing_arch->wire_to_arch_ipin_switch_between_dice,
-                         det_routing_arch->read_rr_graph_filename.c_str(),
-                         &det_routing_arch->read_rr_graph_filename,
-                         router_opts.read_rr_edge_metadata,
-                         router_opts.do_check_rr_graph,
-                         echo_enabled,
-                         echo_file_name,
-                         is_flat);
-            if (router_opts.reorder_rr_graph_nodes_algorithm != DONT_REORDER) {
-                mutable_device_ctx.rr_graph_builder.reorder_nodes(router_opts.reorder_rr_graph_nodes_algorithm,
-                                                                  router_opts.reorder_rr_graph_nodes_threshold,
-                                                                  router_opts.reorder_rr_graph_nodes_seed);
-            }
+    if (channel_widths_unchanged(device_ctx.chan_width, nodes_per_chan) && !device_ctx.rr_graph.empty()) {
+        //No change in channel width, so skip re-building RR graph
+        if (is_flat && !device_ctx.rr_graph_is_flat) {
+            VTR_LOG("RR graph channel widths unchanged, intra-cluster resources should be added...\n");
+        } else {
+            VTR_LOG("RR graph channel widths unchanged, skipping RR graph rebuild\n");
+            return;
         }
     } else {
-        if (channel_widths_unchanged(device_ctx.chan_width, nodes_per_chan) && !device_ctx.rr_graph.empty()) {
-            //No change in channel width, so skip re-building RR graph
-            if (is_flat && !device_ctx.rr_graph_is_flat) {
-                VTR_LOG("RR graph channel widths unchanged, intra-cluster resources should be added...\n");
-            } else {
-                VTR_LOG("RR graph channel widths unchanged, skipping RR graph rebuild\n");
-                return;
+        if (load_rr_graph) {
+            if (device_ctx.read_rr_graph_filename != det_routing_arch->read_rr_graph_filename) {
+                free_rr_graph();
+
+                load_rr_file(&mutable_device_ctx.rr_graph_builder,
+                             &mutable_device_ctx.rr_graph,
+                             device_ctx.physical_tile_types,
+                             segment_inf,
+                             &mutable_device_ctx.rr_indexed_data,
+                             &mutable_device_ctx.rr_rc_data,
+                             grid,
+                             device_ctx.arch_switch_inf,
+                             graph_type,
+                             device_ctx.arch,
+                             &mutable_device_ctx.chan_width,
+                             router_opts.base_cost_type,
+                             device_ctx.virtual_clock_network_root_idx,
+                             &det_routing_arch->wire_to_rr_ipin_switch,
+                             &det_routing_arch->wire_to_arch_ipin_switch_between_dice,
+                             det_routing_arch->read_rr_graph_filename.c_str(),
+                             &det_routing_arch->read_rr_graph_filename,
+                             router_opts.read_rr_edge_metadata,
+                             router_opts.do_check_rr_graph,
+                             echo_enabled,
+                             echo_file_name,
+                             is_flat);
+                if (router_opts.reorder_rr_graph_nodes_algorithm != DONT_REORDER) {
+                    mutable_device_ctx.rr_graph_builder.reorder_nodes(router_opts.reorder_rr_graph_nodes_algorithm,
+                                                                      router_opts.reorder_rr_graph_nodes_threshold,
+                                                                      router_opts.reorder_rr_graph_nodes_seed);
+                }
             }
         } else {
             free_rr_graph();

From f3df583f37e75edd56ce35a0898e9a7f9b7346bb Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 7 Aug 2023 15:37:15 -0400
Subject: [PATCH 055/257] add assertion to find_free_layer

---
 vpr/src/place/move_utils.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index c808387d51a..f04bedc42df 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -1236,6 +1236,7 @@ int find_free_layer(t_logical_block_type_ptr logical_block, const t_pl_loc& loc)
 
     // TODO: Compatible layer vector should be shuffled first, and then iterated through
     int free_layer = loc.layer;
+    VTR_ASSERT(loc.layer != OPEN);
     if (device_ctx.grid.get_num_layers() > 1) {
         const auto& compatible_layers = place_ctx.compressed_block_grids[logical_block->index].get_layer_nums();
         if (compatible_layers.size() > 1) {

From de10ebb5a64bf6545f78611c5ea1995e5fca3ea9 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 8 Aug 2023 19:40:38 -0400
Subject: [PATCH 056/257] add a new data structure (t_2D_tbb) that only shows a
 bb on one layer

---
 vpr/src/base/vpr_types.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index c1c44f065e3..c6d5ed32326 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -595,6 +595,24 @@ struct t_bb {
     int layer_max = OPEN;
 };
 
+struct t_2D_tbb {
+    t_2D_tbb() = default;
+    t_2D_tbb(int xmin_, int xmax_, int ymin_, int ymax_, int layer_num_)
+        : xmin(xmin_)
+        , xmax(xmax_)
+        , ymin(ymin_)
+        , ymax(ymax_)
+        , layer_num (layer_num_){
+        VTR_ASSERT(xmax_ >= xmin_);
+        VTR_ASSERT(ymax_ >= ymin_);
+    }
+    int xmin = OPEN;
+    int xmax = OPEN;
+    int ymin = OPEN;
+    int ymax = OPEN;
+    int layer_num = OPEN;
+};
+
 /**
  * @brief An offset between placement locations (t_pl_loc)
  *

From 603a81693c90bd2035193901e29275c847dfa87e Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 8 Aug 2023 19:43:38 -0400
Subject: [PATCH 057/257] change fields under PlacerMoveContext to take
 t_2D_tbb instead of t_bb - add num_sink_pin_layer to keep track of the number
 of pins for each net on each layer

---
 vpr/src/place/placer_context.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/vpr/src/place/placer_context.h b/vpr/src/place/placer_context.h
index 23df961b144..9971699ece2 100644
--- a/vpr/src/place/placer_context.h
+++ b/vpr/src/place/placer_context.h
@@ -92,10 +92,13 @@ struct PlacerRuntimeContext : public Context {
 struct PlacerMoveContext : public Context {
   public:
     // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the bounding box coordinates of a net's bounding box
-    vtr::vector<ClusterNetId, t_bb> bb_coords;
+    vtr::vector<ClusterNetId, std::vector<t_2D_tbb>> bb_coords;
 
     // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each of a net's bounding box (to allow efficient updates)
-    vtr::vector<ClusterNetId, t_bb> bb_num_on_edges;
+    vtr::vector<ClusterNetId, std::vector<t_2D_tbb>> bb_num_on_edges;
+
+    // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each layer ()
+    vtr::vector<ClusterNetId, std::vector<int>> num_sink_pin_layer;
 
     // The first range limit calculated by the anneal
     float first_rlim;

From d26138a89e131100ea0a1ab4759c924f863bc010 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 8 Aug 2023 19:56:03 -0400
Subject: [PATCH 058/257] break the bb between layers - compute the cost for
 each layer

---
 vpr/src/place/place.cpp | 683 ++++++++++++++++++++++++++--------------
 1 file changed, 439 insertions(+), 244 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index a21a95155b6..3b94674874c 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -146,7 +146,8 @@ static vtr::NdMatrix<float, 2> chany_place_cost_fac({0, 0}); //[0...device_ctx.g
 
 /* The following arrays are used by the try_swap function for speed.   */
 /* [0...cluster_ctx.clb_nlist.nets().size()-1] */
-static vtr::vector<ClusterNetId, t_bb> ts_bb_coord_new, ts_bb_edge_new;
+static vtr::vector<ClusterNetId, std::vector<t_2D_tbb>> ts_bb_coord_new, ts_bb_edge_new;
+static vtr::vector<ClusterNetId, std::vector<int>> ts_layer_sink_pin_count;
 static std::vector<ClusterNetId> ts_nets_to_update;
 
 /* These file-scoped variables keep track of the number of swaps       *
@@ -324,9 +325,47 @@ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks);
 
 static e_move_result assess_swap(double delta_c, double t);
 
-static void get_non_updateable_bb(ClusterNetId net_id, t_bb* bb_coord_new);
-
-static void update_bb(ClusterNetId net_id, t_bb* bb_coord_new, t_bb* bb_edge_new, int xold, int yold, int xnew, int ynew);
+static void get_non_updateable_bb(ClusterNetId net_id,
+                                  std::vector<t_2D_tbb>& bb_coord_new,
+                                  std::vector<int>& num_sink_layer);
+
+static void update_bb(ClusterNetId net_id,
+                      std::vector<t_2D_tbb>& bb_edge_new,
+                      std::vector<t_2D_tbb>& bb_coord_new,
+                      std::vector<int>& bb_pin_sink_count_new,
+                      t_physical_tile_loc pin_old_loc,
+                      t_physical_tile_loc pin_new_loc);
+
+static void update_bb_pin_sink_count(ClusterNetId net_id,
+                                     const t_physical_tile_loc& pin_old_loc,
+                                     const t_physical_tile_loc& pin_new_loc,
+                                     const std::vector<int>& curr_layer_pin_sink_count,
+                                     std::vector<int>& bb_pin_sink_count_new);
+
+static void update_bb_edges(ClusterNetId net_id,
+                            const t_physical_tile_loc& pin_old_loc,
+                            const t_physical_tile_loc& pin_new_loc,
+                            const std::vector<t_2D_tbb>& curr_bb_edge,
+                            const std::vector<t_2D_tbb>& curr_bb_coord,
+                            std::vector<t_2D_tbb>& bb_edge_new,
+                            std::vector<t_2D_tbb>& bb_coord_new,
+                            std::vector<int>& bb_pin_sink_count_new);
+
+static void remove_block_from_bb_edge(ClusterNetId net_id,
+                                      std::vector<t_2D_tbb>& bb_edge_new,
+                                      std::vector<t_2D_tbb>& bb_coord_new,
+                                      std::vector<int>& bb_layer_pin_sink_count,
+                                      const int& old_num_block_on_edge,
+                                      const int& old_edge_coord,
+                                      int& new_num_block_on_edge,
+                                      int& new_edge_coord);
+
+static void add_block_to_bb(const t_2D_tbb& bb_edge_old,
+                            const t_2D_tbb& bb_coord_old,
+                            const t_physical_tile_loc& old_pin_loc,
+                            const t_physical_tile_loc& new_pin_loc,
+                            t_2D_tbb& bb_edge_new,
+                            t_2D_tbb& bb_coord_new);
 
 static int find_affected_nets_and_update_costs(
     const t_place_algorithm& place_algorithm,
@@ -354,11 +393,16 @@ static void update_placement_cost_normalization_factors(t_placer_costs* costs, c
 
 static double get_total_cost(t_placer_costs* costs, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts);
 
-static double get_net_cost(ClusterNetId net_id, t_bb* bb_ptr);
+static double get_net_cost(ClusterNetId net_id,
+                           const std::vector<t_2D_tbb>& bb_ptr,
+                           const std::vector<int>& layer_pin_sink_count);
 
-static void get_bb_from_scratch(ClusterNetId net_id, t_bb* coords, t_bb* num_on_edges);
+static void get_bb_from_scratch(ClusterNetId net_id,
+                                std::vector<t_2D_tbb>& num_on_edges,
+                                std::vector<t_2D_tbb>& coords,
+                                std::vector<int>& layer_pin_sink_count);
 
-static double get_net_wirelength_estimate(ClusterNetId net_id, t_bb* bbptr);
+static double get_net_wirelength_estimate(ClusterNetId net_id, const std::vector<t_2D_tbb>& bbptr);
 
 static void free_try_swap_arrays();
 
@@ -1372,6 +1416,7 @@ static void update_move_nets(int num_nets_affected) {
         ClusterNetId net_id = ts_nets_to_update[inet_affected];
 
         place_move_ctx.bb_coords[net_id] = ts_bb_coord_new[net_id];
+        place_move_ctx.num_sink_pin_layer[net_id] = ts_layer_sink_pin_count[net_id];
         if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET)
             place_move_ctx.bb_num_on_edges[net_id] = ts_bb_edge_new[net_id];
 
@@ -1794,7 +1839,8 @@ static int find_affected_nets_and_update_costs(
         ClusterNetId net_id = ts_nets_to_update[inet_affected];
 
         proposed_net_cost[net_id] = get_net_cost(net_id,
-                                                 &ts_bb_coord_new[net_id]);
+                                                 ts_bb_coord_new[net_id],
+                                                 ts_layer_sink_pin_count[net_id]);
         bb_delta_c += proposed_net_cost[net_id] - net_cost[net_id];
     }
 
@@ -1832,7 +1878,9 @@ static void update_net_bb(const ClusterNetId net,
         //For small nets brute-force bounding box update is faster
 
         if (bb_updated_before[net] == NOT_UPDATED_YET) { //Only once per-net
-            get_non_updateable_bb(net, &ts_bb_coord_new[net]);
+            get_non_updateable_bb(net,
+                                  ts_bb_coord_new[net],
+                                  ts_layer_sink_pin_count[net]);
         }
     } else {
         //For large nets, update bounding box incrementally
@@ -1843,13 +1891,20 @@ static void update_net_bb(const ClusterNetId net,
         int pin_height_offset = blk_type->pin_height_offset[iblk_pin];
 
         //Incremental bounding box update
-        update_bb(net, &ts_bb_coord_new[net], &ts_bb_edge_new[net],
-                  blocks_affected.moved_blocks[iblk].old_loc.x + pin_width_offset,
-                  blocks_affected.moved_blocks[iblk].old_loc.y
-                      + pin_height_offset,
-                  blocks_affected.moved_blocks[iblk].new_loc.x + pin_width_offset,
-                  blocks_affected.moved_blocks[iblk].new_loc.y
-                      + pin_height_offset);
+        t_physical_tile_loc pin_old_loc(
+            blocks_affected.moved_blocks[iblk].old_loc.x + pin_width_offset,
+            blocks_affected.moved_blocks[iblk].old_loc.y + pin_height_offset,
+            blocks_affected.moved_blocks[iblk].old_loc.layer);
+        t_physical_tile_loc pin_new_loc(
+            blocks_affected.moved_blocks[iblk].new_loc.x + pin_width_offset,
+            blocks_affected.moved_blocks[iblk].new_loc.y + pin_height_offset,
+            blocks_affected.moved_blocks[iblk].new_loc.layer);
+        update_bb(net,
+                  ts_bb_edge_new[net],
+                  ts_bb_coord_new[net],
+                  ts_layer_sink_pin_count[net],
+                  pin_old_loc,
+                  pin_new_loc);
     }
 }
 
@@ -2210,19 +2265,23 @@ static double comp_bb_cost(e_cost_methods method) {
              * so they can use a fast bounding box calculator.                    */
             if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET
                 && method == NORMAL) {
-                get_bb_from_scratch(net_id, &place_move_ctx.bb_coords[net_id],
-                                    &place_move_ctx.bb_num_on_edges[net_id]);
+                get_bb_from_scratch(net_id,
+                                    place_move_ctx.bb_num_on_edges[net_id],
+                                    place_move_ctx.bb_coords[net_id],
+                                    place_move_ctx.num_sink_pin_layer[net_id]);
             } else {
                 get_non_updateable_bb(net_id,
-                                      &place_move_ctx.bb_coords[net_id]);
+                                      place_move_ctx.bb_coords[net_id],
+                                      place_move_ctx.num_sink_pin_layer[net_id]);
             }
 
             net_cost[net_id] = get_net_cost(net_id,
-                                            &place_move_ctx.bb_coords[net_id]);
+                                            place_move_ctx.bb_coords[net_id],
+                                            place_move_ctx.num_sink_pin_layer[net_id]);
             cost += net_cost[net_id];
             if (method == CHECK)
                 expected_wirelength += get_net_wirelength_estimate(net_id,
-                                                                   &place_move_ctx.bb_coords[net_id]);
+                                                                   place_move_ctx.bb_coords[net_id]);
         }
     }
 
@@ -2253,6 +2312,8 @@ static void alloc_and_load_placement_structs(float place_cost_exp,
 
     size_t num_nets = cluster_ctx.clb_nlist.nets().size();
 
+    const int num_layers = device_ctx.grid.get_num_layers();
+
     init_placement_context();
 
     max_pins_per_clb = 0;
@@ -2296,8 +2357,9 @@ static void alloc_and_load_placement_structs(float place_cost_exp,
 
     net_cost.resize(num_nets, -1.);
     proposed_net_cost.resize(num_nets, -1.);
-    place_move_ctx.bb_coords.resize(num_nets, t_bb());
-    place_move_ctx.bb_num_on_edges.resize(num_nets, t_bb());
+    place_move_ctx.bb_coords.resize(num_nets, std::vector<t_2D_tbb>(num_layers, t_2D_tbb()));
+
+    place_move_ctx.bb_num_on_edges.resize(num_nets, std::vector<t_2D_tbb>(num_layers, t_2D_tbb()));
 
     /* Used to store costs for moves not yet made and to indicate when a net's   *
      * cost has been recomputed. proposed_net_cost[inet] < 0 means net's cost hasn't *
@@ -2356,8 +2418,11 @@ static void alloc_and_load_try_swap_structs() {
 
     size_t num_nets = cluster_ctx.clb_nlist.nets().size();
 
-    ts_bb_coord_new.resize(num_nets, t_bb());
-    ts_bb_edge_new.resize(num_nets, t_bb());
+    const int num_layers = g_vpr_ctx.device().grid.get_num_layers();
+
+    ts_bb_coord_new.resize(num_nets, std::vector<t_2D_tbb>(num_layers, t_2D_tbb()));
+    ts_bb_edge_new.resize(num_nets, std::vector<t_2D_tbb>(num_layers, t_2D_tbb()));
+    ts_layer_sink_pin_count.resize(num_nets, std::vector<int>(num_layers, 0));
     ts_nets_to_update.resize(num_nets, ClusterNetId::INVALID());
 
     auto& place_ctx = g_vpr_ctx.mutable_placement();
@@ -2367,6 +2432,7 @@ static void alloc_and_load_try_swap_structs() {
 static void free_try_swap_structs() {
     vtr::release_memory(ts_bb_coord_new);
     vtr::release_memory(ts_bb_edge_new);
+    vtr::release_memory(ts_layer_sink_pin_count);
     vtr::release_memory(ts_nets_to_update);
 
     auto& place_ctx = g_vpr_ctx.mutable_placement();
@@ -2377,13 +2443,26 @@ static void free_try_swap_structs() {
  * from only the block location information).  It updates both the       *
  * coordinate and number of pins on each edge information.  It           *
  * should only be called when the bounding box information is not valid. */
-static void get_bb_from_scratch(ClusterNetId net_id, t_bb* coords, t_bb* num_on_edges) {
-    int pnum, x, y, xmin, xmax, ymin, ymax;
-    int xmin_edge, xmax_edge, ymin_edge, ymax_edge;
+static void get_bb_from_scratch(ClusterNetId net_id,
+                                std::vector<t_2D_tbb>& num_on_edges,
+                                std::vector<t_2D_tbb>& coords,
+                                std::vector<int>& layer_pin_sink_count) {
+    auto& device_ctx = g_vpr_ctx.device();
+    const int num_layers = device_ctx.grid.get_num_layers();
+    int pnum, x, y, layer;
+    std::vector<int> xmin(num_layers);
+    std::vector<int> xmax(num_layers);
+    std::vector<int> ymin(num_layers);
+    std::vector<int> ymax(num_layers);
+    std::vector<int> xmin_edge(num_layers); // sized up front: every entry is indexed per layer below
+    std::vector<int> xmax_edge(num_layers);
+    std::vector<int> ymin_edge(num_layers);
+    std::vector<int> ymax_edge(num_layers);
+    // Number of this net's sink pins found on each layer.
+    std::vector<int> num_sink_pin_layer(num_layers, 0);
 
     auto& cluster_ctx = g_vpr_ctx.clustering();
     auto& place_ctx = g_vpr_ctx.placement();
-    auto& device_ctx = g_vpr_ctx.device();
     auto& grid = device_ctx.grid;
 
     ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id);
@@ -2397,18 +2476,23 @@ static void get_bb_from_scratch(ClusterNetId net_id, t_bb* coords, t_bb* num_on_
     x = max(min<int>(x, grid.width() - 2), 1);
     y = max(min<int>(y, grid.height() - 2), 1);
 
-    xmin = x;
-    ymin = y;
-    xmax = x;
-    ymax = y;
-    xmin_edge = 1;
-    ymin_edge = 1;
-    xmax_edge = 1;
-    ymax_edge = 1;
+    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+        xmin[layer_num] = x;
+        ymin[layer_num] = y;
+        xmax[layer_num] = x;
+        ymax[layer_num] = y;
+        xmin_edge[layer_num] = 1;
+        ymin_edge[layer_num] = 1;
+        xmax_edge[layer_num] = 1;
+        ymax_edge[layer_num] = 1;
+    }
 
     for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) {
         bnum = cluster_ctx.clb_nlist.pin_block(pin_id);
         pnum = tile_pin_index(pin_id);
+        layer = place_ctx.block_locs[bnum].loc.layer;
+        VTR_ASSERT(layer >= 0 && layer < num_layers);
+        num_sink_pin_layer[layer]++;
         x = place_ctx.block_locs[bnum].loc.x
             + physical_tile_type(bnum)->pin_width_offset[pnum];
         y = place_ctx.block_locs[bnum].loc.y
@@ -2424,44 +2508,47 @@ static void get_bb_from_scratch(ClusterNetId net_id, t_bb* coords, t_bb* num_on_
         x = max(min<int>(x, grid.width() - 2), 1);  //-2 for no perim channels
         y = max(min<int>(y, grid.height() - 2), 1); //-2 for no perim channels
 
-        if (x == xmin) {
-            xmin_edge++;
+        if (x == xmin[layer]) {
+            xmin_edge[layer]++;
         }
-        if (x == xmax) { /* Recall that xmin could equal xmax -- don't use else */
-            xmax_edge++;
-        } else if (x < xmin) {
-            xmin = x;
-            xmin_edge = 1;
-        } else if (x > xmax) {
-            xmax = x;
-            xmax_edge = 1;
+        if (x == xmax[layer]) { /* Recall that xmin could equal xmax -- don't use else */
+            xmax_edge[layer]++;
+        } else if (x < xmin[layer]) {
+            xmin[layer] = x;
+            xmin_edge[layer] = 1;
+        } else if (x > xmax[layer]) {
+            xmax[layer] = x;
+            xmax_edge[layer] = 1;
         }
 
-        if (y == ymin) {
-            ymin_edge++;
+        if (y == ymin[layer]) {
+            ymin_edge[layer]++;
         }
-        if (y == ymax) {
-            ymax_edge++;
-        } else if (y < ymin) {
-            ymin = y;
-            ymin_edge = 1;
-        } else if (y > ymax) {
-            ymax = y;
-            ymax_edge = 1;
+        if (y == ymax[layer]) {
+            ymax_edge[layer]++;
+        } else if (y < ymin[layer]) {
+            ymin[layer] = y;
+            ymin_edge[layer] = 1;
+        } else if (y > ymax[layer]) {
+            ymax[layer] = y;
+            ymax_edge[layer] = 1;
         }
     }
 
     /* Copy the coordinates and number on edges information into the proper   *
      * structures.                                                            */
-    coords->xmin = xmin;
-    coords->xmax = xmax;
-    coords->ymin = ymin;
-    coords->ymax = ymax;
-
-    num_on_edges->xmin = xmin_edge;
-    num_on_edges->xmax = xmax_edge;
-    num_on_edges->ymin = ymin_edge;
-    num_on_edges->ymax = ymax_edge;
+    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+        coords[layer_num].xmin = xmin[layer_num];
+        coords[layer_num].xmax = xmax[layer_num];
+        coords[layer_num].ymin = ymin[layer_num];
+        coords[layer_num].ymax = ymax[layer_num];
+
+        num_on_edges[layer_num].xmin = xmin_edge[layer_num];
+        num_on_edges[layer_num].xmax = xmax_edge[layer_num];
+        num_on_edges[layer_num].ymin = ymin_edge[layer_num];
+        num_on_edges[layer_num].ymax = ymax_edge[layer_num];
+    }
+    layer_pin_sink_count = num_sink_pin_layer; // report through the caller's out-parameter, not the committed global state
 }
 
 static double wirelength_crossing_count(size_t fanout) {
@@ -2475,52 +2562,62 @@ static double wirelength_crossing_count(size_t fanout) {
     }
 }
 
-static double get_net_wirelength_estimate(ClusterNetId net_id, t_bb* bbptr) {
+static double get_net_wirelength_estimate(ClusterNetId net_id, const std::vector<t_2D_tbb>& bbptr) {
     /* WMF: Finds the estimate of wirelength due to one net by looking at   *
      * its coordinate bounding box.                                         */
 
-    double ncost, crossing;
+    double ncost = 0.;
+    double crossing = 0.;
     auto& cluster_ctx = g_vpr_ctx.clustering();
+    const auto& place_move_ctx = g_placer_ctx.move();
+    int num_layers = g_vpr_ctx.device().grid.get_num_layers();
 
-    crossing = wirelength_crossing_count(
-        cluster_ctx.clb_nlist.net_pins(net_id).size());
+    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+        crossing = wirelength_crossing_count( // +1 counts the driver pin, as net_pins().size() did before
+            place_move_ctx.num_sink_pin_layer[net_id][layer_num] + 1);
 
-    /* Could insert a check for xmin == xmax.  In that case, assume  *
+        /* Could insert a check for xmin == xmax.  In that case, assume  *
      * connection will be made with no bends and hence no x-cost.    *
      * Same thing for y-cost.                                        */
 
-    /* Cost = wire length along channel * cross_count / average      *
+        /* Cost = wire length along channel * cross_count / average      *
      * channel capacity.   Do this for x, then y direction and add.  */
 
-    ncost = (bbptr->xmax - bbptr->xmin + 1) * crossing;
+        ncost += (bbptr[layer_num].xmax - bbptr[layer_num].xmin + 1) * crossing;
 
-    ncost += (bbptr->ymax - bbptr->ymin + 1) * crossing;
+        ncost += (bbptr[layer_num].ymax - bbptr[layer_num].ymin + 1) * crossing;
+    }
 
     return (ncost);
 }
 
-static double get_net_cost(ClusterNetId net_id, t_bb* bbptr) {
+static double get_net_cost(ClusterNetId /* net_id */,
+                           const std::vector<t_2D_tbb>& bbptr,
+                           const std::vector<int>& layer_pin_sink_count) {
     /* Finds the cost due to one net by looking at its coordinate bounding  *
      * box.                                                                 */
 
-    double ncost, crossing;
-    auto& cluster_ctx = g_vpr_ctx.clustering();
+    double ncost = 0.;
+    double crossing = 0.;
+    const auto& cluster_ctx = g_vpr_ctx.clustering();
+    int num_layers = g_vpr_ctx.device().grid.get_num_layers();
 
-    crossing = wirelength_crossing_count(
-        cluster_ctx.clb_nlist.net_pins(net_id).size());
+    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+        crossing = wirelength_crossing_count(layer_pin_sink_count[layer_num] + 1); // +1 counts the driver pin, as net_pins().size() did before
 
-    /* Could insert a check for xmin == xmax.  In that case, assume  *
-     * connection will be made with no bends and hence no x-cost.    *
-     * Same thing for y-cost.                                        */
+        /* Could insert a check for xmin == xmax.  In that case, assume  *
+        * connection will be made with no bends and hence no x-cost.    *
+        * Same thing for y-cost.                                        */
 
-    /* Cost = wire length along channel * cross_count / average      *
-     * channel capacity.   Do this for x, then y direction and add.  */
+        /* Cost = wire length along channel * cross_count / average      *
+        * channel capacity.   Do this for x, then y direction and add.  */
 
-    ncost = (bbptr->xmax - bbptr->xmin + 1) * crossing
-            * chanx_place_cost_fac[bbptr->ymax][bbptr->ymin - 1];
+        ncost += (bbptr[layer_num].xmax - bbptr[layer_num].xmin + 1) * crossing
+                * chanx_place_cost_fac[bbptr[layer_num].ymax][bbptr[layer_num].ymin - 1];
 
-    ncost += (bbptr->ymax - bbptr->ymin + 1) * crossing
-             * chany_place_cost_fac[bbptr->xmax][bbptr->xmin - 1];
+        ncost += (bbptr[layer_num].ymax - bbptr[layer_num].ymin + 1) * crossing
+                 * chany_place_cost_fac[bbptr[layer_num].xmax][bbptr[layer_num].xmin - 1];
+    }
 
     return (ncost);
 }
@@ -2532,47 +2629,60 @@ static double get_net_cost(ClusterNetId net_id, t_bb* bbptr) {
  * Currently assumes channels on both sides of the CLBs forming the   *
  * edges of the bounding box can be used.  Essentially, I am assuming *
  * the pins always lie on the outside of the bounding box.            */
-static void get_non_updateable_bb(ClusterNetId net_id, t_bb* bb_coord_new) {
+static void get_non_updateable_bb(ClusterNetId net_id,
+                                  std::vector<t_2D_tbb>& bb_coord_new,
+                                  std::vector<int>& num_sink_layer) {
     //TODO: account for multiple physical pin instances per logical pin
 
-    int xmax, ymax, xmin, ymin, x, y;
+    auto& device_ctx = g_vpr_ctx.device();
+    int num_layers = device_ctx.grid.get_num_layers();
+    num_sink_layer = std::vector<int>(num_layers, 0);
+    std::vector<int> xmax(num_layers);
+    std::vector<int> ymax(num_layers);
+    std::vector<int> xmin(num_layers);
+    std::vector<int> ymin(num_layers);
     int pnum;
 
     auto& cluster_ctx = g_vpr_ctx.clustering();
     auto& place_ctx = g_vpr_ctx.placement();
-    auto& device_ctx = g_vpr_ctx.device();
+
 
     ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id);
     pnum = net_pin_to_tile_pin_index(net_id, 0);
 
-    x = place_ctx.block_locs[bnum].loc.x
-        + physical_tile_type(bnum)->pin_width_offset[pnum];
-    y = place_ctx.block_locs[bnum].loc.y
-        + physical_tile_type(bnum)->pin_height_offset[pnum];
+    int src_x = place_ctx.block_locs[bnum].loc.x
+                + physical_tile_type(bnum)->pin_width_offset[pnum];
+    int src_y = place_ctx.block_locs[bnum].loc.y
+                + physical_tile_type(bnum)->pin_height_offset[pnum];
 
-    xmin = x;
-    ymin = y;
-    xmax = x;
-    ymax = y;
+    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+        xmin[layer_num] = src_x;
+        ymin[layer_num] = src_y;
+        xmax[layer_num] = src_x;
+        ymax[layer_num] = src_y;
+    }
 
     for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) {
         bnum = cluster_ctx.clb_nlist.pin_block(pin_id);
         pnum = tile_pin_index(pin_id);
-        x = place_ctx.block_locs[bnum].loc.x
+        int x = place_ctx.block_locs[bnum].loc.x
             + physical_tile_type(bnum)->pin_width_offset[pnum];
-        y = place_ctx.block_locs[bnum].loc.y
+        int y = place_ctx.block_locs[bnum].loc.y
             + physical_tile_type(bnum)->pin_height_offset[pnum];
 
-        if (x < xmin) {
-            xmin = x;
-        } else if (x > xmax) {
-            xmax = x;
+        int layer_num = place_ctx.block_locs[bnum].loc.layer;
+        VTR_ASSERT(layer_num >= 0 && layer_num < num_layers);
+        num_sink_layer[layer_num]++;
+        if (x < xmin[layer_num]) {
+            xmin[layer_num] = x;
+        } else if (x > xmax[layer_num]) {
+            xmax[layer_num] = x;
         }
 
-        if (y < ymin) {
-            ymin = y;
-        } else if (y > ymax) {
-            ymax = y;
+        if (y < ymin[layer_num]) {
+            ymin[layer_num] = y;
+        } else if (y > ymax[layer_num]) {
+            ymax[layer_num] = y;
         }
     }
 
@@ -2583,14 +2693,20 @@ static void get_non_updateable_bb(ClusterNetId net_id, t_bb* bb_coord_new) {
      * channel immediately to the left of the bounding box, I want to    *
      * clip to 1 in both directions as well (since minimum channel index *
      * is 0).  See route_common.cpp for a channel diagram.               */
-
-    bb_coord_new->xmin = max(min<int>(xmin, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
-    bb_coord_new->ymin = max(min<int>(ymin, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
-    bb_coord_new->xmax = max(min<int>(xmax, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
-    bb_coord_new->ymax = max(min<int>(ymax, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
+    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+        bb_coord_new[layer_num].xmin = max(min<int>(xmin[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
+        bb_coord_new[layer_num].ymin = max(min<int>(ymin[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels
+        bb_coord_new[layer_num].xmax = max(min<int>(xmax[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
+        bb_coord_new[layer_num].ymax = max(min<int>(ymax[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels
+    }
 }
 
-static void update_bb(ClusterNetId net_id, t_bb* bb_coord_new, t_bb* bb_edge_new, int xold, int yold, int xnew, int ynew) {
+static void update_bb(ClusterNetId net_id,
+                      std::vector<t_2D_tbb>& bb_edge_new,
+                      std::vector<t_2D_tbb>& bb_coord_new,
+                      std::vector<int>& bb_pin_sink_count_new,
+                      t_physical_tile_loc pin_old_loc,
+                      t_physical_tile_loc pin_new_loc) {
     /* Updates the bounding box of a net by storing its coordinates in    *
      * the bb_coord_new data structure and the number of blocks on each   *
      * edge in the bb_edge_new data structure.  This routine should only  *
@@ -2604,15 +2720,18 @@ static void update_bb(ClusterNetId net_id, t_bb* bb_coord_new, t_bb* bb_edge_new
      * The x and y coordinates are the pin's x and y coordinates.         */
     /* IO blocks are considered to be one cell in for simplicity.         */
     //TODO: account for multiple physical pin instances per logical pin
-    const t_bb *curr_bb_edge, *curr_bb_coord;
+    const std::vector<t_2D_tbb> *curr_bb_edge, *curr_bb_coord;
+    const std::vector<int> *curr_layer_pin_sink_count;
 
     auto& device_ctx = g_vpr_ctx.device();
     auto& place_move_ctx = g_placer_ctx.move();
 
-    xnew = max(min<int>(xnew, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
-    ynew = max(min<int>(ynew, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
-    xold = max(min<int>(xold, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
-    yold = max(min<int>(yold, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
+    int new_layer = pin_new_loc.layer_num;
+
+    pin_new_loc.x = max(min<int>(pin_new_loc.x, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
+    pin_new_loc.y = max(min<int>(pin_new_loc.y, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
+    pin_old_loc.x = max(min<int>(pin_old_loc.x, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
+    pin_old_loc.y = max(min<int>(pin_old_loc.y, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
 
     /* Check if the net had been updated before. */
     if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
@@ -2620,166 +2739,242 @@ static void update_bb(ClusterNetId net_id, t_bb* bb_coord_new, t_bb* bb_edge_new
         return;
     } else if (bb_updated_before[net_id] == NOT_UPDATED_YET) {
         /* The net had NOT been updated before, could use the old values */
-        curr_bb_coord = &place_move_ctx.bb_coords[net_id];
         curr_bb_edge = &place_move_ctx.bb_num_on_edges[net_id];
+        curr_bb_coord = &place_move_ctx.bb_coords[net_id];
+        curr_layer_pin_sink_count = &place_move_ctx.num_sink_pin_layer[net_id];
         bb_updated_before[net_id] = UPDATED_ONCE;
     } else {
         /* The net had been updated before, must use the new values */
-        curr_bb_coord = bb_coord_new;
-        curr_bb_edge = bb_edge_new;
+        curr_bb_edge = &bb_edge_new;
+        curr_bb_coord = &bb_coord_new;
+        curr_layer_pin_sink_count = &bb_pin_sink_count_new;
     }
 
     /* Check if I can update the bounding box incrementally. */
 
-    if (xnew < xold) { /* Move to left. */
-
-        /* Update the xmax fields for coordinates and number of edges first. */
+    update_bb_pin_sink_count(net_id,
+                             pin_old_loc,
+                             pin_new_loc,
+                             *curr_layer_pin_sink_count,
+                             bb_pin_sink_count_new);
+
+    update_bb_edges(net_id,
+                    pin_old_loc,
+                    pin_new_loc,
+                    *curr_bb_edge,
+                    *curr_bb_coord,
+                    bb_edge_new,
+                    bb_coord_new,
+                    bb_pin_sink_count_new);
 
-        if (xold == curr_bb_coord->xmax) { /* Old position at xmax. */
-            if (curr_bb_edge->xmax == 1) {
-                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new);
-                bb_updated_before[net_id] = GOT_FROM_SCRATCH;
-                return;
-            } else {
-                bb_edge_new->xmax = curr_bb_edge->xmax - 1;
-                bb_coord_new->xmax = curr_bb_coord->xmax;
-            }
-        } else { /* Move to left, old postion was not at xmax. */
-            bb_coord_new->xmax = curr_bb_coord->xmax;
-            bb_edge_new->xmax = curr_bb_edge->xmax;
-        }
+    if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
+        return;
+    }
 
-        /* Now do the xmin fields for coordinates and number of edges. */
+    add_block_to_bb((*curr_bb_edge)[new_layer],
+                    (*curr_bb_coord)[new_layer],
+                    pin_old_loc,
+                    pin_new_loc,
+                    bb_edge_new[new_layer],
+                    bb_coord_new[new_layer]);
 
-        if (xnew < curr_bb_coord->xmin) { /* Moved past xmin */
-            bb_coord_new->xmin = xnew;
-            bb_edge_new->xmin = 1;
-        } else if (xnew == curr_bb_coord->xmin) { /* Moved to xmin */
-            bb_coord_new->xmin = xnew;
-            bb_edge_new->xmin = curr_bb_edge->xmin + 1;
-        } else { /* Xmin unchanged. */
-            bb_coord_new->xmin = curr_bb_coord->xmin;
-            bb_edge_new->xmin = curr_bb_edge->xmin;
-        }
-        /* End of move to left case. */
+    if (bb_updated_before[net_id] == NOT_UPDATED_YET) {
+        bb_updated_before[net_id] = UPDATED_ONCE;
+    }
+}
 
-    } else if (xnew > xold) { /* Move to right. */
+static void update_bb_pin_sink_count(ClusterNetId net_id,
+                                     const t_physical_tile_loc& pin_old_loc,
+                                     const t_physical_tile_loc& pin_new_loc,
+                                     const std::vector<int>& curr_layer_pin_sink_count,
+                                     std::vector<int>& bb_pin_sink_count_new) {
+    VTR_ASSERT(curr_layer_pin_sink_count[pin_old_loc.layer_num] > 0); // NOTE(review): assumes the moved pin is a sink; a moved driver pin would be miscounted -- confirm with caller
+    // Seed every layer from the current counts (the two vectors may be the same object), then move one pin between layers.
+    if (&bb_pin_sink_count_new != &curr_layer_pin_sink_count) bb_pin_sink_count_new = curr_layer_pin_sink_count;
+    bb_pin_sink_count_new[pin_old_loc.layer_num]--;
+    bb_pin_sink_count_new[pin_new_loc.layer_num]++;
+}
 
-        if (xold == curr_bb_coord->xmin) { /* Old position at xmin. */
-            if (curr_bb_edge->xmin == 1) {
-                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new);
-                bb_updated_before[net_id] = GOT_FROM_SCRATCH;
+static void update_bb_edges(ClusterNetId net_id,
+                            const t_physical_tile_loc& pin_old_loc,
+                            const t_physical_tile_loc& pin_new_loc,
+                            const std::vector<t_2D_tbb>& curr_bb_edge,
+                            const std::vector<t_2D_tbb>& curr_bb_coord,
+                            std::vector<t_2D_tbb>& bb_edge_new,
+                            std::vector<t_2D_tbb>& bb_coord_new,
+                            std::vector<int>& bb_pin_sink_count_new) {
+    int old_layer = pin_old_loc.layer_num;
+
+    if (pin_old_loc.x == curr_bb_coord[pin_old_loc.layer_num].xmax) {
+        if (pin_old_loc.layer_num != pin_new_loc.layer_num ||
+            pin_new_loc.x < pin_old_loc.x) {
+            remove_block_from_bb_edge(net_id,
+                                      bb_edge_new,
+                                      bb_coord_new,
+                                      bb_pin_sink_count_new,
+                                      curr_bb_edge[old_layer].xmax,
+                                      curr_bb_coord[old_layer].xmax,
+                                      bb_edge_new[old_layer].xmax,
+                                      bb_coord_new[old_layer].xmax);
+            if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
                 return;
-            } else {
-                bb_edge_new->xmin = curr_bb_edge->xmin - 1;
-                bb_coord_new->xmin = curr_bb_coord->xmin;
             }
-        } else { /* Move to right, old position was not at xmin. */
-            bb_coord_new->xmin = curr_bb_coord->xmin;
-            bb_edge_new->xmin = curr_bb_edge->xmin;
-        }
-
-        /* Now do the xmax fields for coordinates and number of edges. */
-
-        if (xnew > curr_bb_coord->xmax) { /* Moved past xmax. */
-            bb_coord_new->xmax = xnew;
-            bb_edge_new->xmax = 1;
-        } else if (xnew == curr_bb_coord->xmax) { /* Moved to xmax */
-            bb_coord_new->xmax = xnew;
-            bb_edge_new->xmax = curr_bb_edge->xmax + 1;
-        } else { /* Xmax unchanged. */
-            bb_coord_new->xmax = curr_bb_coord->xmax;
-            bb_edge_new->xmax = curr_bb_edge->xmax;
         }
-        /* End of move to right case. */
-
-    } else { /* xnew == xold -- no x motion. */
-        bb_coord_new->xmin = curr_bb_coord->xmin;
-        bb_coord_new->xmax = curr_bb_coord->xmax;
-        bb_edge_new->xmin = curr_bb_edge->xmin;
-        bb_edge_new->xmax = curr_bb_edge->xmax;
     }
 
-    /* Now account for the y-direction motion. */
-
-    if (ynew < yold) { /* Move down. */
-
-        /* Update the ymax fields for coordinates and number of edges first. */
-
-        if (yold == curr_bb_coord->ymax) { /* Old position at ymax. */
-            if (curr_bb_edge->ymax == 1) {
-                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new);
-                bb_updated_before[net_id] = GOT_FROM_SCRATCH;
+    if (pin_old_loc.x == curr_bb_coord[pin_old_loc.layer_num].xmin) {
+        if (pin_old_loc.layer_num != pin_new_loc.layer_num ||
+            pin_new_loc.x > pin_old_loc.x) {
+            remove_block_from_bb_edge(net_id,
+                                      bb_edge_new,
+                                      bb_coord_new,
+                                      bb_pin_sink_count_new,
+                                      curr_bb_edge[old_layer].xmin,
+                                      curr_bb_coord[old_layer].xmin,
+                                      bb_edge_new[old_layer].xmin,
+                                      bb_coord_new[old_layer].xmin);
+            if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
                 return;
-            } else {
-                bb_edge_new->ymax = curr_bb_edge->ymax - 1;
-                bb_coord_new->ymax = curr_bb_coord->ymax;
             }
-        } else { /* Move down, old postion was not at ymax. */
-            bb_coord_new->ymax = curr_bb_coord->ymax;
-            bb_edge_new->ymax = curr_bb_edge->ymax;
         }
+    }
 
-        /* Now do the ymin fields for coordinates and number of edges. */
-
-        if (ynew < curr_bb_coord->ymin) { /* Moved past ymin */
-            bb_coord_new->ymin = ynew;
-            bb_edge_new->ymin = 1;
-        } else if (ynew == curr_bb_coord->ymin) { /* Moved to ymin */
-            bb_coord_new->ymin = ynew;
-            bb_edge_new->ymin = curr_bb_edge->ymin + 1;
-        } else { /* ymin unchanged. */
-            bb_coord_new->ymin = curr_bb_coord->ymin;
-            bb_edge_new->ymin = curr_bb_edge->ymin;
+    if (pin_old_loc.y == curr_bb_coord[pin_old_loc.layer_num].ymax) {
+        if (pin_old_loc.layer_num != pin_new_loc.layer_num ||
+            pin_new_loc.y < pin_old_loc.y) {
+            remove_block_from_bb_edge(net_id,
+                                      bb_edge_new,
+                                      bb_coord_new,
+                                      bb_pin_sink_count_new,
+                                      curr_bb_edge[old_layer].ymax,
+                                      curr_bb_coord[old_layer].ymax,
+                                      bb_edge_new[old_layer].ymax,
+                                      bb_coord_new[old_layer].ymax);
+            if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
+                return;
+            }
         }
-        /* End of move down case. */
-
-    } else if (ynew > yold) { /* Moved up. */
-
-        /* Update the ymin fields for coordinates and number of edges first. */
+    }
 
-        if (yold == curr_bb_coord->ymin) { /* Old position at ymin. */
-            if (curr_bb_edge->ymin == 1) {
-                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new);
-                bb_updated_before[net_id] = GOT_FROM_SCRATCH;
+    if (pin_old_loc.y == curr_bb_coord[pin_old_loc.layer_num].ymin) {
+        if (pin_old_loc.layer_num != pin_new_loc.layer_num ||
+            pin_new_loc.y > pin_old_loc.y) {
+            remove_block_from_bb_edge(net_id,
+                                      bb_edge_new,
+                                      bb_coord_new,
+                                      bb_pin_sink_count_new,
+                                      curr_bb_edge[old_layer].ymin,
+                                      curr_bb_coord[old_layer].ymin,
+                                      bb_edge_new[old_layer].ymin,
+                                      bb_coord_new[old_layer].ymin);
+            if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
                 return;
-            } else {
-                bb_edge_new->ymin = curr_bb_edge->ymin - 1;
-                bb_coord_new->ymin = curr_bb_coord->ymin;
             }
-        } else { /* Moved up, old position was not at ymin. */
-            bb_coord_new->ymin = curr_bb_coord->ymin;
-            bb_edge_new->ymin = curr_bb_edge->ymin;
         }
+    }
+
+}
+
+static void remove_block_from_bb_edge(ClusterNetId net_id,
+                                      std::vector<t_2D_tbb>& bb_edge_new,
+                                      std::vector<t_2D_tbb>& bb_coord_new,
+                                      std::vector<int>& bb_layer_pin_sink_count,
+                                      const int& old_num_block_on_edge,
+                                      const int& old_edge_coord,
+                                      int& new_num_block_on_edge,
+                                      int& new_edge_coord) {
+    if (old_num_block_on_edge == 1) {
+        get_bb_from_scratch(net_id,
+                            bb_edge_new,
+                            bb_coord_new,
+                            bb_layer_pin_sink_count);
+        bb_updated_before[net_id] = GOT_FROM_SCRATCH;
+        return;
+    } else {
+        new_num_block_on_edge = old_num_block_on_edge - 1;
+        new_edge_coord = old_edge_coord;
+    }
 
-        /* Now do the ymax fields for coordinates and number of edges. */
+}
 
-        if (ynew > curr_bb_coord->ymax) { /* Moved past ymax. */
-            bb_coord_new->ymax = ynew;
-            bb_edge_new->ymax = 1;
-        } else if (ynew == curr_bb_coord->ymax) { /* Moved to ymax */
-            bb_coord_new->ymax = ynew;
-            bb_edge_new->ymax = curr_bb_edge->ymax + 1;
+static void add_block_to_bb(const t_2D_tbb& bb_edge_old,
+                            const t_2D_tbb& bb_coord_old,
+                            const t_physical_tile_loc& old_pin_loc,
+                            const t_physical_tile_loc& new_pin_loc,
+                            t_2D_tbb& bb_edge_new,
+                            t_2D_tbb& bb_coord_new) {
+    int xnew = new_pin_loc.x;
+    int xold = old_pin_loc.x;
+    int ynew = new_pin_loc.y;
+    int yold = old_pin_loc.y;
+
+    VTR_ASSERT(bb_edge_old.layer_num == bb_edge_new.layer_num);
+    VTR_ASSERT(bb_coord_old.layer_num == bb_coord_new.layer_num);
+    VTR_ASSERT(bb_edge_old.layer_num == bb_coord_old.layer_num);
+
+    if (xnew < xold) {
+        if (xnew < bb_coord_old.xmin) { /* Moved past xmin */
+            bb_coord_new.xmin = xnew;
+            bb_edge_new.xmin = 1;
+        } else if (xnew == bb_coord_old.xmin) { /* Moved to xmin */
+            bb_coord_new.xmin = xnew;
+            bb_edge_new.xmin = bb_edge_old.xmin + 1;
+        } else { /* Xmin unchanged. */
+            bb_coord_new.xmin = bb_coord_old.xmin;
+            bb_edge_new.xmin = bb_edge_old.xmin;
+        }
+    } else if (xnew > xold) {
+        if (xnew > bb_coord_old.xmax) { /* Moved past xmax. */
+            bb_coord_new.xmax = xnew;
+            bb_edge_new.xmax = 1;
+        } else if (xnew == bb_coord_old.xmax) { /* Moved to xmax */
+            bb_coord_new.xmax = xnew;
+            bb_edge_new.xmax = bb_edge_old.xmax + 1;
+        } else { /* Xmax unchanged. */
+            bb_coord_new.xmax = bb_coord_old.xmax;
+            bb_edge_new.xmax = bb_edge_old.xmax;
+        }
+    } else {
+        bb_coord_new.xmin = bb_coord_old.xmin;
+        bb_coord_new.xmax = bb_coord_old.xmax;
+        bb_edge_new.xmin = bb_edge_old.xmin;
+        bb_edge_new.xmax = bb_edge_old.xmax;
+    }
+
+    if (ynew < yold) {
+        if (ynew < bb_coord_old.ymin) { /* Moved past ymin */
+            bb_coord_new.ymin = ynew;
+            bb_edge_new.ymin = 1;
+        } else if (ynew == bb_coord_old.ymin) { /* Moved to ymin */
+            bb_coord_new.ymin = ynew;
+            bb_edge_new.ymin = bb_edge_old.ymin + 1;
+        } else { /* ymin unchanged. */
+            bb_coord_new.ymin = bb_coord_old.ymin;
+            bb_edge_new.ymin = bb_edge_old.ymin;
+        }
+        /* End of move down case. */
+    } else if (ynew > yold) {
+        if (ynew > bb_coord_old.ymax) { /* Moved past ymax. */
+            bb_coord_new.ymax = ynew;
+            bb_edge_new.ymax = 1;
+        } else if (ynew == bb_coord_old.ymax) { /* Moved to ymax */
+            bb_coord_new.ymax = ynew;
+            bb_edge_new.ymax = bb_edge_old.ymax + 1;
         } else { /* ymax unchanged. */
-            bb_coord_new->ymax = curr_bb_coord->ymax;
-            bb_edge_new->ymax = curr_bb_edge->ymax;
+            bb_coord_new.ymax = bb_coord_old.ymax;
+            bb_edge_new.ymax = bb_edge_old.ymax;
         }
         /* End of move up case. */
-
-    } else { /* ynew == yold -- no y motion. */
-        bb_coord_new->ymin = curr_bb_coord->ymin;
-        bb_coord_new->ymax = curr_bb_coord->ymax;
-        bb_edge_new->ymin = curr_bb_edge->ymin;
-        bb_edge_new->ymax = curr_bb_edge->ymax;
-    }
-
-    if (bb_updated_before[net_id] == NOT_UPDATED_YET) {
-        bb_updated_before[net_id] = UPDATED_ONCE;
+    } else {
+        /* ynew == yold -- no change. */
+        bb_coord_new.ymin = bb_coord_old.ymin;
+        bb_coord_new.ymax = bb_coord_old.ymax;
+        bb_edge_new.ymin = bb_edge_old.ymin;
+        bb_edge_new.ymax = bb_edge_old.ymax;
     }
 }
 
+
+
 static void free_fast_cost_update() {
     chanx_place_cost_fac.clear();
     chany_place_cost_fac.clear();

From ba05fced484ffdce675eed09fac30835e54aca2b Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 8 Aug 2023 19:56:53 -0400
Subject: [PATCH 059/257] make format

---
 vpr/src/base/vpr_types.h |  2 +-
 vpr/src/place/place.cpp  | 40 +++++++++++++++-------------------------
 2 files changed, 16 insertions(+), 26 deletions(-)

diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index c6d5ed32326..0e6d6883a08 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -602,7 +602,7 @@ struct t_2D_tbb {
         , xmax(xmax_)
         , ymin(ymin_)
         , ymax(ymax_)
-        , layer_num (layer_num_){
+        , layer_num(layer_num_) {
         VTR_ASSERT(xmax_ >= xmin_);
         VTR_ASSERT(ymax_ >= ymin_);
     }
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 3b94674874c..41a45974629 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -2577,11 +2577,11 @@ static double get_net_wirelength_estimate(ClusterNetId net_id, const std::vector
             place_move_ctx.num_sink_pin_layer[net_id][layer_num]);
 
         /* Could insert a check for xmin == xmax.  In that case, assume  *
-     * connection will be made with no bends and hence no x-cost.    *
-     * Same thing for y-cost.                                        */
+         * connection will be made with no bends and hence no x-cost.    *
+         * Same thing for y-cost.                                        */
 
         /* Cost = wire length along channel * cross_count / average      *
-     * channel capacity.   Do this for x, then y direction and add.  */
+         * channel capacity.   Do this for x, then y direction and add.  */
 
         ncost += (bbptr[layer_num].xmax - bbptr[layer_num].xmin + 1) * crossing;
 
@@ -2606,14 +2606,14 @@ static double get_net_cost(ClusterNetId /* net_id */,
         crossing = wirelength_crossing_count(layer_pin_sink_count[layer_num]);
 
         /* Could insert a check for xmin == xmax.  In that case, assume  *
-        * connection will be made with no bends and hence no x-cost.    *
-        * Same thing for y-cost.                                        */
+         * connection will be made with no bends and hence no x-cost.    *
+         * Same thing for y-cost.                                        */
 
         /* Cost = wire length along channel * cross_count / average      *
-        * channel capacity.   Do this for x, then y direction and add.  */
+         * channel capacity.   Do this for x, then y direction and add.  */
 
         ncost += (bbptr[layer_num].xmax - bbptr[layer_num].xmin + 1) * crossing
-                * chanx_place_cost_fac[bbptr[layer_num].ymax][bbptr[layer_num].ymin - 1];
+                 * chanx_place_cost_fac[bbptr[layer_num].ymax][bbptr[layer_num].ymin - 1];
 
         ncost += (bbptr[layer_num].ymax - bbptr[layer_num].ymin + 1) * crossing
                  * chany_place_cost_fac[bbptr[layer_num].xmax][bbptr[layer_num].xmin - 1];
@@ -2646,7 +2646,6 @@ static void get_non_updateable_bb(ClusterNetId net_id,
     auto& cluster_ctx = g_vpr_ctx.clustering();
     auto& place_ctx = g_vpr_ctx.placement();
 
-
     ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id);
     pnum = net_pin_to_tile_pin_index(net_id, 0);
 
@@ -2666,9 +2665,9 @@ static void get_non_updateable_bb(ClusterNetId net_id,
         bnum = cluster_ctx.clb_nlist.pin_block(pin_id);
         pnum = tile_pin_index(pin_id);
         int x = place_ctx.block_locs[bnum].loc.x
-            + physical_tile_type(bnum)->pin_width_offset[pnum];
+                + physical_tile_type(bnum)->pin_width_offset[pnum];
         int y = place_ctx.block_locs[bnum].loc.y
-            + physical_tile_type(bnum)->pin_height_offset[pnum];
+                + physical_tile_type(bnum)->pin_height_offset[pnum];
 
         int layer_num = place_ctx.block_locs[bnum].loc.layer;
         VTR_ASSERT(layer_num >= 0 && layer_num < num_layers);
@@ -2720,8 +2719,8 @@ static void update_bb(ClusterNetId net_id,
      * The x and y coordinates are the pin's x and y coordinates.         */
     /* IO blocks are considered to be one cell in for simplicity.         */
     //TODO: account for multiple physical pin instances per logical pin
-    const std::vector<t_2D_tbb> *curr_bb_edge, *curr_bb_coord;
-    const std::vector<int> *curr_layer_pin_sink_count;
+    const std::vector<t_2D_tbb>*curr_bb_edge, *curr_bb_coord;
+    const std::vector<int>* curr_layer_pin_sink_count;
 
     auto& device_ctx = g_vpr_ctx.device();
     auto& place_move_ctx = g_placer_ctx.move();
@@ -2791,7 +2790,6 @@ static void update_bb_pin_sink_count(ClusterNetId net_id,
     VTR_ASSERT(curr_layer_pin_sink_count[pin_old_loc.layer_num] > 0);
     bb_pin_sink_count_new[pin_old_loc.layer_num] = curr_layer_pin_sink_count[pin_old_loc.layer_num] - 1;
     bb_pin_sink_count_new[pin_new_loc.layer_num] = curr_layer_pin_sink_count[pin_new_loc.layer_num] + 1;
-
 }
 
 static void update_bb_edges(ClusterNetId net_id,
@@ -2805,8 +2803,7 @@ static void update_bb_edges(ClusterNetId net_id,
     int old_layer = pin_old_loc.layer_num;
 
     if (pin_old_loc.x == curr_bb_coord[pin_old_loc.layer_num].xmax) {
-        if (pin_old_loc.layer_num != pin_new_loc.layer_num ||
-            pin_new_loc.x < pin_old_loc.x) {
+        if (pin_old_loc.layer_num != pin_new_loc.layer_num || pin_new_loc.x < pin_old_loc.x) {
             remove_block_from_bb_edge(net_id,
                                       bb_edge_new,
                                       bb_coord_new,
@@ -2822,8 +2819,7 @@ static void update_bb_edges(ClusterNetId net_id,
     }
 
     if (pin_old_loc.x == curr_bb_coord[pin_old_loc.layer_num].xmin) {
-        if (pin_old_loc.layer_num != pin_new_loc.layer_num ||
-            pin_new_loc.x > pin_old_loc.x) {
+        if (pin_old_loc.layer_num != pin_new_loc.layer_num || pin_new_loc.x > pin_old_loc.x) {
             remove_block_from_bb_edge(net_id,
                                       bb_edge_new,
                                       bb_coord_new,
@@ -2839,8 +2835,7 @@ static void update_bb_edges(ClusterNetId net_id,
     }
 
     if (pin_old_loc.y == curr_bb_coord[pin_old_loc.layer_num].ymax) {
-        if (pin_old_loc.layer_num != pin_new_loc.layer_num ||
-            pin_new_loc.y < pin_old_loc.y) {
+        if (pin_old_loc.layer_num != pin_new_loc.layer_num || pin_new_loc.y < pin_old_loc.y) {
             remove_block_from_bb_edge(net_id,
                                       bb_edge_new,
                                       bb_coord_new,
@@ -2856,8 +2851,7 @@ static void update_bb_edges(ClusterNetId net_id,
     }
 
     if (pin_old_loc.y == curr_bb_coord[pin_old_loc.layer_num].ymin) {
-        if (pin_old_loc.layer_num != pin_new_loc.layer_num ||
-            pin_new_loc.y > pin_old_loc.y) {
+        if (pin_old_loc.layer_num != pin_new_loc.layer_num || pin_new_loc.y > pin_old_loc.y) {
             remove_block_from_bb_edge(net_id,
                                       bb_edge_new,
                                       bb_coord_new,
@@ -2871,7 +2865,6 @@ static void update_bb_edges(ClusterNetId net_id,
             }
         }
     }
-
 }
 
 static void remove_block_from_bb_edge(ClusterNetId net_id,
@@ -2893,7 +2886,6 @@ static void remove_block_from_bb_edge(ClusterNetId net_id,
         new_num_block_on_edge = old_num_block_on_edge - 1;
         new_edge_coord = old_edge_coord;
     }
-
 }
 
 static void add_block_to_bb(const t_2D_tbb& bb_edge_old,
@@ -2973,8 +2965,6 @@ static void add_block_to_bb(const t_2D_tbb& bb_edge_old,
     }
 }
 
-
-
 static void free_fast_cost_update() {
     chanx_place_cost_fac.clear();
     chany_place_cost_fac.clear();

From cde9f2887ba471fee78c44858c3972fae5ed0c07 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 9 Aug 2023 10:38:06 -0400
Subject: [PATCH 060/257] add modified aman's arch for 3d experiments

---
 vtr_flow/arch/multi_die/aman_3d_coffe.xml   | 1597 +++++++++++++++++++
 vtr_flow/arch/multi_die/aman_3d_limited.xml | 1579 ++++++++++++++++++
 2 files changed, 3176 insertions(+)
 create mode 100644 vtr_flow/arch/multi_die/aman_3d_coffe.xml
 create mode 100644 vtr_flow/arch/multi_die/aman_3d_limited.xml

diff --git a/vtr_flow/arch/multi_die/aman_3d_coffe.xml b/vtr_flow/arch/multi_die/aman_3d_coffe.xml
new file mode 100644
index 00000000000..1825d967a18
--- /dev/null
+++ b/vtr_flow/arch/multi_die/aman_3d_coffe.xml
@@ -0,0 +1,1597 @@
+<architecture>
+  <models>
+    <model name="single_port_ram">
+      <input_ports>
+        <port name="we" clock="clk" combinational_sink_ports="out"/>
+        <!-- control -->
+        <port name="addr" clock="clk" combinational_sink_ports="out"/>
+        <!-- address lines -->
+        <port name="data" clock="clk" combinational_sink_ports="out"/>
+        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
+        <port name="clk" is_clock="1"/>
+        <!-- memories are often clocked -->
+      </input_ports>
+      <output_ports>
+        <port name="out" clock="clk"/>
+        <!-- output can be broken down into smaller bit widths minimum size 1 -->
+      </output_ports>
+    </model>
+    <model name="dual_port_ram">
+      <input_ports>
+        <port name="we1" clock="clk" combinational_sink_ports="out1"/>
+        <!-- write enable -->
+        <port name="we2" clock="clk" combinational_sink_ports="out2"/>
+        <!-- write enable -->
+        <port name="addr1" clock="clk" combinational_sink_ports="out1"/>
+        <!-- address lines -->
+        <port name="addr2" clock="clk" combinational_sink_ports="out2"/>
+        <!-- address lines -->
+        <port name="data1" clock="clk" combinational_sink_ports="out1"/>
+        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
+        <port name="data2" clock="clk" combinational_sink_ports="out2"/>
+        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
+        <port name="clk" is_clock="1"/>
+        <!-- memories are often clocked -->
+      </input_ports>
+      <output_ports>
+        <port name="out1" clock="clk"/>
+        <!-- output can be broken down into smaller bit widths minimum size 1 -->
+        <port name="out2" clock="clk"/>
+        <!-- output can be broken down into smaller bit widths minimum size 1 -->
+      </output_ports>
+    </model>
+    <!-- Used inside DSPs. 
+         Fixed point multiplication.
+         ODIN infers these when * sign appears in RTL. -->
+    <model name="multiply">
+      <input_ports>
+        <port name="a" combinational_sink_ports="out"/>
+        <port name="b" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out"/>
+      </output_ports>
+    </model>
+    <!--A mode in DSP slice-->
+    <model name="int_sop_2">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="ax" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="ay" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bx" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="by" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="chainin" clock="clk" combinational_sink_ports="result chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+      </output_ports>
+    </model>
+    <!--A mode in DSP slice-->
+    
+    <model name="mult_add_int_18x19">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="ax" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="ay" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="bx" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="chainin" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="scanin" clock="clk" combinational_sink_ports="result scanout chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+        <port name="scanout"/>
+      </output_ports>
+    </model>
+    <!--A mode in DSP slice-->
+    <model name="int_sop_4">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="ax" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="ay" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bx" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="by" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="cx" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="cy" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="dx" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="dy" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="chainin" clock="clk" combinational_sink_ports="result chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+      </output_ports>
+    </model>
+  </models>
+    <!-- Fixed point MAC inside DSP slices -->
+  <tiles>
+    <tile name="io" area="0">
+      <sub_tile name="io" capacity="8">
+        <equivalent_sites>
+          <site pb_type="io" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="outpad" num_pins="1"/>
+        <output name="inpad" num_pins="1"/>
+        <clock name="clock" num_pins="1"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
+        <pinlocations pattern="custom">
+          <loc side="left" layer_offset="1">io.inpad</loc>
+          <loc side="left">io.outpad io.clock</loc>
+          <loc side="top" layer_offset="1">io.inpad</loc>
+          <loc side="top">io.outpad io.clock</loc>
+          <loc side="right" layer_offset="1">io.inpad</loc>
+          <loc side="right">io.outpad io.clock</loc>
+          <loc side="bottom" layer_offset="1">io.inpad</loc>
+          <loc side="bottom">io.outpad io.clock</loc>
+        </pinlocations>
+      </sub_tile>
+    </tile>
+    <tile name="clb" height="1" width="1" area="27905">
+      <sub_tile name="clb">
+        <equivalent_sites>
+          <site pb_type="clb" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="I1" num_pins="10" equivalent="full"/>
+        <input name="I2" num_pins="10" equivalent="full"/>
+        <input name="I3" num_pins="10" equivalent="full"/>
+        <input name="I4" num_pins="10" equivalent="full"/>
+        <output name="O1" num_pins="2" equivalent="full"/>
+        <output name="O2" num_pins="2" equivalent="full"/>
+        <output name="O3" num_pins="2" equivalent="full"/>
+        <output name="O4" num_pins="2" equivalent="full"/>
+        <output name="O5" num_pins="2" equivalent="full"/>
+        <output name="O6" num_pins="2" equivalent="full"/>
+        <output name="O7" num_pins="2" equivalent="full"/>
+        <output name="O8" num_pins="2" equivalent="full"/>
+        <output name="O9" num_pins="2" equivalent="full"/>
+        <output name="O10" num_pins="2" equivalent="full"/>
+        <clock name="clk" num_pins="1"/>
+        <fc in_type="frac" in_val="0.2" out_type="frac" out_val="0.025"/>
+        <!-- Two sided connectivity CLB architecture--> 
+        <pinlocations pattern="custom">
+          <loc side="right">clb.I1 clb.I3 clb.clk</loc>
+          <loc side="right" layer_offset="1">clb.O1 clb.O2 clb.O3 clb.O4 clb.O5</loc>
+          <loc side="bottom">clb.I2 clb.I4 clb.clk</loc>
+          <loc side="bottom" layer_offset="1">clb.O6 clb.O7 clb.O8 clb.O9 clb.O10</loc>    
+        </pinlocations>
+      </sub_tile>
+    </tile>
+    <tile name="dsp_top" height="1" width="1" area="253779">
+      <sub_tile name="dsp_top">
+        <equivalent_sites>
+          <site pb_type="dsp_top" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="reset" num_pins="1" is_non_clock_global="true"/>
+        <input name="dsp_I1" num_pins="37" />
+        <input name="dsp_I2" num_pins="37" />
+        <input name="chainin" num_pins="64"/>
+        <input name="scanin" num_pins="27"/>
+        <output name="result" num_pins="74"/>
+        <output name="chainout" num_pins="64"/>
+        <output name="scanout" num_pins="27"/>
+        <clock name="clk" num_pins="1"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10">
+          <!-- clock pins and chain ports do not connect to local routing -->
+          <fc_override port_name="clk" fc_type="frac" fc_val="0"/>
+          <fc_override port_name="chainin" fc_type="frac" fc_val="0"/>
+          <fc_override port_name="chainout" fc_type="frac" fc_val="0"/>
+          <fc_override port_name="scanin" fc_type="frac" fc_val="0"/>
+          <fc_override port_name="scanout" fc_type="frac" fc_val="0"/>
+        </fc>
+        <pinlocations pattern="custom">
+            <loc side="left">dsp_top.dsp_I1 dsp_top.reset</loc>
+            <loc side="right">dsp_top.dsp_I2 dsp_top.clk</loc>
+            <loc side="top">dsp_top.chainin dsp_top.scanin</loc>
+            <loc side="bottom">dsp_top.chainout dsp_top.scanout</loc>
+            <loc side="right" layer_offset="1">dsp_top.result[36:0]</loc>
+            <loc side="left" layer_offset="1">dsp_top.result[73:37]</loc>
+        </pinlocations>
+      </sub_tile>
+    </tile>
+    <tile name="memory" height="1" width="1" area="137668">
+      <sub_tile name="memory">
+        <equivalent_sites>
+          <site pb_type="memory" pin_mapping="direct"/>
+        </equivalent_sites>
+      <input name="addr1" num_pins="11"/>
+      <input name="addr2" num_pins="11"/>
+      <input name="data" num_pins="40"/>
+      <input name="we1" num_pins="1"/>
+      <input name="we2" num_pins="1"/>
+      <output name="out" num_pins="40"/>
+      <clock name="clk" num_pins="1"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10">
+          <fc_override fc_type="frac" fc_val="0" port_name="clk"/>
+        </fc>  
+        <pinlocations pattern="custom">
+          <loc side="top"> memory.addr1[0] memory.addr1[8] memory.addr2[5] memory.data[2] memory.data[10] memory.data[18] memory.data[26] memory.data[34] memory.clk memory.addr1[1] memory.addr1[9] memory.addr2[6] memory.data[3] memory.data[11] memory.data[19] memory.data[27] memory.data[35]</loc>
+          <loc side="right"> memory.addr1[2] memory.addr1[10] memory.addr2[7] memory.data[4] memory.data[12] memory.data[20] memory.data[28] memory.data[36] memory.addr1[3] memory.addr2[0] memory.addr2[8] memory.data[5] memory.data[13] memory.data[21] memory.data[29] memory.data[37]</loc>
+          <loc side="bottom"> memory.addr1[4] memory.addr2[1] memory.addr2[9] memory.data[6] memory.data[14] memory.data[22] memory.data[30] memory.data[38] memory.addr1[5] memory.addr2[2] memory.addr2[10] memory.data[7] memory.data[15] memory.data[23] memory.data[31] memory.data[39]</loc>
+          <loc side="left" > memory.addr1[6] memory.addr2[3] memory.data[0] memory.data[8] memory.data[16] memory.data[24] memory.data[32] memory.we1 memory.addr1[7] memory.addr2[4] memory.data[1] memory.data[9] memory.data[17] memory.data[25] memory.data[33] memory.we2</loc>
+
+          <loc side="top" layer_offset="1"> memory.out[0] memory.out[8] memory.out[16] memory.out[24] memory.out[32] memory.out[1] memory.out[9] memory.out[17] memory.out[25] memory.out[33]</loc>
+          <loc side="right" layer_offset="1"> memory.out[2] memory.out[10] memory.out[18] memory.out[26] memory.out[34] memory.out[3] memory.out[11] memory.out[19] memory.out[27] memory.out[35]</loc>
+          <loc side="bottom" layer_offset="1"> memory.out[4] memory.out[12] memory.out[20] memory.out[28] memory.out[36] memory.out[5] memory.out[13] memory.out[21] memory.out[29] memory.out[37]</loc>
+          <loc side="left" layer_offset="1"> memory.out[6] memory.out[14] memory.out[22] memory.out[30] memory.out[38] memory.out[7] memory.out[15] memory.out[23] memory.out[31] memory.out[39]</loc>
+
+        </pinlocations>
+      </sub_tile>
+    </tile>
+    <tile name="tsv_hole" height="2" width="2" area="137668">
+      <sub_tile name="tsv_hole">
+        <equivalent_sites>
+          <site pb_type="tsv_hole"/>
+        </equivalent_sites>
+        <input name="in" num_pins="1"/>
+        <output name="out" num_pins="1"/>
+        <fc in_type="abs" in_val="0" out_type="abs" out_val="0"/>
+      </sub_tile>
+    </tile>
+  </tiles>
+  <!-- ODIN II specific config ends -->
+  <layout>
+    <!-- Physical descriptions begin -->
+    <fixed_layout name="coffe_7nm" width="328" height="288">
+      <layer die="0">
+        <perimeter type="io" priority="101"/>
+      
+        <corners type="EMPTY" priority="102"/>
+
+        <fill type="clb" priority="10"/>
+
+        <col type="memory" startx="11" starty="1" repeatx="41" priority="20"/>
+        <col type="memory" startx="25" starty="1" repeatx="41" priority="20"/>
+        <col type="memory" startx="37" starty="1" repeatx="41" priority="20"/>
+
+        <col type="dsp_top" startx="18" starty="1" repeatx="41" priority="20"/>
+        <col type="dsp_top" startx="31" starty="1" repeatx="41" priority="20"/>
+
+        <!-- PW -->
+        <col type="tsv_hole" startx="8" starty="5" repeatx="13" incry="12" priority="103"/>
+        
+        <!-- GND -->
+        <col type="tsv_hole" startx="14" starty="11" repeatx="13" incry="12" priority="103"/>
+
+      </layer>
+      <layer die="1">
+        <perimeter type="io" priority="101"/>
+      
+        <corners type="EMPTY" priority="102"/>
+
+        <fill type="clb" priority="10"/>
+
+        <col type="memory" startx="11" starty="1" repeatx="41" priority="20"/>
+        <col type="memory" startx="25" starty="1" repeatx="41" priority="20"/>
+        <col type="memory" startx="37" starty="1" repeatx="41" priority="20"/>
+
+        <col type="dsp_top" startx="18" starty="1" repeatx="41" priority="20"/>
+        <col type="dsp_top" startx="31" starty="1" repeatx="41" priority="20"/>
+        
+      </layer>
+    </fixed_layout>
+  </layout>
+  <device>
+    <sizing R_minW_nmos="13090" R_minW_pmos="19086.83"/>
+    <area grid_logic_tile_area="0"/>
+    <chan_width_distr>
+      <x distr="uniform" peak="1.000000"/>
+      <y distr="uniform" peak="1.000000"/>
+    </chan_width_distr>
+    <switch_block type="custom"/>
+    <connection_block input_switch_name="ipin_cblock" input_inter_die_switch_name="die_connection"/>
+  </device>
+  <switchlist>
+    <switch type="mux" name="L4_driver" R="0.0" Cin="0.0" Cout="0.0" Tdel="9.877e-11" mux_trans_size="2.6482996805637553" buf_size="18.744014602932605"/>
+    <switch type="mux" name="L4_inter_layer_driver" R="0.0" Cin="0.0" Cout="0.0" Tdel="17.177e-11" mux_trans_size="2.6482996805637553" buf_size="18.744014602932605"/>
+    <!-- Delay of L16 driver is scaled from L4 by a factor of 1.5x (based on numbers from the Titan Stratix IV architecture file)
+   Area numbers will not be totally accurate because of the same buf_size -->
+    <switch type="mux" name="L16_driver" R="0.0" Cin="0.0" Cout="0.0" Tdel="2.016e-10" mux_trans_size="3.1851297470059468" buf_size="39.327334265524485"/>
+    <switch type="mux" name="L16_inter_layer_driver" R="0.0" Cin="0.0" Cout="0.0" Tdel="2.746e-10" mux_trans_size="3.1851297470059468" buf_size="39.327334265524485"/>
+    <switch type="mux" name="ipin_cblock" R="0.0" Cout="0.0" Cin="0.0" Tdel="5.636e-11" mux_trans_size="2.008" buf_size="9.624436045683868"/>
+    <switch type="mux" name="die_connection" R="0.0" Cout="0.0" Cin="0.0" Tdel="130e-12" mux_trans_size="1.508" buf_size="11.71"/>
+  </switchlist>
+  <segmentlist>
+    <segment name="L4" freq="280" length="4" type="unidir" Rmetal="0.0" Cmetal="0.0">
+      <mux name="L4_driver"/>
+      <mux_inter_die name="L4_inter_layer_driver"/>
+      <sb type="pattern">1 1 1 1 1</sb>
+      <cb type="pattern">1 1 1 1</cb>
+    </segment>
+    <segment name="L16" freq="40" length="16" type="unidir" Rmetal="0.0" Cmetal="0.0">
+      <mux name="L16_driver"/>
+      <mux_inter_die name="L16_inter_layer_driver"/>
+      <!-- Vias from the top of the metal stack (global layers, where the long wires are 
+           implemented) down to the middle/bottom of the metal stack (semi-global layers, 
+           where the short wires are implemented) are expensive and restrictive.
+           As a result Stratix IV only places long wire switch blocks every 4 LABs -->
+      <sb type="pattern">1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1</sb>
+      <!-- For the same reasons, long wires do not connect to block pins in Stratix IV -->
+      <cb type="pattern">0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0</cb>
+    </segment>
+  </segmentlist>
+  <directlist>
+    <!-- Direct connect from one DSP to the DSP directly below it -->
+    <direct name="dsp_out_chain" from_pin="dsp_top.chainout" from_side="bottom" to_pin="dsp_top.chainin" to_side="top" x_offset="0" y_offset="-4" z_offset="0"/>
+    <direct name="dsp_in_chain" from_pin="dsp_top.scanout" from_side="bottom" to_pin="dsp_top.scanin" to_side="top" x_offset="0" y_offset="-4" z_offset="0"/>
+  </directlist>
+
+  <complexblocklist>
+    <!-- Define I/O pads begin -->
+    <!-- Not sure of the area of an I/O (varies widely), and it's not relevant to the design of the FPGA core, so we're setting it to 0. -->
+    <pb_type name="io">
+      <input name="outpad" num_pins="1"/>
+      <output name="inpad" num_pins="1"/>
+      <clock name="clock" num_pins="1"/>
+      <!-- IOs can operate as either inputs or outputs.
+       Delays below come from Ian Kuon. They are small, so they should be interpreted as
+       the delays to and from registers in the I/O (and generally I/Os are registered 
+       today and that is when you timing analyze them).
+       -->
+      <mode name="inpad">
+        <pb_type name="inpad" blif_model=".input" num_pb="1">
+          <output name="inpad" num_pins="1"/>
+        </pb_type>
+        <interconnect>
+          <direct name="inpad" input="inpad.inpad" output="io.inpad">
+            <delay_constant max="4.243e-11" in_port="inpad.inpad" out_port="io.inpad"/>
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="outpad">
+        <pb_type name="outpad" blif_model=".output" num_pb="1">
+          <input name="outpad" num_pins="1"/>
+        </pb_type>
+        <interconnect>
+          <direct name="outpad" input="io.outpad" output="outpad.outpad">
+            <delay_constant max="1.394e-11" in_port="io.outpad" out_port="outpad.outpad"/>
+          </direct>
+        </interconnect>
+      </mode>
+      <!-- Every input pin is driven by 15% of the tracks in a channel, every output pin drives 10% of the tracks in a channel -->
+      <!-- IOs go on the periphery of the FPGA, for consistency, 
+          make it physically equivalent on all sides so that only one definition of I/Os is needed.
+          If I do not make a physically equivalent definition, then I need to define 4 different I/Os, one for each side of the FPGA
+        -->
+      <!-- Place I/Os on the sides of the FPGA -->
+    </pb_type>
+    <!-- Define I/O pads ends -->
+    <!-- Define general purpose logic block (CLB) begin -->
+    <pb_type name="clb">
+      <input name="I1" num_pins="10" equivalent="full"/>
+      <input name="I2" num_pins="10" equivalent="full"/>
+      <input name="I3" num_pins="10" equivalent="full"/>
+      <input name="I4" num_pins="10" equivalent="full"/>
+      <output name="O1" num_pins="2" equivalent="full"/>
+      <output name="O2" num_pins="2" equivalent="full"/>
+      <output name="O3" num_pins="2" equivalent="full"/>
+      <output name="O4" num_pins="2" equivalent="full"/>
+      <output name="O5" num_pins="2" equivalent="full"/>
+      <output name="O6" num_pins="2" equivalent="full"/>
+      <output name="O7" num_pins="2" equivalent="full"/>
+      <output name="O8" num_pins="2" equivalent="full"/>
+      <output name="O9" num_pins="2" equivalent="full"/>
+      <output name="O10" num_pins="2" equivalent="full"/>
+      <clock name="clk" num_pins="1"/>  <!-- Basic logic element definition -->
+      <pb_type name="fle" num_pb="10">
+        <input name="in_A" num_pins="1"/>
+        <input name="in_B" num_pins="1"/>
+        <input name="in_C" num_pins="1"/>
+        <input name="in_D" num_pins="1"/>
+        <input name="in_E" num_pins="1"/>
+        <input name="in_F" num_pins="1"/>
+        <output name="out_local" num_pins="2"/>
+        <output name="out_routing" num_pins="2"/>
+        <clock name="clk" num_pins="1"/> 
+        <mode name="n1_lut6">
+          <pb_type name="ble6" num_pb="1">
+            <input name="in_A" num_pins="1"/>
+            <input name="in_B" num_pins="1"/>
+            <input name="in_C" num_pins="1"/>
+            <input name="in_D" num_pins="1"/>
+            <input name="in_E" num_pins="1"/>
+            <input name="in_F" num_pins="1"/>
+            <output name="out_local" num_pins="1"/>
+            <output name="out_routing" num_pins="2"/>
+            <clock name="clk" num_pins="1"/> 
+            <pb_type name="lut6" blif_model=".names" num_pb="1" class="lut">
+              <input name="in" num_pins="6" port_class="lut_in"/>
+              <output name="out" num_pins="1" port_class="lut_out"/>
+              <!-- We define the LUT delays on the LUT pins instead of through the LUT -->
+              <delay_matrix type="max" in_port="lut6.in" out_port="lut6.out">
+                 0
+                 0
+                 0
+                 0
+                 0
+                 0
+              </delay_matrix>
+            </pb_type>
+            <pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
+              <input name="D" num_pins="1" port_class="D"/>
+              <output name="Q" num_pins="1" port_class="Q"/>
+              <clock name="clk" num_pins="1" port_class="clock"/>
+              <T_setup value="1.891e-11" port="ff.D" clock="clk"/>
+              <T_clock_to_Q max="6.032e-11" port="ff.Q" clock="clk"/>
+            </pb_type>
+            <interconnect>
+              <direct name="direct0" input="ble6.in_A" output="lut6.in[0:0]">
+                <delay_constant max="1.1287999999999999e-10" in_port="ble6.in_A" out_port="lut6.in[0:0]" />
+              </direct>
+              <direct name="direct1" input="ble6.in_B" output="lut6.in[1:1]">
+                <delay_constant max="1.1072500000000001e-10" in_port="ble6.in_B" out_port="lut6.in[1:1]" />
+              </direct>
+              <direct name="direct3" input="ble6.in_D" output="lut6.in[3:3]">
+                <delay_constant max="8.1212e-11" in_port="ble6.in_D" out_port="lut6.in[3:3]" />
+              </direct>
+              <direct name="direct4" input="ble6.in_E" output="lut6.in[4:4]">
+                <delay_constant max="7.961e-11" in_port="ble6.in_E" out_port="lut6.in[4:4]" />
+              </direct>
+              <direct name="direct5" input="ble6.in_F" output="lut6.in[5:5]">
+                <delay_constant max="4.9300999999999996e-11" in_port="ble6.in_F" out_port="lut6.in[5:5]" />
+              </direct>
+              <!--Clock -->
+              <direct name="direct6" input="ble6.clk" output="ff.clk"/>
+              <!-- Register feedback mux -->   
+              <mux name="mux1" input="ble6.in_C ff.Q" output="lut6.in[2:2]">
+                <delay_constant max="1.1347e-10" in_port="ble6.in_C" out_port="lut6.in[2:2]" />
+                <delay_constant max="1.1347e-10" in_port="ff.Q" out_port="lut6.in[2:2]" />  
+              </mux>
+              <!-- FF input selection mux -->
+              <mux name="2" input="lut6.out ble6.in_C" output="ff.D">
+                <delay_constant max="1.74588e-11" in_port="lut6.out" out_port="ff.D" />
+                <delay_constant max="1.74588e-11" in_port="ble6.in_C" out_port="ff.D" />
+              </mux>
+              <!-- BLE output (local) -->
+              <mux name="mux3" input="ff.Q lut6.out" output="ble6.out_local">
+                <delay_constant max="1.346e-10" in_port="ff.Q" out_port="ble6.out_local" />
+                <delay_constant max="1.346e-10" in_port="lut6.out" out_port="ble6.out_local" />
+              </mux>
+              <!-- BLE output (routing 1) --> 
+              <mux name="mux4" input="ff.Q lut6.out" output="ble6.out_routing[0:0]">
+                <delay_constant max="3.771e-11" in_port="ff.Q" out_port="ble6.out_routing[0:0]" />
+                <delay_constant max="3.771e-11" in_port="lut6.out" out_port="ble6.out_routing[0:0]" />
+              </mux>
+              <!-- BLE output (routing 2) --> 
+              <mux name="mux5" input="ff.Q lut6.out" output="ble6.out_routing[1:1]">
+                <delay_constant max="3.771e-11" in_port="ff.Q" out_port="ble6.out_routing[1:1]" />
+                <delay_constant max="3.771e-11" in_port="lut6.out" out_port="ble6.out_routing[1:1]" />
+              </mux>
+            </interconnect>
+          </pb_type>
+          <interconnect>
+            <direct name="direct1" input="fle.in_A" output="ble6.in_A"/>
+            <direct name="direct2" input="fle.in_B" output="ble6.in_B"/>
+            <direct name="direct3" input="fle.in_C" output="ble6.in_C"/>
+            <direct name="direct4" input="fle.in_D" output="ble6.in_D"/>
+            <direct name="direct5" input="fle.in_E" output="ble6.in_E"/>
+            <direct name="direct6" input="fle.in_F" output="ble6.in_F"/>
+            <direct name="direct7" input="ble6.out_local" output="fle.out_local[0:0]"/>
+            <direct name="direct8" input="ble6.out_routing" output="fle.out_routing"/>
+            <direct name="direct9" input="fle.clk" output="ble6.clk"/>
+          </interconnect>
+        </mode>  
+        <mode name="n2_lut5">
+          <pb_type name="lut5inter" num_pb="1">
+            <input name="in_A" num_pins="1"/>
+            <input name="in_B" num_pins="1"/>
+            <input name="in_C" num_pins="1"/>
+            <input name="in_D" num_pins="1"/>
+            <input name="in_E" num_pins="1"/>
+            <output name="out_local" num_pins="2"/>
+            <output name="out_routing" num_pins="2"/>
+            <clock name="clk" num_pins="1"/> 
+            <pb_type name="ble5" num_pb="2">
+              <input name="in_A" num_pins="1"/>
+              <input name="in_B" num_pins="1"/>
+              <input name="in_C" num_pins="1"/>
+              <input name="in_D" num_pins="1"/>
+              <input name="in_E" num_pins="1"/>
+              <output name="out_local" num_pins="1"/>
+              <output name="out_routing" num_pins="1"/>
+              <clock name="clk" num_pins="1"/> 
+              <pb_type name="lut5" blif_model=".names" num_pb="1" class="lut">
+                <input name="in" num_pins="5" port_class="lut_in"/>
+                <output name="out" num_pins="1" port_class="lut_out"/>
+                <!-- We define the LUT delays on the LUT pins instead of through the LUT -->
+                <delay_matrix type="max" in_port="lut5.in" out_port="lut5.out">
+                   0
+                   0
+                   0
+                   0
+                   0
+                </delay_matrix>
+              </pb_type>
+              <pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
+                <input name="D" num_pins="1" port_class="D"/>
+                <output name="Q" num_pins="1" port_class="Q"/>
+                <clock name="clk" num_pins="1" port_class="clock"/>
+                <T_setup value="1.891e-11" port="ff.D" clock="clk"/>
+                <T_clock_to_Q max="6.032e-11" port="ff.Q" clock="clk"/>
+              </pb_type>
+              <interconnect>
+                <direct name="direct0" input="ble5.in_A" output="lut5.in[0:0]">
+                  <delay_constant max="1.1287999999999999e-10" in_port="ble5.in_A" out_port="lut5.in[0:0]" />
+                </direct>
+                <direct name="direct1" input="ble5.in_B" output="lut5.in[1:1]">
+                  <delay_constant max="1.1072500000000001e-10" in_port="ble5.in_B" out_port="lut5.in[1:1]" />
+                </direct>
+                <direct name="direct3" input="ble5.in_D" output="lut5.in[3:3]">
+                  <delay_constant max="8.1212e-11" in_port="ble5.in_D" out_port="lut5.in[3:3]" />
+                </direct>
+                <direct name="direct4" input="ble5.in_E" output="lut5.in[4:4]">
+                  <delay_constant max="7.961e-11" in_port="ble5.in_E" out_port="lut5.in[4:4]" />
+                </direct>
+                  <!--Clock -->
+                <direct name="direct5" input="ble5.clk" output="ff.clk"/>
+                <!-- Register feedback mux -->   
+                <mux name="mux1" input="ble5.in_C ff.Q" output="lut5.in[2:2]">
+                  <delay_constant max="1.1347e-10" in_port="ble5.in_C" out_port="lut5.in[2:2]" />
+                  <delay_constant max="1.1347e-10" in_port="ff.Q" out_port="lut5.in[2:2]" />  
+                </mux>
+                <!-- FF input selection mux -->
+                <mux name="2" input="lut5.out ble5.in_C" output="ff.D">
+                  <delay_constant max="1.74588e-11" in_port="lut5.out" out_port="ff.D" />
+                  <delay_constant max="1.74588e-11" in_port="ble5.in_C" out_port="ff.D" />
+                </mux>
+                <!-- BLE output (local) -->
+                <mux name="mux3" input="ff.Q lut5.out" output="ble5.out_local">
+                  <delay_constant max="1.346e-10" in_port="ff.Q" out_port="ble5.out_local" />
+                  <delay_constant max="1.346e-10" in_port="lut5.out" out_port="ble5.out_local" />
+                </mux>
+                <!-- BLE output (routing 1) --> 
+                <mux name="mux4" input="ff.Q lut5.out" output="ble5.out_routing[0:0]">
+                  <delay_constant max="3.771e-11" in_port="ff.Q" out_port="ble5.out_routing[0:0]" />
+                  <delay_constant max="3.771e-11" in_port="lut5.out" out_port="ble5.out_routing[0:0]" />
+                </mux>
+              </interconnect>
+            </pb_type>
+            <interconnect>
+              <direct name="direct1" input="lut5inter.in_A" output="ble5[0:0].in_A"/>
+              <direct name="direct2" input="lut5inter.in_B" output="ble5[0:0].in_B"/>
+              <direct name="direct3" input="lut5inter.in_C" output="ble5[0:0].in_C"/>
+              <direct name="direct4" input="lut5inter.in_D" output="ble5[0:0].in_D"/>
+              <direct name="direct5" input="lut5inter.in_E" output="ble5[0:0].in_E"/>
+              <direct name="direct6" input="lut5inter.in_A" output="ble5[1:1].in_A"/>
+              <direct name="direct7" input="lut5inter.in_B" output="ble5[1:1].in_B"/>
+              <direct name="direct8" input="lut5inter.in_C" output="ble5[1:1].in_C"/>
+              <direct name="direct9" input="lut5inter.in_D" output="ble5[1:1].in_D"/>
+              <direct name="direct10" input="lut5inter.in_E" output="ble5[1:1].in_E"/>
+              <direct name="direct11" input="ble5[1:0].out_local" output="lut5inter.out_local"/>
+              <direct name="direct12" input="ble5[1:0].out_routing" output="lut5inter.out_routing"/>
+              <complete name="complete1" input="lut5inter.clk" output="ble5[1:0].clk"/> 
+            </interconnect>
+          </pb_type>
+          <interconnect>
+            <direct name="direct1" input="fle.in_A" output="lut5inter.in_A"/>
+            <direct name="direct2" input="fle.in_B" output="lut5inter.in_B"/>
+            <direct name="direct3" input="fle.in_C" output="lut5inter.in_C"/>
+            <direct name="direct4" input="fle.in_D" output="lut5inter.in_D"/>
+            <direct name="direct5" input="fle.in_E" output="lut5inter.in_E"/>
+            <direct name="direct7" input="lut5inter.out_local" output="fle.out_local"/>
+            <direct name="direct8" input="lut5inter.out_routing" output="fle.out_routing"/>
+            <direct name="direct9" input="fle.clk" output="lut5inter.clk"/>
+          </interconnect>
+        </mode> 
+        </pb_type>
+        <interconnect>
+        <!-- 50% sparsely populated local routing -->
+        <complete name="lutA" input="clb.I4 clb.I3 fle[1:0].out_local fle[3:2].out_local fle[8:8].out_local" output="fle[9:0].in_A">
+          <delay_constant max="2.842e-11" in_port="clb.I4" out_port="fle.in_A" />
+          <delay_constant max="2.842e-11" in_port="clb.I3" out_port="fle.in_A" />
+          </complete>
+        <complete name="lutB" input="clb.I3 clb.I2 fle[3:2].out_local fle[5:4].out_local fle[9:9].out_local" output="fle[9:0].in_B">
+          <delay_constant max="2.842e-11" in_port="clb.I3" out_port="fle.in_B" />
+          <delay_constant max="2.842e-11" in_port="clb.I2" out_port="fle.in_B" />
+          </complete>
+        <complete name="lutC" input="clb.I2 clb.I1 fle[5:4].out_local fle[7:6].out_local fle[8:8].out_local" output="fle[9:0].in_C">
+          <delay_constant max="2.842e-11" in_port="clb.I2" out_port="fle.in_C" />
+          <delay_constant max="2.842e-11" in_port="clb.I1" out_port="fle.in_C" />
+          </complete>
+        <complete name="lutD" input="clb.I4 clb.I2 fle[1:0].out_local fle[5:4].out_local fle[9:9].out_local" output="fle[9:0].in_D">
+          <delay_constant max="2.842e-11" in_port="clb.I4" out_port="fle.in_D" />
+          <delay_constant max="2.842e-11" in_port="clb.I2" out_port="fle.in_D" />
+          </complete>
+        <complete name="lutE" input="clb.I3 clb.I1 fle[3:2].out_local fle[7:6].out_local fle[8:8].out_local" output="fle[9:0].in_E">
+          <delay_constant max="2.842e-11" in_port="clb.I3" out_port="fle.in_E" />
+          <delay_constant max="2.842e-11" in_port="clb.I1" out_port="fle.in_E" />
+          </complete>
+        <complete name="lutF" input="clb.I4 clb.I1 fle[1:0].out_local fle[7:6].out_local fle[9:9].out_local" output="fle[9:0].in_F">
+          <delay_constant max="2.842e-11" in_port="clb.I4" out_port="fle.in_F" />
+          <delay_constant max="2.842e-11" in_port="clb.I1" out_port="fle.in_F" />
+          </complete>
+          <complete name="clks" input="clb.clk" output="fle[9:0].clk">
+          </complete>
+          <!-- Direct connections to CLB outputs -->
+          <direct name="clbouts1" input="fle[0:0].out_routing" output="clb.O1"/>
+          <direct name="clbouts2" input="fle[1:1].out_routing" output="clb.O2"/>
+          <direct name="clbouts3" input="fle[2:2].out_routing" output="clb.O3"/>
+          <direct name="clbouts4" input="fle[3:3].out_routing" output="clb.O4"/>
+          <direct name="clbouts5" input="fle[4:4].out_routing" output="clb.O5"/>
+          <direct name="clbouts6" input="fle[5:5].out_routing" output="clb.O6"/>
+          <direct name="clbouts7" input="fle[6:6].out_routing" output="clb.O7"/>
+          <direct name="clbouts8" input="fle[7:7].out_routing" output="clb.O8"/>
+          <direct name="clbouts9" input="fle[8:8].out_routing" output="clb.O9"/>
+          <direct name="clbouts10" input="fle[9:9].out_routing" output="clb.O10"/>
+        </interconnect>
+      </pb_type>
+    <!-- Define general purpose logic block (CLB) ends -->
+
+    <!-- Define DSP slice begin -->
+    <pb_type name="dsp_top">
+      <input name="reset" num_pins="1" is_non_clock_global="true"/>
+      <input name="dsp_I1" num_pins="37" />
+      <input name="dsp_I2" num_pins="37" />
+      <input name="chainin" num_pins="64"/>
+      <input name="scanin" num_pins="27"/>
+      <output name="result" num_pins="74"/>
+      <output name="chainout" num_pins="64"/>
+      <output name="scanout" num_pins="27"/>
+      <clock name="clk" num_pins="1"/>
+
+    <pb_type name="dsp" num_pb="1">
+      <input name="reset" num_pins="1"/>
+      <input name="dsp_I1" num_pins="37"/>
+      <input name="dsp_I2" num_pins="37"/>
+      <input name="chainin" num_pins="64"/>
+      <input name="scanin" num_pins="27"/>
+      <output name="result" num_pins="74"/>
+      <output name="chainout" num_pins="64"/>
+      <output name="scanout" num_pins="27"/>
+      <clock name="clk" num_pins="1"/>
+
+      <pb_type name="dsp_pb" num_pb="1">
+        <input name="reset" num_pins="1"/>
+        <input name="datain" num_pins="74"/>
+        <input name="chainin" num_pins="64"/>
+        <input name="scanin" num_pins="27"/>
+        <output name="result" num_pins="74"/>
+        <output name="chainout" num_pins="64"/>
+        <output name="scanout" num_pins="27"/>
+        <clock name="clk" num_pins="1"/>
+
+        <!-- fixed-point multiplier mode (1 27x27 multiplier) result = ax*ay -->
+        <mode name="one_mult_27x27">
+          <pb_type name="one_mult_27x27" num_pb="1">
+            <input name="a" num_pins="27"/>
+            <input name="b" num_pins="27"/>
+            <output name="out" num_pins="54"/>
+            <pb_type name="mult_27x27" blif_model=".subckt multiply" num_pb="1">
+              <input name="a" num_pins="27"/>
+              <input name="b" num_pins="27"/>
+              <output name="out" num_pins="54"/>
+              <delay_constant max="1.667e-9" in_port="mult_27x27.a" out_port="mult_27x27.out"/>
+              <delay_constant max="1.667e-9" in_port="mult_27x27.b" out_port="mult_27x27.out"/>
+            </pb_type>
+            <interconnect>
+              <direct name="a2a" input="one_mult_27x27.a" output="mult_27x27.a">
+              </direct>
+              <direct name="b2b" input="one_mult_27x27.b" output="mult_27x27.b">
+              </direct>
+              <direct name="out2out" input="mult_27x27.out" output="one_mult_27x27.out">
+              </direct>
+            </interconnect>
+          </pb_type>
+          <interconnect>
+            <direct name="datain2a" input="dsp_pb.datain[26:0]" output="one_mult_27x27.a">
+            </direct>
+            <direct name="datain2b" input="dsp_pb.datain[53:27]" output="one_mult_27x27.b">
+            </direct>
+            <direct name="out2dataout" input="one_mult_27x27.out" output="dsp_pb.result[53:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- fixed-point multiplier mode (2 18x19 multipliers) result[some:bits] = ax*ay, result[other:bits] = bx*by -->
+        <mode name="two_mult_18x19">
+          <pb_type name="two_mult_18x19" num_pb="2">
+            <input name="a" num_pins="18"/>
+            <input name="b" num_pins="19"/>
+            <output name="out" num_pins="37"/>
+            <pb_type name="mult_18x19" blif_model=".subckt multiply" num_pb="1">
+              <input name="a" num_pins="18"/>
+              <input name="b" num_pins="19"/>
+              <output name="out" num_pins="37"/>
+              <delay_constant max="1.667e-9" in_port="mult_18x19.a" out_port="mult_18x19.out"/>
+              <delay_constant max="1.667e-9" in_port="mult_18x19.b" out_port="mult_18x19.out"/>
+            </pb_type>
+            <interconnect>
+              <direct name="a2a" input="two_mult_18x19.a" output="mult_18x19.a">
+                 </direct>
+              <direct name="b2b" input="two_mult_18x19.b" output="mult_18x19.b">
+                 </direct>
+              <direct name="out2out" input="mult_18x19.out" output="two_mult_18x19.out">
+                 </direct>
+            </interconnect>
+          </pb_type>
+          <interconnect>
+            <direct name="datain2a1" input="dsp_pb.datain[17:0]" output="two_mult_18x19[0].a">
+            </direct>
+            <direct name="datain2b1" input="dsp_pb.datain[36:18]" output="two_mult_18x19[0].b">
+            </direct>
+            <direct name="datain2a2" input="dsp_pb.datain[54:37]" output="two_mult_18x19[1].a">
+            </direct>
+            <direct name="datain2b2" input="dsp_pb.datain[73:55]" output="two_mult_18x19[1].b">
+            </direct>
+            <direct name="out2result" input="two_mult_18x19.out" output="dsp_pb.result[73:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- fixed-point multiplier-add-sum mode result = (bx * by) + (ax * ay) + chainin. chainout = result -->
+        <mode name="sop_2_mode">
+          <pb_type name="sop_2" num_pb="1" blif_model=".subckt int_sop_2">
+            <input name="reset" num_pins="1"/>
+            <input name="ax" num_pins="18"/>
+            <input name="ay" num_pins="19"/>
+            <input name="bx" num_pins="18"/>
+            <input name="by" num_pins="19"/>
+            <input name="chainin" num_pins="37"/>
+            <output name="result" num_pins="37"/>
+            <output name="chainout" num_pins="37"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="1.667e-9" in_port="sop_2.reset" out_port="sop_2.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_2.ax" out_port="sop_2.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_2.ay" out_port="sop_2.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_2.bx" out_port="sop_2.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_2.by" out_port="sop_2.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_2.chainin" out_port="sop_2.result"/>
+
+            <delay_constant max="1.667e-9" in_port="sop_2.reset" out_port="sop_2.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_2.ax" out_port="sop_2.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_2.ay" out_port="sop_2.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_2.bx" out_port="sop_2.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_2.by" out_port="sop_2.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_2.chainin" out_port="sop_2.chainout"/>
+
+            <T_setup value="1.891e-11" port="sop_2.ax" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_2.ay" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_2.bx" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_2.by" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_2.chainin" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_2.reset" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_2.result" clock="clk"/>
+
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_2.ax" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_2.ay" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_2.bx" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_2.by" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_2.chainin" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_2.reset" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_2.result" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="clk" input="dsp_pb.clk" output="sop_2.clk">
+            </direct>
+            <direct name="reset" input="dsp_pb.reset" output="sop_2.reset">
+            </direct>
+            <direct name="datain2ax" input="dsp_pb.datain[17:0]" output="sop_2.ax">
+            </direct>
+            <direct name="datain2ay" input="dsp_pb.datain[36:18]" output="sop_2.ay">
+            </direct>
+            <direct name="datain2bx" input="dsp_pb.datain[54:37]" output="sop_2.bx">
+            </direct>
+            <direct name="datain2by" input="dsp_pb.datain[73:55]" output="sop_2.by">
+            </direct>
+            <direct name="chainin"   input="dsp_pb.chainin[36:0]" output="sop_2.chainin">
+            </direct>
+            <direct name="dataout2result" input="sop_2.result" output="dsp_pb.result[36:0]">
+            </direct>
+            <direct name="chainout" input="sop_2.chainout" output="dsp_pb.chainout[36:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- fixed-point multiplier-add-sum mode result = (ax * ay) + bx + chainin. chainout = result. with scanin-scanout support -->
+        <mode name="mult_add_mode_18_19_36">
+          <pb_type name="mult_add" num_pb="1" blif_model=".subckt mult_add_int_18x19">
+            <input name="reset" num_pins="1"/>
+            <input name="ax" num_pins="18"/>
+            <input name="ay" num_pins="19"/>
+            <input name="bx" num_pins="36"/>
+            <input name="chainin" num_pins="64"/>
+            <input name="scanin" num_pins="19"/>
+            <output name="result" num_pins="64"/>
+            <output name="chainout" num_pins="64"/>
+            <output name="scanout" num_pins="19"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="1.667e-9" in_port="mult_add.reset" out_port="mult_add.result"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.ax" out_port="mult_add.result"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.ay" out_port="mult_add.result"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.bx" out_port="mult_add.result"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.chainin" out_port="mult_add.result"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.scanin" out_port="mult_add.result"/>
+
+            <delay_constant max="1.667e-9" in_port="mult_add.reset" out_port="mult_add.chainout"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.ax" out_port="mult_add.chainout"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.ay" out_port="mult_add.chainout"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.bx" out_port="mult_add.chainout"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.chainin" out_port="mult_add.chainout"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.scanin" out_port="mult_add.chainout"/>
+
+            <delay_constant max="1.667e-9" in_port="mult_add.reset" out_port="mult_add.scanout"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.ax" out_port="mult_add.scanout"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.ay" out_port="mult_add.scanout"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.bx" out_port="mult_add.scanout"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.chainin" out_port="mult_add.scanout"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.scanin" out_port="mult_add.scanout"/>
+
+            <T_setup value="1.891e-11" port="mult_add.ax" clock="clk"/>
+            <T_setup value="1.891e-11" port="mult_add.ay" clock="clk"/>
+            <T_setup value="1.891e-11" port="mult_add.bx" clock="clk"/>
+            <T_setup value="1.891e-11" port="mult_add.chainin" clock="clk"/>
+            <T_setup value="1.891e-11" port="mult_add.scanin" clock="clk"/>
+            <T_setup value="1.891e-11" port="mult_add.reset" clock="clk"/>
+            <T_setup value="1.891e-11" port="mult_add.result" clock="clk"/>
+
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="mult_add.ax" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="mult_add.ay" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="mult_add.bx" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="mult_add.chainin" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="mult_add.scanin" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="mult_add.reset" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="mult_add.result" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="clk" input="dsp_pb.clk" output="mult_add.clk">
+            </direct>
+            <direct name="reset" input="dsp_pb.reset" output="mult_add.reset">
+            </direct>
+            <direct name="datain2ax" input="dsp_pb.datain[17:0]" output="mult_add.ax">
+            </direct>
+            <direct name="datain2ay" input="dsp_pb.datain[36:18]" output="mult_add.ay">
+            </direct>
+            <direct name="datain2bx" input="dsp_pb.datain[72:37]" output="mult_add.bx">
+            </direct>
+            <direct name="chainin"   input="dsp_pb.chainin[63:0]" output="mult_add.chainin">
+            </direct>
+            <direct name="scanin"   input="dsp_pb.scanin[18:0]" output="mult_add.scanin">
+            </direct>
+            <direct name="dataout2result" input="mult_add.result" output="dsp_pb.result[63:0]">
+            </direct>
+            <direct name="chainout" input="mult_add.chainout" output="dsp_pb.chainout[63:0]">
+            </direct>
+            <direct name="scanout" input="mult_add.scanout" output="dsp_pb.scanout[18:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- fixed-point sum-of-4 mode result = (dx * dy) + (cx * cy) + (bx * by) + (ax * ay) + chainin. chainout = result -->
+        <mode name="sop_4_mode">
+          <pb_type name="sop_4" num_pb="1" blif_model=".subckt int_sop_4">
+            <input name="reset" num_pins="1"/>
+            <input name="ax" num_pins="9"/>
+            <input name="ay" num_pins="9"/>
+            <input name="bx" num_pins="9"/>
+            <input name="by" num_pins="9"/>
+            <input name="cx" num_pins="9"/>
+            <input name="cy" num_pins="9"/>
+            <input name="dx" num_pins="9"/>
+            <input name="dy" num_pins="9"/>
+            <input name="chainin" num_pins="64"/>
+            <output name="result" num_pins="64"/>
+            <output name="chainout" num_pins="64"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="1.667e-9" in_port="sop_4.reset" out_port="sop_4.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.ax" out_port="sop_4.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.ay" out_port="sop_4.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.bx" out_port="sop_4.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.by" out_port="sop_4.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.cx" out_port="sop_4.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.cy" out_port="sop_4.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.dx" out_port="sop_4.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.dy" out_port="sop_4.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.chainin" out_port="sop_4.result"/>
+
+            <delay_constant max="1.667e-9" in_port="sop_4.reset" out_port="sop_4.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.ax" out_port="sop_4.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.ay" out_port="sop_4.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.bx" out_port="sop_4.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.by" out_port="sop_4.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.cx" out_port="sop_4.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.cy" out_port="sop_4.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.dx" out_port="sop_4.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.dy" out_port="sop_4.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.chainin" out_port="sop_4.chainout"/>
+
+            <T_setup value="1.891e-11" port="sop_4.ax" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_4.ay" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_4.bx" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_4.by" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_4.cx" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_4.cy" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_4.dx" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_4.dy" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_4.chainin" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_4.reset" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_4.result" clock="clk"/>
+
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_4.ax" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_4.ay" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_4.bx" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_4.by" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_4.cx" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_4.cy" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_4.dx" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_4.dy" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_4.chainin" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_4.reset" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_4.result" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="clk" input="dsp_pb.clk" output="sop_4.clk">
+            </direct>
+            <direct name="reset" input="dsp_pb.reset" output="sop_4.reset">
+            </direct>
+            <direct name="datain2ax" input="dsp_pb.datain[8:0]" output="sop_4.ax">
+            </direct>
+            <direct name="datain2ay" input="dsp_pb.datain[17:9]" output="sop_4.ay">
+            </direct>
+            <direct name="datain2bx" input="dsp_pb.datain[26:18]" output="sop_4.bx">
+            </direct>
+            <direct name="datain2by" input="dsp_pb.datain[35:27]" output="sop_4.by">
+            </direct>
+            <direct name="datain2cx" input="dsp_pb.datain[44:36]" output="sop_4.cx">
+            </direct>
+            <direct name="datain2cy" input="dsp_pb.datain[53:45]" output="sop_4.cy">
+            </direct>
+            <direct name="datain2dx" input="dsp_pb.datain[62:54]" output="sop_4.dx">
+            </direct>
+            <direct name="datain2dy" input="dsp_pb.datain[71:63]" output="sop_4.dy">
+            </direct>
+            <direct name="chainin"   input="dsp_pb.chainin[63:0]" output="sop_4.chainin">
+            </direct>
+            <direct name="dataout2result" input="sop_4.result" output="dsp_pb.result[63:0]">
+            </direct>
+            <direct name="chainout" input="sop_4.chainout" output="dsp_pb.chainout[63:0]">
+            </direct>
+          </interconnect>
+        </mode>
+      </pb_type>
+
+      <interconnect>
+        <direct name="datain1" input="dsp.dsp_I1" output ="dsp_pb.datain[36:0]"/>
+        <direct name="datain2" input="dsp.dsp_I2" output ="dsp_pb.datain[73:37]"/>
+        <direct name="reset" input="dsp.reset" output="dsp_pb.reset"></direct>
+        <direct name="chainin" input="dsp.chainin"    output="dsp_pb.chainin"></direct>
+        <direct name="chainout" input="dsp_pb.chainout" output="dsp.chainout"></direct>
+        <direct name="scanin" input="dsp.scanin"    output="dsp_pb.scanin"></direct>
+        <direct name="scanout" input="dsp_pb.scanout" output="dsp.scanout"></direct>
+        <direct name="result" input="dsp_pb.result" output="dsp.result"></direct>
+        <direct name="clk" input="dsp.clk" output="dsp_pb.clk"></direct>
+      </interconnect>  
+    </pb_type>
+
+      
+    <interconnect>
+      <!--50% sparse crossbar means 50% of the lines can reach an actual input of the dsp 
+      We do this by splitting inputs into two buckets and having two full crossbars-->
+      <!--
+     <complete name="first_half" input="dsp_top.dsp_I1" output="dsp.dsp_I1">
+          <delay_constant max="333e-12" in_port="dsp_top.dsp_I1" out_port="dsp.dsp_I1"/>
+     </complete>
+
+      <complete name="second_half" input="dsp_top.dsp_I2" output="dsp.dsp_I2">
+          <delay_constant max="333e-12" in_port="dsp_top.dsp_I2" out_port="dsp.dsp_I2"/>
+      </complete>
+      -->
+      <direct name="datain1" input="dsp_top.dsp_I1" output ="dsp.dsp_I1"/>
+      <direct name="datain2" input="dsp_top.dsp_I2" output ="dsp.dsp_I2"/>
+
+      <direct name="reset" input="dsp_top.reset" output="dsp.reset"></direct>
+      <direct name="chainin" input="dsp_top.chainin" output="dsp.chainin">
+          <delay_constant max="1179e-12" in_port="dsp_top.chainin" out_port="dsp.chainin"/>
+      </direct>
+      <direct name="chainout" input="dsp.chainout" output="dsp_top.chainout">
+          <delay_constant max="1179e-12" in_port="dsp.chainout" out_port="dsp_top.chainout"/>
+      </direct>
+      <direct name="scanin" input="dsp_top.scanin" output="dsp.scanin">
+          <delay_constant max="1179e-12" in_port="dsp_top.scanin" out_port="dsp.scanin"/>
+      </direct>
+      <direct name="scanout" input="dsp.scanout" output="dsp_top.scanout">
+          <delay_constant max="1179e-12" in_port="dsp.scanout" out_port="dsp_top.scanout"/>
+      </direct>
+      <direct name="result" input="dsp.result" output="dsp_top.result"></direct>
+      <direct name="clk" input="dsp_top.clk" output="dsp.clk"></direct>
+    </interconnect>
+  </pb_type>
+    <!-- Define DSP slice end -->
+
+
+    <!-- Define fracturable memory begin -->
+    <!-- 
+    RAM blocks always have registered inputs. The input FFs appear before the address decoder & wordline driver,
+    and after the local input crossbar & level shifter.
+    RAM blocks optionally have registered outputs. The output FFs (if present) appear after the output crossbar.
+    If BRAM doesn't have registered outputs, then T_clk_to_q is the whole delay of the read/write operation.
+    If BRAM does have registered output, then T_clk_to_q is just the FF clk_to_q and then delay_constant
+    can be used to specify the whole delay of the read/write operation.
+
+    This RAM block has registered outputs.
+
+    The area and delay values of this RAM block were obtained (indirectly) from COFFE simulations.
+    COFFE only supports widths and depths that are powers of 2. For M20K (20 Kilobit BRAM), we need
+    the width to be 40 bits and depth to be 512 (for the logically widest mode: 512x40). We can't
+    simulate these dimensions directly in COFFE. So, we simulated and obtained the results for M32K
+    (32 Kilobits BRAM) and (16 Kilobits BRAM). Then we interpolated the results.
+    For delay, a linear interpolation was used, based on the size of the Memory (16K->20K->32K).
+    For area, the value was calculated using two interpolations: (1) port based (change in number of 
+    ports in going from 16K->20K->32K) and (2) number of bits based (change in number of bits in
+    going from 16K->20K->32K). The interpolation that resulted in the larger area was picked.
+    
+
+    Here are the equations used to calculate the delays based on COFFE results:
+    T_setup (inputs) = T_level_shifter + T_register_micro_setup = 32.3ps + 18.91ps = 51.21ps
+    T_clk_to_q (inputs) = T_register_micro_clk_to_q = 60.32ps
+    T_setup (outputs) = T_register_micro_setup = 18.91ps 
+    T_clk_to_q (outputs) = T_register_micro_clk_to_q = 60.32ps
+
+    (Register setup and clk_to_q timings are actually from the FF used in the logic cluster.)
+
+    T_read = T1 + T2 + T3
+    = max (Row decoder, Pre-charge time) + (Wordline driver + Bit line delay) + (Sense amp + Output crossbar)
+
+    * Bit line delay is included in self.RAM.samp.delay time in COFFE. The Sense amp delay is actually
+    self.RAM.samp_part2.delay
+
+    T_write = T1 + T2 + T3
+    = max (Row decoder, Pre-charge time) + (Wordline driver) + (Write driver)
+
+    delay_constant values model the internal limits of a block (the combinatorial delay).
+    delay_constant = max (T_read, T_write) 
+
+    Overall internal delay of the RAM is T_clk_to_q (inputs) + delay_constant + T_setup (outputs)
+    -->
+    <pb_type name="memory">
+      <input name="addr1" num_pins="11"/>
+      <input name="addr2" num_pins="11"/>
+      <input name="data" num_pins="40"/>
+      <input name="we1" num_pins="1"/>
+      <input name="we2" num_pins="1"/>
+      <output name="out" num_pins="40"/>
+      <clock name="clk" num_pins="1"/>
+      <!-- Specify single port mode first -->
+      <mode name="mem_512x40_sp">
+        <pb_type name="mem_512x40_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
+          <input name="addr" num_pins="9" port_class="address"/>
+          <input name="data" num_pins="40" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="40" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+
+          <T_setup value="-3.5e-11" port="mem_512x40_sp.addr" clock="clk"/>
+          <T_setup value="-3.5e-11" port="mem_512x40_sp.data" clock="clk"/>
+          <T_setup value="-3.5e-11" port="mem_512x40_sp.we" clock="clk"/>
+          <T_setup value="-3.5e-11" port="mem_512x40_sp.out" clock="clk"/>
+
+          <T_hold value="7.4e-11" port="mem_512x40_sp.addr" clock="clk"/>
+          <T_hold value="7.4e-11" port="mem_512x40_sp.data" clock="clk"/>
+          <T_hold value="7.4e-11" port="mem_512x40_sp.we" clock="clk"/>
+          <T_hold value="7.4e-11" port="mem_512x40_sp.out" clock="clk"/>
+
+          <T_clock_to_Q max="3.7e-10" port="mem_512x40_sp.addr" clock="clk"/>
+          <T_clock_to_Q max="3.7e-10" port="mem_512x40_sp.data" clock="clk"/>
+          <T_clock_to_Q max="3.7e-10" port="mem_512x40_sp.we" clock="clk"/>
+          <T_clock_to_Q max="3.7e-10" port="mem_512x40_sp.out" clock="clk"/>
+
+          <delay_constant max="0" in_port="mem_512x40_sp.addr" out_port="mem_512x40_sp.out"/>
+          <delay_constant max="0" in_port="mem_512x40_sp.data" out_port="mem_512x40_sp.out"/>
+          <delay_constant max="0" in_port="mem_512x40_sp.we"   out_port="mem_512x40_sp.out"/>
+
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[8:0]" output="mem_512x40_sp.addr">
+          </direct>
+          <direct name="data1" input="memory.data" output="mem_512x40_sp.data">
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_512x40_sp.we">
+          </direct>
+          <direct name="dataout1" input="mem_512x40_sp.out" output="memory.out">
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_512x40_sp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+
+      <mode name="mem_1024x20_sp">
+        <pb_type name="mem_1024x20_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
+          <input name="addr" num_pins="10" port_class="address"/>
+          <input name="data" num_pins="20" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="20" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+
+          <T_setup value="-3.5e-11" port="mem_1024x20_sp.addr" clock="clk"/>
+          <T_setup value="-3.5e-11" port="mem_1024x20_sp.data" clock="clk"/>
+          <T_setup value="-3.5e-11" port="mem_1024x20_sp.we" clock="clk"/>
+          <T_setup value="-3.5e-11" port="mem_1024x20_sp.out" clock="clk"/>
+
+          <T_hold value="7.4e-11" port="mem_1024x20_sp.addr" clock="clk"/>
+          <T_hold value="7.4e-11" port="mem_1024x20_sp.data" clock="clk"/>
+          <T_hold value="7.4e-11" port="mem_1024x20_sp.we" clock="clk"/>
+          <T_hold value="7.4e-11" port="mem_1024x20_sp.out" clock="clk"/>
+
+          <T_clock_to_Q max="3.7e-10" port="mem_1024x20_sp.addr" clock="clk"/>
+          <T_clock_to_Q max="3.7e-10" port="mem_1024x20_sp.data" clock="clk"/>
+          <T_clock_to_Q max="3.7e-10" port="mem_1024x20_sp.we" clock="clk"/>
+          <T_clock_to_Q max="3.7e-10" port="mem_1024x20_sp.out" clock="clk"/>
+
+          <delay_constant max="0" in_port="mem_1024x20_sp.addr" out_port="mem_1024x20_sp.out"/>
+          <delay_constant max="0" in_port="mem_1024x20_sp.data" out_port="mem_1024x20_sp.out"/>
+          <delay_constant max="0" in_port="mem_1024x20_sp.we"   out_port="mem_1024x20_sp.out"/>
+
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[9:0]" output="mem_1024x20_sp.addr">
+          </direct>
+          <direct name="data1" input="memory.data[19:0]" output="mem_1024x20_sp.data">
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_1024x20_sp.we">
+          </direct>
+          <direct name="dataout1" input="mem_1024x20_sp.out" output="memory.out[19:0]">
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_1024x20_sp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+
+      <mode name="mem_2048x10_sp">
+        <pb_type name="mem_2048x10_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
+          <input name="addr" num_pins="11" port_class="address"/>
+          <input name="data" num_pins="10" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="10" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+
+          <T_setup value="-3.5e-11" port="mem_2048x10_sp.addr" clock="clk"/>
+          <T_setup value="-3.5e-11" port="mem_2048x10_sp.data" clock="clk"/>
+          <T_setup value="-3.5e-11" port="mem_2048x10_sp.we" clock="clk"/>
+          <T_setup value="-3.5e-11" port="mem_2048x10_sp.out" clock="clk"/>
+
+          <T_hold value="7.4e-11" port="mem_2048x10_sp.addr" clock="clk"/>
+          <T_hold value="7.4e-11" port="mem_2048x10_sp.data" clock="clk"/>
+          <T_hold value="7.4e-11" port="mem_2048x10_sp.we" clock="clk"/>
+          <T_hold value="7.4e-11" port="mem_2048x10_sp.out" clock="clk"/>
+
+          <T_clock_to_Q max="3.7e-10" port="mem_2048x10_sp.addr" clock="clk"/>
+          <T_clock_to_Q max="3.7e-10" port="mem_2048x10_sp.data" clock="clk"/>
+          <T_clock_to_Q max="3.7e-10" port="mem_2048x10_sp.we" clock="clk"/>
+          <T_clock_to_Q max="3.7e-10" port="mem_2048x10_sp.out" clock="clk"/>
+
+          <delay_constant max="0" in_port="mem_2048x10_sp.addr" out_port="mem_2048x10_sp.out"/>
+          <delay_constant max="0" in_port="mem_2048x10_sp.data" out_port="mem_2048x10_sp.out"/>
+          <delay_constant max="0" in_port="mem_2048x10_sp.we"   out_port="mem_2048x10_sp.out"/>
+
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[10:0]" output="mem_2048x10_sp.addr">
+          </direct>
+          <direct name="data1" input="memory.data[9:0]" output="mem_2048x10_sp.data">
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_2048x10_sp.we">
+          </direct>
+          <direct name="dataout1" input="mem_2048x10_sp.out" output="memory.out[9:0]">
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_2048x10_sp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+
+      <!-- Specify true dual port mode next -->
+      <mode name="mem_1024x20_dp">
+        <pb_type name="mem_1024x20_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
+          <input name="addr1" num_pins="10" port_class="address1"/>
+          <input name="addr2" num_pins="10" port_class="address2"/>
+          <input name="data1" num_pins="20" port_class="data_in1"/>
+          <input name="data2" num_pins="20" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="20" port_class="data_out1"/>
+          <output name="out2" num_pins="20" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+
+          <T_setup value="-2.6e-11" port="mem_1024x20_dp.addr1" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_1024x20_dp.data1" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_1024x20_dp.we1" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_1024x20_dp.addr2" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_1024x20_dp.data2" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_1024x20_dp.we2" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_1024x20_dp.out1" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_1024x20_dp.out2" clock="clk"/>
+
+          <T_hold value="1.13e-10" port="mem_1024x20_dp.addr1" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_1024x20_dp.data1" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_1024x20_dp.we1" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_1024x20_dp.addr2" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_1024x20_dp.data2" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_1024x20_dp.we2" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_1024x20_dp.out1" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_1024x20_dp.out2" clock="clk"/>
+
+          <T_clock_to_Q max="3.57e-10" port="mem_1024x20_dp.addr1" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_1024x20_dp.data1" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_1024x20_dp.we1" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_1024x20_dp.addr2" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_1024x20_dp.data2" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_1024x20_dp.we2" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_1024x20_dp.out1" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_1024x20_dp.out2" clock="clk"/>
+
+          <delay_constant max="0" in_port="mem_1024x20_dp.addr1" out_port="mem_1024x20_dp.out1"/>
+          <delay_constant max="0" in_port="mem_1024x20_dp.data1" out_port="mem_1024x20_dp.out1"/>
+          <delay_constant max="0" in_port="mem_1024x20_dp.we1" out_port="mem_1024x20_dp.out1"/>
+          <delay_constant max="0" in_port="mem_1024x20_dp.addr2" out_port="mem_1024x20_dp.out2"/>
+          <delay_constant max="0" in_port="mem_1024x20_dp.data2" out_port="mem_1024x20_dp.out2"/>
+          <delay_constant max="0" in_port="mem_1024x20_dp.we2" out_port="mem_1024x20_dp.out2"/>
+
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[9:0]" output="mem_1024x20_dp.addr1">
+          </direct>
+          <direct name="address2" input="memory.addr2[9:0]" output="mem_1024x20_dp.addr2">
+          </direct>
+          <direct name="data1" input="memory.data[19:0]" output="mem_1024x20_dp.data1">
+          </direct>
+          <direct name="data2" input="memory.data[39:20]" output="mem_1024x20_dp.data2">
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_1024x20_dp.we1">
+          </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_1024x20_dp.we2">
+          </direct>
+          <direct name="dataout1" input="mem_1024x20_dp.out1" output="memory.out[19:0]">
+          </direct>
+          <direct name="dataout2" input="mem_1024x20_dp.out2" output="memory.out[39:20]">
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_1024x20_dp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+
+      <mode name="mem_2048x10_dp">
+        <pb_type name="mem_2048x10_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
+          <input name="addr1" num_pins="11" port_class="address1"/>
+          <input name="addr2" num_pins="11" port_class="address2"/>
+          <input name="data1" num_pins="10" port_class="data_in1"/>
+          <input name="data2" num_pins="10" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="10" port_class="data_out1"/>
+          <output name="out2" num_pins="10" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+
+          <T_setup value="-2.6e-11" port="mem_2048x10_dp.addr1" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_2048x10_dp.data1" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_2048x10_dp.we1" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_2048x10_dp.addr2" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_2048x10_dp.data2" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_2048x10_dp.we2" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_2048x10_dp.out1" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_2048x10_dp.out2" clock="clk"/>
+
+          <T_hold value="1.13e-10" port="mem_2048x10_dp.addr1" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_2048x10_dp.data1" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_2048x10_dp.we1" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_2048x10_dp.addr2" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_2048x10_dp.data2" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_2048x10_dp.we2" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_2048x10_dp.out1" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_2048x10_dp.out2" clock="clk"/>
+
+          <T_clock_to_Q max="3.57e-10" port="mem_2048x10_dp.addr1" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_2048x10_dp.data1" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_2048x10_dp.we1" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_2048x10_dp.addr2" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_2048x10_dp.data2" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_2048x10_dp.we2" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_2048x10_dp.out1" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_2048x10_dp.out2" clock="clk"/>
+
+          <delay_constant max="0" in_port="mem_2048x10_dp.addr1" out_port="mem_2048x10_dp.out1"/>
+          <delay_constant max="0" in_port="mem_2048x10_dp.data1" out_port="mem_2048x10_dp.out1"/>
+          <delay_constant max="0" in_port="mem_2048x10_dp.we1" out_port="mem_2048x10_dp.out1"/>
+          <delay_constant max="0" in_port="mem_2048x10_dp.addr2" out_port="mem_2048x10_dp.out2"/>
+          <delay_constant max="0" in_port="mem_2048x10_dp.data2" out_port="mem_2048x10_dp.out2"/>
+          <delay_constant max="0" in_port="mem_2048x10_dp.we2" out_port="mem_2048x10_dp.out2"/>
+
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[10:0]" output="mem_2048x10_dp.addr1">
+          </direct>
+          <direct name="address2" input="memory.addr2[10:0]" output="mem_2048x10_dp.addr2">
+          </direct>
+          <direct name="data1" input="memory.data[9:0]" output="mem_2048x10_dp.data1">
+          </direct>
+          <direct name="data2" input="memory.data[19:10]" output="mem_2048x10_dp.data2">
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_2048x10_dp.we1">
+          </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_2048x10_dp.we2">
+          </direct>
+          <direct name="dataout1" input="mem_2048x10_dp.out1" output="memory.out[9:0]">
+          </direct>
+          <direct name="dataout2" input="mem_2048x10_dp.out2" output="memory.out[19:10]">
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_2048x10_dp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+    </pb_type>
+    <!-- Define fracturable memory end -->
+
+    <pb_type name="tsv_hole">
+      <input name="I" num_pins="1"/>
+      <output name="O" num_pins="1"/>
+      <interconnect/>
+    </pb_type>
+
+  </complexblocklist>
+
+  <switchblocklist>
+    <!-- Stratix IV uses a uni-directional routing architecture with a Driver Input Mux (DIM) size of 12 (i.e.
+           each wire can be driven by one of 12 block/outputs or wires) for the L4s.
+           
+           In the Stratix IV architecture the long wires (L16 here) are accessible only from the short wires, 
+           and are not connected to the block pins (i.e. connection blocks). Furthermore, they only connect 
+           to switch blocks every 4 LABs (to avoid expensive deep via stacks).
+           We approximate the L16 DIM size as 40:1 (in reality it is a pair of 20:1 (?) muxes with a 2:1 swap mux
+           in front, which has nearly the same connectivity as a full 40:1).
+
+           L4 wires
+           ================
+           At a channel width of 300 there are 260 L4/L4prime wires. At an effective Fc_out of 0.075 
+           and 40 LAB outputs this yields:
+
+                40 * 2 = 80 outputs per channel  [2 LABs per-channel]
+
+                80 * 0.075 = 6 outputs drive each L4 wire [output connection block]
+
+           This leaves:
+
+                12 - 6 = 6 inputs to the DIMs from other routing wires [switch block]
+
+           Since L4s connect at every switch block, there are:
+
+                260 L4 wires per channel + direction which can drive wires at a particular switchblock
+                (via switchpoints 0, 1, 2, 3)
+
+           And for each direction (260 wires) only:
+
+               260 / 4 = 65 wires starting/ending per channel + direction at each switch block
+               (i.e. from each direction, north/south/east/west, there are 32 L4s starting, and 32 L4s ending; + 1 wire for the 65th)
+
+           Which we allocate as follows:
+
+                L4
+                =====
+                straight-through connection: 2 (from L4 or L16)
+                clock-wise turn            : 2 (from L4 or L16)
+                counter-clock-wise turn    : 2 (from L4 or L16)
+
+           L16 wires
+           =========
+           At a channel width of 300 there are 40 L16 wires (20 in each direction), which do not connect to the input/output connection blocks.
+           This leaves 40 inputs to the DIM to select from routing wires (long wires use larger DIMs to improve reachability,
+           the area cost is relatively small since they are so rare).
+
+           Since L16s only connect at every 4th switch block there are:
+
+                40 / 4 = 10 L16 wires per channel (5 in each direction) which can drive wires at a particular switchblock
+                (via switchpoints 0, 4, 8, 12)
+
+           And for each direction (20 wires) only:
+
+               40 / 16 = 2.5 => 2 wires starting/ending per channel + direction at each switch block
+               (i.e. from each direction, north/south/east/west, there is one L16 starting, and one L16 ending)
+           
+           We assign the 40 DIM inputs as follows:
+
+                L16
+                =====
+                straight-through connection:  3 (from L16)
+                straight-through connection: 11 (from L4)
+                clock-wise turn            :  3 (from L16)
+                clock-wise turn            : 10 (from L4)
+                counter clock-wise turn    :  3 (from L16)
+                counter clock-wise turn    : 10 (from L4)
+
+           Switch pattern
+           ==============
+           This switch block is based on the Wilton switch block (see Page 103 of Steve Wilton's PhD Thesis 
+           "Architecture and Algorithms for Field-Programmable Gate Arrays with Embedded Memory", 1997):
+
+                left-to-top: W - t
+                top-to-right: t + 1
+                right-to-bottom: 2*W - 2 - t
+                bottom-to-left: t + 1
+                left-to-right: t
+                top-to-bottom: t
+
+           Since Wilton assumed bidirectional routing (while we use unidirectional routing),
+           we mirror the clock-wise turns to match the counter-clock-wise specification.
+           -->
+    <switchblock name="wilton_turn_clockwise_core" type="unidir">
+      <switchblock_location type="CORE"/>
+      <switchfuncs>
+        <!-- Clock-wise turns -->
+        <func type="tl" formula="W-t"/>
+        <!-- top to left -->
+        <func type="rt" formula="t+1"/>
+        <!-- right to top -->
+        <func type="br" formula="2*W-2-t"/>
+        <!-- bottom to right -->
+        <func type="lb" formula="t+1"/>
+        <!-- left to bottom -->
+      </switchfuncs>
+      <!-- L16 drivers -->
+      <wireconn num_conns="3*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L16" to_switchpoint="0"/>
+      <wireconn num_conns="10*to" from_type="L4" from_switchpoint="0" to_type="L16" to_switchpoint="0"/>
+      <!-- L4 drivers 
+
+               Driving from L16 (few) to L4 (many) prefer driving from end-point of L16, although since there are many they will
+               all be multiply connected.
+               
+               Driving from L4 (many) to L4 (many) shuffle the switchpoints so the L4's are driven from a variety of switchpoints.
+               Since the actual number of L4s starting and ending is equal, using 'fixed' from_order would mean only switchpoint 0 -> 0
+               connections. A 'shuffled' order will mix-up the from switchpoints for more diversity.
+               -->
+      <wireconn num_conns="2*to" from_order="shuffled">
+        <from type="L16" switchpoint="0,12,8,4"/>
+        <from type="L4" switchpoint="0,1,2,3"/>
+        <to type="L4" switchpoint="0"/>
+      </wireconn>
+    </switchblock>
+    <switchblock name="wilton_turn_counter_clockwise_core" type="unidir">
+      <switchblock_location type="CORE"/>
+      <switchfuncs>
+        <!-- Counter-clock-wise turns -->
+        <func type="lt" formula="W-t"/>
+        <!-- left to top -->
+        <func type="tr" formula="t+1"/>
+        <!-- top to right -->
+        <func type="rb" formula="2*W-2-t"/>
+        <!-- right to bottom -->
+        <func type="bl" formula="t+1"/>
+        <!-- bottom to left -->
+      </switchfuncs>
+      <!-- L16 drivers -->
+      <wireconn num_conns="3*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L16" to_switchpoint="0"/>
+      <wireconn num_conns="10*to" from_type="L4" from_switchpoint="0" to_type="L16" to_switchpoint="0"/>
+      <!-- L4 drivers 
+
+               Driving from L16 (few) to L4 (many) prefer driving from end-point of L16, although since there are many they will
+               all be multiply connected.
+               
+               Driving from L4 (many) to L4 (many) shuffle the switchpoints so the L4's are driven from a variety of switchpoints.
+               Since the actual number of L4s starting and ending is equal, using 'fixed' from_order would mean only switchpoint 0 -> 0
+               connections. A 'shuffled' order will mix-up the from switchpoints for more diversity.
+
+               Note that a different from_switchpoints ordering is used to ensure a different shuffling occurs compared to 
+               wilton_turn_clockwise_core.
+               -->
+      <wireconn num_conns="2*to" from_order="shuffled">
+        <from type="L16" switchpoint="0,12,8,4"/>
+        <from type="L4" switchpoint="0,1,2,3"/>
+        <to type="L4" switchpoint="0"/>
+      </wireconn>
+    </switchblock>
+    <switchblock name="wilton_straight" type="unidir">
+      <switchblock_location type="EVERYWHERE"/>
+      <switchfuncs>
+        <!-- Straight -->
+        <func type="lr" formula="t"/>
+        <!-- left to right -->
+        <func type="tb" formula="t"/>
+        <!-- top to bottom -->
+        <func type="rl" formula="t"/>
+        <!-- right to left -->
+        <func type="bt" formula="t"/>
+        <!-- bottom to top -->
+      </switchfuncs>
+      <!-- L16 Drivers 
+                Note that we order the switchpoints in order of preference, since VPR currently
+                iterates through the source sets in order, such that we connect first to wires
+                ending at the switchblock (switchpoint 0), and then fallback to switchpoints
+                in decreasing distance from the drive point (if we have more to's than from's
+                it then wraps around).
+
+                Note also that we multiply the number of expected connections by 'to', since while usually
+                there is only one 'to' wire, occasionally there may be more, and we want to ensure they all
+                get the same number of connections.
+
+                For L16->L16:
+                  We allow any valid switchpoint to be used as the 'from' point.
+                  Allowing 'low' switchpoints like '4' may seem counter-intuitive (i.e. why not use a cheaper L4),
+                  but this makes it easier to bypass once on the L16 network (e.g. to get around congestion).
+           -->
+      <wireconn num_conns="3*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L16" to_switchpoint="0"/>
+      <wireconn num_conns="11*to" from_type="L4" from_switchpoint="0,3,2,1" to_type="L16" to_switchpoint="0"/>
+      <!-- L4 Drivers -->
+      <wireconn num_conns="2*to" from_order="shuffled">
+        <from type="L16" switchpoint="0,12,8,4"/>
+        <from type="L4" switchpoint="0"/>
+        <to type="L4" switchpoint="0"/>
+      </wireconn>
+      <!--<wireconn num_conns="1*to" from_type="L4" from_switchpoint="0" to_type="L4" to_switchpoint="0"/>-->
+      <!--<wireconn num_conns="1*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L4" to_switchpoint="0"/>-->
+    </switchblock>
+    <switchblock name="wilton_straight_corner" type="unidir">
+      <!-- Same as wilton straight, but turning around a corner -->
+      <switchblock_location type="CORNER"/>
+      <switchfuncs>
+        <!-- Counter-clock-wise turns -->
+        <func type="lt" formula="t"/>
+        <!-- left to top -->
+        <func type="tr" formula="t"/>
+        <!-- top to right -->
+        <func type="rb" formula="t"/>
+        <!-- right to bottom -->
+        <func type="bl" formula="t"/>
+        <!-- bottom to left -->
+        <!-- Clock-wise turns -->
+        <func type="tl" formula="t"/>
+        <!-- top to left -->
+        <func type="rt" formula="t"/>
+        <!-- right to top -->
+        <func type="br" formula="t"/>
+        <!-- bottom to right -->
+        <func type="lb" formula="t"/>
+        <!-- left to bottom -->
+      </switchfuncs>
+      <!-- L16 Drivers -->
+      <wireconn num_conns="3*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L16" to_switchpoint="0"/>
+      <wireconn num_conns="11*to" from_type="L4" from_switchpoint="0,3,2,1" to_type="L16" to_switchpoint="0"/>
+      <!-- L4 Drivers -->
+      <wireconn num_conns="2*to" from_order="shuffled">
+        <from type="L16" switchpoint="0,12,8,4"/>
+        <from type="L4" switchpoint="0"/>
+        <to type="L4" switchpoint="0"/>
+      </wireconn>
+      <!--<wireconn num_conns="1*to" from_type="L4" from_switchpoint="0" to_type="L4" to_switchpoint="0"/>-->
+      <!--<wireconn num_conns="1*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L4" to_switchpoint="0"/>-->
+    </switchblock>
+    <switchblock name="wilton_turn_fringe" type="unidir">
+      <!-- Non-corner perimeter SBs -->
+      <switchblock_location type="FRINGE"/>
+      <switchfuncs>
+        <!-- Counter-clock-wise turns -->
+        <func type="lt" formula="W-t"/>
+        <!-- left to top -->
+        <func type="tr" formula="t+1"/>
+        <!-- top to right -->
+        <func type="rb" formula="2*W-2-t"/>
+        <!-- right to bottom -->
+        <func type="bl" formula="t+1"/>
+        <!-- bottom to left -->
+        <!-- Clock-wise turns -->
+        <func type="tl" formula="W-t"/>
+        <!-- top to left -->
+        <func type="rt" formula="t+1"/>
+        <!-- right to top -->
+        <func type="br" formula="2*W-2-t"/>
+        <!-- bottom to right -->
+        <func type="lb" formula="t+1"/>
+        <!-- left to bottom -->
+      </switchfuncs>
+      <!-- We use 'max' style connections here to ensure there are no dangling wires, otherwise like core turns -->
+      <!-- L16 drivers -->
+      <wireconn num_conns="3*max(from,to)" from_type="L16" from_switchpoint="0,12,8,4" to_type="L16" to_switchpoint="0"/>
+      <wireconn num_conns="21*max(from,to)" from_type="L4" from_switchpoint="0" to_type="L16" to_switchpoint="0"/>
+      <!-- L4 drivers -->
+      <wireconn num_conns="1*max(from,to)" from_type="L16" from_switchpoint="0,12,8,4" from_order="fixed" to_type="L4" to_switchpoint="0"/>
+      <wireconn num_conns="1*max(from,to)" from_type="L4" from_switchpoint="0,1,2,3" from_order="shuffled" to_type="L4" to_switchpoint="0"/>
+    </switchblock>
+  </switchblocklist>
+
+  <clocks>
+    <clock buffer_size="auto" C_wire="2.5e-10"/>
+  </clocks>
+
+</architecture>
+
+
diff --git a/vtr_flow/arch/multi_die/aman_3d_limited.xml b/vtr_flow/arch/multi_die/aman_3d_limited.xml
new file mode 100644
index 00000000000..7d67b2c996c
--- /dev/null
+++ b/vtr_flow/arch/multi_die/aman_3d_limited.xml
@@ -0,0 +1,1579 @@
+<architecture>
+  <models>
+    <model name="single_port_ram">
+      <input_ports>
+        <port name="we" clock="clk" combinational_sink_ports="out"/>
+        <!-- control -->
+        <port name="addr" clock="clk" combinational_sink_ports="out"/>
+        <!-- address lines -->
+        <port name="data" clock="clk" combinational_sink_ports="out"/>
+        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
+        <port name="clk" is_clock="1"/>
+        <!-- memories are often clocked -->
+      </input_ports>
+      <output_ports>
+        <port name="out" clock="clk"/>
+        <!-- output can be broken down into smaller bit widths minimum size 1 -->
+      </output_ports>
+    </model>
+    <model name="dual_port_ram">
+      <input_ports>
+        <port name="we1" clock="clk" combinational_sink_ports="out1"/>
+        <!-- write enable -->
+        <port name="we2" clock="clk" combinational_sink_ports="out2"/>
+        <!-- write enable -->
+        <port name="addr1" clock="clk" combinational_sink_ports="out1"/>
+        <!-- address lines -->
+        <port name="addr2" clock="clk" combinational_sink_ports="out2"/>
+        <!-- address lines -->
+        <port name="data1" clock="clk" combinational_sink_ports="out1"/>
+        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
+        <port name="data2" clock="clk" combinational_sink_ports="out2"/>
+        <!-- data lines can be broken down into smaller bit widths minimum size 1 -->
+        <port name="clk" is_clock="1"/>
+        <!-- memories are often clocked -->
+      </input_ports>
+      <output_ports>
+        <port name="out1" clock="clk"/>
+        <!-- output can be broken down into smaller bit widths minimum size 1 -->
+        <port name="out2" clock="clk"/>
+        <!-- output can be broken down into smaller bit widths minimum size 1 -->
+      </output_ports>
+    </model>
+    <!-- Used inside DSPs. 
+         Fixed point multiplication.
+         ODIN infers these when * sign appears in RTL. -->
+    <model name="multiply">
+      <input_ports>
+        <port name="a" combinational_sink_ports="out"/>
+        <port name="b" combinational_sink_ports="out"/>
+      </input_ports>
+      <output_ports>
+        <port name="out"/>
+      </output_ports>
+    </model>
+    <!--A mode in DSP slice-->
+    <model name="int_sop_2">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="ax" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="ay" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bx" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="by" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="chainin" clock="clk" combinational_sink_ports="result chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+      </output_ports>
+    </model>
+    <!--A mode in DSP slice-->
+    
+    <model name="mult_add_int_18x19">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="ax" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="ay" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="bx" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="chainin" clock="clk" combinational_sink_ports="result scanout chainout"/>
+        <port name="scanin" clock="clk" combinational_sink_ports="result scanout chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+        <port name="scanout"/>
+      </output_ports>
+    </model>
+    <!--A mode in DSP slice-->
+    <model name="int_sop_4">
+      <input_ports>
+        <port name="clk" is_clock="1"/>  
+        <port name="reset" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="ax" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="ay" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="bx" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="by" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="cx" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="cy" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="dx" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="dy" clock="clk" combinational_sink_ports="result chainout"/>
+        <port name="chainin" clock="clk" combinational_sink_ports="result chainout"/>
+      </input_ports>
+      <output_ports>
+        <port name="result" clock="clk"/>
+        <port name="chainout"/>
+      </output_ports>
+    </model>
+  </models>
+    <!-- Fixed point MAC inside DSP slices -->
+  <tiles>
+    <tile name="io" area="0">
+      <sub_tile name="io" capacity="8">
+        <equivalent_sites>
+          <site pb_type="io" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="outpad" num_pins="1"/>
+        <output name="inpad" num_pins="1"/>
+        <clock name="clock" num_pins="1"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
+        <pinlocations pattern="custom">
+          <loc side="left" layer_offset="1">io.inpad</loc>
+          <loc side="left">io.outpad io.clock</loc>
+          <loc side="top" layer_offset="1">io.inpad</loc>
+          <loc side="top">io.outpad io.clock</loc>
+          <loc side="right" layer_offset="1">io.inpad</loc>
+          <loc side="right">io.outpad io.clock</loc>
+          <loc side="bottom" layer_offset="1">io.inpad</loc>
+          <loc side="bottom">io.outpad io.clock</loc>
+        </pinlocations>
+      </sub_tile>
+    </tile>
+    <tile name="clb" height="1" width="1" area="27905">
+      <sub_tile name="clb">
+        <equivalent_sites>
+          <site pb_type="clb" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="I1" num_pins="10" equivalent="full"/>
+        <input name="I2" num_pins="10" equivalent="full"/>
+        <input name="I3" num_pins="10" equivalent="full"/>
+        <input name="I4" num_pins="10" equivalent="full"/>
+        <output name="O" num_pins="20" equivalent="instance"/>
+        <clock name="clk" num_pins="1"/>
+        <fc in_type="frac" in_val="0.2" out_type="frac" out_val="0.025"/>
+        <!-- Two sided connectivity CLB architecture--> 
+        <pinlocations pattern="custom">
+          <loc side="right">clb.I1 clb.I3 clb.clk clb.O[9:6]</loc>
+          <loc side="right" layer_offset="1">clb.O[5:0]</loc>
+          <loc side="bottom">clb.I2 clb.I4 clb.clk clb.O[19:16]</loc>
+          <loc side="bottom" layer_offset="1">clb.O[15:10]</loc>    
+        </pinlocations>
+      </sub_tile>
+    </tile>
+    <tile name="dsp_top" height="1" width="1" area="253779">
+      <sub_tile name="dsp_top">
+        <equivalent_sites>
+          <site pb_type="dsp_top" pin_mapping="direct"/>
+        </equivalent_sites>
+        <input name="reset" num_pins="1" is_non_clock_global="true"/>
+        <input name="dsp_I1" num_pins="37" />
+        <input name="dsp_I2" num_pins="37" />
+        <input name="chainin" num_pins="64"/>
+        <input name="scanin" num_pins="27"/>
+        <output name="result" num_pins="74"/>
+        <output name="chainout" num_pins="64"/>
+        <output name="scanout" num_pins="27"/>
+        <clock name="clk" num_pins="1"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10">
+          <!-- clock pins and chain ports do not connect to local routing -->
+          <fc_override port_name="clk" fc_type="frac" fc_val="0"/>
+          <fc_override port_name="chainin" fc_type="frac" fc_val="0"/>
+          <fc_override port_name="chainout" fc_type="frac" fc_val="0"/>
+          <fc_override port_name="scanin" fc_type="frac" fc_val="0"/>
+          <fc_override port_name="scanout" fc_type="frac" fc_val="0"/>
+        </fc>
+        <pinlocations pattern="custom">
+            <loc side="left">dsp_top.dsp_I1 dsp_top.reset</loc>
+            <loc side="right">dsp_top.dsp_I2 dsp_top.clk</loc>
+            <loc side="top">dsp_top.chainin dsp_top.scanin</loc>
+            <loc side="bottom">dsp_top.chainout dsp_top.scanout</loc>
+            <loc side="right" layer_offset="1">dsp_top.result[36:0]</loc>
+            <loc side="left" layer_offset="1">dsp_top.result[73:37]</loc>
+        </pinlocations>
+      </sub_tile>
+    </tile>
+    <tile name="memory" height="1" width="1" area="137668">
+      <sub_tile name="memory">
+        <equivalent_sites>
+          <site pb_type="memory" pin_mapping="direct"/>
+        </equivalent_sites>
+      <input name="addr1" num_pins="11"/>
+      <input name="addr2" num_pins="11"/>
+      <input name="data" num_pins="40"/>
+      <input name="we1" num_pins="1"/>
+      <input name="we2" num_pins="1"/>
+      <output name="out" num_pins="40"/>
+      <clock name="clk" num_pins="1"/>
+        <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10">
+          <fc_override fc_type="frac" fc_val="0" port_name="clk"/>
+        </fc>  
+        <pinlocations pattern="custom">
+          <loc side="top"> memory.addr1[0] memory.addr1[8] memory.addr2[5] memory.data[2] memory.data[10] memory.data[18] memory.data[26] memory.data[34] memory.clk memory.addr1[1] memory.addr1[9] memory.addr2[6] memory.data[3] memory.data[11] memory.data[19] memory.data[27] memory.data[35]</loc>
+          <loc side="right"> memory.addr1[2] memory.addr1[10] memory.addr2[7] memory.data[4] memory.data[12] memory.data[20] memory.data[28] memory.data[36] memory.addr1[3] memory.addr2[0] memory.addr2[8] memory.data[5] memory.data[13] memory.data[21] memory.data[29] memory.data[37]</loc>
+          <loc side="bottom"> memory.addr1[4] memory.addr2[1] memory.addr2[9] memory.data[6] memory.data[14] memory.data[22] memory.data[30] memory.data[38] memory.addr1[5] memory.addr2[2] memory.addr2[10] memory.data[7] memory.data[15] memory.data[23] memory.data[31] memory.data[39]</loc>
+          <loc side="left" > memory.addr1[6] memory.addr2[3] memory.data[0] memory.data[8] memory.data[16] memory.data[24] memory.data[32] memory.we1 memory.addr1[7] memory.addr2[4] memory.data[1] memory.data[9] memory.data[17] memory.data[25] memory.data[33] memory.we2</loc>
+
+          <loc side="top" layer_offset="1"> memory.out[0] memory.out[8] memory.out[16] memory.out[24] memory.out[32] memory.out[1] memory.out[9] memory.out[17] memory.out[25] memory.out[33]</loc>
+          <loc side="right" layer_offset="1"> memory.out[2] memory.out[10] memory.out[18] memory.out[26] memory.out[34] memory.out[3] memory.out[11] memory.out[19] memory.out[27] memory.out[35]</loc>
+          <loc side="bottom" layer_offset="1"> memory.out[4] memory.out[12] memory.out[20] memory.out[28] memory.out[36] memory.out[5] memory.out[13] memory.out[21] memory.out[29] memory.out[37]</loc>
+          <loc side="left" layer_offset="1"> memory.out[6] memory.out[14] memory.out[22] memory.out[30] memory.out[38] memory.out[7] memory.out[15] memory.out[23] memory.out[31] memory.out[39]</loc>
+
+        </pinlocations>
+      </sub_tile>
+    </tile>
+    <tile name="tsv_hole" height="2" width="2" area="137668">
+      <sub_tile name="tsv_hole">
+        <equivalent_sites>
+          <site pb_type="tsv_hole"/>
+        </equivalent_sites>
+        <input name="in" num_pins="1"/>
+        <output name="out" num_pins="1"/>
+        <fc in_type="abs" in_val="0" out_type="abs" out_val="0"/>
+      </sub_tile>
+    </tile>
+  </tiles>
+  <!-- ODIN II specific config ends -->
+  <layout>
+    <!-- Physical descriptions begin -->
+    <fixed_layout name="coffe_7nm" width="328" height="288">
+      <layer die="0">
+        <perimeter type="io" priority="101"/>
+      
+        <corners type="EMPTY" priority="102"/>
+
+        <fill type="clb" priority="10"/>
+
+        <col type="memory" startx="11" starty="1" repeatx="41" priority="20"/>
+        <col type="memory" startx="25" starty="1" repeatx="41" priority="20"/>
+        <col type="memory" startx="37" starty="1" repeatx="41" priority="20"/>
+
+        <col type="dsp_top" startx="18" starty="1" repeatx="41" priority="20"/>
+        <col type="dsp_top" startx="31" starty="1" repeatx="41" priority="20"/>
+
+        <!-- PW -->
+        <col type="tsv_hole" startx="8" starty="5" repeatx="13" incry="12" priority="103"/>
+        
+        <!-- GND -->
+        <col type="tsv_hole" startx="14" starty="11" repeatx="13" incry="12" priority="103"/>
+
+      </layer>
+      <layer die="1">
+        <perimeter type="io" priority="101"/>
+      
+        <corners type="EMPTY" priority="102"/>
+
+        <fill type="clb" priority="10"/>
+
+        <col type="memory" startx="11" starty="1" repeatx="41" priority="20"/>
+        <col type="memory" startx="25" starty="1" repeatx="41" priority="20"/>
+        <col type="memory" startx="37" starty="1" repeatx="41" priority="20"/>
+
+        <col type="dsp_top" startx="18" starty="1" repeatx="41" priority="20"/>
+        <col type="dsp_top" startx="31" starty="1" repeatx="41" priority="20"/>
+        
+      </layer>
+    </fixed_layout>
+  </layout>
+  <device>
+    <sizing R_minW_nmos="13090" R_minW_pmos="19086.83"/>
+    <area grid_logic_tile_area="0"/>
+    <chan_width_distr>
+      <x distr="uniform" peak="1.000000"/>
+      <y distr="uniform" peak="1.000000"/>
+    </chan_width_distr>
+    <switch_block type="custom"/>
+    <connection_block input_switch_name="ipin_cblock" input_inter_die_switch_name="die_connection"/>
+  </device>
+  <switchlist>
+    <switch type="mux" name="L4_driver" R="0.0" Cin="0.0" Cout="0.0" Tdel="9.877e-11" mux_trans_size="2.6482996805637553" buf_size="18.744014602932605"/>
+    <switch type="mux" name="L4_inter_layer_driver" R="0.0" Cin="0.0" Cout="0.0" Tdel="17.177e-11" mux_trans_size="2.6482996805637553" buf_size="18.744014602932605"/>
+    <!-- Delay of L16 driver is scaled from L4 by a factor of 1.5x (based on numbers from the Titan Stratix IV architecture file)
+   Area numbers will not be totally accurate because of the same buf_size -->
+    <switch type="mux" name="L16_driver" R="0.0" Cin="0.0" Cout="0.0" Tdel="2.016e-10" mux_trans_size="3.1851297470059468" buf_size="39.327334265524485"/>
+    <switch type="mux" name="L16_inter_layer_driver" R="0.0" Cin="0.0" Cout="0.0" Tdel="2.746e-10" mux_trans_size="3.1851297470059468" buf_size="39.327334265524485"/>
+    <switch type="mux" name="ipin_cblock" R="0.0" Cout="0.0" Cin="0.0" Tdel="5.636e-11" mux_trans_size="2.008" buf_size="9.624436045683868"/>
+    <switch type="mux" name="die_connection" R="0.0" Cout="0.0" Cin="0.0" Tdel="130e-12" mux_trans_size="1.508" buf_size="11.71"/>
+  </switchlist>
+  <segmentlist>
+    <segment name="L4" freq="280" length="4" type="unidir" Rmetal="0.0" Cmetal="0.0">
+      <mux name="L4_driver"/>
+      <mux_inter_die name="L4_inter_layer_driver"/>
+      <sb type="pattern">1 1 1 1 1</sb>
+      <cb type="pattern">1 1 1 1</cb>
+    </segment>
+    <segment name="L16" freq="40" length="16" type="unidir" Rmetal="0.0" Cmetal="0.0">
+      <mux name="L16_driver"/>
+      <mux_inter_die name="L16_inter_layer_driver"/>
+      <!-- Vias from the top of the metal stack (global layers, where the long wires are 
+           implemented) down to the middle/bottom of the metal stack (semi-global layers, 
+           where the short wires are implemented) are expensive and restrictive.
+           As a result Stratix IV only places long wire switch blocks every 4 LABs -->
+      <sb type="pattern">1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1</sb>
+      <!-- For the same reasons, long wires do not connect to block pins in Stratix IV -->
+      <cb type="pattern">0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0</cb>
+    </segment>
+  </segmentlist>
+  <directlist>
+    <!-- Direct connect from one DSP to the DSP directly below it -->
+    <direct name="dsp_out_chain" from_pin="dsp_top.chainout" from_side="bottom" to_pin="dsp_top.chainin" to_side="top" x_offset="0" y_offset="-4" z_offset="0"/>
+    <direct name="dsp_in_chain" from_pin="dsp_top.scanout" from_side="bottom" to_pin="dsp_top.scanin" to_side="top" x_offset="0" y_offset="-4" z_offset="0"/>
+  </directlist>
+
+  <complexblocklist>
+    <!-- Define I/O pads begin -->
+    <!-- Not sure of the area of an I/O (varies widely), and it's not relevant to the design of the FPGA core, so we're setting it to 0. -->
+    <pb_type name="io">
+      <input name="outpad" num_pins="1"/>
+      <output name="inpad" num_pins="1"/>
+      <clock name="clock" num_pins="1"/>
+      <!-- IOs can operate as either inputs or outputs.
+       Delays below come from Ian Kuon. They are small, so they should be interpreted as
+       the delays to and from registers in the I/O (and generally I/Os are registered 
+       today and that is when you timing analyze them).
+       -->
+      <mode name="inpad">
+        <pb_type name="inpad" blif_model=".input" num_pb="1">
+          <output name="inpad" num_pins="1"/>
+        </pb_type>
+        <interconnect>
+          <direct name="inpad" input="inpad.inpad" output="io.inpad">
+            <delay_constant max="4.243e-11" in_port="inpad.inpad" out_port="io.inpad"/>
+          </direct>
+        </interconnect>
+      </mode>
+      <mode name="outpad">
+        <pb_type name="outpad" blif_model=".output" num_pb="1">
+          <input name="outpad" num_pins="1"/>
+        </pb_type>
+        <interconnect>
+          <direct name="outpad" input="io.outpad" output="outpad.outpad">
+            <delay_constant max="1.394e-11" in_port="io.outpad" out_port="outpad.outpad"/>
+          </direct>
+        </interconnect>
+      </mode>
+      <!-- Every input pin is driven by 15% of the tracks in a channel, every output pin is driven by 10% of the tracks in a channel -->
+      <!-- IOs go on the periphery of the FPGA, for consistency, 
+          make it physically equivalent on all sides so that only one definition of I/Os is needed.
+          If I do not make a physically equivalent definition, then I need to define 4 different I/Os, one for each side of the FPGA
+        -->
+      <!-- Place I/Os on the sides of the FPGA -->
+    </pb_type>
+    <!-- Define I/O pads ends -->
+    <!-- Define general purpose logic block (CLB) begin -->
+    <pb_type name="clb">
+      <input name="I1" num_pins="10" equivalent="full"/>
+      <input name="I2" num_pins="10" equivalent="full"/>
+      <input name="I3" num_pins="10" equivalent="full"/>
+      <input name="I4" num_pins="10" equivalent="full"/>
+      <output name="O" num_pins="20" equivalent="instance"/>
+      <clock name="clk" num_pins="1"/>  <!-- Basic logic element definition -->
+      <pb_type name="fle" num_pb="10">
+        <input name="in_A" num_pins="1"/>
+        <input name="in_B" num_pins="1"/>
+        <input name="in_C" num_pins="1"/>
+        <input name="in_D" num_pins="1"/>
+        <input name="in_E" num_pins="1"/>
+        <input name="in_F" num_pins="1"/>
+        <output name="out_local" num_pins="2"/>
+        <output name="out_routing" num_pins="2"/>
+        <clock name="clk" num_pins="1"/> 
+        <mode name="n1_lut6">
+          <pb_type name="ble6" num_pb="1">
+            <input name="in_A" num_pins="1"/>
+            <input name="in_B" num_pins="1"/>
+            <input name="in_C" num_pins="1"/>
+            <input name="in_D" num_pins="1"/>
+            <input name="in_E" num_pins="1"/>
+            <input name="in_F" num_pins="1"/>
+            <output name="out_local" num_pins="1"/>
+            <output name="out_routing" num_pins="2"/>
+            <clock name="clk" num_pins="1"/> 
+            <pb_type name="lut6" blif_model=".names" num_pb="1" class="lut">
+              <input name="in" num_pins="6" port_class="lut_in"/>
+              <output name="out" num_pins="1" port_class="lut_out"/>
+              <!-- We define the LUT delays on the LUT pins instead of through the LUT -->
+              <delay_matrix type="max" in_port="lut6.in" out_port="lut6.out">
+                 0
+                 0
+                 0
+                 0
+                 0
+                 0
+              </delay_matrix>
+            </pb_type>
+            <pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
+              <input name="D" num_pins="1" port_class="D"/>
+              <output name="Q" num_pins="1" port_class="Q"/>
+              <clock name="clk" num_pins="1" port_class="clock"/>
+              <T_setup value="1.891e-11" port="ff.D" clock="clk"/>
+              <T_clock_to_Q max="6.032e-11" port="ff.Q" clock="clk"/>
+            </pb_type>
+            <interconnect>
+              <direct name="direct0" input="ble6.in_A" output="lut6.in[0:0]">
+                <delay_constant max="1.1287999999999999e-10" in_port="ble6.in_A" out_port="lut6.in[0:0]" />
+              </direct>
+              <direct name="direct1" input="ble6.in_B" output="lut6.in[1:1]">
+                <delay_constant max="1.1072500000000001e-10" in_port="ble6.in_B" out_port="lut6.in[1:1]" />
+              </direct>
+              <direct name="direct3" input="ble6.in_D" output="lut6.in[3:3]">
+                <delay_constant max="8.1212e-11" in_port="ble6.in_D" out_port="lut6.in[3:3]" />
+              </direct>
+              <direct name="direct4" input="ble6.in_E" output="lut6.in[4:4]">
+                <delay_constant max="7.961e-11" in_port="ble6.in_E" out_port="lut6.in[4:4]" />
+              </direct>
+              <direct name="direct5" input="ble6.in_F" output="lut6.in[5:5]">
+                <delay_constant max="4.9300999999999996e-11" in_port="ble6.in_F" out_port="lut6.in[5:5]" />
+              </direct>
+              <!--Clock -->
+              <direct name="direct6" input="ble6.clk" output="ff.clk"/>
+              <!-- Register feedback mux -->   
+              <mux name="mux1" input="ble6.in_C ff.Q" output="lut6.in[2:2]">
+                <delay_constant max="1.1347e-10" in_port="ble6.in_C" out_port="lut6.in[2:2]" />
+                <delay_constant max="1.1347e-10" in_port="ff.Q" out_port="lut6.in[2:2]" />  
+              </mux>
+              <!-- FF input selection mux -->
+              <mux name="2" input="lut6.out ble6.in_C" output="ff.D">
+                <delay_constant max="1.74588e-11" in_port="lut6.out" out_port="ff.D" />
+                <delay_constant max="1.74588e-11" in_port="ble6.in_C" out_port="ff.D" />
+              </mux>
+              <!-- BLE output (local) -->
+              <mux name="mux3" input="ff.Q lut6.out" output="ble6.out_local">
+                <delay_constant max="1.346e-10" in_port="ff.Q" out_port="ble6.out_local" />
+                <delay_constant max="1.346e-10" in_port="lut6.out" out_port="ble6.out_local" />
+              </mux>
+              <!-- BLE output (routing 1) --> 
+              <mux name="mux4" input="ff.Q lut6.out" output="ble6.out_routing[0:0]">
+                <delay_constant max="3.771e-11" in_port="ff.Q" out_port="ble6.out_routing[0:0]" />
+                <delay_constant max="3.771e-11" in_port="lut6.out" out_port="ble6.out_routing[0:0]" />
+              </mux>
+              <!-- BLE output (routing 2) --> 
+              <mux name="mux5" input="ff.Q lut6.out" output="ble6.out_routing[1:1]">
+                <delay_constant max="3.771e-11" in_port="ff.Q" out_port="ble6.out_routing[1:1]" />
+                <delay_constant max="3.771e-11" in_port="lut6.out" out_port="ble6.out_routing[1:1]" />
+              </mux>
+            </interconnect>
+          </pb_type>
+          <interconnect>
+            <direct name="direct1" input="fle.in_A" output="ble6.in_A"/>
+            <direct name="direct2" input="fle.in_B" output="ble6.in_B"/>
+            <direct name="direct3" input="fle.in_C" output="ble6.in_C"/>
+            <direct name="direct4" input="fle.in_D" output="ble6.in_D"/>
+            <direct name="direct5" input="fle.in_E" output="ble6.in_E"/>
+            <direct name="direct6" input="fle.in_F" output="ble6.in_F"/>
+            <direct name="direct7" input="ble6.out_local" output="fle.out_local[0:0]"/>
+            <direct name="direct8" input="ble6.out_routing" output="fle.out_routing"/>
+            <direct name="direct9" input="fle.clk" output="ble6.clk"/>
+          </interconnect>
+        </mode>  
+        <mode name="n2_lut5">
+          <pb_type name="lut5inter" num_pb="1">
+            <input name="in_A" num_pins="1"/>
+            <input name="in_B" num_pins="1"/>
+            <input name="in_C" num_pins="1"/>
+            <input name="in_D" num_pins="1"/>
+            <input name="in_E" num_pins="1"/>
+            <output name="out_local" num_pins="2"/>
+            <output name="out_routing" num_pins="2"/>
+            <clock name="clk" num_pins="1"/> 
+            <pb_type name="ble5" num_pb="2">
+              <input name="in_A" num_pins="1"/>
+              <input name="in_B" num_pins="1"/>
+              <input name="in_C" num_pins="1"/>
+              <input name="in_D" num_pins="1"/>
+              <input name="in_E" num_pins="1"/>
+              <output name="out_local" num_pins="1"/>
+              <output name="out_routing" num_pins="1"/>
+              <clock name="clk" num_pins="1"/> 
+              <pb_type name="lut5" blif_model=".names" num_pb="1" class="lut">
+                <input name="in" num_pins="5" port_class="lut_in"/>
+                <output name="out" num_pins="1" port_class="lut_out"/>
+                <!-- We define the LUT delays on the LUT pins instead of through the LUT -->
+                <delay_matrix type="max" in_port="lut5.in" out_port="lut5.out">
+                   0
+                   0
+                   0
+                   0
+                   0
+                </delay_matrix>
+              </pb_type>
+              <pb_type name="ff" blif_model=".latch" num_pb="1" class="flipflop">
+                <input name="D" num_pins="1" port_class="D"/>
+                <output name="Q" num_pins="1" port_class="Q"/>
+                <clock name="clk" num_pins="1" port_class="clock"/>
+                <T_setup value="1.891e-11" port="ff.D" clock="clk"/>
+                <T_clock_to_Q max="6.032e-11" port="ff.Q" clock="clk"/>
+              </pb_type>
+              <interconnect>
+                <direct name="direct0" input="ble5.in_A" output="lut5.in[0:0]">
+                  <delay_constant max="1.1287999999999999e-10" in_port="ble5.in_A" out_port="lut5.in[0:0]" />
+                </direct>
+                <direct name="direct1" input="ble5.in_B" output="lut5.in[1:1]">
+                  <delay_constant max="1.1072500000000001e-10" in_port="ble5.in_B" out_port="lut5.in[1:1]" />
+                </direct>
+                <direct name="direct3" input="ble5.in_D" output="lut5.in[3:3]">
+                  <delay_constant max="8.1212e-11" in_port="ble5.in_D" out_port="lut5.in[3:3]" />
+                </direct>
+                <direct name="direct4" input="ble5.in_E" output="lut5.in[4:4]">
+                  <delay_constant max="7.961e-11" in_port="ble5.in_E" out_port="lut5.in[4:4]" />
+                </direct>
+                  <!--Clock -->
+                <direct name="direct5" input="ble5.clk" output="ff.clk"/>
+                <!-- Register feedback mux -->   
+                <mux name="mux1" input="ble5.in_C ff.Q" output="lut5.in[2:2]">
+                  <delay_constant max="1.1347e-10" in_port="ble5.in_C" out_port="lut5.in[2:2]" />
+                  <delay_constant max="1.1347e-10" in_port="ff.Q" out_port="lut5.in[2:2]" />  
+                </mux>
+                <!-- FF input selection mux -->
+                <mux name="2" input="lut5.out ble5.in_C" output="ff.D">
+                  <delay_constant max="1.74588e-11" in_port="lut5.out" out_port="ff.D" />
+                  <delay_constant max="1.74588e-11" in_port="ble5.in_C" out_port="ff.D" />
+                </mux>
+                <!-- BLE output (local) -->
+                <mux name="mux3" input="ff.Q lut5.out" output="ble5.out_local">
+                  <delay_constant max="1.346e-10" in_port="ff.Q" out_port="ble5.out_local" />
+                  <delay_constant max="1.346e-10" in_port="lut5.out" out_port="ble5.out_local" />
+                </mux>
+                <!-- BLE output (routing 1) --> 
+                <mux name="mux4" input="ff.Q lut5.out" output="ble5.out_routing[0:0]">
+                  <delay_constant max="3.771e-11" in_port="ff.Q" out_port="ble5.out_routing[0:0]" />
+                  <delay_constant max="3.771e-11" in_port="lut5.out" out_port="ble5.out_routing[0:0]" />
+                </mux>
+              </interconnect>
+            </pb_type>
+            <interconnect>
+              <direct name="direct1" input="lut5inter.in_A" output="ble5[0:0].in_A"/>
+              <direct name="direct2" input="lut5inter.in_B" output="ble5[0:0].in_B"/>
+              <direct name="direct3" input="lut5inter.in_C" output="ble5[0:0].in_C"/>
+              <direct name="direct4" input="lut5inter.in_D" output="ble5[0:0].in_D"/>
+              <direct name="direct5" input="lut5inter.in_E" output="ble5[0:0].in_E"/>
+              <direct name="direct6" input="lut5inter.in_A" output="ble5[1:1].in_A"/>
+              <direct name="direct7" input="lut5inter.in_B" output="ble5[1:1].in_B"/>
+              <direct name="direct8" input="lut5inter.in_C" output="ble5[1:1].in_C"/>
+              <direct name="direct9" input="lut5inter.in_D" output="ble5[1:1].in_D"/>
+              <direct name="direct10" input="lut5inter.in_E" output="ble5[1:1].in_E"/>
+              <direct name="direct11" input="ble5[1:0].out_local" output="lut5inter.out_local"/>
+              <direct name="direct12" input="ble5[1:0].out_routing" output="lut5inter.out_routing"/>
+              <complete name="complete1" input="lut5inter.clk" output="ble5[1:0].clk"/> 
+            </interconnect>
+          </pb_type>
+          <interconnect>
+            <direct name="direct1" input="fle.in_A" output="lut5inter.in_A"/>
+            <direct name="direct2" input="fle.in_B" output="lut5inter.in_B"/>
+            <direct name="direct3" input="fle.in_C" output="lut5inter.in_C"/>
+            <direct name="direct4" input="fle.in_D" output="lut5inter.in_D"/>
+            <direct name="direct5" input="fle.in_E" output="lut5inter.in_E"/>
+            <direct name="direct7" input="lut5inter.out_local" output="fle.out_local"/>
+            <direct name="direct8" input="lut5inter.out_routing" output="fle.out_routing"/>
+            <direct name="direct9" input="fle.clk" output="lut5inter.clk"/>
+          </interconnect>
+        </mode> 
+        </pb_type>
+        <interconnect>
+        <!-- 50% sparsely populated local routing -->
+        <complete name="lutA" input="clb.I4 clb.I3 fle[1:0].out_local fle[3:2].out_local fle[8:8].out_local" output="fle[9:0].in_A">
+          <delay_constant max="2.842e-11" in_port="clb.I4" out_port="fle.in_A" />
+          <delay_constant max="2.842e-11" in_port="clb.I3" out_port="fle.in_A" />
+          </complete>
+        <complete name="lutB" input="clb.I3 clb.I2 fle[3:2].out_local fle[5:4].out_local fle[9:9].out_local" output="fle[9:0].in_B">
+          <delay_constant max="2.842e-11" in_port="clb.I3" out_port="fle.in_B" />
+          <delay_constant max="2.842e-11" in_port="clb.I2" out_port="fle.in_B" />
+          </complete>
+        <complete name="lutC" input="clb.I2 clb.I1 fle[5:4].out_local fle[7:6].out_local fle[8:8].out_local" output="fle[9:0].in_C">
+          <delay_constant max="2.842e-11" in_port="clb.I2" out_port="fle.in_C" />
+          <delay_constant max="2.842e-11" in_port="clb.I1" out_port="fle.in_C" />
+          </complete>
+        <complete name="lutD" input="clb.I4 clb.I2 fle[1:0].out_local fle[5:4].out_local fle[9:9].out_local" output="fle[9:0].in_D">
+          <delay_constant max="2.842e-11" in_port="clb.I4" out_port="fle.in_D" />
+          <delay_constant max="2.842e-11" in_port="clb.I2" out_port="fle.in_D" />
+          </complete>
+        <complete name="lutE" input="clb.I3 clb.I1 fle[3:2].out_local fle[7:6].out_local fle[8:8].out_local" output="fle[9:0].in_E">
+          <delay_constant max="2.842e-11" in_port="clb.I3" out_port="fle.in_E" />
+          <delay_constant max="2.842e-11" in_port="clb.I1" out_port="fle.in_E" />
+          </complete>
+        <complete name="lutF" input="clb.I4 clb.I1 fle[1:0].out_local fle[7:6].out_local fle[9:9].out_local" output="fle[9:0].in_F">
+          <delay_constant max="2.842e-11" in_port="clb.I4" out_port="fle.in_F" />
+          <delay_constant max="2.842e-11" in_port="clb.I1" out_port="fle.in_F" />
+          </complete>
+          <complete name="clks" input="clb.clk" output="fle[9:0].clk">
+          </complete>
+          <!-- Direct connections to CLB outputs -->
+          <direct name="clbouts1" input="fle[0:0].out_routing" output="clb.O[1:0]"/>
+          <direct name="clbouts2" input="fle[1:1].out_routing" output="clb.O[3:2]"/>
+          <direct name="clbouts3" input="fle[2:2].out_routing" output="clb.O[5:4]"/>
+          <direct name="clbouts4" input="fle[3:3].out_routing" output="clb.O[7:6]"/>
+          <direct name="clbouts5" input="fle[4:4].out_routing" output="clb.O[9:8]"/>
+          <direct name="clbouts6" input="fle[5:5].out_routing" output="clb.O[11:10]"/>
+          <direct name="clbouts7" input="fle[6:6].out_routing" output="clb.O[13:12]"/>
+          <direct name="clbouts8" input="fle[7:7].out_routing" output="clb.O[15:14]"/>
+          <direct name="clbouts9" input="fle[8:8].out_routing" output="clb.O[17:16]"/>
+          <direct name="clbouts10" input="fle[9:9].out_routing" output="clb.O[19:18]"/>
+        </interconnect>
+      </pb_type>
+    <!-- Define general purpose logic block (CLB) end -->
+
+    <!-- Define DSP slice begin -->
+    <pb_type name="dsp_top">
+      <input name="reset" num_pins="1" is_non_clock_global="true"/>
+      <input name="dsp_I1" num_pins="37" />
+      <input name="dsp_I2" num_pins="37" />
+      <input name="chainin" num_pins="64"/>
+      <input name="scanin" num_pins="27"/>
+      <output name="result" num_pins="74"/>
+      <output name="chainout" num_pins="64"/>
+      <output name="scanout" num_pins="27"/>
+      <clock name="clk" num_pins="1"/>
+
+    <pb_type name="dsp" num_pb="1">
+      <input name="reset" num_pins="1"/>
+      <input name="dsp_I1" num_pins="37"/>
+      <input name="dsp_I2" num_pins="37"/>
+      <input name="chainin" num_pins="64"/>
+      <input name="scanin" num_pins="27"/>
+      <output name="result" num_pins="74"/>
+      <output name="chainout" num_pins="64"/>
+      <output name="scanout" num_pins="27"/>
+      <clock name="clk" num_pins="1"/>
+
+      <pb_type name="dsp_pb" num_pb="1">
+        <input name="reset" num_pins="1"/>
+        <input name="datain" num_pins="74"/>
+        <input name="chainin" num_pins="64"/>
+        <input name="scanin" num_pins="27"/>
+        <output name="result" num_pins="74"/>
+        <output name="chainout" num_pins="64"/>
+        <output name="scanout" num_pins="27"/>
+        <clock name="clk" num_pins="1"/>
+
+        <!-- fixed-point multiplier mode (one 27x27 multiplier): result[53:0] = a * b -->
+        <mode name="one_mult_27x27">
+          <pb_type name="one_mult_27x27" num_pb="1">
+            <input name="a" num_pins="27"/>
+            <input name="b" num_pins="27"/>
+            <output name="out" num_pins="54"/>
+            <pb_type name="mult_27x27" blif_model=".subckt multiply" num_pb="1">
+              <input name="a" num_pins="27"/>
+              <input name="b" num_pins="27"/>
+              <output name="out" num_pins="54"/>
+              <delay_constant max="1.667e-9" in_port="mult_27x27.a" out_port="mult_27x27.out"/>
+              <delay_constant max="1.667e-9" in_port="mult_27x27.b" out_port="mult_27x27.out"/>
+            </pb_type>
+            <interconnect>
+              <direct name="a2a" input="one_mult_27x27.a" output="mult_27x27.a">
+              </direct>
+              <direct name="b2b" input="one_mult_27x27.b" output="mult_27x27.b">
+              </direct>
+              <direct name="out2out" input="mult_27x27.out" output="one_mult_27x27.out">
+              </direct>
+            </interconnect>
+          </pb_type>
+          <interconnect>
+            <direct name="datain2a" input="dsp_pb.datain[26:0]" output="one_mult_27x27.a">
+            </direct>
+            <direct name="datain2b" input="dsp_pb.datain[53:27]" output="one_mult_27x27.b">
+            </direct>
+            <direct name="out2dataout" input="one_mult_27x27.out" output="dsp_pb.result[53:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- fixed-point multiplier mode (two 18x19 multipliers): each instance computes out = a * b, and the two 37-bit products together fill result[73:0] -->
+        <mode name="two_mult_18x19">
+          <pb_type name="two_mult_18x19" num_pb="2">
+            <input name="a" num_pins="18"/>
+            <input name="b" num_pins="19"/>
+            <output name="out" num_pins="37"/>
+            <pb_type name="mult_18x19" blif_model=".subckt multiply" num_pb="1">
+              <input name="a" num_pins="18"/>
+              <input name="b" num_pins="19"/>
+              <output name="out" num_pins="37"/>
+              <delay_constant max="1.667e-9" in_port="mult_18x19.a" out_port="mult_18x19.out"/>
+              <delay_constant max="1.667e-9" in_port="mult_18x19.b" out_port="mult_18x19.out"/>
+            </pb_type>
+            <interconnect>
+              <direct name="a2a" input="two_mult_18x19.a" output="mult_18x19.a">
+                 </direct>
+              <direct name="b2b" input="two_mult_18x19.b" output="mult_18x19.b">
+                 </direct>
+              <direct name="out2out" input="mult_18x19.out" output="two_mult_18x19.out">
+                 </direct>
+            </interconnect>
+          </pb_type>
+          <interconnect>
+            <direct name="datain2a1" input="dsp_pb.datain[17:0]" output="two_mult_18x19[0].a">
+            </direct>
+            <direct name="datain2b1" input="dsp_pb.datain[36:18]" output="two_mult_18x19[0].b">
+            </direct>
+            <direct name="datain2a2" input="dsp_pb.datain[54:37]" output="two_mult_18x19[1].a">
+            </direct>
+            <direct name="datain2b2" input="dsp_pb.datain[73:55]" output="two_mult_18x19[1].b">
+            </direct>
+            <direct name="out2result" input="two_mult_18x19.out" output="dsp_pb.result[73:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- fixed-point multiplier-add-sum mode result = (bx * by) + (ax * ay) + chainin. chainout = result -->
+        <mode name="sop_2_mode">
+          <pb_type name="sop_2" num_pb="1" blif_model=".subckt int_sop_2">
+            <input name="reset" num_pins="1"/>
+            <input name="ax" num_pins="18"/>
+            <input name="ay" num_pins="19"/>
+            <input name="bx" num_pins="18"/>
+            <input name="by" num_pins="19"/>
+            <input name="chainin" num_pins="37"/>
+            <output name="result" num_pins="37"/>
+            <output name="chainout" num_pins="37"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="1.667e-9" in_port="sop_2.reset" out_port="sop_2.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_2.ax" out_port="sop_2.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_2.ay" out_port="sop_2.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_2.bx" out_port="sop_2.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_2.by" out_port="sop_2.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_2.chainin" out_port="sop_2.result"/>
+
+            <delay_constant max="1.667e-9" in_port="sop_2.reset" out_port="sop_2.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_2.ax" out_port="sop_2.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_2.ay" out_port="sop_2.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_2.bx" out_port="sop_2.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_2.by" out_port="sop_2.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_2.chainin" out_port="sop_2.chainout"/>
+
+            <T_setup value="1.891e-11" port="sop_2.ax" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_2.ay" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_2.bx" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_2.by" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_2.chainin" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_2.reset" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_2.result" clock="clk"/>
+
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_2.ax" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_2.ay" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_2.bx" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_2.by" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_2.chainin" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_2.reset" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_2.result" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="clk" input="dsp_pb.clk" output="sop_2.clk">
+            </direct>
+            <direct name="reset" input="dsp_pb.reset" output="sop_2.reset">
+            </direct>
+            <direct name="datain2ax" input="dsp_pb.datain[17:0]" output="sop_2.ax">
+            </direct>
+            <direct name="datain2ay" input="dsp_pb.datain[36:18]" output="sop_2.ay">
+            </direct>
+            <direct name="datain2bx" input="dsp_pb.datain[54:37]" output="sop_2.bx">
+            </direct>
+            <direct name="datain2by" input="dsp_pb.datain[73:55]" output="sop_2.by">
+            </direct>
+            <direct name="chainin"   input="dsp_pb.chainin[36:0]" output="sop_2.chainin">
+            </direct>
+            <direct name="dataout2result" input="sop_2.result" output="dsp_pb.result[36:0]">
+            </direct>
+            <direct name="chainout" input="sop_2.chainout" output="dsp_pb.chainout[36:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- fixed-point multiplier-add-sum mode result = (ax * ay) + bx + chainin. chainout = result. with scanin-scanout support -->
+        <mode name="mult_add_mode_18_19_36">
+          <pb_type name="mult_add" num_pb="1" blif_model=".subckt mult_add_int_18x19">
+            <input name="reset" num_pins="1"/>
+            <input name="ax" num_pins="18"/>
+            <input name="ay" num_pins="19"/>
+            <input name="bx" num_pins="36"/>
+            <input name="chainin" num_pins="64"/>
+            <input name="scanin" num_pins="19"/>
+            <output name="result" num_pins="64"/>
+            <output name="chainout" num_pins="64"/>
+            <output name="scanout" num_pins="19"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="1.667e-9" in_port="mult_add.reset" out_port="mult_add.result"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.ax" out_port="mult_add.result"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.ay" out_port="mult_add.result"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.bx" out_port="mult_add.result"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.chainin" out_port="mult_add.result"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.scanin" out_port="mult_add.result"/>
+
+            <delay_constant max="1.667e-9" in_port="mult_add.reset" out_port="mult_add.chainout"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.ax" out_port="mult_add.chainout"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.ay" out_port="mult_add.chainout"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.bx" out_port="mult_add.chainout"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.chainin" out_port="mult_add.chainout"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.scanin" out_port="mult_add.chainout"/>
+
+            <delay_constant max="1.667e-9" in_port="mult_add.reset" out_port="mult_add.scanout"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.ax" out_port="mult_add.scanout"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.ay" out_port="mult_add.scanout"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.bx" out_port="mult_add.scanout"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.chainin" out_port="mult_add.scanout"/>
+            <delay_constant max="1.667e-9" in_port="mult_add.scanin" out_port="mult_add.scanout"/>
+
+            <T_setup value="1.891e-11" port="mult_add.ax" clock="clk"/>
+            <T_setup value="1.891e-11" port="mult_add.ay" clock="clk"/>
+            <T_setup value="1.891e-11" port="mult_add.bx" clock="clk"/>
+            <T_setup value="1.891e-11" port="mult_add.chainin" clock="clk"/>
+            <T_setup value="1.891e-11" port="mult_add.scanin" clock="clk"/>
+            <T_setup value="1.891e-11" port="mult_add.reset" clock="clk"/>
+            <T_setup value="1.891e-11" port="mult_add.result" clock="clk"/>
+
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="mult_add.ax" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="mult_add.ay" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="mult_add.bx" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="mult_add.chainin" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="mult_add.scanin" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="mult_add.reset" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="mult_add.result" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="clk" input="dsp_pb.clk" output="mult_add.clk">
+            </direct>
+            <direct name="reset" input="dsp_pb.reset" output="mult_add.reset">
+            </direct>
+            <direct name="datain2ax" input="dsp_pb.datain[17:0]" output="mult_add.ax">
+            </direct>
+            <direct name="datain2ay" input="dsp_pb.datain[36:18]" output="mult_add.ay">
+            </direct>
+            <direct name="datain2bx" input="dsp_pb.datain[72:37]" output="mult_add.bx">
+            </direct>
+            <direct name="chainin"   input="dsp_pb.chainin[63:0]" output="mult_add.chainin">
+            </direct>
+            <direct name="scanin"   input="dsp_pb.scanin[18:0]" output="mult_add.scanin">
+            </direct>
+            <direct name="dataout2result" input="mult_add.result" output="dsp_pb.result[63:0]">
+            </direct>
+            <direct name="chainout" input="mult_add.chainout" output="dsp_pb.chainout[63:0]">
+            </direct>
+            <direct name="scanout" input="mult_add.scanout" output="dsp_pb.scanout[18:0]">
+            </direct>
+          </interconnect>
+        </mode>
+
+        <!-- fixed-point sum-of-4 mode result = (dx * dy) + (cx * cy) + (bx * by) + (ax * ay) + chainin. chainout = result -->
+        <mode name="sop_4_mode">
+          <pb_type name="sop_4" num_pb="1" blif_model=".subckt int_sop_4">
+            <input name="reset" num_pins="1"/>
+            <input name="ax" num_pins="9"/>
+            <input name="ay" num_pins="9"/>
+            <input name="bx" num_pins="9"/>
+            <input name="by" num_pins="9"/>
+            <input name="cx" num_pins="9"/>
+            <input name="cy" num_pins="9"/>
+            <input name="dx" num_pins="9"/>
+            <input name="dy" num_pins="9"/>
+            <input name="chainin" num_pins="64"/>
+            <output name="result" num_pins="64"/>
+            <output name="chainout" num_pins="64"/>
+            <clock name="clk" num_pins="1"/>
+
+            <delay_constant max="1.667e-9" in_port="sop_4.reset" out_port="sop_4.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.ax" out_port="sop_4.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.ay" out_port="sop_4.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.bx" out_port="sop_4.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.by" out_port="sop_4.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.cx" out_port="sop_4.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.cy" out_port="sop_4.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.dx" out_port="sop_4.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.dy" out_port="sop_4.result"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.chainin" out_port="sop_4.result"/>
+
+            <delay_constant max="1.667e-9" in_port="sop_4.reset" out_port="sop_4.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.ax" out_port="sop_4.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.ay" out_port="sop_4.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.bx" out_port="sop_4.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.by" out_port="sop_4.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.cx" out_port="sop_4.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.cy" out_port="sop_4.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.dx" out_port="sop_4.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.dy" out_port="sop_4.chainout"/>
+            <delay_constant max="1.667e-9" in_port="sop_4.chainin" out_port="sop_4.chainout"/>
+
+            <T_setup value="1.891e-11" port="sop_4.ax" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_4.ay" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_4.bx" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_4.by" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_4.cx" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_4.cy" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_4.dx" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_4.dy" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_4.chainin" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_4.reset" clock="clk"/>
+            <T_setup value="1.891e-11" port="sop_4.result" clock="clk"/>
+
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_4.ax" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_4.ay" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_4.bx" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_4.by" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_4.cx" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_4.cy" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_4.dx" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_4.dy" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_4.chainin" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_4.reset" clock="clk"/>
+            <T_clock_to_Q max="6.032e-11" min="6.032e-11" port="sop_4.result" clock="clk"/>
+          </pb_type>
+          <interconnect>
+            <direct name="clk" input="dsp_pb.clk" output="sop_4.clk">
+            </direct>
+            <direct name="reset" input="dsp_pb.reset" output="sop_4.reset">
+            </direct>
+            <direct name="datain2ax" input="dsp_pb.datain[8:0]" output="sop_4.ax">
+            </direct>
+            <direct name="datain2ay" input="dsp_pb.datain[17:9]" output="sop_4.ay">
+            </direct>
+            <direct name="datain2bx" input="dsp_pb.datain[26:18]" output="sop_4.bx">
+            </direct>
+            <direct name="datain2by" input="dsp_pb.datain[35:27]" output="sop_4.by">
+            </direct>
+            <direct name="datain2cx" input="dsp_pb.datain[44:36]" output="sop_4.cx">
+            </direct>
+            <direct name="datain2cy" input="dsp_pb.datain[53:45]" output="sop_4.cy">
+            </direct>
+            <direct name="datain2dx" input="dsp_pb.datain[62:54]" output="sop_4.dx">
+            </direct>
+            <direct name="datain2dy" input="dsp_pb.datain[71:63]" output="sop_4.dy">
+            </direct>
+            <direct name="chainin"   input="dsp_pb.chainin[63:0]" output="sop_4.chainin">
+            </direct>
+            <direct name="dataout2result" input="sop_4.result" output="dsp_pb.result[63:0]">
+            </direct>
+            <direct name="chainout" input="sop_4.chainout" output="dsp_pb.chainout[63:0]">
+            </direct>
+          </interconnect>
+        </mode>
+      </pb_type>
+
+      <interconnect>
+        <direct name="datain1" input="dsp.dsp_I1" output ="dsp_pb.datain[36:0]"/>
+        <direct name="datain2" input="dsp.dsp_I2" output ="dsp_pb.datain[73:37]"/>
+        <direct name="reset" input="dsp.reset" output="dsp_pb.reset"></direct>
+        <direct name="chainin" input="dsp.chainin"    output="dsp_pb.chainin"></direct>
+        <direct name="chainout" input="dsp_pb.chainout" output="dsp.chainout"></direct>
+        <direct name="scanin" input="dsp.scanin"    output="dsp_pb.scanin"></direct>
+        <direct name="scanout" input="dsp_pb.scanout" output="dsp.scanout"></direct>
+        <direct name="result" input="dsp_pb.result" output="dsp.result"></direct>
+        <direct name="clk" input="dsp.clk" output="dsp_pb.clk"></direct>
+      </interconnect>  
+    </pb_type>
+
+      
+    <interconnect>
+      <!--A 50% sparse crossbar means 50% of the lines can reach an actual input of the dsp.
+      It would be built by splitting the inputs into two buckets with one full crossbar each; both crossbars are commented out below and direct connections are used instead.-->
+      <!--
+     <complete name="first_half" input="dsp_top.dsp_I1" output="dsp.dsp_I1">
+          <delay_constant max="333e-12" in_port="dsp_top.dsp_I1" out_port="dsp.dsp_I1"/>
+     </complete>
+
+      <complete name="second_half" input="dsp_top.dsp_I2" output="dsp.dsp_I2">
+          <delay_constant max="333e-12" in_port="dsp_top.dsp_I2" out_port="dsp.dsp_I2"/>
+      </complete>
+      -->
+      <direct name="datain1" input="dsp_top.dsp_I1" output ="dsp.dsp_I1"/>
+      <direct name="datain2" input="dsp_top.dsp_I2" output ="dsp.dsp_I2"/>
+
+      <direct name="reset" input="dsp_top.reset" output="dsp.reset"></direct>
+      <direct name="chainin" input="dsp_top.chainin" output="dsp.chainin">
+          <delay_constant max="1179e-12" in_port="dsp_top.chainin" out_port="dsp.chainin"/>
+      </direct>
+      <direct name="chainout" input="dsp.chainout" output="dsp_top.chainout">
+          <delay_constant max="1179e-12" in_port="dsp.chainout" out_port="dsp_top.chainout"/>
+      </direct>
+      <direct name="scanin" input="dsp_top.scanin" output="dsp.scanin">
+          <delay_constant max="1179e-12" in_port="dsp_top.scanin" out_port="dsp.scanin"/>
+      </direct>
+      <direct name="scanout" input="dsp.scanout" output="dsp_top.scanout">
+          <delay_constant max="1179e-12" in_port="dsp.scanout" out_port="dsp_top.scanout"/>
+      </direct>
+      <direct name="result" input="dsp.result" output="dsp_top.result"></direct>
+      <direct name="clk" input="dsp_top.clk" output="dsp.clk"></direct>
+    </interconnect>
+  </pb_type>
+    <!-- Define DSP slice end -->
+
+
+    <!-- Define fracturable memory begin -->
+    <!-- 
+    RAM blocks always have registered inputs. The input FFs appear before the address decoder & wordline driver,
+    and after the local input crossbar & level shifter.
+    RAM blocks optionally have registered outputs. The output FFs (if present) appear after the output crossbar.
+    If BRAM doesn't have registered outputs, then T_clk_to_q is the whole delay of the read/write operation.
+    If BRAM does have registered output, then T_clk_to_q is just the FF clk_to_q and then delay_constant
+    can be used to specify the whole delay of the read/write operation.
+
+    This RAM block has registered outputs.
+
+    The area and delay values of this RAM block were obtained (indirectly) from COFFE simulations.
+    COFFE only supports widths and depths that are powers of 2. For M20K (20 Kilobit BRAM), we need
+    the width to be 40 bits and depth to be 512 (for the logically widest mode: 512x40). We can't
+    simulate these dimensions directly in COFFE. So, we simulated and obtained the results for M32K
+    (32 Kilobits BRAM) and M16K (16 Kilobits BRAM). Then we interpolated the results.
+    For delay, a linear interpolation was used, based on the size of the Memory (16K->20K->32K).
+    For area, the value was calculated using two interpolations: (1) port based (change in number of 
+    ports in going from 16K->20K->32K) and (2) number of bits based (change in number of bits in
+    going from 16K->20K->32K). The interpolation that resulted in the larger area was picked.
+    
+
+    Here are the equations used to calculate the delays based on COFFE results:
+    T_setup (inputs) = T_level_shifter + T_register_micro_setup = 32.3ps + 18.91ps = 51.21ps
+    T_clk_to_q (inputs) = T_register_micro_clk_to_q = 60.32ps
+    T_setup (outputs) = T_register_micro_setup = 18.91ps 
+    T_clk_to_q (outputs) = T_register_micro_clk_to_q = 60.32ps
+
+    (Register setup and clk_to_q timings are actually from the FF used in the logic cluster.)
+
+    T_read = T1 + T2 + T3
+    = max (Row decoder, Pre-charge time) + (Wordline driver + Bit line delay) + (Sense amp + Output crossbar)
+
+    * Bit line delay is included in self.RAM.samp.delay time in COFFE. The Sense amp delay is actually
+    self.RAM.samp_part2.delay
+
+    T_write = T1 + T2 + T3
+    = max (Row decoder, Pre-charge time) + (Wordline driver) + (Write driver)
+
+    delay_constant values model the internal limits of a block (the combinatorial delay).
+    delay_constant = max (T_read, T_write) 
+
+    Overall internal delay of the RAM is T_clk_to_q (inputs) + delay_constant + T_setup (outputs)
+    -->
+    <pb_type name="memory">
+      <input name="addr1" num_pins="11"/>
+      <input name="addr2" num_pins="11"/>
+      <input name="data" num_pins="40"/>
+      <input name="we1" num_pins="1"/>
+      <input name="we2" num_pins="1"/>
+      <output name="out" num_pins="40"/>
+      <clock name="clk" num_pins="1"/>
+      <!-- Specify single port mode first -->
+      <mode name="mem_512x40_sp">
+        <pb_type name="mem_512x40_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
+          <input name="addr" num_pins="9" port_class="address"/>
+          <input name="data" num_pins="40" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="40" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+
+          <T_setup value="-3.5e-11" port="mem_512x40_sp.addr" clock="clk"/>
+          <T_setup value="-3.5e-11" port="mem_512x40_sp.data" clock="clk"/>
+          <T_setup value="-3.5e-11" port="mem_512x40_sp.we" clock="clk"/>
+          <T_setup value="-3.5e-11" port="mem_512x40_sp.out" clock="clk"/>
+
+          <T_hold value="7.4e-11" port="mem_512x40_sp.addr" clock="clk"/>
+          <T_hold value="7.4e-11" port="mem_512x40_sp.data" clock="clk"/>
+          <T_hold value="7.4e-11" port="mem_512x40_sp.we" clock="clk"/>
+          <T_hold value="7.4e-11" port="mem_512x40_sp.out" clock="clk"/>
+
+          <T_clock_to_Q max="3.7e-10" port="mem_512x40_sp.addr" clock="clk"/>
+          <T_clock_to_Q max="3.7e-10" port="mem_512x40_sp.data" clock="clk"/>
+          <T_clock_to_Q max="3.7e-10" port="mem_512x40_sp.we" clock="clk"/>
+          <T_clock_to_Q max="3.7e-10" port="mem_512x40_sp.out" clock="clk"/>
+
+          <delay_constant max="0" in_port="mem_512x40_sp.addr" out_port="mem_512x40_sp.out"/>
+          <delay_constant max="0" in_port="mem_512x40_sp.data" out_port="mem_512x40_sp.out"/>
+          <delay_constant max="0" in_port="mem_512x40_sp.we"   out_port="mem_512x40_sp.out"/>
+
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[8:0]" output="mem_512x40_sp.addr">
+          </direct>
+          <direct name="data1" input="memory.data" output="mem_512x40_sp.data">
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_512x40_sp.we">
+          </direct>
+          <direct name="dataout1" input="mem_512x40_sp.out" output="memory.out">
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_512x40_sp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+
+      <mode name="mem_1024x20_sp">
+        <pb_type name="mem_1024x20_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
+          <input name="addr" num_pins="10" port_class="address"/>
+          <input name="data" num_pins="20" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="20" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+
+          <T_setup value="-3.5e-11" port="mem_1024x20_sp.addr" clock="clk"/>
+          <T_setup value="-3.5e-11" port="mem_1024x20_sp.data" clock="clk"/>
+          <T_setup value="-3.5e-11" port="mem_1024x20_sp.we" clock="clk"/>
+          <T_setup value="-3.5e-11" port="mem_1024x20_sp.out" clock="clk"/>
+
+          <T_hold value="7.4e-11" port="mem_1024x20_sp.addr" clock="clk"/>
+          <T_hold value="7.4e-11" port="mem_1024x20_sp.data" clock="clk"/>
+          <T_hold value="7.4e-11" port="mem_1024x20_sp.we" clock="clk"/>
+          <T_hold value="7.4e-11" port="mem_1024x20_sp.out" clock="clk"/>
+
+          <T_clock_to_Q max="3.7e-10" port="mem_1024x20_sp.addr" clock="clk"/>
+          <T_clock_to_Q max="3.7e-10" port="mem_1024x20_sp.data" clock="clk"/>
+          <T_clock_to_Q max="3.7e-10" port="mem_1024x20_sp.we" clock="clk"/>
+          <T_clock_to_Q max="3.7e-10" port="mem_1024x20_sp.out" clock="clk"/>
+
+          <delay_constant max="0" in_port="mem_1024x20_sp.addr" out_port="mem_1024x20_sp.out"/>
+          <delay_constant max="0" in_port="mem_1024x20_sp.data" out_port="mem_1024x20_sp.out"/>
+          <delay_constant max="0" in_port="mem_1024x20_sp.we"   out_port="mem_1024x20_sp.out"/>
+
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[9:0]" output="mem_1024x20_sp.addr">
+          </direct>
+          <direct name="data1" input="memory.data[19:0]" output="mem_1024x20_sp.data">
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_1024x20_sp.we">
+          </direct>
+          <direct name="dataout1" input="mem_1024x20_sp.out" output="memory.out[19:0]">
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_1024x20_sp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+
+      <mode name="mem_2048x10_sp">
+        <pb_type name="mem_2048x10_sp" blif_model=".subckt single_port_ram" class="memory" num_pb="1">
+          <input name="addr" num_pins="11" port_class="address"/>
+          <input name="data" num_pins="10" port_class="data_in"/>
+          <input name="we" num_pins="1" port_class="write_en"/>
+          <output name="out" num_pins="10" port_class="data_out"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+
+          <T_setup value="-3.5e-11" port="mem_2048x10_sp.addr" clock="clk"/>
+          <T_setup value="-3.5e-11" port="mem_2048x10_sp.data" clock="clk"/>
+          <T_setup value="-3.5e-11" port="mem_2048x10_sp.we" clock="clk"/>
+          <T_setup value="-3.5e-11" port="mem_2048x10_sp.out" clock="clk"/>
+
+          <T_hold value="7.4e-11" port="mem_2048x10_sp.addr" clock="clk"/>
+          <T_hold value="7.4e-11" port="mem_2048x10_sp.data" clock="clk"/>
+          <T_hold value="7.4e-11" port="mem_2048x10_sp.we" clock="clk"/>
+          <T_hold value="7.4e-11" port="mem_2048x10_sp.out" clock="clk"/>
+
+          <T_clock_to_Q max="3.7e-10" port="mem_2048x10_sp.addr" clock="clk"/>
+          <T_clock_to_Q max="3.7e-10" port="mem_2048x10_sp.data" clock="clk"/>
+          <T_clock_to_Q max="3.7e-10" port="mem_2048x10_sp.we" clock="clk"/>
+          <T_clock_to_Q max="3.7e-10" port="mem_2048x10_sp.out" clock="clk"/>
+
+          <delay_constant max="0" in_port="mem_2048x10_sp.addr" out_port="mem_2048x10_sp.out"/>
+          <delay_constant max="0" in_port="mem_2048x10_sp.data" out_port="mem_2048x10_sp.out"/>
+          <delay_constant max="0" in_port="mem_2048x10_sp.we"   out_port="mem_2048x10_sp.out"/>
+
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[10:0]" output="mem_2048x10_sp.addr">
+          </direct>
+          <direct name="data1" input="memory.data[9:0]" output="mem_2048x10_sp.data">
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_2048x10_sp.we">
+          </direct>
+          <direct name="dataout1" input="mem_2048x10_sp.out" output="memory.out[9:0]">
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_2048x10_sp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+
+      <!-- Specify true dual port mode next -->
+      <mode name="mem_1024x20_dp">
+        <pb_type name="mem_1024x20_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
+          <input name="addr1" num_pins="10" port_class="address1"/>
+          <input name="addr2" num_pins="10" port_class="address2"/>
+          <input name="data1" num_pins="20" port_class="data_in1"/>
+          <input name="data2" num_pins="20" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="20" port_class="data_out1"/>
+          <output name="out2" num_pins="20" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+
+          <T_setup value="-2.6e-11" port="mem_1024x20_dp.addr1" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_1024x20_dp.data1" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_1024x20_dp.we1" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_1024x20_dp.addr2" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_1024x20_dp.data2" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_1024x20_dp.we2" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_1024x20_dp.out1" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_1024x20_dp.out2" clock="clk"/>
+
+          <T_hold value="1.13e-10" port="mem_1024x20_dp.addr1" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_1024x20_dp.data1" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_1024x20_dp.we1" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_1024x20_dp.addr2" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_1024x20_dp.data2" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_1024x20_dp.we2" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_1024x20_dp.out1" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_1024x20_dp.out2" clock="clk"/>
+
+          <T_clock_to_Q max="3.57e-10" port="mem_1024x20_dp.addr1" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_1024x20_dp.data1" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_1024x20_dp.we1" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_1024x20_dp.addr2" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_1024x20_dp.data2" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_1024x20_dp.we2" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_1024x20_dp.out1" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_1024x20_dp.out2" clock="clk"/>
+
+          <delay_constant max="0" in_port="mem_1024x20_dp.addr1" out_port="mem_1024x20_dp.out1"/>
+          <delay_constant max="0" in_port="mem_1024x20_dp.data1" out_port="mem_1024x20_dp.out1"/>
+          <delay_constant max="0" in_port="mem_1024x20_dp.we1" out_port="mem_1024x20_dp.out1"/>
+          <delay_constant max="0" in_port="mem_1024x20_dp.addr2" out_port="mem_1024x20_dp.out2"/>
+          <delay_constant max="0" in_port="mem_1024x20_dp.data2" out_port="mem_1024x20_dp.out2"/>
+          <delay_constant max="0" in_port="mem_1024x20_dp.we2" out_port="mem_1024x20_dp.out2"/>
+
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[9:0]" output="mem_1024x20_dp.addr1">
+          </direct>
+          <direct name="address2" input="memory.addr2[9:0]" output="mem_1024x20_dp.addr2">
+          </direct>
+          <direct name="data1" input="memory.data[19:0]" output="mem_1024x20_dp.data1">
+          </direct>
+          <direct name="data2" input="memory.data[39:20]" output="mem_1024x20_dp.data2">
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_1024x20_dp.we1">
+          </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_1024x20_dp.we2">
+          </direct>
+          <direct name="dataout1" input="mem_1024x20_dp.out1" output="memory.out[19:0]">
+          </direct>
+          <direct name="dataout2" input="mem_1024x20_dp.out2" output="memory.out[39:20]">
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_1024x20_dp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+
+      <mode name="mem_2048x10_dp">
+        <pb_type name="mem_2048x10_dp" blif_model=".subckt dual_port_ram" class="memory" num_pb="1">
+          <input name="addr1" num_pins="11" port_class="address1"/>
+          <input name="addr2" num_pins="11" port_class="address2"/>
+          <input name="data1" num_pins="10" port_class="data_in1"/>
+          <input name="data2" num_pins="10" port_class="data_in2"/>
+          <input name="we1" num_pins="1" port_class="write_en1"/>
+          <input name="we2" num_pins="1" port_class="write_en2"/>
+          <output name="out1" num_pins="10" port_class="data_out1"/>
+          <output name="out2" num_pins="10" port_class="data_out2"/>
+          <clock name="clk" num_pins="1" port_class="clock"/>
+
+          <T_setup value="-2.6e-11" port="mem_2048x10_dp.addr1" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_2048x10_dp.data1" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_2048x10_dp.we1" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_2048x10_dp.addr2" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_2048x10_dp.data2" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_2048x10_dp.we2" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_2048x10_dp.out1" clock="clk"/>
+          <T_setup value="-2.6e-11" port="mem_2048x10_dp.out2" clock="clk"/>
+
+          <T_hold value="1.13e-10" port="mem_2048x10_dp.addr1" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_2048x10_dp.data1" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_2048x10_dp.we1" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_2048x10_dp.addr2" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_2048x10_dp.data2" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_2048x10_dp.we2" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_2048x10_dp.out1" clock="clk"/>
+          <T_hold value="1.13e-10" port="mem_2048x10_dp.out2" clock="clk"/>
+
+          <T_clock_to_Q max="3.57e-10" port="mem_2048x10_dp.addr1" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_2048x10_dp.data1" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_2048x10_dp.we1" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_2048x10_dp.addr2" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_2048x10_dp.data2" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_2048x10_dp.we2" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_2048x10_dp.out1" clock="clk"/>
+          <T_clock_to_Q max="3.57e-10" port="mem_2048x10_dp.out2" clock="clk"/>
+
+          <delay_constant max="0" in_port="mem_2048x10_dp.addr1" out_port="mem_2048x10_dp.out1"/>
+          <delay_constant max="0" in_port="mem_2048x10_dp.data1" out_port="mem_2048x10_dp.out1"/>
+          <delay_constant max="0" in_port="mem_2048x10_dp.we1" out_port="mem_2048x10_dp.out1"/>
+          <delay_constant max="0" in_port="mem_2048x10_dp.addr2" out_port="mem_2048x10_dp.out2"/>
+          <delay_constant max="0" in_port="mem_2048x10_dp.data2" out_port="mem_2048x10_dp.out2"/>
+          <delay_constant max="0" in_port="mem_2048x10_dp.we2" out_port="mem_2048x10_dp.out2"/>
+
+        </pb_type>
+        <interconnect>
+          <direct name="address1" input="memory.addr1[10:0]" output="mem_2048x10_dp.addr1">
+          </direct>
+          <direct name="address2" input="memory.addr2[10:0]" output="mem_2048x10_dp.addr2">
+          </direct>
+          <direct name="data1" input="memory.data[9:0]" output="mem_2048x10_dp.data1">
+          </direct>
+          <direct name="data2" input="memory.data[19:10]" output="mem_2048x10_dp.data2">
+          </direct>
+          <direct name="writeen1" input="memory.we1" output="mem_2048x10_dp.we1">
+          </direct>
+          <direct name="writeen2" input="memory.we2" output="mem_2048x10_dp.we2">
+          </direct>
+          <direct name="dataout1" input="mem_2048x10_dp.out1" output="memory.out[9:0]">
+          </direct>
+          <direct name="dataout2" input="mem_2048x10_dp.out2" output="memory.out[19:10]">
+          </direct>
+          <direct name="clk" input="memory.clk" output="mem_2048x10_dp.clk">
+          </direct>
+        </interconnect>
+      </mode>
+    </pb_type>
+    <!-- Define fracturable memory end -->
+
+    <pb_type name="tsv_hole">
+      <input name="I" num_pins="1"/>
+      <output name="O" num_pins="1"/>
+      <interconnect/>
+    </pb_type>
+
+  </complexblocklist>
+
+  <switchblocklist>
+    <!-- Stratix IV uses a uni-directional routing architecture with a Driver Input Mux (DIM) size of 12 (i.e.
+           each wire can be driven by one of 12 block/outputs or wires) for the L4s.
+           
+           In the Stratix IV architecture the long wires (L16 here) are accessible only from the short wires, 
+           and are not connected to the block pins (i.e. connection blocks). Furthermore, they only connect 
+           to switch blocks every 4 LABs (to avoid expensive deep via stacks).
+           We approximate the L16 DIM size as 40:1 (in reality it is a pair of 20:1 (?) muxes with a 2:1 swap mux
+           in front, which has nearly the same connectivity as a full 40:1).
+
+           L4 wires
+           ================
+           At a channel width of 300 there are 260 L4/L4prime wires. At an effective Fc_out of 0.075 
+           and 40 LAB outputs this yields:
+
+                40 * 2 = 80 outputs per channel  [2 LABs per-channel]
+
+                80 * 0.075 = 6 outputs drive each L4 wire [output connection block]
+
+           This leaves:
+
+                12 - 6 = 6 inputs to the DIMs from other routing wires [switch block]
+
+           Since L4s connect at every switch block, there are:
+
+                260 L4 wires per channel + direction which can drive wires at a particular switchblock
+                (via switchpoints 0, 1, 2, 3)
+
+           And for each direction (260 wires) only:
+
+               260 / 4 = 65 wires starting/ending per channel + direction at each switch block
+               (i.e. from each direction, north/south/east/west, there are 32 L4s starting, and 32 L4s ending; + 1 wire for the 65th)
+
+           Which we allocate as follows:
+
+                L4
+                =====
+                straight-through connection: 2 (from L4 or L16)
+                clock-wise turn            : 2 (from L4 or L16)
+                counter-clock-wise turn    : 2 (from L4 or L16)
+
+           L16 wires
+           =========
+           At a channel width of 300 there are 40 L16 wires (20 in each direction), which do not connect to the input/output connection blocks.
+           This leaves 40 inputs to the DIM to select from routing wires (long wires use larger DIMs to improve reachability,
+           the area cost is relatively small since they are so rare).
+
+           Since L16s only connect at every 4th switch block there are:
+
+                40 / 4 = 10 L16 wires per channel (5 in each direction) which can drive wires at a particular switchblock
+                (via switchpoints 0, 4, 8, 12)
+
+           And for each direction (20 wires) only:
+
+               40 / 16 = 2.5 => 2 wires starting/ending per channel + direction at each switch block
+               (i.e. from each direction, north/south/east/west, there is one L16 starting, and one L16 ending)
+           
+           We assign the 40 DIM inputs as follows:
+
+                L16
+                =====
+                straight-through connection:  3 (from L16)
+                straight-through connection: 11 (from L4)
+                clock-wise turn            :  3 (from L16)
+                clock-wise turn            : 10 (from L4)
+                counter clock-wise turn    :  3 (from L16)
+                counter clock-wise turn    : 10 (from L4)
+
+           Switch pattern
+           ==============
+           This switch block is based on the Wilton switch block (see Page 103 of Steve Wilton's PhD Thesis 
+           "Architecture and Algorithms for Field-Programmable Gate Arrays with Embedded Memory", 1997):
+
+                left-to-top: W - t
+                top-to-right: t + 1
+                right-to-bottom: 2*W - 2 - t
+                bottom-to-left: t + 1
+                left-to-right: t
+                top-to-bottom: t
+
+           Since Wilton assumed bidirectional routing (while we use unidirectional routing),
+           we mirror the clock-wise turns to match the counter-clock-wise specification.
+           -->
+    <switchblock name="wilton_turn_clockwise_core" type="unidir">
+      <switchblock_location type="CORE"/>
+      <switchfuncs>
+        <!-- Clock-wise turns -->
+        <func type="tl" formula="W-t"/>
+        <!-- top to left -->
+        <func type="rt" formula="t+1"/>
+        <!-- right to top -->
+        <func type="br" formula="2*W-2-t"/>
+        <!-- bottom to right -->
+        <func type="lb" formula="t+1"/>
+        <!-- left to bottom -->
+      </switchfuncs>
+      <!-- L16 drivers -->
+      <wireconn num_conns="3*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L16" to_switchpoint="0"/>
+      <wireconn num_conns="10*to" from_type="L4" from_switchpoint="0" to_type="L16" to_switchpoint="0"/>
+      <!-- L4 drivers 
+
+               Driving from L16 (few) to L4 (many) prefer driving from end-point of L16, although since there are many they will
+               all be multiply connected.
+               
+               Driving from L4 (many) to L4 (many) shuffle the switchpoints so the L4's are driven from a variety of switchpoints.
+               Since the actual number L4s starting/ending are equal, using 'fixed' from_order would mean only switchpoint 0 -> 0
+               connections. A 'shuffled' order will mix-up the from switchpoints for more diversity.
+               -->
+      <wireconn num_conns="2*to" from_order="shuffled">
+        <from type="L16" switchpoint="0,12,8,4"/>
+        <from type="L4" switchpoint="0,1,2,3"/>
+        <to type="L4" switchpoint="0"/>
+      </wireconn>
+    </switchblock>
+    <switchblock name="wilton_turn_counter_clockwise_core" type="unidir">
+      <switchblock_location type="CORE"/>
+      <switchfuncs>
+        <!-- Counter-clock-wise turns -->
+        <func type="lt" formula="W-t"/>
+        <!-- left to top -->
+        <func type="tr" formula="t+1"/>
+        <!-- top to right -->
+        <func type="rb" formula="2*W-2-t"/>
+        <!-- right to bottom -->
+        <func type="bl" formula="t+1"/>
+        <!-- bottom to left -->
+      </switchfuncs>
+      <!-- L16 drivers -->
+      <wireconn num_conns="3*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L16" to_switchpoint="0"/>
+      <wireconn num_conns="10*to" from_type="L4" from_switchpoint="0" to_type="L16" to_switchpoint="0"/>
+      <!-- L4 drivers 
+
+               Driving from L16 (few) to L4 (many) prefer driving from end-point of L16, although since there are many they will
+               all be multiply connected.
+               
+               Driving from L4 (many) to L4 (many) shuffle the switchpoints so the L4's are driven from a variety of switchpoints.
+               Since the actual number L4s starting/ending are equal, using 'fixed' from_order would mean only switchpoint 0 -> 0
+               connections. A 'shuffled' order will mix-up the from switchpoints for more diversity.
+
+               Note that a different from_switchpoints ordering is used to ensure a different shuffling occurs compared to 
+               wilton_turn_clockwise_core.
+               -->
+      <wireconn num_conns="2*to" from_order="shuffled">
+        <from type="L16" switchpoint="0,12,8,4"/>
+        <from type="L4" switchpoint="0,1,2,3"/>
+        <to type="L4" switchpoint="0"/>
+      </wireconn>
+    </switchblock>
+    <switchblock name="wilton_straight" type="unidir">
+      <switchblock_location type="EVERYWHERE"/>
+      <switchfuncs>
+        <!-- Straight -->
+        <func type="lr" formula="t"/>
+        <!-- left to right -->
+        <func type="tb" formula="t"/>
+        <!-- top to bottom -->
+        <func type="rl" formula="t"/>
+        <!-- right to left -->
+        <func type="bt" formula="t"/>
+        <!-- bottom to top -->
+      </switchfuncs>
+      <!-- L16 Drivers 
+                Note that we order the switchpoints in order of preference, since VPR currently
+                iterates through the source sets in order, such that we connect first to wires
+                ending at the switchblock (switchpoint 0), and then fallback to switchpoints
+                in decreasing distance from the drive point (if we have more to's than from's
+                it then wraps around).
+
+                Note also that we multiply the number of expected connections by 'to', since while usually
+                there is only one 'to' wire, occasionally there may be more, and we want to ensure they all
+                get the same number of connections.
+
+                For L16->L16:
+                  We allow any valid switchpoint to be used as the 'from' point.
+                  Allowing 'low' switchpoints like '4' may seem counter-intuitive (i.e. why not use a cheaper L4),
+                  but this makes it easier to bypass once on the L16 network (e.g. to get around congestion).
+           -->
+      <wireconn num_conns="3*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L16" to_switchpoint="0"/>
+      <wireconn num_conns="11*to" from_type="L4" from_switchpoint="0,3,2,1" to_type="L16" to_switchpoint="0"/>
+      <!-- L4 Drivers -->
+      <wireconn num_conns="2*to" from_order="shuffled">
+        <from type="L16" switchpoint="0,12,8,4"/>
+        <from type="L4" switchpoint="0"/>
+        <to type="L4" switchpoint="0"/>
+      </wireconn>
+      <!--<wireconn num_conns="1*to" from_type="L4" from_switchpoint="0" to_type="L4" to_switchpoint="0"/>-->
+      <!--<wireconn num_conns="1*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L4" to_switchpoint="0"/>-->
+    </switchblock>
+    <switchblock name="wilton_straight_corner" type="unidir">
+      <!-- Same as wilton straight, but turning around a corner -->
+      <switchblock_location type="CORNER"/>
+      <switchfuncs>
+        <!-- Counter-clock-wise turns -->
+        <func type="lt" formula="t"/>
+        <!-- left to top -->
+        <func type="tr" formula="t"/>
+        <!-- top to right -->
+        <func type="rb" formula="t"/>
+        <!-- right to bottom -->
+        <func type="bl" formula="t"/>
+        <!-- bottom to left -->
+        <!-- Clock-wise turns -->
+        <func type="tl" formula="t"/>
+        <!-- top to left -->
+        <func type="rt" formula="t"/>
+        <!-- right to top -->
+        <func type="br" formula="t"/>
+        <!-- bottom to right -->
+        <func type="lb" formula="t"/>
+        <!-- left to bottom -->
+      </switchfuncs>
+      <!-- L16 Drivers -->
+      <wireconn num_conns="3*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L16" to_switchpoint="0"/>
+      <wireconn num_conns="11*to" from_type="L4" from_switchpoint="0,3,2,1" to_type="L16" to_switchpoint="0"/>
+      <!-- L4 Drivers -->
+      <wireconn num_conns="2*to" from_order="shuffled">
+        <from type="L16" switchpoint="0,12,8,4"/>
+        <from type="L4" switchpoint="0"/>
+        <to type="L4" switchpoint="0"/>
+      </wireconn>
+      <!--<wireconn num_conns="1*to" from_type="L4" from_switchpoint="0" to_type="L4" to_switchpoint="0"/>-->
+      <!--<wireconn num_conns="1*to" from_type="L16" from_switchpoint="0,12,8,4" to_type="L4" to_switchpoint="0"/>-->
+    </switchblock>
+    <switchblock name="wilton_turn_fringe" type="unidir">
+      <!-- Non-corner perimeter SBs -->
+      <switchblock_location type="FRINGE"/>
+      <switchfuncs>
+        <!-- Counter-clock-wise turns -->
+        <func type="lt" formula="W-t"/>
+        <!-- left to top -->
+        <func type="tr" formula="t+1"/>
+        <!-- top to right -->
+        <func type="rb" formula="2*W-2-t"/>
+        <!-- right to bottom -->
+        <func type="bl" formula="t+1"/>
+        <!-- bottom to left -->
+        <!-- Clock-wise turns -->
+        <func type="tl" formula="W-t"/>
+        <!-- top to left -->
+        <func type="rt" formula="t+1"/>
+        <!-- right to top -->
+        <func type="br" formula="2*W-2-t"/>
+        <!-- bottom to right -->
+        <func type="lb" formula="t+1"/>
+        <!-- left to bottom -->
+      </switchfuncs>
+      <!-- We use 'max' style connections here to ensure there are no dangling wires, otherwise like core turns -->
+      <!-- L16 drivers -->
+      <wireconn num_conns="3*max(from,to)" from_type="L16" from_switchpoint="0,12,8,4" to_type="L16" to_switchpoint="0"/>
+      <wireconn num_conns="21*max(from,to)" from_type="L4" from_switchpoint="0" to_type="L16" to_switchpoint="0"/>
+      <!-- L4 drivers -->
+      <wireconn num_conns="1*max(from,to)" from_type="L16" from_switchpoint="0,12,8,4" from_order="fixed" to_type="L4" to_switchpoint="0"/>
+      <wireconn num_conns="1*max(from,to)" from_type="L4" from_switchpoint="0,1,2,3" from_order="shuffled" to_type="L4" to_switchpoint="0"/>
+    </switchblock>
+  </switchblocklist>
+
+  <clocks>
+    <clock buffer_size="auto" C_wire="2.5e-10"/>
+  </clocks>
+
+</architecture>
+
+

From 7532f9a65f9b72500c81c197f61c4a8ef3f1ebea Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 9 Aug 2023 12:33:05 -0400
Subject: [PATCH 061/257] add a helper function to move utils to get the union
 of a vector of bbs

---
 vpr/src/place/move_utils.cpp | 27 +++++++++++++++++++++++++++
 vpr/src/place/move_utils.h   |  2 ++
 2 files changed, 29 insertions(+)

diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index f04bedc42df..2043dc57e24 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -1255,3 +1255,30 @@ int find_free_layer(t_logical_block_type_ptr logical_block, const t_pl_loc& loc)
 
     return free_layer;
 }
+
+t_2D_tbb union_2d_tbb(const std::vector<t_2D_tbb>& tbb_vec) { // Smallest box enclosing every entry of tbb_vec whose xmin != OPEN
+    t_2D_tbb merged_bb(std::numeric_limits<int>::max(), // xmin seed; assumes ctor order (xmin, xmax, ymin, ymax, layer) - TODO confirm
+                       std::numeric_limits<int>::min(), // xmax seed: any real xmax compares greater
+                       std::numeric_limits<int>::max(), // ymin seed: any real ymin compares smaller
+                       std::numeric_limits<int>::min(), // ymax seed: any real ymax compares greater
+                       -1);
+
+    for (const auto& bb : tbb_vec) {
+        if (bb.xmin == OPEN){
+            continue;
+        }
+        if (bb.xmin < merged_bb.xmin) {
+            merged_bb.xmin = bb.xmin; // grow the union to the left
+        }
+        if (bb.xmax > merged_bb.xmax) {
+            merged_bb.xmax = bb.xmax; // grow the union to the right
+        }
+        if (bb.ymin < merged_bb.ymin) {
+            merged_bb.ymin = bb.ymin; // grow the union downwards
+        }
+        if (bb.ymax > merged_bb.ymax) {
+            merged_bb.ymax = bb.ymax; // grow the union upwards
+        }
+    }
+
+}
diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h
index f8d69fc072a..79a5bc4fca2 100644
--- a/vpr/src/place/move_utils.h
+++ b/vpr/src/place/move_utils.h
@@ -349,4 +349,6 @@ int get_num_agent_types();
 
 int find_free_layer(t_logical_block_type_ptr logical_block, const t_pl_loc& loc);
 
+t_2D_tbb union_2d_tbb(const std::vector<t_2D_tbb>& tbb_vec);
+
 #endif

From 6411e311262bf4e815b08d5fa8a093778ca358ba Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 9 Aug 2023 12:34:52 -0400
Subject: [PATCH 062/257] fix the issue with 3d bb in median move

---
 vpr/src/place/median_move_generator.cpp | 188 +++++++++++++-----------
 1 file changed, 106 insertions(+), 82 deletions(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index 7d75926085b..f431e45e467 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -5,9 +5,9 @@
 #include "placer_globals.h"
 #include "move_utils.h"
 
-static bool get_bb_incrementally(ClusterNetId net_id, t_bb* bb_coord_new, int xold, int yold, int xnew, int ynew);
+static bool get_bb_incrementally(ClusterNetId net_id, std::vector<t_2D_tbb>& bb_coord_new, int layer, int xold, int yold, int xnew, int ynew);
 
-static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb* bb_coord_new, ClusterBlockId block_id, bool& skip_net);
+static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, std::vector<t_2D_tbb>& bb_coord_new, ClusterBlockId block_id, bool& skip_net);
 
 e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, e_move_type& /*move_type*/, t_logical_block_type& blk_type, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* /*criticalities*/) {
     //Find a movable block based on blk_type
@@ -22,6 +22,7 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
     auto& device_ctx = g_vpr_ctx.device();
     auto& place_move_ctx = g_placer_ctx.mutable_move();
 
+    int num_layers = device_ctx.grid.get_num_layers();
     t_pl_loc from = place_ctx.block_locs[b_from].loc;
     auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from);
     auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer});
@@ -30,7 +31,8 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
     /* Calculate the median region */
     t_pl_loc to;
 
-    t_bb coords, limit_coords;
+    std::vector<t_2D_tbb> coords;
+    t_2D_tbb limit_coords;
     ClusterBlockId bnum;
     int pnum, xnew, xold, ynew, yold;
 
@@ -55,7 +57,7 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
             continue;
         if (cluster_ctx.clb_nlist.net_sinks(net_id).size() < SMALL_NET) {
             //calculate the bb from scratch
-            get_bb_from_scratch_excluding_block(net_id, &coords, b_from, skip_net);
+            get_bb_from_scratch_excluding_block(net_id, coords, b_from, skip_net);
             if (skip_net)
                 continue;
         } else {
@@ -67,32 +69,34 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
             yold = place_ctx.block_locs[bnum].loc.y + physical_tile_type(bnum)->pin_height_offset[pnum];
             xold = std::max(std::min(xold, (int)device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
             yold = std::max(std::min(yold, (int)device_ctx.grid.height() - 2), 1); //-2 for no perim channels
+            int block_layer = place_ctx.block_locs[bnum].loc.layer;
 
             //To calulate the bb incrementally while excluding the moving block
             //assume that the moving block is moved to a non-critical coord of the bb
-            if (place_move_ctx.bb_coords[net_id].xmin == xold) {
-                xnew = place_move_ctx.bb_coords[net_id].xmax;
+            if (place_move_ctx.bb_coords[net_id][block_layer].xmin == xold) {
+                xnew = place_move_ctx.bb_coords[net_id][block_layer].xmax;
             } else {
-                xnew = place_move_ctx.bb_coords[net_id].xmin;
+                xnew = place_move_ctx.bb_coords[net_id][block_layer].xmin;
             }
 
-            if (place_move_ctx.bb_coords[net_id].ymin == yold) {
-                ynew = place_move_ctx.bb_coords[net_id].ymax;
+            if (place_move_ctx.bb_coords[net_id][block_layer].ymin == yold) {
+                ynew = place_move_ctx.bb_coords[net_id][block_layer].ymax;
             } else {
-                ynew = place_move_ctx.bb_coords[net_id].ymin;
+                ynew = place_move_ctx.bb_coords[net_id][block_layer].ymin;
             }
 
-            if (!get_bb_incrementally(net_id, &coords, xold, yold, xnew, ynew)) {
-                get_bb_from_scratch_excluding_block(net_id, &coords, b_from, skip_net);
+            if (!get_bb_incrementally(net_id, coords, block_layer, xold, yold, xnew, ynew)) {
+                get_bb_from_scratch_excluding_block(net_id, coords, b_from, skip_net);
                 if (skip_net)
                     continue;
             }
         }
         //push the calculated coorinates into X,Y coord vectors
-        place_move_ctx.X_coord.push_back(coords.xmin);
-        place_move_ctx.X_coord.push_back(coords.xmax);
-        place_move_ctx.Y_coord.push_back(coords.ymin);
-        place_move_ctx.Y_coord.push_back(coords.ymax);
+        auto merged_coords = union_2d_tbb(coords);
+        place_move_ctx.X_coord.push_back(merged_coords.xmin);
+        place_move_ctx.X_coord.push_back(merged_coords.xmax);
+        place_move_ctx.Y_coord.push_back(merged_coords.ymin);
+        place_move_ctx.Y_coord.push_back(merged_coords.ymax);
     }
 
     if ((place_move_ctx.X_coord.size() == 0) || (place_move_ctx.Y_coord.size() == 0))
@@ -149,17 +153,18 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
  * Currently assumes channels on both sides of the CLBs forming the   *
  * edges of the bounding box can be used.  Essentially, I am assuming *
  * the pins always lie on the outside of the bounding box.            */
-static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb* bb_coord_new, ClusterBlockId block_id, bool& skip_net) {
+static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, std::vector<t_2D_tbb>& bb_coord_new, ClusterBlockId block_id, bool& skip_net) {
     //TODO: account for multiple physical pin instances per logical pin
 
     skip_net = true;
 
-    int xmin = 0;
-    int xmax = 0;
-    int ymin = 0;
-    int ymax = 0;
+    int num_layers = g_vpr_ctx.device().grid.get_num_layers();
+
+    std::vector<int> xmin(num_layers, OPEN);
+    std::vector<int> xmax(num_layers, OPEN);
+    std::vector<int> ymin(num_layers, OPEN);
+    std::vector<int> ymax(num_layers, OPEN);
 
-    int x, y;
     int pnum;
 
     auto& cluster_ctx = g_vpr_ctx.clustering();
@@ -167,20 +172,21 @@ static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb* bb_co
     auto& device_ctx = g_vpr_ctx.device();
 
     ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id);
-    bool first_block = false;
+    std::vector<bool> first_block(num_layers, false);
 
     if (bnum != block_id) {
         skip_net = false;
         pnum = net_pin_to_tile_pin_index(net_id, 0);
-        x = place_ctx.block_locs[bnum].loc.x + physical_tile_type(bnum)->pin_width_offset[pnum];
-        y = place_ctx.block_locs[bnum].loc.y + physical_tile_type(bnum)->pin_height_offset[pnum];
-
-        xmin = x;
-        ymin = y;
-        xmax = x;
-        ymax = y;
-
-        first_block = true;
+        int src_x = place_ctx.block_locs[bnum].loc.x + physical_tile_type(bnum)->pin_width_offset[pnum];
+        int src_y = place_ctx.block_locs[bnum].loc.y + physical_tile_type(bnum)->pin_height_offset[pnum];
+
+        for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+            xmin[layer_num] = src_x;
+            ymin[layer_num] = src_y;
+            xmax[layer_num] = src_x;
+            ymax[layer_num] = src_y;
+            first_block[layer_num] = true;
+        }
     }
 
     for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) {
@@ -189,26 +195,28 @@ static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb* bb_co
         if (bnum == block_id)
             continue;
         skip_net = false;
-        x = place_ctx.block_locs[bnum].loc.x + physical_tile_type(bnum)->pin_width_offset[pnum];
-        y = place_ctx.block_locs[bnum].loc.y + physical_tile_type(bnum)->pin_height_offset[pnum];
-
-        if (!first_block) {
-            xmin = x;
-            ymin = y;
-            xmax = x;
-            ymax = y;
-            first_block = true;
+        int x = place_ctx.block_locs[bnum].loc.x + physical_tile_type(bnum)->pin_width_offset[pnum];
+        int y = place_ctx.block_locs[bnum].loc.y + physical_tile_type(bnum)->pin_height_offset[pnum];
+        int layer_num = place_ctx.block_locs[bnum].loc.layer;
+
+        if (!first_block[layer_num]) {
+            xmin[layer_num] = x;
+            ymin[layer_num] = y;
+            xmax[layer_num] = x;
+            ymax[layer_num] = y;
+            first_block[layer_num] = true;
+            continue;
         }
-        if (x < xmin) {
-            xmin = x;
-        } else if (x > xmax) {
-            xmax = x;
+        if (x < xmin[layer_num]) {
+            xmin[layer_num] = x;
+        } else if (x > xmax[layer_num]) {
+            xmax[layer_num] = x;
         }
 
-        if (y < ymin) {
-            ymin = y;
-        } else if (y > ymax) {
-            ymax = y;
+        if (y < ymin[layer_num]) {
+            ymin[layer_num] = y;
+        } else if (y > ymax[layer_num]) {
+            ymax[layer_num] = y;
         }
     }
 
@@ -219,11 +227,18 @@ static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb* bb_co
      * channel immediately to the left of the bounding box, I want to    *
      * clip to 1 in both directions as well (since minimum channel index *
      * is 0).  See route_common.cpp for a channel diagram.               */
-
-    bb_coord_new->xmin = std::max(std::min<int>(xmin, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
-    bb_coord_new->ymin = std::max(std::min<int>(ymin, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
-    bb_coord_new->xmax = std::max(std::min<int>(xmax, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
-    bb_coord_new->ymax = std::max(std::min<int>(ymax, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
+    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+        if (!first_block[layer_num]) {
+            bb_coord_new[layer_num].xmin = OPEN;
+            bb_coord_new[layer_num].ymin = OPEN;
+            bb_coord_new[layer_num].xmax = OPEN;
+            bb_coord_new[layer_num].ymax = OPEN;
+        }
+        bb_coord_new[layer_num].xmin = std::max(std::min<int>(xmin[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
+        bb_coord_new[layer_num].ymin = std::max(std::min<int>(ymin[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels
+        bb_coord_new[layer_num].xmax = std::max(std::min<int>(xmax[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
+        bb_coord_new[layer_num].ymax = std::max(std::min<int>(ymax[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels
+    }
 }
 
 /*
@@ -239,22 +254,31 @@ static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb* bb_co
  * the pins always lie on the outside of the bounding box.            *
  * The x and y coordinates are the pin's x and y coordinates.         */
 /* IO blocks are considered to be one cell in for simplicity.         */
-static bool get_bb_incrementally(ClusterNetId net_id, t_bb* bb_coord_new, int xold, int yold, int xnew, int ynew) {
+static bool get_bb_incrementally(ClusterNetId net_id, std::vector<t_2D_tbb>& bb_coord_new, int layer, int xold, int yold, int xnew, int ynew) {
     //TODO: account for multiple physical pin instances per logical pin
 
-    const t_bb *curr_bb_edge, *curr_bb_coord;
+    const t_2D_tbb *curr_bb_edge, *curr_bb_coord;
 
     auto& device_ctx = g_vpr_ctx.device();
     auto& place_move_ctx = g_placer_ctx.move();
 
+    int num_layers = device_ctx.grid.get_num_layers();
+
     xnew = std::max(std::min<int>(xnew, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
     ynew = std::max(std::min<int>(ynew, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
     xold = std::max(std::min<int>(xold, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
     yold = std::max(std::min<int>(yold, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
 
+    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+        if (layer_num == layer) { // the moving block's layer is recomputed incrementally below
+            continue;
+        }
+        bb_coord_new[layer_num] = place_move_ctx.bb_coords[net_id][layer_num]; // every other layer keeps its own current box
+    }
+
     /* The net had NOT been updated before, could use the old values */
-    curr_bb_coord = &(place_move_ctx.bb_coords[net_id]);
-    curr_bb_edge = &(place_move_ctx.bb_num_on_edges[net_id]);
+    curr_bb_coord = &(place_move_ctx.bb_coords[net_id][layer]);
+    curr_bb_edge = &(place_move_ctx.bb_num_on_edges[net_id][layer]);
 
     /* Check if I can update the bounding box incrementally. */
 
@@ -266,20 +290,20 @@ static bool get_bb_incrementally(ClusterNetId net_id, t_bb* bb_coord_new, int xo
             if (curr_bb_edge->xmax == 1) {
                 return false;
             } else {
-                bb_coord_new->xmax = curr_bb_coord->xmax;
+                bb_coord_new[layer].xmax = curr_bb_coord->xmax;
             }
         } else { /* Move to left, old postion was not at xmax. */
-            bb_coord_new->xmax = curr_bb_coord->xmax;
+            bb_coord_new[layer].xmax = curr_bb_coord->xmax;
         }
 
         /* Now do the xmin fields for coordinates and number of edges. */
 
         if (xnew < curr_bb_coord->xmin) { /* Moved past xmin */
-            bb_coord_new->xmin = xnew;
+            bb_coord_new[layer].xmin = xnew;
         } else if (xnew == curr_bb_coord->xmin) { /* Moved to xmin */
-            bb_coord_new->xmin = xnew;
+            bb_coord_new[layer].xmin = xnew;
         } else { /* Xmin unchanged. */
-            bb_coord_new->xmin = curr_bb_coord->xmin;
+            bb_coord_new[layer].xmin = curr_bb_coord->xmin;
         }
         /* End of move to left case. */
 
@@ -291,25 +315,25 @@ static bool get_bb_incrementally(ClusterNetId net_id, t_bb* bb_coord_new, int xo
             if (curr_bb_edge->xmin == 1) {
                 return false;
             } else {
-                bb_coord_new->xmin = curr_bb_coord->xmin;
+                bb_coord_new[layer].xmin = curr_bb_coord->xmin;
             }
         } else { /* Move to right, old position was not at xmin. */
-            bb_coord_new->xmin = curr_bb_coord->xmin;
+            bb_coord_new[layer].xmin = curr_bb_coord->xmin;
         }
         /* Now do the xmax fields for coordinates and number of edges. */
 
         if (xnew > curr_bb_coord->xmax) { /* Moved past xmax. */
-            bb_coord_new->xmax = xnew;
+            bb_coord_new[layer].xmax = xnew;
         } else if (xnew == curr_bb_coord->xmax) { /* Moved to xmax */
-            bb_coord_new->xmax = xnew;
+            bb_coord_new[layer].xmax = xnew;
         } else { /* Xmax unchanged. */
-            bb_coord_new->xmax = curr_bb_coord->xmax;
+            bb_coord_new[layer].xmax = curr_bb_coord->xmax;
         }
         /* End of move to right case. */
 
     } else { /* xnew == xold -- no x motion. */
-        bb_coord_new->xmin = curr_bb_coord->xmin;
-        bb_coord_new->xmax = curr_bb_coord->xmax;
+        bb_coord_new[layer].xmin = curr_bb_coord->xmin;
+        bb_coord_new[layer].xmax = curr_bb_coord->xmax;
     }
 
     /* Now account for the y-direction motion. */
@@ -322,20 +346,20 @@ static bool get_bb_incrementally(ClusterNetId net_id, t_bb* bb_coord_new, int xo
             if (curr_bb_edge->ymax == 1) {
                 return false;
             } else {
-                bb_coord_new->ymax = curr_bb_coord->ymax;
+                bb_coord_new[layer].ymax = curr_bb_coord->ymax;
             }
         } else { /* Move down, old postion was not at ymax. */
-            bb_coord_new->ymax = curr_bb_coord->ymax;
+            bb_coord_new[layer].ymax = curr_bb_coord->ymax;
         }
 
         /* Now do the ymin fields for coordinates and number of edges. */
 
         if (ynew < curr_bb_coord->ymin) { /* Moved past ymin */
-            bb_coord_new->ymin = ynew;
+            bb_coord_new[layer].ymin = ynew;
         } else if (ynew == curr_bb_coord->ymin) { /* Moved to ymin */
-            bb_coord_new->ymin = ynew;
+            bb_coord_new[layer].ymin = ynew;
         } else { /* ymin unchanged. */
-            bb_coord_new->ymin = curr_bb_coord->ymin;
+            bb_coord_new[layer].ymin = curr_bb_coord->ymin;
         }
         /* End of move down case. */
 
@@ -347,26 +371,26 @@ static bool get_bb_incrementally(ClusterNetId net_id, t_bb* bb_coord_new, int xo
             if (curr_bb_edge->ymin == 1) {
                 return false;
             } else {
-                bb_coord_new->ymin = curr_bb_coord->ymin;
+                bb_coord_new[layer].ymin = curr_bb_coord->ymin;
             }
         } else { /* Moved up, old position was not at ymin. */
-            bb_coord_new->ymin = curr_bb_coord->ymin;
+            bb_coord_new[layer].ymin = curr_bb_coord->ymin;
         }
 
         /* Now do the ymax fields for coordinates and number of edges. */
 
         if (ynew > curr_bb_coord->ymax) { /* Moved past ymax. */
-            bb_coord_new->ymax = ynew;
+            bb_coord_new[layer].ymax = ynew;
         } else if (ynew == curr_bb_coord->ymax) { /* Moved to ymax */
-            bb_coord_new->ymax = ynew;
+            bb_coord_new[layer].ymax = ynew;
         } else { /* ymax unchanged. */
-            bb_coord_new->ymax = curr_bb_coord->ymax;
+            bb_coord_new[layer].ymax = curr_bb_coord->ymax;
         }
         /* End of move up case. */
 
     } else { /* ynew == yold -- no y motion. */
-        bb_coord_new->ymin = curr_bb_coord->ymin;
-        bb_coord_new->ymax = curr_bb_coord->ymax;
+        bb_coord_new[layer].ymin = curr_bb_coord->ymin;
+        bb_coord_new[layer].ymax = curr_bb_coord->ymax;
     }
     return true;
 }

From 853c7552468598bd1340f6aa0d550b20b2b8ae7b Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 9 Aug 2023 12:55:11 -0400
Subject: [PATCH 063/257] remove unused vars

---
 vpr/src/place/median_move_generator.cpp | 1 -
 vpr/src/place/move_utils.cpp            | 3 ++-
 vpr/src/place/place.cpp                 | 6 ++----
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index f431e45e467..323c0e4a389 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -22,7 +22,6 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
     auto& device_ctx = g_vpr_ctx.device();
     auto& place_move_ctx = g_placer_ctx.mutable_move();
 
-    int num_layers = device_ctx.grid.get_num_layers();
     t_pl_loc from = place_ctx.block_locs[b_from].loc;
     auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from);
     auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer});
diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index 2043dc57e24..4501d540915 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -1264,7 +1264,7 @@ t_2D_tbb union_2d_tbb(const std::vector<t_2D_tbb>& tbb_vec) {
                        -1);
 
     for (const auto& bb : tbb_vec) {
-        if (bb.xmin == OPEN){
+        if (bb.xmin == OPEN) {
             continue;
         }
         if (bb.xmin < merged_bb.xmin) {
@@ -1281,4 +1281,5 @@ t_2D_tbb union_2d_tbb(const std::vector<t_2D_tbb>& tbb_vec) {
         }
     }
 
+    return merged_bb;
 }
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 41a45974629..6efce38011a 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -2548,7 +2548,7 @@ static void get_bb_from_scratch(ClusterNetId net_id,
         num_on_edges[layer].ymin = ymin_edge[layer];
         num_on_edges[layer].ymax = ymax_edge[layer];
     }
-    g_placer_ctx.mutable_move().num_sink_pin_layer[net_id] = num_sink_pin_layer;
+    layer_pin_sink_count = num_sink_pin_layer;
 }
 
 static double wirelength_crossing_count(size_t fanout) {
@@ -2568,7 +2568,6 @@ static double get_net_wirelength_estimate(ClusterNetId net_id, const std::vector
 
     double ncost = 0.;
     double crossing = 0.;
-    auto& cluster_ctx = g_vpr_ctx.clustering();
     const auto& place_move_ctx = g_placer_ctx.move();
     int num_layers = g_vpr_ctx.device().grid.get_num_layers();
 
@@ -2599,7 +2598,6 @@ static double get_net_cost(ClusterNetId /* net_id */,
 
     double ncost = 0.;
     double crossing = 0.;
-    const auto& cluster_ctx = g_vpr_ctx.clustering();
     int num_layers = g_vpr_ctx.device().grid.get_num_layers();
 
     for (int layer_num = 0; layer_num < num_layers; layer_num++) {
@@ -2782,7 +2780,7 @@ static void update_bb(ClusterNetId net_id,
     }
 }
 
-static void update_bb_pin_sink_count(ClusterNetId net_id,
+static void update_bb_pin_sink_count(ClusterNetId /* net_id */,
                                      const t_physical_tile_loc& pin_old_loc,
                                      const t_physical_tile_loc& pin_new_loc,
                                      const std::vector<int>& curr_layer_pin_sink_count,

From 21c586cb4abf0ac91d3e15e4e4f2ad48b32813fe Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 9 Aug 2023 14:34:03 -0400
Subject: [PATCH 064/257] allocate enough space for num_sink_pin_layer

---
 vpr/src/place/place.cpp        | 4 +++-
 vpr/src/place/placer_context.h | 5 +++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 6efce38011a..0692126e494 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -2357,9 +2357,11 @@ static void alloc_and_load_placement_structs(float place_cost_exp,
 
     net_cost.resize(num_nets, -1.);
     proposed_net_cost.resize(num_nets, -1.);
+    place_move_ctx.bb_num_on_edges.resize(num_nets, std::vector<t_2D_tbb>(num_layers, t_2D_tbb()));
+
     place_move_ctx.bb_coords.resize(num_nets, std::vector<t_2D_tbb>(num_layers, t_2D_tbb()));
 
-    place_move_ctx.bb_num_on_edges.resize(num_nets, std::vector<t_2D_tbb>(num_layers, t_2D_tbb()));
+    place_move_ctx.num_sink_pin_layer.resize(num_nets, std::vector<int>(num_layers, 0));
 
     /* Used to store costs for moves not yet made and to indicate when a net's   *
      * cost has been recomputed. proposed_net_cost[inet] < 0 means net's cost hasn't *
diff --git a/vpr/src/place/placer_context.h b/vpr/src/place/placer_context.h
index 9971699ece2..74eaa1647a4 100644
--- a/vpr/src/place/placer_context.h
+++ b/vpr/src/place/placer_context.h
@@ -91,12 +91,13 @@ struct PlacerRuntimeContext : public Context {
  */
 struct PlacerMoveContext : public Context {
   public:
-    // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the bounding box coordinates of a net's bounding box
-    vtr::vector<ClusterNetId, std::vector<t_2D_tbb>> bb_coords;
 
     // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each of a net's bounding box (to allow efficient updates)
     vtr::vector<ClusterNetId, std::vector<t_2D_tbb>> bb_num_on_edges;
 
+    // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the bounding box coordinates of a net's bounding box
+    vtr::vector<ClusterNetId, std::vector<t_2D_tbb>> bb_coords;
+
     // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each layer ()
     vtr::vector<ClusterNetId, std::vector<int>> num_sink_pin_layer;
 

From 47575778f602e480924dfea45fbe7704e7c0f415 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 9 Aug 2023 14:38:29 -0400
Subject: [PATCH 065/257] allocate enough space for xmin_edge

---
 vpr/src/place/place.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 0692126e494..99ac50a42cf 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -2456,10 +2456,10 @@ static void get_bb_from_scratch(ClusterNetId net_id,
     std::vector<int> xmax(num_layers);
     std::vector<int> ymin(num_layers);
     std::vector<int> ymax(num_layers);
-    std::vector<int> xmin_edge;
-    std::vector<int> xmax_edge;
-    std::vector<int> ymin_edge;
-    std::vector<int> ymax_edge;
+    std::vector<int> xmin_edge(num_layers);
+    std::vector<int> xmax_edge(num_layers);
+    std::vector<int> ymin_edge(num_layers);
+    std::vector<int> ymax_edge(num_layers);
 
     std::vector<int> num_sink_pin_layer(num_layers, 0);
 

From 3082c2265f49c2161cd3baef67c96f67e7161e5b Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 9 Aug 2023 14:43:21 -0400
Subject: [PATCH 066/257] if layer doesn't have a valid bb, pass

---
 vpr/src/place/median_move_generator.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index 323c0e4a389..7a8778aed60 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -232,6 +232,7 @@ static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, std::vector
             bb_coord_new[layer_num].ymin = OPEN;
             bb_coord_new[layer_num].xmax = OPEN;
             bb_coord_new[layer_num].ymax = OPEN;
+            continue;
         }
         bb_coord_new[layer_num].xmin = std::max(std::min<int>(xmin[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
         bb_coord_new[layer_num].ymin = std::max(std::min<int>(ymin[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels

From a20b1ca56a73a836e9cbedb483bbd53b7d5d9c38 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 9 Aug 2023 14:49:16 -0400
Subject: [PATCH 067/257] fix a bug in median_move

---
 vpr/src/place/median_move_generator.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index 7a8778aed60..786f1988df1 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -22,6 +22,8 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
     auto& device_ctx = g_vpr_ctx.device();
     auto& place_move_ctx = g_placer_ctx.mutable_move();
 
+    int num_layers = device_ctx.grid.get_num_layers();
+
     t_pl_loc from = place_ctx.block_locs[b_from].loc;
     auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from);
     auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer});
@@ -30,7 +32,7 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
     /* Calculate the median region */
     t_pl_loc to;
 
-    std::vector<t_2D_tbb> coords;
+    std::vector<t_2D_tbb> coords (num_layers, t_2D_tbb(OPEN, OPEN, OPEN, OPEN, OPEN));
     t_2D_tbb limit_coords;
     ClusterBlockId bnum;
     int pnum, xnew, xold, ynew, yold;

From 7be4426d324462014d19485ab9a1dcd35783f1aa Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 10 Aug 2023 15:55:33 -0400
Subject: [PATCH 068/257] direct conversion from distance

---
 vpr/src/place/initial_placement.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/vpr/src/place/initial_placement.cpp b/vpr/src/place/initial_placement.cpp
index b5139653248..72babd1a232 100644
--- a/vpr/src/place/initial_placement.cpp
+++ b/vpr/src/place/initial_placement.cpp
@@ -425,9 +425,8 @@ static std::vector<ClusterBlockId> find_centroid_loc(t_pl_macro pl_macro, t_pl_l
         centroid.y = acc_y / acc_weight;
         if (find_layer) {
             auto max_element = std::max_element(layer_count.begin(), layer_count.end());
-            VTR_ASSERT(*max_element != 0);
-            auto index = std::distance(layer_count.begin(), max_element);
-            centroid.layer = static_cast<int>(index);
+            VTR_ASSERT((*max_element) != 0);
+            centroid.layer = (int)std::distance(layer_count.begin(), max_element);
         } else {
             centroid.layer = head_layer_num;
         }

From ca6dc603f5963814e8370baa727731f64265f94c Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 10 Aug 2023 15:59:47 -0400
Subject: [PATCH 069/257] fix the bugs of 3d bb for placement

---
 vpr/src/place/place.cpp | 252 +++++++++++++++++++++++++---------------
 1 file changed, 156 insertions(+), 96 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 99ac50a42cf..81c624e67a8 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -342,7 +342,7 @@ static void update_bb_pin_sink_count(ClusterNetId net_id,
                                      const std::vector<int>& curr_layer_pin_sink_count,
                                      std::vector<int>& bb_pin_sink_count_new);
 
-static void update_bb_edges(ClusterNetId net_id,
+static void try_remove_block_from_bb_edge(ClusterNetId net_id,
                             const t_physical_tile_loc& pin_old_loc,
                             const t_physical_tile_loc& pin_new_loc,
                             const std::vector<t_2D_tbb>& curr_bb_edge,
@@ -394,7 +394,7 @@ static void update_placement_cost_normalization_factors(t_placer_costs* costs, c
 static double get_total_cost(t_placer_costs* costs, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts);
 
 static double get_net_cost(ClusterNetId net_id,
-                           const std::vector<t_2D_tbb>& bb_ptr,
+                           const std::vector<t_2D_tbb>& bbptr,
                            const std::vector<int>& layer_pin_sink_count);
 
 static void get_bb_from_scratch(ClusterNetId net_id,
@@ -402,7 +402,9 @@ static void get_bb_from_scratch(ClusterNetId net_id,
                                 std::vector<t_2D_tbb>& coords,
                                 std::vector<int>& layer_pin_sink_count);
 
-static double get_net_wirelength_estimate(ClusterNetId net_id, const std::vector<t_2D_tbb>& bbptr);
+static double get_net_wirelength_estimate(ClusterNetId net_id,
+                                          const std::vector<t_2D_tbb>& bbptr,
+                                          const std::vector<int>& layer_pin_sink_count);
 
 static void free_try_swap_arrays();
 
@@ -1417,6 +1419,7 @@ static void update_move_nets(int num_nets_affected) {
 
         place_move_ctx.bb_coords[net_id] = ts_bb_coord_new[net_id];
         place_move_ctx.num_sink_pin_layer[net_id] = ts_layer_sink_pin_count[net_id];
+        VTR_ASSERT(ts_layer_sink_pin_count[net_id][0] == cluster_ctx.clb_nlist.net_pins(net_id).size()-1);
         if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET)
             place_move_ctx.bb_num_on_edges[net_id] = ts_bb_edge_new[net_id];
 
@@ -2281,7 +2284,8 @@ static double comp_bb_cost(e_cost_methods method) {
             cost += net_cost[net_id];
             if (method == CHECK)
                 expected_wirelength += get_net_wirelength_estimate(net_id,
-                                                                   place_move_ctx.bb_coords[net_id]);
+                                                                   place_move_ctx.bb_coords[net_id],
+                                                                   place_move_ctx.num_sink_pin_layer[net_id]);
         }
     }
 
@@ -2422,9 +2426,9 @@ static void alloc_and_load_try_swap_structs() {
 
     const int num_layers = g_vpr_ctx.device().grid.get_num_layers();
 
-    ts_bb_coord_new.resize(num_nets, std::vector<t_2D_tbb>(num_layers, t_2D_tbb()));
     ts_bb_edge_new.resize(num_nets, std::vector<t_2D_tbb>(num_layers, t_2D_tbb()));
-    ts_layer_sink_pin_count.resize(num_nets, std::vector<int>(num_layers, 0));
+    ts_bb_coord_new.resize(num_nets, std::vector<t_2D_tbb>(num_layers, t_2D_tbb()));
+    ts_layer_sink_pin_count.resize(num_nets, std::vector<int>(num_layers, OPEN));
     ts_nets_to_update.resize(num_nets, ClusterNetId::INVALID());
 
     auto& place_ctx = g_vpr_ctx.mutable_placement();
@@ -2432,8 +2436,8 @@ static void alloc_and_load_try_swap_structs() {
 }
 
 static void free_try_swap_structs() {
-    vtr::release_memory(ts_bb_coord_new);
     vtr::release_memory(ts_bb_edge_new);
+    vtr::release_memory(ts_bb_coord_new);
     vtr::release_memory(ts_layer_sink_pin_count);
     vtr::release_memory(ts_nets_to_update);
 
@@ -2451,15 +2455,18 @@ static void get_bb_from_scratch(ClusterNetId net_id,
                                 std::vector<int>& layer_pin_sink_count) {
     auto& device_ctx = g_vpr_ctx.device();
     const int num_layers = device_ctx.grid.get_num_layers();
+    num_on_edges.resize(num_layers, t_2D_tbb());
+    coords.resize(num_layers, t_2D_tbb());
+    layer_pin_sink_count.resize(num_layers, 0);
     int pnum, x, y, layer;
-    std::vector<int> xmin(num_layers);
-    std::vector<int> xmax(num_layers);
-    std::vector<int> ymin(num_layers);
-    std::vector<int> ymax(num_layers);
-    std::vector<int> xmin_edge(num_layers);
-    std::vector<int> xmax_edge(num_layers);
-    std::vector<int> ymin_edge(num_layers);
-    std::vector<int> ymax_edge(num_layers);
+    std::vector<int> xmin(num_layers, OPEN);
+    std::vector<int> xmax(num_layers, OPEN);
+    std::vector<int> ymin(num_layers, OPEN);
+    std::vector<int> ymax(num_layers, OPEN);
+    std::vector<int> xmin_edge(num_layers, OPEN);
+    std::vector<int> xmax_edge(num_layers, OPEN);
+    std::vector<int> ymin_edge(num_layers, OPEN);
+    std::vector<int> ymax_edge(num_layers, OPEN);
 
     std::vector<int> num_sink_pin_layer(num_layers, 0);
 
@@ -2540,17 +2547,33 @@ static void get_bb_from_scratch(ClusterNetId net_id,
     /* Copy the coordinates and number on edges information into the proper   *
      * structures.                                                            */
     for (int layer_num = 0; layer_num < num_layers; layer_num++) {
-        coords[layer].xmin = xmin[layer];
-        coords[layer].xmax = xmax[layer];
-        coords[layer].ymin = ymin[layer];
-        coords[layer].ymax = ymax[layer];
-
-        num_on_edges[layer].xmin = xmin_edge[layer];
-        num_on_edges[layer].xmax = xmax_edge[layer];
-        num_on_edges[layer].ymin = ymin_edge[layer];
-        num_on_edges[layer].ymax = ymax_edge[layer];
+        layer_pin_sink_count[layer_num] = num_sink_pin_layer[layer_num];
+        if (num_sink_pin_layer[layer_num] == 0) {
+            coords[layer].xmin = OPEN;
+            coords[layer].xmax = OPEN;
+            coords[layer].ymin = OPEN;
+            coords[layer].ymax = OPEN;
+            coords[layer].layer_num = OPEN;
+
+            num_on_edges[layer].xmin = OPEN;
+            num_on_edges[layer].xmax = OPEN;
+            num_on_edges[layer].ymin = OPEN;
+            num_on_edges[layer].ymax = OPEN;
+            num_on_edges[layer].layer_num = OPEN;
+        } else {
+            coords[layer].xmin = xmin[layer];
+            coords[layer].xmax = xmax[layer];
+            coords[layer].ymin = ymin[layer];
+            coords[layer].ymax = ymax[layer];
+            coords[layer].layer_num = layer_num;
+
+            num_on_edges[layer].xmin = xmin_edge[layer];
+            num_on_edges[layer].xmax = xmax_edge[layer];
+            num_on_edges[layer].ymin = ymin_edge[layer];
+            num_on_edges[layer].ymax = ymax_edge[layer];
+            num_on_edges[layer].layer_num = layer_num;
+        }
     }
-    layer_pin_sink_count = num_sink_pin_layer;
 }
 
 static double wirelength_crossing_count(size_t fanout) {
@@ -2564,7 +2587,9 @@ static double wirelength_crossing_count(size_t fanout) {
     }
 }
 
-static double get_net_wirelength_estimate(ClusterNetId net_id, const std::vector<t_2D_tbb>& bbptr) {
+static double get_net_wirelength_estimate(ClusterNetId net_id,
+                                          const std::vector<t_2D_tbb>& bbptr,
+                                          const std::vector<int>& layer_pin_sink_count) {
     /* WMF: Finds the estimate of wirelength due to one net by looking at   *
      * its coordinate bounding box.                                         */
 
@@ -2574,8 +2599,11 @@ static double get_net_wirelength_estimate(ClusterNetId net_id, const std::vector
     int num_layers = g_vpr_ctx.device().grid.get_num_layers();
 
     for (int layer_num = 0; layer_num < num_layers; layer_num++) {
-        crossing = wirelength_crossing_count(
-            place_move_ctx.num_sink_pin_layer[net_id][layer_num]);
+        VTR_ASSERT(layer_pin_sink_count[layer_num] != OPEN);
+        if (layer_pin_sink_count[layer_num] == 0){
+            continue;
+        }
+        crossing = wirelength_crossing_count(layer_pin_sink_count[layer_num]);
 
         /* Could insert a check for xmin == xmax.  In that case, assume  *
          * connection will be made with no bends and hence no x-cost.    *
@@ -2603,6 +2631,10 @@ static double get_net_cost(ClusterNetId /* net_id */,
     int num_layers = g_vpr_ctx.device().grid.get_num_layers();
 
     for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+        VTR_ASSERT(layer_pin_sink_count[layer_num] != OPEN);
+        if (layer_pin_sink_count[layer_num] == 0){
+            continue;
+        }
         crossing = wirelength_crossing_count(layer_pin_sink_count[layer_num]);
 
         /* Could insert a check for xmin == xmax.  In that case, assume  *
@@ -2637,10 +2669,10 @@ static void get_non_updateable_bb(ClusterNetId net_id,
     auto& device_ctx = g_vpr_ctx.device();
     int num_layers = device_ctx.grid.get_num_layers();
     num_sink_layer = std::vector<int>(num_layers, 0);
-    std::vector<int> xmax(num_layers);
-    std::vector<int> ymax(num_layers);
-    std::vector<int> xmin(num_layers);
-    std::vector<int> ymin(num_layers);
+    std::vector<int> xmin(num_layers, OPEN);
+    std::vector<int> ymin(num_layers, OPEN);
+    std::vector<int> xmax(num_layers, OPEN);
+    std::vector<int> ymax(num_layers, OPEN);
     int pnum;
 
     auto& cluster_ctx = g_vpr_ctx.clustering();
@@ -2693,10 +2725,19 @@ static void get_non_updateable_bb(ClusterNetId net_id,
      * clip to 1 in both directions as well (since minimum channel index *
      * is 0).  See route_common.cpp for a channel diagram.               */
     for (int layer_num = 0; layer_num < num_layers; layer_num++) {
-        bb_coord_new[layer_num].xmin = max(min<int>(xmin[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
-        bb_coord_new[layer_num].ymin = max(min<int>(ymin[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels
-        bb_coord_new[layer_num].xmax = max(min<int>(xmax[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
-        bb_coord_new[layer_num].ymax = max(min<int>(ymax[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels
+        if (num_sink_layer[layer_num] == 0) {
+            bb_coord_new[layer_num].xmin = OPEN;
+            bb_coord_new[layer_num].ymin = OPEN;
+            bb_coord_new[layer_num].xmax = OPEN;
+            bb_coord_new[layer_num].ymax = OPEN;
+            bb_coord_new[layer_num].layer_num = OPEN;
+        } else {
+            bb_coord_new[layer_num].xmin = max(min<int>(xmin[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
+            bb_coord_new[layer_num].ymin = max(min<int>(ymin[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels
+            bb_coord_new[layer_num].xmax = max(min<int>(xmax[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
+            bb_coord_new[layer_num].ymax = max(min<int>(ymax[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels
+            bb_coord_new[layer_num].layer_num = layer_num;
+        }
     }
 }
 
@@ -2757,7 +2798,7 @@ static void update_bb(ClusterNetId net_id,
                              *curr_layer_pin_sink_count,
                              bb_pin_sink_count_new);
 
-    update_bb_edges(net_id,
+    try_remove_block_from_bb_edge(net_id,
                     pin_old_loc,
                     pin_new_loc,
                     *curr_bb_edge,
@@ -2782,17 +2823,20 @@ static void update_bb(ClusterNetId net_id,
     }
 }
 
-static void update_bb_pin_sink_count(ClusterNetId /* net_id */,
+static void update_bb_pin_sink_count(ClusterNetId net_id,
                                      const t_physical_tile_loc& pin_old_loc,
                                      const t_physical_tile_loc& pin_new_loc,
                                      const std::vector<int>& curr_layer_pin_sink_count,
                                      std::vector<int>& bb_pin_sink_count_new) {
     VTR_ASSERT(curr_layer_pin_sink_count[pin_old_loc.layer_num] > 0);
-    bb_pin_sink_count_new[pin_old_loc.layer_num] = curr_layer_pin_sink_count[pin_old_loc.layer_num] - 1;
-    bb_pin_sink_count_new[pin_new_loc.layer_num] = curr_layer_pin_sink_count[pin_new_loc.layer_num] + 1;
+    bb_pin_sink_count_new[pin_old_loc.layer_num] = curr_layer_pin_sink_count[pin_old_loc.layer_num];
+    bb_pin_sink_count_new[pin_new_loc.layer_num] = curr_layer_pin_sink_count[pin_new_loc.layer_num];
+
+    bb_pin_sink_count_new[pin_old_loc.layer_num] -= 1;
+    bb_pin_sink_count_new[pin_new_loc.layer_num] += 1;
 }
 
-static void update_bb_edges(ClusterNetId net_id,
+static void try_remove_block_from_bb_edge(ClusterNetId net_id,
                             const t_physical_tile_loc& pin_old_loc,
                             const t_physical_tile_loc& pin_new_loc,
                             const std::vector<t_2D_tbb>& curr_bb_edge,
@@ -2801,9 +2845,13 @@ static void update_bb_edges(ClusterNetId net_id,
                             std::vector<t_2D_tbb>& bb_coord_new,
                             std::vector<int>& bb_pin_sink_count_new) {
     int old_layer = pin_old_loc.layer_num;
+    int new_layer = pin_new_loc.layer_num;
 
-    if (pin_old_loc.x == curr_bb_coord[pin_old_loc.layer_num].xmax) {
-        if (pin_old_loc.layer_num != pin_new_loc.layer_num || pin_new_loc.x < pin_old_loc.x) {
+    bb_edge_new[old_layer] = curr_bb_edge[old_layer];
+    bb_coord_new[old_layer] = curr_bb_coord[old_layer];
+
+    if (pin_old_loc.x == curr_bb_coord[old_layer].xmax) {
+        if (old_layer != new_layer || pin_new_loc.x < pin_old_loc.x) {
             remove_block_from_bb_edge(net_id,
                                       bb_edge_new,
                                       bb_coord_new,
@@ -2818,8 +2866,8 @@ static void update_bb_edges(ClusterNetId net_id,
         }
     }
 
-    if (pin_old_loc.x == curr_bb_coord[pin_old_loc.layer_num].xmin) {
-        if (pin_old_loc.layer_num != pin_new_loc.layer_num || pin_new_loc.x > pin_old_loc.x) {
+    if (pin_old_loc.x == curr_bb_coord[old_layer].xmin) {
+        if (old_layer != new_layer || pin_new_loc.x > pin_old_loc.x) {
             remove_block_from_bb_edge(net_id,
                                       bb_edge_new,
                                       bb_coord_new,
@@ -2834,8 +2882,8 @@ static void update_bb_edges(ClusterNetId net_id,
         }
     }
 
-    if (pin_old_loc.y == curr_bb_coord[pin_old_loc.layer_num].ymax) {
-        if (pin_old_loc.layer_num != pin_new_loc.layer_num || pin_new_loc.y < pin_old_loc.y) {
+    if (pin_old_loc.y == curr_bb_coord[old_layer].ymax) {
+        if (old_layer != new_layer || pin_new_loc.y < pin_old_loc.y) {
             remove_block_from_bb_edge(net_id,
                                       bb_edge_new,
                                       bb_coord_new,
@@ -2850,8 +2898,8 @@ static void update_bb_edges(ClusterNetId net_id,
         }
     }
 
-    if (pin_old_loc.y == curr_bb_coord[pin_old_loc.layer_num].ymin) {
-        if (pin_old_loc.layer_num != pin_new_loc.layer_num || pin_new_loc.y > pin_old_loc.y) {
+    if (pin_old_loc.y == curr_bb_coord[old_layer].ymin) {
+        if (old_layer != new_layer || pin_new_loc.y > pin_old_loc.y) {
             remove_block_from_bb_edge(net_id,
                                       bb_edge_new,
                                       bb_coord_new,
@@ -2894,74 +2942,86 @@ static void add_block_to_bb(const t_2D_tbb& bb_edge_old,
                             const t_physical_tile_loc& new_pin_loc,
                             t_2D_tbb& bb_edge_new,
                             t_2D_tbb& bb_coord_new) {
-    int xnew = new_pin_loc.x;
     int xold = old_pin_loc.x;
-    int ynew = new_pin_loc.y;
+    int xnew = new_pin_loc.x;
     int yold = old_pin_loc.y;
+    int ynew = new_pin_loc.y;
+    int layer_old = old_pin_loc.layer_num;
+    int layer_new = new_pin_loc.layer_num;
 
     VTR_ASSERT(bb_edge_old.layer_num == bb_edge_new.layer_num);
     VTR_ASSERT(bb_coord_old.layer_num == bb_coord_new.layer_num);
     VTR_ASSERT(bb_edge_old.layer_num == bb_coord_old.layer_num);
 
-    if (xnew < xold) {
-        if (xnew < bb_coord_old.xmin) { /* Moved past xmin */
-            bb_coord_new.xmin = xnew;
-            bb_edge_new.xmin = 1;
-        } else if (xnew == bb_coord_old.xmin) { /* Moved to xmin */
-            bb_coord_new.xmin = xnew;
-            bb_edge_new.xmin = bb_edge_old.xmin + 1;
-        } else { /* Xmin unchanged. */
+    if (xold == xnew && layer_old == layer_new) {
+        bb_edge_new.xmin = bb_edge_old.xmin;
+        bb_coord_new.xmin = bb_coord_old.xmin;
+        bb_edge_new.xmax = bb_edge_old.xmax;
+        bb_coord_new.xmax = bb_coord_old.xmax;
+    } else if (xnew > bb_coord_old.xmax) {
+        bb_edge_new.xmax = 1;
+        bb_coord_new.xmax = xnew;
+        if (layer_old != layer_new) {
+            bb_edge_new.xmin = bb_edge_old.xmin;
             bb_coord_new.xmin = bb_coord_old.xmin;
+        }
+    } else if (xnew == bb_coord_old.xmax) {
+        bb_edge_new.xmax = bb_edge_old.xmax + 1;
+        bb_coord_new.xmax = xnew;
+        if (layer_old != layer_new) {
             bb_edge_new.xmin = bb_edge_old.xmin;
+            bb_coord_new.xmin = bb_coord_old.xmin;
         }
-    } else if (xnew > xold) {
-        if (xnew > bb_coord_old.xmax) { /* Moved past xmax. */
-            bb_coord_new.xmax = xnew;
-            bb_edge_new.xmax = 1;
-        } else if (xnew == bb_coord_old.xmax) { /* Moved to xmax */
-            bb_coord_new.xmax = xnew;
-            bb_edge_new.xmax = bb_edge_old.xmax + 1;
-        } else { /* Xmax unchanged. */
+    } else if (xnew < bb_coord_old.xmin) {
+        bb_edge_new.xmin = 1;
+        bb_coord_new.xmin = xnew;
+        if (layer_old != layer_new) {
+            bb_edge_new.xmax = bb_edge_old.xmax;
             bb_coord_new.xmax = bb_coord_old.xmax;
+        }
+    } else if (xnew == bb_coord_old.xmin) {
+        bb_edge_new.xmin = bb_edge_old.xmin + 1;
+        bb_coord_new.xmin = xnew;
+        if (layer_old != layer_new) {
             bb_edge_new.xmax = bb_edge_old.xmax;
+            bb_coord_new.xmax = bb_coord_old.xmax;
         }
-    } else {
-        bb_coord_new.xmin = bb_coord_old.xmin;
-        bb_coord_new.xmax = bb_coord_old.xmax;
-        bb_edge_new.xmin = bb_edge_old.xmin;
-        bb_edge_new.xmax = bb_edge_old.xmax;
     }
 
-    if (ynew < yold) {
-        if (ynew < bb_coord_old.ymin) { /* Moved past ymin */
-            bb_coord_new.ymin = ynew;
-            bb_edge_new.ymin = 1;
-        } else if (ynew == bb_coord_old.ymin) { /* Moved to ymin */
-            bb_coord_new.ymin = ynew;
-            bb_edge_new.ymin = bb_edge_old.ymin + 1;
-        } else { /* ymin unchanged. */
+    if (yold == ynew && layer_old == layer_new) {
+        bb_edge_new.ymin = bb_edge_old.ymin;
+        bb_coord_new.ymin = bb_coord_old.ymin;
+        bb_edge_new.ymax = bb_edge_old.ymax;
+        bb_coord_new.ymax = bb_coord_old.ymax;
+    } else if (ynew > bb_coord_old.ymax) {
+        bb_edge_new.ymax = bb_edge_old.ymax + 1;
+        bb_coord_new.ymax = ynew;
+        if (layer_new != layer_old) {
+            bb_edge_new.ymin = bb_edge_old.ymin;
             bb_coord_new.ymin = bb_coord_old.ymin;
+        }
+    } else if (ynew == bb_coord_old.ymax) {
+        bb_edge_new.ymax = 1;
+        bb_coord_new.ymax = ynew;
+        if (layer_new != layer_old) {
             bb_edge_new.ymin = bb_edge_old.ymin;
+            bb_coord_new.ymin = bb_coord_old.ymin;
         }
-        /* End of move down case. */
-    } else if (ynew > yold) {
-        if (ynew > bb_coord_old.ymax) { /* Moved past ymax. */
-            bb_coord_new.ymax = ynew;
-            bb_edge_new.ymax = 1;
-        } else if (ynew == bb_coord_old.ymax) { /* Moved to ymax */
-            bb_coord_new.ymax = ynew;
-            bb_edge_new.ymax = bb_edge_old.ymax + 1;
-        } else { /* ymax unchanged. */
+
+    } else if (ynew < bb_coord_old.ymin) {
+        bb_edge_new.ymin = 1;
+        bb_coord_new.ymin = ynew;
+        if (layer_new != layer_old) {
+            bb_edge_new.ymax = bb_edge_old.ymax;
             bb_coord_new.ymax = bb_coord_old.ymax;
+        }
+    } else if (ynew == bb_coord_old.ymin) {
+        bb_edge_new.ymin = bb_edge_old.ymin + 1;
+        bb_coord_new.ymin = ynew;
+        if (layer_new != layer_old) {
             bb_edge_new.ymax = bb_edge_old.ymax;
+            bb_coord_new.ymax = bb_coord_old.ymax;
         }
-        /* End of move up case. */
-    } else {
-        /* ynew == yold -- no change. */
-        bb_coord_new.ymin = bb_coord_old.ymin;
-        bb_coord_new.ymax = bb_coord_old.ymax;
-        bb_edge_new.ymin = bb_edge_old.ymin;
-        bb_edge_new.ymax = bb_edge_old.ymax;
     }
 }
 

From 143188fbb24a7084fdd79c6ca0180b0ddbbe9f7a Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 10 Aug 2023 16:00:41 -0400
Subject: [PATCH 070/257] make format

---
 vpr/src/place/median_move_generator.cpp |  2 +-
 vpr/src/place/place.cpp                 | 48 ++++++++++++-------------
 vpr/src/place/placer_context.h          |  1 -
 3 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index 786f1988df1..84d0df9efc1 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -32,7 +32,7 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
     /* Calculate the median region */
     t_pl_loc to;
 
-    std::vector<t_2D_tbb> coords (num_layers, t_2D_tbb(OPEN, OPEN, OPEN, OPEN, OPEN));
+    std::vector<t_2D_tbb> coords(num_layers, t_2D_tbb(OPEN, OPEN, OPEN, OPEN, OPEN));
     t_2D_tbb limit_coords;
     ClusterBlockId bnum;
     int pnum, xnew, xold, ynew, yold;
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 81c624e67a8..d80f569a394 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -343,13 +343,13 @@ static void update_bb_pin_sink_count(ClusterNetId net_id,
                                      std::vector<int>& bb_pin_sink_count_new);
 
 static void try_remove_block_from_bb_edge(ClusterNetId net_id,
-                            const t_physical_tile_loc& pin_old_loc,
-                            const t_physical_tile_loc& pin_new_loc,
-                            const std::vector<t_2D_tbb>& curr_bb_edge,
-                            const std::vector<t_2D_tbb>& curr_bb_coord,
-                            std::vector<t_2D_tbb>& bb_edge_new,
-                            std::vector<t_2D_tbb>& bb_coord_new,
-                            std::vector<int>& bb_pin_sink_count_new);
+                                          const t_physical_tile_loc& pin_old_loc,
+                                          const t_physical_tile_loc& pin_new_loc,
+                                          const std::vector<t_2D_tbb>& curr_bb_edge,
+                                          const std::vector<t_2D_tbb>& curr_bb_coord,
+                                          std::vector<t_2D_tbb>& bb_edge_new,
+                                          std::vector<t_2D_tbb>& bb_coord_new,
+                                          std::vector<int>& bb_pin_sink_count_new);
 
 static void remove_block_from_bb_edge(ClusterNetId net_id,
                                       std::vector<t_2D_tbb>& bb_edge_new,
@@ -1419,7 +1419,7 @@ static void update_move_nets(int num_nets_affected) {
 
         place_move_ctx.bb_coords[net_id] = ts_bb_coord_new[net_id];
         place_move_ctx.num_sink_pin_layer[net_id] = ts_layer_sink_pin_count[net_id];
-        VTR_ASSERT(ts_layer_sink_pin_count[net_id][0] == cluster_ctx.clb_nlist.net_pins(net_id).size()-1);
+        VTR_ASSERT(ts_layer_sink_pin_count[net_id][0] == cluster_ctx.clb_nlist.net_pins(net_id).size() - 1);
         if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET)
             place_move_ctx.bb_num_on_edges[net_id] = ts_bb_edge_new[net_id];
 
@@ -2600,7 +2600,7 @@ static double get_net_wirelength_estimate(ClusterNetId net_id,
 
     for (int layer_num = 0; layer_num < num_layers; layer_num++) {
         VTR_ASSERT(layer_pin_sink_count[layer_num] != OPEN);
-        if (layer_pin_sink_count[layer_num] == 0){
+        if (layer_pin_sink_count[layer_num] == 0) {
             continue;
         }
         crossing = wirelength_crossing_count(layer_pin_sink_count[layer_num]);
@@ -2632,7 +2632,7 @@ static double get_net_cost(ClusterNetId /* net_id */,
 
     for (int layer_num = 0; layer_num < num_layers; layer_num++) {
         VTR_ASSERT(layer_pin_sink_count[layer_num] != OPEN);
-        if (layer_pin_sink_count[layer_num] == 0){
+        if (layer_pin_sink_count[layer_num] == 0) {
             continue;
         }
         crossing = wirelength_crossing_count(layer_pin_sink_count[layer_num]);
@@ -2799,13 +2799,13 @@ static void update_bb(ClusterNetId net_id,
                              bb_pin_sink_count_new);
 
     try_remove_block_from_bb_edge(net_id,
-                    pin_old_loc,
-                    pin_new_loc,
-                    *curr_bb_edge,
-                    *curr_bb_coord,
-                    bb_edge_new,
-                    bb_coord_new,
-                    bb_pin_sink_count_new);
+                                  pin_old_loc,
+                                  pin_new_loc,
+                                  *curr_bb_edge,
+                                  *curr_bb_coord,
+                                  bb_edge_new,
+                                  bb_coord_new,
+                                  bb_pin_sink_count_new);
 
     if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
         return;
@@ -2837,13 +2837,13 @@ static void update_bb_pin_sink_count(ClusterNetId net_id,
 }
 
 static void try_remove_block_from_bb_edge(ClusterNetId net_id,
-                            const t_physical_tile_loc& pin_old_loc,
-                            const t_physical_tile_loc& pin_new_loc,
-                            const std::vector<t_2D_tbb>& curr_bb_edge,
-                            const std::vector<t_2D_tbb>& curr_bb_coord,
-                            std::vector<t_2D_tbb>& bb_edge_new,
-                            std::vector<t_2D_tbb>& bb_coord_new,
-                            std::vector<int>& bb_pin_sink_count_new) {
+                                          const t_physical_tile_loc& pin_old_loc,
+                                          const t_physical_tile_loc& pin_new_loc,
+                                          const std::vector<t_2D_tbb>& curr_bb_edge,
+                                          const std::vector<t_2D_tbb>& curr_bb_coord,
+                                          std::vector<t_2D_tbb>& bb_edge_new,
+                                          std::vector<t_2D_tbb>& bb_coord_new,
+                                          std::vector<int>& bb_pin_sink_count_new) {
     int old_layer = pin_old_loc.layer_num;
     int new_layer = pin_new_loc.layer_num;
 
diff --git a/vpr/src/place/placer_context.h b/vpr/src/place/placer_context.h
index 74eaa1647a4..8cb80942fe7 100644
--- a/vpr/src/place/placer_context.h
+++ b/vpr/src/place/placer_context.h
@@ -91,7 +91,6 @@ struct PlacerRuntimeContext : public Context {
  */
 struct PlacerMoveContext : public Context {
   public:
-
     // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each of a net's bounding box (to allow efficient updates)
     vtr::vector<ClusterNetId, std::vector<t_2D_tbb>> bb_num_on_edges;
 

From e96e8306b85af55697c514a86f695db306bf4829 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 10 Aug 2023 16:05:53 -0400
Subject: [PATCH 071/257] comment unused var - remove redundant assertions

---
 vpr/src/place/place.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index d80f569a394..c05b8760aad 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -1419,7 +1419,6 @@ static void update_move_nets(int num_nets_affected) {
 
         place_move_ctx.bb_coords[net_id] = ts_bb_coord_new[net_id];
         place_move_ctx.num_sink_pin_layer[net_id] = ts_layer_sink_pin_count[net_id];
-        VTR_ASSERT(ts_layer_sink_pin_count[net_id][0] == cluster_ctx.clb_nlist.net_pins(net_id).size() - 1);
         if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET)
             place_move_ctx.bb_num_on_edges[net_id] = ts_bb_edge_new[net_id];
 
@@ -2587,7 +2586,7 @@ static double wirelength_crossing_count(size_t fanout) {
     }
 }
 
-static double get_net_wirelength_estimate(ClusterNetId net_id,
+static double get_net_wirelength_estimate(ClusterNetId /* net_id */,
                                           const std::vector<t_2D_tbb>& bbptr,
                                           const std::vector<int>& layer_pin_sink_count) {
     /* WMF: Finds the estimate of wirelength due to one net by looking at   *
@@ -2595,7 +2594,6 @@ static double get_net_wirelength_estimate(ClusterNetId net_id,
 
     double ncost = 0.;
     double crossing = 0.;
-    const auto& place_move_ctx = g_placer_ctx.move();
     int num_layers = g_vpr_ctx.device().grid.get_num_layers();
 
     for (int layer_num = 0; layer_num < num_layers; layer_num++) {

From 79af555d808095c97bf20ab10dab75ffe24cb7dc Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 10 Aug 2023 17:52:18 -0400
Subject: [PATCH 072/257] remove choosing free layer from centroid move

---
 vpr/src/place/centroid_move_generator.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/vpr/src/place/centroid_move_generator.cpp b/vpr/src/place/centroid_move_generator.cpp
index a8eae29ba63..e9225bab091 100644
--- a/vpr/src/place/centroid_move_generator.cpp
+++ b/vpr/src/place/centroid_move_generator.cpp
@@ -39,10 +39,6 @@ e_create_move CentroidMoveGenerator::propose_move(t_pl_blocks_to_be_moved& block
         return e_create_move::ABORT;
     }
 
-    int new_layer = find_free_layer(cluster_from_type, to);
-    VTR_ASSERT(new_layer != OPEN);
-    to.layer = new_layer;
-
     e_create_move create_move = ::create_move(blocks_affected, b_from, to);
 
     //Check that all the blocks affected by the move would still be in a legal floorplan region after the swap

From 2b37a245ee01a8e7ddb6737a98864123270c281d Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 10 Aug 2023 17:52:41 -0400
Subject: [PATCH 073/257] remove choosing free layer from median move

---
 vpr/src/place/median_move_generator.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index 84d0df9efc1..feea577a0ea 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -130,10 +130,6 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
         return e_create_move::ABORT;
     }
 
-    int new_layer = find_free_layer(cluster_from_type, to);
-    VTR_ASSERT(new_layer != OPEN);
-    to.layer = new_layer;
-
     e_create_move create_move = ::create_move(blocks_affected, b_from, to);
 
     //Check that all the blocks affected by the move would still be in a legal floorplan region after the swap

From 3d465c5ab608bf0f1da5d142013f10720cf393b1 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 10 Aug 2023 17:53:20 -0400
Subject: [PATCH 074/257] remove choosing free layer from uniform move

---
 vpr/src/place/uniform_move_generator.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/vpr/src/place/uniform_move_generator.cpp b/vpr/src/place/uniform_move_generator.cpp
index 21d8980fdea..0e935b794dc 100644
--- a/vpr/src/place/uniform_move_generator.cpp
+++ b/vpr/src/place/uniform_move_generator.cpp
@@ -39,10 +39,6 @@ e_create_move UniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks
     VTR_LOG("\n");
 #endif
 
-    int new_layer = find_free_layer(cluster_from_type, to);
-    VTR_ASSERT(new_layer != OPEN);
-    to.layer = new_layer;
-
     e_create_move create_move = ::create_move(blocks_affected, b_from, to);
 
     //Check that all the blocks affected by the move would still be in a legal floorplan region after the swap

From f77d024fe8e6a704e51c75a1404231d405846950 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 10 Aug 2023 17:55:10 -0400
Subject: [PATCH 075/257] add get_random_layer function to move utils +
 find_to_loc_uniform also chooses a random layer

---
 vpr/src/place/move_utils.cpp | 15 +++++++++++++--
 vpr/src/place/move_utils.h   |  2 ++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index 4501d540915..ac388fcc8ef 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -713,8 +713,8 @@ bool find_to_loc_uniform(t_logical_block_type_ptr type,
     //Retrieve the compressed block grid for this block type
     const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[type->index];
     const int num_layers = g_vpr_ctx.device().grid.get_num_layers();
-    const int to_layer_num = to.layer;
-    VTR_ASSERT(to.layer != OPEN);
+    const int to_layer_num = get_random_layer(type);
+    VTR_ASSERT(to_layer_num != OPEN);
 
     //Determine the coordinates in the compressed grid space of the current block
     std::vector<t_physical_tile_loc> compressed_locs = get_compressed_loc(compressed_block_grid,
@@ -1256,6 +1256,17 @@ int find_free_layer(t_logical_block_type_ptr logical_block, const t_pl_loc& loc)
     return free_layer;
 }
 
+int get_random_layer(t_logical_block_type_ptr logical_block) {
+    const auto& device_ctx = g_vpr_ctx.device();
+    const auto& place_ctx = g_vpr_ctx.placement();
+
+    const auto& compatible_layers = place_ctx.compressed_block_grids[logical_block->index].get_layer_nums();
+    VTR_ASSERT(!compatible_layers.empty());
+    int layer_num = compatible_layers[vtr::irand(compatible_layers.size()-1)];
+
+    return layer_num;
+}
+
 t_2D_tbb union_2d_tbb(const std::vector<t_2D_tbb>& tbb_vec) {
     t_2D_tbb merged_bb(std::numeric_limits<int>::min(),
                        std::numeric_limits<int>::max(),
diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h
index 79a5bc4fca2..46e451179cd 100644
--- a/vpr/src/place/move_utils.h
+++ b/vpr/src/place/move_utils.h
@@ -349,6 +349,8 @@ int get_num_agent_types();
 
 int find_free_layer(t_logical_block_type_ptr logical_block, const t_pl_loc& loc);
 
+int get_random_layer(t_logical_block_type_ptr logical_block);
+
 t_2D_tbb union_2d_tbb(const std::vector<t_2D_tbb>& tbb_vec);
 
 #endif

From 26f595ab82f605bca8fa8ac46c553f0455bd094c Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 10 Aug 2023 17:56:32 -0400
Subject: [PATCH 076/257] make format

---
 vpr/src/place/move_utils.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index ac388fcc8ef..1b80e12c6db 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -1262,7 +1262,7 @@ int get_random_layer(t_logical_block_type_ptr logical_block) {
 
     const auto& compatible_layers = place_ctx.compressed_block_grids[logical_block->index].get_layer_nums();
     VTR_ASSERT(!compatible_layers.empty());
-    int layer_num = compatible_layers[vtr::irand(compatible_layers.size()-1)];
+    int layer_num = compatible_layers[vtr::irand(compatible_layers.size() - 1)];
 
     return layer_num;
 }

From 3582150581364676b16a031d4238090e66928f37 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 10 Aug 2023 18:37:30 -0400
Subject: [PATCH 077/257] remove unused vars

---
 vpr/src/place/move_utils.cpp | 5 +----
 vpr/src/place/place.cpp      | 2 +-
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index 1b80e12c6db..218a566a3b0 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -1257,10 +1257,7 @@ int find_free_layer(t_logical_block_type_ptr logical_block, const t_pl_loc& loc)
 }
 
 int get_random_layer(t_logical_block_type_ptr logical_block) {
-    const auto& device_ctx = g_vpr_ctx.device();
-    const auto& place_ctx = g_vpr_ctx.placement();
-
-    const auto& compatible_layers = place_ctx.compressed_block_grids[logical_block->index].get_layer_nums();
+    const auto& compatible_layers = g_vpr_ctx.placement().compressed_block_grids[logical_block->index].get_layer_nums();
     VTR_ASSERT(!compatible_layers.empty());
     int layer_num = compatible_layers[vtr::irand(compatible_layers.size() - 1)];
 
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index c05b8760aad..0d5554a59f1 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -2821,7 +2821,7 @@ static void update_bb(ClusterNetId net_id,
     }
 }
 
-static void update_bb_pin_sink_count(ClusterNetId net_id,
+static void update_bb_pin_sink_count(ClusterNetId /* net_id */,
                                      const t_physical_tile_loc& pin_old_loc,
                                      const t_physical_tile_loc& pin_new_loc,
                                      const std::vector<int>& curr_layer_pin_sink_count,

From 3c1098a431a22396bb2c6398006b59abfb72149c Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 11 Aug 2023 18:19:40 -0400
Subject: [PATCH 078/257] fix the bug with layer_num

---
 vpr/src/place/place.cpp | 77 ++++++++++++++++++++---------------------
 1 file changed, 38 insertions(+), 39 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 0d5554a59f1..9efc944aba9 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -2457,7 +2457,6 @@ static void get_bb_from_scratch(ClusterNetId net_id,
     num_on_edges.resize(num_layers, t_2D_tbb());
     coords.resize(num_layers, t_2D_tbb());
     layer_pin_sink_count.resize(num_layers, 0);
-    int pnum, x, y, layer;
     std::vector<int> xmin(num_layers, OPEN);
     std::vector<int> xmax(num_layers, OPEN);
     std::vector<int> ymin(num_layers, OPEN);
@@ -2474,21 +2473,21 @@ static void get_bb_from_scratch(ClusterNetId net_id,
     auto& grid = device_ctx.grid;
 
     ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id);
-    pnum = net_pin_to_tile_pin_index(net_id, 0);
-    VTR_ASSERT(pnum >= 0);
-    x = place_ctx.block_locs[bnum].loc.x
-        + physical_tile_type(bnum)->pin_width_offset[pnum];
-    y = place_ctx.block_locs[bnum].loc.y
-        + physical_tile_type(bnum)->pin_height_offset[pnum];
+    int pnum_src = net_pin_to_tile_pin_index(net_id, 0);
+    VTR_ASSERT(pnum_src >= 0);
+    int x_src = place_ctx.block_locs[bnum].loc.x
+        + physical_tile_type(bnum)->pin_width_offset[pnum_src];
+    int y_src = place_ctx.block_locs[bnum].loc.y
+        + physical_tile_type(bnum)->pin_height_offset[pnum_src];
 
-    x = max(min<int>(x, grid.width() - 2), 1);
-    y = max(min<int>(y, grid.height() - 2), 1);
+    x_src = max(min<int>(x_src, grid.width() - 2), 1);
+    y_src = max(min<int>(y_src, grid.height() - 2), 1);
 
     for (int layer_num = 0; layer_num < num_layers; layer_num++) {
-        xmin[layer_num] = x;
-        ymin[layer_num] = y;
-        xmax[layer_num] = x;
-        ymax[layer_num] = y;
+        xmin[layer_num] = x_src;
+        ymin[layer_num] = y_src;
+        xmax[layer_num] = x_src;
+        ymax[layer_num] = y_src;
         xmin_edge[layer_num] = 1;
         ymin_edge[layer_num] = 1;
         xmax_edge[layer_num] = 1;
@@ -2497,13 +2496,13 @@ static void get_bb_from_scratch(ClusterNetId net_id,
 
     for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) {
         bnum = cluster_ctx.clb_nlist.pin_block(pin_id);
-        pnum = tile_pin_index(pin_id);
-        layer = place_ctx.block_locs[bnum].loc.layer;
+        int pnum = tile_pin_index(pin_id);
+        int layer = place_ctx.block_locs[bnum].loc.layer;
         VTR_ASSERT(layer >= 0 && layer < num_layers);
         num_sink_pin_layer[layer]++;
-        x = place_ctx.block_locs[bnum].loc.x
+        int x = place_ctx.block_locs[bnum].loc.x
             + physical_tile_type(bnum)->pin_width_offset[pnum];
-        y = place_ctx.block_locs[bnum].loc.y
+        int y = place_ctx.block_locs[bnum].loc.y
             + physical_tile_type(bnum)->pin_height_offset[pnum];
 
         /* Code below counts IO blocks as being within the 1..grid.width()-2, 1..grid.height()-2 clb array. *
@@ -2548,29 +2547,29 @@ static void get_bb_from_scratch(ClusterNetId net_id,
     for (int layer_num = 0; layer_num < num_layers; layer_num++) {
         layer_pin_sink_count[layer_num] = num_sink_pin_layer[layer_num];
         if (num_sink_pin_layer[layer_num] == 0) {
-            coords[layer].xmin = OPEN;
-            coords[layer].xmax = OPEN;
-            coords[layer].ymin = OPEN;
-            coords[layer].ymax = OPEN;
-            coords[layer].layer_num = OPEN;
-
-            num_on_edges[layer].xmin = OPEN;
-            num_on_edges[layer].xmax = OPEN;
-            num_on_edges[layer].ymin = OPEN;
-            num_on_edges[layer].ymax = OPEN;
-            num_on_edges[layer].layer_num = OPEN;
+            coords[layer_num].xmin = OPEN;
+            coords[layer_num].xmax = OPEN;
+            coords[layer_num].ymin = OPEN;
+            coords[layer_num].ymax = OPEN;
+            coords[layer_num].layer_num = OPEN;
+
+            num_on_edges[layer_num].xmin = OPEN;
+            num_on_edges[layer_num].xmax = OPEN;
+            num_on_edges[layer_num].ymin = OPEN;
+            num_on_edges[layer_num].ymax = OPEN;
+            num_on_edges[layer_num].layer_num = OPEN;
         } else {
-            coords[layer].xmin = xmin[layer];
-            coords[layer].xmax = xmax[layer];
-            coords[layer].ymin = ymin[layer];
-            coords[layer].ymax = ymax[layer];
-            coords[layer].layer_num = layer_num;
-
-            num_on_edges[layer].xmin = xmin_edge[layer];
-            num_on_edges[layer].xmax = xmax_edge[layer];
-            num_on_edges[layer].ymin = ymin_edge[layer];
-            num_on_edges[layer].ymax = ymax_edge[layer];
-            num_on_edges[layer].layer_num = layer_num;
+            coords[layer_num].xmin = xmin[layer_num];
+            coords[layer_num].xmax = xmax[layer_num];
+            coords[layer_num].ymin = ymin[layer_num];
+            coords[layer_num].ymax = ymax[layer_num];
+            coords[layer_num].layer_num = layer_num;
+
+            num_on_edges[layer_num].xmin = xmin_edge[layer_num];
+            num_on_edges[layer_num].xmax = xmax_edge[layer_num];
+            num_on_edges[layer_num].ymin = ymin_edge[layer_num];
+            num_on_edges[layer_num].ymax = ymax_edge[layer_num];
+            num_on_edges[layer_num].layer_num = layer_num;
         }
     }
 }

From eec5e6217e126ab34794ad0113f8aaf6a50b5d13 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 11 Aug 2023 18:28:05 -0400
Subject: [PATCH 079/257] assign pin_sink_count of all layers to new array to
 avoid getting non-initialized element

---
 vpr/src/place/place.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 9efc944aba9..a1dfb8a87db 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -2826,8 +2826,7 @@ static void update_bb_pin_sink_count(ClusterNetId /* net_id */,
                                      const std::vector<int>& curr_layer_pin_sink_count,
                                      std::vector<int>& bb_pin_sink_count_new) {
     VTR_ASSERT(curr_layer_pin_sink_count[pin_old_loc.layer_num] > 0);
-    bb_pin_sink_count_new[pin_old_loc.layer_num] = curr_layer_pin_sink_count[pin_old_loc.layer_num];
-    bb_pin_sink_count_new[pin_new_loc.layer_num] = curr_layer_pin_sink_count[pin_new_loc.layer_num];
+    bb_pin_sink_count_new = curr_layer_pin_sink_count;
 
     bb_pin_sink_count_new[pin_old_loc.layer_num] -= 1;
     bb_pin_sink_count_new[pin_new_loc.layer_num] += 1;

From 145ce3168aba02d60d0570408af4800588af8510 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 11 Aug 2023 18:38:38 -0400
Subject: [PATCH 080/257] initialize bb for all layers

---
 vpr/src/place/place.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index a1dfb8a87db..020ee032087 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -2843,8 +2843,8 @@ static void try_remove_block_from_bb_edge(ClusterNetId net_id,
     int old_layer = pin_old_loc.layer_num;
     int new_layer = pin_new_loc.layer_num;
 
-    bb_edge_new[old_layer] = curr_bb_edge[old_layer];
-    bb_coord_new[old_layer] = curr_bb_coord[old_layer];
+    bb_edge_new = curr_bb_edge;
+    bb_coord_new = curr_bb_coord;
 
     if (pin_old_loc.x == curr_bb_coord[old_layer].xmax) {
         if (old_layer != new_layer || pin_new_loc.x < pin_old_loc.x) {

From fcec33205038e7e448bcb8268f533ba425f1fe56 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Sat, 12 Aug 2023 10:19:09 -0400
Subject: [PATCH 081/257] initialize the bb for each layer around the source
 block

---
 vpr/src/place/place.cpp | 54 ++++++++++++-----------------------------
 1 file changed, 16 insertions(+), 38 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 020ee032087..efe24cc5d3f 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -2546,31 +2546,17 @@ static void get_bb_from_scratch(ClusterNetId net_id,
      * structures.                                                            */
     for (int layer_num = 0; layer_num < num_layers; layer_num++) {
         layer_pin_sink_count[layer_num] = num_sink_pin_layer[layer_num];
-        if (num_sink_pin_layer[layer_num] == 0) {
-            coords[layer_num].xmin = OPEN;
-            coords[layer_num].xmax = OPEN;
-            coords[layer_num].ymin = OPEN;
-            coords[layer_num].ymax = OPEN;
-            coords[layer_num].layer_num = OPEN;
-
-            num_on_edges[layer_num].xmin = OPEN;
-            num_on_edges[layer_num].xmax = OPEN;
-            num_on_edges[layer_num].ymin = OPEN;
-            num_on_edges[layer_num].ymax = OPEN;
-            num_on_edges[layer_num].layer_num = OPEN;
-        } else {
-            coords[layer_num].xmin = xmin[layer_num];
-            coords[layer_num].xmax = xmax[layer_num];
-            coords[layer_num].ymin = ymin[layer_num];
-            coords[layer_num].ymax = ymax[layer_num];
-            coords[layer_num].layer_num = layer_num;
-
-            num_on_edges[layer_num].xmin = xmin_edge[layer_num];
-            num_on_edges[layer_num].xmax = xmax_edge[layer_num];
-            num_on_edges[layer_num].ymin = ymin_edge[layer_num];
-            num_on_edges[layer_num].ymax = ymax_edge[layer_num];
-            num_on_edges[layer_num].layer_num = layer_num;
-        }
+        coords[layer_num].xmin = xmin[layer_num];
+        coords[layer_num].xmax = xmax[layer_num];
+        coords[layer_num].ymin = ymin[layer_num];
+        coords[layer_num].ymax = ymax[layer_num];
+        coords[layer_num].layer_num = layer_num;
+
+        num_on_edges[layer_num].xmin = xmin_edge[layer_num];
+        num_on_edges[layer_num].xmax = xmax_edge[layer_num];
+        num_on_edges[layer_num].ymin = ymin_edge[layer_num];
+        num_on_edges[layer_num].ymax = ymax_edge[layer_num];
+        num_on_edges[layer_num].layer_num = layer_num;
     }
 }
 
@@ -2722,19 +2708,11 @@ static void get_non_updateable_bb(ClusterNetId net_id,
      * clip to 1 in both directions as well (since minimum channel index *
      * is 0).  See route_common.cpp for a channel diagram.               */
     for (int layer_num = 0; layer_num < num_layers; layer_num++) {
-        if (num_sink_layer[layer_num] == 0) {
-            bb_coord_new[layer_num].xmin = OPEN;
-            bb_coord_new[layer_num].ymin = OPEN;
-            bb_coord_new[layer_num].xmax = OPEN;
-            bb_coord_new[layer_num].ymax = OPEN;
-            bb_coord_new[layer_num].layer_num = OPEN;
-        } else {
-            bb_coord_new[layer_num].xmin = max(min<int>(xmin[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
-            bb_coord_new[layer_num].ymin = max(min<int>(ymin[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels
-            bb_coord_new[layer_num].xmax = max(min<int>(xmax[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
-            bb_coord_new[layer_num].ymax = max(min<int>(ymax[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels
-            bb_coord_new[layer_num].layer_num = layer_num;
-        }
+        bb_coord_new[layer_num].xmin = max(min<int>(xmin[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
+        bb_coord_new[layer_num].ymin = max(min<int>(ymin[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels
+        bb_coord_new[layer_num].xmax = max(min<int>(xmax[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
+        bb_coord_new[layer_num].ymax = max(min<int>(ymax[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels
+        bb_coord_new[layer_num].layer_num = layer_num;
     }
 }
 

From 4174061ad248bc8373733c580c986c7a260fe84f Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Sat, 12 Aug 2023 10:37:54 -0400
Subject: [PATCH 082/257] update ts_layer_sink_pin_count only if the moving pin
 is of type receiver

---
 vpr/src/place/place.cpp | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index efe24cc5d3f..c546358a916 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -334,13 +334,15 @@ static void update_bb(ClusterNetId net_id,
                       std::vector<t_2D_tbb>& bb_coord_new,
                       std::vector<int>& bb_pin_sink_count_new,
                       t_physical_tile_loc pin_old_loc,
-                      t_physical_tile_loc pin_new_loc);
+                      t_physical_tile_loc pin_new_loc,
+                      bool is_output_pin);
 
 static void update_bb_pin_sink_count(ClusterNetId net_id,
                                      const t_physical_tile_loc& pin_old_loc,
                                      const t_physical_tile_loc& pin_new_loc,
                                      const std::vector<int>& curr_layer_pin_sink_count,
-                                     std::vector<int>& bb_pin_sink_count_new);
+                                     std::vector<int>& bb_pin_sink_count_new,
+                                     bool is_output_pin);
 
 static void try_remove_block_from_bb_edge(ClusterNetId net_id,
                                           const t_physical_tile_loc& pin_old_loc,
@@ -1901,12 +1903,14 @@ static void update_net_bb(const ClusterNetId net,
             blocks_affected.moved_blocks[iblk].new_loc.x + pin_width_offset,
             blocks_affected.moved_blocks[iblk].new_loc.y + pin_height_offset,
             blocks_affected.moved_blocks[iblk].new_loc.layer);
+        auto pin_dir = get_pin_type_from_pin_physical_num(blk_type, iblk_pin);
         update_bb(net,
                   ts_bb_edge_new[net],
                   ts_bb_coord_new[net],
                   ts_layer_sink_pin_count[net],
                   pin_old_loc,
-                  pin_new_loc);
+                  pin_new_loc,
+                  pin_dir == e_pin_type::DRIVER);
     }
 }
 
@@ -2721,7 +2725,8 @@ static void update_bb(ClusterNetId net_id,
                       std::vector<t_2D_tbb>& bb_coord_new,
                       std::vector<int>& bb_pin_sink_count_new,
                       t_physical_tile_loc pin_old_loc,
-                      t_physical_tile_loc pin_new_loc) {
+                      t_physical_tile_loc pin_new_loc,
+                      bool is_output_pin) {
     /* Updates the bounding box of a net by storing its coordinates in    *
      * the bb_coord_new data structure and the number of blocks on each   *
      * edge in the bb_edge_new data structure.  This routine should only  *
@@ -2771,7 +2776,8 @@ static void update_bb(ClusterNetId net_id,
                              pin_old_loc,
                              pin_new_loc,
                              *curr_layer_pin_sink_count,
-                             bb_pin_sink_count_new);
+                             bb_pin_sink_count_new,
+                             is_output_pin);
 
     try_remove_block_from_bb_edge(net_id,
                                   pin_old_loc,
@@ -2802,12 +2808,15 @@ static void update_bb_pin_sink_count(ClusterNetId /* net_id */,
                                      const t_physical_tile_loc& pin_old_loc,
                                      const t_physical_tile_loc& pin_new_loc,
                                      const std::vector<int>& curr_layer_pin_sink_count,
-                                     std::vector<int>& bb_pin_sink_count_new) {
-    VTR_ASSERT(curr_layer_pin_sink_count[pin_old_loc.layer_num] > 0);
+                                     std::vector<int>& bb_pin_sink_count_new,
+                                     bool is_output_pin) {
+    VTR_ASSERT(curr_layer_pin_sink_count[pin_old_loc.layer_num] > 0 ||
+               is_output_pin == 1);
     bb_pin_sink_count_new = curr_layer_pin_sink_count;
-
-    bb_pin_sink_count_new[pin_old_loc.layer_num] -= 1;
-    bb_pin_sink_count_new[pin_new_loc.layer_num] += 1;
+    if (!is_output_pin) {
+        bb_pin_sink_count_new[pin_old_loc.layer_num] -= 1;
+        bb_pin_sink_count_new[pin_new_loc.layer_num] += 1;
+    }
 }
 
 static void try_remove_block_from_bb_edge(ClusterNetId net_id,

From 8626f3f583033f2bc1a70d4979c67beab8268bc8 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 12 Sep 2023 15:25:27 -0400
Subject: [PATCH 083/257] fix a few formatting issues

---
 vpr/src/place/place.cpp             | 11 +++++------
 vpr/src/route/connection_router.cpp |  4 ++--
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 857d2113487..4284d117309 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -2486,9 +2486,9 @@ static void get_bb_from_scratch(ClusterNetId net_id,
     int pnum_src = net_pin_to_tile_pin_index(net_id, 0);
     VTR_ASSERT(pnum_src >= 0);
     int x_src = place_ctx.block_locs[bnum].loc.x
-        + physical_tile_type(bnum)->pin_width_offset[pnum_src];
+                + physical_tile_type(bnum)->pin_width_offset[pnum_src];
     int y_src = place_ctx.block_locs[bnum].loc.y
-        + physical_tile_type(bnum)->pin_height_offset[pnum_src];
+                + physical_tile_type(bnum)->pin_height_offset[pnum_src];
 
     x_src = max(min<int>(x_src, grid.width() - 2), 1);
     y_src = max(min<int>(y_src, grid.height() - 2), 1);
@@ -2511,9 +2511,9 @@ static void get_bb_from_scratch(ClusterNetId net_id,
         VTR_ASSERT(layer >= 0 && layer < num_layers);
         num_sink_pin_layer[layer]++;
         int x = place_ctx.block_locs[bnum].loc.x
-            + physical_tile_type(bnum)->pin_width_offset[pnum];
+                + physical_tile_type(bnum)->pin_width_offset[pnum];
         int y = place_ctx.block_locs[bnum].loc.y
-            + physical_tile_type(bnum)->pin_height_offset[pnum];
+                + physical_tile_type(bnum)->pin_height_offset[pnum];
 
         /* Code below counts IO blocks as being within the 1..grid.width()-2, 1..grid.height()-2 clb array. *
          * This is because channels do not go out of the 0..grid.width()-2, 0..grid.height()-2 range, and   *
@@ -2816,8 +2816,7 @@ static void update_bb_pin_sink_count(ClusterNetId /* net_id */,
                                      const std::vector<int>& curr_layer_pin_sink_count,
                                      std::vector<int>& bb_pin_sink_count_new,
                                      bool is_output_pin) {
-    VTR_ASSERT(curr_layer_pin_sink_count[pin_old_loc.layer_num] > 0 ||
-               is_output_pin == 1);
+    VTR_ASSERT(curr_layer_pin_sink_count[pin_old_loc.layer_num] > 0 || is_output_pin == 1);
     bb_pin_sink_count_new = curr_layer_pin_sink_count;
     if (!is_output_pin) {
         bb_pin_sink_count_new[pin_old_loc.layer_num] -= 1;
diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp
index bf9a06b406d..ff96b23eb82 100644
--- a/vpr/src/route/connection_router.cpp
+++ b/vpr/src/route/connection_router.cpp
@@ -594,8 +594,8 @@ void ConnectionRouter<Heap>::timing_driven_expand_neighbour(t_heap* current,
                        " (to node location %d,%d,%d x %d,%d,%d outside of expanded"
                        " net bounding box %d,%d,%d x %d,%d,%d)\n",
                        from_node, size_t(from_edge), size_t(to_node),
-                       to_xlow, to_ylow,to_layer,
-                       to_xhigh, to_yhigh,to_layer,
+                       to_xlow, to_ylow, to_layer,
+                       to_xhigh, to_yhigh, to_layer,
                        bounding_box.xmin, bounding_box.ymin, bounding_box.layer_min,
                        bounding_box.xmax, bounding_box.ymax, bounding_box.layer_max);
         return; /* Node is outside (expanded) bounding box. */

From 8f370cba50b967dc283c882d4b68ddc8706c4cfc Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 12 Sep 2023 17:14:12 -0400
Subject: [PATCH 084/257] add to vpr command line argument:
 write_initial_place_file

---
 vpr/src/base/read_options.cpp | 5 +++++
 vpr/src/base/read_options.h   | 1 +
 2 files changed, 6 insertions(+)

diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 78d79f10d5e..e1b94e30f63 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -1569,6 +1569,11 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg
         .metavar("RR_GRAPH_FILE")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
+    file_grp.add_argument(args.write_initial_place_file, "--write_initial_place_file")
+        .help("Writes out the initial placement of blocks to the specified file")
+        .metavar("INITIAL_PLACE_FILE")
+        .show_in(argparse::ShowIn::HELP_ONLY);
+
     file_grp.add_argument(args.read_vpr_constraints_file, "--read_vpr_constraints")
         .help("Reads the floorplanning constraints that packing and placement must respect from the specified XML file.")
         .show_in(argparse::ShowIn::HELP_ONLY);
diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h
index 90f82ac07fb..e8bb28f7e43 100644
--- a/vpr/src/base/read_options.h
+++ b/vpr/src/base/read_options.h
@@ -27,6 +27,7 @@ struct t_options {
     argparse::ArgValue<std::string> constraints_file;
     argparse::ArgValue<std::string> write_rr_graph_file;
     argparse::ArgValue<std::string> read_rr_graph_file;
+    argparse::ArgValue<std::string> write_initial_place_file;
     argparse::ArgValue<std::string> read_vpr_constraints_file;
     argparse::ArgValue<std::string> write_vpr_constraints_file;
 

From e8ed84909b2eb4195b1f5a9a106f965638610eca Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 12 Sep 2023 18:55:38 -0400
Subject: [PATCH 085/257] fix a bug with uniform inter layer

---
 vpr/src/place/uniform_inter_layer_move_generator.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vpr/src/place/uniform_inter_layer_move_generator.cpp b/vpr/src/place/uniform_inter_layer_move_generator.cpp
index c10edc1055f..e7b41f32acc 100644
--- a/vpr/src/place/uniform_inter_layer_move_generator.cpp
+++ b/vpr/src/place/uniform_inter_layer_move_generator.cpp
@@ -3,11 +3,11 @@
 #include "place_constraints.h"
 #include "move_utils.h"
 
-e_create_move UniformInterLayerMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float /*rlim*/, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/) {
+e_create_move UniformInterLayerMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float /*rlim*/, const t_placer_opts& placer_opts, const PlacerCriticalities* /*criticalities*/) {
     // If this moved is called, we know that there are at least two layers.
     VTR_ASSERT(g_vpr_ctx.device().grid.get_num_layers() > 1);
     //Find a movable block based on blk_type
-    ClusterBlockId b_from = propose_block_to_move(proposed_action.logical_blk_type_index, false, nullptr, nullptr);
+    ClusterBlockId b_from = propose_block_to_move(placer_opts, proposed_action.logical_blk_type_index, false, nullptr, nullptr);
 
     if (!b_from) { //No movable block found
         return e_create_move::ABORT;

From d631206539f75f7611d7367a2cfbfaa8fc41e2d3 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 13 Sep 2023 09:22:44 -0400
Subject: [PATCH 086/257] increment the number passed to wirelength crossing
 by 1 since it should also include the source pin

---
 vpr/src/place/place.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index d5132ddcf9c..aded9130c23 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -2616,7 +2616,7 @@ static double get_net_wirelength_estimate(ClusterNetId /* net_id */,
         if (layer_pin_sink_count[layer_num] == 0) {
             continue;
         }
-        crossing = wirelength_crossing_count(layer_pin_sink_count[layer_num]);
+        crossing = wirelength_crossing_count(layer_pin_sink_count[layer_num]+1);
 
         /* Could insert a check for xmin == xmax.  In that case, assume  *
          * connection will be made with no bends and hence no x-cost.    *

From 3c3842ddd8719ff7abcb8eda5b18799c7aaade40 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 13 Sep 2023 09:24:21 -0400
Subject: [PATCH 087/257] consider source pin in the number passed to
 wirelength_crossing_count

---
 vpr/src/place/place.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index aded9130c23..de5eb7b1d25 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -2648,7 +2648,7 @@ static double get_net_cost(ClusterNetId /* net_id */,
         if (layer_pin_sink_count[layer_num] == 0) {
             continue;
         }
-        crossing = wirelength_crossing_count(layer_pin_sink_count[layer_num]);
+        crossing = wirelength_crossing_count(layer_pin_sink_count[layer_num]+1);
 
         /* Could insert a check for xmin == xmax.  In that case, assume  *
          * connection will be made with no bends and hence no x-cost.    *

From 1d26ac090e0c9fb7f49334377ac2b6e1dca7b5fa Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 13 Sep 2023 10:33:50 -0400
Subject: [PATCH 088/257] print search range in debug info

---
 vpr/src/place/move_utils.cpp | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index 3113de2dfb7..2fdcf70477f 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -794,7 +794,10 @@ bool find_to_loc_uniform(t_logical_block_type_ptr type,
     VTR_ASSERT_MSG(grid.get_width_offset({to.x, to.y, to.layer}) == 0, "Should be at block base location");
     VTR_ASSERT_MSG(grid.get_height_offset({to.x, to.y, to.layer}) == 0, "Should be at block base location");
 
-    VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tA legal position at %d,%d,%d is found\n", to.x, to.y, to.layer);
+    VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tSearch range %dx%dx%d x %dx%dx%d - Legal position at %d,%d,%d is found\n",
+                   search_range.xmin, search_range.ymin, search_range.layer_min,
+                   search_range.xmax, search_range.ymax, search_range.layer_max
+                   to.x, to.y, to.layer);
     return true;
 }
 
@@ -887,7 +890,10 @@ bool find_to_loc_median(t_logical_block_type_ptr blk_type,
     VTR_ASSERT_MSG(grid.get_width_offset({to_loc.x, to_loc.y, to_loc.layer}) == 0, "Should be at block base location");
     VTR_ASSERT_MSG(grid.get_height_offset({to_loc.x, to_loc.y, to_loc.layer}) == 0, "Should be at block base location");
 
-    VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tA legal position at %d,%d,%d is found\n", to_loc.x, to_loc.y, to_loc.layer);
+    VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tSearch range %dx%dx%d x %dx%dx%d - Legal position at %d,%d,%d is found\n",
+                   search_range.xmin, search_range.ymin, search_range.layer_min,
+                   search_range.xmax, search_range.ymax, search_range.layer_max,
+                   to_loc.x, to_loc.y, to_loc.layer);
     return true;
 }
 
@@ -970,7 +976,10 @@ bool find_to_loc_centroid(t_logical_block_type_ptr blk_type,
     VTR_ASSERT_MSG(grid.get_width_offset({to_loc.x, to_loc.y, to_loc.layer}) == 0, "Should be at block base location");
     VTR_ASSERT_MSG(grid.get_height_offset({to_loc.x, to_loc.y, to_loc.layer}) == 0, "Should be at block base location");
 
-    VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tA legal position at %d,%d,%d is found\n", to_loc.x, to_loc.y, to_loc.layer);
+    VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tSearch range %dx%dx%d x %dx%dx%d - Legal position at %d,%d,%d is found\n",
+                   search_range.xmin, search_range.ymin, search_range.layer_min,
+                   search_range.xmax, search_range.ymax, search_range.layer_max,
+                   to_loc.x, to_loc.y, to_loc.layer);
     return true;
 }
 

From a8ed7e1878f377fc38a023a478ee71b4c0850ee6 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 13 Sep 2023 11:36:42 -0400
Subject: [PATCH 089/257] choose a random layer if number of layers is more
 than one

---
 vpr/src/place/move_utils.cpp | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index 2fdcf70477f..877654835cd 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -796,7 +796,7 @@ bool find_to_loc_uniform(t_logical_block_type_ptr type,
 
     VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tSearch range %dx%dx%d x %dx%dx%d - Legal position at %d,%d,%d is found\n",
                    search_range.xmin, search_range.ymin, search_range.layer_min,
-                   search_range.xmax, search_range.ymax, search_range.layer_max
+                   search_range.xmax, search_range.ymax, search_range.layer_max,
                    to.x, to.y, to.layer);
     return true;
 }
@@ -1302,7 +1302,12 @@ int find_free_layer(t_logical_block_type_ptr logical_block, const t_pl_loc& loc)
 int get_random_layer(t_logical_block_type_ptr logical_block) {
     const auto& compatible_layers = g_vpr_ctx.placement().compressed_block_grids[logical_block->index].get_layer_nums();
     VTR_ASSERT(!compatible_layers.empty());
-    int layer_num = compatible_layers[vtr::irand(compatible_layers.size() - 1)];
+    int layer_num = OPEN;
+    if (compatible_layers.size() == 1) {
+        layer_num = compatible_layers[0];
+    } else {
+        layer_num = compatible_layers[vtr::irand(compatible_layers.size() - 1)];
+    }
 
     return layer_num;
 }

From 900109eeff6ab1cbc464404b442482b19fa8f66c Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 13 Sep 2023 19:37:34 -0400
Subject: [PATCH 090/257] assign layer num to search range

---
 vpr/src/place/move_utils.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index 877654835cd..f0c81d1cb8b 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -1162,6 +1162,9 @@ t_bb get_compressed_grid_target_search_range(const t_compressed_block_grid& comp
     search_ranges.ymin = std::max(0, compressed_loc.y - rlim_y_max_range);
     search_ranges.ymax = std::min<int>(compressed_block_grid.get_num_rows(layer_num) - 1, compressed_loc.y + rlim_y_max_range);
 
+    search_ranges.layer_min = compressed_loc.layer_num;
+    search_ranges.layer_max = compressed_loc.layer_num;
+
     return search_ranges;
 }
 

From d619202f2d23fdb647106374c459eb41f1505953 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 13 Sep 2023 19:38:02 -0400
Subject: [PATCH 091/257] solve the bug in union 2d tbb

---
 vpr/src/place/move_utils.cpp | 21 +++++++++++----------
 vpr/src/place/move_utils.h   |  2 +-
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index f0c81d1cb8b..40fc5079d0d 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -1315,27 +1315,28 @@ int get_random_layer(t_logical_block_type_ptr logical_block) {
     return layer_num;
 }
 
-t_2D_tbb union_2d_tbb(const std::vector<t_2D_tbb>& tbb_vec) {
-    t_2D_tbb merged_bb(std::numeric_limits<int>::min(),
-                       std::numeric_limits<int>::max(),
-                       std::numeric_limits<int>::min(),
-                       std::numeric_limits<int>::max(),
-                       -1);
+t_bb union_2d_tbb(const std::vector<t_2D_tbb>& tbb_vec) {
+    t_bb merged_bb(OPEN,
+                   OPEN,
+                   OPEN,
+                   OPEN,
+                   0,
+                   tbb_vec.size()-1);
 
     for (const auto& bb : tbb_vec) {
         if (bb.xmin == OPEN) {
             continue;
         }
-        if (bb.xmin < merged_bb.xmin) {
+        if (merged_bb.xmin == OPEN || bb.xmin < merged_bb.xmin) {
             merged_bb.xmin = bb.xmin;
         }
-        if (bb.xmax > merged_bb.xmax) {
+        if (merged_bb.xmax == OPEN || bb.xmax > merged_bb.xmax) {
             merged_bb.xmax = bb.xmax;
         }
-        if (bb.ymin < merged_bb.ymin) {
+        if (merged_bb.ymin == OPEN || bb.ymin < merged_bb.ymin) {
             merged_bb.ymin = bb.ymin;
         }
-        if (bb.ymax > merged_bb.ymax) {
+        if (merged_bb.ymax == OPEN || bb.ymax > merged_bb.ymax) {
             merged_bb.ymax = bb.ymax;
         }
     }
diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h
index 920d61b8f42..a22db910be2 100644
--- a/vpr/src/place/move_utils.h
+++ b/vpr/src/place/move_utils.h
@@ -331,7 +331,7 @@ int find_free_layer(t_logical_block_type_ptr logical_block, const t_pl_loc& loc)
 
 int get_random_layer(t_logical_block_type_ptr logical_block);
 
-t_2D_tbb union_2d_tbb(const std::vector<t_2D_tbb>& tbb_vec);
+t_bb union_2d_tbb(const std::vector<t_2D_tbb>& tbb_vec);
 
 void enable_placer_debug(const t_placer_opts& placer_opts,
                          int blk_id_num,

From b661f37441fc5762959004921ba8c29b26249822 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 20 Sep 2023 12:49:56 -0400
Subject: [PATCH 092/257] fix node string layer num

---
 libs/librrgraph/src/base/rr_graph_view.h | 43 ++++++++++++------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/libs/librrgraph/src/base/rr_graph_view.h b/libs/librrgraph/src/base/rr_graph_view.h
index 71102b193af..f1a9efcaa13 100644
--- a/libs/librrgraph/src/base/rr_graph_view.h
+++ b/libs/librrgraph/src/base/rr_graph_view.h
@@ -234,7 +234,7 @@ class RRGraphView {
     }
 
     /** @brief Get string of information about routing resource node. The string will contain the following information.
-     * type, side, x_low, x_high, y_low, y_high, length, direction, segment_name
+     * type, side, x_low, x_high, y_low, y_high, length, direction, segment_name, layer num
      * This function is inlined for runtime optimization.
      */
     inline const std::string node_coordinate_to_string(RRNodeId node) const {
@@ -258,14 +258,14 @@ class RRGraphView {
             coordinate_string += ")"; //add the side of the routing resource node
             // For OPINs and IPINs the starting and ending coordinate are identical, so we can just arbitrarily assign the start to larger values
             // and the end to the lower coordinate
-            start_layer_num = " (" + std::to_string(node_layer(node)) + ",";
-            start_x =  std::to_string(node_xhigh(node)) + ","; //start and end coordinates are the same for OPINs and IPINs
-            start_y = std::to_string(node_yhigh(node)) + ")";
+            start_x =  " (" + std::to_string(node_xhigh(node)) + ","; //start and end coordinates are the same for OPINs and IPINs
+            start_y = std::to_string(node_yhigh(node)) + ",";
+            start_layer_num = std::to_string(node_layer(node)) + ")";
         } else if (node_type(node) == SOURCE || node_type(node) == SINK) {
             // For SOURCE and SINK the starting and ending coordinate are identical, so just use start
-            start_layer_num = " (" + std::to_string(node_layer(node)) + ",";
-            start_x = std::to_string(node_xhigh(node)) + ",";
-            start_y = std::to_string(node_yhigh(node)) + ")";
+            start_x = " (" + std::to_string(node_xhigh(node)) + ",";
+            start_y = std::to_string(node_yhigh(node)) + ",";
+            start_layer_num = std::to_string(node_layer(node)) + ")";
         } else if (node_type(node) == CHANX || node_type(node) == CHANY) { //for channels, we would like to describe the component with segment specific information
             RRIndexedDataId cost_index = node_cost_index(node);
             int seg_index = rr_indexed_data_[cost_index].seg_index;
@@ -276,30 +276,31 @@ class RRGraphView {
             arrow = "->"; //we will point the coordinates from start to finish, left to right
 
             if (node_direction(node) == Direction::DEC) {                //signal travels along decreasing direction
-                start_layer_num = " (" + std::to_string(node_layer(node)) + ","; //layer number
-                start_x = std::to_string(node_xhigh(node)) + ","; //start coordinates have large value
-                start_y = std::to_string(node_yhigh(node)) + ")";
-                end_layer_num = " (" + std::to_string(node_layer(node)) + ",";
-                end_x = std::to_string(node_xlow(node)) + ","; //end coordinates have smaller value
-                end_y = std::to_string(node_ylow(node)) + ")";
+
+                start_x = " (" + std::to_string(node_xhigh(node)) + ","; //start coordinates have large value
+                start_y = std::to_string(node_yhigh(node)) + ",";
+                start_layer_num = std::to_string(node_layer(node)) + ")"; //layer number
+                end_x = " (" + std::to_string(node_xlow(node)) + ","; //end coordinates have smaller value
+                end_y = std::to_string(node_ylow(node)) + ",";
+                end_layer_num = std::to_string(node_layer(node)) + ")";
             }
 
             else {                                                      // signal travels in increasing direction, stays at same point, or can travel both directions
-                start_layer_num = " (" + std::to_string(node_layer(node)) + ","; //layer number
-                start_x = std::to_string(node_xlow(node)) + ","; //start coordinates have smaller value
-                start_y = std::to_string(node_ylow(node)) + ")";
-                end_layer_num = " (" + std::to_string(node_layer(node)) + ",";
-                end_x = std::to_string(node_xhigh(node)) + ","; //end coordinates have larger value
-                end_y = std::to_string(node_yhigh(node)) + ")";
+                start_x = " (" + std::to_string(node_xlow(node)) + ","; //start coordinates have smaller value
+                start_y = std::to_string(node_ylow(node)) + ",";
+                start_layer_num = std::to_string(node_layer(node)) + ")"; //layer number
+                end_x = " (" + std::to_string(node_xhigh(node)) + ","; //end coordinates have larger value
+                end_y = std::to_string(node_yhigh(node)) + ",";
+                end_layer_num = std::to_string(node_layer(node)) + ")";
                 if (node_direction(node) == Direction::BIDIR) {
                     arrow = "<->"; //indicate that signal can travel both direction
                 }
             }
         }
 
-        coordinate_string += start_layer_num + start_x + start_y; //Write the starting coordinates
+        coordinate_string +=  start_x + start_y + start_layer_num; //Write the starting coordinates
         coordinate_string += arrow;             //Indicate the direction
-        coordinate_string += end_layer_num + end_x + end_y;     //Write the end coordinates
+        coordinate_string += end_x + end_y + end_layer_num;     //Write the end coordinates
         return coordinate_string;
     }
 

From 48380fda8d3bab77a20530335169c944b10fe0f3 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 20 Sep 2023 12:53:08 -0400
Subject: [PATCH 093/257] comment on tbb data structures

---
 vpr/src/base/vpr_types.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 19485cdfb47..212988d752f 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -572,7 +572,7 @@ struct t_net_power {
 /**
  * @brief Stores the bounding box of a net in terms of the minimum and
  *        maximum coordinates of the blocks forming the net, clipped to
- *        the region: (1..device_ctx.grid.width()-2, 1..device_ctx.grid.height()-1)
+ *        the region: (1..device_ctx.grid.width()-2, 1..device_ctx.grid.height()-1, 0..device_ctx.grid.num_layers()-1)
  */
 struct t_bb {
     t_bb() = default;
@@ -595,6 +595,10 @@ struct t_bb {
     int layer_max = OPEN;
 };
 
+/**
+ * @brief Stores a 2D bounding box.
+ * @note layer_num indicates the layer of the bounding box
+ */
 struct t_2D_tbb {
     t_2D_tbb() = default;
     t_2D_tbb(int xmin_, int xmax_, int ymin_, int ymax_, int layer_num_)

From bcdae22e1595c05bfe9ff80b8e650868fdd85b66 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 20 Sep 2023 12:59:28 -0400
Subject: [PATCH 094/257] modify layer in t_pl_offset

---
 vpr/src/base/vpr_types.h          | 21 +++++++++++----------
 vpr/test/test_vpr_constraints.cpp |  2 +-
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 212988d752f..affa059561d 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -626,30 +626,30 @@ struct t_2D_tbb {
  */
 struct t_pl_offset {
     t_pl_offset() = default;
-    t_pl_offset(int layer_offset, int xoffset, int yoffset, int sub_tile_offset)
-        : layer(layer_offset)
-        , x(xoffset)
+    t_pl_offset(int xoffset, int yoffset, int sub_tile_offset, int layer_offset)
+        :x(xoffset)
         , y(yoffset)
-        , sub_tile(sub_tile_offset) {}
+        , sub_tile(sub_tile_offset)
+        , layer(layer_offset) {}
 
-    int layer = 0;
     int x = 0;
     int y = 0;
     int sub_tile = 0;
+    int layer = 0;
 
     t_pl_offset& operator+=(const t_pl_offset& rhs) {
-        layer += rhs.layer;
         x += rhs.x;
         y += rhs.y;
         sub_tile += rhs.sub_tile;
+        layer += rhs.layer;
         return *this;
     }
 
     t_pl_offset& operator-=(const t_pl_offset& rhs) {
-        layer -= rhs.layer;
         x -= rhs.x;
         y -= rhs.y;
         sub_tile -= rhs.sub_tile;
+        layer -= rhs.layer;
         return *this;
     }
 
@@ -664,10 +664,10 @@ struct t_pl_offset {
     }
 
     friend t_pl_offset operator-(const t_pl_offset& other) {
-        return t_pl_offset(-other.layer, -other.x, -other.y, -other.sub_tile);
+        return t_pl_offset(-other.x, -other.y, -other.sub_tile, -other.layer);
     }
     friend t_pl_offset operator+(const t_pl_offset& other) {
-        return t_pl_offset(+other.layer, +other.x, +other.y, +other.sub_tile);
+        return t_pl_offset(+other.x, +other.y, +other.sub_tile, +other.layer);
     }
 
     friend bool operator<(const t_pl_offset& lhs, const t_pl_offset& rhs) {
@@ -676,7 +676,7 @@ struct t_pl_offset {
     }
 
     friend bool operator==(const t_pl_offset& lhs, const t_pl_offset& rhs) {
-        return std::tie(lhs.layer, lhs.x, lhs.y, lhs.sub_tile) == std::tie(rhs.layer, rhs.x, rhs.y, rhs.sub_tile);
+        return std::tie(lhs.x, lhs.y, lhs.sub_tile, lhs.layer) == std::tie(rhs.x, rhs.y, rhs.sub_tile, rhs.layer);
     }
 
     friend bool operator!=(const t_pl_offset& lhs, const t_pl_offset& rhs) {
@@ -691,6 +691,7 @@ struct hash<t_pl_offset> {
         std::size_t seed = std::hash<int>{}(v.x);
         vtr::hash_combine(seed, v.y);
         vtr::hash_combine(seed, v.sub_tile);
+        vtr::hash_combine(seed, v.layer);
         return seed;
     }
 };
diff --git a/vpr/test/test_vpr_constraints.cpp b/vpr/test/test_vpr_constraints.cpp
index baee4101136..f9a5d7e5bd4 100644
--- a/vpr/test/test_vpr_constraints.cpp
+++ b/vpr/test/test_vpr_constraints.cpp
@@ -441,7 +441,7 @@ TEST_CASE("PartRegionIntersect6", "[vpr]") {
 TEST_CASE("MacroConstraints", "[vpr]") {
     t_pl_macro pl_macro;
     PartitionRegion head_pr;
-    t_pl_offset offset(0, 2, 1, 0);
+    t_pl_offset offset(2, 1, 0, 0);
 
     Region reg;
     reg.set_region_rect({5, 2, 9, 6, 0});

From e4081dc963fc53ab2297a6fb8646c9d4cb04b107 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 20 Sep 2023 14:43:16 -0400
Subject: [PATCH 095/257] remove uniform_inter_layer from available moves

---
 vpr/src/base/read_options.cpp           | 2 +-
 vpr/src/base/vpr_types.h                | 2 +-
 vpr/src/place/RL_agent_util.cpp         | 6 +-----
 vpr/src/place/move_utils.cpp            | 1 -
 vpr/src/place/move_utils.h              | 1 -
 vpr/src/place/simpleRL_move_generator.h | 3 ---
 6 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 0e8544ee68f..cf5b2eadc04 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -1991,7 +1991,7 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg
             "This option is only effective for timing-driven placement."
             "The numbers listed are interpreted as the percentage probabilities of {uniformMove, MedianMove, CentroidMove, WeightedCentroid, WeightedMedian, Timing feasible Region(TFR), Critical UniformMove}, in that order.")
         .nargs('+')
-        .default_value({"100", "0", "0", "0", "0", "0", "0", "0"})
+        .default_value({"100", "0", "0", "0", "0", "0", "0"})
 
         .show_in(argparse::ShowIn::HELP_ONLY);
 
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index affa059561d..af5908a674e 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -530,7 +530,7 @@ enum class e_timing_update_type {
  ****************************************************************************/
 
 /* Values of number of placement available move types */
-#define NUM_PL_MOVE_TYPES 8
+#define NUM_PL_MOVE_TYPES 7
 #define NUM_PL_NONTIMING_MOVE_TYPES 3
 #define NUM_PL_1ST_STATE_MOVE_TYPES 4
 
diff --git a/vpr/src/place/RL_agent_util.cpp b/vpr/src/place/RL_agent_util.cpp
index f1f4ad1ecdc..5e0162abc63 100644
--- a/vpr/src/place/RL_agent_util.cpp
+++ b/vpr/src/place/RL_agent_util.cpp
@@ -48,11 +48,7 @@ void create_move_generators(std::unique_ptr<MoveGenerator>& move_generator, std:
         int num_1st_state_avail_moves = placer_opts.place_algorithm.is_timing_driven() ? NUM_PL_1ST_STATE_MOVE_TYPES : NUM_PL_NONTIMING_MOVE_TYPES;
         int num_2nd_state_avail_moves;
         if (placer_opts.place_algorithm.is_timing_driven()) {
-            if (is_multi_layer) {
-                num_2nd_state_avail_moves = NUM_PL_MOVE_TYPES;
-            } else {
-                num_2nd_state_avail_moves = NUM_PL_MOVE_TYPES - 1;
-            }
+            num_2nd_state_avail_moves = NUM_PL_MOVE_TYPES;
         } else {
             num_2nd_state_avail_moves = NUM_PL_NONTIMING_MOVE_TYPES;
         }
diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index 40fc5079d0d..138d5ad057c 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -992,7 +992,6 @@ static const std::array<std::string, NUM_PL_MOVE_TYPES + 1> move_type_strings =
     "W. Median",
     "Crit. Uniform",
     "Feasible Region",
-    "UniformInterLayer",
     "Manual Move"};
 
 //To convert enum move type to string
diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h
index a22db910be2..5b9826f3aa6 100644
--- a/vpr/src/place/move_utils.h
+++ b/vpr/src/place/move_utils.h
@@ -27,7 +27,6 @@ enum class e_move_type {
     W_MEDIAN,
     CRIT_UNIFORM,
     FEASIBLE_REGION,
-    UniformInterLayer,
     NUMBER_OF_AUTO_MOVES,
     MANUAL_MOVE = NUMBER_OF_AUTO_MOVES,
     INVALID_MOVE
diff --git a/vpr/src/place/simpleRL_move_generator.h b/vpr/src/place/simpleRL_move_generator.h
index 4fd0e846758..d4e142adae7 100644
--- a/vpr/src/place/simpleRL_move_generator.h
+++ b/vpr/src/place/simpleRL_move_generator.h
@@ -235,9 +235,6 @@ SimpleRLMoveGenerator::SimpleRLMoveGenerator(std::unique_ptr<T>& agent, bool is_
     avail_moves[(int)e_move_type::W_MEDIAN] = std::make_unique<WeightedMedianMoveGenerator>();
     avail_moves[(int)e_move_type::CRIT_UNIFORM] = std::make_unique<CriticalUniformMoveGenerator>();
     avail_moves[(int)e_move_type::FEASIBLE_REGION] = std::make_unique<FeasibleRegionMoveGenerator>();
-    if (is_multi_layer) {
-        avail_moves[(int)e_move_type::UniformInterLayer] = std::make_unique<UniformInterLayerMoveGenerator>();
-    }
 
     karmed_bandit_agent = std::move(agent);
 }

From 9522517b2730d4a441b757239220452760ed43c1 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 21 Sep 2023 10:16:45 -0400
Subject: [PATCH 096/257] set the max layer bounding box to num_layer - 1

---
 utils/route_diag/src/main.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/route_diag/src/main.cpp b/utils/route_diag/src/main.cpp
index 03b8c66d17b..0cf1c901d23 100644
--- a/utils/route_diag/src/main.cpp
+++ b/utils/route_diag/src/main.cpp
@@ -85,7 +85,7 @@ static void do_one_route(const Netlist<>& net_list,
     bounding_box.ymin = 0;
     bounding_box.ymax = device_ctx.grid.height() + 1;
     bounding_box.layer_min = 0;
-    bounding_box.layer_max = device_ctx.grid.get_num_layers() + 1;
+    bounding_box.layer_max = device_ctx.grid.get_num_layers() - 1;
 
     t_conn_cost_params cost_params;
     cost_params.criticality = router_opts.max_criticality;

From 05b2ac5980f747e074ec07e637c2c9fce0fad158 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 21 Sep 2023 10:17:16 -0400
Subject: [PATCH 097/257] comment out the unused parameter name

---
 vpr/src/place/simpleRL_move_generator.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpr/src/place/simpleRL_move_generator.h b/vpr/src/place/simpleRL_move_generator.h
index d4e142adae7..714effb3989 100644
--- a/vpr/src/place/simpleRL_move_generator.h
+++ b/vpr/src/place/simpleRL_move_generator.h
@@ -225,7 +225,7 @@ class SimpleRLMoveGenerator : public MoveGenerator {
 };
 
 template<class T, class>
-SimpleRLMoveGenerator::SimpleRLMoveGenerator(std::unique_ptr<T>& agent, bool is_multi_layer) {
+SimpleRLMoveGenerator::SimpleRLMoveGenerator(std::unique_ptr<T>& agent, bool /*is_multi_layer*/) {
     avail_moves.resize((int)e_move_type::NUMBER_OF_AUTO_MOVES);
 
     avail_moves[(int)e_move_type::UNIFORM] = std::make_unique<UniformMoveGenerator>();

From 66f7cd3f38565e3439214062892be1c2aef95ea1 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 21 Sep 2023 10:37:26 -0400
Subject: [PATCH 098/257] fix the max layer num

---
 vpr/src/route/router_delay_profiling.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpr/src/route/router_delay_profiling.cpp b/vpr/src/route/router_delay_profiling.cpp
index aee1329ec9d..3add7d02962 100644
--- a/vpr/src/route/router_delay_profiling.cpp
+++ b/vpr/src/route/router_delay_profiling.cpp
@@ -60,7 +60,7 @@ bool RouterDelayProfiler::calculate_delay(RRNodeId source_node,
     bounding_box.ymax = device_ctx.grid.height() + 1;
     if (layer_num == OPEN) {
         bounding_box.layer_min = 0;
-        bounding_box.layer_max = device_ctx.grid.get_num_layers() + 1;
+        bounding_box.layer_max = device_ctx.grid.get_num_layers() - 1;
     } else {
         bounding_box.layer_min = layer_num;
         bounding_box.layer_max = layer_num;

From 9baa738d35952b8dd1e030e3bc8bb0067932c03f Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 21 Sep 2023 15:36:32 -0400
Subject: [PATCH 099/257] remove an unnecessary assertion

---
 vpr/src/place/place.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index de5eb7b1d25..ac0cf6532a7 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -2715,7 +2715,6 @@ static void get_non_updateable_bb(ClusterNetId net_id,
                 + physical_tile_type(bnum)->pin_height_offset[pnum];
 
         int layer_num = place_ctx.block_locs[bnum].loc.layer;
-        VTR_ASSERT(layer_num >= 0 && layer_num < num_layers);
         num_sink_layer[layer_num]++;
         if (x < xmin[layer_num]) {
             xmin[layer_num] = x;

From 75f293279bde9fee7aefe0d0ad0645e044d4bead Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 22 Sep 2023 17:38:07 -0400
Subject: [PATCH 100/257] remove redundant header

---
 vpr/src/place/move_utils.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h
index 54daa0a62a3..5ef1b9f83f4 100644
--- a/vpr/src/place/move_utils.h
+++ b/vpr/src/place/move_utils.h
@@ -332,9 +332,6 @@ int get_random_layer(t_logical_block_type_ptr logical_block);
 
 t_bb union_2d_tbb(const std::vector<t_2D_tbb>& tbb_vec);
 
-void enable_placer_debug(const t_placer_opts& placer_opts,
-                         int blk_id_num,
-                         const std::vector<size_t>& net_id_nums);
 #ifdef VTR_ENABLE_DEBUG_LOGGING
 /**
  * @brief If the block ID passed to the placer_debug_net parameter of the command line is equal to blk_id, or if any of the nets

From 5602155a6501b33d6aa91c27120b46535934de02 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 22 Sep 2023 18:39:22 -0400
Subject: [PATCH 101/257] fix the layer argument order when subtracting t_pl_loc

---
 vpr/src/base/vpr_types.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index af5908a674e..f56f9ab683b 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -753,10 +753,10 @@ struct t_pl_loc {
     }
 
     friend t_pl_offset operator-(const t_pl_loc& lhs, const t_pl_loc& rhs) {
-        return {lhs.layer - rhs.layer,
-                lhs.x - rhs.x,
+        return {lhs.x - rhs.x,
                 lhs.y - rhs.y,
-                lhs.sub_tile - rhs.sub_tile};
+                lhs.sub_tile - rhs.sub_tile,
+                lhs.layer - rhs.layer};
     }
 
     friend bool operator<(const t_pl_loc& lhs, const t_pl_loc& rhs) {

From c2336b1dbc527d9396aee40f710c59f5cd8d72ce Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 25 Sep 2023 10:50:28 -0400
Subject: [PATCH 102/257] simplify updating bb

---
 vpr/src/place/place.cpp | 64 +++++++----------------------------------
 1 file changed, 10 insertions(+), 54 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 8f607cb1ac6..cbe86e72ef7 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -2958,79 +2958,35 @@ static void add_block_to_bb(const t_2D_tbb& bb_edge_old,
     int layer_old = old_pin_loc.layer_num;
     int layer_new = new_pin_loc.layer_num;
 
-    VTR_ASSERT(bb_edge_old.layer_num == bb_edge_new.layer_num);
-    VTR_ASSERT(bb_coord_old.layer_num == bb_coord_new.layer_num);
-    VTR_ASSERT(bb_edge_old.layer_num == bb_coord_old.layer_num);
-
-    if (xold == xnew && layer_old == layer_new) {
-        bb_edge_new.xmin = bb_edge_old.xmin;
-        bb_coord_new.xmin = bb_coord_old.xmin;
-        bb_edge_new.xmax = bb_edge_old.xmax;
-        bb_coord_new.xmax = bb_coord_old.xmax;
-    } else if (xnew > bb_coord_old.xmax) {
+    VTR_ASSERT_SAFE(bb_edge_old.layer_num == bb_edge_new.layer_num);
+    VTR_ASSERT_SAFE(bb_coord_old.layer_num == bb_coord_new.layer_num);
+    VTR_ASSERT_SAFE(bb_edge_old.layer_num == bb_coord_old.layer_num);
+
+    bb_edge_new = bb_edge_old;
+    bb_coord_new= bb_coord_old;
+
+    if (xnew > bb_coord_old.xmax) {
         bb_edge_new.xmax = 1;
         bb_coord_new.xmax = xnew;
-        if (layer_old != layer_new) {
-            bb_edge_new.xmin = bb_edge_old.xmin;
-            bb_coord_new.xmin = bb_coord_old.xmin;
-        }
     } else if (xnew == bb_coord_old.xmax) {
         bb_edge_new.xmax = bb_edge_old.xmax + 1;
-        bb_coord_new.xmax = xnew;
-        if (layer_old != layer_new) {
-            bb_edge_new.xmin = bb_edge_old.xmin;
-            bb_coord_new.xmin = bb_coord_old.xmin;
-        }
     } else if (xnew < bb_coord_old.xmin) {
         bb_edge_new.xmin = 1;
         bb_coord_new.xmin = xnew;
-        if (layer_old != layer_new) {
-            bb_edge_new.xmax = bb_edge_old.xmax;
-            bb_coord_new.xmax = bb_coord_old.xmax;
-        }
     } else if (xnew == bb_coord_old.xmin) {
         bb_edge_new.xmin = bb_edge_old.xmin + 1;
-        bb_coord_new.xmin = xnew;
-        if (layer_old != layer_new) {
-            bb_edge_new.xmax = bb_edge_old.xmax;
-            bb_coord_new.xmax = bb_coord_old.xmax;
-        }
     }
 
-    if (yold == ynew && layer_old == layer_new) {
-        bb_edge_new.ymin = bb_edge_old.ymin;
-        bb_coord_new.ymin = bb_coord_old.ymin;
-        bb_edge_new.ymax = bb_edge_old.ymax;
-        bb_coord_new.ymax = bb_coord_old.ymax;
-    } else if (ynew > bb_coord_old.ymax) {
+    if (ynew > bb_coord_old.ymax) {
         bb_edge_new.ymax = bb_edge_old.ymax + 1;
         bb_coord_new.ymax = ynew;
-        if (layer_new != layer_old) {
-            bb_edge_new.ymin = bb_edge_old.ymin;
-            bb_coord_new.ymin = bb_coord_old.ymin;
-        }
     } else if (ynew == bb_coord_old.ymax) {
-        bb_edge_new.ymax = 1;
-        bb_coord_new.ymax = ynew;
-        if (layer_new != layer_old) {
-            bb_edge_new.ymin = bb_edge_old.ymin;
-            bb_coord_new.ymin = bb_coord_old.ymin;
-        }
-
+        bb_edge_new.ymax = bb_edge_old.ymax + 1;
     } else if (ynew < bb_coord_old.ymin) {
         bb_edge_new.ymin = 1;
         bb_coord_new.ymin = ynew;
-        if (layer_new != layer_old) {
-            bb_edge_new.ymax = bb_edge_old.ymax;
-            bb_coord_new.ymax = bb_coord_old.ymax;
-        }
     } else if (ynew == bb_coord_old.ymin) {
         bb_edge_new.ymin = bb_edge_old.ymin + 1;
-        bb_coord_new.ymin = ynew;
-        if (layer_new != layer_old) {
-            bb_edge_new.ymax = bb_edge_old.ymax;
-            bb_coord_new.ymax = bb_coord_old.ymax;
-        }
     }
 }
 

From 426f09dfea02265d2ec10c9507fe6221e6396755 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 25 Sep 2023 12:47:20 -0400
Subject: [PATCH 103/257] update bb if xnew is not equal to xold

---
 vpr/src/place/place.cpp | 52 ++++++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 24 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index cbe86e72ef7..3ad01e91b36 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -2963,30 +2963,34 @@ static void add_block_to_bb(const t_2D_tbb& bb_edge_old,
     VTR_ASSERT_SAFE(bb_edge_old.layer_num == bb_coord_old.layer_num);
 
     bb_edge_new = bb_edge_old;
-    bb_coord_new= bb_coord_old;
-
-    if (xnew > bb_coord_old.xmax) {
-        bb_edge_new.xmax = 1;
-        bb_coord_new.xmax = xnew;
-    } else if (xnew == bb_coord_old.xmax) {
-        bb_edge_new.xmax = bb_edge_old.xmax + 1;
-    } else if (xnew < bb_coord_old.xmin) {
-        bb_edge_new.xmin = 1;
-        bb_coord_new.xmin = xnew;
-    } else if (xnew == bb_coord_old.xmin) {
-        bb_edge_new.xmin = bb_edge_old.xmin + 1;
-    }
-
-    if (ynew > bb_coord_old.ymax) {
-        bb_edge_new.ymax = bb_edge_old.ymax + 1;
-        bb_coord_new.ymax = ynew;
-    } else if (ynew == bb_coord_old.ymax) {
-        bb_edge_new.ymax = bb_edge_old.ymax + 1;
-    } else if (ynew < bb_coord_old.ymin) {
-        bb_edge_new.ymin = 1;
-        bb_coord_new.ymin = ynew;
-    } else if (ynew == bb_coord_old.ymin) {
-        bb_edge_new.ymin = bb_edge_old.ymin + 1;
+    bb_coord_new = bb_coord_old;
+
+    if (xnew != xold) {
+        if (xnew > bb_coord_old.xmax) {
+            bb_edge_new.xmax = 1;
+            bb_coord_new.xmax = xnew;
+        } else if (xnew == bb_coord_old.xmax) {
+            bb_edge_new.xmax = bb_edge_old.xmax + 1;
+        } else if (xnew < bb_coord_old.xmin) {
+            bb_edge_new.xmin = 1;
+            bb_coord_new.xmin = xnew;
+        } else if (xnew == bb_coord_old.xmin) {
+            bb_edge_new.xmin = bb_edge_old.xmin + 1;
+        }
+    }
+
+    if (ynew != yold) {
+        if (ynew > bb_coord_old.ymax) {
+            bb_edge_new.ymax = bb_edge_old.ymax + 1;
+            bb_coord_new.ymax = ynew;
+        } else if (ynew == bb_coord_old.ymax) {
+            bb_edge_new.ymax = bb_edge_old.ymax + 1;
+        } else if (ynew < bb_coord_old.ymin) {
+            bb_edge_new.ymin = 1;
+            bb_coord_new.ymin = ynew;
+        } else if (ynew == bb_coord_old.ymin) {
+            bb_edge_new.ymin = bb_edge_old.ymin + 1;
+        }
     }
 }
 

From b0d58d58a82ef6d6696980776d47f64215b60040 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 25 Sep 2023 13:32:39 -0400
Subject: [PATCH 104/257] update bb coord if layer is changed

---
 vpr/src/place/place.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 3ad01e91b36..32c91936f18 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -2965,7 +2965,7 @@ static void add_block_to_bb(const t_2D_tbb& bb_edge_old,
     bb_edge_new = bb_edge_old;
     bb_coord_new = bb_coord_old;
 
-    if (xnew != xold) {
+    if (xnew != xold || layer_old != layer_new) {
         if (xnew > bb_coord_old.xmax) {
             bb_edge_new.xmax = 1;
             bb_coord_new.xmax = xnew;
@@ -2979,7 +2979,7 @@ static void add_block_to_bb(const t_2D_tbb& bb_edge_old,
         }
     }
 
-    if (ynew != yold) {
+    if (ynew != yold || layer_old != layer_new) {
         if (ynew > bb_coord_old.ymax) {
             bb_edge_new.ymax = bb_edge_old.ymax + 1;
             bb_coord_new.ymax = ynew;

From 32845de8e0a7f26a16eeefb7f195c68cab3dfaeb Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 25 Sep 2023 15:19:51 -0400
Subject: [PATCH 105/257] rewrite how to calculate bb

---
 vpr/src/place/place.cpp | 324 ++++++++++++++++++++--------------------
 1 file changed, 161 insertions(+), 163 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 32c91936f18..cdad9b4ce4c 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -346,28 +346,19 @@ static void update_bb_pin_sink_count(ClusterNetId net_id,
                                      std::vector<int>& bb_pin_sink_count_new,
                                      bool is_output_pin);
 
-static void try_remove_block_from_bb_edge(ClusterNetId net_id,
-                                          const t_physical_tile_loc& pin_old_loc,
-                                          const t_physical_tile_loc& pin_new_loc,
-                                          const std::vector<t_2D_tbb>& curr_bb_edge,
-                                          const std::vector<t_2D_tbb>& curr_bb_coord,
-                                          std::vector<t_2D_tbb>& bb_edge_new,
-                                          std::vector<t_2D_tbb>& bb_coord_new,
-                                          std::vector<int>& bb_pin_sink_count_new);
-
-static void remove_block_from_bb_edge(ClusterNetId net_id,
-                                      std::vector<t_2D_tbb>& bb_edge_new,
-                                      std::vector<t_2D_tbb>& bb_coord_new,
-                                      std::vector<int>& bb_layer_pin_sink_count,
-                                      const int& old_num_block_on_edge,
-                                      const int& old_edge_coord,
-                                      int& new_num_block_on_edge,
-                                      int& new_edge_coord);
-
-static void add_block_to_bb(const t_2D_tbb& bb_edge_old,
+static void update_bb_edge (ClusterNetId net_id,
+                           std::vector<t_2D_tbb>& bb_edge_new,
+                           std::vector<t_2D_tbb>& bb_coord_new,
+                           std::vector<int>& bb_layer_pin_sink_count,
+                           const int& old_num_block_on_edge,
+                           const int& old_edge_coord,
+                           int& new_num_block_on_edge,
+                           int& new_edge_coord);
+
+
+static void add_block_to_bb(const t_physical_tile_loc& new_pin_loc,
+                            const t_2D_tbb& bb_edge_old,
                             const t_2D_tbb& bb_coord_old,
-                            const t_physical_tile_loc& old_pin_loc,
-                            const t_physical_tile_loc& new_pin_loc,
                             t_2D_tbb& bb_edge_new,
                             t_2D_tbb& bb_coord_new);
 
@@ -2773,7 +2764,6 @@ static void update_bb(ClusterNetId net_id,
     auto& device_ctx = g_vpr_ctx.device();
     auto& place_move_ctx = g_placer_ctx.move();
 
-    int new_layer = pin_new_loc.layer_num;
 
     pin_new_loc.x = max(min<int>(pin_new_loc.x, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
     pin_new_loc.y = max(min<int>(pin_new_loc.y, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
@@ -2806,132 +2796,156 @@ static void update_bb(ClusterNetId net_id,
                              bb_pin_sink_count_new,
                              is_output_pin);
 
-    try_remove_block_from_bb_edge(net_id,
-                                  pin_old_loc,
-                                  pin_new_loc,
-                                  *curr_bb_edge,
-                                  *curr_bb_coord,
-                                  bb_edge_new,
-                                  bb_coord_new,
-                                  bb_pin_sink_count_new);
-
     if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
         return;
     }
 
-    add_block_to_bb((*curr_bb_edge)[new_layer],
-                    (*curr_bb_coord)[new_layer],
-                    pin_old_loc,
-                    pin_new_loc,
-                    bb_edge_new[new_layer],
-                    bb_coord_new[new_layer]);
+    int x_old = pin_old_loc.x;
+    int x_new = pin_new_loc.x;
 
-    if (bb_updated_before[net_id] == NOT_UPDATED_YET) {
-        bb_updated_before[net_id] = UPDATED_ONCE;
-    }
-}
+    int y_old = pin_old_loc.y;
+    int y_new = pin_new_loc.y;
 
-static void update_bb_pin_sink_count(ClusterNetId /* net_id */,
-                                     const t_physical_tile_loc& pin_old_loc,
-                                     const t_physical_tile_loc& pin_new_loc,
-                                     const std::vector<int>& curr_layer_pin_sink_count,
-                                     std::vector<int>& bb_pin_sink_count_new,
-                                     bool is_output_pin) {
-    VTR_ASSERT(curr_layer_pin_sink_count[pin_old_loc.layer_num] > 0 || is_output_pin == 1);
-    bb_pin_sink_count_new = curr_layer_pin_sink_count;
-    if (!is_output_pin) {
-        bb_pin_sink_count_new[pin_old_loc.layer_num] -= 1;
-        bb_pin_sink_count_new[pin_new_loc.layer_num] += 1;
-    }
-}
+    int layer_old = pin_old_loc.layer_num;
+    int layer_new = pin_new_loc.layer_num;
+    bool layer_changed = (layer_old != layer_new);
+
+    bb_edge_new = *curr_bb_edge;
+    bb_coord_new = *curr_bb_coord;
 
-static void try_remove_block_from_bb_edge(ClusterNetId net_id,
-                                          const t_physical_tile_loc& pin_old_loc,
-                                          const t_physical_tile_loc& pin_new_loc,
-                                          const std::vector<t_2D_tbb>& curr_bb_edge,
-                                          const std::vector<t_2D_tbb>& curr_bb_coord,
-                                          std::vector<t_2D_tbb>& bb_edge_new,
-                                          std::vector<t_2D_tbb>& bb_coord_new,
-                                          std::vector<int>& bb_pin_sink_count_new) {
-    int old_layer = pin_old_loc.layer_num;
-    int new_layer = pin_new_loc.layer_num;
-
-    bb_edge_new = curr_bb_edge;
-    bb_coord_new = curr_bb_coord;
-
-    if (pin_old_loc.x == curr_bb_coord[old_layer].xmax) {
-        if (old_layer != new_layer || pin_new_loc.x < pin_old_loc.x) {
-            remove_block_from_bb_edge(net_id,
-                                      bb_edge_new,
-                                      bb_coord_new,
-                                      bb_pin_sink_count_new,
-                                      curr_bb_edge[old_layer].xmax,
-                                      curr_bb_coord[old_layer].xmax,
-                                      bb_edge_new[old_layer].xmax,
-                                      bb_coord_new[old_layer].xmax);
+    if (x_new < x_old || layer_changed) {
+        if (x_old == (*curr_bb_coord)[layer_old].xmax) {
+            update_bb_edge(net_id,
+                           bb_edge_new,
+                           bb_coord_new,
+                           bb_pin_sink_count_new,
+                           (*curr_bb_edge)[layer_old].xmax,
+                           (*curr_bb_coord)[layer_old].xmax,
+                           bb_edge_new[layer_old].xmax,
+                           bb_coord_new[layer_old].xmax);
             if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
                 return;
             }
         }
-    }
 
-    if (pin_old_loc.x == curr_bb_coord[old_layer].xmin) {
-        if (old_layer != new_layer || pin_new_loc.x > pin_old_loc.x) {
-            remove_block_from_bb_edge(net_id,
-                                      bb_edge_new,
-                                      bb_coord_new,
-                                      bb_pin_sink_count_new,
-                                      curr_bb_edge[old_layer].xmin,
-                                      curr_bb_coord[old_layer].xmin,
-                                      bb_edge_new[old_layer].xmin,
-                                      bb_coord_new[old_layer].xmin);
+        if (!layer_changed) {
+            if (x_new < (*curr_bb_coord)[layer_new].xmin) {
+                bb_edge_new[layer_new].xmin = 1;
+                bb_coord_new[layer_new].xmin = x_new;
+            } else if (x_new == (*curr_bb_coord)[layer_new].xmin) {
+                bb_edge_new[layer_new].xmin++;
+            }
+        }
+
+    } else if (x_new > x_old || layer_old != layer_new) {
+        if (x_old == (*curr_bb_coord)[layer_old].xmin) {
+            update_bb_edge(net_id,
+                           bb_edge_new,
+                           bb_coord_new,
+                           bb_pin_sink_count_new,
+                           (*curr_bb_edge)[layer_old].xmin,
+                           (*curr_bb_coord)[layer_old].xmin,
+                           bb_edge_new[layer_old].xmin,
+                           bb_coord_new[layer_old].xmin);
             if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
                 return;
             }
         }
+
+        if (!layer_changed) {
+            if (x_new > (*curr_bb_coord)[layer_new].xmax) {
+                bb_edge_new[layer_new].xmax = 1;
+                bb_coord_new[layer_new].xmax = x_new;
+            } else if (x_new == (*curr_bb_coord)[layer_new].xmax) {
+                    bb_edge_new[layer_new].xmax++;
+            }
+        }
     }
 
-    if (pin_old_loc.y == curr_bb_coord[old_layer].ymax) {
-        if (old_layer != new_layer || pin_new_loc.y < pin_old_loc.y) {
-            remove_block_from_bb_edge(net_id,
-                                      bb_edge_new,
-                                      bb_coord_new,
-                                      bb_pin_sink_count_new,
-                                      curr_bb_edge[old_layer].ymax,
-                                      curr_bb_coord[old_layer].ymax,
-                                      bb_edge_new[old_layer].ymax,
-                                      bb_coord_new[old_layer].ymax);
+    if (y_new < y_old || layer_changed) {
+        if (y_old == (*curr_bb_coord)[layer_old].ymax) {
+            update_bb_edge(net_id,
+                           bb_edge_new,
+                           bb_coord_new,
+                           bb_pin_sink_count_new,
+                           (*curr_bb_edge)[layer_old].ymax,
+                           (*curr_bb_coord)[layer_old].ymax,
+                           bb_edge_new[layer_old].ymax,
+                           bb_coord_new[layer_old].ymax);
             if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
-                return;
+                    return;
             }
         }
-    }
 
-    if (pin_old_loc.y == curr_bb_coord[old_layer].ymin) {
-        if (old_layer != new_layer || pin_new_loc.y > pin_old_loc.y) {
-            remove_block_from_bb_edge(net_id,
-                                      bb_edge_new,
-                                      bb_coord_new,
-                                      bb_pin_sink_count_new,
-                                      curr_bb_edge[old_layer].ymin,
-                                      curr_bb_coord[old_layer].ymin,
-                                      bb_edge_new[old_layer].ymin,
-                                      bb_coord_new[old_layer].ymin);
+        if (!layer_changed) {
+            if (y_new < (*curr_bb_coord)[layer_new].ymin) {
+                    bb_edge_new[layer_new].ymin = 1;
+                    bb_coord_new[layer_new].ymin = y_new;
+            } else if (y_new == (*curr_bb_coord)[layer_new].ymin) {
+                    bb_edge_new[layer_new].ymin++;
+            }
+        }
+
+    } else if (y_new > y_old || layer_old != layer_new) {
+        if (y_old == (*curr_bb_coord)[layer_old].ymin) {
+            update_bb_edge(net_id,
+                           bb_edge_new,
+                           bb_coord_new,
+                           bb_pin_sink_count_new,
+                           (*curr_bb_edge)[layer_old].ymin,
+                           (*curr_bb_coord)[layer_old].ymin,
+                           bb_edge_new[layer_old].ymin,
+                           bb_coord_new[layer_old].ymin);
             if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
-                return;
+                    return;
+            }
+        }
+
+        if (!layer_changed) {
+            if (y_new > (*curr_bb_coord)[layer_new].ymax) {
+                    bb_edge_new[layer_new].ymax = 1;
+                    bb_coord_new[layer_new].ymax = y_new;
+            } else if (y_new == (*curr_bb_coord)[layer_new].ymax) {
+                    bb_edge_new[layer_new].ymax++;
             }
         }
     }
+
+    if (layer_changed) {
+        add_block_to_bb(pin_new_loc,
+                        (*curr_bb_edge)[layer_new],
+                        (*curr_bb_coord)[layer_new],
+                        bb_edge_new[layer_new],
+                        bb_coord_new[layer_new]);
+    }
+
+    if (bb_updated_before[net_id] == NOT_UPDATED_YET) {
+        bb_updated_before[net_id] = UPDATED_ONCE;
+    }
 }
 
-static void remove_block_from_bb_edge(ClusterNetId net_id,
-                                      std::vector<t_2D_tbb>& bb_edge_new,
-                                      std::vector<t_2D_tbb>& bb_coord_new,
-                                      std::vector<int>& bb_layer_pin_sink_count,
-                                      const int& old_num_block_on_edge,
-                                      const int& old_edge_coord,
-                                      int& new_num_block_on_edge,
-                                      int& new_edge_coord) {
+static void update_bb_pin_sink_count(ClusterNetId /* net_id */,
+                                     const t_physical_tile_loc& pin_old_loc,
+                                     const t_physical_tile_loc& pin_new_loc,
+                                     const std::vector<int>& curr_layer_pin_sink_count,
+                                     std::vector<int>& bb_pin_sink_count_new,
+                                     bool is_output_pin) {
+    VTR_ASSERT(curr_layer_pin_sink_count[pin_old_loc.layer_num] > 0 || is_output_pin == 1);
+    bb_pin_sink_count_new = curr_layer_pin_sink_count;
+    if (!is_output_pin) {
+        bb_pin_sink_count_new[pin_old_loc.layer_num] -= 1;
+        bb_pin_sink_count_new[pin_new_loc.layer_num] += 1;
+    }
+}
+
+static void update_bb_edge (ClusterNetId net_id,
+                           std::vector<t_2D_tbb>& bb_edge_new,
+                           std::vector<t_2D_tbb>& bb_coord_new,
+                           std::vector<int>& bb_layer_pin_sink_count,
+                           const int& old_num_block_on_edge,
+                           const int& old_edge_coord,
+                           int& new_num_block_on_edge,
+                           int& new_edge_coord) {
     if (old_num_block_on_edge == 1) {
         get_bb_from_scratch(net_id,
                             bb_edge_new,
@@ -2945,52 +2959,36 @@ static void remove_block_from_bb_edge(ClusterNetId net_id,
     }
 }
 
-static void add_block_to_bb(const t_2D_tbb& bb_edge_old,
+static void add_block_to_bb(const t_physical_tile_loc& new_pin_loc,
+                            const t_2D_tbb& bb_edge_old,
                             const t_2D_tbb& bb_coord_old,
-                            const t_physical_tile_loc& old_pin_loc,
-                            const t_physical_tile_loc& new_pin_loc,
                             t_2D_tbb& bb_edge_new,
                             t_2D_tbb& bb_coord_new) {
-    int xold = old_pin_loc.x;
-    int xnew = new_pin_loc.x;
-    int yold = old_pin_loc.y;
-    int ynew = new_pin_loc.y;
-    int layer_old = old_pin_loc.layer_num;
-    int layer_new = new_pin_loc.layer_num;
-
-    VTR_ASSERT_SAFE(bb_edge_old.layer_num == bb_edge_new.layer_num);
-    VTR_ASSERT_SAFE(bb_coord_old.layer_num == bb_coord_new.layer_num);
-    VTR_ASSERT_SAFE(bb_edge_old.layer_num == bb_coord_old.layer_num);
-
-    bb_edge_new = bb_edge_old;
-    bb_coord_new = bb_coord_old;
-
-    if (xnew != xold || layer_old != layer_new) {
-        if (xnew > bb_coord_old.xmax) {
-            bb_edge_new.xmax = 1;
-            bb_coord_new.xmax = xnew;
-        } else if (xnew == bb_coord_old.xmax) {
-            bb_edge_new.xmax = bb_edge_old.xmax + 1;
-        } else if (xnew < bb_coord_old.xmin) {
-            bb_edge_new.xmin = 1;
-            bb_coord_new.xmin = xnew;
-        } else if (xnew == bb_coord_old.xmin) {
-            bb_edge_new.xmin = bb_edge_old.xmin + 1;
-        }
-    }
-
-    if (ynew != yold || layer_old != layer_new) {
-        if (ynew > bb_coord_old.ymax) {
-            bb_edge_new.ymax = bb_edge_old.ymax + 1;
-            bb_coord_new.ymax = ynew;
-        } else if (ynew == bb_coord_old.ymax) {
-            bb_edge_new.ymax = bb_edge_old.ymax + 1;
-        } else if (ynew < bb_coord_old.ymin) {
-            bb_edge_new.ymin = 1;
-            bb_coord_new.ymin = ynew;
-        } else if (ynew == bb_coord_old.ymin) {
-            bb_edge_new.ymin = bb_edge_old.ymin + 1;
-        }
+    int x_new = new_pin_loc.x;
+    int y_new = new_pin_loc.y;
+
+    if (x_new > bb_coord_old.xmax) {
+        bb_edge_new.xmax = 1;
+        bb_coord_new.xmax = x_new;
+    } else if (x_new == bb_coord_old.xmax) {
+        bb_edge_new.xmax = bb_edge_old.xmax + 1;
+    } else if (x_new < bb_coord_old.xmin) {
+        bb_edge_new.xmin = 1;
+        bb_coord_new.xmin = x_new;
+    } else if (x_new == bb_coord_old.xmin) {
+        bb_edge_new.xmin = bb_edge_old.xmin + 1;
+    }
+
+    if (y_new > bb_coord_old.ymax) {
+        bb_edge_new.ymax = 1;
+        bb_coord_new.ymax = y_new;
+    } else if (y_new == bb_coord_old.ymax) {
+        bb_edge_new.ymax = bb_edge_old.ymax + 1;
+    } else if (y_new < bb_coord_old.ymin) {
+        bb_edge_new.ymin = 1;
+        bb_coord_new.ymin = y_new;
+    } else if (y_new == bb_coord_old.ymin) {
+        bb_edge_new.ymin = bb_edge_old.ymin + 1;
     }
 }
 

From 98d5b613dad6a16d82d3fb414f941cac3828eea9 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 25 Sep 2023 19:35:40 -0400
Subject: [PATCH 106/257] fix the inline static name

---
 vpr/src/route/connection_router.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp
index ff96b23eb82..c87c1edf68c 100644
--- a/vpr/src/route/connection_router.cpp
+++ b/vpr/src/route/connection_router.cpp
@@ -5,7 +5,7 @@
 #include "bucket.h"
 #include "rr_graph_fwd.h"
 
-inline static bool has_path_to_sink(const t_rr_graph_view& rr_nodes,
+static inline bool has_path_to_sink(const t_rr_graph_view& rr_nodes,
                                     const RRGraphView* rr_graph,
                                     RRNodeId from_node,
                                     RRNodeId sink_node) {

From e860289a4ae2dcf487676f85ca5ee25168868100 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 25 Sep 2023 19:36:07 -0400
Subject: [PATCH 107/257] remove a redundant check in place.cpp

---
 vpr/src/place/place.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index cdad9b4ce4c..fc29a3a5ea0 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -2796,10 +2796,6 @@ static void update_bb(ClusterNetId net_id,
                              bb_pin_sink_count_new,
                              is_output_pin);
 
-    if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
-        return;
-    }
-
     int x_old = pin_old_loc.x;
     int x_new = pin_new_loc.x;
 

From 94d2562e312caf9ccb8c5b67ead9460a443416c3 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 25 Sep 2023 19:38:55 -0400
Subject: [PATCH 108/257] make format

---
 vpr/src/base/vpr_types.h     |  2 +-
 vpr/src/place/move_utils.cpp |  2 +-
 vpr/src/place/place.cpp      | 28 +++++++++++++---------------
 3 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index f56f9ab683b..ccedd4a24a6 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -627,7 +627,7 @@ struct t_2D_tbb {
 struct t_pl_offset {
     t_pl_offset() = default;
     t_pl_offset(int xoffset, int yoffset, int sub_tile_offset, int layer_offset)
-        :x(xoffset)
+        : x(xoffset)
         , y(yoffset)
         , sub_tile(sub_tile_offset)
         , layer(layer_offset) {}
diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index 29fb5097ced..4cd0975bef0 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -1335,7 +1335,7 @@ t_bb union_2d_tbb(const std::vector<t_2D_tbb>& tbb_vec) {
                    OPEN,
                    OPEN,
                    0,
-                   tbb_vec.size()-1);
+                   tbb_vec.size() - 1);
 
     for (const auto& bb : tbb_vec) {
         if (bb.xmin == OPEN) {
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index fc29a3a5ea0..de1fa09b5ef 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -346,7 +346,7 @@ static void update_bb_pin_sink_count(ClusterNetId net_id,
                                      std::vector<int>& bb_pin_sink_count_new,
                                      bool is_output_pin);
 
-static void update_bb_edge (ClusterNetId net_id,
+static void update_bb_edge(ClusterNetId net_id,
                            std::vector<t_2D_tbb>& bb_edge_new,
                            std::vector<t_2D_tbb>& bb_coord_new,
                            std::vector<int>& bb_layer_pin_sink_count,
@@ -355,7 +355,6 @@ static void update_bb_edge (ClusterNetId net_id,
                            int& new_num_block_on_edge,
                            int& new_edge_coord);
 
-
 static void add_block_to_bb(const t_physical_tile_loc& new_pin_loc,
                             const t_2D_tbb& bb_edge_old,
                             const t_2D_tbb& bb_coord_old,
@@ -2609,7 +2608,7 @@ static double get_net_wirelength_estimate(ClusterNetId /* net_id */,
         if (layer_pin_sink_count[layer_num] == 0) {
             continue;
         }
-        crossing = wirelength_crossing_count(layer_pin_sink_count[layer_num]+1);
+        crossing = wirelength_crossing_count(layer_pin_sink_count[layer_num] + 1);
 
         /* Could insert a check for xmin == xmax.  In that case, assume  *
          * connection will be made with no bends and hence no x-cost.    *
@@ -2641,7 +2640,7 @@ static double get_net_cost(ClusterNetId /* net_id */,
         if (layer_pin_sink_count[layer_num] == 0) {
             continue;
         }
-        crossing = wirelength_crossing_count(layer_pin_sink_count[layer_num]+1);
+        crossing = wirelength_crossing_count(layer_pin_sink_count[layer_num] + 1);
 
         /* Could insert a check for xmin == xmax.  In that case, assume  *
          * connection will be made with no bends and hence no x-cost.    *
@@ -2764,7 +2763,6 @@ static void update_bb(ClusterNetId net_id,
     auto& device_ctx = g_vpr_ctx.device();
     auto& place_move_ctx = g_placer_ctx.move();
 
-
     pin_new_loc.x = max(min<int>(pin_new_loc.x, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
     pin_new_loc.y = max(min<int>(pin_new_loc.y, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
     pin_old_loc.x = max(min<int>(pin_old_loc.x, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
@@ -2853,7 +2851,7 @@ static void update_bb(ClusterNetId net_id,
                 bb_edge_new[layer_new].xmax = 1;
                 bb_coord_new[layer_new].xmax = x_new;
             } else if (x_new == (*curr_bb_coord)[layer_new].xmax) {
-                    bb_edge_new[layer_new].xmax++;
+                bb_edge_new[layer_new].xmax++;
             }
         }
     }
@@ -2869,16 +2867,16 @@ static void update_bb(ClusterNetId net_id,
                            bb_edge_new[layer_old].ymax,
                            bb_coord_new[layer_old].ymax);
             if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
-                    return;
+                return;
             }
         }
 
         if (!layer_changed) {
             if (y_new < (*curr_bb_coord)[layer_new].ymin) {
-                    bb_edge_new[layer_new].ymin = 1;
-                    bb_coord_new[layer_new].ymin = y_new;
+                bb_edge_new[layer_new].ymin = 1;
+                bb_coord_new[layer_new].ymin = y_new;
             } else if (y_new == (*curr_bb_coord)[layer_new].ymin) {
-                    bb_edge_new[layer_new].ymin++;
+                bb_edge_new[layer_new].ymin++;
             }
         }
 
@@ -2893,16 +2891,16 @@ static void update_bb(ClusterNetId net_id,
                            bb_edge_new[layer_old].ymin,
                            bb_coord_new[layer_old].ymin);
             if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
-                    return;
+                return;
             }
         }
 
         if (!layer_changed) {
             if (y_new > (*curr_bb_coord)[layer_new].ymax) {
-                    bb_edge_new[layer_new].ymax = 1;
-                    bb_coord_new[layer_new].ymax = y_new;
+                bb_edge_new[layer_new].ymax = 1;
+                bb_coord_new[layer_new].ymax = y_new;
             } else if (y_new == (*curr_bb_coord)[layer_new].ymax) {
-                    bb_edge_new[layer_new].ymax++;
+                bb_edge_new[layer_new].ymax++;
             }
         }
     }
@@ -2934,7 +2932,7 @@ static void update_bb_pin_sink_count(ClusterNetId /* net_id */,
     }
 }
 
-static void update_bb_edge (ClusterNetId net_id,
+static void update_bb_edge(ClusterNetId net_id,
                            std::vector<t_2D_tbb>& bb_edge_new,
                            std::vector<t_2D_tbb>& bb_coord_new,
                            std::vector<int>& bb_layer_pin_sink_count,

From 328e222e6c6d36b7bec982121e53d85f3d210056 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 26 Sep 2023 11:33:44 -0400
Subject: [PATCH 109/257] remove a redundant if statement since there should be
 a bounding box on all layers

---
 vpr/src/place/median_move_generator.cpp | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index 08b8d7d6fd4..8dcadfca9e4 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -232,13 +232,7 @@ static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, std::vector
      * clip to 1 in both directions as well (since minimum channel index *
      * is 0).  See route_common.cpp for a channel diagram.               */
     for (int layer_num = 0; layer_num < num_layers; layer_num++) {
-        if (!first_block[layer_num]) {
-            bb_coord_new[layer_num].xmin = OPEN;
-            bb_coord_new[layer_num].ymin = OPEN;
-            bb_coord_new[layer_num].xmax = OPEN;
-            bb_coord_new[layer_num].ymax = OPEN;
-            continue;
-        }
+        VTR_ASSERT_SAFE(first_block[layer_num]);
         bb_coord_new[layer_num].xmin = std::max(std::min<int>(xmin[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
         bb_coord_new[layer_num].ymin = std::max(std::min<int>(ymin[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels
         bb_coord_new[layer_num].xmax = std::max(std::min<int>(xmax[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels

From 954a7b49d26c65935739b780b2efab09650bf7dc Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 26 Sep 2023 11:43:57 -0400
Subject: [PATCH 110/257] remove a redundant assignment

---
 vpr/src/place/median_move_generator.cpp | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index 8dcadfca9e4..98e6868b399 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -268,13 +268,6 @@ static bool get_bb_incrementally(ClusterNetId net_id, std::vector<t_2D_tbb>& bb_
     xold = std::max(std::min<int>(xold, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
     yold = std::max(std::min<int>(yold, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
 
-    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
-        if (layer_num == layer) {
-            continue;
-        }
-        bb_coord_new[layer_num] = place_move_ctx.bb_coords[net_id][layer];
-    }
-
     /* The net had NOT been updated before, could use the old values */
     curr_bb_coord = &(place_move_ctx.bb_coords[net_id][layer]);
     curr_bb_edge = &(place_move_ctx.bb_num_on_edges[net_id][layer]);

From 091f8d62b27fb11ba046f00c7235e8640817b0a8 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 26 Sep 2023 11:49:30 -0400
Subject: [PATCH 111/257] make union_2d_tbb faster

---
 vpr/src/place/move_utils.cpp | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index 4cd0975bef0..367eef97f8b 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -1330,28 +1330,31 @@ int get_random_layer(t_logical_block_type_ptr logical_block) {
 }
 
 t_bb union_2d_tbb(const std::vector<t_2D_tbb>& tbb_vec) {
-    t_bb merged_bb(OPEN,
-                   OPEN,
-                   OPEN,
-                   OPEN,
+    int num_layers = g_vpr_ctx.device().grid.get_num_layers();
+    VTR_ASSERT_SAFE((int)tbb_vec.size() == num_layers);
+    t_bb merged_bb(tbb_vec[0].xmin,
+                   tbb_vec[0].xmax,
+                   tbb_vec[0].ymin,
+                   tbb_vec[0].ymax,
                    0,
-                   tbb_vec.size() - 1);
+                   num_layers - 1);
 
-    for (const auto& bb : tbb_vec) {
-        if (bb.xmin == OPEN) {
+    for (int layer_num = 1; layer_num < num_layers; layer_num++) {
+        const auto& layer_bb = tbb_vec[layer_num];
+        if (layer_bb.xmin == OPEN) {
             continue;
         }
-        if (merged_bb.xmin == OPEN || bb.xmin < merged_bb.xmin) {
-            merged_bb.xmin = bb.xmin;
+        if (merged_bb.xmin == OPEN || layer_bb.xmin < merged_bb.xmin) {
+            merged_bb.xmin = layer_bb.xmin;
         }
-        if (merged_bb.xmax == OPEN || bb.xmax > merged_bb.xmax) {
-            merged_bb.xmax = bb.xmax;
+        if (merged_bb.xmax == OPEN || layer_bb.xmax > merged_bb.xmax) {
+            merged_bb.xmax = layer_bb.xmax;
         }
-        if (merged_bb.ymin == OPEN || bb.ymin < merged_bb.ymin) {
-            merged_bb.ymin = bb.ymin;
+        if (merged_bb.ymin == OPEN || layer_bb.ymin < merged_bb.ymin) {
+            merged_bb.ymin = layer_bb.ymin;
         }
-        if (merged_bb.ymax == OPEN || bb.ymax > merged_bb.ymax) {
-            merged_bb.ymax = bb.ymax;
+        if (merged_bb.ymax == OPEN || layer_bb.ymax > merged_bb.ymax) {
+            merged_bb.ymax = layer_bb.ymax;
         }
     }
 

From a93551a41c5bba37341e7217cd89139e85d20269 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 26 Sep 2023 15:56:00 -0400
Subject: [PATCH 112/257] don't need to create an object, just pass the
 constructor args

---
 vpr/src/place/initial_placement.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/vpr/src/place/initial_placement.cpp b/vpr/src/place/initial_placement.cpp
index e8f9a051aaa..c80d5ff245b 100644
--- a/vpr/src/place/initial_placement.cpp
+++ b/vpr/src/place/initial_placement.cpp
@@ -632,13 +632,13 @@ static bool try_random_placement(t_pl_macro pl_macro, PartitionRegion& pr, t_log
     t_physical_tile_loc to_compressed_loc;
 
     bool legal;
-    t_bb place_bb(min_compressed_loc.x, max_compressed_loc.x,
-                  min_compressed_loc.y, max_compressed_loc.y,
-                  reg_coord.layer_num, reg_coord.layer_num);
+
     legal = find_compatible_compressed_loc_in_range(block_type,
                                                     delta_cx,
                                                     {cx_from, cy_from, reg_coord.layer_num},
-                                                    place_bb,
+                                                    {min_compressed_loc.x, max_compressed_loc.x,
+                                                     min_compressed_loc.y, max_compressed_loc.y,
+                                                     reg_coord.layer_num, reg_coord.layer_num},
                                                     to_compressed_loc,
                                                     false,
                                                     reg_coord.layer_num);

From 10b390097e6cf0b8112aacd00f373beaf9f7bab0 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 26 Sep 2023 15:57:03 -0400
Subject: [PATCH 113/257] remove an unused var

---
 vpr/src/place/median_move_generator.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index 98e6868b399..ebafe16110f 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -260,9 +260,7 @@ static bool get_bb_incrementally(ClusterNetId net_id, std::vector<t_2D_tbb>& bb_
 
     auto& device_ctx = g_vpr_ctx.device();
     auto& place_move_ctx = g_placer_ctx.move();
-
-    int num_layers = device_ctx.grid.get_num_layers();
-
+    
     xnew = std::max(std::min<int>(xnew, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
     ynew = std::max(std::min<int>(ynew, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
     xold = std::max(std::min<int>(xold, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels

From e11ff58494636dfe50d80d51da2c206449c97ff9 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 26 Sep 2023 16:18:18 -0400
Subject: [PATCH 114/257] remove unused var

---
 vpr/src/place/median_move_generator.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index ebafe16110f..ae5767f4d0b 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -260,7 +260,7 @@ static bool get_bb_incrementally(ClusterNetId net_id, std::vector<t_2D_tbb>& bb_
 
     auto& device_ctx = g_vpr_ctx.device();
     auto& place_move_ctx = g_placer_ctx.move();
-    
+
     xnew = std::max(std::min<int>(xnew, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
     ynew = std::max(std::min<int>(ynew, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
     xold = std::max(std::min<int>(xold, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels

From 7e96dd04f94dc6d0ebfdacf40e28640f5269a225 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 26 Sep 2023 19:07:43 -0400
Subject: [PATCH 115/257] instead of assigning vectors, set a value whenever it
 is needed

---
 vpr/src/place/place.cpp | 133 +++++++++++++++++++++++++++-------------
 1 file changed, 89 insertions(+), 44 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index de1fa09b5ef..34e9164e753 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -346,14 +346,14 @@ static void update_bb_pin_sink_count(ClusterNetId net_id,
                                      std::vector<int>& bb_pin_sink_count_new,
                                      bool is_output_pin);
 
-static void update_bb_edge(ClusterNetId net_id,
-                           std::vector<t_2D_tbb>& bb_edge_new,
-                           std::vector<t_2D_tbb>& bb_coord_new,
-                           std::vector<int>& bb_layer_pin_sink_count,
-                           const int& old_num_block_on_edge,
-                           const int& old_edge_coord,
-                           int& new_num_block_on_edge,
-                           int& new_edge_coord);
+static inline void update_bb_edge(ClusterNetId net_id,
+                                  std::vector<t_2D_tbb>& bb_edge_new,
+                                  std::vector<t_2D_tbb>& bb_coord_new,
+                                  std::vector<int>& bb_layer_pin_sink_count,
+                                  const int& old_num_block_on_edge,
+                                  const int& old_edge_coord,
+                                  int& new_num_block_on_edge,
+                                  int& new_edge_coord);
 
 static void add_block_to_bb(const t_physical_tile_loc& new_pin_loc,
                             const t_2D_tbb& bb_edge_old,
@@ -2757,7 +2757,7 @@ static void update_bb(ClusterNetId net_id,
      * The x and y coordinates are the pin's x and y coordinates.         */
     /* IO blocks are considered to be one cell in for simplicity.         */
     //TODO: account for multiple physical pin instances per logical pin
-    const std::vector<t_2D_tbb>*curr_bb_edge, *curr_bb_coord;
+    const std::vector<t_2D_tbb> *curr_bb_edge, *curr_bb_coord;
     const std::vector<int>* curr_layer_pin_sink_count;
 
     auto& device_ctx = g_vpr_ctx.device();
@@ -2804,9 +2804,6 @@ static void update_bb(ClusterNetId net_id,
     int layer_new = pin_new_loc.layer_num;
     bool layer_changed = (layer_old != layer_new);
 
-    bb_edge_new = *curr_bb_edge;
-    bb_coord_new = *curr_bb_coord;
-
     if (x_new < x_old || layer_changed) {
         if (x_old == (*curr_bb_coord)[layer_old].xmax) {
             update_bb_edge(net_id,
@@ -2820,18 +2817,25 @@ static void update_bb(ClusterNetId net_id,
             if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
                 return;
             }
+        } else {
+            bb_edge_new[layer_old].xmax = (*curr_bb_edge)[layer_old].xmax;
+            bb_coord_new[layer_old].xmax = (*curr_bb_coord)[layer_old].xmax;
         }
 
         if (!layer_changed) {
-            if (x_new < (*curr_bb_coord)[layer_new].xmin) {
-                bb_edge_new[layer_new].xmin = 1;
-                bb_coord_new[layer_new].xmin = x_new;
-            } else if (x_new == (*curr_bb_coord)[layer_new].xmin) {
-                bb_edge_new[layer_new].xmin++;
+            if (x_new < (*curr_bb_coord)[layer_old].xmin) {
+                bb_edge_new[layer_old].xmin = 1;
+                bb_coord_new[layer_old].xmin = x_new;
+            } else if (x_new == (*curr_bb_coord)[layer_old].xmin) {
+                bb_edge_new[layer_old].xmin = (*curr_bb_edge)[layer_old].xmin + 1;
+                bb_coord_new[layer_old].xmin = (*curr_bb_coord)[layer_old].xmin;
+            } else {
+                bb_edge_new[layer_old].xmin = (*curr_bb_edge)[layer_old].xmin;
+                bb_coord_new[layer_old].xmin = (*curr_bb_coord)[layer_old].xmin;
             }
         }
 
-    } else if (x_new > x_old || layer_old != layer_new) {
+    } else if (x_new > x_old || layer_changed) {
         if (x_old == (*curr_bb_coord)[layer_old].xmin) {
             update_bb_edge(net_id,
                            bb_edge_new,
@@ -2844,14 +2848,21 @@ static void update_bb(ClusterNetId net_id,
             if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
                 return;
             }
+        } else {
+            bb_edge_new[layer_old].xmin = (*curr_bb_edge)[layer_old].xmin;
+            bb_coord_new[layer_old].xmin = (*curr_bb_coord)[layer_old].xmin;
         }
 
         if (!layer_changed) {
-            if (x_new > (*curr_bb_coord)[layer_new].xmax) {
-                bb_edge_new[layer_new].xmax = 1;
-                bb_coord_new[layer_new].xmax = x_new;
-            } else if (x_new == (*curr_bb_coord)[layer_new].xmax) {
-                bb_edge_new[layer_new].xmax++;
+            if (x_new > (*curr_bb_coord)[layer_old].xmax) {
+                bb_edge_new[layer_old].xmax = 1;
+                bb_coord_new[layer_old].xmax = x_new;
+            } else if (x_new == (*curr_bb_coord)[layer_old].xmax) {
+                bb_edge_new[layer_old].xmax = (*curr_bb_edge)[layer_old].xmax + 1;
+                bb_coord_new[layer_old].xmax = (*curr_bb_coord)[layer_old].xmax;
+            } else {
+                bb_edge_new[layer_old].xmax = (*curr_bb_edge)[layer_old].xmax;
+                bb_coord_new[layer_old].xmax = (*curr_bb_coord)[layer_old].xmax;
             }
         }
     }
@@ -2869,18 +2880,25 @@ static void update_bb(ClusterNetId net_id,
             if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
                 return;
             }
+        } else {
+            bb_edge_new[layer_old].ymax = (*curr_bb_edge)[layer_old].ymax;
+            bb_coord_new[layer_old].ymax = (*curr_bb_coord)[layer_old].ymax;
         }
 
         if (!layer_changed) {
-            if (y_new < (*curr_bb_coord)[layer_new].ymin) {
-                bb_edge_new[layer_new].ymin = 1;
-                bb_coord_new[layer_new].ymin = y_new;
-            } else if (y_new == (*curr_bb_coord)[layer_new].ymin) {
-                bb_edge_new[layer_new].ymin++;
+            if (y_new < (*curr_bb_coord)[layer_old].ymin) {
+                bb_edge_new[layer_old].ymin = 1;
+                bb_coord_new[layer_old].ymin = y_new;
+            } else if (y_new == (*curr_bb_coord)[layer_old].ymin) {
+                bb_edge_new[layer_old].ymin = (*curr_bb_edge)[layer_old].ymin + 1;
+                bb_coord_new[layer_old].ymin = (*curr_bb_coord)[layer_old].ymin;
+            } else {
+                bb_edge_new[layer_old].ymin = (*curr_bb_edge)[layer_old].ymin;
+                bb_coord_new[layer_old].ymin = (*curr_bb_coord)[layer_old].ymin;
             }
         }
 
-    } else if (y_new > y_old || layer_old != layer_new) {
+    } else if (y_new > y_old || layer_changed) {
         if (y_old == (*curr_bb_coord)[layer_old].ymin) {
             update_bb_edge(net_id,
                            bb_edge_new,
@@ -2893,14 +2911,21 @@ static void update_bb(ClusterNetId net_id,
             if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
                 return;
             }
+        } else {
+            bb_edge_new[layer_old].ymin = (*curr_bb_edge)[layer_old].ymin;
+            bb_coord_new[layer_old].ymin = (*curr_bb_coord)[layer_old].ymin;
         }
 
         if (!layer_changed) {
-            if (y_new > (*curr_bb_coord)[layer_new].ymax) {
-                bb_edge_new[layer_new].ymax = 1;
-                bb_coord_new[layer_new].ymax = y_new;
-            } else if (y_new == (*curr_bb_coord)[layer_new].ymax) {
-                bb_edge_new[layer_new].ymax++;
+            if (y_new > (*curr_bb_coord)[layer_old].ymax) {
+                bb_edge_new[layer_old].ymax = 1;
+                bb_coord_new[layer_old].ymax = y_new;
+            } else if (y_new == (*curr_bb_coord)[layer_old].ymax) {
+                bb_edge_new[layer_old].ymax = (*curr_bb_edge)[layer_old].ymax + 1;
+                bb_coord_new[layer_old].ymax = (*curr_bb_coord)[layer_old].ymax;
+            } else {
+                bb_edge_new[layer_old].ymax = (*curr_bb_edge)[layer_old].ymax;
+                bb_coord_new[layer_old].ymax = (*curr_bb_coord)[layer_old].ymax;
             }
         }
     }
@@ -2932,14 +2957,14 @@ static void update_bb_pin_sink_count(ClusterNetId /* net_id */,
     }
 }
 
-static void update_bb_edge(ClusterNetId net_id,
-                           std::vector<t_2D_tbb>& bb_edge_new,
-                           std::vector<t_2D_tbb>& bb_coord_new,
-                           std::vector<int>& bb_layer_pin_sink_count,
-                           const int& old_num_block_on_edge,
-                           const int& old_edge_coord,
-                           int& new_num_block_on_edge,
-                           int& new_edge_coord) {
+static inline void update_bb_edge(ClusterNetId net_id,
+                                  std::vector<t_2D_tbb>& bb_edge_new,
+                                  std::vector<t_2D_tbb>& bb_coord_new,
+                                  std::vector<int>& bb_layer_pin_sink_count,
+                                  const int& old_num_block_on_edge,
+                                  const int& old_edge_coord,
+                                  int& new_num_block_on_edge,
+                                  int& new_edge_coord) {
     if (old_num_block_on_edge == 1) {
         get_bb_from_scratch(net_id,
                             bb_edge_new,
@@ -2966,11 +2991,21 @@ static void add_block_to_bb(const t_physical_tile_loc& new_pin_loc,
         bb_coord_new.xmax = x_new;
     } else if (x_new == bb_coord_old.xmax) {
         bb_edge_new.xmax = bb_edge_old.xmax + 1;
-    } else if (x_new < bb_coord_old.xmin) {
+        bb_coord_new.xmax = bb_coord_old.xmax;
+    } else {
+        bb_edge_new.xmax = bb_edge_old.xmax;
+        bb_coord_new.xmax = bb_coord_old.xmax;
+    }
+
+    if (x_new < bb_coord_old.xmin) {
         bb_edge_new.xmin = 1;
         bb_coord_new.xmin = x_new;
     } else if (x_new == bb_coord_old.xmin) {
         bb_edge_new.xmin = bb_edge_old.xmin + 1;
+        bb_coord_new.xmin = bb_coord_old.xmin;
+    } else {
+        bb_edge_new.xmin = bb_edge_old.xmin;
+        bb_coord_new.xmin = bb_coord_old.xmin;
     }
 
     if (y_new > bb_coord_old.ymax) {
@@ -2978,11 +3013,21 @@ static void add_block_to_bb(const t_physical_tile_loc& new_pin_loc,
         bb_coord_new.ymax = y_new;
     } else if (y_new == bb_coord_old.ymax) {
         bb_edge_new.ymax = bb_edge_old.ymax + 1;
-    } else if (y_new < bb_coord_old.ymin) {
+        bb_coord_new.ymax = bb_coord_old.ymax;
+    } else {
+        bb_edge_new.ymax = bb_edge_old.ymax;
+        bb_coord_new.ymax = bb_coord_old.ymax;
+    }
+
+    if (y_new < bb_coord_old.ymin) {
         bb_edge_new.ymin = 1;
         bb_coord_new.ymin = y_new;
     } else if (y_new == bb_coord_old.ymin) {
         bb_edge_new.ymin = bb_edge_old.ymin + 1;
+        bb_coord_new.ymin = bb_coord_old.ymin;
+    } else {
+        bb_edge_new.ymin = bb_edge_old.ymin;
+        bb_coord_new.ymin = bb_coord_old.ymin;
     }
 }
 

From 05800d92898dc16472d29f864e3bda851db603fb Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 28 Sep 2023 09:49:10 -0400
Subject: [PATCH 116/257] write update_bb_same_layer to update bb when block is
 moved on the same layer

---
 vpr/src/place/place.cpp | 228 ++++++++++++++++++++++++----------------
 1 file changed, 138 insertions(+), 90 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 34e9164e753..dabab2bab8e 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -339,6 +339,15 @@ static void update_bb(ClusterNetId net_id,
                       t_physical_tile_loc pin_new_loc,
                       bool is_output_pin);
 
+static inline void update_bb_same_layer(ClusterNetId net_id,
+                                        const t_physical_tile_loc& pin_old_loc,
+                                        const t_physical_tile_loc& pin_new_loc,
+                                        const std::vector<t_2D_tbb>& curr_bb_edge,
+                                        const std::vector<t_2D_tbb>& curr_bb_coord,
+                                        std::vector<int>& bb_pin_sink_count_new,
+                                        std::vector<t_2D_tbb>& bb_edge_new,
+                                        std::vector<t_2D_tbb>& bb_coord_new);
+
 static void update_bb_pin_sink_count(ClusterNetId net_id,
                                      const t_physical_tile_loc& pin_old_loc,
                                      const t_physical_tile_loc& pin_new_loc,
@@ -2794,153 +2803,192 @@ static void update_bb(ClusterNetId net_id,
                              bb_pin_sink_count_new,
                              is_output_pin);
 
+
+    int layer_old = pin_old_loc.layer_num;
+    int layer_new = pin_new_loc.layer_num;
+    bool layer_changed = (layer_old != layer_new);
+
+    if(layer_changed) {
+        update_bb_layer_changed();
+    } else {
+        update_bb_same_layer(net_id,
+                             pin_old_loc,
+                             pin_new_loc,
+                             *curr_bb_edge,
+                             *curr_bb_coord,
+                             bb_pin_sink_count_new,
+                             bb_edge_new,
+                             bb_coord_new);
+    }
+
+
+
+    if (layer_changed) {
+        add_block_to_bb(pin_new_loc,
+                        (*curr_bb_edge)[layer_new],
+                        (*curr_bb_coord)[layer_new],
+                        bb_edge_new[layer_new],
+                        bb_coord_new[layer_new]);
+    }
+
+    if (bb_updated_before[net_id] == NOT_UPDATED_YET) {
+        bb_updated_before[net_id] = UPDATED_ONCE;
+    }
+}
+
+static inline void update_bb_layer_changed() {
+
+}
+
+
+static inline void update_bb_same_layer(ClusterNetId net_id,
+                                        const t_physical_tile_loc& pin_old_loc,
+                                        const t_physical_tile_loc& pin_new_loc,
+                                        const std::vector<t_2D_tbb>& curr_bb_edge,
+                                        const std::vector<t_2D_tbb>& curr_bb_coord,
+                                        std::vector<int>& bb_pin_sink_count_new,
+                                        std::vector<t_2D_tbb>& bb_edge_new,
+                                        std::vector<t_2D_tbb>& bb_coord_new) {
     int x_old = pin_old_loc.x;
     int x_new = pin_new_loc.x;
 
     int y_old = pin_old_loc.y;
     int y_new = pin_new_loc.y;
 
-    int layer_old = pin_old_loc.layer_num;
-    int layer_new = pin_new_loc.layer_num;
-    bool layer_changed = (layer_old != layer_new);
+    int layer_num = pin_old_loc.layer_num;
+    VTR_ASSERT_SAFE(layer_num == pin_new_loc.layer_num);
 
-    if (x_new < x_old || layer_changed) {
-        if (x_old == (*curr_bb_coord)[layer_old].xmax) {
+    if (x_new < x_old) {
+        if (x_old == curr_bb_coord[layer_num].xmax) {
             update_bb_edge(net_id,
                            bb_edge_new,
                            bb_coord_new,
                            bb_pin_sink_count_new,
-                           (*curr_bb_edge)[layer_old].xmax,
-                           (*curr_bb_coord)[layer_old].xmax,
-                           bb_edge_new[layer_old].xmax,
-                           bb_coord_new[layer_old].xmax);
+                           curr_bb_edge[layer_num].xmax,
+                           curr_bb_coord[layer_num].xmax,
+                           bb_edge_new[layer_num].xmax,
+                           bb_coord_new[layer_num].xmax);
             if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
                 return;
             }
         } else {
-            bb_edge_new[layer_old].xmax = (*curr_bb_edge)[layer_old].xmax;
-            bb_coord_new[layer_old].xmax = (*curr_bb_coord)[layer_old].xmax;
+            bb_edge_new[layer_num].xmax = curr_bb_edge[layer_num].xmax;
+            bb_coord_new[layer_num].xmax = curr_bb_coord[layer_num].xmax;
         }
 
-        if (!layer_changed) {
-            if (x_new < (*curr_bb_coord)[layer_old].xmin) {
-                bb_edge_new[layer_old].xmin = 1;
-                bb_coord_new[layer_old].xmin = x_new;
-            } else if (x_new == (*curr_bb_coord)[layer_old].xmin) {
-                bb_edge_new[layer_old].xmin = (*curr_bb_edge)[layer_old].xmin + 1;
-                bb_coord_new[layer_old].xmin = (*curr_bb_coord)[layer_old].xmin;
-            } else {
-                bb_edge_new[layer_old].xmin = (*curr_bb_edge)[layer_old].xmin;
-                bb_coord_new[layer_old].xmin = (*curr_bb_coord)[layer_old].xmin;
-            }
+        if (x_new < curr_bb_coord[layer_num].xmin) {
+            bb_edge_new[layer_num].xmin = 1;
+            bb_coord_new[layer_num].xmin = x_new;
+        } else if (x_new == curr_bb_coord[layer_num].xmin) {
+            bb_edge_new[layer_num].xmin = curr_bb_edge[layer_num].xmin + 1;
+            bb_coord_new[layer_num].xmin = curr_bb_coord[layer_num].xmin;
+        } else {
+            bb_edge_new[layer_num].xmin = curr_bb_edge[layer_num].xmin;
+            bb_coord_new[layer_num].xmin = curr_bb_coord[layer_num].xmin;
         }
 
-    } else if (x_new > x_old || layer_changed) {
-        if (x_old == (*curr_bb_coord)[layer_old].xmin) {
+    } else if (x_new > x_old) {
+        if (x_old == curr_bb_coord[layer_num].xmin) {
             update_bb_edge(net_id,
                            bb_edge_new,
                            bb_coord_new,
                            bb_pin_sink_count_new,
-                           (*curr_bb_edge)[layer_old].xmin,
-                           (*curr_bb_coord)[layer_old].xmin,
-                           bb_edge_new[layer_old].xmin,
-                           bb_coord_new[layer_old].xmin);
+                           curr_bb_edge[layer_num].xmin,
+                           curr_bb_coord[layer_num].xmin,
+                           bb_edge_new[layer_num].xmin,
+                           bb_coord_new[layer_num].xmin);
             if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
                 return;
             }
         } else {
-            bb_edge_new[layer_old].xmin = (*curr_bb_edge)[layer_old].xmin;
-            bb_coord_new[layer_old].xmin = (*curr_bb_coord)[layer_old].xmin;
+            bb_edge_new[layer_num].xmin = curr_bb_edge[layer_num].xmin;
+            bb_coord_new[layer_num].xmin = curr_bb_coord[layer_num].xmin;
         }
 
-        if (!layer_changed) {
-            if (x_new > (*curr_bb_coord)[layer_old].xmax) {
-                bb_edge_new[layer_old].xmax = 1;
-                bb_coord_new[layer_old].xmax = x_new;
-            } else if (x_new == (*curr_bb_coord)[layer_old].xmax) {
-                bb_edge_new[layer_old].xmax = (*curr_bb_edge)[layer_old].xmax + 1;
-                bb_coord_new[layer_old].xmax = (*curr_bb_coord)[layer_old].xmax;
-            } else {
-                bb_edge_new[layer_old].xmax = (*curr_bb_edge)[layer_old].xmax;
-                bb_coord_new[layer_old].xmax = (*curr_bb_coord)[layer_old].xmax;
-            }
+        if (x_new > curr_bb_coord[layer_num].xmax) {
+            bb_edge_new[layer_num].xmax = 1;
+            bb_coord_new[layer_num].xmax = x_new;
+        } else if (x_new == curr_bb_coord[layer_num].xmax) {
+            bb_edge_new[layer_num].xmax = curr_bb_edge[layer_num].xmax + 1;
+            bb_coord_new[layer_num].xmax = curr_bb_coord[layer_num].xmax;
+        } else {
+            bb_edge_new[layer_num].xmax = curr_bb_edge[layer_num].xmax;
+            bb_coord_new[layer_num].xmax = curr_bb_coord[layer_num].xmax;
         }
+
+    } else {
+        /* block has not moved */
+        bb_edge_new[layer_num].xmin = curr_bb_edge[layer_num].xmin;
+        bb_coord_new[layer_num].xmin = curr_bb_coord[layer_num].xmin;
+        bb_edge_new[layer_num].xmax = curr_bb_edge[layer_num].xmax;
+        bb_coord_new[layer_num].xmax = curr_bb_coord[layer_num].xmax;
     }
 
-    if (y_new < y_old || layer_changed) {
-        if (y_old == (*curr_bb_coord)[layer_old].ymax) {
+    if (y_new < y_old) {
+        if (y_old == curr_bb_coord[layer_num].ymax) {
             update_bb_edge(net_id,
                            bb_edge_new,
                            bb_coord_new,
                            bb_pin_sink_count_new,
-                           (*curr_bb_edge)[layer_old].ymax,
-                           (*curr_bb_coord)[layer_old].ymax,
-                           bb_edge_new[layer_old].ymax,
-                           bb_coord_new[layer_old].ymax);
+                           curr_bb_edge[layer_num].ymax,
+                           curr_bb_coord[layer_num].ymax,
+                           bb_edge_new[layer_num].ymax,
+                           bb_coord_new[layer_num].ymax);
             if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
                 return;
             }
         } else {
-            bb_edge_new[layer_old].ymax = (*curr_bb_edge)[layer_old].ymax;
-            bb_coord_new[layer_old].ymax = (*curr_bb_coord)[layer_old].ymax;
+            bb_edge_new[layer_num].ymax = curr_bb_edge[layer_num].ymax;
+            bb_coord_new[layer_num].ymax = curr_bb_coord[layer_num].ymax;
         }
 
-        if (!layer_changed) {
-            if (y_new < (*curr_bb_coord)[layer_old].ymin) {
-                bb_edge_new[layer_old].ymin = 1;
-                bb_coord_new[layer_old].ymin = y_new;
-            } else if (y_new == (*curr_bb_coord)[layer_old].ymin) {
-                bb_edge_new[layer_old].ymin = (*curr_bb_edge)[layer_old].ymin + 1;
-                bb_coord_new[layer_old].ymin = (*curr_bb_coord)[layer_old].ymin;
-            } else {
-                bb_edge_new[layer_old].ymin = (*curr_bb_edge)[layer_old].ymin;
-                bb_coord_new[layer_old].ymin = (*curr_bb_coord)[layer_old].ymin;
-            }
+        if (y_new < curr_bb_coord[layer_num].ymin) {
+            bb_edge_new[layer_num].ymin = 1;
+            bb_coord_new[layer_num].ymin = y_new;
+        } else if (y_new == curr_bb_coord[layer_num].ymin) {
+            bb_edge_new[layer_num].ymin = curr_bb_edge[layer_num].ymin + 1;
+            bb_coord_new[layer_num].ymin = curr_bb_coord[layer_num].ymin;
+        } else {
+            bb_edge_new[layer_num].ymin = curr_bb_edge[layer_num].ymin;
+            bb_coord_new[layer_num].ymin = curr_bb_coord[layer_num].ymin;
         }
 
-    } else if (y_new > y_old || layer_changed) {
-        if (y_old == (*curr_bb_coord)[layer_old].ymin) {
+    } else if (y_new > y_old) {
+        if (y_old == curr_bb_coord[layer_num].ymin) {
             update_bb_edge(net_id,
                            bb_edge_new,
                            bb_coord_new,
                            bb_pin_sink_count_new,
-                           (*curr_bb_edge)[layer_old].ymin,
-                           (*curr_bb_coord)[layer_old].ymin,
-                           bb_edge_new[layer_old].ymin,
-                           bb_coord_new[layer_old].ymin);
+                           curr_bb_edge[layer_num].ymin,
+                           curr_bb_coord[layer_num].ymin,
+                           bb_edge_new[layer_num].ymin,
+                           bb_coord_new[layer_num].ymin);
             if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
                 return;
             }
         } else {
-            bb_edge_new[layer_old].ymin = (*curr_bb_edge)[layer_old].ymin;
-            bb_coord_new[layer_old].ymin = (*curr_bb_coord)[layer_old].ymin;
+            bb_edge_new[layer_num].ymin = curr_bb_edge[layer_num].ymin;
+            bb_coord_new[layer_num].ymin = curr_bb_coord[layer_num].ymin;
         }
 
-        if (!layer_changed) {
-            if (y_new > (*curr_bb_coord)[layer_old].ymax) {
-                bb_edge_new[layer_old].ymax = 1;
-                bb_coord_new[layer_old].ymax = y_new;
-            } else if (y_new == (*curr_bb_coord)[layer_old].ymax) {
-                bb_edge_new[layer_old].ymax = (*curr_bb_edge)[layer_old].ymax + 1;
-                bb_coord_new[layer_old].ymax = (*curr_bb_coord)[layer_old].ymax;
-            } else {
-                bb_edge_new[layer_old].ymax = (*curr_bb_edge)[layer_old].ymax;
-                bb_coord_new[layer_old].ymax = (*curr_bb_coord)[layer_old].ymax;
-            }
+        if (y_new > curr_bb_coord[layer_num].ymax) {
+            bb_edge_new[layer_num].ymax = 1;
+            bb_coord_new[layer_num].ymax = y_new;
+        } else if (y_new == curr_bb_coord[layer_num].ymax) {
+            bb_edge_new[layer_num].ymax = curr_bb_edge[layer_num].ymax + 1;
+            bb_coord_new[layer_num].ymax = curr_bb_coord[layer_num].ymax;
+        } else {
+            bb_edge_new[layer_num].ymax = curr_bb_edge[layer_num].ymax;
+            bb_coord_new[layer_num].ymax = curr_bb_coord[layer_num].ymax;
         }
+    } else {
+        /* block has not moved */
+        bb_edge_new[layer_num].ymin = curr_bb_edge[layer_num].ymin;
+        bb_coord_new[layer_num].ymin = curr_bb_coord[layer_num].ymin;
+        bb_edge_new[layer_num].ymax = curr_bb_edge[layer_num].ymax;
+        bb_coord_new[layer_num].ymax = curr_bb_coord[layer_num].ymax;
     }
 
-    if (layer_changed) {
-        add_block_to_bb(pin_new_loc,
-                        (*curr_bb_edge)[layer_new],
-                        (*curr_bb_coord)[layer_new],
-                        bb_edge_new[layer_new],
-                        bb_coord_new[layer_new]);
-    }
-
-    if (bb_updated_before[net_id] == NOT_UPDATED_YET) {
-        bb_updated_before[net_id] = UPDATED_ONCE;
-    }
 }
 
 static void update_bb_pin_sink_count(ClusterNetId /* net_id */,

From 897ac388016070527828501c2af1d77245d5f00c Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 28 Sep 2023 10:04:22 -0400
Subject: [PATCH 117/257] write update_bb_layer_changed to update bb when block
 is moved to a different layer

---
 vpr/src/place/place.cpp | 133 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 117 insertions(+), 16 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index dabab2bab8e..2ff3b514c9b 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -348,6 +348,15 @@ static inline void update_bb_same_layer(ClusterNetId net_id,
                                         std::vector<t_2D_tbb>& bb_edge_new,
                                         std::vector<t_2D_tbb>& bb_coord_new);
 
+static inline void update_bb_layer_changed(ClusterNetId net_id,
+                                           const t_physical_tile_loc& pin_old_loc,
+                                           const t_physical_tile_loc& pin_new_loc,
+                                           const std::vector<t_2D_tbb>& curr_bb_edge,
+                                           const std::vector<t_2D_tbb>& curr_bb_coord,
+                                           std::vector<int>& bb_pin_sink_count_new,
+                                           std::vector<t_2D_tbb>& bb_edge_new,
+                                           std::vector<t_2D_tbb>& bb_coord_new);
+
 static void update_bb_pin_sink_count(ClusterNetId net_id,
                                      const t_physical_tile_loc& pin_old_loc,
                                      const t_physical_tile_loc& pin_new_loc,
@@ -2809,7 +2818,14 @@ static void update_bb(ClusterNetId net_id,
     bool layer_changed = (layer_old != layer_new);
 
     if(layer_changed) {
-        update_bb_layer_changed();
+        update_bb_layer_changed(net_id,
+                                pin_old_loc,
+                                pin_new_loc,
+                                *curr_bb_edge,
+                                *curr_bb_coord,
+                                bb_pin_sink_count_new,
+                                bb_edge_new,
+                                bb_coord_new);
     } else {
         update_bb_same_layer(net_id,
                              pin_old_loc,
@@ -2821,26 +2837,11 @@ static void update_bb(ClusterNetId net_id,
                              bb_coord_new);
     }
 
-
-
-    if (layer_changed) {
-        add_block_to_bb(pin_new_loc,
-                        (*curr_bb_edge)[layer_new],
-                        (*curr_bb_coord)[layer_new],
-                        bb_edge_new[layer_new],
-                        bb_coord_new[layer_new]);
-    }
-
     if (bb_updated_before[net_id] == NOT_UPDATED_YET) {
         bb_updated_before[net_id] = UPDATED_ONCE;
     }
 }
 
-static inline void update_bb_layer_changed() {
-
-}
-
-
 static inline void update_bb_same_layer(ClusterNetId net_id,
                                         const t_physical_tile_loc& pin_old_loc,
                                         const t_physical_tile_loc& pin_new_loc,
@@ -2991,6 +2992,106 @@ static inline void update_bb_same_layer(ClusterNetId net_id,
 
 }
 
+static inline void update_bb_layer_changed(ClusterNetId net_id,
+                                           const t_physical_tile_loc& pin_old_loc,
+                                           const t_physical_tile_loc& pin_new_loc,
+                                           const std::vector<t_2D_tbb>& curr_bb_edge,
+                                           const std::vector<t_2D_tbb>& curr_bb_coord,
+                                           std::vector<int>& bb_pin_sink_count_new,
+                                           std::vector<t_2D_tbb>& bb_edge_new,
+                                           std::vector<t_2D_tbb>& bb_coord_new) {
+
+    int x_old = pin_old_loc.x;
+    int x_new = pin_new_loc.x;
+
+    int y_old = pin_old_loc.y;
+    int y_new = pin_new_loc.y;
+
+    int old_layer_num = pin_old_loc.layer_num;
+    int new_layer_num = pin_new_loc.layer_num;
+    VTR_ASSERT_SAFE(old_layer_num != new_layer_num);
+
+    if (x_old == curr_bb_coord[old_layer_num].xmax) {
+        update_bb_edge(net_id,
+                       bb_edge_new,
+                       bb_coord_new,
+                       bb_pin_sink_count_new,
+                       curr_bb_edge[old_layer_num].xmax,
+                       curr_bb_coord[old_layer_num].xmax,
+                       bb_edge_new[old_layer_num].xmax,
+                       bb_coord_new[old_layer_num].xmax);
+        if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
+            return;
+        }
+        bb_edge_new[old_layer_num].xmin = curr_bb_edge[old_layer_num].xmin;
+        bb_coord_new[old_layer_num].xmin = curr_bb_coord[old_layer_num].xmin;
+    } else if (x_old == curr_bb_coord[old_layer_num].xmin) {
+        update_bb_edge(net_id,
+                       bb_edge_new,
+                       bb_coord_new,
+                       bb_pin_sink_count_new,
+                       curr_bb_edge[old_layer_num].xmin,
+                       curr_bb_coord[old_layer_num].xmin,
+                       bb_edge_new[old_layer_num].xmin,
+                       bb_coord_new[old_layer_num].xmin);
+        if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
+            return;
+        }
+        bb_edge_new[old_layer_num].xmax = curr_bb_edge[old_layer_num].xmax;
+        bb_coord_new[old_layer_num].xmax = curr_bb_coord[old_layer_num].xmax;
+    } else {
+        /* block has not moved */
+        bb_edge_new[old_layer_num].xmin = curr_bb_edge[old_layer_num].xmin;
+        bb_coord_new[old_layer_num].xmin = curr_bb_coord[old_layer_num].xmin;
+        bb_edge_new[old_layer_num].xmax = curr_bb_edge[old_layer_num].xmax;
+        bb_coord_new[old_layer_num].xmax = curr_bb_coord[old_layer_num].xmax;
+    }
+
+    if (y_old == curr_bb_coord[old_layer_num].ymax) {
+        update_bb_edge(net_id,
+                       bb_edge_new,
+                       bb_coord_new,
+                       bb_pin_sink_count_new,
+                       curr_bb_edge[old_layer_num].ymax,
+                       curr_bb_coord[old_layer_num].ymax,
+                       bb_edge_new[old_layer_num].ymax,
+                       bb_coord_new[old_layer_num].ymax);
+        if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
+            return;
+        }
+        bb_edge_new[old_layer_num].ymin = curr_bb_edge[old_layer_num].ymin;
+        bb_coord_new[old_layer_num].ymin = curr_bb_coord[old_layer_num].ymin;
+    } else if (y_old == curr_bb_coord[old_layer_num].ymin) {
+        update_bb_edge(net_id,
+                       bb_edge_new,
+                       bb_coord_new,
+                       bb_pin_sink_count_new,
+                       curr_bb_edge[old_layer_num].ymin,
+                       curr_bb_coord[old_layer_num].ymin,
+                       bb_edge_new[old_layer_num].ymin,
+                       bb_coord_new[old_layer_num].ymin);
+        if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
+            return;
+        }
+        bb_edge_new[old_layer_num].ymax = curr_bb_edge[old_layer_num].ymax;
+        bb_coord_new[old_layer_num].ymax = curr_bb_coord[old_layer_num].ymax;
+
+    } else {
+        /* block has not moved */
+        bb_edge_new[old_layer_num].ymin = curr_bb_edge[old_layer_num].ymin;
+        bb_coord_new[old_layer_num].ymin = curr_bb_coord[old_layer_num].ymin;
+        bb_edge_new[old_layer_num].ymax = curr_bb_edge[old_layer_num].ymax;
+        bb_coord_new[old_layer_num].ymax = curr_bb_coord[old_layer_num].ymax;
+    }
+
+    add_block_to_bb(pin_new_loc,
+                    curr_bb_edge[new_layer_num],
+                    curr_bb_coord[new_layer_num],
+                    bb_edge_new[new_layer_num],
+                    bb_coord_new[new_layer_num]);
+
+}
+
 static void update_bb_pin_sink_count(ClusterNetId /* net_id */,
                                      const t_physical_tile_loc& pin_old_loc,
                                      const t_physical_tile_loc& pin_new_loc,

From 3b9ce85e57434107ef4790dbbdf70542ca5bfca7 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 28 Sep 2023 10:07:50 -0400
Subject: [PATCH 118/257] remove unused var

---
 vpr/src/place/place.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 2ff3b514c9b..18436ab4f97 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -3002,10 +3002,8 @@ static inline void update_bb_layer_changed(ClusterNetId net_id,
                                            std::vector<t_2D_tbb>& bb_coord_new) {
 
     int x_old = pin_old_loc.x;
-    int x_new = pin_new_loc.x;
 
     int y_old = pin_old_loc.y;
-    int y_new = pin_new_loc.y;
 
     int old_layer_num = pin_old_loc.layer_num;
     int new_layer_num = pin_new_loc.layer_num;

From 9cf4d7ff9d438bbe41d555dec783e54e6d7255e7 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 28 Sep 2023 10:34:23 -0400
Subject: [PATCH 119/257] drop separate per-layer bbs in median move generator

---
 vpr/src/place/median_move_generator.cpp | 124 +++++++++++-------------
 1 file changed, 59 insertions(+), 65 deletions(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index ae5767f4d0b..77e66b9b559 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -5,9 +5,9 @@
 #include "placer_globals.h"
 #include "move_utils.h"
 
-static bool get_bb_incrementally(ClusterNetId net_id, std::vector<t_2D_tbb>& bb_coord_new, int layer, int xold, int yold, int xnew, int ynew);
+static bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, int layer, int xold, int yold, int xnew, int ynew);
 
-static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, std::vector<t_2D_tbb>& bb_coord_new, ClusterBlockId block_id, bool& skip_net);
+static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb& bb_coord_new, ClusterBlockId block_id, bool& skip_net);
 
 e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* /*criticalities*/) {
     //Find a movable block based on blk_type
@@ -38,8 +38,8 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
     /* Calculate the median region */
     t_pl_loc to;
 
-    std::vector<t_2D_tbb> coords(num_layers, t_2D_tbb(OPEN, OPEN, OPEN, OPEN, OPEN));
-    t_2D_tbb limit_coords;
+    t_tbb coords(OPEN, OPEN, OPEN, OPEN, OPEN, OPEN);
+    t_tbb limit_coords;
     ClusterBlockId bnum;
     int pnum, xnew, xold, ynew, yold;
 
@@ -157,17 +157,17 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
  * Currently assumes channels on both sides of the CLBs forming the   *
  * edges of the bounding box can be used.  Essentially, I am assuming *
  * the pins always lie on the outside of the bounding box.            */
-static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, std::vector<t_2D_tbb>& bb_coord_new, ClusterBlockId block_id, bool& skip_net) {
+static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb& bb_coord_new, ClusterBlockId block_id, bool& skip_net) {
     //TODO: account for multiple physical pin instances per logical pin
 
     skip_net = true;
 
     int num_layers = g_vpr_ctx.device().grid.get_num_layers();
 
-    std::vector<int> xmin(num_layers, OPEN);
-    std::vector<int> xmax(num_layers, OPEN);
-    std::vector<int> ymin(num_layers, OPEN);
-    std::vector<int> ymax(num_layers, OPEN);
+    int xmin = OPEN;
+    int xmax = OPEN;
+    int ymin = OPEN;
+    int ymax = OPEN;
 
     int pnum;
 
@@ -176,7 +176,7 @@ static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, std::vector
     auto& device_ctx = g_vpr_ctx.device();
 
     ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id);
-    std::vector<bool> first_block(num_layers, false);
+    bool first_block = false;
 
     if (bnum != block_id) {
         skip_net = false;
@@ -184,13 +184,11 @@ static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, std::vector
         int src_x = place_ctx.block_locs[bnum].loc.x + physical_tile_type(bnum)->pin_width_offset[pnum];
         int src_y = place_ctx.block_locs[bnum].loc.y + physical_tile_type(bnum)->pin_height_offset[pnum];
 
-        for (int layer_num = 0; layer_num < num_layers; layer_num++) {
-            xmin[layer_num] = src_x;
-            ymin[layer_num] = src_y;
-            xmax[layer_num] = src_x;
-            ymax[layer_num] = src_y;
-            first_block[layer_num] = true;
-        }
+        xmin = src_x;
+        ymin = src_y;
+        xmax = src_x;
+        ymax = src_y;
+        first_block = true;
     }
 
     for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) {
@@ -201,26 +199,25 @@ static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, std::vector
         skip_net = false;
         int x = place_ctx.block_locs[bnum].loc.x + physical_tile_type(bnum)->pin_width_offset[pnum];
         int y = place_ctx.block_locs[bnum].loc.y + physical_tile_type(bnum)->pin_height_offset[pnum];
-        int layer_num = place_ctx.block_locs[bnum].loc.layer;
-
-        if (!first_block[layer_num]) {
-            xmin[layer_num] = x;
-            ymin[layer_num] = y;
-            xmax[layer_num] = x;
-            ymax[layer_num] = y;
-            first_block[layer_num] = true;
+
+        if (!first_block) {
+            xmin = x;
+            ymin = y;
+            xmax = x;
+            ymax = y;
+            first_block = true;
             continue;
         }
-        if (x < xmin[layer_num]) {
-            xmin[layer_num] = x;
-        } else if (x > xmax[layer_num]) {
-            xmax[layer_num] = x;
+        if (x < xmin) {
+            xmin = x;
+        } else if (x > xmax) {
+            xmax = x;
         }
 
-        if (y < ymin[layer_num]) {
-            ymin[layer_num] = y;
-        } else if (y > ymax[layer_num]) {
-            ymax[layer_num] = y;
+        if (y < ymin) {
+            ymin = y;
+        } else if (y > ymax) {
+            ymax = y;
         }
     }
 
@@ -231,13 +228,10 @@ static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, std::vector
      * channel immediately to the left of the bounding box, I want to    *
      * clip to 1 in both directions as well (since minimum channel index *
      * is 0).  See route_common.cpp for a channel diagram.               */
-    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
-        VTR_ASSERT_SAFE(first_block[layer_num]);
-        bb_coord_new[layer_num].xmin = std::max(std::min<int>(xmin[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
-        bb_coord_new[layer_num].ymin = std::max(std::min<int>(ymin[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels
-        bb_coord_new[layer_num].xmax = std::max(std::min<int>(xmax[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
-        bb_coord_new[layer_num].ymax = std::max(std::min<int>(ymax[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels
-    }
+    bb_coord_new.xmin = std::max(std::min<int>(xmin, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
+    bb_coord_new.ymin = std::max(std::min<int>(ymin, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
+    bb_coord_new.xmax = std::max(std::min<int>(xmax, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
+    bb_coord_new.ymax = std::max(std::min<int>(ymax, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
 }
 
 /*
@@ -253,7 +247,7 @@ static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, std::vector
  * the pins always lie on the outside of the bounding box.            *
  * The x and y coordinates are the pin's x and y coordinates.         */
 /* IO blocks are considered to be one cell in for simplicity.         */
-static bool get_bb_incrementally(ClusterNetId net_id, std::vector<t_2D_tbb>& bb_coord_new, int layer, int xold, int yold, int xnew, int ynew) {
+static bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, int layer, int xold, int yold, int xnew, int ynew) {
     //TODO: account for multiple physical pin instances per logical pin
 
     const t_2D_tbb *curr_bb_edge, *curr_bb_coord;
@@ -280,20 +274,20 @@ static bool get_bb_incrementally(ClusterNetId net_id, std::vector<t_2D_tbb>& bb_
             if (curr_bb_edge->xmax == 1) {
                 return false;
             } else {
-                bb_coord_new[layer].xmax = curr_bb_coord->xmax;
+                bb_coord_new.xmax = curr_bb_coord->xmax;
             }
         } else { /* Move to left, old postion was not at xmax. */
-            bb_coord_new[layer].xmax = curr_bb_coord->xmax;
+            bb_coord_new.xmax = curr_bb_coord->xmax;
         }
 
         /* Now do the xmin fields for coordinates and number of edges. */
 
         if (xnew < curr_bb_coord->xmin) { /* Moved past xmin */
-            bb_coord_new[layer].xmin = xnew;
+            bb_coord_new.xmin = xnew;
         } else if (xnew == curr_bb_coord->xmin) { /* Moved to xmin */
-            bb_coord_new[layer].xmin = xnew;
+            bb_coord_new.xmin = xnew;
         } else { /* Xmin unchanged. */
-            bb_coord_new[layer].xmin = curr_bb_coord->xmin;
+            bb_coord_new.xmin = curr_bb_coord->xmin;
         }
         /* End of move to left case. */
 
@@ -305,25 +299,25 @@ static bool get_bb_incrementally(ClusterNetId net_id, std::vector<t_2D_tbb>& bb_
             if (curr_bb_edge->xmin == 1) {
                 return false;
             } else {
-                bb_coord_new[layer].xmin = curr_bb_coord->xmin;
+                bb_coord_new.xmin = curr_bb_coord->xmin;
             }
         } else { /* Move to right, old position was not at xmin. */
-            bb_coord_new[layer].xmin = curr_bb_coord->xmin;
+            bb_coord_new.xmin = curr_bb_coord->xmin;
         }
         /* Now do the xmax fields for coordinates and number of edges. */
 
         if (xnew > curr_bb_coord->xmax) { /* Moved past xmax. */
-            bb_coord_new[layer].xmax = xnew;
+            bb_coord_new.xmax = xnew;
         } else if (xnew == curr_bb_coord->xmax) { /* Moved to xmax */
-            bb_coord_new[layer].xmax = xnew;
+            bb_coord_new.xmax = xnew;
         } else { /* Xmax unchanged. */
-            bb_coord_new[layer].xmax = curr_bb_coord->xmax;
+            bb_coord_new.xmax = curr_bb_coord->xmax;
         }
         /* End of move to right case. */
 
     } else { /* xnew == xold -- no x motion. */
-        bb_coord_new[layer].xmin = curr_bb_coord->xmin;
-        bb_coord_new[layer].xmax = curr_bb_coord->xmax;
+        bb_coord_new.xmin = curr_bb_coord->xmin;
+        bb_coord_new.xmax = curr_bb_coord->xmax;
     }
 
     /* Now account for the y-direction motion. */
@@ -336,20 +330,20 @@ static bool get_bb_incrementally(ClusterNetId net_id, std::vector<t_2D_tbb>& bb_
             if (curr_bb_edge->ymax == 1) {
                 return false;
             } else {
-                bb_coord_new[layer].ymax = curr_bb_coord->ymax;
+                bb_coord_new.ymax = curr_bb_coord->ymax;
             }
         } else { /* Move down, old postion was not at ymax. */
-            bb_coord_new[layer].ymax = curr_bb_coord->ymax;
+            bb_coord_new.ymax = curr_bb_coord->ymax;
         }
 
         /* Now do the ymin fields for coordinates and number of edges. */
 
         if (ynew < curr_bb_coord->ymin) { /* Moved past ymin */
-            bb_coord_new[layer].ymin = ynew;
+            bb_coord_new.ymin = ynew;
         } else if (ynew == curr_bb_coord->ymin) { /* Moved to ymin */
-            bb_coord_new[layer].ymin = ynew;
+            bb_coord_new.ymin = ynew;
         } else { /* ymin unchanged. */
-            bb_coord_new[layer].ymin = curr_bb_coord->ymin;
+            bb_coord_new.ymin = curr_bb_coord->ymin;
         }
         /* End of move down case. */
 
@@ -361,26 +355,26 @@ static bool get_bb_incrementally(ClusterNetId net_id, std::vector<t_2D_tbb>& bb_
             if (curr_bb_edge->ymin == 1) {
                 return false;
             } else {
-                bb_coord_new[layer].ymin = curr_bb_coord->ymin;
+                bb_coord_new.ymin = curr_bb_coord->ymin;
             }
         } else { /* Moved up, old position was not at ymin. */
-            bb_coord_new[layer].ymin = curr_bb_coord->ymin;
+            bb_coord_new.ymin = curr_bb_coord->ymin;
         }
 
         /* Now do the ymax fields for coordinates and number of edges. */
 
         if (ynew > curr_bb_coord->ymax) { /* Moved past ymax. */
-            bb_coord_new[layer].ymax = ynew;
+            bb_coord_new.ymax = ynew;
         } else if (ynew == curr_bb_coord->ymax) { /* Moved to ymax */
-            bb_coord_new[layer].ymax = ynew;
+            bb_coord_new.ymax = ynew;
         } else { /* ymax unchanged. */
-            bb_coord_new[layer].ymax = curr_bb_coord->ymax;
+            bb_coord_new.ymax = curr_bb_coord->ymax;
         }
         /* End of move up case. */
 
     } else { /* ynew == yold -- no y motion. */
-        bb_coord_new[layer].ymin = curr_bb_coord->ymin;
-        bb_coord_new[layer].ymax = curr_bb_coord->ymax;
+        bb_coord_new.ymin = curr_bb_coord->ymin;
+        bb_coord_new.ymax = curr_bb_coord->ymax;
     }
     return true;
 }

From 0e637ca6a711d024bfe0682bca1253c6e62bae14 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 28 Sep 2023 10:37:04 -0400
Subject: [PATCH 120/257] minor debugging - change order of parameters

---
 vpr/src/place/median_move_generator.cpp | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index 77e66b9b559..2ba70dcc678 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -5,7 +5,7 @@
 #include "placer_globals.h"
 #include "move_utils.h"
 
-static bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, int layer, int xold, int yold, int xnew, int ynew);
+static bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, int xold, int yold, int xnew, int ynew, int layer);
 
 static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb& bb_coord_new, ClusterBlockId block_id, bool& skip_net);
 
@@ -38,8 +38,8 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
     /* Calculate the median region */
     t_pl_loc to;
 
-    t_tbb coords(OPEN, OPEN, OPEN, OPEN, OPEN, OPEN);
-    t_tbb limit_coords;
+    t_bb coords(OPEN, OPEN, OPEN, OPEN, OPEN, OPEN);
+    t_bb limit_coords;
     ClusterBlockId bnum;
     int pnum, xnew, xold, ynew, yold;
 
@@ -92,18 +92,17 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
                 ynew = place_move_ctx.bb_coords[net_id][block_layer].ymin;
             }
 
-            if (!get_bb_incrementally(net_id, coords, block_layer, xold, yold, xnew, ynew)) {
+            if (!get_bb_incrementally(net_id, coords, xold, yold, xnew, ynew, block_layer)) {
                 get_bb_from_scratch_excluding_block(net_id, coords, b_from, skip_net);
                 if (skip_net)
                     continue;
             }
         }
         //push the calculated coorinates into X,Y coord vectors
-        auto merged_coords = union_2d_tbb(coords);
-        place_move_ctx.X_coord.push_back(merged_coords.xmin);
-        place_move_ctx.X_coord.push_back(merged_coords.xmax);
-        place_move_ctx.Y_coord.push_back(merged_coords.ymin);
-        place_move_ctx.Y_coord.push_back(merged_coords.ymax);
+        place_move_ctx.X_coord.push_back(coords.xmin);
+        place_move_ctx.X_coord.push_back(coords.xmax);
+        place_move_ctx.Y_coord.push_back(coords.ymin);
+        place_move_ctx.Y_coord.push_back(coords.ymax);
     }
 
     if ((place_move_ctx.X_coord.empty()) || (place_move_ctx.Y_coord.empty())) {
@@ -162,8 +161,6 @@ static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb& bb_co
 
     skip_net = true;
 
-    int num_layers = g_vpr_ctx.device().grid.get_num_layers();
-
     int xmin = OPEN;
     int xmax = OPEN;
     int ymin = OPEN;
@@ -247,7 +244,7 @@ static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb& bb_co
  * the pins always lie on the outside of the bounding box.            *
  * The x and y coordinates are the pin's x and y coordinates.         */
 /* IO blocks are considered to be one cell in for simplicity.         */
-static bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, int layer, int xold, int yold, int xnew, int ynew) {
+static bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, int xold, int yold, int xnew, int ynew, int layer) {
     //TODO: account for multiple physical pin instances per logical pin
 
     const t_2D_tbb *curr_bb_edge, *curr_bb_coord;

From e1ba007eec0b6afb3da6651951f433b3782c0a9b Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 28 Sep 2023 10:38:46 -0400
Subject: [PATCH 121/257] remove an unused variable

---
 vpr/src/place/median_move_generator.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index 2ba70dcc678..5fe2f738632 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -28,8 +28,6 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
     auto& device_ctx = g_vpr_ctx.device();
     auto& place_move_ctx = g_placer_ctx.mutable_move();
 
-    int num_layers = device_ctx.grid.get_num_layers();
-
     t_pl_loc from = place_ctx.block_locs[b_from].loc;
     auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from);
     auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer});

From b50ba9844b311d22b37bf23c0a7675d155491482 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 29 Sep 2023 18:08:29 -0400
Subject: [PATCH 122/257] add two new vars to placer context to store layer bb

---
 vpr/src/place/placer_context.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/vpr/src/place/placer_context.h b/vpr/src/place/placer_context.h
index 8cb80942fe7..467252ea6f7 100644
--- a/vpr/src/place/placer_context.h
+++ b/vpr/src/place/placer_context.h
@@ -91,11 +91,19 @@ struct PlacerRuntimeContext : public Context {
  */
 struct PlacerMoveContext : public Context {
   public:
+
+    // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each of a net's bounding box (to allow efficient updates)
+    vtr::vector<ClusterNetId, t_bb> bb_num_on_edges;
+    
+    // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the bounding box coordinates of a net's bounding box
+    vtr::vector<ClusterNetId, t_bb> bb_coords;
+
+
     // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each of a net's bounding box (to allow efficient updates)
-    vtr::vector<ClusterNetId, std::vector<t_2D_tbb>> bb_num_on_edges;
+    vtr::vector<ClusterNetId, std::vector<t_2D_tbb>> layer_bb_num_on_edges;
 
     // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the bounding box coordinates of a net's bounding box
-    vtr::vector<ClusterNetId, std::vector<t_2D_tbb>> bb_coords;
+    vtr::vector<ClusterNetId, std::vector<t_2D_tbb>> layer_bb_coords;
 
     // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each layer ()
     vtr::vector<ClusterNetId, std::vector<int>> num_sink_pin_layer;

From e4e7e7beac46032267e578cd5e7ada7c590713be Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Sat, 30 Sep 2023 13:03:10 -0400
Subject: [PATCH 123/257] use a 3d bb for median_move_generator

---
 vpr/src/place/median_move_generator.cpp | 20 ++++++++++----------
 vpr/src/place/placer_context.h          |  2 +-
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index 5fe2f738632..5cfe3a6dc4a 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -78,16 +78,16 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
 
             //To calulate the bb incrementally while excluding the moving block
             //assume that the moving block is moved to a non-critical coord of the bb
-            if (place_move_ctx.bb_coords[net_id][block_layer].xmin == xold) {
-                xnew = place_move_ctx.bb_coords[net_id][block_layer].xmax;
+            if (place_move_ctx.bb_coords[net_id].xmin == xold) {
+                xnew = place_move_ctx.bb_coords[net_id].xmax;
             } else {
-                xnew = place_move_ctx.bb_coords[net_id][block_layer].xmin;
+                xnew = place_move_ctx.bb_coords[net_id].xmin;
             }
 
-            if (place_move_ctx.bb_coords[net_id][block_layer].ymin == yold) {
-                ynew = place_move_ctx.bb_coords[net_id][block_layer].ymax;
+            if (place_move_ctx.bb_coords[net_id].ymin == yold) {
+                ynew = place_move_ctx.bb_coords[net_id].ymax;
             } else {
-                ynew = place_move_ctx.bb_coords[net_id][block_layer].ymin;
+                ynew = place_move_ctx.bb_coords[net_id].ymin;
             }
 
             if (!get_bb_incrementally(net_id, coords, xold, yold, xnew, ynew, block_layer)) {
@@ -242,10 +242,10 @@ static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb& bb_co
  * the pins always lie on the outside of the bounding box.            *
  * The x and y coordinates are the pin's x and y coordinates.         */
 /* IO blocks are considered to be one cell in for simplicity.         */
-static bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, int xold, int yold, int xnew, int ynew, int layer) {
+static bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, int xold, int yold, int xnew, int ynew, int /* layer */) {
     //TODO: account for multiple physical pin instances per logical pin
 
-    const t_2D_tbb *curr_bb_edge, *curr_bb_coord;
+    const t_bb *curr_bb_edge, *curr_bb_coord;
 
     auto& device_ctx = g_vpr_ctx.device();
     auto& place_move_ctx = g_placer_ctx.move();
@@ -256,8 +256,8 @@ static bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, int xo
     yold = std::max(std::min<int>(yold, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
 
     /* The net had NOT been updated before, could use the old values */
-    curr_bb_coord = &(place_move_ctx.bb_coords[net_id][layer]);
-    curr_bb_edge = &(place_move_ctx.bb_num_on_edges[net_id][layer]);
+    curr_bb_coord = &(place_move_ctx.bb_coords[net_id]);
+    curr_bb_edge = &(place_move_ctx.bb_num_on_edges[net_id]);
 
     /* Check if I can update the bounding box incrementally. */
 
diff --git a/vpr/src/place/placer_context.h b/vpr/src/place/placer_context.h
index 467252ea6f7..bdf914d6ba5 100644
--- a/vpr/src/place/placer_context.h
+++ b/vpr/src/place/placer_context.h
@@ -94,7 +94,7 @@ struct PlacerMoveContext : public Context {
 
     // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each of a net's bounding box (to allow efficient updates)
     vtr::vector<ClusterNetId, t_bb> bb_num_on_edges;
-    
+
     // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the bounding box coordinates of a net's bounding box
     vtr::vector<ClusterNetId, t_bb> bb_coords;
 

From 2d3f9e3304f064fe2738e864ac16fd0cbe8ff90f Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 2 Oct 2023 10:13:40 -0400
Subject: [PATCH 124/257] add ts_bb* vars for the case when a 2D FPGA is used

---
 vpr/src/place/place.cpp | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 18436ab4f97..536a0e0a7c2 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -148,7 +148,8 @@ static vtr::NdMatrix<float, 2> chany_place_cost_fac({0, 0}); //[0...device_ctx.g
 
 /* The following arrays are used by the try_swap function for speed.   */
 /* [0...cluster_ctx.clb_nlist.nets().size()-1] */
-static vtr::vector<ClusterNetId, std::vector<t_2D_tbb>> ts_bb_coord_new, ts_bb_edge_new;
+static vtr::vector<ClusterNetId, t_bb> ts_bb_coord_new, ts_bb_edge_new;
+static vtr::vector<ClusterNetId, std::vector<t_2D_tbb>> layer_ts_bb_coord_new, layer_ts_bb_edge_new;
 static vtr::vector<ClusterNetId, std::vector<int>> ts_layer_sink_pin_count;
 static std::vector<ClusterNetId> ts_nets_to_update;
 
@@ -2442,8 +2443,13 @@ static void free_placement_structs(const t_placer_opts& placer_opts, const t_noc
 
     vtr::release_memory(net_cost);
     vtr::release_memory(proposed_net_cost);
-    vtr::release_memory(place_move_ctx.bb_coords);
     vtr::release_memory(place_move_ctx.bb_num_on_edges);
+    vtr::release_memory(place_move_ctx.bb_coords);
+
+    vtr::release_memory(place_move_ctx.layer_bb_num_on_edges);
+    vtr::release_memory(place_move_ctx.layer_bb_coords);
+
+    vtr::release_memory(place_move_ctx.num_sink_pin_layer);
 
     vtr::release_memory(bb_updated_before);
 
@@ -2465,9 +2471,15 @@ static void alloc_and_load_try_swap_structs() {
 
     const int num_layers = g_vpr_ctx.device().grid.get_num_layers();
 
-    ts_bb_edge_new.resize(num_nets, std::vector<t_2D_tbb>(num_layers, t_2D_tbb()));
-    ts_bb_coord_new.resize(num_nets, std::vector<t_2D_tbb>(num_layers, t_2D_tbb()));
-    ts_layer_sink_pin_count.resize(num_nets, std::vector<int>(num_layers, OPEN));
+    if (num_layers == 1) {
+        ts_bb_coord_new.resize(num_nets, t_bb());
+        ts_bb_edge_new.resize(num_nets, t_bb());
+    } else {
+        VTR_ASSERT(num_layers > 1);
+        layer_ts_bb_edge_new.resize(num_nets, std::vector<t_2D_tbb>(num_layers, t_2D_tbb()));
+        layer_ts_bb_coord_new.resize(num_nets, std::vector<t_2D_tbb>(num_layers, t_2D_tbb()));
+        ts_layer_sink_pin_count.resize(num_nets, std::vector<int>(num_layers, OPEN));
+    }
     ts_nets_to_update.resize(num_nets, ClusterNetId::INVALID());
 
     auto& place_ctx = g_vpr_ctx.mutable_placement();
@@ -2477,6 +2489,8 @@ static void alloc_and_load_try_swap_structs() {
 static void free_try_swap_structs() {
     vtr::release_memory(ts_bb_edge_new);
     vtr::release_memory(ts_bb_coord_new);
+    vtr::release_memory(layer_ts_bb_edge_new);
+    vtr::release_memory(layer_ts_bb_coord_new);
     vtr::release_memory(ts_layer_sink_pin_count);
     vtr::release_memory(ts_nets_to_update);
 

From 0f978166eb748c67ea23d3967ff3516ca7a394dc Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 2 Oct 2023 10:22:19 -0400
Subject: [PATCH 125/257] impl get_bb_from_scratch and the separate layer
 version of it

---
 vpr/src/place/place.cpp | 122 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 111 insertions(+), 11 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 536a0e0a7c2..5d6cea85b12 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -411,9 +411,13 @@ static double get_net_cost(ClusterNetId net_id,
                            const std::vector<int>& layer_pin_sink_count);
 
 static void get_bb_from_scratch(ClusterNetId net_id,
-                                std::vector<t_2D_tbb>& num_on_edges,
-                                std::vector<t_2D_tbb>& coords,
-                                std::vector<int>& layer_pin_sink_count);
+                                t_bb& coords,
+                                t_bb& num_on_edges);
+
+static void get_layer_bb_from_scratch(ClusterNetId net_id,
+                                      std::vector<t_2D_tbb>& num_on_edges,
+                                      std::vector<t_2D_tbb>& coords,
+                                      std::vector<int>& layer_pin_sink_count);
 
 static double get_net_wirelength_estimate(ClusterNetId net_id,
                                           const std::vector<t_2D_tbb>& bbptr,
@@ -2401,11 +2405,16 @@ static void alloc_and_load_placement_structs(float place_cost_exp,
 
     net_cost.resize(num_nets, -1.);
     proposed_net_cost.resize(num_nets, -1.);
-    place_move_ctx.bb_num_on_edges.resize(num_nets, std::vector<t_2D_tbb>(num_layers, t_2D_tbb()));
-
-    place_move_ctx.bb_coords.resize(num_nets, std::vector<t_2D_tbb>(num_layers, t_2D_tbb()));
 
-    place_move_ctx.num_sink_pin_layer.resize(num_nets, std::vector<int>(num_layers, 0));
+    if (num_layers == 1) {
+        place_move_ctx.bb_coords.resize(num_nets, t_bb());
+        place_move_ctx.bb_num_on_edges.resize(num_nets, t_bb());
+    } else {
+        VTR_ASSERT(num_layers > 1);
+        place_move_ctx.layer_bb_num_on_edges.resize(num_nets, std::vector<t_2D_tbb>(num_layers, t_2D_tbb()));
+        place_move_ctx.layer_bb_coords.resize(num_nets, std::vector<t_2D_tbb>(num_layers, t_2D_tbb()));
+        place_move_ctx.num_sink_pin_layer.resize(num_nets, std::vector<int>(num_layers, 0));
+    }
 
     /* Used to store costs for moves not yet made and to indicate when a net's   *
      * cost has been recomputed. proposed_net_cost[inet] < 0 means net's cost hasn't *
@@ -2502,10 +2511,101 @@ static void free_try_swap_structs() {
  * from only the block location information).  It updates both the       *
  * coordinate and number of pins on each edge information.  It           *
  * should only be called when the bounding box information is not valid. */
-static void get_bb_from_scratch(ClusterNetId net_id,
-                                std::vector<t_2D_tbb>& num_on_edges,
-                                std::vector<t_2D_tbb>& coords,
-                                std::vector<int>& layer_pin_sink_count) {
+static void get_bb_from_scratch(ClusterNetId net_id, t_bb& coords, t_bb& num_on_edges) {
+    int pnum, x, y, xmin, xmax, ymin, ymax;
+    int xmin_edge, xmax_edge, ymin_edge, ymax_edge; // number of pins found on each edge of the box
+
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+    auto& place_ctx = g_vpr_ctx.placement();
+    auto& device_ctx = g_vpr_ctx.device();
+    auto& grid = device_ctx.grid;
+
+    ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id);
+    pnum = net_pin_to_tile_pin_index(net_id, 0); // net pin 0 -- presumably the driver pin; TODO confirm
+    VTR_ASSERT(pnum >= 0);
+    x = place_ctx.block_locs[bnum].loc.x
+        + physical_tile_type(bnum)->pin_width_offset[pnum];
+    y = place_ctx.block_locs[bnum].loc.y
+        + physical_tile_type(bnum)->pin_height_offset[pnum];
+
+    x = max(min<int>(x, grid.width() - 2), 1); //-2 for no perim channels (same clamp as for the sinks below)
+    y = max(min<int>(y, grid.height() - 2), 1); //-2 for no perim channels (same clamp as for the sinks below)
+
+    xmin = x; // the box starts as the single point of the pin on the net's driver block
+    ymin = y;
+    xmax = x;
+    ymax = y;
+    xmin_edge = 1; // ...so that one pin initially lies on all four edges
+    ymin_edge = 1;
+    xmax_edge = 1;
+    ymax_edge = 1;
+
+    for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) {
+        bnum = cluster_ctx.clb_nlist.pin_block(pin_id);
+        pnum = tile_pin_index(pin_id);
+        x = place_ctx.block_locs[bnum].loc.x
+            + physical_tile_type(bnum)->pin_width_offset[pnum];
+        y = place_ctx.block_locs[bnum].loc.y
+            + physical_tile_type(bnum)->pin_height_offset[pnum];
+
+        /* Code below counts IO blocks as being within the 1..grid.width()-2, 1..grid.height()-2 clb array. *
+         * This is because channels do not go out of the 0..grid.width()-2, 0..grid.height()-2 range, and   *
+         * I always take all channels impinging on the bounding box to be within   *
+         * that bounding box.  Hence, this "movement" of IO blocks does not affect *
+         * which channels are included within the bounding box, and it             *
+         * simplifies the code a lot.                                              */
+
+        x = max(min<int>(x, grid.width() - 2), 1);  //-2 for no perim channels
+        y = max(min<int>(y, grid.height() - 2), 1); //-2 for no perim channels
+
+        if (x == xmin) {
+            xmin_edge++;
+        }
+        if (x == xmax) { /* Recall that xmin could equal xmax -- don't use else */
+            xmax_edge++;
+        } else if (x < xmin) {
+            xmin = x;
+            xmin_edge = 1; // new leftmost pin: it is the only pin on that edge so far
+        } else if (x > xmax) {
+            xmax = x;
+            xmax_edge = 1; // new rightmost pin: it is the only pin on that edge so far
+        }
+
+        if (y == ymin) {
+            ymin_edge++;
+        }
+        if (y == ymax) { /* Same structure as the x case above: ymin could equal ymax */
+            ymax_edge++;
+        } else if (y < ymin) {
+            ymin = y;
+            ymin_edge = 1;
+        } else if (y > ymax) {
+            ymax = y;
+            ymax_edge = 1;
+        }
+    }
+
+    /* Publish the result: only the xmin/xmax/ymin/ymax fields of coords and  *
+     * num_on_edges are written by this routine.                              */
+    coords.xmin = xmin;
+    coords.xmax = xmax;
+    coords.ymin = ymin;
+    coords.ymax = ymax;
+
+    num_on_edges.xmin = xmin_edge;
+    num_on_edges.xmax = xmax_edge;
+    num_on_edges.ymin = ymin_edge;
+    num_on_edges.ymax = ymax_edge;
+}
+
+/* This routine finds the bounding box of each net from scratch (i.e.   *
+ * from only the block location information).  It updates both the       *
+ * coordinate and number of pins on each edge information.  It           *
+ * should only be called when the bounding box information is not valid. */
+static void get_layer_bb_from_scratch(ClusterNetId net_id,
+                                      std::vector<t_2D_tbb>& num_on_edges,
+                                      std::vector<t_2D_tbb>& coords,
+                                      std::vector<int>& layer_pin_sink_count) {
     auto& device_ctx = g_vpr_ctx.device();
     const int num_layers = device_ctx.grid.get_num_layers();
     num_on_edges.resize(num_layers, t_2D_tbb());

From 006c2d32c29c359f55840ecdc31cb090e802d757 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 2 Oct 2023 10:36:37 -0400
Subject: [PATCH 126/257] impl get_net_cost and get_net_layer_cost

---
 vpr/src/place/place.cpp | 53 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 44 insertions(+), 9 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 5d6cea85b12..fbecf96255e 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -406,9 +406,11 @@ static void update_placement_cost_normalization_factors(t_placer_costs* costs, c
 
 static double get_total_cost(t_placer_costs* costs, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts);
 
-static double get_net_cost(ClusterNetId net_id,
-                           const std::vector<t_2D_tbb>& bbptr,
-                           const std::vector<int>& layer_pin_sink_count);
+static double get_net_cost(ClusterNetId net_id, const t_bb& bbptr);
+
+static double get_net_layer_cost(ClusterNetId /* net_id */,
+                                 const std::vector<t_2D_tbb>& bbptr,
+                                 const std::vector<int>& layer_pin_sink_count);
 
 static void get_bb_from_scratch(ClusterNetId net_id,
                                 t_bb& coords,
@@ -1841,6 +1843,7 @@ static int find_affected_nets_and_update_costs(
     VTR_ASSERT_SAFE(bb_delta_c == 0.);
     VTR_ASSERT_SAFE(timing_delta_c == 0.);
     auto& cluster_ctx = g_vpr_ctx.clustering();
+    int num_layers = g_vpr_ctx.device().grid.get_num_layers();
 
     int num_affected_nets = 0;
 
@@ -1879,9 +1882,15 @@ static int find_affected_nets_and_update_costs(
          inet_affected++) {
         ClusterNetId net_id = ts_nets_to_update[inet_affected];
 
-        proposed_net_cost[net_id] = get_net_cost(net_id,
-                                                 ts_bb_coord_new[net_id],
-                                                 ts_layer_sink_pin_count[net_id]);
+        if (num_layers == 1) {
+            proposed_net_cost[net_id] = get_net_cost(net_id,
+                                                     ts_bb_coord_new[net_id]);
+        } else {
+            proposed_net_cost[net_id] = get_net_layer_cost(net_id,
+                                                           layer_ts_bb_coord_new[net_id],
+                                                           ts_layer_sink_pin_count[net_id]);
+        }
+
         bb_delta_c += proposed_net_cost[net_id] - net_cost[net_id];
     }
 
@@ -2757,9 +2766,35 @@ static double get_net_wirelength_estimate(ClusterNetId /* net_id */,
     return (ncost);
 }
 
-static double get_net_cost(ClusterNetId /* net_id */,
-                           const std::vector<t_2D_tbb>& bbptr,
-                           const std::vector<int>& layer_pin_sink_count) {
+static double get_net_cost(ClusterNetId net_id, const t_bb& bbptr) {
+    /* Returns the cost of one net computed from its coordinate bounding    *
+     * box (bbptr), scaled by a crossing factor based on its pin count.     */
+
+    double ncost, crossing;
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+
+    crossing = wirelength_crossing_count(
+        cluster_ctx.clb_nlist.net_pins(net_id).size());
+
+    /* Could insert a check for xmin == xmax.  In that case, assume  *
+     * connection will be made with no bends and hence no x-cost.    *
+     * Same thing for y-cost.                                        */
+
+    /* Cost = wire length along channel * cross_count / average      *
+     * channel capacity.   Do this for x, then y direction and add.  */
+
+    ncost = (bbptr.xmax - bbptr.xmin + 1) * crossing
+            * chanx_place_cost_fac[bbptr.ymax][bbptr.ymin - 1]; // x term: factor looked up at [ymax][ymin - 1]
+
+    ncost += (bbptr.ymax - bbptr.ymin + 1) * crossing
+             * chany_place_cost_fac[bbptr.xmax][bbptr.xmin - 1]; // y term: factor looked up at [xmax][xmin - 1]
+
+    return (ncost);
+}
+
+static double get_net_layer_cost(ClusterNetId /* net_id */,
+                                 const std::vector<t_2D_tbb>& bbptr,
+                                 const std::vector<int>& layer_pin_sink_count) {
     /* Finds the cost due to one net by looking at its coordinate bounding  *
      * box.                                                                 */
 

From e2ccdba10e036407005f6229f89440af029e6abb Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 2 Oct 2023 10:56:48 -0400
Subject: [PATCH 127/257] impl update_layer_bb update_bb

---
 vpr/src/place/place.cpp | 228 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 213 insertions(+), 15 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index fbecf96255e..2d3e249451e 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -333,12 +333,18 @@ static void get_non_updateable_bb(ClusterNetId net_id,
                                   std::vector<int>& num_sink_layer);
 
 static void update_bb(ClusterNetId net_id,
-                      std::vector<t_2D_tbb>& bb_edge_new,
-                      std::vector<t_2D_tbb>& bb_coord_new,
-                      std::vector<int>& bb_pin_sink_count_new,
+                      t_bb& bb_edge_new,
+                      t_bb& bb_coord_new,
                       t_physical_tile_loc pin_old_loc,
-                      t_physical_tile_loc pin_new_loc,
-                      bool is_output_pin);
+                      t_physical_tile_loc pin_new_loc);
+
+static void update_layer_bb(ClusterNetId net_id,
+                            std::vector<t_2D_tbb>& bb_edge_new,
+                            std::vector<t_2D_tbb>& bb_coord_new,
+                            std::vector<int>& bb_pin_sink_count_new,
+                            t_physical_tile_loc pin_old_loc,
+                            t_physical_tile_loc pin_new_loc,
+                            bool is_output_pin);
 
 static inline void update_bb_same_layer(ClusterNetId net_id,
                                         const t_physical_tile_loc& pin_old_loc,
@@ -1953,10 +1959,8 @@ static void update_net_bb(const ClusterNetId net,
         update_bb(net,
                   ts_bb_edge_new[net],
                   ts_bb_coord_new[net],
-                  ts_layer_sink_pin_count[net],
                   pin_old_loc,
-                  pin_new_loc,
-                  pin_dir == e_pin_type::DRIVER);
+                  pin_new_loc);
     }
 }
 
@@ -2905,12 +2909,206 @@ static void get_non_updateable_bb(ClusterNetId net_id,
 }
 
 static void update_bb(ClusterNetId net_id,
-                      std::vector<t_2D_tbb>& bb_edge_new,
-                      std::vector<t_2D_tbb>& bb_coord_new,
-                      std::vector<int>& bb_pin_sink_count_new,
+                      t_bb& bb_edge_new,
+                      t_bb& bb_coord_new,
                       t_physical_tile_loc pin_old_loc,
-                      t_physical_tile_loc pin_new_loc,
-                      bool is_output_pin) {
+                      t_physical_tile_loc pin_new_loc) {
+    /* Updates the bounding box of a net by storing its coordinates in    *
+     * the bb_coord_new data structure and the number of blocks on each   *
+     * edge in the bb_edge_new data structure.  This routine should only  *
+     * be called for large nets, since it has some overhead relative to   *
+     * just doing a brute force bounding box calculation.  The bounding   *
+     * box coordinate and edge information for net_id must be valid       *
+     * before this routine is called.                                     *
+     * Currently assumes channels on both sides of the CLBs forming the   *
+     * edges of the bounding box can be used.  Essentially, I am assuming *
+     * the pins always lie on the outside of the bounding box.            *
+     * The x and y coordinates are the pin's x and y coordinates.         */
+    /* IO blocks are considered to be one cell in for simplicity.         */
+    //TODO: account for multiple physical pin instances per logical pin
+    const t_bb *curr_bb_edge, *curr_bb_coord; // the box to update from: committed or in-progress (chosen below)
+
+    auto& device_ctx = g_vpr_ctx.device();
+    auto& place_move_ctx = g_placer_ctx.move();
+
+    pin_new_loc.x = max(min<int>(pin_new_loc.x, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
+    pin_new_loc.y = max(min<int>(pin_new_loc.y, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
+    pin_old_loc.x = max(min<int>(pin_old_loc.x, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
+    pin_old_loc.y = max(min<int>(pin_old_loc.y, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
+
+    /* Check if the net had been updated before. */
+    if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
+        /* The net had been updated from scratch, DO NOT update again! */
+        return;
+    } else if (bb_updated_before[net_id] == NOT_UPDATED_YET) {
+        /* The net had NOT been updated before, could use the old values */
+        curr_bb_coord = &place_move_ctx.bb_coords[net_id];
+        curr_bb_edge = &place_move_ctx.bb_num_on_edges[net_id];
+        bb_updated_before[net_id] = UPDATED_ONCE;
+    } else {
+        /* The net had been updated before, must use the new values */
+        curr_bb_coord = &bb_coord_new;
+        curr_bb_edge = &bb_edge_new;
+    }
+
+    /* Check if I can update the bounding box incrementally. */
+
+    if (pin_new_loc.x < pin_old_loc.x) { /* Move to left. */
+
+        /* Update the xmax fields for coordinates and number of edges first. */
+
+        if (pin_old_loc.x == curr_bb_coord->xmax) { /* Old position at xmax. */
+            if (curr_bb_edge->xmax == 1) {
+                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new); // the only pin on xmax left it: new xmax unknown, recompute
+                bb_updated_before[net_id] = GOT_FROM_SCRATCH;
+                return;
+            } else {
+                bb_edge_new.xmax = curr_bb_edge->xmax - 1;
+                bb_coord_new.xmax = curr_bb_coord->xmax;
+            }
+        } else { /* Move to left, old position was not at xmax. */
+            bb_coord_new.xmax = curr_bb_coord->xmax;
+            bb_edge_new.xmax = curr_bb_edge->xmax;
+        }
+
+        /* Now do the xmin fields for coordinates and number of edges. */
+
+        if (pin_new_loc.x < curr_bb_coord->xmin) { /* Moved past xmin */
+            bb_coord_new.xmin = pin_new_loc.x;
+            bb_edge_new.xmin = 1;
+        } else if (pin_new_loc.x == curr_bb_coord->xmin) { /* Moved to xmin */
+            bb_coord_new.xmin = pin_new_loc.x;
+            bb_edge_new.xmin = curr_bb_edge->xmin + 1;
+        } else { /* Xmin unchanged. */
+            bb_coord_new.xmin = curr_bb_coord->xmin;
+            bb_edge_new.xmin = curr_bb_edge->xmin;
+        }
+        /* End of move to left case. */
+
+    } else if (pin_new_loc.x > pin_old_loc.x) { /* Move to right. */
+
+        /* Update the xmin fields for coordinates and number of edges first. */
+
+        if (pin_old_loc.x == curr_bb_coord->xmin) { /* Old position at xmin. */
+            if (curr_bb_edge->xmin == 1) {
+                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new); // the only pin on xmin left it: new xmin unknown, recompute
+                bb_updated_before[net_id] = GOT_FROM_SCRATCH;
+                return;
+            } else {
+                bb_edge_new.xmin = curr_bb_edge->xmin - 1;
+                bb_coord_new.xmin = curr_bb_coord->xmin;
+            }
+        } else { /* Move to right, old position was not at xmin. */
+            bb_coord_new.xmin = curr_bb_coord->xmin;
+            bb_edge_new.xmin = curr_bb_edge->xmin;
+        }
+
+        /* Now do the xmax fields for coordinates and number of edges. */
+
+        if (pin_new_loc.x > curr_bb_coord->xmax) { /* Moved past xmax. */
+            bb_coord_new.xmax = pin_new_loc.x;
+            bb_edge_new.xmax = 1;
+        } else if (pin_new_loc.x == curr_bb_coord->xmax) { /* Moved to xmax */
+            bb_coord_new.xmax = pin_new_loc.x;
+            bb_edge_new.xmax = curr_bb_edge->xmax + 1;
+        } else { /* Xmax unchanged. */
+            bb_coord_new.xmax = curr_bb_coord->xmax;
+            bb_edge_new.xmax = curr_bb_edge->xmax;
+        }
+        /* End of move to right case. */
+
+    } else { /* pin_new_loc.x == pin_old_loc.x -- no x motion. */
+        bb_coord_new.xmin = curr_bb_coord->xmin;
+        bb_coord_new.xmax = curr_bb_coord->xmax;
+        bb_edge_new.xmin = curr_bb_edge->xmin;
+        bb_edge_new.xmax = curr_bb_edge->xmax;
+    }
+
+    /* Now account for the y-direction motion. */
+
+    if (pin_new_loc.y < pin_old_loc.y) { /* Move down. */
+
+        /* Update the ymax fields for coordinates and number of edges first. */
+
+        if (pin_old_loc.y == curr_bb_coord->ymax) { /* Old position at ymax. */
+            if (curr_bb_edge->ymax == 1) {
+                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new); // the only pin on ymax left it: new ymax unknown, recompute
+                bb_updated_before[net_id] = GOT_FROM_SCRATCH;
+                return;
+            } else {
+                bb_edge_new.ymax = curr_bb_edge->ymax - 1;
+                bb_coord_new.ymax = curr_bb_coord->ymax;
+            }
+        } else { /* Move down, old position was not at ymax. */
+            bb_coord_new.ymax = curr_bb_coord->ymax;
+            bb_edge_new.ymax = curr_bb_edge->ymax;
+        }
+
+        /* Now do the ymin fields for coordinates and number of edges. */
+
+        if (pin_new_loc.y < curr_bb_coord->ymin) { /* Moved past ymin */
+            bb_coord_new.ymin = pin_new_loc.y;
+            bb_edge_new.ymin = 1;
+        } else if (pin_new_loc.y == curr_bb_coord->ymin) { /* Moved to ymin */
+            bb_coord_new.ymin = pin_new_loc.y;
+            bb_edge_new.ymin = curr_bb_edge->ymin + 1;
+        } else { /* ymin unchanged. */
+            bb_coord_new.ymin = curr_bb_coord->ymin;
+            bb_edge_new.ymin = curr_bb_edge->ymin;
+        }
+        /* End of move down case. */
+
+    } else if (pin_new_loc.y > pin_old_loc.y) { /* Moved up. */
+
+        /* Update the ymin fields for coordinates and number of edges first. */
+
+        if (pin_old_loc.y == curr_bb_coord->ymin) { /* Old position at ymin. */
+            if (curr_bb_edge->ymin == 1) {
+                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new); // the only pin on ymin left it: new ymin unknown, recompute
+                bb_updated_before[net_id] = GOT_FROM_SCRATCH;
+                return;
+            } else {
+                bb_edge_new.ymin = curr_bb_edge->ymin - 1;
+                bb_coord_new.ymin = curr_bb_coord->ymin;
+            }
+        } else { /* Moved up, old position was not at ymin. */
+            bb_coord_new.ymin = curr_bb_coord->ymin;
+            bb_edge_new.ymin = curr_bb_edge->ymin;
+        }
+
+        /* Now do the ymax fields for coordinates and number of edges. */
+
+        if (pin_new_loc.y > curr_bb_coord->ymax) { /* Moved past ymax. */
+            bb_coord_new.ymax = pin_new_loc.y;
+            bb_edge_new.ymax = 1;
+        } else if (pin_new_loc.y == curr_bb_coord->ymax) { /* Moved to ymax */
+            bb_coord_new.ymax = pin_new_loc.y;
+            bb_edge_new.ymax = curr_bb_edge->ymax + 1;
+        } else { /* ymax unchanged. */
+            bb_coord_new.ymax = curr_bb_coord->ymax;
+            bb_edge_new.ymax = curr_bb_edge->ymax;
+        }
+        /* End of move up case. */
+
+    } else { /* pin_new_loc.y == pin_old_loc.y -- no y motion. */
+        bb_coord_new.ymin = curr_bb_coord->ymin;
+        bb_coord_new.ymax = curr_bb_coord->ymax;
+        bb_edge_new.ymin = curr_bb_edge->ymin;
+        bb_edge_new.ymax = curr_bb_edge->ymax;
+    }
+
+    if (bb_updated_before[net_id] == NOT_UPDATED_YET) { // NOTE(review): looks unreachable -- that state was already set to UPDATED_ONCE above; confirm
+        bb_updated_before[net_id] = UPDATED_ONCE;
+    }
+}
+
+static void update_layer_bb(ClusterNetId net_id,
+                            std::vector<t_2D_tbb>& bb_edge_new,
+                            std::vector<t_2D_tbb>& bb_coord_new,
+                            std::vector<int>& bb_pin_sink_count_new,
+                            t_physical_tile_loc pin_old_loc,
+                            t_physical_tile_loc pin_new_loc,
+                            bool is_output_pin) {
     /* Updates the bounding box of a net by storing its coordinates in    *
      * the bb_coord_new data structure and the number of blocks on each   *
      * edge in the bb_edge_new data structure.  This routine should only  *
@@ -2941,8 +3139,8 @@ static void update_bb(ClusterNetId net_id,
         return;
     } else if (bb_updated_before[net_id] == NOT_UPDATED_YET) {
         /* The net had NOT been updated before, could use the old values */
-        curr_bb_edge = &place_move_ctx.bb_num_on_edges[net_id];
-        curr_bb_coord = &place_move_ctx.bb_coords[net_id];
+        curr_bb_edge = &place_move_ctx.layer_bb_num_on_edges[net_id];
+        curr_bb_coord = &place_move_ctx.layer_bb_coords[net_id];
         curr_layer_pin_sink_count = &place_move_ctx.num_sink_pin_layer[net_id];
         bb_updated_before[net_id] = UPDATED_ONCE;
     } else {

From 344eceedf7da5eed8cfc6ec16e8566f5a26c43a6 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 2 Oct 2023 11:07:28 -0400
Subject: [PATCH 128/257] impl get_non_updateable_bb
 get_non_updateable_layer_bb

---
 vpr/src/place/place.cpp | 73 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 67 insertions(+), 6 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 2d3e249451e..4c5ce26164c 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -329,8 +329,11 @@ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks);
 static e_move_result assess_swap(double delta_c, double t);
 
 static void get_non_updateable_bb(ClusterNetId net_id,
-                                  std::vector<t_2D_tbb>& bb_coord_new,
-                                  std::vector<int>& num_sink_layer);
+                                  t_bb& bb_coord_new);
+
+static void get_non_updateable_layer_bb(ClusterNetId net_id,
+                                        std::vector<t_2D_tbb>& bb_coord_new,
+                                        std::vector<int>& num_sink_layer);
 
 static void update_bb(ClusterNetId net_id,
                       t_bb& bb_edge_new,
@@ -1935,8 +1938,7 @@ static void update_net_bb(const ClusterNetId net,
 
         if (bb_updated_before[net] == NOT_UPDATED_YET) { //Only once per-net
             get_non_updateable_bb(net,
-                                  ts_bb_coord_new[net],
-                                  ts_layer_sink_pin_count[net]);
+                                  ts_bb_coord_new[net]);
         }
     } else {
         //For large nets, update bounding box incrementally
@@ -2838,8 +2840,67 @@ static double get_net_layer_cost(ClusterNetId /* net_id */,
  * edges of the bounding box can be used.  Essentially, I am assuming *
  * the pins always lie on the outside of the bounding box.            */
 static void get_non_updateable_bb(ClusterNetId net_id,
-                                  std::vector<t_2D_tbb>& bb_coord_new,
-                                  std::vector<int>& num_sink_layer) {
+                                  t_bb& bb_coord_new) {
+    //TODO: account for multiple physical pin instances per logical pin
+
+    int xmax, ymax, xmin, ymin, x, y; // running (not yet clipped) box extents, and the current pin's coordinates
+    int pnum;
+
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+    auto& place_ctx = g_vpr_ctx.placement();
+    auto& device_ctx = g_vpr_ctx.device();
+
+    ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id);
+    pnum = net_pin_to_tile_pin_index(net_id, 0); // net pin 0 -- presumably the driver pin; TODO confirm
+
+    x = place_ctx.block_locs[bnum].loc.x
+        + physical_tile_type(bnum)->pin_width_offset[pnum];
+    y = place_ctx.block_locs[bnum].loc.y
+        + physical_tile_type(bnum)->pin_height_offset[pnum];
+
+    xmin = x; // seed the box with the pin on the net's driver block
+    ymin = y;
+    xmax = x;
+    ymax = y;
+
+    for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) {
+        bnum = cluster_ctx.clb_nlist.pin_block(pin_id);
+        pnum = tile_pin_index(pin_id);
+        x = place_ctx.block_locs[bnum].loc.x
+            + physical_tile_type(bnum)->pin_width_offset[pnum];
+        y = place_ctx.block_locs[bnum].loc.y
+            + physical_tile_type(bnum)->pin_height_offset[pnum];
+
+        if (x < xmin) {
+            xmin = x;
+        } else if (x > xmax) { // "else" is safe: xmin <= xmax, so x cannot be both below xmin and above xmax
+            xmax = x;
+        }
+
+        if (y < ymin) {
+            ymin = y;
+        } else if (y > ymax) {
+            ymax = y;
+        }
+    }
+
+    /* Now I've found the coordinates of the bounding box.  There are no *
+     * channels beyond device_ctx.grid.width()-2 and                     *
+     * device_ctx.grid.height() - 2, so I want to clip to that.  As well,*
+     * since I'll always include the channel immediately below and the   *
+     * channel immediately to the left of the bounding box, I want to    *
+     * clip to 1 in both directions as well (since minimum channel index *
+     * is 0).  See route_common.cpp for a channel diagram.               */
+
+    bb_coord_new.xmin = max(min<int>(xmin, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
+    bb_coord_new.ymin = max(min<int>(ymin, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
+    bb_coord_new.xmax = max(min<int>(xmax, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
+    bb_coord_new.ymax = max(min<int>(ymax, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
+}
+
+static void get_non_updateable_layer_bb(ClusterNetId net_id,
+                                        std::vector<t_2D_tbb>& bb_coord_new,
+                                        std::vector<int>& num_sink_layer) {
     //TODO: account for multiple physical pin instances per logical pin
 
     auto& device_ctx = g_vpr_ctx.device();

From 95f367a16fd3bc0dcfe79ec2433a22f85914f71b Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 2 Oct 2023 11:11:25 -0400
Subject: [PATCH 129/257] impl get_net_wirelength_estimate and
 get_net_layer_wirelength_estimate

---
 vpr/src/place/place.cpp | 38 ++++++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 4c5ce26164c..2db99aead58 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -430,9 +430,11 @@ static void get_layer_bb_from_scratch(ClusterNetId net_id,
                                       std::vector<t_2D_tbb>& coords,
                                       std::vector<int>& layer_pin_sink_count);
 
-static double get_net_wirelength_estimate(ClusterNetId net_id,
-                                          const std::vector<t_2D_tbb>& bbptr,
-                                          const std::vector<int>& layer_pin_sink_count);
+static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bbptr);
+
+static double get_net_layer_wirelength_estimate(ClusterNetId /* net_id */,
+                                                const std::vector<t_2D_tbb>& bbptr,
+                                                const std::vector<int>& layer_pin_sink_count);
 
 static void free_try_swap_arrays();
 
@@ -2740,9 +2742,33 @@ static double wirelength_crossing_count(size_t fanout) {
     }
 }
 
-static double get_net_wirelength_estimate(ClusterNetId /* net_id */,
-                                          const std::vector<t_2D_tbb>& bbptr,
-                                          const std::vector<int>& layer_pin_sink_count) {
+static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bbptr) {
+    /* WMF: Finds the estimate of wirelength due to one net by looking at   *
+     * its coordinate bounding box.                                         */
+
+    double ncost, crossing;
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+
+    crossing = wirelength_crossing_count(
+        cluster_ctx.clb_nlist.net_pins(net_id).size());
+
+    /* Could insert a check for xmin == xmax.  In that case, assume  *
+     * connection will be made with no bends and hence no x-cost.    *
+     * Same thing for y-cost.                                        */
+
+    /* Cost = wire length along channel * cross_count / average      *
+     * channel capacity.   Do this for x, then y direction and add.  */
+
+    ncost = (bbptr.xmax - bbptr.xmin + 1) * crossing;
+
+    ncost += (bbptr.ymax - bbptr.ymin + 1) * crossing;
+
+    return (ncost);
+}
+
+static double get_net_layer_wirelength_estimate(ClusterNetId /* net_id */,
+                                                const std::vector<t_2D_tbb>& bbptr,
+                                                const std::vector<int>& layer_pin_sink_count) {
     /* WMF: Finds the estimate of wirelength due to one net by looking at   *
      * its coordinate bounding box.                                         */
 

From 8ea2da1f946c57b3ea01867d79f1f24e047fe091 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 2 Oct 2023 11:20:33 -0400
Subject: [PATCH 130/257] impl comp_bb_cost comp_layer_bb_cost

---
 vpr/src/place/place.cpp | 85 +++++++++++++++++++++++++++++++++--------
 1 file changed, 69 insertions(+), 16 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 2db99aead58..58c4ca74813 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -271,6 +271,8 @@ static void free_fast_cost_update();
 
 static double comp_bb_cost(e_cost_methods method);
 
+static double comp_layer_bb_cost(e_cost_methods method);
+
 static void update_move_nets(int num_nets_affected);
 static void reset_move_nets(int num_nets_affected);
 
@@ -545,6 +547,8 @@ void try_place(const Netlist<>& net_list,
     float first_crit_exponent, first_rlim, first_t;
     int first_move_lim;
 
+    int num_layers = device_ctx.grid.get_num_layers();
+
     t_placer_costs costs(placer_opts.place_algorithm);
 
     tatum::TimingPathInfo critical_path;
@@ -653,7 +657,13 @@ void try_place(const Netlist<>& net_list,
     /* Gets initial cost and loads bounding boxes. */
 
     if (placer_opts.place_algorithm.is_timing_driven()) {
-        costs.bb_cost = comp_bb_cost(NORMAL);
+        if (num_layers == 1) {
+            costs.bb_cost = comp_bb_cost(NORMAL);
+        } else {
+            VTR_ASSERT_SAFE(num_layers > 1);
+            costs.bb_cost = comp_layer_bb_cost(NORMAL);
+        }
+
 
         first_crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */
 
@@ -2330,23 +2340,58 @@ static double comp_bb_cost(e_cost_methods method) {
             if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET
                 && method == NORMAL) {
                 get_bb_from_scratch(net_id,
-                                    place_move_ctx.bb_num_on_edges[net_id],
                                     place_move_ctx.bb_coords[net_id],
-                                    place_move_ctx.num_sink_pin_layer[net_id]);
+                                    place_move_ctx.bb_num_on_edges[net_id]);
             } else {
-                get_non_updateable_bb(net_id,
-                                      place_move_ctx.bb_coords[net_id],
-                                      place_move_ctx.num_sink_pin_layer[net_id]);
+                get_non_updateable_bb(net_id, place_move_ctx.bb_coords[net_id]);
             }
 
-            net_cost[net_id] = get_net_cost(net_id,
-                                            place_move_ctx.bb_coords[net_id],
+            net_cost[net_id] = get_net_cost(net_id, place_move_ctx.bb_coords[net_id]);
+            cost += net_cost[net_id];
+            if (method == CHECK)
+                expected_wirelength += get_net_wirelength_estimate(net_id, place_move_ctx.bb_coords[net_id]);
+        }
+    }
+
+    if (method == CHECK) {
+        VTR_LOG("\n");
+        VTR_LOG("BB estimate of min-dist (placement) wire length: %.0f\n",
+                expected_wirelength);
+    }
+    return cost;
+}
+
+
+static double comp_layer_bb_cost(e_cost_methods method) {
+    double cost = 0;
+    double expected_wirelength = 0.0;
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+    auto& place_move_ctx = g_placer_ctx.mutable_move();
+
+    for (auto net_id : cluster_ctx.clb_nlist.nets()) {       /* for each net ... */
+        if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { /* Do only if not ignored. */
+            /* Small nets don't use incremental updating on their bounding boxes, *
+             * so they can use a fast bounding box calculator.                    */
+            if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET
+                && method == NORMAL) {
+                get_layer_bb_from_scratch(net_id,
+                                    place_move_ctx.layer_bb_num_on_edges[net_id],
+                                    place_move_ctx.layer_bb_coords[net_id],
+                                    place_move_ctx.num_sink_pin_layer[net_id]);
+            } else {
+                get_non_updateable_layer_bb(net_id,
+                                            place_move_ctx.layer_bb_coords[net_id],
                                             place_move_ctx.num_sink_pin_layer[net_id]);
+            }
+
+            net_cost[net_id] = get_net_layer_cost(net_id,
+                                                  place_move_ctx.layer_bb_coords[net_id],
+                                                  place_move_ctx.num_sink_pin_layer[net_id]);
             cost += net_cost[net_id];
             if (method == CHECK)
-                expected_wirelength += get_net_wirelength_estimate(net_id,
-                                                                   place_move_ctx.bb_coords[net_id],
-                                                                   place_move_ctx.num_sink_pin_layer[net_id]);
+                expected_wirelength += get_net_layer_wirelength_estimate(net_id,
+                                                                         place_move_ctx.layer_bb_coords[net_id],
+                                                                         place_move_ctx.num_sink_pin_layer[net_id]);
         }
     }
 
@@ -3547,10 +3592,10 @@ static inline void update_bb_edge(ClusterNetId net_id,
                                   int& new_num_block_on_edge,
                                   int& new_edge_coord) {
     if (old_num_block_on_edge == 1) {
-        get_bb_from_scratch(net_id,
-                            bb_edge_new,
-                            bb_coord_new,
-                            bb_layer_pin_sink_count);
+        get_layer_bb_from_scratch(net_id,
+                                  bb_edge_new,
+                                  bb_coord_new,
+                                  bb_layer_pin_sink_count);
         bb_updated_before[net_id] = GOT_FROM_SCRATCH;
         return;
     } else {
@@ -3766,7 +3811,15 @@ static int check_placement_costs(const t_placer_costs& costs,
     double bb_cost_check;
     double timing_cost_check;
 
-    bb_cost_check = comp_bb_cost(CHECK);
+    int num_layers = g_vpr_ctx.device().grid.get_num_layers();
+
+    if (num_layers == 1) {
+        bb_cost_check = comp_bb_cost(CHECK);
+    } else {
+        VTR_ASSERT_SAFE(num_layers > 1);
+        bb_cost_check = comp_layer_bb_cost(CHECK);
+    }
+
     if (fabs(bb_cost_check - costs.bb_cost) > costs.bb_cost * ERROR_TOL) {
         VTR_LOG_ERROR(
             "bb_cost_check: %g and bb_cost: %g differ in check_place.\n",

From 91fa4476d2e2aa5258ced0f0804a66edc8a2748b Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 2 Oct 2023 11:46:57 -0400
Subject: [PATCH 131/257] impl update_net_layer_bb update_net_bb

---
 vpr/src/place/place.cpp | 61 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 56 insertions(+), 5 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 58c4ca74813..2936918fbb6 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -406,6 +406,13 @@ static void update_net_bb(const ClusterNetId net,
                           int iblk,
                           const ClusterBlockId blk,
                           const ClusterPinId blk_pin);
+
+static void update_net_layer_bb(const ClusterNetId net,
+                                const t_pl_blocks_to_be_moved& blocks_affected,
+                                int iblk,
+                                const ClusterBlockId blk,
+                                const ClusterPinId blk_pin);
+
 static void update_td_delta_costs(const PlaceDelayModel* delay_model,
                                   const PlacerCriticalities& criticalities,
                                   const ClusterNetId net,
@@ -1887,7 +1894,11 @@ static int find_affected_nets_and_update_costs(
             record_affected_net(net_id, num_affected_nets);
 
             /* Update the net bounding boxes. */
-            update_net_bb(net_id, blocks_affected, iblk, blk, blk_pin);
+            if (num_layers == 1) {
+                update_net_bb(net_id, blocks_affected, iblk, blk, blk_pin);
+            } else {
+                update_net_layer_bb(net_id, blocks_affected, iblk, blk, blk_pin);
+            }
 
             if (place_algorithm.is_timing_driven()) {
                 /* Determine the change in connection delay and timing cost. */
@@ -1949,8 +1960,7 @@ static void update_net_bb(const ClusterNetId net,
         //For small nets brute-force bounding box update is faster
 
         if (bb_updated_before[net] == NOT_UPDATED_YET) { //Only once per-net
-            get_non_updateable_bb(net,
-                                  ts_bb_coord_new[net]);
+            get_non_updateable_bb(net, ts_bb_coord_new[net]);
         }
     } else {
         //For large nets, update bounding box incrementally
@@ -1969,15 +1979,56 @@ static void update_net_bb(const ClusterNetId net,
             blocks_affected.moved_blocks[iblk].new_loc.x + pin_width_offset,
             blocks_affected.moved_blocks[iblk].new_loc.y + pin_height_offset,
             blocks_affected.moved_blocks[iblk].new_loc.layer);
-        auto pin_dir = get_pin_type_from_pin_physical_num(blk_type, iblk_pin);
         update_bb(net,
-                  ts_bb_edge_new[net],
                   ts_bb_coord_new[net],
+                  ts_bb_edge_new[net],
                   pin_old_loc,
                   pin_new_loc);
     }
 }
 
+static void update_net_layer_bb(const ClusterNetId net,
+                                const t_pl_blocks_to_be_moved& blocks_affected,
+                                int iblk,
+                                const ClusterBlockId blk,
+                                const ClusterPinId blk_pin) {
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+
+    if (cluster_ctx.clb_nlist.net_sinks(net).size() < SMALL_NET) {
+        //For small nets brute-force bounding box update is faster
+
+        if (bb_updated_before[net] == NOT_UPDATED_YET) { //Only once per-net
+            get_non_updateable_bb(net,
+                                  ts_bb_coord_new[net]);
+        }
+    } else {
+        //For large nets, update bounding box incrementally
+        int iblk_pin = tile_pin_index(blk_pin);
+
+        t_physical_tile_type_ptr blk_type = physical_tile_type(blk);
+        int pin_width_offset = blk_type->pin_width_offset[iblk_pin];
+        int pin_height_offset = blk_type->pin_height_offset[iblk_pin];
+
+        //Incremental bounding box update
+        t_physical_tile_loc pin_old_loc(
+            blocks_affected.moved_blocks[iblk].old_loc.x + pin_width_offset,
+            blocks_affected.moved_blocks[iblk].old_loc.y + pin_height_offset,
+            blocks_affected.moved_blocks[iblk].old_loc.layer);
+        t_physical_tile_loc pin_new_loc(
+            blocks_affected.moved_blocks[iblk].new_loc.x + pin_width_offset,
+            blocks_affected.moved_blocks[iblk].new_loc.y + pin_height_offset,
+            blocks_affected.moved_blocks[iblk].new_loc.layer);
+        auto pin_dir = get_pin_type_from_pin_physical_num(blk_type, iblk_pin);
+        update_layer_bb(net,
+                        layer_ts_bb_edge_new[net],
+                        layer_ts_bb_coord_new[net],
+                        ts_layer_sink_pin_count[net],
+                        pin_old_loc,
+                        pin_new_loc,
+                        pin_dir);
+    }
+}
+
 /**
  * @brief Calculate the new connection delay and timing cost of all the
  *        sink pins affected by moving a specific pin to a new location.

From a9e67c35612a32903743cbaba6382ae85652079b Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 2 Oct 2023 13:00:23 -0400
Subject: [PATCH 132/257] fix a problem with update_move_nets (use correct data
 structure when multiple layers are used)

---
 vpr/src/place/place.cpp | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 2936918fbb6..a939fc63e51 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -148,8 +148,8 @@ static vtr::NdMatrix<float, 2> chany_place_cost_fac({0, 0}); //[0...device_ctx.g
 
 /* The following arrays are used by the try_swap function for speed.   */
 /* [0...cluster_ctx.clb_nlist.nets().size()-1] */
-static vtr::vector<ClusterNetId, t_bb> ts_bb_coord_new, ts_bb_edge_new;
-static vtr::vector<ClusterNetId, std::vector<t_2D_tbb>> layer_ts_bb_coord_new, layer_ts_bb_edge_new;
+static vtr::vector<ClusterNetId, t_bb> ts_bb_edge_new, ts_bb_coord_new;
+static vtr::vector<ClusterNetId, std::vector<t_2D_tbb>> layer_ts_bb_edge_new, layer_ts_bb_coord_new;
 static vtr::vector<ClusterNetId, std::vector<int>> ts_layer_sink_pin_count;
 static std::vector<ClusterNetId> ts_nets_to_update;
 
@@ -1467,14 +1467,27 @@ static void update_move_nets(int num_nets_affected) {
     auto& cluster_ctx = g_vpr_ctx.clustering();
     auto& place_move_ctx = g_placer_ctx.mutable_move();
 
+    int num_layers = g_vpr_ctx.device().grid.get_num_layers();
+
     for (int inet_affected = 0; inet_affected < num_nets_affected;
          inet_affected++) {
         ClusterNetId net_id = ts_nets_to_update[inet_affected];
 
-        place_move_ctx.bb_coords[net_id] = ts_bb_coord_new[net_id];
-        place_move_ctx.num_sink_pin_layer[net_id] = ts_layer_sink_pin_count[net_id];
-        if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET)
-            place_move_ctx.bb_num_on_edges[net_id] = ts_bb_edge_new[net_id];
+        if (num_layers == 1) {
+            place_move_ctx.bb_coords[net_id] = ts_bb_coord_new[net_id];
+        } else {
+            place_move_ctx.layer_bb_coords[net_id] = layer_ts_bb_coord_new[net_id];
+            place_move_ctx.num_sink_pin_layer[net_id] = ts_layer_sink_pin_count[net_id];
+        }
+
+
+        if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET) {
+            if (num_layers == 1) {
+                place_move_ctx.bb_num_on_edges[net_id] = ts_bb_edge_new[net_id];
+            } else {
+                place_move_ctx.layer_bb_num_on_edges[net_id] = layer_ts_bb_edge_new[net_id];
+            }
+        }
 
         net_cost[net_id] = proposed_net_cost[net_id];
 
@@ -1998,8 +2011,9 @@ static void update_net_layer_bb(const ClusterNetId net,
         //For small nets brute-force bounding box update is faster
 
         if (bb_updated_before[net] == NOT_UPDATED_YET) { //Only once per-net
-            get_non_updateable_bb(net,
-                                  ts_bb_coord_new[net]);
+            get_non_updateable_layer_bb(net,
+                                        layer_ts_bb_coord_new[net],
+                                        ts_layer_sink_pin_count[net]);
         }
     } else {
         //For large nets, update bounding box incrementally

From 514c68c73d85002c2c73c91ce95c1bf08bc8a192 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 2 Oct 2023 16:35:12 -0400
Subject: [PATCH 133/257] solve the bug in the order of parameters passed to
 update_bb

---
 vpr/src/place/place.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index a939fc63e51..9d5a1b48863 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -1884,7 +1884,7 @@ static int find_affected_nets_and_update_costs(
     VTR_ASSERT_SAFE(bb_delta_c == 0.);
     VTR_ASSERT_SAFE(timing_delta_c == 0.);
     auto& cluster_ctx = g_vpr_ctx.clustering();
-    int num_layers = g_vpr_ctx.device().grid.get_num_layers();
+    const int num_layers = g_vpr_ctx.device().grid.get_num_layers();
 
     int num_affected_nets = 0;
 
@@ -1993,8 +1993,8 @@ static void update_net_bb(const ClusterNetId net,
             blocks_affected.moved_blocks[iblk].new_loc.y + pin_height_offset,
             blocks_affected.moved_blocks[iblk].new_loc.layer);
         update_bb(net,
-                  ts_bb_coord_new[net],
                   ts_bb_edge_new[net],
+                  ts_bb_coord_new[net],
                   pin_old_loc,
                   pin_new_loc);
     }

From b8a20434544bfc2cde5ea32168656c02ed0cc026 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 2 Oct 2023 17:10:17 -0400
Subject: [PATCH 134/257] minor formatting

---
 vpr/src/place/place.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 9d5a1b48863..766ed690f10 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -2608,8 +2608,8 @@ static void alloc_and_load_try_swap_structs() {
     const int num_layers = g_vpr_ctx.device().grid.get_num_layers();
 
     if (num_layers == 1) {
-        ts_bb_coord_new.resize(num_nets, t_bb());
         ts_bb_edge_new.resize(num_nets, t_bb());
+        ts_bb_coord_new.resize(num_nets, t_bb());
     } else {
         VTR_ASSERT(num_layers > 1);
         layer_ts_bb_edge_new.resize(num_nets, std::vector<t_2D_tbb>(num_layers, t_2D_tbb()));
@@ -3139,8 +3139,8 @@ static void update_bb(ClusterNetId net_id,
         return;
     } else if (bb_updated_before[net_id] == NOT_UPDATED_YET) {
         /* The net had NOT been updated before, could use the old values */
-        curr_bb_coord = &place_move_ctx.bb_coords[net_id];
         curr_bb_edge = &place_move_ctx.bb_num_on_edges[net_id];
+        curr_bb_coord = &place_move_ctx.bb_coords[net_id];
         bb_updated_before[net_id] = UPDATED_ONCE;
     } else {
         /* The net had been updated before, must use the new values */

From 1227a4199b07e0bbc54859184f5f6474c52fa89c Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 2 Oct 2023 17:29:12 -0400
Subject: [PATCH 135/257] use a single layer num to print a node's coordinate

---
 libs/librrgraph/src/base/rr_graph_view.h | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/libs/librrgraph/src/base/rr_graph_view.h b/libs/librrgraph/src/base/rr_graph_view.h
index f1a9efcaa13..9940ef85ee3 100644
--- a/libs/librrgraph/src/base/rr_graph_view.h
+++ b/libs/librrgraph/src/base/rr_graph_view.h
@@ -238,12 +238,11 @@ class RRGraphView {
      * This function is inlined for runtime optimization.
      */
     inline const std::string node_coordinate_to_string(RRNodeId node) const {
-        std::string start_layer_num;
-        std::string end_layer_num;
         std::string start_x;                                           //start x-coordinate
         std::string start_y;                                           //start y-coordinate
         std::string end_x;                                             //end x-coordinate
         std::string end_y;                                             //end y-coordinate
+        std::string layer_num_str;                                     //layer number
         std::string arrow;                                             //direction arrow
         std::string coordinate_string = node_type_string(node);        //write the component's type as a routing resource node
         coordinate_string += ":" + std::to_string(size_t(node)) + " "; //add the index of the routing resource node
@@ -260,12 +259,12 @@ class RRGraphView {
             // and the end to the lower coordinate
             start_x =  " (" + std::to_string(node_xhigh(node)) + ","; //start and end coordinates are the same for OPINs and IPINs
             start_y = std::to_string(node_yhigh(node)) + ",";
-            start_layer_num = std::to_string(node_layer(node)) + ")";
+            layer_num_str = std::to_string(node_layer(node)) + ")";
         } else if (node_type(node) == SOURCE || node_type(node) == SINK) {
             // For SOURCE and SINK the starting and ending coordinate are identical, so just use start
             start_x = " (" + std::to_string(node_xhigh(node)) + ",";
             start_y = std::to_string(node_yhigh(node)) + ",";
-            start_layer_num = std::to_string(node_layer(node)) + ")";
+            layer_num_str = std::to_string(node_layer(node)) + ")";
         } else if (node_type(node) == CHANX || node_type(node) == CHANY) { //for channels, we would like to describe the component with segment specific information
             RRIndexedDataId cost_index = node_cost_index(node);
             int seg_index = rr_indexed_data_[cost_index].seg_index;
@@ -279,28 +278,26 @@ class RRGraphView {
 
                 start_x = " (" + std::to_string(node_xhigh(node)) + ","; //start coordinates have large value
                 start_y = std::to_string(node_yhigh(node)) + ",";
-                start_layer_num = std::to_string(node_layer(node)) + ")"; //layer number
                 end_x = " (" + std::to_string(node_xlow(node)) + ","; //end coordinates have smaller value
                 end_y = std::to_string(node_ylow(node)) + ",";
-                end_layer_num = std::to_string(node_layer(node)) + ")";
+                layer_num_str = std::to_string(node_layer(node)) + ")";
             }
 
             else {                                                      // signal travels in increasing direction, stays at same point, or can travel both directions
                 start_x = " (" + std::to_string(node_xlow(node)) + ","; //start coordinates have smaller value
                 start_y = std::to_string(node_ylow(node)) + ",";
-                start_layer_num = std::to_string(node_layer(node)) + ")"; //layer number
                 end_x = " (" + std::to_string(node_xhigh(node)) + ","; //end coordinates have larger value
                 end_y = std::to_string(node_yhigh(node)) + ",";
-                end_layer_num = std::to_string(node_layer(node)) + ")";
+                layer_num_str = std::to_string(node_layer(node)) + ")"; //layer number
                 if (node_direction(node) == Direction::BIDIR) {
                     arrow = "<->"; //indicate that signal can travel both direction
                 }
             }
         }
 
-        coordinate_string +=  start_x + start_y + start_layer_num; //Write the starting coordinates
+        coordinate_string +=  start_x + start_y + layer_num_str; //Write the starting coordinates
         coordinate_string += arrow;             //Indicate the direction
-        coordinate_string += end_x + end_y + end_layer_num;     //Write the end coordinates
+        coordinate_string += end_x + end_y + layer_num_str;     //Write the end coordinates
         return coordinate_string;
     }
 

From d9fa13761fff9403768abde2695adb447253ad12 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 2 Oct 2023 17:40:11 -0400
Subject: [PATCH 136/257] add write_initial_placement option to place opt

---
 vpr/src/base/SetupVPR.cpp | 2 ++
 vpr/src/base/vpr_types.h  | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp
index b5492a1f8ec..42f9b913b65 100644
--- a/vpr/src/base/SetupVPR.cpp
+++ b/vpr/src/base/SetupVPR.cpp
@@ -622,6 +622,8 @@ static void SetupPlacerOpts(const t_options& Options, t_placer_opts* PlacerOpts)
 
     PlacerOpts->constraints_file = Options.constraints_file;
 
+    PlacerOpts->write_initial_place_file = Options.write_initial_place_file;
+
     PlacerOpts->pad_loc_type = Options.pad_loc_type;
 
     PlacerOpts->place_chan_width = Options.PlaceChanWidth;
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index ccedd4a24a6..63291441917 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -1145,6 +1145,9 @@ enum class e_place_delta_delay_algorithm {
  *   @param constraints_file
  *              File that specifies locations of locked down (constrained)
  *              blocks for placement. Empty string means no constraints file.
+ *   @param write_initial_place_file
+ *              Write the initial placement into this file. Empty string means
+ *              the initial placement is not written.
  *   @param pad_loc_file
  *              File to read pad locations from if pad_loc_type is USER.
  *   @param place_freq
@@ -1187,6 +1190,7 @@ struct t_placer_opts {
     int place_chan_width;
     enum e_pad_loc_type pad_loc_type;
     std::string constraints_file;
+    std::string write_initial_place_file;
     enum pfreq place_freq;
     int recompute_crit_iter;
     int inner_loop_recompute_divider;

From ec3a8ac4bf200e2d37fa6d301b699a599b502324 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 2 Oct 2023 17:49:47 -0400
Subject: [PATCH 137/257] add is_init_place to print_place method

---
 vpr/src/base/place_and_route.cpp    | 2 +-
 vpr/src/base/read_place.cpp         | 9 +++++++--
 vpr/src/base/read_place.h           | 3 ++-
 vpr/src/base/vpr_api.cpp            | 2 +-
 vpr/src/base/vpr_context.h          | 7 +++++++
 vpr/src/base/vpr_signal_handler.cpp | 2 +-
 vpr/src/place/place.cpp             | 6 +++---
 7 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/vpr/src/base/place_and_route.cpp b/vpr/src/base/place_and_route.cpp
index c34cd9cfbae..65a66f331d2 100644
--- a/vpr/src/base/place_and_route.cpp
+++ b/vpr/src/base/place_and_route.cpp
@@ -355,7 +355,7 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list,
                     auto& cluster_ctx = g_vpr_ctx.clustering();
                     // Cluster-based net_list is used for placement
                     print_place(filename_opts.NetFile.c_str(), cluster_ctx.clb_nlist.netlist_id().c_str(),
-                                filename_opts.PlaceFile.c_str());
+                                filename_opts.PlaceFile.c_str(), false);
                 }
             }
 
diff --git a/vpr/src/base/read_place.cpp b/vpr/src/base/read_place.cpp
index 17c40e4781e..cecda65aca6 100644
--- a/vpr/src/base/read_place.cpp
+++ b/vpr/src/base/read_place.cpp
@@ -309,7 +309,8 @@ void read_place_body(std::ifstream& placement_file,
  */
 void print_place(const char* net_file,
                  const char* net_id,
-                 const char* place_file) {
+                 const char* place_file,
+                 bool is_initial_place) {
     FILE* fp;
 
     auto& device_ctx = g_vpr_ctx.device();
@@ -342,5 +343,9 @@ void print_place(const char* net_file,
     fclose(fp);
 
     //Calculate the ID of the placement
-    place_ctx.placement_id = vtr::secure_digest_file(place_file);
+    if(is_initial_place) {
+        place_ctx.initial_placement_id = vtr::secure_digest_file(place_file);
+    } else {
+        place_ctx.placement_id = vtr::secure_digest_file(place_file);
+    }
 }
diff --git a/vpr/src/base/read_place.h b/vpr/src/base/read_place.h
index 36740a5dc5d..06c38cc629d 100644
--- a/vpr/src/base/read_place.h
+++ b/vpr/src/base/read_place.h
@@ -19,6 +19,7 @@ void read_constraints(const char* constraints_file);
 
 void print_place(const char* net_file,
                  const char* net_id,
-                 const char* place_file);
+                 const char* place_file,
+                 bool is_initial_place);
 
 #endif
diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp
index 9f379f84e42..89972fbb4bb 100644
--- a/vpr/src/base/vpr_api.cpp
+++ b/vpr/src/base/vpr_api.cpp
@@ -755,7 +755,7 @@ void vpr_place(const Netlist<>& net_list, t_vpr_setup& vpr_setup, const t_arch&
 
     print_place(filename_opts.NetFile.c_str(),
                 cluster_ctx.clb_nlist.netlist_id().c_str(),
-                filename_opts.PlaceFile.c_str());
+                filename_opts.PlaceFile.c_str(), false);
 }
 
 void vpr_load_placement(t_vpr_setup& vpr_setup, const t_arch& arch) {
diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h
index 82e7be31249..49904cde327 100644
--- a/vpr/src/base/vpr_context.h
+++ b/vpr/src/base/vpr_context.h
@@ -390,6 +390,13 @@ struct PlacementContext : public Context {
      */
     t_compressed_block_grids compressed_block_grids;
 
+    /**
+     * @brief SHA256 digest of the .init.place file
+     *
+     * Used for unique identification and consistency checking
+     */
+    std::string initial_placement_id;
+
     /**
      * @brief SHA256 digest of the .place file
      *
diff --git a/vpr/src/base/vpr_signal_handler.cpp b/vpr/src/base/vpr_signal_handler.cpp
index a8fff7b4394..efe95d100e6 100644
--- a/vpr/src/base/vpr_signal_handler.cpp
+++ b/vpr/src/base/vpr_signal_handler.cpp
@@ -90,7 +90,7 @@ void checkpoint() {
 
     std::string placer_checkpoint_file = "placer_checkpoint.place";
     VTR_LOG("Attempting to checkpoint current placement to file: %s\n", placer_checkpoint_file.c_str());
-    print_place(nullptr, nullptr, placer_checkpoint_file.c_str());
+    print_place(nullptr, nullptr, placer_checkpoint_file.c_str(), false);
 
     std::string router_checkpoint_file = "router_checkpoint.route";
     VTR_LOG("Attempting to checkpoint current routing to file: %s\n", router_checkpoint_file.c_str());
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 766ed690f10..3503c3a138e 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -825,7 +825,7 @@ void try_place(const Netlist<>& net_list,
         std::string filename = vtr::string_fmt("placement_%03d_%03d.place", 0,
                                                0);
         VTR_LOG("Saving initial placement to file: %s\n", filename.c_str());
-        print_place(nullptr, nullptr, filename.c_str());
+        print_place(nullptr, nullptr, filename.c_str(), false);
     }
 
     first_move_lim = get_initial_move_lim(placer_opts, annealing_sched);
@@ -1060,7 +1060,7 @@ void try_place(const Netlist<>& net_list,
         std::string filename = vtr::string_fmt("placement_%03d_%03d.place",
                                                state.num_temps + 1, 0);
         VTR_LOG("Saving final placement to file: %s\n", filename.c_str());
-        print_place(nullptr, nullptr, filename.c_str());
+        print_place(nullptr, nullptr, filename.c_str(), false);
     }
 
     // TODO:
@@ -1293,7 +1293,7 @@ static void placement_inner_loop(const t_annealing_state* state,
             VTR_LOG(
                 "Saving placement to file at temperature move %d / %d: %s\n",
                 inner_iter, state->move_lim, filename.c_str());
-            print_place(nullptr, nullptr, filename.c_str());
+            print_place(nullptr, nullptr, filename.c_str(), false);
             ++inner_placement_save_count;
         }
     }

From a90955fe36041a299b7a119aa13cf93d62158c11 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 2 Oct 2023 18:02:12 -0400
Subject: [PATCH 138/257] print initial placement if the parameter is passed

---
 vpr/src/place/place.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 3503c3a138e..8f371bde318 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -635,6 +635,13 @@ void try_place(const Netlist<>& net_list,
                       placer_opts.constraints_file.c_str(),
                       noc_opts.noc);
 
+    if (!placer_opts.write_initial_place_file.empty()) {
+        print_place(nullptr,
+                    nullptr,
+                    (placer_opts.write_initial_place_file + ".init.place").c_str(),
+                    true);
+    }
+
 #ifdef ENABLE_ANALYTIC_PLACE
     /*
      * Analytic Placer:

From 98cfa497d932cb28e36379922ccb9c2b0144a423 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 2 Oct 2023 18:11:40 -0400
Subject: [PATCH 139/257] add an assertion to t_2D_tbb to check that layer_num
 is bigger than or equal to 0

---
 vpr/src/base/vpr_types.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 63291441917..90297e18ffb 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -570,9 +570,8 @@ struct t_net_power {
 };
 
 /**
- * @brief Stores the bounding box of a net in terms of the minimum and
- *        maximum coordinates of the blocks forming the net, clipped to
- *        the region: (1..device_ctx.grid.width()-2, 1..device_ctx.grid.height()-1, 0..device_ctx.grid.num_layers()-1)
+ * @brief Stores a 3D bounding box in terms of the minimum and
+ *        maximum coordinates: x, y, layer
  */
 struct t_bb {
     t_bb() = default;
@@ -609,6 +608,7 @@ struct t_2D_tbb {
         , layer_num(layer_num_) {
         VTR_ASSERT(xmax_ >= xmin_);
         VTR_ASSERT(ymax_ >= ymin_);
+        VTR_ASSERT(layer_num_ >= 0);
     }
     int xmin = OPEN;
     int xmax = OPEN;

From f2eb8799efe9f804861f81f9b6ed9a84a19c3ea1 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 2 Oct 2023 18:19:00 -0400
Subject: [PATCH 140/257] rename t_2D_tbb to *_bb

---
 vpr/src/base/vpr_types.h       |   6 +-
 vpr/src/place/move_utils.cpp   |  14 ++---
 vpr/src/place/move_utils.h     |   2 +-
 vpr/src/place/place.cpp        | 100 ++++++++++++++++-----------------
 vpr/src/place/placer_context.h |   4 +-
 5 files changed, 63 insertions(+), 63 deletions(-)

diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 90297e18ffb..fb6d4ef614b 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -598,9 +598,9 @@ struct t_bb {
  * @brief Stores a 2D bounding box.
  * @note layer_num indicates the layer of the bounding box
  */
-struct t_2D_tbb {
-    t_2D_tbb() = default;
-    t_2D_tbb(int xmin_, int xmax_, int ymin_, int ymax_, int layer_num_)
+struct t_2D_bb {
+    t_2D_bb() = default;
+    t_2D_bb(int xmin_, int xmax_, int ymin_, int ymax_, int layer_num_)
         : xmin(xmin_)
         , xmax(xmax_)
         , ymin(ymin_)
diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index 367eef97f8b..7a628b23a10 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -1329,18 +1329,18 @@ int get_random_layer(t_logical_block_type_ptr logical_block) {
     return layer_num;
 }
 
-t_bb union_2d_tbb(const std::vector<t_2D_tbb>& tbb_vec) {
+t_bb union_2d_bb(const std::vector<t_2D_bb>& bb_vec) {
     int num_layers = g_vpr_ctx.device().grid.get_num_layers();
-    VTR_ASSERT_SAFE((int)tbb_vec.size() == num_layers);
-    t_bb merged_bb(tbb_vec[0].xmin,
-                   tbb_vec[0].xmax,
-                   tbb_vec[0].ymin,
-                   tbb_vec[0].ymax,
+    VTR_ASSERT_SAFE((int)bb_vec.size() == num_layers);
+    t_bb merged_bb(bb_vec[0].xmin,
+                   bb_vec[0].xmax,
+                   bb_vec[0].ymin,
+                   bb_vec[0].ymax,
                    0,
                    num_layers - 1);
 
     for (int layer_num = 1; layer_num < num_layers; layer_num++) {
-        const auto& layer_bb = tbb_vec[layer_num];
+        const auto& layer_bb = bb_vec[layer_num];
         if (layer_bb.xmin == OPEN) {
             continue;
         }
diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h
index 5ef1b9f83f4..fc6b344e4d6 100644
--- a/vpr/src/place/move_utils.h
+++ b/vpr/src/place/move_utils.h
@@ -330,7 +330,7 @@ int find_free_layer(t_logical_block_type_ptr logical_block, const t_pl_loc& loc)
 
 int get_random_layer(t_logical_block_type_ptr logical_block);
 
-t_bb union_2d_tbb(const std::vector<t_2D_tbb>& tbb_vec);
+t_bb union_2d_bb(const std::vector<t_2D_bb>& tbb_vec);
 
 #ifdef VTR_ENABLE_DEBUG_LOGGING
 /**
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 8f371bde318..a0ec5d0aae0 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -149,7 +149,7 @@ static vtr::NdMatrix<float, 2> chany_place_cost_fac({0, 0}); //[0...device_ctx.g
 /* The following arrays are used by the try_swap function for speed.   */
 /* [0...cluster_ctx.clb_nlist.nets().size()-1] */
 static vtr::vector<ClusterNetId, t_bb> ts_bb_edge_new, ts_bb_coord_new;
-static vtr::vector<ClusterNetId, std::vector<t_2D_tbb>> layer_ts_bb_edge_new, layer_ts_bb_coord_new;
+static vtr::vector<ClusterNetId, std::vector<t_2D_bb>> layer_ts_bb_edge_new, layer_ts_bb_coord_new;
 static vtr::vector<ClusterNetId, std::vector<int>> ts_layer_sink_pin_count;
 static std::vector<ClusterNetId> ts_nets_to_update;
 
@@ -334,7 +334,7 @@ static void get_non_updateable_bb(ClusterNetId net_id,
                                   t_bb& bb_coord_new);
 
 static void get_non_updateable_layer_bb(ClusterNetId net_id,
-                                        std::vector<t_2D_tbb>& bb_coord_new,
+                                        std::vector<t_2D_bb>& bb_coord_new,
                                         std::vector<int>& num_sink_layer);
 
 static void update_bb(ClusterNetId net_id,
@@ -344,8 +344,8 @@ static void update_bb(ClusterNetId net_id,
                       t_physical_tile_loc pin_new_loc);
 
 static void update_layer_bb(ClusterNetId net_id,
-                            std::vector<t_2D_tbb>& bb_edge_new,
-                            std::vector<t_2D_tbb>& bb_coord_new,
+                            std::vector<t_2D_bb>& bb_edge_new,
+                            std::vector<t_2D_bb>& bb_coord_new,
                             std::vector<int>& bb_pin_sink_count_new,
                             t_physical_tile_loc pin_old_loc,
                             t_physical_tile_loc pin_new_loc,
@@ -354,20 +354,20 @@ static void update_layer_bb(ClusterNetId net_id,
 static inline void update_bb_same_layer(ClusterNetId net_id,
                                         const t_physical_tile_loc& pin_old_loc,
                                         const t_physical_tile_loc& pin_new_loc,
-                                        const std::vector<t_2D_tbb>& curr_bb_edge,
-                                        const std::vector<t_2D_tbb>& curr_bb_coord,
+                                        const std::vector<t_2D_bb>& curr_bb_edge,
+                                        const std::vector<t_2D_bb>& curr_bb_coord,
                                         std::vector<int>& bb_pin_sink_count_new,
-                                        std::vector<t_2D_tbb>& bb_edge_new,
-                                        std::vector<t_2D_tbb>& bb_coord_new);
+                                        std::vector<t_2D_bb>& bb_edge_new,
+                                        std::vector<t_2D_bb>& bb_coord_new);
 
 static inline void update_bb_layer_changed(ClusterNetId net_id,
                                            const t_physical_tile_loc& pin_old_loc,
                                            const t_physical_tile_loc& pin_new_loc,
-                                           const std::vector<t_2D_tbb>& curr_bb_edge,
-                                           const std::vector<t_2D_tbb>& curr_bb_coord,
+                                           const std::vector<t_2D_bb>& curr_bb_edge,
+                                           const std::vector<t_2D_bb>& curr_bb_coord,
                                            std::vector<int>& bb_pin_sink_count_new,
-                                           std::vector<t_2D_tbb>& bb_edge_new,
-                                           std::vector<t_2D_tbb>& bb_coord_new);
+                                           std::vector<t_2D_bb>& bb_edge_new,
+                                           std::vector<t_2D_bb>& bb_coord_new);
 
 static void update_bb_pin_sink_count(ClusterNetId net_id,
                                      const t_physical_tile_loc& pin_old_loc,
@@ -377,8 +377,8 @@ static void update_bb_pin_sink_count(ClusterNetId net_id,
                                      bool is_output_pin);
 
 static inline void update_bb_edge(ClusterNetId net_id,
-                                  std::vector<t_2D_tbb>& bb_edge_new,
-                                  std::vector<t_2D_tbb>& bb_coord_new,
+                                  std::vector<t_2D_bb>& bb_edge_new,
+                                  std::vector<t_2D_bb>& bb_coord_new,
                                   std::vector<int>& bb_layer_pin_sink_count,
                                   const int& old_num_block_on_edge,
                                   const int& old_edge_coord,
@@ -386,10 +386,10 @@ static inline void update_bb_edge(ClusterNetId net_id,
                                   int& new_edge_coord);
 
 static void add_block_to_bb(const t_physical_tile_loc& new_pin_loc,
-                            const t_2D_tbb& bb_edge_old,
-                            const t_2D_tbb& bb_coord_old,
-                            t_2D_tbb& bb_edge_new,
-                            t_2D_tbb& bb_coord_new);
+                            const t_2D_bb& bb_edge_old,
+                            const t_2D_bb& bb_coord_old,
+                            t_2D_bb& bb_edge_new,
+                            t_2D_bb& bb_coord_new);
 
 static int find_affected_nets_and_update_costs(
     const t_place_algorithm& place_algorithm,
@@ -427,7 +427,7 @@ static double get_total_cost(t_placer_costs* costs, const t_placer_opts& placer_
 static double get_net_cost(ClusterNetId net_id, const t_bb& bbptr);
 
 static double get_net_layer_cost(ClusterNetId /* net_id */,
-                                 const std::vector<t_2D_tbb>& bbptr,
+                                 const std::vector<t_2D_bb>& bbptr,
                                  const std::vector<int>& layer_pin_sink_count);
 
 static void get_bb_from_scratch(ClusterNetId net_id,
@@ -435,14 +435,14 @@ static void get_bb_from_scratch(ClusterNetId net_id,
                                 t_bb& num_on_edges);
 
 static void get_layer_bb_from_scratch(ClusterNetId net_id,
-                                      std::vector<t_2D_tbb>& num_on_edges,
-                                      std::vector<t_2D_tbb>& coords,
+                                      std::vector<t_2D_bb>& num_on_edges,
+                                      std::vector<t_2D_bb>& coords,
                                       std::vector<int>& layer_pin_sink_count);
 
 static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bbptr);
 
 static double get_net_layer_wirelength_estimate(ClusterNetId /* net_id */,
-                                                const std::vector<t_2D_tbb>& bbptr,
+                                                const std::vector<t_2D_bb>& bbptr,
                                                 const std::vector<int>& layer_pin_sink_count);
 
 static void free_try_swap_arrays();
@@ -2545,8 +2545,8 @@ static void alloc_and_load_placement_structs(float place_cost_exp,
         place_move_ctx.bb_num_on_edges.resize(num_nets, t_bb());
     } else {
         VTR_ASSERT(num_layers > 1);
-        place_move_ctx.layer_bb_num_on_edges.resize(num_nets, std::vector<t_2D_tbb>(num_layers, t_2D_tbb()));
-        place_move_ctx.layer_bb_coords.resize(num_nets, std::vector<t_2D_tbb>(num_layers, t_2D_tbb()));
+        place_move_ctx.layer_bb_num_on_edges.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
+        place_move_ctx.layer_bb_coords.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
         place_move_ctx.num_sink_pin_layer.resize(num_nets, std::vector<int>(num_layers, 0));
     }
 
@@ -2619,8 +2619,8 @@ static void alloc_and_load_try_swap_structs() {
         ts_bb_coord_new.resize(num_nets, t_bb());
     } else {
         VTR_ASSERT(num_layers > 1);
-        layer_ts_bb_edge_new.resize(num_nets, std::vector<t_2D_tbb>(num_layers, t_2D_tbb()));
-        layer_ts_bb_coord_new.resize(num_nets, std::vector<t_2D_tbb>(num_layers, t_2D_tbb()));
+        layer_ts_bb_edge_new.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
+        layer_ts_bb_coord_new.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
         ts_layer_sink_pin_count.resize(num_nets, std::vector<int>(num_layers, OPEN));
     }
     ts_nets_to_update.resize(num_nets, ClusterNetId::INVALID());
@@ -2737,13 +2737,13 @@ static void get_bb_from_scratch(ClusterNetId net_id, t_bb& coords, t_bb& num_on_
  * coordinate and number of pins on each edge information.  It           *
  * should only be called when the bounding box information is not valid. */
 static void get_layer_bb_from_scratch(ClusterNetId net_id,
-                                      std::vector<t_2D_tbb>& num_on_edges,
-                                      std::vector<t_2D_tbb>& coords,
+                                      std::vector<t_2D_bb>& num_on_edges,
+                                      std::vector<t_2D_bb>& coords,
                                       std::vector<int>& layer_pin_sink_count) {
     auto& device_ctx = g_vpr_ctx.device();
     const int num_layers = device_ctx.grid.get_num_layers();
-    num_on_edges.resize(num_layers, t_2D_tbb());
-    coords.resize(num_layers, t_2D_tbb());
+    num_on_edges.resize(num_layers, t_2D_bb());
+    coords.resize(num_layers, t_2D_bb());
     layer_pin_sink_count.resize(num_layers, 0);
     std::vector<int> xmin(num_layers, OPEN);
     std::vector<int> xmax(num_layers, OPEN);
@@ -2884,7 +2884,7 @@ static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bbptr
 }
 
 static double get_net_layer_wirelength_estimate(ClusterNetId /* net_id */,
-                                                const std::vector<t_2D_tbb>& bbptr,
+                                                const std::vector<t_2D_bb>& bbptr,
                                                 const std::vector<int>& layer_pin_sink_count) {
     /* WMF: Finds the estimate of wirelength due to one net by looking at   *
      * its coordinate bounding box.                                         */
@@ -2942,7 +2942,7 @@ static double get_net_cost(ClusterNetId net_id, const t_bb& bbptr) {
 }
 
 static double get_net_layer_cost(ClusterNetId /* net_id */,
-                                 const std::vector<t_2D_tbb>& bbptr,
+                                 const std::vector<t_2D_bb>& bbptr,
                                  const std::vector<int>& layer_pin_sink_count) {
     /* Finds the cost due to one net by looking at its coordinate bounding  *
      * box.                                                                 */
@@ -3042,7 +3042,7 @@ static void get_non_updateable_bb(ClusterNetId net_id,
 }
 
 static void get_non_updateable_layer_bb(ClusterNetId net_id,
-                                        std::vector<t_2D_tbb>& bb_coord_new,
+                                        std::vector<t_2D_bb>& bb_coord_new,
                                         std::vector<int>& num_sink_layer) {
     //TODO: account for multiple physical pin instances per logical pin
 
@@ -3307,8 +3307,8 @@ static void update_bb(ClusterNetId net_id,
 }
 
 static void update_layer_bb(ClusterNetId net_id,
-                            std::vector<t_2D_tbb>& bb_edge_new,
-                            std::vector<t_2D_tbb>& bb_coord_new,
+                            std::vector<t_2D_bb>& bb_edge_new,
+                            std::vector<t_2D_bb>& bb_coord_new,
                             std::vector<int>& bb_pin_sink_count_new,
                             t_physical_tile_loc pin_old_loc,
                             t_physical_tile_loc pin_new_loc,
@@ -3326,7 +3326,7 @@ static void update_layer_bb(ClusterNetId net_id,
      * The x and y coordinates are the pin's x and y coordinates.         */
     /* IO blocks are considered to be one cell in for simplicity.         */
     //TODO: account for multiple physical pin instances per logical pin
-    const std::vector<t_2D_tbb> *curr_bb_edge, *curr_bb_coord;
+    const std::vector<t_2D_bb> *curr_bb_edge, *curr_bb_coord;
     const std::vector<int>* curr_layer_pin_sink_count;
 
     auto& device_ctx = g_vpr_ctx.device();
@@ -3396,11 +3396,11 @@ static void update_layer_bb(ClusterNetId net_id,
 static inline void update_bb_same_layer(ClusterNetId net_id,
                                         const t_physical_tile_loc& pin_old_loc,
                                         const t_physical_tile_loc& pin_new_loc,
-                                        const std::vector<t_2D_tbb>& curr_bb_edge,
-                                        const std::vector<t_2D_tbb>& curr_bb_coord,
+                                        const std::vector<t_2D_bb>& curr_bb_edge,
+                                        const std::vector<t_2D_bb>& curr_bb_coord,
                                         std::vector<int>& bb_pin_sink_count_new,
-                                        std::vector<t_2D_tbb>& bb_edge_new,
-                                        std::vector<t_2D_tbb>& bb_coord_new) {
+                                        std::vector<t_2D_bb>& bb_edge_new,
+                                        std::vector<t_2D_bb>& bb_coord_new) {
     int x_old = pin_old_loc.x;
     int x_new = pin_new_loc.x;
 
@@ -3546,11 +3546,11 @@ static inline void update_bb_same_layer(ClusterNetId net_id,
 static inline void update_bb_layer_changed(ClusterNetId net_id,
                                            const t_physical_tile_loc& pin_old_loc,
                                            const t_physical_tile_loc& pin_new_loc,
-                                           const std::vector<t_2D_tbb>& curr_bb_edge,
-                                           const std::vector<t_2D_tbb>& curr_bb_coord,
+                                           const std::vector<t_2D_bb>& curr_bb_edge,
+                                           const std::vector<t_2D_bb>& curr_bb_coord,
                                            std::vector<int>& bb_pin_sink_count_new,
-                                           std::vector<t_2D_tbb>& bb_edge_new,
-                                           std::vector<t_2D_tbb>& bb_coord_new) {
+                                           std::vector<t_2D_bb>& bb_edge_new,
+                                           std::vector<t_2D_bb>& bb_coord_new) {
 
     int x_old = pin_old_loc.x;
 
@@ -3656,8 +3656,8 @@ static void update_bb_pin_sink_count(ClusterNetId /* net_id */,
 }
 
 static inline void update_bb_edge(ClusterNetId net_id,
-                                  std::vector<t_2D_tbb>& bb_edge_new,
-                                  std::vector<t_2D_tbb>& bb_coord_new,
+                                  std::vector<t_2D_bb>& bb_edge_new,
+                                  std::vector<t_2D_bb>& bb_coord_new,
                                   std::vector<int>& bb_layer_pin_sink_count,
                                   const int& old_num_block_on_edge,
                                   const int& old_edge_coord,
@@ -3677,10 +3677,10 @@ static inline void update_bb_edge(ClusterNetId net_id,
 }
 
 static void add_block_to_bb(const t_physical_tile_loc& new_pin_loc,
-                            const t_2D_tbb& bb_edge_old,
-                            const t_2D_tbb& bb_coord_old,
-                            t_2D_tbb& bb_edge_new,
-                            t_2D_tbb& bb_coord_new) {
+                            const t_2D_bb& bb_edge_old,
+                            const t_2D_bb& bb_coord_old,
+                            t_2D_bb& bb_edge_new,
+                            t_2D_bb& bb_coord_new) {
     int x_new = new_pin_loc.x;
     int y_new = new_pin_loc.y;
 
diff --git a/vpr/src/place/placer_context.h b/vpr/src/place/placer_context.h
index bdf914d6ba5..f3d2796332c 100644
--- a/vpr/src/place/placer_context.h
+++ b/vpr/src/place/placer_context.h
@@ -100,10 +100,10 @@ struct PlacerMoveContext : public Context {
 
 
     // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each of a net's bounding box (to allow efficient updates)
-    vtr::vector<ClusterNetId, std::vector<t_2D_tbb>> layer_bb_num_on_edges;
+    vtr::vector<ClusterNetId, std::vector<t_2D_bb>> layer_bb_num_on_edges;
 
     // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the bounding box coordinates of a net's bounding box
-    vtr::vector<ClusterNetId, std::vector<t_2D_tbb>> layer_bb_coords;
+    vtr::vector<ClusterNetId, std::vector<t_2D_bb>> layer_bb_coords;
 
     // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each layer ()
     vtr::vector<ClusterNetId, std::vector<int>> num_sink_pin_layer;

From 90f758979468051a83da9294c97898ab1f04f74e Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 2 Oct 2023 18:22:10 -0400
Subject: [PATCH 141/257] comment on t_2D_bb

---
 vpr/src/base/vpr_types.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index fb6d4ef614b..1bbe65f87fb 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -595,8 +595,8 @@ struct t_bb {
 };
 
 /**
- * @brief Stores a 2D bounding box.
- * @note layer_num indicates the layer of the bounding box
+ * @brief Stores a 2D bounding box in terms of the minimum and maximum x and y
+ * @note layer_num indicates the layer that the bounding box is on.
  */
 struct t_2D_bb {
     t_2D_bb() = default;

From bc67c4ac5a9b7a109c123dc81a175a19d32df41a Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 3 Oct 2023 09:45:48 -0400
Subject: [PATCH 142/257] comment on print_place

---
 vpr/src/base/read_place.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/vpr/src/base/read_place.cpp b/vpr/src/base/read_place.cpp
index cecda65aca6..f473f7bfdaa 100644
--- a/vpr/src/base/read_place.cpp
+++ b/vpr/src/base/read_place.cpp
@@ -306,6 +306,9 @@ void read_place_body(std::ifstream& placement_file,
  *
  * The architecture and netlist files used to generate this placement are recorded
  * in the file to avoid loading a placement with the wrong support file later.
+ *
+ * The is_initial_place bool indicates whether this is the initial placement or not. It is used to determine the
+ * suffix of the output file name.
  */
 void print_place(const char* net_file,
                  const char* net_id,

From 0b358d880e1d7095069b52b7378eb221f0218987 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 3 Oct 2023 09:49:07 -0400
Subject: [PATCH 143/257] comment on layer_num_blocks in noc_storage

---
 vpr/src/noc/noc_storage.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/vpr/src/noc/noc_storage.h b/vpr/src/noc/noc_storage.h
index cc758e0165a..cd0363d48c9 100644
--- a/vpr/src/noc/noc_storage.h
+++ b/vpr/src/noc/noc_storage.h
@@ -141,6 +141,12 @@ class NocStorage {
      * 
      */
     int device_grid_width;
+    /**
+     * @brief Internal reference to the number of blocks at each layer (width * height). This is necessary
+     * to compute a unique key for a given grid location which we can then use
+     * to get the corresponding physical (hard) router at the given grid
+     * location using 'grid_location_to_router_id'.
+     */
     int layer_num_blocks;
 
     // prevent "copying" of this object

From 30320ba288d24325d4b5ddcf2ac531863d7034fa Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 3 Oct 2023 09:54:54 -0400
Subject: [PATCH 144/257] remove the remaining parts of inter layer move

---
 vpr/src/place/RL_agent_util.cpp | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/vpr/src/place/RL_agent_util.cpp b/vpr/src/place/RL_agent_util.cpp
index 5e0162abc63..a2177d4ec5b 100644
--- a/vpr/src/place/RL_agent_util.cpp
+++ b/vpr/src/place/RL_agent_util.cpp
@@ -14,7 +14,6 @@ void create_move_generators(std::unique_ptr<MoveGenerator>& move_generator, std:
             VTR_LOG("Probability of Weighted_median_move : %f \n", placer_opts.place_static_move_prob[4]);
             VTR_LOG("Probability of Timing_feasible_region_move : %f \n", placer_opts.place_static_move_prob[5]);
             VTR_LOG("Probability of Critical_uniform_move : %f \n", placer_opts.place_static_move_prob[6]);
-            VTR_LOG("Probability of Inter Layer Uniform Move : %f \n", placer_opts.place_static_move_prob[7]);
             move_generator = std::make_unique<StaticMoveGenerator>(placer_opts.place_static_move_prob);
             move_generator2 = std::make_unique<StaticMoveGenerator>(placer_opts.place_static_move_prob);
         } else { //Non-timing driven placement
@@ -46,12 +45,7 @@ void create_move_generators(std::unique_ptr<MoveGenerator>& move_generator, std:
          *      This state is activated late in the anneal and in the Quench   */
 
         int num_1st_state_avail_moves = placer_opts.place_algorithm.is_timing_driven() ? NUM_PL_1ST_STATE_MOVE_TYPES : NUM_PL_NONTIMING_MOVE_TYPES;
-        int num_2nd_state_avail_moves;
-        if (placer_opts.place_algorithm.is_timing_driven()) {
-            num_2nd_state_avail_moves = NUM_PL_MOVE_TYPES;
-        } else {
-            num_2nd_state_avail_moves = NUM_PL_NONTIMING_MOVE_TYPES;
-        }
+        int num_2nd_state_avail_moves = placer_opts.place_algorithm.is_timing_driven() ? NUM_PL_MOVE_TYPES : NUM_PL_NONTIMING_MOVE_TYPES;
 
         if (placer_opts.place_agent_algorithm == E_GREEDY) {
             std::unique_ptr<EpsilonGreedyAgent> karmed_bandit_agent1, karmed_bandit_agent2;

From dd9014a66667e4822e48341ca56844551eacf3c4 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 3 Oct 2023 10:03:53 -0400
Subject: [PATCH 145/257] comment on is_multi_layer parameter passed to simple
 rl move generator

---
 vpr/src/place/simpleRL_move_generator.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/vpr/src/place/simpleRL_move_generator.h b/vpr/src/place/simpleRL_move_generator.h
index 714effb3989..1c0f56b9c66 100644
--- a/vpr/src/place/simpleRL_move_generator.h
+++ b/vpr/src/place/simpleRL_move_generator.h
@@ -212,6 +212,8 @@ class SimpleRLMoveGenerator : public MoveGenerator {
      *
      *   @param agent std::unique_ptr to the agent. Only EpsilonGreedyAgent and SoftmaxAgent types are accepted
      *   by the constructor. If other types are passed, a compile error would be thrown.
+     *
+     *   @param is_multi_layer A boolean value to indicate whether the placement is multi-layer or not
      */
     template<class T,
              class = typename std::enable_if<std::is_same<T, EpsilonGreedyAgent>::value || std::is_same<T, SoftmaxAgent>::value>::type>

From a52f48e1f9609265089cc38e3942a017d8792b4a Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 3 Oct 2023 10:48:11 -0400
Subject: [PATCH 146/257] comment on register_block_inter_layer_connection

---
 vpr/src/route/router_lookahead_map.cpp       | 4 ++--
 vpr/src/route/router_lookahead_map_utils.cpp | 6 +++++-
 vpr/src/route/router_lookahead_map_utils.h   | 7 ++++++-
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp
index 8fa32c19152..af5632652df 100644
--- a/vpr/src/route/router_lookahead_map.cpp
+++ b/vpr/src/route/router_lookahead_map.cpp
@@ -585,7 +585,7 @@ void MapLookahead::compute(const std::vector<t_segment_inf>& segment_inf) {
     std::tie(this->src_opin_delays, this->src_opin_inter_layer_delays) = util::compute_router_src_opin_lookahead(is_flat_);
 
     // Store the sinks that have connections to other layers
-    this->inter_layer_connection = util::register_tiles_with_inter_layer_connection_block(is_flat_);
+    this->inter_layer_connection = util::register_block_inter_layer_connection(is_flat_);
 }
 
 void MapLookahead::compute_intra_tile() {
@@ -610,7 +610,7 @@ void MapLookahead::read(const std::string& file) {
     //from the different physical tile type's SOURCEs & OPINs
     std::tie(this->src_opin_delays, this->src_opin_inter_layer_delays) = util::compute_router_src_opin_lookahead(is_flat_);
 
-    this->inter_layer_connection = util::register_tiles_with_inter_layer_connection_block(is_flat_);
+    this->inter_layer_connection = util::register_block_inter_layer_connection(is_flat_);
 }
 
 void MapLookahead::read_intra_cluster(const std::string& file) {
diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp
index 7f429044fa1..702d07ddd03 100644
--- a/vpr/src/route/router_lookahead_map_utils.cpp
+++ b/vpr/src/route/router_lookahead_map_utils.cpp
@@ -409,7 +409,10 @@ std::pair<t_src_opin_delays, t_src_opin_inter_layer_delays> compute_router_src_o
     return std::make_pair(src_opin_delays, src_opin_inter_layer_delays);
 }
 
-t_sink_inter_layer_connection register_tiles_with_inter_layer_connection_block(bool is_flat) {
+t_sink_inter_layer_connection register_block_inter_layer_connection(bool is_flat) {
+    //TODO: This function uses architectural information to determine the inter-layer connections.
+    // However, this information should be extracted from the RR graph. This is a temporary solution.
+    // Moreover, there is an underlying assumption that the connection pattern for each block type on a layer is the same.
     vtr::ScopedStartFinishTimer timer("Computing sink inter layer lookahead");
     auto& device_ctx = g_vpr_ctx.device();
 
@@ -423,6 +426,7 @@ t_sink_inter_layer_connection register_tiles_with_inter_layer_connection_block(b
 
     t_sink_inter_layer_connection inter_layer_conn;
     inter_layer_conn.resize(num_layers);
+    // Resize inter_layer_conn to accommodate all the layers, block types, and number of classes for each block type
     for (int from_layer_num = 0; from_layer_num < num_layers; from_layer_num++) {
         const auto& physical_tiles = device_ctx.physical_tile_types;
         int num_physical_tile_types = (int)device_ctx.physical_tile_types.size();
diff --git a/vpr/src/route/router_lookahead_map_utils.h b/vpr/src/route/router_lookahead_map_utils.h
index 5f74607d6cc..133d61afe47 100644
--- a/vpr/src/route/router_lookahead_map_utils.h
+++ b/vpr/src/route/router_lookahead_map_utils.h
@@ -294,7 +294,12 @@ typedef std::vector<std::vector<std::vector<t_reachable_wire_inf>>> t_chan_ipins
 
 std::pair<t_src_opin_delays, t_src_opin_inter_layer_delays> compute_router_src_opin_lookahead(bool is_flat);
 
-t_sink_inter_layer_connection register_tiles_with_inter_layer_connection_block(bool is_flat);
+/**
+ *
+ * @brief Register which layers has connection to each block type
+ * @return
+ */
+t_sink_inter_layer_connection register_block_inter_layer_connection(bool is_flat);
 
 t_chan_ipins_delays compute_router_chan_ipin_lookahead();
 

From bc5d59e084b9cb24e92a42a0df1375ad02dab056 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 3 Oct 2023 11:13:52 -0400
Subject: [PATCH 147/257] comment on src_opin_inter_layer_delays - remove
 checking for IPIN inter-layer connection

---
 vpr/src/route/router_lookahead_map.cpp       | 15 +++++----------
 vpr/src/route/router_lookahead_map_utils.cpp |  1 +
 vpr/src/route/router_lookahead_map_utils.h   |  1 +
 3 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp
index af5632652df..e381280311b 100644
--- a/vpr/src/route/router_lookahead_map.cpp
+++ b/vpr/src/route/router_lookahead_map.cpp
@@ -481,23 +481,18 @@ std::pair<float, float> MapLookahead::get_expected_delay_and_cong(RRNodeId from_
         auto from_ptc = rr_graph.node_ptc_num(from_node);
         auto to_ptc = rr_graph.node_ptc_num(to_node);
 
-        if (from_layer_num == to_layer_num || inter_layer_connection[to_layer_num][to_tile_index][to_ptc].find(from_layer_num) != inter_layer_connection[to_layer_num][to_tile_index][to_ptc].end()) {
+        // Currently, we assume inter-layer connections are only from a block output pin to another layer. Thus, if the from and to layers are different,
+        // We use src_opin_inter_layer_delays.
+        if (from_layer_num == to_layer_num) {
             std::tie(expected_delay_cost, expected_cong_cost) = get_cost_from_src_opin(src_opin_delays[from_layer_num][from_tile_index][from_ptc],
                                                                                        from_layer_num,
                                                                                        delta_x,
                                                                                        delta_y);
-        }
-
-        if (from_layer_num != to_layer_num) {
-            float tmp_expected_delay_cost, tmp_expected_cong_cost;
-            std::tie(tmp_expected_delay_cost, tmp_expected_cong_cost) = get_cost_from_src_opin(src_opin_inter_layer_delays[from_layer_num][from_tile_index][from_ptc][to_layer_num],
+        } else if (from_layer_num != to_layer_num) {
+            std::tie(expected_delay_cost, expected_cong_cost) = get_cost_from_src_opin(src_opin_inter_layer_delays[from_layer_num][from_tile_index][from_ptc][to_layer_num],
                                                                                                to_layer_num,
                                                                                                delta_x,
                                                                                                delta_y);
-            if (tmp_expected_delay_cost < expected_delay_cost) {
-                expected_delay_cost = tmp_expected_delay_cost;
-                expected_cong_cost = tmp_expected_cong_cost;
-            }
         }
 
         expected_delay_cost *= params.criticality;
diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp
index 702d07ddd03..e1b03ea637c 100644
--- a/vpr/src/route/router_lookahead_map_utils.cpp
+++ b/vpr/src/route/router_lookahead_map_utils.cpp
@@ -629,6 +629,7 @@ static void dijkstra_flood_to_wires(int itile,
                 src_opin_delays[node_layer_num][itile][ptc][seg_index].congestion = curr.congestion;
             } else if (is_multi_layer && (!src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num].count(seg_index) || curr.delay < src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].delay)
                        && curr_layer_num != node_layer_num) {
+                // Store a CHANX/Y node or a SINK node on another layer that is reachable by the current node.
                 src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].wire_rr_type = curr_rr_type;
                 src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].wire_seg_index = seg_index;
                 src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].delay = curr.delay;
diff --git a/vpr/src/route/router_lookahead_map_utils.h b/vpr/src/route/router_lookahead_map_utils.h
index 133d61afe47..5ac6b8eedf2 100644
--- a/vpr/src/route/router_lookahead_map_utils.h
+++ b/vpr/src/route/router_lookahead_map_utils.h
@@ -273,6 +273,7 @@ struct t_reachable_wire_inf {
 // When querying this data structure, the minimum cost is computed for each delay/congestion pair, and returned
 // as the lookahead expected cost. [opin/src layer_num][tile_index][opin/src ptc_number] -> pair<seg_index, t_reachable_wire_inf>
 typedef std::vector<std::vector<std::vector<std::map<int, t_reachable_wire_inf>>>> t_src_opin_delays;
+// Store the wire segments on to_layer_num reachable from a given SOURCE/OPIN
 // [from_layer_num][tile_index][from opin/src ptc num][to_layer_num] -> pair<seg_index, t_reachable_wire_inf>
 typedef std::vector<std::vector<std::vector<std::vector<std::map<int, util::t_reachable_wire_inf>>>>> t_src_opin_inter_layer_delays;
 // [from layer num][tile_index][sink ptc number] -> set of layers that have connections to the given sink

From 0ebe3ed82c8953766f8df23ccd9fb19a26a70dad Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 3 Oct 2023 11:23:16 -0400
Subject: [PATCH 148/257] if a from node is of type CHAN, it should be on the
 same layer as sink

---
 vpr/src/route/router_lookahead_map.cpp | 25 ++++++-------------------
 1 file changed, 6 insertions(+), 19 deletions(-)

diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp
index e381280311b..adba4a88dc1 100644
--- a/vpr/src/route/router_lookahead_map.cpp
+++ b/vpr/src/route/router_lookahead_map.cpp
@@ -490,9 +490,9 @@ std::pair<float, float> MapLookahead::get_expected_delay_and_cong(RRNodeId from_
                                                                                        delta_y);
         } else if (from_layer_num != to_layer_num) {
             std::tie(expected_delay_cost, expected_cong_cost) = get_cost_from_src_opin(src_opin_inter_layer_delays[from_layer_num][from_tile_index][from_ptc][to_layer_num],
-                                                                                               to_layer_num,
-                                                                                               delta_x,
-                                                                                               delta_y);
+                                                                                       to_layer_num,
+                                                                                       delta_x,
+                                                                                       delta_y);
         }
 
         expected_delay_cost *= params.criticality;
@@ -518,18 +518,9 @@ std::pair<float, float> MapLookahead::get_expected_delay_and_cong(RRNodeId from_
         VTR_ASSERT(from_seg_index >= 0);
 
         bool get_cost_entry = true;
-        if (from_layer_num != to_layer_num) {
-            t_physical_tile_type_ptr to_tile_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(to_node),
-                                                                                       rr_graph.node_ylow(to_node),
-                                                                                       to_layer_num});
-            auto to_tile_index = std::distance(&device_ctx.physical_tile_types[0], to_tile_type);
-            auto to_ptc = rr_graph.node_ptc_num(to_node);
-            if (inter_layer_connection[to_layer_num][to_tile_index][to_ptc].find(from_layer_num) == inter_layer_connection[to_layer_num][to_tile_index][to_ptc].end()) {
-                get_cost_entry = false;
-                expected_delay_cost = std::numeric_limits<float>::max() / 1e12;
-                expected_cong_cost = std::numeric_limits<float>::max() / 1e12;
-            }
-        }
+        // Since we assume that inter-layer connections are only from a block output pin to another layer, if the from node is of type CHANX/Y, it
+        // should be on the same layer as the sink node.
+        VTR_ASSERT(from_layer_num != to_layer_num);
 
         if (get_cost_entry) {
             /* now get the expected cost from our lookahead map */
@@ -540,10 +531,6 @@ std::pair<float, float> MapLookahead::get_expected_delay_and_cong(RRNodeId from_
                                                         delta_y);
             expected_delay_cost = cost_entry.delay;
             expected_cong_cost = cost_entry.congestion;
-            if (from_layer_num != to_layer_num) {
-                VTR_ASSERT(std::isfinite(inter_layer_connection_box_sw_delay));
-                expected_delay_cost += inter_layer_connection_box_sw_delay;
-            }
 
             expected_delay_cost *= params.criticality;
             expected_cong_cost *= (1 - params.criticality);

From f52c3b4d872864422209cea1e454026be645e17d Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 3 Oct 2023 11:32:20 -0400
Subject: [PATCH 149/257] remove unused vars

---
 vpr/src/route/router_lookahead_map.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp
index adba4a88dc1..2018ea8c6e5 100644
--- a/vpr/src/route/router_lookahead_map.cpp
+++ b/vpr/src/route/router_lookahead_map.cpp
@@ -476,10 +476,8 @@ std::pair<float, float> MapLookahead::get_expected_delay_and_cong(RRNodeId from_
                                                                                    to_layer_num});
 
         auto from_tile_index = std::distance(&device_ctx.physical_tile_types[0], from_tile_type);
-        auto to_tile_index = std::distance(&device_ctx.physical_tile_types[0], to_tile_type);
 
         auto from_ptc = rr_graph.node_ptc_num(from_node);
-        auto to_ptc = rr_graph.node_ptc_num(to_node);
 
         // Currently, we assume inter-layer connections are only from a block output pin to another layer. Thus, if the from and to layers are different,
         // We use src_opin_inter_layer_delays.

From 7d2c952ac8aac052c07c4642bfb0a64d95c98151 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 3 Oct 2023 11:38:25 -0400
Subject: [PATCH 150/257] remove layer assignment in the uniform move

---
 vpr/src/place/uniform_move_generator.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/vpr/src/place/uniform_move_generator.cpp b/vpr/src/place/uniform_move_generator.cpp
index f46133020fe..6560c32af24 100644
--- a/vpr/src/place/uniform_move_generator.cpp
+++ b/vpr/src/place/uniform_move_generator.cpp
@@ -26,7 +26,6 @@ e_create_move UniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks
     VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type));
 
     t_pl_loc to;
-    to.layer = from.layer;
     if (!find_to_loc_uniform(cluster_from_type, rlim, from, to, b_from)) {
         return e_create_move::ABORT;
     }

From c845ccf395283fb007422388ad397e56e14eabfe Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 3 Oct 2023 11:45:43 -0400
Subject: [PATCH 151/257] remove t_sink_inter_layer_connection

---
 vpr/src/route/router_lookahead_map.cpp       |  5 --
 vpr/src/route/router_lookahead_map.h         |  2 -
 vpr/src/route/router_lookahead_map_utils.cpp | 57 --------------------
 vpr/src/route/router_lookahead_map_utils.h   |  9 ----
 4 files changed, 73 deletions(-)

diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp
index 2018ea8c6e5..f0d56601ecc 100644
--- a/vpr/src/route/router_lookahead_map.cpp
+++ b/vpr/src/route/router_lookahead_map.cpp
@@ -563,9 +563,6 @@ void MapLookahead::compute(const std::vector<t_segment_inf>& segment_inf) {
     //Next, compute which wire types are accessible (and the cost to reach them)
     //from the different physical tile type's SOURCEs & OPINs
     std::tie(this->src_opin_delays, this->src_opin_inter_layer_delays) = util::compute_router_src_opin_lookahead(is_flat_);
-
-    // Store the sinks that have connections to other layers
-    this->inter_layer_connection = util::register_block_inter_layer_connection(is_flat_);
 }
 
 void MapLookahead::compute_intra_tile() {
@@ -589,8 +586,6 @@ void MapLookahead::read(const std::string& file) {
     //Next, compute which wire types are accessible (and the cost to reach them)
     //from the different physical tile type's SOURCEs & OPINs
     std::tie(this->src_opin_delays, this->src_opin_inter_layer_delays) = util::compute_router_src_opin_lookahead(is_flat_);
-
-    this->inter_layer_connection = util::register_block_inter_layer_connection(is_flat_);
 }
 
 void MapLookahead::read_intra_cluster(const std::string& file) {
diff --git a/vpr/src/route/router_lookahead_map.h b/vpr/src/route/router_lookahead_map.h
index 247b3d15ea1..a1c9f895a21 100644
--- a/vpr/src/route/router_lookahead_map.h
+++ b/vpr/src/route/router_lookahead_map.h
@@ -21,8 +21,6 @@ class MapLookahead : public RouterLookahead {
     vtr::NdMatrix<util::Cost_Entry, 3> distance_based_min_cost; // [layer_num][dx][dy] -> cost
     // [tile_index][from_layer_num][to_layer_num] -> pair<seg_index, t_reachable_wire_inf>
     util::t_src_opin_inter_layer_delays src_opin_inter_layer_delays;
-    // [source_layer_num][tile_index][sink ptc num] -> cost
-    util::t_sink_inter_layer_connection inter_layer_connection;
 
     float inter_layer_connection_box_sw_delay;
     const t_det_routing_arch& det_routing_arch_;
diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp
index e1b03ea637c..01c0e79d16c 100644
--- a/vpr/src/route/router_lookahead_map_utils.cpp
+++ b/vpr/src/route/router_lookahead_map_utils.cpp
@@ -409,63 +409,6 @@ std::pair<t_src_opin_delays, t_src_opin_inter_layer_delays> compute_router_src_o
     return std::make_pair(src_opin_delays, src_opin_inter_layer_delays);
 }
 
-t_sink_inter_layer_connection register_block_inter_layer_connection(bool is_flat) {
-    //TODO: This function uses architectural information to determine the inter-layer connections.
-    // However, this infromation should be extracted from the RR graph. This is a temporary solution
-    // Moreover, there is an underlying assumption that the connection pattern for each block type on a layer is the same.
-    vtr::ScopedStartFinishTimer timer("Computing sink inter layer lookahead");
-    auto& device_ctx = g_vpr_ctx.device();
-
-    int num_layers = device_ctx.grid.get_num_layers();
-    bool is_multi_layer = (num_layers > 1);
-    if (!is_multi_layer) {
-        return t_sink_inter_layer_connection();
-    }
-    // AM: Currently, for 3D stuff, I am only focusing on the case that flat-router is not enabled. If flat_router is on, I am not sure whether it works.
-    VTR_ASSERT(!is_flat);
-
-    t_sink_inter_layer_connection inter_layer_conn;
-    inter_layer_conn.resize(num_layers);
-    // Resize inter_layer_conn to accommodate all the layers, block types, and number of classes for each block type
-    for (int from_layer_num = 0; from_layer_num < num_layers; from_layer_num++) {
-        const auto& physical_tiles = device_ctx.physical_tile_types;
-        int num_physical_tile_types = (int)device_ctx.physical_tile_types.size();
-
-        inter_layer_conn[from_layer_num].resize(num_physical_tile_types);
-        for (int itile = 0; itile < num_physical_tile_types; itile++) {
-            if (device_ctx.grid.num_instances(&physical_tiles[itile], from_layer_num) == 0) {
-                continue;
-            }
-            int num_classes = (int)physical_tiles[itile].class_inf.size();
-            inter_layer_conn[from_layer_num][itile].resize(num_classes);
-        }
-    }
-
-    for (int from_layer_num = 0; from_layer_num < num_layers; from_layer_num++) {
-        for (int itile = 0; itile < (int)device_ctx.physical_tile_types.size(); itile++) {
-            for (int class_num = 0; class_num < (int)inter_layer_conn[from_layer_num][itile].size(); class_num++) {
-                const auto& physical_tile = device_ctx.physical_tile_types[itile];
-                if (get_class_type_from_class_physical_num(&physical_tile, class_num) == e_pin_type::RECEIVER) {
-                    for (int to_layer_num = 0; to_layer_num < num_layers; to_layer_num++) {
-                        if (from_layer_num == to_layer_num) {
-                            continue;
-                        } else {
-                            for (int pin_num : get_pin_list_from_class_physical_num(&physical_tile, class_num)) {
-                                if (is_pin_conencted_to_layer(&physical_tile, pin_num, from_layer_num, to_layer_num, num_layers)) {
-                                    inter_layer_conn[from_layer_num][itile][class_num].insert(to_layer_num);
-                                    break;
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    return inter_layer_conn;
-}
-
 t_chan_ipins_delays compute_router_chan_ipin_lookahead() {
     vtr::ScopedStartFinishTimer timer("Computing chan/ipin lookahead");
     auto& device_ctx = g_vpr_ctx.device();
diff --git a/vpr/src/route/router_lookahead_map_utils.h b/vpr/src/route/router_lookahead_map_utils.h
index 5ac6b8eedf2..8eeae8357b9 100644
--- a/vpr/src/route/router_lookahead_map_utils.h
+++ b/vpr/src/route/router_lookahead_map_utils.h
@@ -276,8 +276,6 @@ typedef std::vector<std::vector<std::vector<std::map<int, t_reachable_wire_inf>>
 // Store the wire segments on to_layer_num reachable from a given SOURCE/OPIN
 // [from_layer_num][tile_index][from opin/src ptc num][to_layer_num] -> pair<seg_index, t_reachable_wire_inf>
 typedef std::vector<std::vector<std::vector<std::vector<std::map<int, util::t_reachable_wire_inf>>>>> t_src_opin_inter_layer_delays;
-// [from layer num][tile_index][sink ptc number] -> set of layers that have connections to the given sink
-typedef std::vector<std::vector<std::vector<std::unordered_set<int>>>> t_sink_inter_layer_connection;
 
 //[from pin ptc num][target src ptc num]->cost
 typedef std::vector<std::unordered_map<int, Cost_Entry>> t_ipin_primitive_sink_delays;
@@ -295,13 +293,6 @@ typedef std::vector<std::vector<std::vector<t_reachable_wire_inf>>> t_chan_ipins
 
 std::pair<t_src_opin_delays, t_src_opin_inter_layer_delays> compute_router_src_opin_lookahead(bool is_flat);
 
-/**
- *
- * @brief Register which layers has connection to each block type
- * @return
- */
-t_sink_inter_layer_connection register_block_inter_layer_connection(bool is_flat);
-
 t_chan_ipins_delays compute_router_chan_ipin_lookahead();
 
 t_ipin_primitive_sink_delays compute_intra_tile_dijkstra(const RRGraphView& rr_graph,

From d0da8377f3ba8147ad01c2fa13934c57bbe950f6 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 3 Oct 2023 11:46:41 -0400
Subject: [PATCH 152/257] remove an unused variable

---
 vpr/src/route/router_lookahead_map.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp
index f0d56601ecc..054a67a4731 100644
--- a/vpr/src/route/router_lookahead_map.cpp
+++ b/vpr/src/route/router_lookahead_map.cpp
@@ -471,9 +471,6 @@ std::pair<float, float> MapLookahead::get_expected_delay_and_cong(RRNodeId from_
         t_physical_tile_type_ptr from_tile_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(from_node),
                                                                                      rr_graph.node_ylow(from_node),
                                                                                      from_layer_num});
-        t_physical_tile_type_ptr to_tile_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(to_node),
-                                                                                   rr_graph.node_ylow(to_node),
-                                                                                   to_layer_num});
 
         auto from_tile_index = std::distance(&device_ctx.physical_tile_types[0], from_tile_type);
 

From 9ae8a10703b3b5b58c0ddf42947f5b8673db647e Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 3 Oct 2023 17:05:41 -0400
Subject: [PATCH 153/257] remove inter_layer_sw_id field from map lookahead

---
 vpr/src/route/router_lookahead_map.cpp | 16 +---------------
 vpr/src/route/router_lookahead_map.h   |  1 -
 2 files changed, 1 insertion(+), 16 deletions(-)

diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp
index 054a67a4731..2cff4623ea3 100644
--- a/vpr/src/route/router_lookahead_map.cpp
+++ b/vpr/src/route/router_lookahead_map.cpp
@@ -302,21 +302,7 @@ static void print_router_cost_map(const t_routing_cost_map& router_cost_map);
 /******** Interface class member function definitions ********/
 MapLookahead::MapLookahead(const t_det_routing_arch& det_routing_arch, bool is_flat)
     : det_routing_arch_(det_routing_arch)
-    , is_flat_(is_flat) {
-    int num_layers = g_vpr_ctx.device().grid.get_num_layers();
-    if (num_layers > 1) {
-        const auto& sw_inf = g_vpr_ctx.device().all_sw_inf;
-        int inter_layer_sw_id = det_routing_arch_.wire_to_arch_ipin_switch_between_dice;
-        if (inter_layer_sw_id != OPEN) {
-            inter_layer_connection_box_sw_delay = sw_inf.at(inter_layer_sw_id).Tdel();
-        } else {
-            inter_layer_connection_box_sw_delay = std::numeric_limits<float>::max();
-        }
-    } else {
-        VTR_ASSERT(num_layers == 1);
-        inter_layer_connection_box_sw_delay = 0.;
-    }
-}
+    , is_flat_(is_flat) {}
 
 float MapLookahead::get_expected_cost(RRNodeId current_node, RRNodeId target_node, const t_conn_cost_params& params, float R_upstream) const {
     auto& device_ctx = g_vpr_ctx.device();
diff --git a/vpr/src/route/router_lookahead_map.h b/vpr/src/route/router_lookahead_map.h
index a1c9f895a21..d6340acac85 100644
--- a/vpr/src/route/router_lookahead_map.h
+++ b/vpr/src/route/router_lookahead_map.h
@@ -22,7 +22,6 @@ class MapLookahead : public RouterLookahead {
     // [tile_index][from_layer_num][to_layer_num] -> pair<seg_index, t_reachable_wire_inf>
     util::t_src_opin_inter_layer_delays src_opin_inter_layer_delays;
 
-    float inter_layer_connection_box_sw_delay;
     const t_det_routing_arch& det_routing_arch_;
     bool is_flat_;
 

From d191ec20df649fcc2152d4669daa22871db18804 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 3 Oct 2023 17:52:22 -0400
Subject: [PATCH 154/257] fix a typo in an assertion

---
 vpr/src/route/router_lookahead_map.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp
index 2cff4623ea3..8c58d01678a 100644
--- a/vpr/src/route/router_lookahead_map.cpp
+++ b/vpr/src/route/router_lookahead_map.cpp
@@ -501,7 +501,7 @@ std::pair<float, float> MapLookahead::get_expected_delay_and_cong(RRNodeId from_
         bool get_cost_entry = true;
         // Since we assume that inter-layer connections are only from a block output pin to another layer, if the from node is of type CHANX/Y, it
         // should be on the same layer as the sink node.
-        VTR_ASSERT(from_layer_num != to_layer_num);
+        VTR_ASSERT(from_layer_num == to_layer_num);
 
         if (get_cost_entry) {
             /* now get the expected cost from our lookahead map */

From cb9a27052a2f92c1893df745ea09bb2c9780808d Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 4 Oct 2023 09:13:32 -0400
Subject: [PATCH 155/257] assign curr_bb data structures to the new one first,
 then update it

---
 vpr/src/place/place.cpp | 77 +++--------------------------------------
 1 file changed, 4 insertions(+), 73 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index a0ec5d0aae0..537e418f5d3 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -3368,6 +3368,10 @@ static void update_layer_bb(ClusterNetId net_id,
     int layer_new = pin_new_loc.layer_num;
     bool layer_changed = (layer_old != layer_new);
 
+    bb_edge_new = *curr_bb_edge;
+    bb_coord_new = *curr_bb_coord;
+
+
     if(layer_changed) {
         update_bb_layer_changed(net_id,
                                 pin_old_loc,
@@ -3423,9 +3427,6 @@ static inline void update_bb_same_layer(ClusterNetId net_id,
             if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
                 return;
             }
-        } else {
-            bb_edge_new[layer_num].xmax = curr_bb_edge[layer_num].xmax;
-            bb_coord_new[layer_num].xmax = curr_bb_coord[layer_num].xmax;
         }
 
         if (x_new < curr_bb_coord[layer_num].xmin) {
@@ -3434,9 +3435,6 @@ static inline void update_bb_same_layer(ClusterNetId net_id,
         } else if (x_new == curr_bb_coord[layer_num].xmin) {
             bb_edge_new[layer_num].xmin = curr_bb_edge[layer_num].xmin + 1;
             bb_coord_new[layer_num].xmin = curr_bb_coord[layer_num].xmin;
-        } else {
-            bb_edge_new[layer_num].xmin = curr_bb_edge[layer_num].xmin;
-            bb_coord_new[layer_num].xmin = curr_bb_coord[layer_num].xmin;
         }
 
     } else if (x_new > x_old) {
@@ -3452,9 +3450,6 @@ static inline void update_bb_same_layer(ClusterNetId net_id,
             if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
                 return;
             }
-        } else {
-            bb_edge_new[layer_num].xmin = curr_bb_edge[layer_num].xmin;
-            bb_coord_new[layer_num].xmin = curr_bb_coord[layer_num].xmin;
         }
 
         if (x_new > curr_bb_coord[layer_num].xmax) {
@@ -3463,17 +3458,8 @@ static inline void update_bb_same_layer(ClusterNetId net_id,
         } else if (x_new == curr_bb_coord[layer_num].xmax) {
             bb_edge_new[layer_num].xmax = curr_bb_edge[layer_num].xmax + 1;
             bb_coord_new[layer_num].xmax = curr_bb_coord[layer_num].xmax;
-        } else {
-            bb_edge_new[layer_num].xmax = curr_bb_edge[layer_num].xmax;
-            bb_coord_new[layer_num].xmax = curr_bb_coord[layer_num].xmax;
         }
 
-    } else {
-        /* block has not moved */
-        bb_edge_new[layer_num].xmin = curr_bb_edge[layer_num].xmin;
-        bb_coord_new[layer_num].xmin = curr_bb_coord[layer_num].xmin;
-        bb_edge_new[layer_num].xmax = curr_bb_edge[layer_num].xmax;
-        bb_coord_new[layer_num].xmax = curr_bb_coord[layer_num].xmax;
     }
 
     if (y_new < y_old) {
@@ -3489,9 +3475,6 @@ static inline void update_bb_same_layer(ClusterNetId net_id,
             if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
                 return;
             }
-        } else {
-            bb_edge_new[layer_num].ymax = curr_bb_edge[layer_num].ymax;
-            bb_coord_new[layer_num].ymax = curr_bb_coord[layer_num].ymax;
         }
 
         if (y_new < curr_bb_coord[layer_num].ymin) {
@@ -3500,9 +3483,6 @@ static inline void update_bb_same_layer(ClusterNetId net_id,
         } else if (y_new == curr_bb_coord[layer_num].ymin) {
             bb_edge_new[layer_num].ymin = curr_bb_edge[layer_num].ymin + 1;
             bb_coord_new[layer_num].ymin = curr_bb_coord[layer_num].ymin;
-        } else {
-            bb_edge_new[layer_num].ymin = curr_bb_edge[layer_num].ymin;
-            bb_coord_new[layer_num].ymin = curr_bb_coord[layer_num].ymin;
         }
 
     } else if (y_new > y_old) {
@@ -3518,9 +3498,6 @@ static inline void update_bb_same_layer(ClusterNetId net_id,
             if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
                 return;
             }
-        } else {
-            bb_edge_new[layer_num].ymin = curr_bb_edge[layer_num].ymin;
-            bb_coord_new[layer_num].ymin = curr_bb_coord[layer_num].ymin;
         }
 
         if (y_new > curr_bb_coord[layer_num].ymax) {
@@ -3529,16 +3506,7 @@ static inline void update_bb_same_layer(ClusterNetId net_id,
         } else if (y_new == curr_bb_coord[layer_num].ymax) {
             bb_edge_new[layer_num].ymax = curr_bb_edge[layer_num].ymax + 1;
             bb_coord_new[layer_num].ymax = curr_bb_coord[layer_num].ymax;
-        } else {
-            bb_edge_new[layer_num].ymax = curr_bb_edge[layer_num].ymax;
-            bb_coord_new[layer_num].ymax = curr_bb_coord[layer_num].ymax;
         }
-    } else {
-        /* block has not moved */
-        bb_edge_new[layer_num].ymin = curr_bb_edge[layer_num].ymin;
-        bb_coord_new[layer_num].ymin = curr_bb_coord[layer_num].ymin;
-        bb_edge_new[layer_num].ymax = curr_bb_edge[layer_num].ymax;
-        bb_coord_new[layer_num].ymax = curr_bb_coord[layer_num].ymax;
     }
 
 }
@@ -3572,8 +3540,6 @@ static inline void update_bb_layer_changed(ClusterNetId net_id,
         if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
             return;
         }
-        bb_edge_new[old_layer_num].xmin = curr_bb_edge[old_layer_num].xmin;
-        bb_coord_new[old_layer_num].xmin = curr_bb_coord[old_layer_num].xmin;
     } else if (x_old == curr_bb_coord[old_layer_num].xmin) {
         update_bb_edge(net_id,
                        bb_edge_new,
@@ -3586,14 +3552,6 @@ static inline void update_bb_layer_changed(ClusterNetId net_id,
         if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
             return;
         }
-        bb_edge_new[old_layer_num].xmax = curr_bb_edge[old_layer_num].xmax;
-        bb_coord_new[old_layer_num].xmax = curr_bb_coord[old_layer_num].xmax;
-    } else {
-        /* block has not moved */
-        bb_edge_new[old_layer_num].xmin = curr_bb_edge[old_layer_num].xmin;
-        bb_coord_new[old_layer_num].xmin = curr_bb_coord[old_layer_num].xmin;
-        bb_edge_new[old_layer_num].xmax = curr_bb_edge[old_layer_num].xmax;
-        bb_coord_new[old_layer_num].xmax = curr_bb_coord[old_layer_num].xmax;
     }
 
     if (y_old == curr_bb_coord[old_layer_num].ymax) {
@@ -3608,8 +3566,6 @@ static inline void update_bb_layer_changed(ClusterNetId net_id,
         if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
             return;
         }
-        bb_edge_new[old_layer_num].ymin = curr_bb_edge[old_layer_num].ymin;
-        bb_coord_new[old_layer_num].ymin = curr_bb_coord[old_layer_num].ymin;
     } else if (y_old == curr_bb_coord[old_layer_num].ymin) {
         update_bb_edge(net_id,
                        bb_edge_new,
@@ -3622,15 +3578,6 @@ static inline void update_bb_layer_changed(ClusterNetId net_id,
         if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
             return;
         }
-        bb_edge_new[old_layer_num].ymax = curr_bb_edge[old_layer_num].ymax;
-        bb_coord_new[old_layer_num].ymax = curr_bb_coord[old_layer_num].ymax;
-
-    } else {
-        /* block has not moved */
-        bb_edge_new[old_layer_num].ymin = curr_bb_edge[old_layer_num].ymin;
-        bb_coord_new[old_layer_num].ymin = curr_bb_coord[old_layer_num].ymin;
-        bb_edge_new[old_layer_num].ymax = curr_bb_edge[old_layer_num].ymax;
-        bb_coord_new[old_layer_num].ymax = curr_bb_coord[old_layer_num].ymax;
     }
 
     add_block_to_bb(pin_new_loc,
@@ -3689,10 +3636,6 @@ static void add_block_to_bb(const t_physical_tile_loc& new_pin_loc,
         bb_coord_new.xmax = x_new;
     } else if (x_new == bb_coord_old.xmax) {
         bb_edge_new.xmax = bb_edge_old.xmax + 1;
-        bb_coord_new.xmax = bb_coord_old.xmax;
-    } else {
-        bb_edge_new.xmax = bb_edge_old.xmax;
-        bb_coord_new.xmax = bb_coord_old.xmax;
     }
 
     if (x_new < bb_coord_old.xmin) {
@@ -3700,10 +3643,6 @@ static void add_block_to_bb(const t_physical_tile_loc& new_pin_loc,
         bb_coord_new.xmin = x_new;
     } else if (x_new == bb_coord_old.xmin) {
         bb_edge_new.xmin = bb_edge_old.xmin + 1;
-        bb_coord_new.xmin = bb_coord_old.xmin;
-    } else {
-        bb_edge_new.xmin = bb_edge_old.xmin;
-        bb_coord_new.xmin = bb_coord_old.xmin;
     }
 
     if (y_new > bb_coord_old.ymax) {
@@ -3711,10 +3650,6 @@ static void add_block_to_bb(const t_physical_tile_loc& new_pin_loc,
         bb_coord_new.ymax = y_new;
     } else if (y_new == bb_coord_old.ymax) {
         bb_edge_new.ymax = bb_edge_old.ymax + 1;
-        bb_coord_new.ymax = bb_coord_old.ymax;
-    } else {
-        bb_edge_new.ymax = bb_edge_old.ymax;
-        bb_coord_new.ymax = bb_coord_old.ymax;
     }
 
     if (y_new < bb_coord_old.ymin) {
@@ -3722,10 +3657,6 @@ static void add_block_to_bb(const t_physical_tile_loc& new_pin_loc,
         bb_coord_new.ymin = y_new;
     } else if (y_new == bb_coord_old.ymin) {
         bb_edge_new.ymin = bb_edge_old.ymin + 1;
-        bb_coord_new.ymin = bb_coord_old.ymin;
-    } else {
-        bb_edge_new.ymin = bb_edge_old.ymin;
-        bb_coord_new.ymin = bb_coord_old.ymin;
     }
 }
 

From ba2dd6379eb28d79ddfd0c84b795e3a6c07f3d12 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 4 Oct 2023 09:34:03 -0400
Subject: [PATCH 156/257] use the union of bbs across layers when the median
 move calculates the bb from scratch

---
 vpr/src/place/median_move_generator.cpp | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index 5cfe3a6dc4a..8793374c362 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -28,6 +28,8 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
     auto& device_ctx = g_vpr_ctx.device();
     auto& place_move_ctx = g_placer_ctx.mutable_move();
 
+    bool is_multi_layer = (device_ctx.grid.get_num_layers() > 1);
+
     t_pl_loc from = place_ctx.block_locs[b_from].loc;
     auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from);
     auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer});
@@ -66,6 +68,12 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
             if (skip_net)
                 continue;
         } else {
+            t_bb union_bb;
+            if (is_multi_layer) {
+                union_bb = union_2d_bb(place_move_ctx.layer_bb_coords[net_id]);
+            }
+
+            const auto& net_bb_coords = is_multi_layer ? place_move_ctx.bb_coords[net_id] : union_bb;
             //use the incremental update of the bb
             bnum = cluster_ctx.clb_nlist.pin_block(pin_id);
             pnum = tile_pin_index(pin_id);
@@ -78,16 +86,16 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
 
             //To calulate the bb incrementally while excluding the moving block
             //assume that the moving block is moved to a non-critical coord of the bb
-            if (place_move_ctx.bb_coords[net_id].xmin == xold) {
-                xnew = place_move_ctx.bb_coords[net_id].xmax;
+            if (net_bb_coords.xmin == xold) {
+                xnew = net_bb_coords.xmax;
             } else {
-                xnew = place_move_ctx.bb_coords[net_id].xmin;
+                xnew = net_bb_coords.xmin;
             }
 
-            if (place_move_ctx.bb_coords[net_id].ymin == yold) {
-                ynew = place_move_ctx.bb_coords[net_id].ymax;
+            if (net_bb_coords.ymin == yold) {
+                ynew = net_bb_coords.ymax;
             } else {
-                ynew = place_move_ctx.bb_coords[net_id].ymin;
+                ynew = net_bb_coords.ymin;
             }
 
             if (!get_bb_incrementally(net_id, coords, xold, yold, xnew, ynew, block_layer)) {

From fe8ffc3c92abc2798e7ad76f2cb822f22dc059bf Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 4 Oct 2023 10:13:20 -0400
Subject: [PATCH 157/257] impl union_2d_bb_incr

---
 vpr/src/place/move_utils.cpp | 99 ++++++++++++++++++++++++++++++++----
 vpr/src/place/move_utils.h   |  3 ++
 2 files changed, 91 insertions(+), 11 deletions(-)

diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index 7a628b23a10..dbd2b4b8099 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -1330,18 +1330,14 @@ int get_random_layer(t_logical_block_type_ptr logical_block) {
 }
 
 t_bb union_2d_bb(const std::vector<t_2D_bb>& bb_vec) {
-    int num_layers = g_vpr_ctx.device().grid.get_num_layers();
-    VTR_ASSERT_SAFE((int)bb_vec.size() == num_layers);
-    t_bb merged_bb(bb_vec[0].xmin,
-                   bb_vec[0].xmax,
-                   bb_vec[0].ymin,
-                   bb_vec[0].ymax,
-                   0,
-                   num_layers - 1);
-
-    for (int layer_num = 1; layer_num < num_layers; layer_num++) {
-        const auto& layer_bb = bb_vec[layer_num];
+    t_bb merged_bb;
+
+    for (const auto& layer_bb : bb_vec) {
         if (layer_bb.xmin == OPEN) {
+            VTR_ASSERT_SAFE(layer_bb.xmax == OPEN);
+            VTR_ASSERT_SAFE(layer_bb.ymin == OPEN);
+            VTR_ASSERT_SAFE(layer_bb.ymax == OPEN);
+            VTR_ASSERT_SAFE(layer_bb.layer_num == OPEN);
             continue;
         }
         if (merged_bb.xmin == OPEN || layer_bb.xmin < merged_bb.xmin) {
@@ -1356,7 +1352,88 @@ t_bb union_2d_bb(const std::vector<t_2D_bb>& bb_vec) {
         if (merged_bb.ymax == OPEN || layer_bb.ymax > merged_bb.ymax) {
             merged_bb.ymax = layer_bb.ymax;
         }
+        if (merged_bb.layer_min == OPEN || layer_bb.layer_num < merged_bb.layer_min) {
+            merged_bb.layer_min = layer_bb.layer_num;
+        }
+        if (merged_bb.layer_max == OPEN || layer_bb.layer_num > merged_bb.layer_max) {
+            merged_bb.layer_max = layer_bb.layer_num;
+        }
     }
 
     return merged_bb;
 }
+
+std::pair<t_bb, t_bb> union_2d_bb_incr(const std::vector<t_2D_bb>& num_edge_vec,
+                                       const std::vector<t_2D_bb>& bb_vec) {
+    t_bb merged_num_edge;
+    t_bb merged_bb;
+
+    for (const auto& layer_bb : bb_vec) {
+        if (layer_bb.xmin == OPEN) {
+            VTR_ASSERT_SAFE(layer_bb.xmax == OPEN);
+            VTR_ASSERT_SAFE(layer_bb.ymin == OPEN);
+            VTR_ASSERT_SAFE(layer_bb.ymax == OPEN);
+            VTR_ASSERT_SAFE(layer_bb.layer_num == OPEN);
+            continue;
+        }
+        if (merged_bb.xmin == OPEN || layer_bb.xmin <= merged_bb.xmin) {
+            merged_bb.xmin = layer_bb.xmin;
+            if (layer_bb.xmin == merged_bb.xmin) {
+                VTR_ASSERT_SAFE(merged_num_edge.xmin != OPEN);
+                merged_num_edge.xmin += num_edge_vec[layer_bb.layer_num].xmin;
+            } else {
+                merged_num_edge.xmin = num_edge_vec[merged_bb.layer_min].xmin;
+            }
+        }
+        if (merged_bb.xmax == OPEN || layer_bb.xmax >= merged_bb.xmax) {
+            merged_bb.xmax = layer_bb.xmax;
+            if (layer_bb.xmax == merged_bb.xmax) {
+                VTR_ASSERT_SAFE(merged_num_edge.xmax != OPEN);
+                merged_num_edge.xmax += num_edge_vec[layer_bb.layer_num].xmax;
+            } else {
+                merged_num_edge.xmax = num_edge_vec[merged_bb.layer_max].xmax;
+            }
+        }
+        if (merged_bb.ymin == OPEN || layer_bb.ymin <= merged_bb.ymin) {
+            merged_bb.ymin = layer_bb.ymin;
+            if (layer_bb.ymin == merged_bb.ymin) {
+                VTR_ASSERT_SAFE(merged_num_edge.ymin != OPEN);
+                merged_num_edge.ymin += num_edge_vec[layer_bb.layer_num].ymin;
+            } else {
+                merged_num_edge.ymin = num_edge_vec[merged_bb.layer_min].ymin;
+            }
+        }
+        if (merged_bb.ymax == OPEN || layer_bb.ymax >= merged_bb.ymax) {
+            merged_bb.ymax = layer_bb.ymax;
+            if (layer_bb.ymax == merged_bb.ymax) {
+                VTR_ASSERT_SAFE(merged_num_edge.ymax != OPEN);
+                merged_num_edge.ymax += num_edge_vec[layer_bb.layer_num].ymax;
+            } else {
+                merged_num_edge.ymax = num_edge_vec[merged_bb.layer_max].ymax;
+            }
+        }
+        if (merged_bb.layer_min == OPEN || layer_bb.layer_num <= merged_bb.layer_min) {
+            merged_bb.layer_min = layer_bb.layer_num;
+            if (layer_bb.layer_num == merged_bb.layer_min) {
+                VTR_ASSERT_SAFE(merged_num_edge.layer_min != OPEN);
+                merged_num_edge.layer_min += num_edge_vec[layer_bb.layer_num].layer_num;
+            } else {
+                merged_num_edge.layer_min = num_edge_vec[merged_bb.layer_min].layer_num;
+            }
+        }
+        if (merged_bb.layer_max == OPEN || layer_bb.layer_num >= merged_bb.layer_max) {
+            merged_bb.layer_max = layer_bb.layer_num;
+            if (layer_bb.layer_num == merged_bb.layer_max) {
+                VTR_ASSERT_SAFE(merged_num_edge.layer_max != OPEN);
+                merged_num_edge.layer_max += num_edge_vec[layer_bb.layer_num].layer_num;
+            } else {
+                merged_num_edge.layer_max = num_edge_vec[merged_bb.layer_max].layer_num;
+            }
+        }
+    }
+
+
+    return std::make_pair(merged_num_edge, merged_bb);
+
+
+}
diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h
index fc6b344e4d6..b9f5873425a 100644
--- a/vpr/src/place/move_utils.h
+++ b/vpr/src/place/move_utils.h
@@ -332,6 +332,9 @@ int get_random_layer(t_logical_block_type_ptr logical_block);
 
 t_bb union_2d_bb(const std::vector<t_2D_bb>& tbb_vec);
 
+std::pair<t_bb, t_bb> union_2d_bb_incr(const std::vector<t_2D_bb>& num_edge_vec,
+                                       const std::vector<t_2D_bb>& bb_vec);
+
 #ifdef VTR_ENABLE_DEBUG_LOGGING
 /**
  * @brief If the block ID passed to the placer_debug_net parameter of the command line is equal to blk_id, or if any of the nets

From aea843776f01629ac2666d5dc5f4ba93e8d5d65e Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 4 Oct 2023 10:16:08 -0400
Subject: [PATCH 158/257] use union_2d_bb_incr in median move incr bb update

---
 vpr/src/place/median_move_generator.cpp | 79 ++++++++++++++-----------
 1 file changed, 43 insertions(+), 36 deletions(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index 8793374c362..43d61bf5a30 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -253,19 +253,26 @@ static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb& bb_co
 static bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, int xold, int yold, int xnew, int ynew, int /* layer */) {
     //TODO: account for multiple physical pin instances per logical pin
 
-    const t_bb *curr_bb_edge, *curr_bb_coord;
-
     auto& device_ctx = g_vpr_ctx.device();
     auto& place_move_ctx = g_placer_ctx.move();
 
+    bool is_multi_layer = (device_ctx.grid.get_num_layers() > 1);
+
     xnew = std::max(std::min<int>(xnew, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
     ynew = std::max(std::min<int>(ynew, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
     xold = std::max(std::min<int>(xold, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
     yold = std::max(std::min<int>(yold, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
 
+    t_bb union_bb_edge;
+    t_bb union_bb;
+    if (is_multi_layer) {
+        std::tie(union_bb_edge, union_bb) = union_2d_bb_incr(place_move_ctx.layer_bb_coords[net_id],
+                                                             place_move_ctx.layer_bb_num_on_edges[net_id]);
+    }
+
     /* The net had NOT been updated before, could use the old values */
-    curr_bb_coord = &(place_move_ctx.bb_coords[net_id]);
-    curr_bb_edge = &(place_move_ctx.bb_num_on_edges[net_id]);
+    const t_bb& curr_bb_edge = is_multi_layer ? union_bb_edge : place_move_ctx.bb_num_on_edges[net_id];
+    const t_bb& curr_bb_coord = is_multi_layer ? union_bb : place_move_ctx.bb_coords[net_id];
 
     /* Check if I can update the bounding box incrementally. */
 
@@ -273,24 +280,24 @@ static bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, int xo
 
         /* Update the xmax fields for coordinates and number of edges first. */
 
-        if (xold == curr_bb_coord->xmax) { /* Old position at xmax. */
-            if (curr_bb_edge->xmax == 1) {
+        if (xold == curr_bb_coord.xmax) { /* Old position at xmax. */
+            if (curr_bb_edge.xmax == 1) {
                 return false;
             } else {
-                bb_coord_new.xmax = curr_bb_coord->xmax;
+                bb_coord_new.xmax = curr_bb_coord.xmax;
             }
         } else { /* Move to left, old postion was not at xmax. */
-            bb_coord_new.xmax = curr_bb_coord->xmax;
+            bb_coord_new.xmax = curr_bb_coord.xmax;
         }
 
         /* Now do the xmin fields for coordinates and number of edges. */
 
-        if (xnew < curr_bb_coord->xmin) { /* Moved past xmin */
+        if (xnew < curr_bb_coord.xmin) { /* Moved past xmin */
             bb_coord_new.xmin = xnew;
-        } else if (xnew == curr_bb_coord->xmin) { /* Moved to xmin */
+        } else if (xnew == curr_bb_coord.xmin) { /* Moved to xmin */
             bb_coord_new.xmin = xnew;
         } else { /* Xmin unchanged. */
-            bb_coord_new.xmin = curr_bb_coord->xmin;
+            bb_coord_new.xmin = curr_bb_coord.xmin;
         }
         /* End of move to left case. */
 
@@ -298,29 +305,29 @@ static bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, int xo
 
         /* Update the xmin fields for coordinates and number of edges first. */
 
-        if (xold == curr_bb_coord->xmin) { /* Old position at xmin. */
-            if (curr_bb_edge->xmin == 1) {
+        if (xold == curr_bb_coord.xmin) { /* Old position at xmin. */
+            if (curr_bb_edge.xmin == 1) {
                 return false;
             } else {
-                bb_coord_new.xmin = curr_bb_coord->xmin;
+                bb_coord_new.xmin = curr_bb_coord.xmin;
             }
         } else { /* Move to right, old position was not at xmin. */
-            bb_coord_new.xmin = curr_bb_coord->xmin;
+            bb_coord_new.xmin = curr_bb_coord.xmin;
         }
         /* Now do the xmax fields for coordinates and number of edges. */
 
-        if (xnew > curr_bb_coord->xmax) { /* Moved past xmax. */
+        if (xnew > curr_bb_coord.xmax) { /* Moved past xmax. */
             bb_coord_new.xmax = xnew;
-        } else if (xnew == curr_bb_coord->xmax) { /* Moved to xmax */
+        } else if (xnew == curr_bb_coord.xmax) { /* Moved to xmax */
             bb_coord_new.xmax = xnew;
         } else { /* Xmax unchanged. */
-            bb_coord_new.xmax = curr_bb_coord->xmax;
+            bb_coord_new.xmax = curr_bb_coord.xmax;
         }
         /* End of move to right case. */
 
     } else { /* xnew == xold -- no x motion. */
-        bb_coord_new.xmin = curr_bb_coord->xmin;
-        bb_coord_new.xmax = curr_bb_coord->xmax;
+        bb_coord_new.xmin = curr_bb_coord.xmin;
+        bb_coord_new.xmax = curr_bb_coord.xmax;
     }
 
     /* Now account for the y-direction motion. */
@@ -329,24 +336,24 @@ static bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, int xo
 
         /* Update the ymax fields for coordinates and number of edges first. */
 
-        if (yold == curr_bb_coord->ymax) { /* Old position at ymax. */
-            if (curr_bb_edge->ymax == 1) {
+        if (yold == curr_bb_coord.ymax) { /* Old position at ymax. */
+            if (curr_bb_edge.ymax == 1) {
                 return false;
             } else {
-                bb_coord_new.ymax = curr_bb_coord->ymax;
+                bb_coord_new.ymax = curr_bb_coord.ymax;
             }
         } else { /* Move down, old postion was not at ymax. */
-            bb_coord_new.ymax = curr_bb_coord->ymax;
+            bb_coord_new.ymax = curr_bb_coord.ymax;
         }
 
         /* Now do the ymin fields for coordinates and number of edges. */
 
-        if (ynew < curr_bb_coord->ymin) { /* Moved past ymin */
+        if (ynew < curr_bb_coord.ymin) { /* Moved past ymin */
             bb_coord_new.ymin = ynew;
-        } else if (ynew == curr_bb_coord->ymin) { /* Moved to ymin */
+        } else if (ynew == curr_bb_coord.ymin) { /* Moved to ymin */
             bb_coord_new.ymin = ynew;
         } else { /* ymin unchanged. */
-            bb_coord_new.ymin = curr_bb_coord->ymin;
+            bb_coord_new.ymin = curr_bb_coord.ymin;
         }
         /* End of move down case. */
 
@@ -354,30 +361,30 @@ static bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, int xo
 
         /* Update the ymin fields for coordinates and number of edges first. */
 
-        if (yold == curr_bb_coord->ymin) { /* Old position at ymin. */
-            if (curr_bb_edge->ymin == 1) {
+        if (yold == curr_bb_coord.ymin) { /* Old position at ymin. */
+            if (curr_bb_edge.ymin == 1) {
                 return false;
             } else {
-                bb_coord_new.ymin = curr_bb_coord->ymin;
+                bb_coord_new.ymin = curr_bb_coord.ymin;
             }
         } else { /* Moved up, old position was not at ymin. */
-            bb_coord_new.ymin = curr_bb_coord->ymin;
+            bb_coord_new.ymin = curr_bb_coord.ymin;
         }
 
         /* Now do the ymax fields for coordinates and number of edges. */
 
-        if (ynew > curr_bb_coord->ymax) { /* Moved past ymax. */
+        if (ynew > curr_bb_coord.ymax) { /* Moved past ymax. */
             bb_coord_new.ymax = ynew;
-        } else if (ynew == curr_bb_coord->ymax) { /* Moved to ymax */
+        } else if (ynew == curr_bb_coord.ymax) { /* Moved to ymax */
             bb_coord_new.ymax = ynew;
         } else { /* ymax unchanged. */
-            bb_coord_new.ymax = curr_bb_coord->ymax;
+            bb_coord_new.ymax = curr_bb_coord.ymax;
         }
         /* End of move up case. */
 
     } else { /* ynew == yold -- no y motion. */
-        bb_coord_new.ymin = curr_bb_coord->ymin;
-        bb_coord_new.ymax = curr_bb_coord->ymax;
+        bb_coord_new.ymin = curr_bb_coord.ymin;
+        bb_coord_new.ymax = curr_bb_coord.ymax;
     }
     return true;
 }

From e90d4f8b8a593fe17236304f5512243a7e335806 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 4 Oct 2023 10:22:37 -0400
Subject: [PATCH 159/257] fix a typo

---
 vpr/src/place/median_move_generator.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index 43d61bf5a30..7156b942946 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -73,7 +73,7 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
                 union_bb = union_2d_bb(place_move_ctx.layer_bb_coords[net_id]);
             }
 
-            const auto& net_bb_coords = is_multi_layer ? place_move_ctx.bb_coords[net_id] : union_bb;
+            const auto& net_bb_coords = is_multi_layer ? union_bb: place_move_ctx.bb_coords[net_id];
             //use the incremental update of the bb
             bnum = cluster_ctx.clb_nlist.pin_block(pin_id);
             pnum = tile_pin_index(pin_id);

From 97b8e0a43519d28e96b26e87642043cd75abc41c Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 4 Oct 2023 10:36:41 -0400
Subject: [PATCH 160/257] num_edge should be updated first, then coords

---
 vpr/src/place/move_utils.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index dbd2b4b8099..bfceb98f483 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -1377,58 +1377,58 @@ std::pair<t_bb, t_bb> union_2d_bb_incr(const std::vector<t_2D_bb>& num_edge_vec,
             continue;
         }
         if (merged_bb.xmin == OPEN || layer_bb.xmin <= merged_bb.xmin) {
-            merged_bb.xmin = layer_bb.xmin;
             if (layer_bb.xmin == merged_bb.xmin) {
                 VTR_ASSERT_SAFE(merged_num_edge.xmin != OPEN);
                 merged_num_edge.xmin += num_edge_vec[layer_bb.layer_num].xmin;
             } else {
                 merged_num_edge.xmin = num_edge_vec[merged_bb.layer_min].xmin;
             }
+            merged_bb.xmin = layer_bb.xmin;
         }
         if (merged_bb.xmax == OPEN || layer_bb.xmax >= merged_bb.xmax) {
-            merged_bb.xmax = layer_bb.xmax;
             if (layer_bb.xmax == merged_bb.xmax) {
                 VTR_ASSERT_SAFE(merged_num_edge.xmax != OPEN);
                 merged_num_edge.xmax += num_edge_vec[layer_bb.layer_num].xmax;
             } else {
                 merged_num_edge.xmax = num_edge_vec[merged_bb.layer_max].xmax;
             }
+            merged_bb.xmax = layer_bb.xmax;
         }
         if (merged_bb.ymin == OPEN || layer_bb.ymin <= merged_bb.ymin) {
-            merged_bb.ymin = layer_bb.ymin;
             if (layer_bb.ymin == merged_bb.ymin) {
                 VTR_ASSERT_SAFE(merged_num_edge.ymin != OPEN);
                 merged_num_edge.ymin += num_edge_vec[layer_bb.layer_num].ymin;
             } else {
                 merged_num_edge.ymin = num_edge_vec[merged_bb.layer_min].ymin;
             }
+            merged_bb.ymin = layer_bb.ymin;
         }
         if (merged_bb.ymax == OPEN || layer_bb.ymax >= merged_bb.ymax) {
-            merged_bb.ymax = layer_bb.ymax;
             if (layer_bb.ymax == merged_bb.ymax) {
                 VTR_ASSERT_SAFE(merged_num_edge.ymax != OPEN);
                 merged_num_edge.ymax += num_edge_vec[layer_bb.layer_num].ymax;
             } else {
                 merged_num_edge.ymax = num_edge_vec[merged_bb.layer_max].ymax;
             }
+            merged_bb.ymax = layer_bb.ymax;
         }
         if (merged_bb.layer_min == OPEN || layer_bb.layer_num <= merged_bb.layer_min) {
-            merged_bb.layer_min = layer_bb.layer_num;
             if (layer_bb.layer_num == merged_bb.layer_min) {
                 VTR_ASSERT_SAFE(merged_num_edge.layer_min != OPEN);
                 merged_num_edge.layer_min += num_edge_vec[layer_bb.layer_num].layer_num;
             } else {
                 merged_num_edge.layer_min = num_edge_vec[merged_bb.layer_min].layer_num;
             }
+            merged_bb.layer_min = layer_bb.layer_num;
         }
         if (merged_bb.layer_max == OPEN || layer_bb.layer_num >= merged_bb.layer_max) {
-            merged_bb.layer_max = layer_bb.layer_num;
             if (layer_bb.layer_num == merged_bb.layer_max) {
                 VTR_ASSERT_SAFE(merged_num_edge.layer_max != OPEN);
                 merged_num_edge.layer_max += num_edge_vec[layer_bb.layer_num].layer_num;
             } else {
                 merged_num_edge.layer_max = num_edge_vec[merged_bb.layer_max].layer_num;
             }
+            merged_bb.layer_max = layer_bb.layer_num;
         }
     }
 

From f6e948a53fd222ae88f815c4963a7f1bc66ed800 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 4 Oct 2023 10:47:55 -0400
Subject: [PATCH 161/257] fix the order of the parameters of union_2d_bb_incr

---
 vpr/src/place/median_move_generator.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index 7156b942946..3a884fa7bc3 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -266,8 +266,8 @@ static bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, int xo
     t_bb union_bb_edge;
     t_bb union_bb;
     if (is_multi_layer) {
-        std::tie(union_bb_edge, union_bb) = union_2d_bb_incr(place_move_ctx.layer_bb_coords[net_id],
-                                                             place_move_ctx.layer_bb_num_on_edges[net_id]);
+        std::tie(union_bb_edge, union_bb) = union_2d_bb_incr(place_move_ctx.layer_bb_num_on_edges[net_id],
+                                                             place_move_ctx.layer_bb_coords[net_id]);
     }
 
     /* The net had NOT been updated before, could use the old values */

From b02d7d4403931294196116d45a238f46711106a8 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 4 Oct 2023 10:57:09 -0400
Subject: [PATCH 162/257] use the layer num of the current bb

---
 vpr/src/place/move_utils.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index bfceb98f483..fde3090678c 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -1381,7 +1381,7 @@ std::pair<t_bb, t_bb> union_2d_bb_incr(const std::vector<t_2D_bb>& num_edge_vec,
                 VTR_ASSERT_SAFE(merged_num_edge.xmin != OPEN);
                 merged_num_edge.xmin += num_edge_vec[layer_bb.layer_num].xmin;
             } else {
-                merged_num_edge.xmin = num_edge_vec[merged_bb.layer_min].xmin;
+                merged_num_edge.xmin = num_edge_vec[layer_bb.layer_num].xmin;
             }
             merged_bb.xmin = layer_bb.xmin;
         }
@@ -1390,7 +1390,7 @@ std::pair<t_bb, t_bb> union_2d_bb_incr(const std::vector<t_2D_bb>& num_edge_vec,
                 VTR_ASSERT_SAFE(merged_num_edge.xmax != OPEN);
                 merged_num_edge.xmax += num_edge_vec[layer_bb.layer_num].xmax;
             } else {
-                merged_num_edge.xmax = num_edge_vec[merged_bb.layer_max].xmax;
+                merged_num_edge.xmax = num_edge_vec[layer_bb.layer_num].xmax;
             }
             merged_bb.xmax = layer_bb.xmax;
         }
@@ -1399,7 +1399,7 @@ std::pair<t_bb, t_bb> union_2d_bb_incr(const std::vector<t_2D_bb>& num_edge_vec,
                 VTR_ASSERT_SAFE(merged_num_edge.ymin != OPEN);
                 merged_num_edge.ymin += num_edge_vec[layer_bb.layer_num].ymin;
             } else {
-                merged_num_edge.ymin = num_edge_vec[merged_bb.layer_min].ymin;
+                merged_num_edge.ymin = num_edge_vec[layer_bb.layer_num].ymin;
             }
             merged_bb.ymin = layer_bb.ymin;
         }
@@ -1408,7 +1408,7 @@ std::pair<t_bb, t_bb> union_2d_bb_incr(const std::vector<t_2D_bb>& num_edge_vec,
                 VTR_ASSERT_SAFE(merged_num_edge.ymax != OPEN);
                 merged_num_edge.ymax += num_edge_vec[layer_bb.layer_num].ymax;
             } else {
-                merged_num_edge.ymax = num_edge_vec[merged_bb.layer_max].ymax;
+                merged_num_edge.ymax = num_edge_vec[layer_bb.layer_num].ymax;
             }
             merged_bb.ymax = layer_bb.ymax;
         }
@@ -1417,7 +1417,7 @@ std::pair<t_bb, t_bb> union_2d_bb_incr(const std::vector<t_2D_bb>& num_edge_vec,
                 VTR_ASSERT_SAFE(merged_num_edge.layer_min != OPEN);
                 merged_num_edge.layer_min += num_edge_vec[layer_bb.layer_num].layer_num;
             } else {
-                merged_num_edge.layer_min = num_edge_vec[merged_bb.layer_min].layer_num;
+                merged_num_edge.layer_min = num_edge_vec[layer_bb.layer_num].layer_num;
             }
             merged_bb.layer_min = layer_bb.layer_num;
         }
@@ -1426,7 +1426,7 @@ std::pair<t_bb, t_bb> union_2d_bb_incr(const std::vector<t_2D_bb>& num_edge_vec,
                 VTR_ASSERT_SAFE(merged_num_edge.layer_max != OPEN);
                 merged_num_edge.layer_max += num_edge_vec[layer_bb.layer_num].layer_num;
             } else {
-                merged_num_edge.layer_max = num_edge_vec[merged_bb.layer_max].layer_num;
+                merged_num_edge.layer_max = num_edge_vec[layer_bb.layer_num].layer_num;
             }
             merged_bb.layer_max = layer_bb.layer_num;
         }

From a9185ef99af09bc132eee88ee4b664e71c3777c5 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 4 Oct 2023 11:08:28 -0400
Subject: [PATCH 163/257] debug: pass the comparison results not the value of
 the var

---
 vpr/src/place/place.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 537e418f5d3..1a758e73154 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -2046,7 +2046,7 @@ static void update_net_layer_bb(const ClusterNetId net,
                         ts_layer_sink_pin_count[net],
                         pin_old_loc,
                         pin_new_loc,
-                        pin_dir);
+                        pin_dir == e_pin_type::DRIVER);
     }
 }
 

From c8a41db5260ad9f5629379ceefca30db4c152feb Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 4 Oct 2023 12:05:33 -0400
Subject: [PATCH 164/257] fix a bug with router lookahead: return infinite
 cost instead of asserting when channel node and sink are on different layers

---
 vpr/src/route/router_lookahead_map.cpp | 37 +++++++++++++-------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp
index 8c58d01678a..bfd5f2109f5 100644
--- a/vpr/src/route/router_lookahead_map.cpp
+++ b/vpr/src/route/router_lookahead_map.cpp
@@ -498,12 +498,12 @@ std::pair<float, float> MapLookahead::get_expected_delay_and_cong(RRNodeId from_
 
         VTR_ASSERT(from_seg_index >= 0);
 
-        bool get_cost_entry = true;
-        // Since we assume that inter-layer connections are only from a block output pin to another layer, if the from node is of type CHANX/Y, it
-        // should be on the same layer as the sink node.
-        VTR_ASSERT(from_layer_num == to_layer_num);
-
-        if (get_cost_entry) {
+        // Since we assume that inter-layer connections are only from a block output pin to another layer, if the from node
+        // is of type CHANX/CHANY, and the sink node is on the other layer, there will no path from that node to the sink
+        if(from_layer_num != to_layer_num) {
+            expected_delay_cost = std::numeric_limits<float>::max() / 1e12;
+            expected_cong_cost = std::numeric_limits<float>::max() / 1e12;
+        } else {
             /* now get the expected cost from our lookahead map */
             Cost_Entry cost_entry = get_wire_cost_entry(from_type,
                                                         from_seg_index,
@@ -513,20 +513,19 @@ std::pair<float, float> MapLookahead::get_expected_delay_and_cong(RRNodeId from_
             expected_delay_cost = cost_entry.delay;
             expected_cong_cost = cost_entry.congestion;
 
-            expected_delay_cost *= params.criticality;
-            expected_cong_cost *= (1 - params.criticality);
+            VTR_ASSERT_SAFE_MSG(std::isfinite(expected_delay_cost),
+                                vtr::string_fmt("Lookahead failed to estimate cost from %s: %s",
+                                                rr_node_arch_name(from_node, is_flat_).c_str(),
+                                                describe_rr_node(rr_graph,
+                                                                 device_ctx.grid,
+                                                                 device_ctx.rr_indexed_data,
+                                                                 from_node,
+                                                                 is_flat_)
+                                                    .c_str())
+                                    .c_str());
         }
-
-        VTR_ASSERT_SAFE_MSG(std::isfinite(expected_delay_cost),
-                            vtr::string_fmt("Lookahead failed to estimate cost from %s: %s",
-                                            rr_node_arch_name(from_node, is_flat_).c_str(),
-                                            describe_rr_node(rr_graph,
-                                                             device_ctx.grid,
-                                                             device_ctx.rr_indexed_data,
-                                                             from_node,
-                                                             is_flat_)
-                                                .c_str())
-                                .c_str());
+        expected_delay_cost *= params.criticality;
+        expected_cong_cost *= (1 - params.criticality);
     } else if (from_type == IPIN) { /* Change if you're allowing route-throughs */
         return std::make_pair(0., device_ctx.rr_indexed_data[RRIndexedDataId(SINK_COST_INDEX)].base_cost);
     } else { /* Change this if you want to investigate route-throughs */

From 0e26a21297acd2e853533d21af2a2b4823d60084 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 5 Oct 2023 09:55:07 -0400
Subject: [PATCH 165/257] experiment: last 4 moves choose the free layer

---
 vpr/src/place/critical_uniform_move_generator.cpp  | 2 +-
 vpr/src/place/feasible_region_move_generator.cpp   | 2 +-
 vpr/src/place/weighted_centroid_move_generator.cpp | 2 +-
 vpr/src/place/weighted_median_move_generator.cpp   | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/vpr/src/place/critical_uniform_move_generator.cpp b/vpr/src/place/critical_uniform_move_generator.cpp
index 9fbc93a7645..a3318343ab4 100644
--- a/vpr/src/place/critical_uniform_move_generator.cpp
+++ b/vpr/src/place/critical_uniform_move_generator.cpp
@@ -28,7 +28,7 @@ e_create_move CriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved
     VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type));
 
     t_pl_loc to;
-    to.layer = from.layer;
+    to.layer = find_free_layer(cluster_from_type, from);
     if (!find_to_loc_uniform(cluster_from_type, rlim, from, to, b_from)) {
         return e_create_move::ABORT;
     }
diff --git a/vpr/src/place/feasible_region_move_generator.cpp b/vpr/src/place/feasible_region_move_generator.cpp
index 54a651f40d3..a4c6e1454da 100644
--- a/vpr/src/place/feasible_region_move_generator.cpp
+++ b/vpr/src/place/feasible_region_move_generator.cpp
@@ -120,7 +120,7 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
         center.y = (FR_coords.ymin + FR_coords.ymax) / 2;
         // TODO: Currently, we don't move blocks between different types of layers
         center.layer = from.layer;
-        to.layer = from.layer;
+        to.layer = find_free_layer(cluster_from_type, from);
         if (!find_to_loc_centroid(cluster_from_type, from, center, range_limiters, to, b_from))
             return e_create_move::ABORT;
     }
diff --git a/vpr/src/place/weighted_centroid_move_generator.cpp b/vpr/src/place/weighted_centroid_move_generator.cpp
index f92b9fce719..4904bc0d3fd 100644
--- a/vpr/src/place/weighted_centroid_move_generator.cpp
+++ b/vpr/src/place/weighted_centroid_move_generator.cpp
@@ -39,7 +39,7 @@ e_create_move WeightedCentroidMoveGenerator::propose_move(t_pl_blocks_to_be_move
     calculate_centroid_loc(b_from, true, centroid, criticalities);
 
     /* Find a  */
-    to.layer = from.layer;
+    to.layer = find_free_layer(cluster_from_type, from);
     if (!find_to_loc_centroid(cluster_from_type, from, centroid, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
     }
diff --git a/vpr/src/place/weighted_median_move_generator.cpp b/vpr/src/place/weighted_median_move_generator.cpp
index 93f59d7959c..ef21a4681f5 100644
--- a/vpr/src/place/weighted_median_move_generator.cpp
+++ b/vpr/src/place/weighted_median_move_generator.cpp
@@ -108,7 +108,7 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
     w_median_point.y = (limit_coords.ymin + limit_coords.ymax) / 2;
     // TODO: Currently, we don't move blocks between different types of layers
     w_median_point.layer = from.layer;
-    to.layer = from.layer;
+    to.layer = find_free_layer(cluster_from_type, from);
     if (!find_to_loc_centroid(cluster_from_type, from, w_median_point, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
     }

From 23e904541b45c9c678cba785fe44464f64261ef5 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 5 Oct 2023 10:15:25 -0400
Subject: [PATCH 166/257] fix the order of moves in move to string

---
 vpr/src/place/move_utils.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index fde3090678c..00d7463910d 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -1002,8 +1002,8 @@ bool find_to_loc_centroid(t_logical_block_type_ptr blk_type,
 static const std::array<std::string, NUM_PL_MOVE_TYPES + 1> move_type_strings = {
     "Uniform",
     "Median",
-    "W. Centroid",
     "Centroid",
+    "W. Centroid",
     "W. Median",
     "Crit. Uniform",
     "Feasible Region",

From 0a9c907885551c8efa04b5d4c80dcf22536bf65e Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 5 Oct 2023 10:23:46 -0400
Subject: [PATCH 167/257] experiment: new move to change layer in addition to
 the random move

---
 vpr/src/base/read_options.cpp                      | 2 +-
 vpr/src/base/vpr_types.h                           | 4 ++--
 vpr/src/place/critical_uniform_move_generator.cpp  | 2 +-
 vpr/src/place/feasible_region_move_generator.cpp   | 2 +-
 vpr/src/place/move_utils.cpp                       | 1 +
 vpr/src/place/move_utils.h                         | 1 +
 vpr/src/place/simpleRL_move_generator.h            | 3 ++-
 vpr/src/place/weighted_centroid_move_generator.cpp | 2 +-
 vpr/src/place/weighted_median_move_generator.cpp   | 2 +-
 9 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index cf5b2eadc04..0e8544ee68f 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -1991,7 +1991,7 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg
             "This option is only effective for timing-driven placement."
             "The numbers listed are interpreted as the percentage probabilities of {uniformMove, MedianMove, CentroidMove, WeightedCentroid, WeightedMedian, Timing feasible Region(TFR), Critical UniformMove}, in that order.")
         .nargs('+')
-        .default_value({"100", "0", "0", "0", "0", "0", "0"})
+        .default_value({"100", "0", "0", "0", "0", "0", "0", "0"})
 
         .show_in(argparse::ShowIn::HELP_ONLY);
 
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 1bbe65f87fb..ce925cdef58 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -530,8 +530,8 @@ enum class e_timing_update_type {
  ****************************************************************************/
 
 /* Values of number of placement available move types */
-#define NUM_PL_MOVE_TYPES 7
-#define NUM_PL_NONTIMING_MOVE_TYPES 3
+#define NUM_PL_MOVE_TYPES 8
+#define NUM_PL_NONTIMING_MOVE_TYPES 4
 #define NUM_PL_1ST_STATE_MOVE_TYPES 4
 
 /* Timing data structures end */
diff --git a/vpr/src/place/critical_uniform_move_generator.cpp b/vpr/src/place/critical_uniform_move_generator.cpp
index a3318343ab4..9fbc93a7645 100644
--- a/vpr/src/place/critical_uniform_move_generator.cpp
+++ b/vpr/src/place/critical_uniform_move_generator.cpp
@@ -28,7 +28,7 @@ e_create_move CriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved
     VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type));
 
     t_pl_loc to;
-    to.layer = find_free_layer(cluster_from_type, from);
+    to.layer = from.layer;
     if (!find_to_loc_uniform(cluster_from_type, rlim, from, to, b_from)) {
         return e_create_move::ABORT;
     }
diff --git a/vpr/src/place/feasible_region_move_generator.cpp b/vpr/src/place/feasible_region_move_generator.cpp
index a4c6e1454da..54a651f40d3 100644
--- a/vpr/src/place/feasible_region_move_generator.cpp
+++ b/vpr/src/place/feasible_region_move_generator.cpp
@@ -120,7 +120,7 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
         center.y = (FR_coords.ymin + FR_coords.ymax) / 2;
         // TODO: Currently, we don't move blocks between different types of layers
         center.layer = from.layer;
-        to.layer = find_free_layer(cluster_from_type, from);
+        to.layer = from.layer;
         if (!find_to_loc_centroid(cluster_from_type, from, center, range_limiters, to, b_from))
             return e_create_move::ABORT;
     }
diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index 00d7463910d..41a871e0042 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -1003,6 +1003,7 @@ static const std::array<std::string, NUM_PL_MOVE_TYPES + 1> move_type_strings =
     "Uniform",
     "Median",
     "Centroid",
+    "INTER_LAYRE_UNIFORM",
     "W. Centroid",
     "W. Median",
     "Crit. Uniform",
diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h
index b9f5873425a..03869bd38dd 100644
--- a/vpr/src/place/move_utils.h
+++ b/vpr/src/place/move_utils.h
@@ -23,6 +23,7 @@ enum class e_move_type {
     UNIFORM,
     MEDIAN,
     CENTROID,
+    INTER_LAYRE_UNIFORM,
     W_CENTROID,
     W_MEDIAN,
     CRIT_UNIFORM,
diff --git a/vpr/src/place/simpleRL_move_generator.h b/vpr/src/place/simpleRL_move_generator.h
index 1c0f56b9c66..06b488fa757 100644
--- a/vpr/src/place/simpleRL_move_generator.h
+++ b/vpr/src/place/simpleRL_move_generator.h
@@ -96,7 +96,7 @@ class KArmedBanditAgent {
     /* Ratios of the average runtime to calculate each move type              */
     /* These ratios are useful for different reward functions                 *
      * The vector is calculated by averaging many runs on different circuits  */
-    std::vector<double> time_elapsed_{1.0, 3.6, 5.4, 2.5, 2.1, 0.8, 2.2, 1.0};
+    std::vector<double> time_elapsed_{1.0, 3.6, 5.4, 1.0, 2.5, 2.1, 0.8, 2.2};
 
     FILE* agent_info_file_ = nullptr;
 
@@ -233,6 +233,7 @@ SimpleRLMoveGenerator::SimpleRLMoveGenerator(std::unique_ptr<T>& agent, bool /*i
     avail_moves[(int)e_move_type::UNIFORM] = std::make_unique<UniformMoveGenerator>();
     avail_moves[(int)e_move_type::MEDIAN] = std::make_unique<MedianMoveGenerator>();
     avail_moves[(int)e_move_type::CENTROID] = std::make_unique<CentroidMoveGenerator>();
+    avail_moves[(int)e_move_type::INTER_LAYRE_UNIFORM] = std::make_unique<UniformInterLayerMoveGenerator>();
     avail_moves[(int)e_move_type::W_CENTROID] = std::make_unique<WeightedCentroidMoveGenerator>();
     avail_moves[(int)e_move_type::W_MEDIAN] = std::make_unique<WeightedMedianMoveGenerator>();
     avail_moves[(int)e_move_type::CRIT_UNIFORM] = std::make_unique<CriticalUniformMoveGenerator>();
diff --git a/vpr/src/place/weighted_centroid_move_generator.cpp b/vpr/src/place/weighted_centroid_move_generator.cpp
index 4904bc0d3fd..f92b9fce719 100644
--- a/vpr/src/place/weighted_centroid_move_generator.cpp
+++ b/vpr/src/place/weighted_centroid_move_generator.cpp
@@ -39,7 +39,7 @@ e_create_move WeightedCentroidMoveGenerator::propose_move(t_pl_blocks_to_be_move
     calculate_centroid_loc(b_from, true, centroid, criticalities);
 
     /* Find a  */
-    to.layer = find_free_layer(cluster_from_type, from);
+    to.layer = from.layer;
     if (!find_to_loc_centroid(cluster_from_type, from, centroid, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
     }
diff --git a/vpr/src/place/weighted_median_move_generator.cpp b/vpr/src/place/weighted_median_move_generator.cpp
index ef21a4681f5..93f59d7959c 100644
--- a/vpr/src/place/weighted_median_move_generator.cpp
+++ b/vpr/src/place/weighted_median_move_generator.cpp
@@ -108,7 +108,7 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
     w_median_point.y = (limit_coords.ymin + limit_coords.ymax) / 2;
     // TODO: Currently, we don't move blocks between different types of layers
     w_median_point.layer = from.layer;
-    to.layer = find_free_layer(cluster_from_type, from);
+    to.layer = from.layer;
     if (!find_to_loc_centroid(cluster_from_type, from, w_median_point, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
     }

From 33fc559981ecf694574698e9b990d3cf0cff7885 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 5 Oct 2023 15:51:51 -0400
Subject: [PATCH 168/257] find_to_loc_median: use to-layer num instead of
 from-layer num

---
 vpr/src/place/move_utils.cpp | 39 ++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index 41a871e0042..bb7f7e62d63 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -831,7 +831,8 @@ bool find_to_loc_median(t_logical_block_type_ptr blk_type,
                         t_pl_loc& to_loc,
                         ClusterBlockId b_from) {
     int num_layers = g_vpr_ctx.device().grid.get_num_layers();
-    int from_layer_num = from_loc.layer;
+    int to_layer_num = to_loc.layer;
+    VTR_ASSERT(to_layer_num != OPEN);
     const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[blk_type->index];
 
     //Determine the coordinates in the compressed grid space of the current block
@@ -844,27 +845,27 @@ bool find_to_loc_median(t_logical_block_type_ptr blk_type,
 
     //Determine the valid compressed grid location ranges
     std::vector<t_physical_tile_loc> min_compressed_loc = get_compressed_loc_approx(compressed_block_grid,
-                                                                                    {limit_coords->xmin, limit_coords->ymin, 0, from_layer_num},
+                                                                                    {limit_coords->xmin, limit_coords->ymin, 0, to_layer_num},
                                                                                     num_layers);
     std::vector<t_physical_tile_loc> max_compressed_loc = get_compressed_loc_approx(compressed_block_grid,
-                                                                                    {limit_coords->xmax, limit_coords->ymax, 0, from_layer_num},
+                                                                                    {limit_coords->xmax, limit_coords->ymax, 0, to_layer_num},
                                                                                     num_layers);
 
-    VTR_ASSERT(min_compressed_loc[from_layer_num].x >= 0);
-    VTR_ASSERT(static_cast<int>(compressed_block_grid.get_num_columns(from_layer_num)) - 1 - max_compressed_loc[from_layer_num].x >= 0);
-    VTR_ASSERT(max_compressed_loc[from_layer_num].x >= min_compressed_loc[from_layer_num].x);
-    int delta_cx = max_compressed_loc[from_layer_num].x - min_compressed_loc[from_layer_num].x;
+    VTR_ASSERT(min_compressed_loc[to_layer_num].x >= 0);
+    VTR_ASSERT(static_cast<int>(compressed_block_grid.get_num_columns(to_layer_num)) - 1 - max_compressed_loc[to_layer_num].x >= 0);
+    VTR_ASSERT(max_compressed_loc[to_layer_num].x >= min_compressed_loc[to_layer_num].x);
+    int delta_cx = max_compressed_loc[to_layer_num].x - min_compressed_loc[to_layer_num].x;
 
-    VTR_ASSERT(min_compressed_loc[from_layer_num].y >= 0);
-    VTR_ASSERT(static_cast<int>(compressed_block_grid.get_num_rows(from_layer_num)) - 1 - max_compressed_loc[from_layer_num].y >= 0);
-    VTR_ASSERT(max_compressed_loc[from_layer_num].y >= min_compressed_loc[from_layer_num].y);
+    VTR_ASSERT(min_compressed_loc[to_layer_num].y >= 0);
+    VTR_ASSERT(static_cast<int>(compressed_block_grid.get_num_rows(to_layer_num)) - 1 - max_compressed_loc[to_layer_num].y >= 0);
+    VTR_ASSERT(max_compressed_loc[to_layer_num].y >= min_compressed_loc[to_layer_num].y);
 
-    t_bb search_range(min_compressed_loc[from_layer_num].x,
-                      max_compressed_loc[from_layer_num].x,
-                      min_compressed_loc[from_layer_num].y,
-                      max_compressed_loc[from_layer_num].y,
-                      from_layer_num,
-                      from_layer_num);
+    t_bb search_range(min_compressed_loc[to_layer_num].x,
+                      max_compressed_loc[to_layer_num].x,
+                      min_compressed_loc[to_layer_num].y,
+                      max_compressed_loc[to_layer_num].y,
+                      to_layer_num,
+                      to_layer_num);
 
     t_physical_tile_loc to_compressed_loc;
     bool legal = false;
@@ -874,7 +875,7 @@ bool find_to_loc_median(t_logical_block_type_ptr blk_type,
                                                                           b_from,
                                                                           search_range,
                                                                           delta_cx,
-                                                                          from_layer_num);
+                                                                          to_layer_num);
         if (!intersect) {
             return false;
         }
@@ -882,11 +883,11 @@ bool find_to_loc_median(t_logical_block_type_ptr blk_type,
 
     legal = find_compatible_compressed_loc_in_range(blk_type,
                                                     delta_cx,
-                                                    from_compressed_locs[from_layer_num],
+                                                    from_compressed_locs[to_layer_num],
                                                     search_range,
                                                     to_compressed_loc,
                                                     true,
-                                                    from_layer_num);
+                                                    to_layer_num);
 
     if (!legal) {
         //No valid position found

From f461cfdecc6d156fc45f7d46b20d32ca06941c14 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 5 Oct 2023 15:54:35 -0400
Subject: [PATCH 169/257] assign layer num to to_loc in feasible move

---
 vpr/src/place/feasible_region_move_generator.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vpr/src/place/feasible_region_move_generator.cpp b/vpr/src/place/feasible_region_move_generator.cpp
index 54a651f40d3..7246070a42b 100644
--- a/vpr/src/place/feasible_region_move_generator.cpp
+++ b/vpr/src/place/feasible_region_move_generator.cpp
@@ -33,6 +33,7 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
 
     /* Calculate the feasible region */
     t_pl_loc to;
+    to.layer = from.layer;
     int ipin;
     ClusterBlockId bnum;
     int max_x, min_x, max_y, min_y;

From 5ee8d71124410be84f535d4882b51faf6acb23f4 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 5 Oct 2023 15:55:24 -0400
Subject: [PATCH 170/257] remove redundant assignment

---
 vpr/src/place/feasible_region_move_generator.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/vpr/src/place/feasible_region_move_generator.cpp b/vpr/src/place/feasible_region_move_generator.cpp
index 7246070a42b..e57f660ab72 100644
--- a/vpr/src/place/feasible_region_move_generator.cpp
+++ b/vpr/src/place/feasible_region_move_generator.cpp
@@ -121,7 +121,6 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
         center.y = (FR_coords.ymin + FR_coords.ymax) / 2;
         // TODO: Currently, we don't move blocks between different types of layers
         center.layer = from.layer;
-        to.layer = from.layer;
         if (!find_to_loc_centroid(cluster_from_type, from, center, range_limiters, to, b_from))
             return e_create_move::ABORT;
     }

From 250a19fab1d546b801951c05a231e6be11f92a28 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 5 Oct 2023 16:00:56 -0400
Subject: [PATCH 171/257] last four moves change layer in addition to new move
 and uniform move

---
 vpr/src/place/critical_uniform_move_generator.cpp  | 2 +-
 vpr/src/place/feasible_region_move_generator.cpp   | 2 +-
 vpr/src/place/weighted_centroid_move_generator.cpp | 3 +--
 vpr/src/place/weighted_median_move_generator.cpp   | 3 ++-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/vpr/src/place/critical_uniform_move_generator.cpp b/vpr/src/place/critical_uniform_move_generator.cpp
index 9fbc93a7645..a3318343ab4 100644
--- a/vpr/src/place/critical_uniform_move_generator.cpp
+++ b/vpr/src/place/critical_uniform_move_generator.cpp
@@ -28,7 +28,7 @@ e_create_move CriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved
     VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type));
 
     t_pl_loc to;
-    to.layer = from.layer;
+    to.layer = find_free_layer(cluster_from_type, from);
     if (!find_to_loc_uniform(cluster_from_type, rlim, from, to, b_from)) {
         return e_create_move::ABORT;
     }
diff --git a/vpr/src/place/feasible_region_move_generator.cpp b/vpr/src/place/feasible_region_move_generator.cpp
index e57f660ab72..739911141e6 100644
--- a/vpr/src/place/feasible_region_move_generator.cpp
+++ b/vpr/src/place/feasible_region_move_generator.cpp
@@ -33,7 +33,7 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
 
     /* Calculate the feasible region */
     t_pl_loc to;
-    to.layer = from.layer;
+    to.layer = find_free_layer(cluster_from_type, from);
     int ipin;
     ClusterBlockId bnum;
     int max_x, min_x, max_y, min_y;
diff --git a/vpr/src/place/weighted_centroid_move_generator.cpp b/vpr/src/place/weighted_centroid_move_generator.cpp
index f92b9fce719..55bf8913823 100644
--- a/vpr/src/place/weighted_centroid_move_generator.cpp
+++ b/vpr/src/place/weighted_centroid_move_generator.cpp
@@ -34,12 +34,11 @@ e_create_move WeightedCentroidMoveGenerator::propose_move(t_pl_blocks_to_be_move
                                     placer_opts.place_dm_rlim};
 
     t_pl_loc to, centroid;
+    to.layer = find_free_layer(cluster_from_type, from);
 
     /* Calculate the weighted centroid */
     calculate_centroid_loc(b_from, true, centroid, criticalities);
 
-    /* Find a  */
-    to.layer = from.layer;
     if (!find_to_loc_centroid(cluster_from_type, from, centroid, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
     }
diff --git a/vpr/src/place/weighted_median_move_generator.cpp b/vpr/src/place/weighted_median_move_generator.cpp
index 93f59d7959c..835f9c9c6fc 100644
--- a/vpr/src/place/weighted_median_move_generator.cpp
+++ b/vpr/src/place/weighted_median_move_generator.cpp
@@ -34,6 +34,7 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
 
     /* Calculate the Edge weighted median region */
     t_pl_loc to;
+    to.layer = find_free_layer(cluster_from_type, from);
 
     t_bb_cost coords;
     t_bb limit_coords;
@@ -108,7 +109,7 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
     w_median_point.y = (limit_coords.ymin + limit_coords.ymax) / 2;
     // TODO: Currently, we don't move blocks between different types of layers
     w_median_point.layer = from.layer;
-    to.layer = from.layer;
+
     if (!find_to_loc_centroid(cluster_from_type, from, w_median_point, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
     }

From 64f8c44081b6b359364eb7e8a4fb5b1c9ecc7349 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 5 Oct 2023 16:43:28 -0400
Subject: [PATCH 172/257] remove new move

---
 vpr/src/base/read_options.cpp           | 2 +-
 vpr/src/base/vpr_types.h                | 4 ++--
 vpr/src/place/move_utils.cpp            | 1 -
 vpr/src/place/move_utils.h              | 1 -
 vpr/src/place/simpleRL_move_generator.h | 3 +--
 5 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 0e8544ee68f..cf5b2eadc04 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -1991,7 +1991,7 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg
             "This option is only effective for timing-driven placement."
             "The numbers listed are interpreted as the percentage probabilities of {uniformMove, MedianMove, CentroidMove, WeightedCentroid, WeightedMedian, Timing feasible Region(TFR), Critical UniformMove}, in that order.")
         .nargs('+')
-        .default_value({"100", "0", "0", "0", "0", "0", "0", "0"})
+        .default_value({"100", "0", "0", "0", "0", "0", "0"})
 
         .show_in(argparse::ShowIn::HELP_ONLY);
 
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index ce925cdef58..1bbe65f87fb 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -530,8 +530,8 @@ enum class e_timing_update_type {
  ****************************************************************************/
 
 /* Values of number of placement available move types */
-#define NUM_PL_MOVE_TYPES 8
-#define NUM_PL_NONTIMING_MOVE_TYPES 4
+#define NUM_PL_MOVE_TYPES 7
+#define NUM_PL_NONTIMING_MOVE_TYPES 3
 #define NUM_PL_1ST_STATE_MOVE_TYPES 4
 
 /* Timing data structures end */
diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index bb7f7e62d63..b65134ad33c 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -1004,7 +1004,6 @@ static const std::array<std::string, NUM_PL_MOVE_TYPES + 1> move_type_strings =
     "Uniform",
     "Median",
     "Centroid",
-    "INTER_LAYRE_UNIFORM",
     "W. Centroid",
     "W. Median",
     "Crit. Uniform",
diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h
index 03869bd38dd..b9f5873425a 100644
--- a/vpr/src/place/move_utils.h
+++ b/vpr/src/place/move_utils.h
@@ -23,7 +23,6 @@ enum class e_move_type {
     UNIFORM,
     MEDIAN,
     CENTROID,
-    INTER_LAYRE_UNIFORM,
     W_CENTROID,
     W_MEDIAN,
     CRIT_UNIFORM,
diff --git a/vpr/src/place/simpleRL_move_generator.h b/vpr/src/place/simpleRL_move_generator.h
index 06b488fa757..347714010ea 100644
--- a/vpr/src/place/simpleRL_move_generator.h
+++ b/vpr/src/place/simpleRL_move_generator.h
@@ -96,7 +96,7 @@ class KArmedBanditAgent {
     /* Ratios of the average runtime to calculate each move type              */
     /* These ratios are useful for different reward functions                 *
      * The vector is calculated by averaging many runs on different circuits  */
-    std::vector<double> time_elapsed_{1.0, 3.6, 5.4, 1.0, 2.5, 2.1, 0.8, 2.2};
+    std::vector<double> time_elapsed_{1.0, 3.6, 5.4, 2.5, 2.1, 0.8, 2.2};
 
     FILE* agent_info_file_ = nullptr;
 
@@ -233,7 +233,6 @@ SimpleRLMoveGenerator::SimpleRLMoveGenerator(std::unique_ptr<T>& agent, bool /*i
     avail_moves[(int)e_move_type::UNIFORM] = std::make_unique<UniformMoveGenerator>();
     avail_moves[(int)e_move_type::MEDIAN] = std::make_unique<MedianMoveGenerator>();
     avail_moves[(int)e_move_type::CENTROID] = std::make_unique<CentroidMoveGenerator>();
-    avail_moves[(int)e_move_type::INTER_LAYRE_UNIFORM] = std::make_unique<UniformInterLayerMoveGenerator>();
     avail_moves[(int)e_move_type::W_CENTROID] = std::make_unique<WeightedCentroidMoveGenerator>();
     avail_moves[(int)e_move_type::W_MEDIAN] = std::make_unique<WeightedMedianMoveGenerator>();
     avail_moves[(int)e_move_type::CRIT_UNIFORM] = std::make_unique<CriticalUniformMoveGenerator>();

From 687d3c914d900b83572eddc3a4dd2bc0610241d2 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 5 Oct 2023 16:45:36 -0400
Subject: [PATCH 173/257] make the to layer num const

---
 vpr/src/place/move_utils.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index b65134ad33c..acc28ce6689 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -831,7 +831,7 @@ bool find_to_loc_median(t_logical_block_type_ptr blk_type,
                         t_pl_loc& to_loc,
                         ClusterBlockId b_from) {
     int num_layers = g_vpr_ctx.device().grid.get_num_layers();
-    int to_layer_num = to_loc.layer;
+    const int to_layer_num = to_loc.layer;
     VTR_ASSERT(to_layer_num != OPEN);
     const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[blk_type->index];
 

From e604e302ab9947c15aaeec3269bb46f7f2cd8422 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 5 Oct 2023 16:50:30 -0400
Subject: [PATCH 174/257] return to original: only uniform move changes the
 layer

---
 vpr/src/place/critical_uniform_move_generator.cpp  | 2 +-
 vpr/src/place/feasible_region_move_generator.cpp   | 2 +-
 vpr/src/place/weighted_centroid_move_generator.cpp | 2 +-
 vpr/src/place/weighted_median_move_generator.cpp   | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/vpr/src/place/critical_uniform_move_generator.cpp b/vpr/src/place/critical_uniform_move_generator.cpp
index a3318343ab4..9fbc93a7645 100644
--- a/vpr/src/place/critical_uniform_move_generator.cpp
+++ b/vpr/src/place/critical_uniform_move_generator.cpp
@@ -28,7 +28,7 @@ e_create_move CriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved
     VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type));
 
     t_pl_loc to;
-    to.layer = find_free_layer(cluster_from_type, from);
+    to.layer = from.layer;
     if (!find_to_loc_uniform(cluster_from_type, rlim, from, to, b_from)) {
         return e_create_move::ABORT;
     }
diff --git a/vpr/src/place/feasible_region_move_generator.cpp b/vpr/src/place/feasible_region_move_generator.cpp
index 739911141e6..e57f660ab72 100644
--- a/vpr/src/place/feasible_region_move_generator.cpp
+++ b/vpr/src/place/feasible_region_move_generator.cpp
@@ -33,7 +33,7 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
 
     /* Calculate the feasible region */
     t_pl_loc to;
-    to.layer = find_free_layer(cluster_from_type, from);
+    to.layer = from.layer;
     int ipin;
     ClusterBlockId bnum;
     int max_x, min_x, max_y, min_y;
diff --git a/vpr/src/place/weighted_centroid_move_generator.cpp b/vpr/src/place/weighted_centroid_move_generator.cpp
index 55bf8913823..f8c33357e63 100644
--- a/vpr/src/place/weighted_centroid_move_generator.cpp
+++ b/vpr/src/place/weighted_centroid_move_generator.cpp
@@ -34,7 +34,7 @@ e_create_move WeightedCentroidMoveGenerator::propose_move(t_pl_blocks_to_be_move
                                     placer_opts.place_dm_rlim};
 
     t_pl_loc to, centroid;
-    to.layer = find_free_layer(cluster_from_type, from);
+    to.layer = from.layer;
 
     /* Calculate the weighted centroid */
     calculate_centroid_loc(b_from, true, centroid, criticalities);
diff --git a/vpr/src/place/weighted_median_move_generator.cpp b/vpr/src/place/weighted_median_move_generator.cpp
index 835f9c9c6fc..8866425997c 100644
--- a/vpr/src/place/weighted_median_move_generator.cpp
+++ b/vpr/src/place/weighted_median_move_generator.cpp
@@ -34,7 +34,7 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
 
     /* Calculate the Edge weighted median region */
     t_pl_loc to;
-    to.layer = find_free_layer(cluster_from_type, from);
+    to.layer = from.layer;
 
     t_bb_cost coords;
     t_bb limit_coords;

From 2a3132752f505d342d1d0b8df72974f1c9af50a8 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 6 Oct 2023 16:06:51 -0400
Subject: [PATCH 175/257] make format

---
 vpr/src/base/read_place.cpp             |  2 +-
 vpr/src/place/median_move_generator.cpp |  2 +-
 vpr/src/place/move_utils.cpp            |  3 ---
 vpr/src/place/place.cpp                 | 19 +++++--------------
 vpr/src/place/placer_context.h          |  2 --
 vpr/src/route/router_lookahead_map.cpp  |  2 +-
 6 files changed, 8 insertions(+), 22 deletions(-)

diff --git a/vpr/src/base/read_place.cpp b/vpr/src/base/read_place.cpp
index f473f7bfdaa..bbe3bfc1a03 100644
--- a/vpr/src/base/read_place.cpp
+++ b/vpr/src/base/read_place.cpp
@@ -346,7 +346,7 @@ void print_place(const char* net_file,
     fclose(fp);
 
     //Calculate the ID of the placement
-    if(is_initial_place) {
+    if (is_initial_place) {
         place_ctx.initial_placement_id = vtr::secure_digest_file(place_file);
     } else {
         place_ctx.placement_id = vtr::secure_digest_file(place_file);
diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index 3a884fa7bc3..f9978f14612 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -73,7 +73,7 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
                 union_bb = union_2d_bb(place_move_ctx.layer_bb_coords[net_id]);
             }
 
-            const auto& net_bb_coords = is_multi_layer ? union_bb: place_move_ctx.bb_coords[net_id];
+            const auto& net_bb_coords = is_multi_layer ? union_bb : place_move_ctx.bb_coords[net_id];
             //use the incremental update of the bb
             bnum = cluster_ctx.clb_nlist.pin_block(pin_id);
             pnum = tile_pin_index(pin_id);
diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index acc28ce6689..11e72d709e7 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -1433,8 +1433,5 @@ std::pair<t_bb, t_bb> union_2d_bb_incr(const std::vector<t_2D_bb>& num_edge_vec,
         }
     }
 
-
     return std::make_pair(merged_num_edge, merged_bb);
-
-
 }
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 1a758e73154..81b34b28852 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -678,7 +678,6 @@ void try_place(const Netlist<>& net_list,
             costs.bb_cost = comp_layer_bb_cost(NORMAL);
         }
 
-
         first_crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */
 
         num_connections = count_connections();
@@ -1487,7 +1486,6 @@ static void update_move_nets(int num_nets_affected) {
             place_move_ctx.num_sink_pin_layer[net_id] = ts_layer_sink_pin_count[net_id];
         }
 
-
         if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET) {
             if (num_layers == 1) {
                 place_move_ctx.bb_num_on_edges[net_id] = ts_bb_edge_new[net_id];
@@ -2433,7 +2431,6 @@ static double comp_bb_cost(e_cost_methods method) {
     return cost;
 }
 
-
 static double comp_layer_bb_cost(e_cost_methods method) {
     double cost = 0;
     double expected_wirelength = 0.0;
@@ -2447,9 +2444,9 @@ static double comp_layer_bb_cost(e_cost_methods method) {
             if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET
                 && method == NORMAL) {
                 get_layer_bb_from_scratch(net_id,
-                                    place_move_ctx.layer_bb_num_on_edges[net_id],
-                                    place_move_ctx.layer_bb_coords[net_id],
-                                    place_move_ctx.num_sink_pin_layer[net_id]);
+                                          place_move_ctx.layer_bb_num_on_edges[net_id],
+                                          place_move_ctx.layer_bb_coords[net_id],
+                                          place_move_ctx.num_sink_pin_layer[net_id]);
             } else {
                 get_non_updateable_layer_bb(net_id,
                                             place_move_ctx.layer_bb_coords[net_id],
@@ -3326,7 +3323,7 @@ static void update_layer_bb(ClusterNetId net_id,
      * The x and y coordinates are the pin's x and y coordinates.         */
     /* IO blocks are considered to be one cell in for simplicity.         */
     //TODO: account for multiple physical pin instances per logical pin
-    const std::vector<t_2D_bb> *curr_bb_edge, *curr_bb_coord;
+    const std::vector<t_2D_bb>*curr_bb_edge, *curr_bb_coord;
     const std::vector<int>* curr_layer_pin_sink_count;
 
     auto& device_ctx = g_vpr_ctx.device();
@@ -3363,7 +3360,6 @@ static void update_layer_bb(ClusterNetId net_id,
                              bb_pin_sink_count_new,
                              is_output_pin);
 
-
     int layer_old = pin_old_loc.layer_num;
     int layer_new = pin_new_loc.layer_num;
     bool layer_changed = (layer_old != layer_new);
@@ -3371,8 +3367,7 @@ static void update_layer_bb(ClusterNetId net_id,
     bb_edge_new = *curr_bb_edge;
     bb_coord_new = *curr_bb_coord;
 
-
-    if(layer_changed) {
+    if (layer_changed) {
         update_bb_layer_changed(net_id,
                                 pin_old_loc,
                                 pin_new_loc,
@@ -3459,7 +3454,6 @@ static inline void update_bb_same_layer(ClusterNetId net_id,
             bb_edge_new[layer_num].xmax = curr_bb_edge[layer_num].xmax + 1;
             bb_coord_new[layer_num].xmax = curr_bb_coord[layer_num].xmax;
         }
-
     }
 
     if (y_new < y_old) {
@@ -3508,7 +3502,6 @@ static inline void update_bb_same_layer(ClusterNetId net_id,
             bb_coord_new[layer_num].ymax = curr_bb_coord[layer_num].ymax;
         }
     }
-
 }
 
 static inline void update_bb_layer_changed(ClusterNetId net_id,
@@ -3519,7 +3512,6 @@ static inline void update_bb_layer_changed(ClusterNetId net_id,
                                            std::vector<int>& bb_pin_sink_count_new,
                                            std::vector<t_2D_bb>& bb_edge_new,
                                            std::vector<t_2D_bb>& bb_coord_new) {
-
     int x_old = pin_old_loc.x;
 
     int y_old = pin_old_loc.y;
@@ -3585,7 +3577,6 @@ static inline void update_bb_layer_changed(ClusterNetId net_id,
                     curr_bb_coord[new_layer_num],
                     bb_edge_new[new_layer_num],
                     bb_coord_new[new_layer_num]);
-
 }
 
 static void update_bb_pin_sink_count(ClusterNetId /* net_id */,
diff --git a/vpr/src/place/placer_context.h b/vpr/src/place/placer_context.h
index f3d2796332c..266a407dab8 100644
--- a/vpr/src/place/placer_context.h
+++ b/vpr/src/place/placer_context.h
@@ -91,14 +91,12 @@ struct PlacerRuntimeContext : public Context {
  */
 struct PlacerMoveContext : public Context {
   public:
-
     // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each of a net's bounding box (to allow efficient updates)
     vtr::vector<ClusterNetId, t_bb> bb_num_on_edges;
 
     // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the bounding box coordinates of a net's bounding box
     vtr::vector<ClusterNetId, t_bb> bb_coords;
 
-
     // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each of a net's bounding box (to allow efficient updates)
     vtr::vector<ClusterNetId, std::vector<t_2D_bb>> layer_bb_num_on_edges;
 
diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp
index bfd5f2109f5..ffb58ad95a6 100644
--- a/vpr/src/route/router_lookahead_map.cpp
+++ b/vpr/src/route/router_lookahead_map.cpp
@@ -500,7 +500,7 @@ std::pair<float, float> MapLookahead::get_expected_delay_and_cong(RRNodeId from_
 
         // Since we assume that inter-layer connections are only from a block output pin to another layer, if the from node
         // is of type CHANX/CHANY, and the sink node is on the other layer, there will no path from that node to the sink
-        if(from_layer_num != to_layer_num) {
+        if (from_layer_num != to_layer_num) {
             expected_delay_cost = std::numeric_limits<float>::max() / 1e12;
             expected_cong_cost = std::numeric_limits<float>::max() / 1e12;
         } else {

From c97de47708f079d2160c066b831375d95f116ab4 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 10 Oct 2023 19:18:50 -0400
Subject: [PATCH 176/257] new experiment: choose layer for last 4 layers

---
 vpr/src/place/critical_uniform_move_generator.cpp  | 1 +
 vpr/src/place/feasible_region_move_generator.cpp   | 1 +
 vpr/src/place/weighted_centroid_move_generator.cpp | 1 +
 vpr/src/place/weighted_median_move_generator.cpp   | 1 +
 4 files changed, 4 insertions(+)

diff --git a/vpr/src/place/critical_uniform_move_generator.cpp b/vpr/src/place/critical_uniform_move_generator.cpp
index 9fbc93a7645..7e0c548688f 100644
--- a/vpr/src/place/critical_uniform_move_generator.cpp
+++ b/vpr/src/place/critical_uniform_move_generator.cpp
@@ -33,6 +33,7 @@ e_create_move CriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved
         return e_create_move::ABORT;
     }
 
+    to.layer = find_free_layer(cluster_from_type, to);
     e_create_move create_move = ::create_move(blocks_affected, b_from, to);
 
     //Check that all the blocks affected by the move would still be in a legal floorplan region after the swap
diff --git a/vpr/src/place/feasible_region_move_generator.cpp b/vpr/src/place/feasible_region_move_generator.cpp
index e57f660ab72..eac836933ab 100644
--- a/vpr/src/place/feasible_region_move_generator.cpp
+++ b/vpr/src/place/feasible_region_move_generator.cpp
@@ -125,6 +125,7 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
             return e_create_move::ABORT;
     }
 
+    to.layer = find_free_layer(cluster_from_type, to);
     e_create_move create_move = ::create_move(blocks_affected, b_from, to);
 
     //Check that all the blocks affected by the move would still be in a legal floorplan region after the swap
diff --git a/vpr/src/place/weighted_centroid_move_generator.cpp b/vpr/src/place/weighted_centroid_move_generator.cpp
index f8c33357e63..cc0b3e06596 100644
--- a/vpr/src/place/weighted_centroid_move_generator.cpp
+++ b/vpr/src/place/weighted_centroid_move_generator.cpp
@@ -43,6 +43,7 @@ e_create_move WeightedCentroidMoveGenerator::propose_move(t_pl_blocks_to_be_move
         return e_create_move::ABORT;
     }
 
+    to.layer = find_free_layer(cluster_from_type, to);
     e_create_move create_move = ::create_move(blocks_affected, b_from, to);
 
     //Check that all the blocks affected by the move would still be in a legal floorplan region after the swap
diff --git a/vpr/src/place/weighted_median_move_generator.cpp b/vpr/src/place/weighted_median_move_generator.cpp
index 8866425997c..e0bd83bcfc7 100644
--- a/vpr/src/place/weighted_median_move_generator.cpp
+++ b/vpr/src/place/weighted_median_move_generator.cpp
@@ -114,6 +114,7 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
         return e_create_move::ABORT;
     }
 
+    to.layer = find_free_layer(cluster_from_type, to);
     e_create_move create_move = ::create_move(blocks_affected, b_from, to);
 
     //Check that all the blocks affected by the move would still be in a legal floorplan region after the swap

From 9167e2fb50e904430e4f59d32e9300881a0a9926 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 10 Oct 2023 19:31:12 -0400
Subject: [PATCH 177/257] revert the previous experiment

---
 vpr/src/place/critical_uniform_move_generator.cpp  | 1 -
 vpr/src/place/feasible_region_move_generator.cpp   | 1 -
 vpr/src/place/weighted_centroid_move_generator.cpp | 1 -
 vpr/src/place/weighted_median_move_generator.cpp   | 1 -
 4 files changed, 4 deletions(-)

diff --git a/vpr/src/place/critical_uniform_move_generator.cpp b/vpr/src/place/critical_uniform_move_generator.cpp
index 7e0c548688f..9fbc93a7645 100644
--- a/vpr/src/place/critical_uniform_move_generator.cpp
+++ b/vpr/src/place/critical_uniform_move_generator.cpp
@@ -33,7 +33,6 @@ e_create_move CriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved
         return e_create_move::ABORT;
     }
 
-    to.layer = find_free_layer(cluster_from_type, to);
     e_create_move create_move = ::create_move(blocks_affected, b_from, to);
 
     //Check that all the blocks affected by the move would still be in a legal floorplan region after the swap
diff --git a/vpr/src/place/feasible_region_move_generator.cpp b/vpr/src/place/feasible_region_move_generator.cpp
index eac836933ab..e57f660ab72 100644
--- a/vpr/src/place/feasible_region_move_generator.cpp
+++ b/vpr/src/place/feasible_region_move_generator.cpp
@@ -125,7 +125,6 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
             return e_create_move::ABORT;
     }
 
-    to.layer = find_free_layer(cluster_from_type, to);
     e_create_move create_move = ::create_move(blocks_affected, b_from, to);
 
     //Check that all the blocks affected by the move would still be in a legal floorplan region after the swap
diff --git a/vpr/src/place/weighted_centroid_move_generator.cpp b/vpr/src/place/weighted_centroid_move_generator.cpp
index cc0b3e06596..f8c33357e63 100644
--- a/vpr/src/place/weighted_centroid_move_generator.cpp
+++ b/vpr/src/place/weighted_centroid_move_generator.cpp
@@ -43,7 +43,6 @@ e_create_move WeightedCentroidMoveGenerator::propose_move(t_pl_blocks_to_be_move
         return e_create_move::ABORT;
     }
 
-    to.layer = find_free_layer(cluster_from_type, to);
     e_create_move create_move = ::create_move(blocks_affected, b_from, to);
 
     //Check that all the blocks affected by the move would still be in a legal floorplan region after the swap
diff --git a/vpr/src/place/weighted_median_move_generator.cpp b/vpr/src/place/weighted_median_move_generator.cpp
index e0bd83bcfc7..8866425997c 100644
--- a/vpr/src/place/weighted_median_move_generator.cpp
+++ b/vpr/src/place/weighted_median_move_generator.cpp
@@ -114,7 +114,6 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
         return e_create_move::ABORT;
     }
 
-    to.layer = find_free_layer(cluster_from_type, to);
     e_create_move create_move = ::create_move(blocks_affected, b_from, to);
 
     //Check that all the blocks affected by the move would still be in a legal floorplan region after the swap

From 06498cf287b36bddae13ad90985b4c0f2184f5c3 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 11 Oct 2023 09:59:22 -0400
Subject: [PATCH 178/257] assign loc layer right after declaration

---
 vpr/src/place/median_move_generator.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index f9978f14612..bf6f0708bef 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -37,6 +37,7 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
 
     /* Calculate the median region */
     t_pl_loc to;
+    to.layer = from.layer;
 
     t_bb coords(OPEN, OPEN, OPEN, OPEN, OPEN, OPEN);
     t_bb limit_coords;
@@ -137,7 +138,6 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
     median_point.y = (limit_coords.ymin + limit_coords.ymax) / 2;
     // TODO: When placer is updated to support moving blocks between dice, this needs to be changed. Currently, we only move blocks within a die.
     median_point.layer = from.layer;
-    to.layer = from.layer;
     if (!find_to_loc_centroid(cluster_from_type, from, median_point, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
     }

From bcd511b2762438df633e794a65c9409d2a66c96c Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 11 Oct 2023 10:02:28 -0400
Subject: [PATCH 179/257] experiment: don't change layer after initial
 placement

---
 vpr/src/place/move_utils.cpp             | 2 +-
 vpr/src/place/uniform_move_generator.cpp | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index 11e72d709e7..a43cfbcfd1b 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -755,7 +755,7 @@ bool find_to_loc_uniform(t_logical_block_type_ptr type,
     //Retrieve the compressed block grid for this block type
     const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[type->index];
     const int num_layers = g_vpr_ctx.device().grid.get_num_layers();
-    const int to_layer_num = get_random_layer(type);
+    const int to_layer_num = to.layer;
     VTR_ASSERT(to_layer_num != OPEN);
 
     //Determine the coordinates in the compressed grid space of the current block
diff --git a/vpr/src/place/uniform_move_generator.cpp b/vpr/src/place/uniform_move_generator.cpp
index 6560c32af24..f46133020fe 100644
--- a/vpr/src/place/uniform_move_generator.cpp
+++ b/vpr/src/place/uniform_move_generator.cpp
@@ -26,6 +26,7 @@ e_create_move UniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks
     VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type));
 
     t_pl_loc to;
+    to.layer = from.layer;
     if (!find_to_loc_uniform(cluster_from_type, rlim, from, to, b_from)) {
         return e_create_move::ABORT;
     }

From 12925b4669b42759f36a8f2c5c2f7df6e11b8e1d Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 11 Oct 2023 10:35:30 -0400
Subject: [PATCH 180/257] Revert "experiment: don't change layer after initial
 placement"

This reverts commit bcd511b2762438df633e794a65c9409d2a66c96c.
---
 vpr/src/place/move_utils.cpp             | 2 +-
 vpr/src/place/uniform_move_generator.cpp | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index a43cfbcfd1b..11e72d709e7 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -755,7 +755,7 @@ bool find_to_loc_uniform(t_logical_block_type_ptr type,
     //Retrieve the compressed block grid for this block type
     const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[type->index];
     const int num_layers = g_vpr_ctx.device().grid.get_num_layers();
-    const int to_layer_num = to.layer;
+    const int to_layer_num = get_random_layer(type);
     VTR_ASSERT(to_layer_num != OPEN);
 
     //Determine the coordinates in the compressed grid space of the current block
diff --git a/vpr/src/place/uniform_move_generator.cpp b/vpr/src/place/uniform_move_generator.cpp
index f46133020fe..6560c32af24 100644
--- a/vpr/src/place/uniform_move_generator.cpp
+++ b/vpr/src/place/uniform_move_generator.cpp
@@ -26,7 +26,6 @@ e_create_move UniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks
     VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type));
 
     t_pl_loc to;
-    to.layer = from.layer;
     if (!find_to_loc_uniform(cluster_from_type, rlim, from, to, b_from)) {
         return e_create_move::ABORT;
     }

From f86a7a7adc46195e91f2bab379059b15ddf90413 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 11 Oct 2023 11:25:17 -0400
Subject: [PATCH 181/257] for median move, choose a layer that has the highest
 number of blocks

---
 vpr/src/place/median_move_generator.cpp | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index bf6f0708bef..6f4660eca3a 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -28,16 +28,18 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
     auto& device_ctx = g_vpr_ctx.device();
     auto& place_move_ctx = g_placer_ctx.mutable_move();
 
-    bool is_multi_layer = (device_ctx.grid.get_num_layers() > 1);
+    const int num_layers = device_ctx.grid.get_num_layers();
+    bool is_multi_layer = (num_layers > 1);
 
     t_pl_loc from = place_ctx.block_locs[b_from].loc;
+    int from_layer = from.layer;
     auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from);
-    auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer});
+    auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from_layer});
     VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type));
 
     /* Calculate the median region */
     t_pl_loc to;
-    to.layer = from.layer;
+    to.layer = from_layer;
 
     t_bb coords(OPEN, OPEN, OPEN, OPEN, OPEN, OPEN);
     t_bb limit_coords;
@@ -48,6 +50,7 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
     //reused to save allocation time
     place_move_ctx.X_coord.clear();
     place_move_ctx.Y_coord.clear();
+    std::vector<int> layer_blk_cnt(0, num_layers);
 
     //true if the net is a feedback from the block to itself
     bool skip_net;
@@ -110,6 +113,13 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
         place_move_ctx.X_coord.push_back(coords.xmax);
         place_move_ctx.Y_coord.push_back(coords.ymin);
         place_move_ctx.Y_coord.push_back(coords.ymax);
+        if (is_multi_layer) {
+            for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+                layer_blk_cnt[layer_num] += place_move_ctx.num_sink_pin_layer[net_id][layer_num];
+            }
+            VTR_ASSERT(layer_blk_cnt[from_layer] > 0);
+            layer_blk_cnt[from_layer]--;
+        }
     }
 
     if ((place_move_ctx.X_coord.empty()) || (place_move_ctx.Y_coord.empty())) {
@@ -138,6 +148,9 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
     median_point.y = (limit_coords.ymin + limit_coords.ymax) / 2;
     // TODO: When placer is updated to support moving blocks between dice, this needs to be changed. Currently, we only move blocks within a die.
     median_point.layer = from.layer;
+    if (is_multi_layer) {
+        to.layer = std::distance(layer_blk_cnt.begin(), std::max_element(layer_blk_cnt.begin(), layer_blk_cnt.end()));
+    }
     if (!find_to_loc_centroid(cluster_from_type, from, median_point, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
     }
@@ -200,8 +213,9 @@ static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb& bb_co
         if (bnum == block_id)
             continue;
         skip_net = false;
-        int x = place_ctx.block_locs[bnum].loc.x + physical_tile_type(bnum)->pin_width_offset[pnum];
-        int y = place_ctx.block_locs[bnum].loc.y + physical_tile_type(bnum)->pin_height_offset[pnum];
+        const auto& block_loc = place_ctx.block_locs[bnum].loc;
+        int x = block_loc.x + physical_tile_type(bnum)->pin_width_offset[pnum];
+        int y = block_loc.y + physical_tile_type(bnum)->pin_height_offset[pnum];
 
         if (!first_block) {
             xmin = x;

From c7afe7a9ce5ee3cff2e1acf5495bfbdf3d6eaedf Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 11 Oct 2023 11:51:35 -0400
Subject: [PATCH 182/257] for weighted median move, choose a layer that has the
 highest number of blocks

---
 vpr/src/place/weighted_median_move_generator.cpp | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/vpr/src/place/weighted_median_move_generator.cpp b/vpr/src/place/weighted_median_move_generator.cpp
index 8866425997c..611f51751d7 100644
--- a/vpr/src/place/weighted_median_move_generator.cpp
+++ b/vpr/src/place/weighted_median_move_generator.cpp
@@ -27,6 +27,9 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
     auto& cluster_ctx = g_vpr_ctx.clustering();
     auto& place_move_ctx = g_placer_ctx.mutable_move();
 
+    int num_layers = g_vpr_ctx.device().grid.get_num_layers();
+    bool is_multi_layer = (num_layers > 1);
+
     t_pl_loc from = place_ctx.block_locs[b_from].loc;
     auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from);
     auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer});
@@ -43,6 +46,7 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
     //reused to save allocation time
     place_move_ctx.X_coord.clear();
     place_move_ctx.Y_coord.clear();
+    std::vector<int> layer_blk_cnt(0, num_layers);
 
     //true if the net is a feedback from the block to itself (all the net terminals are connected to the same block)
     bool skip_net;
@@ -73,6 +77,13 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
         place_move_ctx.X_coord.insert(place_move_ctx.X_coord.end(), ceil(coords.xmax.criticality * CRIT_MULT_FOR_W_MEDIAN), coords.xmax.edge);
         place_move_ctx.Y_coord.insert(place_move_ctx.Y_coord.end(), ceil(coords.ymin.criticality * CRIT_MULT_FOR_W_MEDIAN), coords.ymin.edge);
         place_move_ctx.Y_coord.insert(place_move_ctx.Y_coord.end(), ceil(coords.ymax.criticality * CRIT_MULT_FOR_W_MEDIAN), coords.ymax.edge);
+        if (is_multi_layer) {
+            for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+                layer_blk_cnt[layer_num] += place_move_ctx.num_sink_pin_layer[net_id][layer_num];
+            }
+            VTR_ASSERT(layer_blk_cnt[from.layer] > 0);
+            layer_blk_cnt[from.layer]--;
+        }
     }
 
     if ((place_move_ctx.X_coord.empty()) || (place_move_ctx.Y_coord.empty())) {
@@ -110,6 +121,9 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
     // TODO: Currently, we don't move blocks between different types of layers
     w_median_point.layer = from.layer;
 
+    if (is_multi_layer) {
+        to.layer = std::distance(layer_blk_cnt.begin(), std::max_element(layer_blk_cnt.begin(), layer_blk_cnt.end()));
+    }
     if (!find_to_loc_centroid(cluster_from_type, from, w_median_point, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
     }

From 974ea6be2e4d2aa0948d52a58eada249565e17cc Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 11 Oct 2023 11:54:03 -0400
Subject: [PATCH 183/257] update calculate_centroid_loc to get the centroid of
 layers

---
 vpr/src/place/directed_moves_util.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/vpr/src/place/directed_moves_util.cpp b/vpr/src/place/directed_moves_util.cpp
index db49fc88486..330f1904368 100644
--- a/vpr/src/place/directed_moves_util.cpp
+++ b/vpr/src/place/directed_moves_util.cpp
@@ -25,6 +25,7 @@ void calculate_centroid_loc(ClusterBlockId b_from, bool timing_weights, t_pl_loc
     float acc_weight = 0;
     float acc_x = 0;
     float acc_y = 0;
+    float acc_layer = 0;
     float weight = 1;
 
     int from_block_layer_num = g_vpr_ctx.placement().block_locs[b_from].loc.layer;
@@ -65,6 +66,7 @@ void calculate_centroid_loc(ClusterBlockId b_from, bool timing_weights, t_pl_loc
 
                 acc_x += tile_loc.x * weight;
                 acc_y += tile_loc.y * weight;
+                acc_layer += tile_loc.layer_num * weight;
                 acc_weight += weight;
             }
         }
@@ -84,6 +86,7 @@ void calculate_centroid_loc(ClusterBlockId b_from, bool timing_weights, t_pl_loc
 
             acc_x += tile_loc.x * weight;
             acc_y += tile_loc.y * weight;
+            acc_layer += tile_loc.layer_num * weight;
             acc_weight += weight;
         }
     }
@@ -91,8 +94,7 @@ void calculate_centroid_loc(ClusterBlockId b_from, bool timing_weights, t_pl_loc
     //Calculate the centroid location
     centroid.x = acc_x / acc_weight;
     centroid.y = acc_y / acc_weight;
-    // TODO: For now, we don't move the centroid to a different layer
-    centroid.layer = from_block_layer_num;
+    centroid.layer = acc_layer / acc_weight;
 }
 
 static std::map<std::string, e_reward_function> available_reward_function = {

From bd58a10bb288c3f1ebbaaaa1204b35937bc43a5c Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 11 Oct 2023 11:56:13 -0400
Subject: [PATCH 184/257] assign centroid layer to centroid and weighted
 centroid moves

---
 vpr/src/place/centroid_move_generator.cpp          | 1 +
 vpr/src/place/weighted_centroid_move_generator.cpp | 1 +
 2 files changed, 2 insertions(+)

diff --git a/vpr/src/place/centroid_move_generator.cpp b/vpr/src/place/centroid_move_generator.cpp
index b0998360f1d..516fa1e44f4 100644
--- a/vpr/src/place/centroid_move_generator.cpp
+++ b/vpr/src/place/centroid_move_generator.cpp
@@ -39,6 +39,7 @@ e_create_move CentroidMoveGenerator::propose_move(t_pl_blocks_to_be_moved& block
     /* Calculate the centroid location*/
     calculate_centroid_loc(b_from, false, centroid, nullptr);
 
+    to.layer = centroid.layer;
     /* Find a location near the weighted centroid_loc */
     if (!find_to_loc_centroid(cluster_from_type, from, centroid, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
diff --git a/vpr/src/place/weighted_centroid_move_generator.cpp b/vpr/src/place/weighted_centroid_move_generator.cpp
index f8c33357e63..8e5f2e351a9 100644
--- a/vpr/src/place/weighted_centroid_move_generator.cpp
+++ b/vpr/src/place/weighted_centroid_move_generator.cpp
@@ -39,6 +39,7 @@ e_create_move WeightedCentroidMoveGenerator::propose_move(t_pl_blocks_to_be_move
     /* Calculate the weighted centroid */
     calculate_centroid_loc(b_from, true, centroid, criticalities);
 
+    to.layer = centroid.layer;
     if (!find_to_loc_centroid(cluster_from_type, from, centroid, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
     }

From 0200061030e2be7a32466cc2971d03025145b965 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 11 Oct 2023 12:57:16 -0400
Subject: [PATCH 185/257] fix initialization of layer_blk_cnt

---
 vpr/src/place/median_move_generator.cpp          | 2 +-
 vpr/src/place/weighted_median_move_generator.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index 6f4660eca3a..9e23530dafa 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -50,7 +50,7 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
     //reused to save allocation time
     place_move_ctx.X_coord.clear();
     place_move_ctx.Y_coord.clear();
-    std::vector<int> layer_blk_cnt(0, num_layers);
+    std::vector<int> layer_blk_cnt(num_layers, 0);
 
     //true if the net is a feedback from the block to itself
     bool skip_net;
diff --git a/vpr/src/place/weighted_median_move_generator.cpp b/vpr/src/place/weighted_median_move_generator.cpp
index 611f51751d7..a7bb4f72ef7 100644
--- a/vpr/src/place/weighted_median_move_generator.cpp
+++ b/vpr/src/place/weighted_median_move_generator.cpp
@@ -46,7 +46,7 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
     //reused to save allocation time
     place_move_ctx.X_coord.clear();
     place_move_ctx.Y_coord.clear();
-    std::vector<int> layer_blk_cnt(0, num_layers);
+    std::vector<int> layer_blk_cnt(num_layers, 0);
 
     //true if the net is a feedback from the block to itself (all the net terminals are connected to the same block)
     bool skip_net;

From a56ac72f28d7fdd5bbae5b9b008f424d5c0e26e2 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 11 Oct 2023 12:59:09 -0400
Subject: [PATCH 186/257] Revert "assign centroid layer to centroid and
 weighted centroid moves"

This reverts commit bd58a10bb288c3f1ebbaaaa1204b35937bc43a5c.
---
 vpr/src/place/centroid_move_generator.cpp          | 1 -
 vpr/src/place/weighted_centroid_move_generator.cpp | 1 -
 2 files changed, 2 deletions(-)

diff --git a/vpr/src/place/centroid_move_generator.cpp b/vpr/src/place/centroid_move_generator.cpp
index 516fa1e44f4..b0998360f1d 100644
--- a/vpr/src/place/centroid_move_generator.cpp
+++ b/vpr/src/place/centroid_move_generator.cpp
@@ -39,7 +39,6 @@ e_create_move CentroidMoveGenerator::propose_move(t_pl_blocks_to_be_moved& block
     /* Calculate the centroid location*/
     calculate_centroid_loc(b_from, false, centroid, nullptr);
 
-    to.layer = centroid.layer;
     /* Find a location near the weighted centroid_loc */
     if (!find_to_loc_centroid(cluster_from_type, from, centroid, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
diff --git a/vpr/src/place/weighted_centroid_move_generator.cpp b/vpr/src/place/weighted_centroid_move_generator.cpp
index 8e5f2e351a9..f8c33357e63 100644
--- a/vpr/src/place/weighted_centroid_move_generator.cpp
+++ b/vpr/src/place/weighted_centroid_move_generator.cpp
@@ -39,7 +39,6 @@ e_create_move WeightedCentroidMoveGenerator::propose_move(t_pl_blocks_to_be_move
     /* Calculate the weighted centroid */
     calculate_centroid_loc(b_from, true, centroid, criticalities);
 
-    to.layer = centroid.layer;
     if (!find_to_loc_centroid(cluster_from_type, from, centroid, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
     }

From 9aadfcfe0d08b9d650bcdf17ed1e60644314e88e Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 11 Oct 2023 13:48:16 -0400
Subject: [PATCH 187/257] decrement num block on layer if pin is not of driver
 type

---
 vpr/src/place/median_move_generator.cpp          | 6 ++++--
 vpr/src/place/weighted_median_move_generator.cpp | 6 ++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index 9e23530dafa..d9a2e9bd8c8 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -117,8 +117,10 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
             for (int layer_num = 0; layer_num < num_layers; layer_num++) {
                 layer_blk_cnt[layer_num] += place_move_ctx.num_sink_pin_layer[net_id][layer_num];
             }
-            VTR_ASSERT(layer_blk_cnt[from_layer] > 0);
-            layer_blk_cnt[from_layer]--;
+            if(cluster_ctx.clb_nlist.pin_type(pin_id) != PinType::DRIVER) {
+                VTR_ASSERT(layer_blk_cnt[from_layer] > 0);
+                layer_blk_cnt[from_layer]--;
+            }
         }
     }
 
diff --git a/vpr/src/place/weighted_median_move_generator.cpp b/vpr/src/place/weighted_median_move_generator.cpp
index a7bb4f72ef7..18ea19068e5 100644
--- a/vpr/src/place/weighted_median_move_generator.cpp
+++ b/vpr/src/place/weighted_median_move_generator.cpp
@@ -81,8 +81,10 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
             for (int layer_num = 0; layer_num < num_layers; layer_num++) {
                 layer_blk_cnt[layer_num] += place_move_ctx.num_sink_pin_layer[net_id][layer_num];
             }
-            VTR_ASSERT(layer_blk_cnt[from.layer] > 0);
-            layer_blk_cnt[from.layer]--;
+            if(cluster_ctx.clb_nlist.pin_type(pin_id) != PinType::DRIVER) {
+                VTR_ASSERT(layer_blk_cnt[from.layer] > 0);
+                layer_blk_cnt[from.layer]--;
+            }
         }
     }
 

From 5d89b5fd93bf976c787590910754c978a5975006 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 11 Oct 2023 13:55:06 -0400
Subject: [PATCH 188/257] assign centroid layer

---
 vpr/src/place/centroid_move_generator.cpp          | 1 +
 vpr/src/place/weighted_centroid_move_generator.cpp | 1 +
 2 files changed, 2 insertions(+)

diff --git a/vpr/src/place/centroid_move_generator.cpp b/vpr/src/place/centroid_move_generator.cpp
index b0998360f1d..516fa1e44f4 100644
--- a/vpr/src/place/centroid_move_generator.cpp
+++ b/vpr/src/place/centroid_move_generator.cpp
@@ -39,6 +39,7 @@ e_create_move CentroidMoveGenerator::propose_move(t_pl_blocks_to_be_moved& block
     /* Calculate the centroid location*/
     calculate_centroid_loc(b_from, false, centroid, nullptr);
 
+    to.layer = centroid.layer;
     /* Find a location near the weighted centroid_loc */
     if (!find_to_loc_centroid(cluster_from_type, from, centroid, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
diff --git a/vpr/src/place/weighted_centroid_move_generator.cpp b/vpr/src/place/weighted_centroid_move_generator.cpp
index f8c33357e63..8e5f2e351a9 100644
--- a/vpr/src/place/weighted_centroid_move_generator.cpp
+++ b/vpr/src/place/weighted_centroid_move_generator.cpp
@@ -39,6 +39,7 @@ e_create_move WeightedCentroidMoveGenerator::propose_move(t_pl_blocks_to_be_move
     /* Calculate the weighted centroid */
     calculate_centroid_loc(b_from, true, centroid, criticalities);
 
+    to.layer = centroid.layer;
     if (!find_to_loc_centroid(cluster_from_type, from, centroid, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
     }

From b5839fcaafffaceee0a31925f6d338292441fc95 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 11 Oct 2023 14:48:23 -0400
Subject: [PATCH 189/257] choose from.layer if centroid layer is not valid

---
 vpr/src/place/centroid_move_generator.cpp          | 2 +-
 vpr/src/place/weighted_centroid_move_generator.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/vpr/src/place/centroid_move_generator.cpp b/vpr/src/place/centroid_move_generator.cpp
index 516fa1e44f4..4fbfc4a7521 100644
--- a/vpr/src/place/centroid_move_generator.cpp
+++ b/vpr/src/place/centroid_move_generator.cpp
@@ -39,7 +39,7 @@ e_create_move CentroidMoveGenerator::propose_move(t_pl_blocks_to_be_moved& block
     /* Calculate the centroid location*/
     calculate_centroid_loc(b_from, false, centroid, nullptr);
 
-    to.layer = centroid.layer;
+    to.layer = (centroid.layer < 0) ? from.layer : centroid.layer;
     /* Find a location near the weighted centroid_loc */
     if (!find_to_loc_centroid(cluster_from_type, from, centroid, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
diff --git a/vpr/src/place/weighted_centroid_move_generator.cpp b/vpr/src/place/weighted_centroid_move_generator.cpp
index 8e5f2e351a9..cba14eb0869 100644
--- a/vpr/src/place/weighted_centroid_move_generator.cpp
+++ b/vpr/src/place/weighted_centroid_move_generator.cpp
@@ -39,7 +39,7 @@ e_create_move WeightedCentroidMoveGenerator::propose_move(t_pl_blocks_to_be_move
     /* Calculate the weighted centroid */
     calculate_centroid_loc(b_from, true, centroid, criticalities);
 
-    to.layer = centroid.layer;
+    to.layer = (centroid.layer < 0) ? from.layer : centroid.layer;
     if (!find_to_loc_centroid(cluster_from_type, from, centroid, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
     }

From 982726d8e9320539cdcfd0bb8727222162f4347f Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 11 Oct 2023 19:32:10 -0400
Subject: [PATCH 190/257] experiment: centroid moves change layer + random move

---
 vpr/src/place/median_move_generator.cpp          | 6 +++---
 vpr/src/place/weighted_median_move_generator.cpp | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index d9a2e9bd8c8..ae063fe46b5 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -150,9 +150,9 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
     median_point.y = (limit_coords.ymin + limit_coords.ymax) / 2;
     // TODO: When placer is updated to support moving blocks between dice, this needs to be changed. Currently, we only move blocks within a die.
     median_point.layer = from.layer;
-    if (is_multi_layer) {
-        to.layer = std::distance(layer_blk_cnt.begin(), std::max_element(layer_blk_cnt.begin(), layer_blk_cnt.end()));
-    }
+//    if (is_multi_layer) {
+//        to.layer = std::distance(layer_blk_cnt.begin(), std::max_element(layer_blk_cnt.begin(), layer_blk_cnt.end()));
+//    }
     if (!find_to_loc_centroid(cluster_from_type, from, median_point, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
     }
diff --git a/vpr/src/place/weighted_median_move_generator.cpp b/vpr/src/place/weighted_median_move_generator.cpp
index 18ea19068e5..7b968da297f 100644
--- a/vpr/src/place/weighted_median_move_generator.cpp
+++ b/vpr/src/place/weighted_median_move_generator.cpp
@@ -123,9 +123,9 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
     // TODO: Currently, we don't move blocks between different types of layers
     w_median_point.layer = from.layer;
 
-    if (is_multi_layer) {
-        to.layer = std::distance(layer_blk_cnt.begin(), std::max_element(layer_blk_cnt.begin(), layer_blk_cnt.end()));
-    }
+//    if (is_multi_layer) {
+//        to.layer = std::distance(layer_blk_cnt.begin(), std::max_element(layer_blk_cnt.begin(), layer_blk_cnt.end()));
+//    }
     if (!find_to_loc_centroid(cluster_from_type, from, w_median_point, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
     }

From 6d25fa1a5d7647aa906ce443bfc5eb8e13304256 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 12 Oct 2023 10:58:37 -0400
Subject: [PATCH 191/257] weighted median + median choose the layer with
 maximum number of sinks

---
 vpr/src/place/median_move_generator.cpp          | 6 +++---
 vpr/src/place/weighted_median_move_generator.cpp | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index ae063fe46b5..d9a2e9bd8c8 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -150,9 +150,9 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
     median_point.y = (limit_coords.ymin + limit_coords.ymax) / 2;
     // TODO: When placer is updated to support moving blocks between dice, this needs to be changed. Currently, we only move blocks within a die.
     median_point.layer = from.layer;
-//    if (is_multi_layer) {
-//        to.layer = std::distance(layer_blk_cnt.begin(), std::max_element(layer_blk_cnt.begin(), layer_blk_cnt.end()));
-//    }
+    if (is_multi_layer) {
+        to.layer = std::distance(layer_blk_cnt.begin(), std::max_element(layer_blk_cnt.begin(), layer_blk_cnt.end()));
+    }
     if (!find_to_loc_centroid(cluster_from_type, from, median_point, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
     }
diff --git a/vpr/src/place/weighted_median_move_generator.cpp b/vpr/src/place/weighted_median_move_generator.cpp
index 7b968da297f..18ea19068e5 100644
--- a/vpr/src/place/weighted_median_move_generator.cpp
+++ b/vpr/src/place/weighted_median_move_generator.cpp
@@ -123,9 +123,9 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
     // TODO: Currently, we don't move blocks between different types of layers
     w_median_point.layer = from.layer;
 
-//    if (is_multi_layer) {
-//        to.layer = std::distance(layer_blk_cnt.begin(), std::max_element(layer_blk_cnt.begin(), layer_blk_cnt.end()));
-//    }
+    if (is_multi_layer) {
+        to.layer = std::distance(layer_blk_cnt.begin(), std::max_element(layer_blk_cnt.begin(), layer_blk_cnt.end()));
+    }
     if (!find_to_loc_centroid(cluster_from_type, from, w_median_point, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
     }

From c140f9d77a6f7397ad26484bae671a6c3b89c770 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 12 Oct 2023 11:07:49 -0400
Subject: [PATCH 192/257] add a type to store BB type

---
 vpr/src/base/vpr_types.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 1bbe65f87fb..b1c3511a103 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -1010,6 +1010,12 @@ enum e_place_algorithm {
     SLACK_TIMING_PLACE
 };
 
+enum e_place_bounding_box_mode {
+    AUTO_BB,
+    CUBE_BB,
+    PER_LAYER_BB
+};
+
 /**
  * @brief Provides a wrapper around enum e_place_algorithm.
  *

From 755125cc4c388b2d352ad77f17bcf4c137a39455 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 12 Oct 2023 11:29:04 -0400
Subject: [PATCH 193/257] add ParsePlaceBoundingBox

---
 vpr/src/base/read_options.cpp | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index cf5b2eadc04..0e7cc21afbd 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -430,6 +430,41 @@ struct ParsePlaceAlgorithm {
     }
 };
 
+struct ParsePlaceBoundingBox {
+    ConvertedValue<e_place_bounding_box_mode> from_str(std::string str) {
+        ConvertedValue<e_place_bounding_box_mode> conv_value;
+        if (str == "auto_bb") {
+            conv_value.set_value(AUTO_BB);
+        } else if (str == "cube_bb") {
+            conv_value.set_value(CUBE_BB);
+        } else if (str == "per_layer_bb") {
+            conv_value.set_value(PER_LAYER_BB);
+        } else {
+            std::stringstream msg;
+            msg << "Invalid conversion from '" << str << "' to e_place_bounding_box_mode (expected one of: " << argparse::join(default_choices(), ", ") << ")";
+            conv_value.set_error(msg.str());
+        }
+        return conv_value;
+    }
+
+    ConvertedValue<std::string> to_str(e_place_bounding_box_mode val) {
+        ConvertedValue<std::string> conv_value;
+        if (val == AUTO_BB) {
+            conv_value.set_value("auto_bb");
+        } else if (val == CUBE_BB) {
+            conv_value.set_value("cube_bb");
+        } else {
+            VTR_ASSERT(val == PER_LAYER_BB);
+            conv_value.set_value("per_layer_bb");
+        }
+        return conv_value;
+    }
+
+    std::vector<std::string> default_choices() {
+        return {"auto_bb", "cube_bb", "per_layer_bb"};
+    }
+};
+
 struct ParsePlaceAgentAlgorithm {
     ConvertedValue<e_agent_algorithm> from_str(std::string str) {
         ConvertedValue<e_agent_algorithm> conv_value;

From 360870b6cc2f0babbe24524128da149f47aff92b Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 12 Oct 2023 11:32:47 -0400
Subject: [PATCH 194/257] add a parameter to vpr to select bounding box mode

---
 vpr/src/base/SetupVPR.cpp     |  1 +
 vpr/src/base/read_options.cpp | 15 +++++++++++++++
 vpr/src/base/read_options.h   |  1 +
 vpr/src/base/vpr_types.h      |  1 +
 4 files changed, 18 insertions(+)

diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp
index 42f9b913b65..e596bd51c43 100644
--- a/vpr/src/base/SetupVPR.cpp
+++ b/vpr/src/base/SetupVPR.cpp
@@ -663,6 +663,7 @@ static void SetupPlacerOpts(const t_options& Options, t_placer_opts* PlacerOpts)
     PlacerOpts->place_static_move_prob = Options.place_static_move_prob;
     PlacerOpts->place_static_notiming_move_prob = Options.place_static_notiming_move_prob;
     PlacerOpts->place_high_fanout_net = Options.place_high_fanout_net;
+    PlacerOpts->place_bounding_box_mode = Options.place_bounding_box_mode;
     PlacerOpts->RL_agent_placement = Options.RL_agent_placement;
     PlacerOpts->place_agent_multistate = Options.place_agent_multistate;
     PlacerOpts->place_checkpointing = Options.place_checkpointing;
diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 0e7cc21afbd..45808be4003 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -2047,6 +2047,21 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg
         .default_value("10")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
+    place_grp.add_argument<e_place_bounding_box_mode, ParsePlaceBoundingBox>(args.place_bounding_box_mode, "--place_bounding_box_mode")
+        .help(
+            "Specifies the type of bounding box to be used in 3D architectures.\n"
+            "\n"
+            "MODE options:\n"
+            "  auto_bb     : Automatically determine the appropriate bounding box based on the connections between layers.\n"
+            "  cube_bb            : Use 3D bounding boxes.\n"
+            "  per_layer_bb     : Use per-layer bounding boxes.\n"
+            "\n"
+            "Choose one of the available modes to define the behavior of bounding boxes in your 3D architecture. The default mode is 'automatic'.")
+        .default_value("auto_bb")
+        .choices({"auto_bb", "cube_bb", "per_layer_bb"})
+        .show_in(argparse::ShowIn::HELP_ONLY);
+
+
     place_grp.add_argument<bool, ParseOnOff>(args.RL_agent_placement, "--RL_agent_placement")
         .help(
             "Uses a Reinforcement Learning (RL) agent in choosing the appropiate move type in placement."
diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h
index fa4bba34420..d1edc5ef2b2 100644
--- a/vpr/src/base/read_options.h
+++ b/vpr/src/base/read_options.h
@@ -128,6 +128,7 @@ struct t_options {
     argparse::ArgValue<std::vector<float>> place_static_move_prob;
     argparse::ArgValue<std::vector<float>> place_static_notiming_move_prob;
     argparse::ArgValue<int> place_high_fanout_net;
+    argparse::ArgValue<e_place_bounding_box_mode> place_bounding_box_mode;
 
     argparse::ArgValue<bool> RL_agent_placement;
     argparse::ArgValue<bool> place_agent_multistate;
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index b1c3511a103..cb611c4acc4 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -1232,6 +1232,7 @@ struct t_placer_opts {
     bool place_agent_multistate;
     bool place_checkpointing;
     int place_high_fanout_net;
+    e_place_bounding_box_mode place_bounding_box_mode;
     e_agent_algorithm place_agent_algorithm;
     float place_agent_epsilon;
     float place_agent_gamma;

From 04c3c7573d207d89d394fb8b1f410ebe6da73769 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 12 Oct 2023 11:47:10 -0400
Subject: [PATCH 195/257] add an additional parameter to hierarchy of functions
 that try to update bb to show the mode of bounding box.

---
 vpr/src/place/place.cpp | 60 +++++++++++++++++++++++++++++++++--------
 1 file changed, 49 insertions(+), 11 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 81b34b28852..90b1ce80a68 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -291,7 +291,8 @@ static e_move_result try_swap(const t_annealing_state* state,
                               MoveTypeStat& move_type_stat,
                               const t_place_algorithm& place_algorithm,
                               float timing_bb_factor,
-                              bool manual_move_enabled);
+                              bool manual_move_enabled,
+                              const bool bounding_box_mode);
 
 static void check_place(const t_placer_costs& costs,
                         const PlaceDelayModel* delay_model,
@@ -308,7 +309,21 @@ static int check_placement_consistency();
 static int check_block_placement_consistency();
 static int check_macro_placement_consistency();
 
-static float starting_t(const t_annealing_state* state, t_placer_costs* costs, t_annealing_sched annealing_sched, const PlaceDelayModel* delay_model, PlacerCriticalities* criticalities, PlacerSetupSlacks* setup_slacks, SetupTimingInfo* timing_info, MoveGenerator& move_generator, ManualMoveGenerator& manual_move_generator, NetPinTimingInvalidator* pin_timing_invalidator, t_pl_blocks_to_be_moved& blocks_affected, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts, MoveTypeStat& move_type_stat);
+static float starting_t(const t_annealing_state* state,
+                        t_placer_costs* costs,
+                        t_annealing_sched annealing_sched,
+                        const PlaceDelayModel* delay_model,
+                        PlacerCriticalities* criticalities,
+                        PlacerSetupSlacks* setup_slacks,
+                        SetupTimingInfo* timing_info,
+                        MoveGenerator& move_generator,
+                        ManualMoveGenerator& manual_move_generator,
+                        NetPinTimingInvalidator* pin_timing_invalidator,
+                        t_pl_blocks_to_be_moved& blocks_affected,
+                        const t_placer_opts& placer_opts,
+                        const t_noc_opts& noc_opts,
+                        MoveTypeStat& move_type_stat,
+                        const bool bounding_box_mode);
 
 static int count_connections();
 
@@ -395,6 +410,7 @@ static int find_affected_nets_and_update_costs(
     const t_place_algorithm& place_algorithm,
     const PlaceDelayModel* delay_model,
     const PlacerCriticalities* criticalities,
+    const bool bounding_box_mode,
     t_pl_blocks_to_be_moved& blocks_affected,
     double& bb_delta_c,
     double& timing_delta_c);
@@ -476,7 +492,8 @@ static void placement_inner_loop(const t_annealing_state* state,
                                  SetupTimingInfo* timing_info,
                                  const t_place_algorithm& place_algorithm,
                                  MoveTypeStat& move_type_stat,
-                                 float timing_bb_factor);
+                                 float timing_bb_factor,
+                                 const bool bounding_box_mode);
 
 static void recompute_costs_from_scratch(const t_placer_opts& placer_opts,
                                          const t_noc_opts& noc_opts,
@@ -554,6 +571,8 @@ void try_place(const Netlist<>& net_list,
     float first_crit_exponent, first_rlim, first_t;
     int first_move_lim;
 
+    bool bounding_box_mode = false;
+
     int num_layers = device_ctx.grid.get_num_layers();
 
     t_placer_costs costs(placer_opts.place_algorithm);
@@ -887,7 +906,7 @@ void try_place(const Netlist<>& net_list,
                          place_delay_model.get(), placer_criticalities.get(),
                          placer_setup_slacks.get(), timing_info.get(), *move_generator,
                          *manual_move_generator, pin_timing_invalidator.get(),
-                         blocks_affected, placer_opts, noc_opts, move_type_stat);
+                         blocks_affected, placer_opts, noc_opts, move_type_stat, bounding_box_mode);
 
     if (!placer_opts.move_stats_file.empty()) {
         f_move_stats_file = std::unique_ptr<FILE, decltype(&vtr::fclose)>(
@@ -958,7 +977,8 @@ void try_place(const Netlist<>& net_list,
                                  *current_move_generator, *manual_move_generator,
                                  blocks_affected, timing_info.get(),
                                  placer_opts.place_algorithm, move_type_stat,
-                                 timing_bb_factor);
+                                 timing_bb_factor,
+                                 bounding_box_mode);
 
             //move the update used move_generator to its original variable
             update_move_generator(move_generator, move_generator2, agent_state,
@@ -1023,7 +1043,8 @@ void try_place(const Netlist<>& net_list,
                              *current_move_generator, *manual_move_generator,
                              blocks_affected, timing_info.get(),
                              placer_opts.place_quench_algorithm, move_type_stat,
-                             timing_bb_factor);
+                             timing_bb_factor,
+                             bounding_box_mode);
 
         //move the update used move_generator to its original variable
         update_move_generator(move_generator, move_generator2, agent_state,
@@ -1216,7 +1237,8 @@ static void placement_inner_loop(const t_annealing_state* state,
                                  SetupTimingInfo* timing_info,
                                  const t_place_algorithm& place_algorithm,
                                  MoveTypeStat& move_type_stat,
-                                 float timing_bb_factor) {
+                                 float timing_bb_factor,
+                                 const bool bounding_box_mode) {
     int inner_crit_iter_count, inner_iter;
 
     int inner_placement_save_count = 0; //How many times have we dumped placement to a file this temperature?
@@ -1390,7 +1412,21 @@ static int count_connections() {
 }
 
 ///@brief Find the starting temperature for the annealing loop.
-static float starting_t(const t_annealing_state* state, t_placer_costs* costs, t_annealing_sched annealing_sched, const PlaceDelayModel* delay_model, PlacerCriticalities* criticalities, PlacerSetupSlacks* setup_slacks, SetupTimingInfo* timing_info, MoveGenerator& move_generator, ManualMoveGenerator& manual_move_generator, NetPinTimingInvalidator* pin_timing_invalidator, t_pl_blocks_to_be_moved& blocks_affected, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts, MoveTypeStat& move_type_stat) {
+static float starting_t(const t_annealing_state* state,
+                        t_placer_costs* costs,
+                        t_annealing_sched annealing_sched,
+                        const PlaceDelayModel* delay_model,
+                        PlacerCriticalities* criticalities,
+                        PlacerSetupSlacks* setup_slacks,
+                        SetupTimingInfo* timing_info,
+                        MoveGenerator& move_generator,
+                        ManualMoveGenerator& manual_move_generator,
+                        NetPinTimingInvalidator* pin_timing_invalidator,
+                        t_pl_blocks_to_be_moved& blocks_affected,
+                        const t_placer_opts& placer_opts,
+                        const t_noc_opts& noc_opts,
+                        MoveTypeStat& move_type_stat,
+                        const bool bounding_box_mode) {
     if (annealing_sched.type == USER_SCHED) {
         return (annealing_sched.init_t);
     }
@@ -1423,7 +1459,7 @@ static float starting_t(const t_annealing_state* state, t_placer_costs* costs, t
                                              manual_move_generator, timing_info, pin_timing_invalidator,
                                              blocks_affected, delay_model, criticalities, setup_slacks,
                                              placer_opts, noc_opts, move_type_stat, placer_opts.place_algorithm,
-                                             REWARD_BB_TIMING_RELATIVE_WEIGHT, manual_move_enabled);
+                                             REWARD_BB_TIMING_RELATIVE_WEIGHT, manual_move_enabled, bounding_box_mode);
 
         if (swap_result == ACCEPTED) {
             num_accepted++;
@@ -1543,7 +1579,8 @@ static e_move_result try_swap(const t_annealing_state* state,
                               MoveTypeStat& move_type_stat,
                               const t_place_algorithm& place_algorithm,
                               float timing_bb_factor,
-                              bool manual_move_enabled) {
+                              bool manual_move_enabled,
+                              const bool bounding_box_mode) {
     /* Picks some block and moves it to another spot.  If this spot is   *
      * occupied, switch the blocks.  Assess the change in cost function. *
      * rlim is the range limiter.                                        *
@@ -1644,7 +1681,7 @@ static e_move_result try_swap(const t_annealing_state* state,
         //Also find all the pins affected by the swap, and calculates new connection
         //delays and timing costs and store them in proposed_* data structures.
         int num_nets_affected = find_affected_nets_and_update_costs(
-            place_algorithm, delay_model, criticalities, blocks_affected,
+            place_algorithm, delay_model, criticalities, bounding_box_mode, blocks_affected,
             bb_delta_c, timing_delta_c);
 
         //For setup slack analysis, we first do a timing analysis to get the newest
@@ -1883,6 +1920,7 @@ static int find_affected_nets_and_update_costs(
     const t_place_algorithm& place_algorithm,
     const PlaceDelayModel* delay_model,
     const PlacerCriticalities* criticalities,
+    const bool bounding_box_mode,
     t_pl_blocks_to_be_moved& blocks_affected,
     double& bb_delta_c,
     double& timing_delta_c) {

From 68ac9b652de610c03713cdcd1b93680b46378a63 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 12 Oct 2023 11:49:14 -0400
Subject: [PATCH 196/257] fix a typo

---
 vpr/src/place/place.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 90b1ce80a68..c5201988625 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -1254,7 +1254,8 @@ static void placement_inner_loop(const t_annealing_state* state,
         e_move_result swap_result = try_swap(state, costs, move_generator,
                                              manual_move_generator, timing_info, pin_timing_invalidator,
                                              blocks_affected, delay_model, criticalities, setup_slacks,
-                                             placer_opts, noc_opts, move_type_stat, place_algorithm, timing_bb_factor, manual_move_enabled);
+                                             placer_opts, noc_opts, move_type_stat, place_algorithm,
+                                             timing_bb_factor, manual_move_enabled, bounding_box_mode);
 
         if (swap_result == ACCEPTED) {
             /* Move was accepted.  Update statistics that are useful for the annealing schedule. */

From cb7d470ca27ac742161f290f82681062e1240159 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 12 Oct 2023 17:08:21 -0400
Subject: [PATCH 197/257] remove some unused library

---
 libs/librrgraph/src/base/rr_graph_utils.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/libs/librrgraph/src/base/rr_graph_utils.cpp b/libs/librrgraph/src/base/rr_graph_utils.cpp
index 6552c8c214b..d9761f25635 100644
--- a/libs/librrgraph/src/base/rr_graph_utils.cpp
+++ b/libs/librrgraph/src/base/rr_graph_utils.cpp
@@ -5,13 +5,9 @@
  ***************************************************************************/
 #include <queue>
 #include <random>
-#include <algorithm>
 
 #include "rr_graph_utils.h"
 
-#include "vtr_memory.h"
-#include "vtr_time.h"
-
 #include "vpr_error.h"
 
 #include "rr_graph_obj.h"

From 90ae26d3f73efcead73e01acda2a1fcf3c45f1bc Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 12 Oct 2023 17:32:57 -0400
Subject: [PATCH 198/257] impl inter_layer_connections_limited_to_opin

---
 libs/librrgraph/src/base/rr_graph_utils.cpp | 23 +++++++++++++++++++++
 libs/librrgraph/src/base/rr_graph_utils.h   |  1 +
 2 files changed, 24 insertions(+)

diff --git a/libs/librrgraph/src/base/rr_graph_utils.cpp b/libs/librrgraph/src/base/rr_graph_utils.cpp
index d9761f25635..11b6a569c9e 100644
--- a/libs/librrgraph/src/base/rr_graph_utils.cpp
+++ b/libs/librrgraph/src/base/rr_graph_utils.cpp
@@ -115,4 +115,27 @@ vtr::vector<RRNodeId, std::vector<RREdgeId>> get_fan_in_list(const RRGraphView&
         });
 
     return node_fan_in_list;
+}
+
+bool inter_layer_connections_limited_to_opin(const RRGraphView& rr_graph) {
+    bool limited_to_opin = true;
+    for (const auto& from_node : rr_graph.nodes()) {
+        for (t_edge_size edge : rr_graph.edges(from_node)) {
+            RRNodeId to_node = rr_graph.edge_sink_node(from_node, edge);
+            int from_layer = rr_graph.node_layer(from_node);
+            int to_layer = rr_graph.node_layer(to_node);
+
+            if (from_layer != to_layer) {
+                if (rr_graph.node_type(from_node) != e_rr_type::OPIN) {
+                    limited_to_opin = false;
+                    break;
+                }
+            }
+        }
+        if (!limited_to_opin) {
+            break;
+        }
+    }
+
+    return limited_to_opin;
 }
\ No newline at end of file
diff --git a/libs/librrgraph/src/base/rr_graph_utils.h b/libs/librrgraph/src/base/rr_graph_utils.h
index 0725bcd0cf9..aeff17b8d5d 100644
--- a/libs/librrgraph/src/base/rr_graph_utils.h
+++ b/libs/librrgraph/src/base/rr_graph_utils.h
@@ -48,4 +48,5 @@ vtr::vector<RRNodeId, std::vector<RREdgeId>> get_fan_in_list(const RRGraphView&
 int seg_index_of_cblock(const RRGraphView& rr_graph, t_rr_type from_rr_type, int to_node);
 int seg_index_of_sblock(const RRGraphView& rr_graph, int from_node, int to_node);
 
+bool inter_layer_connections_limited_to_opin(const RRGraphView& rr_graph);
 #endif
\ No newline at end of file

From ff118edaefeab36aa098ab4c99abe3478eac633b Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 12 Oct 2023 17:36:35 -0400
Subject: [PATCH 199/257] determine the type of bb in try_place and pass it to
 related functions

---
 vpr/src/place/place.cpp | 62 +++++++++++++++++++++++++++++------------
 1 file changed, 44 insertions(+), 18 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index c5201988625..eaea9b69bc5 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -254,6 +254,9 @@ std::unique_ptr<FILE, decltype(&vtr::fclose)> f_move_stats_file(nullptr,
 void print_clb_placement(const char* fname);
 #endif
 
+static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode,
+                       const RRGraphView& rr_graph);
+
 static void alloc_and_load_placement_structs(float place_cost_exp,
                                              const t_placer_opts& placer_opts,
                                              const t_noc_opts& noc_opts,
@@ -292,7 +295,7 @@ static e_move_result try_swap(const t_annealing_state* state,
                               const t_place_algorithm& place_algorithm,
                               float timing_bb_factor,
                               bool manual_move_enabled,
-                              const bool bounding_box_mode);
+                              const bool cube_bb);
 
 static void check_place(const t_placer_costs& costs,
                         const PlaceDelayModel* delay_model,
@@ -323,7 +326,7 @@ static float starting_t(const t_annealing_state* state,
                         const t_placer_opts& placer_opts,
                         const t_noc_opts& noc_opts,
                         MoveTypeStat& move_type_stat,
-                        const bool bounding_box_mode);
+                        const bool cube_bb);
 
 static int count_connections();
 
@@ -410,7 +413,7 @@ static int find_affected_nets_and_update_costs(
     const t_place_algorithm& place_algorithm,
     const PlaceDelayModel* delay_model,
     const PlacerCriticalities* criticalities,
-    const bool bounding_box_mode,
+    const bool cube_bb,
     t_pl_blocks_to_be_moved& blocks_affected,
     double& bb_delta_c,
     double& timing_delta_c);
@@ -493,7 +496,7 @@ static void placement_inner_loop(const t_annealing_state* state,
                                  const t_place_algorithm& place_algorithm,
                                  MoveTypeStat& move_type_stat,
                                  float timing_bb_factor,
-                                 const bool bounding_box_mode);
+                                 const bool cube_bb);
 
 static void recompute_costs_from_scratch(const t_placer_opts& placer_opts,
                                          const t_noc_opts& noc_opts,
@@ -571,7 +574,7 @@ void try_place(const Netlist<>& net_list,
     float first_crit_exponent, first_rlim, first_t;
     int first_move_lim;
 
-    bool bounding_box_mode = false;
+    bool cube_bb;
 
     int num_layers = device_ctx.grid.get_num_layers();
 
@@ -628,6 +631,9 @@ void try_place(const Netlist<>& net_list,
         }
     }
 
+    cube_bb = is_cube_bb(placer_opts.place_bounding_box_mode,
+                         device_ctx.rr_graph);
+
     int move_lim = 1;
     move_lim = (int)(annealing_sched.inner_num
                      * pow(net_list.blocks().size(), 1.3333));
@@ -906,7 +912,7 @@ void try_place(const Netlist<>& net_list,
                          place_delay_model.get(), placer_criticalities.get(),
                          placer_setup_slacks.get(), timing_info.get(), *move_generator,
                          *manual_move_generator, pin_timing_invalidator.get(),
-                         blocks_affected, placer_opts, noc_opts, move_type_stat, bounding_box_mode);
+                         blocks_affected, placer_opts, noc_opts, move_type_stat, cube_bb);
 
     if (!placer_opts.move_stats_file.empty()) {
         f_move_stats_file = std::unique_ptr<FILE, decltype(&vtr::fclose)>(
@@ -978,7 +984,7 @@ void try_place(const Netlist<>& net_list,
                                  blocks_affected, timing_info.get(),
                                  placer_opts.place_algorithm, move_type_stat,
                                  timing_bb_factor,
-                                 bounding_box_mode);
+                                 cube_bb);
 
             //move the update used move_generator to its original variable
             update_move_generator(move_generator, move_generator2, agent_state,
@@ -1044,7 +1050,7 @@ void try_place(const Netlist<>& net_list,
                              blocks_affected, timing_info.get(),
                              placer_opts.place_quench_algorithm, move_type_stat,
                              timing_bb_factor,
-                             bounding_box_mode);
+                             cube_bb);
 
         //move the update used move_generator to its original variable
         update_move_generator(move_generator, move_generator2, agent_state,
@@ -1238,7 +1244,7 @@ static void placement_inner_loop(const t_annealing_state* state,
                                  const t_place_algorithm& place_algorithm,
                                  MoveTypeStat& move_type_stat,
                                  float timing_bb_factor,
-                                 const bool bounding_box_mode) {
+                                 const bool cube_bb) {
     int inner_crit_iter_count, inner_iter;
 
     int inner_placement_save_count = 0; //How many times have we dumped placement to a file this temperature?
@@ -1255,7 +1261,7 @@ static void placement_inner_loop(const t_annealing_state* state,
                                              manual_move_generator, timing_info, pin_timing_invalidator,
                                              blocks_affected, delay_model, criticalities, setup_slacks,
                                              placer_opts, noc_opts, move_type_stat, place_algorithm,
-                                             timing_bb_factor, manual_move_enabled, bounding_box_mode);
+                                             timing_bb_factor, manual_move_enabled, cube_bb);
 
         if (swap_result == ACCEPTED) {
             /* Move was accepted.  Update statistics that are useful for the annealing schedule. */
@@ -1427,7 +1433,7 @@ static float starting_t(const t_annealing_state* state,
                         const t_placer_opts& placer_opts,
                         const t_noc_opts& noc_opts,
                         MoveTypeStat& move_type_stat,
-                        const bool bounding_box_mode) {
+                        const bool cube_bb) {
     if (annealing_sched.type == USER_SCHED) {
         return (annealing_sched.init_t);
     }
@@ -1460,7 +1466,7 @@ static float starting_t(const t_annealing_state* state,
                                              manual_move_generator, timing_info, pin_timing_invalidator,
                                              blocks_affected, delay_model, criticalities, setup_slacks,
                                              placer_opts, noc_opts, move_type_stat, placer_opts.place_algorithm,
-                                             REWARD_BB_TIMING_RELATIVE_WEIGHT, manual_move_enabled, bounding_box_mode);
+                                             REWARD_BB_TIMING_RELATIVE_WEIGHT, manual_move_enabled, cube_bb);
 
         if (swap_result == ACCEPTED) {
             num_accepted++;
@@ -1581,7 +1587,7 @@ static e_move_result try_swap(const t_annealing_state* state,
                               const t_place_algorithm& place_algorithm,
                               float timing_bb_factor,
                               bool manual_move_enabled,
-                              const bool bounding_box_mode) {
+                              const bool cube_bb) {
     /* Picks some block and moves it to another spot.  If this spot is   *
      * occupied, switch the blocks.  Assess the change in cost function. *
      * rlim is the range limiter.                                        *
@@ -1682,7 +1688,7 @@ static e_move_result try_swap(const t_annealing_state* state,
         //Also find all the pins affected by the swap, and calculates new connection
         //delays and timing costs and store them in proposed_* data structures.
         int num_nets_affected = find_affected_nets_and_update_costs(
-            place_algorithm, delay_model, criticalities, bounding_box_mode, blocks_affected,
+            place_algorithm, delay_model, criticalities, cube_bb, blocks_affected,
             bb_delta_c, timing_delta_c);
 
         //For setup slack analysis, we first do a timing analysis to get the newest
@@ -1895,6 +1901,27 @@ static e_move_result try_swap(const t_annealing_state* state,
     return move_outcome;
 }
 
+static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode,
+                       const RRGraphView& rr_graph) {
+    bool cube_bb;
+
+    if (place_bb_mode == AUTO_BB) {
+        if (inter_layer_connections_limited_to_opin(rr_graph)) {
+            cube_bb = false;
+        } else {
+            cube_bb = true;
+        }
+    } else if (place_bb_mode == CUBE_BB) {
+        cube_bb = true;
+    } else {
+        VTR_ASSERT_SAFE(place_bb_mode == PER_LAYER_BB);
+        cube_bb = false;
+    }
+
+    return cube_bb;
+
+}
+
 /**
  * @brief Find all the nets and pins affected by this swap and update costs.
  *
@@ -1921,14 +1948,13 @@ static int find_affected_nets_and_update_costs(
     const t_place_algorithm& place_algorithm,
     const PlaceDelayModel* delay_model,
     const PlacerCriticalities* criticalities,
-    const bool bounding_box_mode,
+    const bool cube_bb,
     t_pl_blocks_to_be_moved& blocks_affected,
     double& bb_delta_c,
     double& timing_delta_c) {
     VTR_ASSERT_SAFE(bb_delta_c == 0.);
     VTR_ASSERT_SAFE(timing_delta_c == 0.);
     auto& cluster_ctx = g_vpr_ctx.clustering();
-    const int num_layers = g_vpr_ctx.device().grid.get_num_layers();
 
     int num_affected_nets = 0;
 
@@ -1951,7 +1977,7 @@ static int find_affected_nets_and_update_costs(
             record_affected_net(net_id, num_affected_nets);
 
             /* Update the net bounding boxes. */
-            if (num_layers == 1) {
+            if (cube_bb) {
                 update_net_bb(net_id, blocks_affected, iblk, blk, blk_pin);
             } else {
                 update_net_layer_bb(net_id, blocks_affected, iblk, blk, blk_pin);
@@ -1971,7 +1997,7 @@ static int find_affected_nets_and_update_costs(
          inet_affected++) {
         ClusterNetId net_id = ts_nets_to_update[inet_affected];
 
-        if (num_layers == 1) {
+        if (cube_bb) {
             proposed_net_cost[net_id] = get_net_cost(net_id,
                                                      ts_bb_coord_new[net_id]);
         } else {

From 56703eb80020eee30e5a17411f5dff9fb59de95a Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 13 Oct 2023 10:50:15 -0400
Subject: [PATCH 200/257] if the architecture has only one layer, use cube bb

---
 vpr/src/place/place.cpp | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index eaea9b69bc5..01d9cfb465b 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -1904,18 +1904,24 @@ static e_move_result try_swap(const t_annealing_state* state,
 static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode,
                        const RRGraphView& rr_graph) {
     bool cube_bb;
+    const int number_layers = g_vpr_ctx.device().grid.get_num_layers();
 
-    if (place_bb_mode == AUTO_BB) {
-        if (inter_layer_connections_limited_to_opin(rr_graph)) {
-            cube_bb = false;
-        } else {
-            cube_bb = true;
-        }
-    } else if (place_bb_mode == CUBE_BB) {
+    if (number_layers == 1) {
         cube_bb = true;
     } else {
-        VTR_ASSERT_SAFE(place_bb_mode == PER_LAYER_BB);
-        cube_bb = false;
+        VTR_ASSERT(number_layers > 1);
+        if (place_bb_mode == AUTO_BB) {
+            if (inter_layer_connections_limited_to_opin(rr_graph)) {
+                cube_bb = false;
+            } else {
+                cube_bb = true;
+            }
+        } else if (place_bb_mode == CUBE_BB) {
+            cube_bb = true;
+        } else {
+            VTR_ASSERT_SAFE(place_bb_mode == PER_LAYER_BB);
+            cube_bb = false;
+        }
     }
 
     return cube_bb;

From 4b5f829972a95c0ece5826f54af29ed8c5ba405e Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 24 Oct 2023 11:15:34 -0400
Subject: [PATCH 201/257] Print bounding box mode when it is selected

---
 vpr/src/place/place.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 01d9cfb465b..7ac0a70ea22 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -633,6 +633,9 @@ void try_place(const Netlist<>& net_list,
 
     cube_bb = is_cube_bb(placer_opts.place_bounding_box_mode,
                          device_ctx.rr_graph);
+    VTR_LOG("\n");
+    VTR_LOG("Bounding box mode is %s\n", (cube_bb ? "Cube" : "Per-layer"));
+    VTR_LOG("\n");
 
     int move_lim = 1;
     move_lim = (int)(annealing_sched.inner_num

From ddfab6fdb2ff6b2bad93c3fbc8b7033e67cf60f8 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 27 Oct 2023 10:35:01 -0400
Subject: [PATCH 202/257] remove initial placement id

---
 vpr/src/base/place_and_route.cpp    | 2 +-
 vpr/src/base/read_options.cpp       | 2 +-
 vpr/src/base/read_place.cpp         | 9 ++-------
 vpr/src/base/read_place.h           | 3 +--
 vpr/src/base/vpr_api.cpp            | 2 +-
 vpr/src/base/vpr_context.h          | 7 -------
 vpr/src/base/vpr_signal_handler.cpp | 2 +-
 vpr/src/place/place.cpp             | 5 ++---
 8 files changed, 9 insertions(+), 23 deletions(-)

diff --git a/vpr/src/base/place_and_route.cpp b/vpr/src/base/place_and_route.cpp
index 65a66f331d2..c34cd9cfbae 100644
--- a/vpr/src/base/place_and_route.cpp
+++ b/vpr/src/base/place_and_route.cpp
@@ -355,7 +355,7 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list,
                     auto& cluster_ctx = g_vpr_ctx.clustering();
                     // Cluster-based net_list is used for placement
                     print_place(filename_opts.NetFile.c_str(), cluster_ctx.clb_nlist.netlist_id().c_str(),
-                                filename_opts.PlaceFile.c_str(), false);
+                                filename_opts.PlaceFile.c_str());
                 }
             }
 
diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 45808be4003..eb31f168862 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -1605,7 +1605,7 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg
         .show_in(argparse::ShowIn::HELP_ONLY);
 
     file_grp.add_argument(args.write_initial_place_file, "--write_initial_place_file")
-        .help("Writes out the initial placement of blocks to the specified file")
+        .help("Writes out the the placement chosen by the initial placement algorithm to the specified file")
         .metavar("INITIAL_PLACE_FILE")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
diff --git a/vpr/src/base/read_place.cpp b/vpr/src/base/read_place.cpp
index bbe3bfc1a03..c1a1862ba86 100644
--- a/vpr/src/base/read_place.cpp
+++ b/vpr/src/base/read_place.cpp
@@ -312,8 +312,7 @@ void read_place_body(std::ifstream& placement_file,
  */
 void print_place(const char* net_file,
                  const char* net_id,
-                 const char* place_file,
-                 bool is_initial_place) {
+                 const char* place_file) {
     FILE* fp;
 
     auto& device_ctx = g_vpr_ctx.device();
@@ -346,9 +345,5 @@ void print_place(const char* net_file,
     fclose(fp);
 
     //Calculate the ID of the placement
-    if (is_initial_place) {
-        place_ctx.initial_placement_id = vtr::secure_digest_file(place_file);
-    } else {
-        place_ctx.placement_id = vtr::secure_digest_file(place_file);
-    }
+    place_ctx.placement_id = vtr::secure_digest_file(place_file);
 }
diff --git a/vpr/src/base/read_place.h b/vpr/src/base/read_place.h
index 06c38cc629d..36740a5dc5d 100644
--- a/vpr/src/base/read_place.h
+++ b/vpr/src/base/read_place.h
@@ -19,7 +19,6 @@ void read_constraints(const char* constraints_file);
 
 void print_place(const char* net_file,
                  const char* net_id,
-                 const char* place_file,
-                 bool is_initial_place);
+                 const char* place_file);
 
 #endif
diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp
index 89972fbb4bb..9f379f84e42 100644
--- a/vpr/src/base/vpr_api.cpp
+++ b/vpr/src/base/vpr_api.cpp
@@ -755,7 +755,7 @@ void vpr_place(const Netlist<>& net_list, t_vpr_setup& vpr_setup, const t_arch&
 
     print_place(filename_opts.NetFile.c_str(),
                 cluster_ctx.clb_nlist.netlist_id().c_str(),
-                filename_opts.PlaceFile.c_str(), false);
+                filename_opts.PlaceFile.c_str());
 }
 
 void vpr_load_placement(t_vpr_setup& vpr_setup, const t_arch& arch) {
diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h
index 49904cde327..82e7be31249 100644
--- a/vpr/src/base/vpr_context.h
+++ b/vpr/src/base/vpr_context.h
@@ -390,13 +390,6 @@ struct PlacementContext : public Context {
      */
     t_compressed_block_grids compressed_block_grids;
 
-    /**
-     * @brief SHA256 digest of the .init.place file
-     *
-     * Used for unique identification and consistency checking
-     */
-    std::string initial_placement_id;
-
     /**
      * @brief SHA256 digest of the .place file
      *
diff --git a/vpr/src/base/vpr_signal_handler.cpp b/vpr/src/base/vpr_signal_handler.cpp
index efe95d100e6..a8fff7b4394 100644
--- a/vpr/src/base/vpr_signal_handler.cpp
+++ b/vpr/src/base/vpr_signal_handler.cpp
@@ -90,7 +90,7 @@ void checkpoint() {
 
     std::string placer_checkpoint_file = "placer_checkpoint.place";
     VTR_LOG("Attempting to checkpoint current placement to file: %s\n", placer_checkpoint_file.c_str());
-    print_place(nullptr, nullptr, placer_checkpoint_file.c_str(), false);
+    print_place(nullptr, nullptr, placer_checkpoint_file.c_str());
 
     std::string router_checkpoint_file = "router_checkpoint.route";
     VTR_LOG("Attempting to checkpoint current routing to file: %s\n", router_checkpoint_file.c_str());
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 7ac0a70ea22..2b12a76349b 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -666,8 +666,7 @@ void try_place(const Netlist<>& net_list,
     if (!placer_opts.write_initial_place_file.empty()) {
         print_place(nullptr,
                     nullptr,
-                    (placer_opts.write_initial_place_file + ".init.place").c_str(),
-                    true);
+                    (placer_opts.write_initial_place_file + ".init.place").c_str());
     }
 
 #ifdef ENABLE_ANALYTIC_PLACE
@@ -1331,7 +1330,7 @@ static void placement_inner_loop(const t_annealing_state* state,
             VTR_LOG(
                 "Saving placement to file at temperature move %d / %d: %s\n",
                 inner_iter, state->move_lim, filename.c_str());
-            print_place(nullptr, nullptr, filename.c_str(), false);
+            print_place(nullptr, nullptr, filename.c_str());
             ++inner_placement_save_count;
         }
     }

From 9fd835e73359641491102b2768f47368c2b63120 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 27 Oct 2023 10:49:24 -0400
Subject: [PATCH 203/257] rename to layer_num_grid_locs

---
 vpr/src/base/vpr_types.h    | 5 +++--
 vpr/src/noc/noc_storage.cpp | 4 ++--
 vpr/src/noc/noc_storage.h   | 2 +-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index cb611c4acc4..4912addd2eb 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -619,10 +619,11 @@ struct t_2D_bb {
 
 /**
  * @brief An offset between placement locations (t_pl_loc)
- *
+ * @note In the case of comparing the offset, the layer offset should be equal
  * x: x-offset
  * y: y-offset
- * z: z-offset
+ * sub_tile: sub_tile-offset
+ * layer: layer-offset
  */
 struct t_pl_offset {
     t_pl_offset() = default;
diff --git a/vpr/src/noc/noc_storage.cpp b/vpr/src/noc/noc_storage.cpp
index ae42b5e1eac..70c92878f82 100644
--- a/vpr/src/noc/noc_storage.cpp
+++ b/vpr/src/noc/noc_storage.cpp
@@ -131,7 +131,7 @@ void NocStorage::set_device_grid_width(int grid_width) {
 
 void NocStorage::set_device_grid_spec(int grid_width, int grid_height) {
     device_grid_width = grid_width;
-    layer_num_blocks = grid_width * grid_height;
+    layer_num_grid_locs = grid_width * grid_height;
     return;
 }
 
@@ -235,7 +235,7 @@ NocLinkId NocStorage::get_parallel_link(NocLinkId current_link) const {
 
 int NocStorage::generate_router_key_from_grid_location(int grid_position_x, int grid_position_y, int layer_position) const {
     // calculate the key value
-    return (layer_num_blocks * layer_position + device_grid_width * grid_position_y + grid_position_x);
+    return (layer_num_grid_locs * layer_position + device_grid_width * grid_position_y + grid_position_x);
 }
 
 void NocStorage::echo_noc(char* file_name) const {
diff --git a/vpr/src/noc/noc_storage.h b/vpr/src/noc/noc_storage.h
index cd0363d48c9..f35f0121eb2 100644
--- a/vpr/src/noc/noc_storage.h
+++ b/vpr/src/noc/noc_storage.h
@@ -147,7 +147,7 @@ class NocStorage {
      * to get the corresponding physical (hard) router at the given grid
      * location using 'grid_location_to_router_id'.
      */
-    int layer_num_blocks;
+    int layer_num_grid_locs;
 
     // prevent "copying" of this object
     NocStorage(const NocStorage&) = delete;

From 55befbbae28c6ee49b204293ddc1ac707b260184 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 27 Oct 2023 14:08:08 -0400
Subject: [PATCH 204/257] pass cube_bb to the global state

---
 vpr/src/base/vpr_context.h |  6 ++++++
 vpr/src/place/place.cpp    | 44 ++++++++++++++++----------------------
 2 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h
index 82e7be31249..a07a73e2827 100644
--- a/vpr/src/base/vpr_context.h
+++ b/vpr/src/base/vpr_context.h
@@ -402,6 +402,12 @@ struct PlacementContext : public Context {
      * placer_debug_net or placer_debug_block parameters in the command line.
      */
     bool f_placer_debug = false;
+
+    /**
+     * Set this variable to true if the type of the bounding box used in placement is of the type cube. If it is false,
+     * it would mean that per-layer bounding box is used. For the 2D architecture, the cube bounding box would be used.
+     */
+    bool cube_bb = false;
 };
 
 /**
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 2b12a76349b..3b12089e347 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -294,8 +294,7 @@ static e_move_result try_swap(const t_annealing_state* state,
                               MoveTypeStat& move_type_stat,
                               const t_place_algorithm& place_algorithm,
                               float timing_bb_factor,
-                              bool manual_move_enabled,
-                              const bool cube_bb);
+                              bool manual_move_enabled);
 
 static void check_place(const t_placer_costs& costs,
                         const PlaceDelayModel* delay_model,
@@ -325,8 +324,7 @@ static float starting_t(const t_annealing_state* state,
                         t_pl_blocks_to_be_moved& blocks_affected,
                         const t_placer_opts& placer_opts,
                         const t_noc_opts& noc_opts,
-                        MoveTypeStat& move_type_stat,
-                        const bool cube_bb);
+                        MoveTypeStat& move_type_stat);
 
 static int count_connections();
 
@@ -413,7 +411,6 @@ static int find_affected_nets_and_update_costs(
     const t_place_algorithm& place_algorithm,
     const PlaceDelayModel* delay_model,
     const PlacerCriticalities* criticalities,
-    const bool cube_bb,
     t_pl_blocks_to_be_moved& blocks_affected,
     double& bb_delta_c,
     double& timing_delta_c);
@@ -495,8 +492,7 @@ static void placement_inner_loop(const t_annealing_state* state,
                                  SetupTimingInfo* timing_info,
                                  const t_place_algorithm& place_algorithm,
                                  MoveTypeStat& move_type_stat,
-                                 float timing_bb_factor,
-                                 const bool cube_bb);
+                                 float timing_bb_factor);
 
 static void recompute_costs_from_scratch(const t_placer_opts& placer_opts,
                                          const t_noc_opts& noc_opts,
@@ -574,8 +570,6 @@ void try_place(const Netlist<>& net_list,
     float first_crit_exponent, first_rlim, first_t;
     int first_move_lim;
 
-    bool cube_bb;
-
     int num_layers = device_ctx.grid.get_num_layers();
 
     t_placer_costs costs(placer_opts.place_algorithm);
@@ -631,8 +625,10 @@ void try_place(const Netlist<>& net_list,
         }
     }
 
-    cube_bb = is_cube_bb(placer_opts.place_bounding_box_mode,
-                         device_ctx.rr_graph);
+    g_vpr_ctx.mutable_placement().cube_bb = is_cube_bb(placer_opts.place_bounding_box_mode,
+                                                       device_ctx.rr_graph);
+    const auto& cube_bb = g_vpr_ctx.mutable_placement().cube_bb;
+
     VTR_LOG("\n");
     VTR_LOG("Bounding box mode is %s\n", (cube_bb ? "Cube" : "Per-layer"));
     VTR_LOG("\n");
@@ -914,7 +910,7 @@ void try_place(const Netlist<>& net_list,
                          place_delay_model.get(), placer_criticalities.get(),
                          placer_setup_slacks.get(), timing_info.get(), *move_generator,
                          *manual_move_generator, pin_timing_invalidator.get(),
-                         blocks_affected, placer_opts, noc_opts, move_type_stat, cube_bb);
+                         blocks_affected, placer_opts, noc_opts, move_type_stat);
 
     if (!placer_opts.move_stats_file.empty()) {
         f_move_stats_file = std::unique_ptr<FILE, decltype(&vtr::fclose)>(
@@ -985,8 +981,7 @@ void try_place(const Netlist<>& net_list,
                                  *current_move_generator, *manual_move_generator,
                                  blocks_affected, timing_info.get(),
                                  placer_opts.place_algorithm, move_type_stat,
-                                 timing_bb_factor,
-                                 cube_bb);
+                                 timing_bb_factor);
 
             //move the update used move_generator to its original variable
             update_move_generator(move_generator, move_generator2, agent_state,
@@ -1051,8 +1046,7 @@ void try_place(const Netlist<>& net_list,
                              *current_move_generator, *manual_move_generator,
                              blocks_affected, timing_info.get(),
                              placer_opts.place_quench_algorithm, move_type_stat,
-                             timing_bb_factor,
-                             cube_bb);
+                             timing_bb_factor);
 
         //move the update used move_generator to its original variable
         update_move_generator(move_generator, move_generator2, agent_state,
@@ -1245,8 +1239,7 @@ static void placement_inner_loop(const t_annealing_state* state,
                                  SetupTimingInfo* timing_info,
                                  const t_place_algorithm& place_algorithm,
                                  MoveTypeStat& move_type_stat,
-                                 float timing_bb_factor,
-                                 const bool cube_bb) {
+                                 float timing_bb_factor) {
     int inner_crit_iter_count, inner_iter;
 
     int inner_placement_save_count = 0; //How many times have we dumped placement to a file this temperature?
@@ -1263,7 +1256,7 @@ static void placement_inner_loop(const t_annealing_state* state,
                                              manual_move_generator, timing_info, pin_timing_invalidator,
                                              blocks_affected, delay_model, criticalities, setup_slacks,
                                              placer_opts, noc_opts, move_type_stat, place_algorithm,
-                                             timing_bb_factor, manual_move_enabled, cube_bb);
+                                             timing_bb_factor, manual_move_enabled);
 
         if (swap_result == ACCEPTED) {
             /* Move was accepted.  Update statistics that are useful for the annealing schedule. */
@@ -1434,8 +1427,7 @@ static float starting_t(const t_annealing_state* state,
                         t_pl_blocks_to_be_moved& blocks_affected,
                         const t_placer_opts& placer_opts,
                         const t_noc_opts& noc_opts,
-                        MoveTypeStat& move_type_stat,
-                        const bool cube_bb) {
+                        MoveTypeStat& move_type_stat) {
     if (annealing_sched.type == USER_SCHED) {
         return (annealing_sched.init_t);
     }
@@ -1468,7 +1460,7 @@ static float starting_t(const t_annealing_state* state,
                                              manual_move_generator, timing_info, pin_timing_invalidator,
                                              blocks_affected, delay_model, criticalities, setup_slacks,
                                              placer_opts, noc_opts, move_type_stat, placer_opts.place_algorithm,
-                                             REWARD_BB_TIMING_RELATIVE_WEIGHT, manual_move_enabled, cube_bb);
+                                             REWARD_BB_TIMING_RELATIVE_WEIGHT, manual_move_enabled);
 
         if (swap_result == ACCEPTED) {
             num_accepted++;
@@ -1588,8 +1580,7 @@ static e_move_result try_swap(const t_annealing_state* state,
                               MoveTypeStat& move_type_stat,
                               const t_place_algorithm& place_algorithm,
                               float timing_bb_factor,
-                              bool manual_move_enabled,
-                              const bool cube_bb) {
+                              bool manual_move_enabled) {
     /* Picks some block and moves it to another spot.  If this spot is   *
      * occupied, switch the blocks.  Assess the change in cost function. *
      * rlim is the range limiter.                                        *
@@ -1690,7 +1681,7 @@ static e_move_result try_swap(const t_annealing_state* state,
         //Also find all the pins affected by the swap, and calculates new connection
         //delays and timing costs and store them in proposed_* data structures.
         int num_nets_affected = find_affected_nets_and_update_costs(
-            place_algorithm, delay_model, criticalities, cube_bb, blocks_affected,
+            place_algorithm, delay_model, criticalities, blocks_affected,
             bb_delta_c, timing_delta_c);
 
         //For setup slack analysis, we first do a timing analysis to get the newest
@@ -1956,7 +1947,6 @@ static int find_affected_nets_and_update_costs(
     const t_place_algorithm& place_algorithm,
     const PlaceDelayModel* delay_model,
     const PlacerCriticalities* criticalities,
-    const bool cube_bb,
     t_pl_blocks_to_be_moved& blocks_affected,
     double& bb_delta_c,
     double& timing_delta_c) {
@@ -1966,6 +1956,8 @@ static int find_affected_nets_and_update_costs(
 
     int num_affected_nets = 0;
 
+    const auto& cube_bb = g_vpr_ctx.placement().cube_bb;
+
     /* Go through all the blocks moved. */
     for (int iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) {
         ClusterBlockId blk = blocks_affected.moved_blocks[iblk].block_num;

From d52b8b8960f0798f4f7c06d630c06a73364f10ee Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 27 Oct 2023 10:35:01 -0400
Subject: [PATCH 205/257] remove initial placement id


From 4246443d5568f1fd429b81d5dd35c6a29eb0f12d Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 27 Oct 2023 10:49:24 -0400
Subject: [PATCH 206/257] rename to layer_num_grid_locs


From e3faf058f78c0c726ed263d8b56d9464433dcb10 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 27 Oct 2023 14:08:08 -0400
Subject: [PATCH 207/257] pass cube_bb to the global state


From 59b35834a36d36aa03e949b156c90e9bfbd034a6 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 25 Oct 2023 20:13:33 -0400
Subject: [PATCH 208/257] alloc swap data structure bb based on cube bb

---
 vpr/src/place/place.cpp | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 3b12089e347..dcd087b06b7 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -261,9 +261,10 @@ static void alloc_and_load_placement_structs(float place_cost_exp,
                                              const t_placer_opts& placer_opts,
                                              const t_noc_opts& noc_opts,
                                              t_direct_inf* directs,
-                                             int num_directs);
+                                             int num_directs,
+                                             const bool cube_bb);
 
-static void alloc_and_load_try_swap_structs();
+static void alloc_and_load_try_swap_structs(const bool cube_bb);
 static void free_try_swap_structs();
 
 static void free_placement_structs(const t_placer_opts& placer_opts, const t_noc_opts& noc_opts);
@@ -650,7 +651,7 @@ void try_place(const Netlist<>& net_list,
 
     init_chan(width_fac, chan_width_dist, graph_directionality);
 
-    alloc_and_load_placement_structs(placer_opts.place_cost_exp, placer_opts, noc_opts, directs, num_directs);
+    alloc_and_load_placement_structs(placer_opts.place_cost_exp, placer_opts, noc_opts, directs, num_directs, cube_bb);
 
     vtr::ScopedStartFinishTimer timer("Placement");
 
@@ -2543,7 +2544,8 @@ static void alloc_and_load_placement_structs(float place_cost_exp,
                                              const t_placer_opts& placer_opts,
                                              const t_noc_opts& noc_opts,
                                              t_direct_inf* directs,
-                                             int num_directs) {
+                                             int num_directs,
+                                             const bool cube_bb) {
     int max_pins_per_clb;
     unsigned int ipin;
 
@@ -2619,7 +2621,7 @@ static void alloc_and_load_placement_structs(float place_cost_exp,
 
     alloc_and_load_for_fast_cost_update(place_cost_exp);
 
-    alloc_and_load_try_swap_structs();
+    alloc_and_load_try_swap_structs(cube_bb);
 
     place_ctx.pl_macros = alloc_and_load_placement_macros(directs, num_directs);
 
@@ -2667,7 +2669,7 @@ static void free_placement_structs(const t_placer_opts& placer_opts, const t_noc
     }
 }
 
-static void alloc_and_load_try_swap_structs() {
+static void alloc_and_load_try_swap_structs(const bool cube_bb) {
     /* Allocate the local bb_coordinate storage, etc. only once. */
     /* Allocate with size cluster_ctx.clb_nlist.nets().size() for any number of nets affected. */
     auto& cluster_ctx = g_vpr_ctx.clustering();
@@ -2676,11 +2678,11 @@ static void alloc_and_load_try_swap_structs() {
 
     const int num_layers = g_vpr_ctx.device().grid.get_num_layers();
 
-    if (num_layers == 1) {
+    if (cube_bb) {
         ts_bb_edge_new.resize(num_nets, t_bb());
         ts_bb_coord_new.resize(num_nets, t_bb());
     } else {
-        VTR_ASSERT(num_layers > 1);
+        VTR_ASSERT(!cube_bb);
         layer_ts_bb_edge_new.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
         layer_ts_bb_coord_new.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
         ts_layer_sink_pin_count.resize(num_nets, std::vector<int>(num_layers, OPEN));

From 58034d83a742b4ffeca72dff5a7c261729dc63fe Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 25 Oct 2023 20:17:36 -0400
Subject: [PATCH 209/257] add cube bb to check place

---
 vpr/src/place/place.cpp | 37 +++++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index dcd087b06b7..b31eb4c69f4 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -301,12 +301,14 @@ static void check_place(const t_placer_costs& costs,
                         const PlaceDelayModel* delay_model,
                         const PlacerCriticalities* criticalities,
                         const t_place_algorithm& place_algorithm,
-                        const t_noc_opts& noc_opts);
+                        const t_noc_opts& noc_opts,
+                        const bool cube_bb);
 
 static int check_placement_costs(const t_placer_costs& costs,
                                  const PlaceDelayModel* delay_model,
                                  const PlacerCriticalities* criticalities,
-                                 const t_place_algorithm& place_algorithm);
+                                 const t_place_algorithm& place_algorithm,
+                                 const bool cube_bb);
 
 static int check_placement_consistency();
 static int check_block_placement_consistency();
@@ -806,8 +808,12 @@ void try_place(const Netlist<>& net_list,
     costs.cost = get_total_cost(&costs, placer_opts, noc_opts);
 
     //Sanity check that initial placement is legal
-    check_place(costs, place_delay_model.get(), placer_criticalities.get(),
-                placer_opts.place_algorithm, noc_opts);
+    check_place(costs,
+                place_delay_model.get(),
+                placer_criticalities.get(),
+                placer_opts.place_algorithm,
+                noc_opts,
+                cube_bb);
 
     //Initial pacement statistics
     VTR_LOG("Initial placement cost: %g bb_cost: %g td_cost: %g\n", costs.cost,
@@ -1107,8 +1113,12 @@ void try_place(const Netlist<>& net_list,
         place_sync_external_block_connections(block_id);
     }
 
-    check_place(costs, place_delay_model.get(), placer_criticalities.get(),
-                placer_opts.place_algorithm, noc_opts);
+    check_place(costs,
+                place_delay_model.get(),
+                placer_criticalities.get(),
+                placer_opts.place_algorithm,
+                noc_opts,
+                cube_bb);
 
     //Some stats
     VTR_LOG("\n");
@@ -1889,7 +1899,7 @@ static e_move_result try_swap(const t_annealing_state* state,
 #if 0
     // Check that each accepted swap yields a valid placement. This will
     // greatly slow the placer, but can debug some issues.
-    check_place(*costs, delay_model, criticalities, place_algorithm, noc_opts);
+    check_place(*costs, delay_model, criticalities, place_algorithm, noc_opts, cube_bb);
 #endif
     VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tAfter move Place cost %f, bb_cost %f, timing cost %f\n", costs->cost, costs->bb_cost, costs->timing_cost);
     return move_outcome;
@@ -3833,7 +3843,8 @@ static void check_place(const t_placer_costs& costs,
                         const PlaceDelayModel* delay_model,
                         const PlacerCriticalities* criticalities,
                         const t_place_algorithm& place_algorithm,
-                        const t_noc_opts& noc_opts) {
+                        const t_noc_opts& noc_opts,
+                        bool const cube_bb) {
     /* Checks that the placement has not confused our data structures. *
      * i.e. the clb and block structures agree about the locations of  *
      * every block, blocks are in legal spots, etc.  Also recomputes   *
@@ -3844,7 +3855,8 @@ static void check_place(const t_placer_costs& costs,
 
     error += check_placement_consistency();
     error += check_placement_costs(costs, delay_model, criticalities,
-                                   place_algorithm);
+                                   place_algorithm,
+                                   cube_bb);
     error += check_placement_floorplanning();
 
     // check the NoC costs during placement if the user is using the NoC supported flow
@@ -3867,17 +3879,18 @@ static void check_place(const t_placer_costs& costs,
 static int check_placement_costs(const t_placer_costs& costs,
                                  const PlaceDelayModel* delay_model,
                                  const PlacerCriticalities* criticalities,
-                                 const t_place_algorithm& place_algorithm) {
+                                 const t_place_algorithm& place_algorithm,
+                                 const bool cube_bb) {
     int error = 0;
     double bb_cost_check;
     double timing_cost_check;
 
     int num_layers = g_vpr_ctx.device().grid.get_num_layers();
 
-    if (num_layers == 1) {
+    if (cube_bb) {
         bb_cost_check = comp_bb_cost(CHECK);
     } else {
-        VTR_ASSERT_SAFE(num_layers > 1);
+        VTR_ASSERT_SAFE(!cube_bb);
         bb_cost_check = comp_layer_bb_cost(CHECK);
     }
 

From b11c27d2bad366de35696a29530a3efa15e699d5 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 25 Oct 2023 20:19:12 -0400
Subject: [PATCH 210/257] call comp bb based on cube bb in try place

---
 vpr/src/place/place.cpp | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index b31eb4c69f4..997ca90e9f8 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -697,10 +697,10 @@ void try_place(const Netlist<>& net_list,
     /* Gets initial cost and loads bounding boxes. */
 
     if (placer_opts.place_algorithm.is_timing_driven()) {
-        if (num_layers == 1) {
+        if (cube_bb) {
             costs.bb_cost = comp_bb_cost(NORMAL);
         } else {
-            VTR_ASSERT_SAFE(num_layers > 1);
+            VTR_ASSERT_SAFE(!cube_bb);
             costs.bb_cost = comp_layer_bb_cost(NORMAL);
         }
 
@@ -782,7 +782,12 @@ void try_place(const Netlist<>& net_list,
         VTR_ASSERT(placer_opts.place_algorithm == BOUNDING_BOX_PLACE);
 
         /* Total cost is the same as wirelength cost normalized*/
-        costs.bb_cost = comp_bb_cost(NORMAL);
+        if (cube_bb) {
+            costs.bb_cost = comp_bb_cost(NORMAL);
+        } else {
+            VTR_ASSERT_SAFE(!cube_bb);
+            costs.bb_cost = comp_layer_bb_cost(NORMAL);
+        }
         costs.bb_cost_norm = 1 / costs.bb_cost;
 
         /* Timing cost and normalization factors are not used */

From 3c0161ff65210b544835afdc5e128d89b3d9412e Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Wed, 25 Oct 2023 20:28:17 -0400
Subject: [PATCH 211/257] allocate the correct bb based on cube bb

---
 vpr/src/place/place.cpp | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 997ca90e9f8..edf362037f5 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -277,7 +277,9 @@ static double comp_bb_cost(e_cost_methods method);
 
 static double comp_layer_bb_cost(e_cost_methods method);
 
-static void update_move_nets(int num_nets_affected);
+static void update_move_nets(int num_nets_affected,
+                             const bool cube_bb);
+
 static void reset_move_nets(int num_nets_affected);
 
 static e_move_result try_swap(const t_annealing_state* state,
@@ -1521,18 +1523,18 @@ static float starting_t(const t_annealing_state* state,
     return init_temp;
 }
 
-static void update_move_nets(int num_nets_affected) {
+static void update_move_nets(int num_nets_affected,
+                             const bool cube_bb) {
     /* update net cost functions and reset flags. */
     auto& cluster_ctx = g_vpr_ctx.clustering();
     auto& place_move_ctx = g_placer_ctx.mutable_move();
 
-    int num_layers = g_vpr_ctx.device().grid.get_num_layers();
 
     for (int inet_affected = 0; inet_affected < num_nets_affected;
          inet_affected++) {
         ClusterNetId net_id = ts_nets_to_update[inet_affected];
 
-        if (num_layers == 1) {
+        if (cube_bb) {
             place_move_ctx.bb_coords[net_id] = ts_bb_coord_new[net_id];
         } else {
             place_move_ctx.layer_bb_coords[net_id] = layer_ts_bb_coord_new[net_id];
@@ -1540,7 +1542,7 @@ static void update_move_nets(int num_nets_affected) {
         }
 
         if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET) {
-            if (num_layers == 1) {
+            if (cube_bb) {
                 place_move_ctx.bb_num_on_edges[net_id] = ts_bb_edge_new[net_id];
             } else {
                 place_move_ctx.layer_bb_num_on_edges[net_id] = layer_ts_bb_edge_new[net_id];
@@ -1801,7 +1803,8 @@ static e_move_result try_swap(const t_annealing_state* state,
             }
 
             /* Update net cost functions and reset flags. */
-            update_move_nets(num_nets_affected);
+            update_move_nets(num_nets_affected,
+                             cube_bb);
 
             /* Update clb data structures since we kept the move. */
             commit_move_blocks(blocks_affected);
@@ -2619,11 +2622,11 @@ static void alloc_and_load_placement_structs(float place_cost_exp,
     net_cost.resize(num_nets, -1.);
     proposed_net_cost.resize(num_nets, -1.);
 
-    if (num_layers == 1) {
+    if (cube_bb) {
         place_move_ctx.bb_coords.resize(num_nets, t_bb());
         place_move_ctx.bb_num_on_edges.resize(num_nets, t_bb());
     } else {
-        VTR_ASSERT(num_layers > 1);
+        VTR_ASSERT(!cube_bb);
         place_move_ctx.layer_bb_num_on_edges.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
         place_move_ctx.layer_bb_coords.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
         place_move_ctx.num_sink_pin_layer.resize(num_nets, std::vector<int>(num_layers, 0));

From 8860c9901b5684ed57145dc31374be75b28c1ca5 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 26 Oct 2023 10:15:26 -0400
Subject: [PATCH 212/257] update get_bb_from_scratch to accept
 num_sink_pin_layer

---
 vpr/src/place/place.cpp | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index edf362037f5..8419778da1d 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -453,7 +453,8 @@ static double get_net_layer_cost(ClusterNetId /* net_id */,
 
 static void get_bb_from_scratch(ClusterNetId net_id,
                                 t_bb& coords,
-                                t_bb& num_on_edges);
+                                t_bb& num_on_edges,
+                                std::vector<int>& num_sink_pin_layer);
 
 static void get_layer_bb_from_scratch(ClusterNetId net_id,
                                       std::vector<t_2D_bb>& num_on_edges,
@@ -2495,7 +2496,8 @@ static double comp_bb_cost(e_cost_methods method) {
                 && method == NORMAL) {
                 get_bb_from_scratch(net_id,
                                     place_move_ctx.bb_coords[net_id],
-                                    place_move_ctx.bb_num_on_edges[net_id]);
+                                    place_move_ctx.bb_num_on_edges[net_id],
+                                    place_move_ctx.num_sink_pin_layer[net_id]);
             } else {
                 get_non_updateable_bb(net_id, place_move_ctx.bb_coords[net_id]);
             }
@@ -2727,8 +2729,11 @@ static void free_try_swap_structs() {
  * from only the block location information).  It updates both the       *
  * coordinate and number of pins on each edge information.  It           *
  * should only be called when the bounding box information is not valid. */
-static void get_bb_from_scratch(ClusterNetId net_id, t_bb& coords, t_bb& num_on_edges) {
-    int pnum, x, y, xmin, xmax, ymin, ymax;
+static void get_bb_from_scratch(ClusterNetId net_id,
+                                t_bb& coords,
+                                t_bb& num_on_edges,
+                                std::vector<int>& num_sink_pin_layer) {
+    int pnum, x, y, pin_layer, xmin, xmax, ymin, ymax;
     int xmin_edge, xmax_edge, ymin_edge, ymax_edge;
 
     auto& cluster_ctx = g_vpr_ctx.clustering();
@@ -2756,6 +2761,8 @@ static void get_bb_from_scratch(ClusterNetId net_id, t_bb& coords, t_bb& num_on_
     xmax_edge = 1;
     ymax_edge = 1;
 
+    std::fill(num_sink_pin_layer.begin(), num_sink_pin_layer.end(), 0);
+
     for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) {
         bnum = cluster_ctx.clb_nlist.pin_block(pin_id);
         pnum = tile_pin_index(pin_id);
@@ -2763,6 +2770,7 @@ static void get_bb_from_scratch(ClusterNetId net_id, t_bb& coords, t_bb& num_on_
             + physical_tile_type(bnum)->pin_width_offset[pnum];
         y = place_ctx.block_locs[bnum].loc.y
             + physical_tile_type(bnum)->pin_height_offset[pnum];
+        pin_layer = place_ctx.block_locs[bnum].loc.layer;
 
         /* Code below counts IO blocks as being within the 1..grid.width()-2, 1..grid.height()-2 clb array. *
          * This is because channels do not go out of the 0..grid.width()-2, 0..grid.height()-2 range, and   *
@@ -2799,6 +2807,8 @@ static void get_bb_from_scratch(ClusterNetId net_id, t_bb& coords, t_bb& num_on_
             ymax = y;
             ymax_edge = 1;
         }
+
+        num_sink_pin_layer[pin_layer]++;
     }
 
     /* Copy the coordinates and number on edges information into the proper   *
@@ -3245,7 +3255,7 @@ static void update_bb(ClusterNetId net_id,
 
         if (pin_old_loc.x == curr_bb_coord->xmax) { /* Old position at xmax. */
             if (curr_bb_edge->xmax == 1) {
-                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new);
+                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer);
                 bb_updated_before[net_id] = GOT_FROM_SCRATCH;
                 return;
             } else {
@@ -3277,7 +3287,7 @@ static void update_bb(ClusterNetId net_id,
 
         if (pin_old_loc.x == curr_bb_coord->xmin) { /* Old position at xmin. */
             if (curr_bb_edge->xmin == 1) {
-                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new);
+                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer);
                 bb_updated_before[net_id] = GOT_FROM_SCRATCH;
                 return;
             } else {
@@ -3318,7 +3328,7 @@ static void update_bb(ClusterNetId net_id,
 
         if (pin_old_loc.y == curr_bb_coord->ymax) { /* Old position at ymax. */
             if (curr_bb_edge->ymax == 1) {
-                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new);
+                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer);
                 bb_updated_before[net_id] = GOT_FROM_SCRATCH;
                 return;
             } else {
@@ -3350,7 +3360,7 @@ static void update_bb(ClusterNetId net_id,
 
         if (pin_old_loc.y == curr_bb_coord->ymin) { /* Old position at ymin. */
             if (curr_bb_edge->ymin == 1) {
-                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new);
+                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer);
                 bb_updated_before[net_id] = GOT_FROM_SCRATCH;
                 return;
             } else {

From 7f5bb1754a77647bfda3e82a90be8bfaa2414d0b Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 26 Oct 2023 10:16:30 -0400
Subject: [PATCH 213/257] add num_sink_pin_layer to update_bb - initialize the
 placement context of it regardless of being cube_bb

---
 vpr/src/place/place.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 8419778da1d..6c8c1724e15 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -361,6 +361,7 @@ static void get_non_updateable_layer_bb(ClusterNetId net_id,
 static void update_bb(ClusterNetId net_id,
                       t_bb& bb_edge_new,
                       t_bb& bb_coord_new,
+                      std::vector<int>& num_sink_pin_layer,
                       t_physical_tile_loc pin_old_loc,
                       t_physical_tile_loc pin_new_loc);
 
@@ -2085,6 +2086,7 @@ static void update_net_bb(const ClusterNetId net,
         update_bb(net,
                   ts_bb_edge_new[net],
                   ts_bb_coord_new[net],
+                  ts_layer_sink_pin_count[net],
                   pin_old_loc,
                   pin_new_loc);
     }
@@ -2631,9 +2633,10 @@ static void alloc_and_load_placement_structs(float place_cost_exp,
         VTR_ASSERT(!cube_bb);
         place_move_ctx.layer_bb_num_on_edges.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
         place_move_ctx.layer_bb_coords.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
-        place_move_ctx.num_sink_pin_layer.resize(num_nets, std::vector<int>(num_layers, 0));
     }
 
+    place_move_ctx.num_sink_pin_layer.resize(num_nets, std::vector<int>(num_layers, 0));
+
     /* Used to store costs for moves not yet made and to indicate when a net's   *
      * cost has been recomputed. proposed_net_cost[inet] < 0 means net's cost hasn't *
      * been recomputed.                                                          */
@@ -3207,6 +3210,7 @@ static void get_non_updateable_layer_bb(ClusterNetId net_id,
 static void update_bb(ClusterNetId net_id,
                       t_bb& bb_edge_new,
                       t_bb& bb_coord_new,
+                      std::vector<int>& num_sink_pin_layer,
                       t_physical_tile_loc pin_old_loc,
                       t_physical_tile_loc pin_new_loc) {
     /* Updates the bounding box of a net by storing its coordinates in    *

From f1f70bf681cc72b1c8679128283755deab6ba9a4 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 26 Oct 2023 10:19:16 -0400
Subject: [PATCH 214/257] move ts_layer_sink_pin_count initialization out of if
 block since it is used in both cases

---
 vpr/src/place/place.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 6c8c1724e15..63feb36c256 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -1540,8 +1540,8 @@ static void update_move_nets(int num_nets_affected,
             place_move_ctx.bb_coords[net_id] = ts_bb_coord_new[net_id];
         } else {
             place_move_ctx.layer_bb_coords[net_id] = layer_ts_bb_coord_new[net_id];
-            place_move_ctx.num_sink_pin_layer[net_id] = ts_layer_sink_pin_count[net_id];
         }
+        place_move_ctx.num_sink_pin_layer[net_id] = ts_layer_sink_pin_count[net_id];
 
         if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET) {
             if (cube_bb) {
@@ -2708,8 +2708,8 @@ static void alloc_and_load_try_swap_structs(const bool cube_bb) {
         VTR_ASSERT(!cube_bb);
         layer_ts_bb_edge_new.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
         layer_ts_bb_coord_new.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
-        ts_layer_sink_pin_count.resize(num_nets, std::vector<int>(num_layers, OPEN));
     }
+    ts_layer_sink_pin_count.resize(num_nets, std::vector<int>(num_layers, OPEN));
     ts_nets_to_update.resize(num_nets, ClusterNetId::INVALID());
 
     auto& place_ctx = g_vpr_ctx.mutable_placement();

From ebcf3b7fc2efa50f170d30bc7fc1c0c3d5085325 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 26 Oct 2023 10:27:33 -0400
Subject: [PATCH 215/257] add ts_layer_sink_pin_count to get_non_updateable_bb

---
 vpr/src/place/place.cpp | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 63feb36c256..3fe3c95476b 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -352,7 +352,8 @@ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks);
 static e_move_result assess_swap(double delta_c, double t);
 
 static void get_non_updateable_bb(ClusterNetId net_id,
-                                  t_bb& bb_coord_new);
+                                  t_bb& bb_coord_new,
+                                  std::vector<int>& num_sink_pin_layer);
 
 static void get_non_updateable_layer_bb(ClusterNetId net_id,
                                         std::vector<t_2D_bb>& bb_coord_new,
@@ -2064,7 +2065,9 @@ static void update_net_bb(const ClusterNetId net,
         //For small nets brute-force bounding box update is faster
 
         if (bb_updated_before[net] == NOT_UPDATED_YET) { //Only once per-net
-            get_non_updateable_bb(net, ts_bb_coord_new[net]);
+            get_non_updateable_bb(net,
+                                  ts_bb_coord_new[net],
+                                  ts_layer_sink_pin_count[net]);
         }
     } else {
         //For large nets, update bounding box incrementally
@@ -2501,7 +2504,9 @@ static double comp_bb_cost(e_cost_methods method) {
                                     place_move_ctx.bb_num_on_edges[net_id],
                                     place_move_ctx.num_sink_pin_layer[net_id]);
             } else {
-                get_non_updateable_bb(net_id, place_move_ctx.bb_coords[net_id]);
+                get_non_updateable_bb(net_id,
+                                      place_move_ctx.bb_coords[net_id],
+                                      place_move_ctx.num_sink_pin_layer[net_id]);
             }
 
             net_cost[net_id] = get_net_cost(net_id, place_move_ctx.bb_coords[net_id]);
@@ -3078,10 +3083,11 @@ static double get_net_layer_cost(ClusterNetId /* net_id */,
  * edges of the bounding box can be used.  Essentially, I am assuming *
  * the pins always lie on the outside of the bounding box.            */
 static void get_non_updateable_bb(ClusterNetId net_id,
-                                  t_bb& bb_coord_new) {
+                                  t_bb& bb_coord_new,
+                                  std::vector<int>& num_sink_pin_layer) {
     //TODO: account for multiple physical pin instances per logical pin
 
-    int xmax, ymax, xmin, ymin, x, y;
+    int xmax, ymax, xmin, ymin, x, y, layer;
     int pnum;
 
     auto& cluster_ctx = g_vpr_ctx.clustering();
@@ -3101,6 +3107,8 @@ static void get_non_updateable_bb(ClusterNetId net_id,
     xmax = x;
     ymax = y;
 
+    std::fill(num_sink_pin_layer.begin(), num_sink_pin_layer.end(), 0);
+
     for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) {
         bnum = cluster_ctx.clb_nlist.pin_block(pin_id);
         pnum = tile_pin_index(pin_id);
@@ -3108,6 +3116,7 @@ static void get_non_updateable_bb(ClusterNetId net_id,
             + physical_tile_type(bnum)->pin_width_offset[pnum];
         y = place_ctx.block_locs[bnum].loc.y
             + physical_tile_type(bnum)->pin_height_offset[pnum];
+        layer = place_ctx.block_locs[bnum].loc.layer;
 
         if (x < xmin) {
             xmin = x;
@@ -3120,6 +3129,8 @@ static void get_non_updateable_bb(ClusterNetId net_id,
         } else if (y > ymax) {
             ymax = y;
         }
+
+        num_sink_pin_layer[layer]++;
     }
 
     /* Now I've found the coordinates of the bounding box.  There are no *

From 199dfe787c4304ce3745b944994b29f977230280 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 26 Oct 2023 10:37:02 -0400
Subject: [PATCH 216/257] update num_sink_pin_layer in update_bb

---
 vpr/src/place/place.cpp | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 3fe3c95476b..7df8a67ca86 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -364,7 +364,8 @@ static void update_bb(ClusterNetId net_id,
                       t_bb& bb_coord_new,
                       std::vector<int>& num_sink_pin_layer,
                       t_physical_tile_loc pin_old_loc,
-                      t_physical_tile_loc pin_new_loc);
+                      t_physical_tile_loc pin_new_loc,
+                      bool src_pin);
 
 static void update_layer_bb(ClusterNetId net_id,
                             std::vector<t_2D_bb>& bb_edge_new,
@@ -2072,6 +2073,7 @@ static void update_net_bb(const ClusterNetId net,
     } else {
         //For large nets, update bounding box incrementally
         int iblk_pin = tile_pin_index(blk_pin);
+        bool src_pin = cluster_ctx.clb_nlist.pin_type(blk_pin) == PinType::DRIVER;
 
         t_physical_tile_type_ptr blk_type = physical_tile_type(blk);
         int pin_width_offset = blk_type->pin_width_offset[iblk_pin];
@@ -2091,7 +2093,8 @@ static void update_net_bb(const ClusterNetId net,
                   ts_bb_coord_new[net],
                   ts_layer_sink_pin_count[net],
                   pin_old_loc,
-                  pin_new_loc);
+                  pin_new_loc,
+                  src_pin);
     }
 }
 
@@ -3223,7 +3226,8 @@ static void update_bb(ClusterNetId net_id,
                       t_bb& bb_coord_new,
                       std::vector<int>& num_sink_pin_layer,
                       t_physical_tile_loc pin_old_loc,
-                      t_physical_tile_loc pin_new_loc) {
+                      t_physical_tile_loc pin_new_loc,
+                      bool src_pin) {
     /* Updates the bounding box of a net by storing its coordinates in    *
      * the bb_coord_new data structure and the number of blocks on each   *
      * edge in the bb_edge_new data structure.  This routine should only  *
@@ -3408,6 +3412,16 @@ static void update_bb(ClusterNetId net_id,
         bb_edge_new.ymax = curr_bb_edge->ymax;
     }
 
+    /* Now account for the layer motion. */
+    if (device_ctx.grid.get_num_layers() > 1) {
+        /* We need to update it only if multiple layers are available */
+        if (!src_pin) {
+            /* if src pin is being moved, we don't need to update this data structure */
+            num_sink_pin_layer[pin_old_loc.layer_num]--;
+            num_sink_pin_layer[pin_new_loc.layer_num]++;
+        }
+    }
+
     if (bb_updated_before[net_id] == NOT_UPDATED_YET) {
         bb_updated_before[net_id] = UPDATED_ONCE;
     }

From 8405bfa623affe7d37c530d303c78b4ef58ba94d Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 26 Oct 2023 10:56:16 -0400
Subject: [PATCH 217/257] add cube_bb under place context

---
 vpr/src/place/place.cpp | 44 +++++++++++++++++------------------------
 1 file changed, 18 insertions(+), 26 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 7df8a67ca86..90288bcecc0 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -261,8 +261,7 @@ static void alloc_and_load_placement_structs(float place_cost_exp,
                                              const t_placer_opts& placer_opts,
                                              const t_noc_opts& noc_opts,
                                              t_direct_inf* directs,
-                                             int num_directs,
-                                             const bool cube_bb);
+                                             int num_directs);
 
 static void alloc_and_load_try_swap_structs(const bool cube_bb);
 static void free_try_swap_structs();
@@ -303,14 +302,12 @@ static void check_place(const t_placer_costs& costs,
                         const PlaceDelayModel* delay_model,
                         const PlacerCriticalities* criticalities,
                         const t_place_algorithm& place_algorithm,
-                        const t_noc_opts& noc_opts,
-                        const bool cube_bb);
+                        const t_noc_opts& noc_opts);
 
 static int check_placement_costs(const t_placer_costs& costs,
                                  const PlaceDelayModel* delay_model,
                                  const PlacerCriticalities* criticalities,
-                                 const t_place_algorithm& place_algorithm,
-                                 const bool cube_bb);
+                                 const t_place_algorithm& place_algorithm);
 
 static int check_placement_consistency();
 static int check_block_placement_consistency();
@@ -579,7 +576,6 @@ void try_place(const Netlist<>& net_list,
     float first_crit_exponent, first_rlim, first_t;
     int first_move_lim;
 
-    int num_layers = device_ctx.grid.get_num_layers();
 
     t_placer_costs costs(placer_opts.place_algorithm);
 
@@ -636,7 +632,7 @@ void try_place(const Netlist<>& net_list,
 
     g_vpr_ctx.mutable_placement().cube_bb = is_cube_bb(placer_opts.place_bounding_box_mode,
                                                        device_ctx.rr_graph);
-    const auto& cube_bb = g_vpr_ctx.mutable_placement().cube_bb;
+    const auto& cube_bb = g_vpr_ctx.placement().cube_bb;
 
     VTR_LOG("\n");
     VTR_LOG("Bounding box mode is %s\n", (cube_bb ? "Cube" : "Per-layer"));
@@ -659,7 +655,7 @@ void try_place(const Netlist<>& net_list,
 
     init_chan(width_fac, chan_width_dist, graph_directionality);
 
-    alloc_and_load_placement_structs(placer_opts.place_cost_exp, placer_opts, noc_opts, directs, num_directs, cube_bb);
+    alloc_and_load_placement_structs(placer_opts.place_cost_exp, placer_opts, noc_opts, directs, num_directs);
 
     vtr::ScopedStartFinishTimer timer("Placement");
 
@@ -823,8 +819,7 @@ void try_place(const Netlist<>& net_list,
                 place_delay_model.get(),
                 placer_criticalities.get(),
                 placer_opts.place_algorithm,
-                noc_opts,
-                cube_bb);
+                noc_opts);
 
     //Initial pacement statistics
     VTR_LOG("Initial placement cost: %g bb_cost: %g td_cost: %g\n", costs.cost,
@@ -872,7 +867,7 @@ void try_place(const Netlist<>& net_list,
         std::string filename = vtr::string_fmt("placement_%03d_%03d.place", 0,
                                                0);
         VTR_LOG("Saving initial placement to file: %s\n", filename.c_str());
-        print_place(nullptr, nullptr, filename.c_str(), false);
+        print_place(nullptr, nullptr, filename.c_str());
     }
 
     first_move_lim = get_initial_move_lim(placer_opts, annealing_sched);
@@ -1107,7 +1102,7 @@ void try_place(const Netlist<>& net_list,
         std::string filename = vtr::string_fmt("placement_%03d_%03d.place",
                                                state.num_temps + 1, 0);
         VTR_LOG("Saving final placement to file: %s\n", filename.c_str());
-        print_place(nullptr, nullptr, filename.c_str(), false);
+        print_place(nullptr, nullptr, filename.c_str());
     }
 
     // TODO:
@@ -1128,8 +1123,7 @@ void try_place(const Netlist<>& net_list,
                 place_delay_model.get(),
                 placer_criticalities.get(),
                 placer_opts.place_algorithm,
-                noc_opts,
-                cube_bb);
+                noc_opts);
 
     //Some stats
     VTR_LOG("\n");
@@ -1808,7 +1802,7 @@ static e_move_result try_swap(const t_annealing_state* state,
 
             /* Update net cost functions and reset flags. */
             update_move_nets(num_nets_affected,
-                             cube_bb);
+                             g_vpr_ctx.placement().cube_bb);
 
             /* Update clb data structures since we kept the move. */
             commit_move_blocks(blocks_affected);
@@ -1911,7 +1905,7 @@ static e_move_result try_swap(const t_annealing_state* state,
 #if 0
     // Check that each accepted swap yields a valid placement. This will
     // greatly slow the placer, but can debug some issues.
-    check_place(*costs, delay_model, criticalities, place_algorithm, noc_opts, cube_bb);
+    check_place(*costs, delay_model, criticalities, place_algorithm, noc_opts);
 #endif
     VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tAfter move Place cost %f, bb_cost %f, timing cost %f\n", costs->cost, costs->bb_cost, costs->timing_cost);
     return move_outcome;
@@ -2574,8 +2568,7 @@ static void alloc_and_load_placement_structs(float place_cost_exp,
                                              const t_placer_opts& placer_opts,
                                              const t_noc_opts& noc_opts,
                                              t_direct_inf* directs,
-                                             int num_directs,
-                                             const bool cube_bb) {
+                                             int num_directs) {
     int max_pins_per_clb;
     unsigned int ipin;
 
@@ -2583,6 +2576,8 @@ static void alloc_and_load_placement_structs(float place_cost_exp,
     const auto& cluster_ctx = g_vpr_ctx.clustering();
     auto& place_ctx = g_vpr_ctx.mutable_placement();
 
+    const auto& cube_bb = place_ctx.cube_bb;
+
     auto& p_timing_ctx = g_placer_ctx.mutable_timing();
     auto& place_move_ctx = g_placer_ctx.mutable_move();
 
@@ -3890,8 +3885,7 @@ static void check_place(const t_placer_costs& costs,
                         const PlaceDelayModel* delay_model,
                         const PlacerCriticalities* criticalities,
                         const t_place_algorithm& place_algorithm,
-                        const t_noc_opts& noc_opts,
-                        bool const cube_bb) {
+                        const t_noc_opts& noc_opts) {
     /* Checks that the placement has not confused our data structures. *
      * i.e. the clb and block structures agree about the locations of  *
      * every block, blocks are in legal spots, etc.  Also recomputes   *
@@ -3902,8 +3896,7 @@ static void check_place(const t_placer_costs& costs,
 
     error += check_placement_consistency();
     error += check_placement_costs(costs, delay_model, criticalities,
-                                   place_algorithm,
-                                   cube_bb);
+                                   place_algorithm);
     error += check_placement_floorplanning();
 
     // check the NoC costs during placement if the user is using the NoC supported flow
@@ -3926,13 +3919,12 @@ static void check_place(const t_placer_costs& costs,
 static int check_placement_costs(const t_placer_costs& costs,
                                  const PlaceDelayModel* delay_model,
                                  const PlacerCriticalities* criticalities,
-                                 const t_place_algorithm& place_algorithm,
-                                 const bool cube_bb) {
+                                 const t_place_algorithm& place_algorithm) {
     int error = 0;
     double bb_cost_check;
     double timing_cost_check;
 
-    int num_layers = g_vpr_ctx.device().grid.get_num_layers();
+    const auto& cube_bb = g_vpr_ctx.placement().cube_bb;
 
     if (cube_bb) {
         bb_cost_check = comp_bb_cost(CHECK);

From fb019b3d25e2b28c5573a7b0e06d7c63b7a770c6 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 27 Oct 2023 15:06:56 -0400
Subject: [PATCH 218/257] change some unnecessary assertion to assert_safe

---
 vpr/src/place/median_move_generator.cpp | 2 +-
 vpr/src/place/place.cpp                 | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index d9a2e9bd8c8..9b055d3342c 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -118,7 +118,7 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
                 layer_blk_cnt[layer_num] += place_move_ctx.num_sink_pin_layer[net_id][layer_num];
             }
             if(cluster_ctx.clb_nlist.pin_type(pin_id) != PinType::DRIVER) {
-                VTR_ASSERT(layer_blk_cnt[from_layer] > 0);
+                VTR_ASSERT_SAFE(layer_blk_cnt[from_layer] > 0);
                 layer_blk_cnt[from_layer]--;
             }
         }
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 90288bcecc0..51339151744 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -2633,7 +2633,7 @@ static void alloc_and_load_placement_structs(float place_cost_exp,
         place_move_ctx.bb_coords.resize(num_nets, t_bb());
         place_move_ctx.bb_num_on_edges.resize(num_nets, t_bb());
     } else {
-        VTR_ASSERT(!cube_bb);
+        VTR_ASSERT_SAFE(!cube_bb);
         place_move_ctx.layer_bb_num_on_edges.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
         place_move_ctx.layer_bb_coords.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
     }
@@ -2708,7 +2708,7 @@ static void alloc_and_load_try_swap_structs(const bool cube_bb) {
         ts_bb_edge_new.resize(num_nets, t_bb());
         ts_bb_coord_new.resize(num_nets, t_bb());
     } else {
-        VTR_ASSERT(!cube_bb);
+        VTR_ASSERT_SAFE(!cube_bb);
         layer_ts_bb_edge_new.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
         layer_ts_bb_coord_new.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
     }

From d916420a7053476b19bf82de3162044c5a941ddf Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 26 Oct 2023 10:59:54 -0400
Subject: [PATCH 219/257] use global cube_bb to check whether a union of bbs
 needs to be used

---
 vpr/src/place/median_move_generator.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index 9b055d3342c..9b4b08f3db9 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -73,7 +73,7 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
                 continue;
         } else {
             t_bb union_bb;
-            if (is_multi_layer) {
+            if (!g_vpr_ctx.placement().cube_bb) {
                 union_bb = union_2d_bb(place_move_ctx.layer_bb_coords[net_id]);
             }
 
@@ -281,7 +281,7 @@ static bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, int xo
 
     t_bb union_bb_edge;
     t_bb union_bb;
-    if (is_multi_layer) {
+    if (!g_vpr_ctx.placement().cube_bb) {
         std::tie(union_bb_edge, union_bb) = union_2d_bb_incr(place_move_ctx.layer_bb_num_on_edges[net_id],
                                                              place_move_ctx.layer_bb_coords[net_id]);
     }

From 5d9a116d5ef583d89b8bcabfa1978a03896370f4 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 26 Oct 2023 11:02:40 -0400
Subject: [PATCH 220/257] use cube bb to select the correct bb

---
 vpr/src/place/median_move_generator.cpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index 9b4b08f3db9..5b7dd3f57a7 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -73,11 +73,12 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
                 continue;
         } else {
             t_bb union_bb;
-            if (!g_vpr_ctx.placement().cube_bb) {
+            const bool& cube_bb = g_vpr_ctx.placement().cube_bb;
+            if (!cube_bb) {
                 union_bb = union_2d_bb(place_move_ctx.layer_bb_coords[net_id]);
             }
 
-            const auto& net_bb_coords = is_multi_layer ? union_bb : place_move_ctx.bb_coords[net_id];
+            const auto& net_bb_coords = cube_bb ? place_move_ctx.bb_coords[net_id]: union_bb;
             //use the incremental update of the bb
             bnum = cluster_ctx.clb_nlist.pin_block(pin_id);
             pnum = tile_pin_index(pin_id);
@@ -281,14 +282,15 @@ static bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, int xo
 
     t_bb union_bb_edge;
     t_bb union_bb;
-    if (!g_vpr_ctx.placement().cube_bb) {
+    const bool& cube_bb = g_vpr_ctx.placement().cube_bb;
+    if (!cube_bb) {
         std::tie(union_bb_edge, union_bb) = union_2d_bb_incr(place_move_ctx.layer_bb_num_on_edges[net_id],
                                                              place_move_ctx.layer_bb_coords[net_id]);
     }
 
     /* The net had NOT been updated before, could use the old values */
-    const t_bb& curr_bb_edge = is_multi_layer ? union_bb_edge : place_move_ctx.bb_num_on_edges[net_id];
-    const t_bb& curr_bb_coord = is_multi_layer ? union_bb : place_move_ctx.bb_coords[net_id];
+    const t_bb& curr_bb_edge = cube_bb ? place_move_ctx.bb_num_on_edges[net_id]: union_bb_edge;
+    const t_bb& curr_bb_coord = cube_bb ? place_move_ctx.bb_coords[net_id]: union_bb;
 
     /* Check if I can update the bounding box incrementally. */
 

From 3d536c9cde6c8b4b04664a505702636f9f92cd6f Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 26 Oct 2023 14:44:20 -0400
Subject: [PATCH 221/257] initialize num_sink_pin_layer for all layers in
 update_bb

---
 vpr/src/place/place.cpp | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 51339151744..8045ad37738 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -359,7 +359,7 @@ static void get_non_updateable_layer_bb(ClusterNetId net_id,
 static void update_bb(ClusterNetId net_id,
                       t_bb& bb_edge_new,
                       t_bb& bb_coord_new,
-                      std::vector<int>& num_sink_pin_layer,
+                      std::vector<int>& num_sink_pin_layer_new,
                       t_physical_tile_loc pin_old_loc,
                       t_physical_tile_loc pin_new_loc,
                       bool src_pin);
@@ -3219,7 +3219,7 @@ static void get_non_updateable_layer_bb(ClusterNetId net_id,
 static void update_bb(ClusterNetId net_id,
                       t_bb& bb_edge_new,
                       t_bb& bb_coord_new,
-                      std::vector<int>& num_sink_pin_layer,
+                      std::vector<int>& num_sink_pin_layer_new,
                       t_physical_tile_loc pin_old_loc,
                       t_physical_tile_loc pin_new_loc,
                       bool src_pin) {
@@ -3238,9 +3238,13 @@ static void update_bb(ClusterNetId net_id,
     //TODO: account for multiple physical pin instances per logical pin
     const t_bb *curr_bb_edge, *curr_bb_coord;
 
+    const std::vector<int>* curr_num_sink_pin_layer;
+
     auto& device_ctx = g_vpr_ctx.device();
     auto& place_move_ctx = g_placer_ctx.move();
 
+    const int num_layers = device_ctx.grid.get_num_layers();
+
     pin_new_loc.x = max(min<int>(pin_new_loc.x, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
     pin_new_loc.y = max(min<int>(pin_new_loc.y, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
     pin_old_loc.x = max(min<int>(pin_old_loc.x, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
@@ -3254,11 +3258,13 @@ static void update_bb(ClusterNetId net_id,
         /* The net had NOT been updated before, could use the old values */
         curr_bb_edge = &place_move_ctx.bb_num_on_edges[net_id];
         curr_bb_coord = &place_move_ctx.bb_coords[net_id];
+        curr_num_sink_pin_layer = &place_move_ctx.num_sink_pin_layer[net_id];
         bb_updated_before[net_id] = UPDATED_ONCE;
     } else {
         /* The net had been updated before, must use the new values */
         curr_bb_coord = &bb_coord_new;
         curr_bb_edge = &bb_edge_new;
+        curr_num_sink_pin_layer = &num_sink_pin_layer_new;
     }
 
     /* Check if I can update the bounding box incrementally. */
@@ -3269,7 +3275,7 @@ static void update_bb(ClusterNetId net_id,
 
         if (pin_old_loc.x == curr_bb_coord->xmax) { /* Old position at xmax. */
             if (curr_bb_edge->xmax == 1) {
-                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer);
+                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer_new);
                 bb_updated_before[net_id] = GOT_FROM_SCRATCH;
                 return;
             } else {
@@ -3301,7 +3307,7 @@ static void update_bb(ClusterNetId net_id,
 
         if (pin_old_loc.x == curr_bb_coord->xmin) { /* Old position at xmin. */
             if (curr_bb_edge->xmin == 1) {
-                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer);
+                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer_new);
                 bb_updated_before[net_id] = GOT_FROM_SCRATCH;
                 return;
             } else {
@@ -3342,7 +3348,7 @@ static void update_bb(ClusterNetId net_id,
 
         if (pin_old_loc.y == curr_bb_coord->ymax) { /* Old position at ymax. */
             if (curr_bb_edge->ymax == 1) {
-                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer);
+                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer_new);
                 bb_updated_before[net_id] = GOT_FROM_SCRATCH;
                 return;
             } else {
@@ -3374,7 +3380,7 @@ static void update_bb(ClusterNetId net_id,
 
         if (pin_old_loc.y == curr_bb_coord->ymin) { /* Old position at ymin. */
             if (curr_bb_edge->ymin == 1) {
-                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer);
+                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer_new);
                 bb_updated_before[net_id] = GOT_FROM_SCRATCH;
                 return;
             } else {
@@ -3408,12 +3414,15 @@ static void update_bb(ClusterNetId net_id,
     }
 
     /* Now account for the layer motion. */
-    if (device_ctx.grid.get_num_layers() > 1) {
+    if (num_layers > 1) {
         /* We need to update it only if multiple layers are available */
+        num_sink_pin_layer_new = (*curr_num_sink_pin_layer);
         if (!src_pin) {
             /* if src pin is being moved, we don't need to update this data structure */
-            num_sink_pin_layer[pin_old_loc.layer_num]--;
-            num_sink_pin_layer[pin_new_loc.layer_num]++;
+            if (pin_old_loc.layer_num != pin_new_loc.layer_num) {
+                num_sink_pin_layer_new[pin_old_loc.layer_num] = (*curr_num_sink_pin_layer)[pin_old_loc.layer_num] - 1;
+                num_sink_pin_layer_new[pin_new_loc.layer_num] = (*curr_num_sink_pin_layer)[pin_new_loc.layer_num] + 1;
+            }
         }
     }
 

From 4e63b69b11ab117b44e26d80311e7f4b6a29faaf Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 27 Oct 2023 16:17:15 -0400
Subject: [PATCH 222/257] add online doc for added parameters

---
 doc/src/vpr/command_line_usage.rst        | 16 ++++++++++++++++
 libs/librrgraph/src/base/rr_graph_utils.h |  7 +++++++
 vpr/src/base/read_place.cpp               |  3 ---
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/doc/src/vpr/command_line_usage.rst b/doc/src/vpr/command_line_usage.rst
index 0c110d01ee9..c0fa9b60456 100644
--- a/doc/src/vpr/command_line_usage.rst
+++ b/doc/src/vpr/command_line_usage.rst
@@ -381,6 +381,9 @@ Use the options below to override this default naming behaviour.
 .. option:: --write_placement_delay_lookup <file>
 
     Writes the placement delay lookup to the specified file.
+.. option:: --write_initial_place_file <file>
+
+    Writes out the placement chosen by the initial placement algorithm to the specified file.
 
 .. option:: --outfile_prefix <string>
 
@@ -769,6 +772,19 @@ If any of init_t, exit_t or alpha_t is specified, the user schedule, with a fixe
 
     **Default:**  ``criticality_timing``
 
+.. option:: --place_bounding_box_mode {auto_bb | cube_bb | per_layer_bb}
+
+    Specifies the type of the wirelength estimator used during placement. For single layer architectures, cube_bb (a 3D bounding box) is always used (and is the same as per_layer_bb).
+    For 3D architectures, cube_bb is appropriate if you can cross between layers at switch blocks, while if you can only cross between layers at output pins, per_layer_bb (one bounding box per layer) is more accurate and appropriate.
+
+    ``auto_bb``: The bounding box type is determined automatically based on the cross-layer connections.
+
+    ``cube_bb``: ``cube_bb`` bounding box is used to estimate the wirelength.
+
+    ``per_layer_bb``: ``per_layer_bb`` bounding box is used to estimate the wirelength
+
+    **Default:** ``auto_bb``
+
 .. option:: --place_chan_width <int>
 
     Tells VPR how many tracks a channel of relative width 1 is expected to need to complete routing of this circuit.
diff --git a/libs/librrgraph/src/base/rr_graph_utils.h b/libs/librrgraph/src/base/rr_graph_utils.h
index aeff17b8d5d..6ef6148c1fa 100644
--- a/libs/librrgraph/src/base/rr_graph_utils.h
+++ b/libs/librrgraph/src/base/rr_graph_utils.h
@@ -48,5 +48,12 @@ vtr::vector<RRNodeId, std::vector<RREdgeId>> get_fan_in_list(const RRGraphView&
 int seg_index_of_cblock(const RRGraphView& rr_graph, t_rr_type from_rr_type, int to_node);
 int seg_index_of_sblock(const RRGraphView& rr_graph, int from_node, int to_node);
 
+/**
+ * @brief This function checks whether all inter-die connections are from OPINs. Return "true"
+ * if that is the case. Can be used for multiple purposes. For example, to determine which type of bounding
+ * box to be used to estimate the wire-length of a net.
+ * @param rr_graph
+ * @return
+ */
 bool inter_layer_connections_limited_to_opin(const RRGraphView& rr_graph);
 #endif
\ No newline at end of file
diff --git a/vpr/src/base/read_place.cpp b/vpr/src/base/read_place.cpp
index c1a1862ba86..17c40e4781e 100644
--- a/vpr/src/base/read_place.cpp
+++ b/vpr/src/base/read_place.cpp
@@ -306,9 +306,6 @@ void read_place_body(std::ifstream& placement_file,
  *
  * The architecture and netlist files used to generate this placement are recorded
  * in the file to avoid loading a placement with the wrong support file later.
- *
- * The is_initial_place bool indicates whether this is the initial placement or not. It is used to determine the
- * suffix of the output file name.
  */
 void print_place(const char* net_file,
                  const char* net_id,

From be6acd24d438ea4f8dbec69c7aa14cd62d21f282 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 27 Oct 2023 16:58:55 -0400
Subject: [PATCH 223/257] assert if centroid layer is not valid

---
 vpr/src/place/centroid_move_generator.cpp          | 3 ++-
 vpr/src/place/weighted_centroid_move_generator.cpp | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/vpr/src/place/centroid_move_generator.cpp b/vpr/src/place/centroid_move_generator.cpp
index 4fbfc4a7521..730b9872c29 100644
--- a/vpr/src/place/centroid_move_generator.cpp
+++ b/vpr/src/place/centroid_move_generator.cpp
@@ -38,8 +38,9 @@ e_create_move CentroidMoveGenerator::propose_move(t_pl_blocks_to_be_moved& block
 
     /* Calculate the centroid location*/
     calculate_centroid_loc(b_from, false, centroid, nullptr);
+    VTR_ASSERT_SAFE(centroid.layer >= 0);
 
-    to.layer = (centroid.layer < 0) ? from.layer : centroid.layer;
+    to.layer = centroid.layer;
     /* Find a location near the weighted centroid_loc */
     if (!find_to_loc_centroid(cluster_from_type, from, centroid, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
diff --git a/vpr/src/place/weighted_centroid_move_generator.cpp b/vpr/src/place/weighted_centroid_move_generator.cpp
index cba14eb0869..74299e2384c 100644
--- a/vpr/src/place/weighted_centroid_move_generator.cpp
+++ b/vpr/src/place/weighted_centroid_move_generator.cpp
@@ -38,8 +38,9 @@ e_create_move WeightedCentroidMoveGenerator::propose_move(t_pl_blocks_to_be_move
 
     /* Calculate the weighted centroid */
     calculate_centroid_loc(b_from, true, centroid, criticalities);
+    VTR_ASSERT_SAFE(centroid.layer >= 0);
 
-    to.layer = (centroid.layer < 0) ? from.layer : centroid.layer;
+    to.layer = centroid.layer;
     if (!find_to_loc_centroid(cluster_from_type, from, centroid, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
     }

From 36a7b04b915a78a733026e6ce9f2ee55d2e7e410 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 27 Oct 2023 17:31:20 -0400
Subject: [PATCH 224/257] remove the extra line to initialize layer in rl moves

---
 vpr/src/place/centroid_move_generator.cpp       |  1 -
 .../place/feasible_region_move_generator.cpp    |  1 +
 vpr/src/place/median_move_generator.cpp         | 17 ++++++++++++-----
 .../place/weighted_centroid_move_generator.cpp  |  1 -
 .../place/weighted_median_move_generator.cpp    | 10 ++++++----
 5 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/vpr/src/place/centroid_move_generator.cpp b/vpr/src/place/centroid_move_generator.cpp
index 730b9872c29..309d4f91855 100644
--- a/vpr/src/place/centroid_move_generator.cpp
+++ b/vpr/src/place/centroid_move_generator.cpp
@@ -34,7 +34,6 @@ e_create_move CentroidMoveGenerator::propose_move(t_pl_blocks_to_be_moved& block
                                     placer_opts.place_dm_rlim};
 
     t_pl_loc to, centroid;
-    to.layer = from.layer;
 
     /* Calculate the centroid location*/
     calculate_centroid_loc(b_from, false, centroid, nullptr);
diff --git a/vpr/src/place/feasible_region_move_generator.cpp b/vpr/src/place/feasible_region_move_generator.cpp
index e57f660ab72..995c2a37836 100644
--- a/vpr/src/place/feasible_region_move_generator.cpp
+++ b/vpr/src/place/feasible_region_move_generator.cpp
@@ -33,6 +33,7 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
 
     /* Calculate the feasible region */
     t_pl_loc to;
+    // Currently, we don't change the layer for this move
     to.layer = from.layer;
     int ipin;
     ClusterBlockId bnum;
diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index 5b7dd3f57a7..0ca201da05d 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -39,7 +39,6 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
 
     /* Calculate the median region */
     t_pl_loc to;
-    to.layer = from_layer;
 
     t_bb coords(OPEN, OPEN, OPEN, OPEN, OPEN, OPEN);
     t_bb limit_coords;
@@ -118,7 +117,9 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
             for (int layer_num = 0; layer_num < num_layers; layer_num++) {
                 layer_blk_cnt[layer_num] += place_move_ctx.num_sink_pin_layer[net_id][layer_num];
             }
-            if(cluster_ctx.clb_nlist.pin_type(pin_id) != PinType::DRIVER) {
+            // If the pin under consideration is of type sink, it shouldn't be added to layer_blk_cnt since the block
+            // is moving
+            if(cluster_ctx.clb_nlist.pin_type(pin_id) == PinType::SINK) {
                 VTR_ASSERT_SAFE(layer_blk_cnt[from_layer] > 0);
                 layer_blk_cnt[from_layer]--;
             }
@@ -149,10 +150,16 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
     t_pl_loc median_point;
     median_point.x = (limit_coords.xmin + limit_coords.xmax) / 2;
     median_point.y = (limit_coords.ymin + limit_coords.ymax) / 2;
-    // TODO: When placer is updated to support moving blocks between dice, this needs to be changed. Currently, we only move blocks within a die.
-    median_point.layer = from.layer;
+
+    // Before calling find_to_loc_centroid a valid layer should be assigned to "to" location. If there are multiple layers, the layer
+    // with highest number of sinks will be used. Otherwise, the same layer as "from" loc is assigned.
     if (is_multi_layer) {
-        to.layer = std::distance(layer_blk_cnt.begin(), std::max_element(layer_blk_cnt.begin(), layer_blk_cnt.end()));
+        int layer_num = std::distance(layer_blk_cnt.begin(), std::max_element(layer_blk_cnt.begin(), layer_blk_cnt.end()));
+        median_point.layer = layer_num;
+        to.layer = layer_num;
+    } else {
+        median_point.layer = from.layer;
+        to.layer = from.layer;
     }
     if (!find_to_loc_centroid(cluster_from_type, from, median_point, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
diff --git a/vpr/src/place/weighted_centroid_move_generator.cpp b/vpr/src/place/weighted_centroid_move_generator.cpp
index 74299e2384c..91ccfa0bc76 100644
--- a/vpr/src/place/weighted_centroid_move_generator.cpp
+++ b/vpr/src/place/weighted_centroid_move_generator.cpp
@@ -34,7 +34,6 @@ e_create_move WeightedCentroidMoveGenerator::propose_move(t_pl_blocks_to_be_move
                                     placer_opts.place_dm_rlim};
 
     t_pl_loc to, centroid;
-    to.layer = from.layer;
 
     /* Calculate the weighted centroid */
     calculate_centroid_loc(b_from, true, centroid, criticalities);
diff --git a/vpr/src/place/weighted_median_move_generator.cpp b/vpr/src/place/weighted_median_move_generator.cpp
index 18ea19068e5..bf6923bec4c 100644
--- a/vpr/src/place/weighted_median_move_generator.cpp
+++ b/vpr/src/place/weighted_median_move_generator.cpp
@@ -37,7 +37,6 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
 
     /* Calculate the Edge weighted median region */
     t_pl_loc to;
-    to.layer = from.layer;
 
     t_bb_cost coords;
     t_bb limit_coords;
@@ -120,11 +119,14 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
     t_pl_loc w_median_point;
     w_median_point.x = (limit_coords.xmin + limit_coords.xmax) / 2;
     w_median_point.y = (limit_coords.ymin + limit_coords.ymax) / 2;
-    // TODO: Currently, we don't move blocks between different types of layers
-    w_median_point.layer = from.layer;
 
     if (is_multi_layer) {
-        to.layer = std::distance(layer_blk_cnt.begin(), std::max_element(layer_blk_cnt.begin(), layer_blk_cnt.end()));
+        int layer_num = std::distance(layer_blk_cnt.begin(), std::max_element(layer_blk_cnt.begin(), layer_blk_cnt.end()));
+        w_median_point.layer = layer_num;
+        to.layer = layer_num;
+    } else {
+        w_median_point.layer = from.layer;
+        to.layer = from.layer;
     }
     if (!find_to_loc_centroid(cluster_from_type, from, w_median_point, range_limiters, to, b_from)) {
         return e_create_move::ABORT;

From f5f0abe2f6ee43f15f9882d3a3913023362afb7d Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 27 Oct 2023 18:01:42 -0400
Subject: [PATCH 225/257] comment on cube bb and per-layer bb

---
 vpr/src/place/median_move_generator.cpp |  4 +++-
 vpr/src/place/move_utils.cpp            |  2 ++
 vpr/src/place/move_utils.h              | 11 +++++++++--
 vpr/src/place/place.cpp                 | 11 +++++++++++
 4 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index 0ca201da05d..c335c8faeef 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -295,7 +295,9 @@ static bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, int xo
                                                              place_move_ctx.layer_bb_coords[net_id]);
     }
 
-    /* The net had NOT been updated before, could use the old values */
+    /* In this move, we use a 3D bounding box. Thus, if per-layer BB is used by placer, we need to take a union of BBs and use that for the rest of
+     * operations in this move
+     */
     const t_bb& curr_bb_edge = cube_bb ? place_move_ctx.bb_num_on_edges[net_id]: union_bb_edge;
     const t_bb& curr_bb_coord = cube_bb ? place_move_ctx.bb_coords[net_id]: union_bb;
 
diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index 11e72d709e7..ca10cfc500b 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -1333,6 +1333,8 @@ int get_random_layer(t_logical_block_type_ptr logical_block) {
 t_bb union_2d_bb(const std::vector<t_2D_bb>& bb_vec) {
     t_bb merged_bb;
 
+    // Not all 2d_bbs are valid. Thus, if one of the coordinates in the 2D_bb is not valid (equal to OPEN),
+    // we need to skip it.
     for (const auto& layer_bb : bb_vec) {
         if (layer_bb.xmin == OPEN) {
             VTR_ASSERT_SAFE(layer_bb.xmax == OPEN);
diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h
index b9f5873425a..50bdbd76998 100644
--- a/vpr/src/place/move_utils.h
+++ b/vpr/src/place/move_utils.h
@@ -275,7 +275,6 @@ std::vector<t_physical_tile_loc> get_compressed_loc_approx(const t_compressed_bl
  * @param compressed_block_grid
  * @param compressed_locs
  * @param rlim
- * @param num_layers
  * @return A compressed search range for each layer
  */
 t_bb get_compressed_grid_target_search_range(const t_compressed_block_grid& compressed_block_grid,
@@ -293,7 +292,6 @@ t_bb get_compressed_grid_target_search_range(const t_compressed_block_grid& comp
  * @param from_compressed_loc
  * @param target_compressed_loc
  * @param rlim
- * @param num_layers
  * @return
  */
 t_bb get_compressed_grid_bounded_search_range(const t_compressed_block_grid& compressed_block_grid,
@@ -326,6 +324,15 @@ bool intersect_range_limit_with_floorplan_constraints(t_logical_block_type_ptr t
 
 std::string e_move_result_to_string(e_move_result move_outcome);
 
+/**
+ * @brief Iterate over all layers that have a physical tile at the x-y location specified by "loc" that can accommodate "logical_block".
+ * If the location in the layer specified by "layer_num" is empty, return that layer. Otherwise,
+ * return a layer that is not occupied at that location. If there isn't any, again, return the layer of loc.
+ *
+ * @param logical_block
+ * @param loc
+ * @return
+ */
 int find_free_layer(t_logical_block_type_ptr logical_block, const t_pl_loc& loc);
 
 int get_random_layer(t_logical_block_type_ptr logical_block);
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 8045ad37738..201f0a83271 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -254,6 +254,12 @@ std::unique_ptr<FILE, decltype(&vtr::fclose)> f_move_stats_file(nullptr,
 void print_clb_placement(const char* fname);
 #endif
 
+/**
+ * @brief determine the type of the bounding box used by the placer to predict the wirelength
+ *
+ * @param place_bb_mode The bounding box mode passed by the CLI
+ * @param rr_graph The routing resource graph
+ */
 static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode,
                        const RRGraphView& rr_graph);
 
@@ -1916,19 +1922,24 @@ static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode,
     bool cube_bb;
     const int number_layers = g_vpr_ctx.device().grid.get_num_layers();
 
+    // If the FPGA has only one layer, then we can only use the cube bounding box
     if (number_layers == 1) {
         cube_bb = true;
     } else {
         VTR_ASSERT(number_layers > 1);
         if (place_bb_mode == AUTO_BB) {
+            // If AUTO_BB is used, we analyze the RR graph to see whether there is any inter-layer connection that does not
+            // originate from an OPIN. If there is any, cube BB is chosen; otherwise, per-layer BB is chosen.
             if (inter_layer_connections_limited_to_opin(rr_graph)) {
                 cube_bb = false;
             } else {
                 cube_bb = true;
             }
         } else if (place_bb_mode == CUBE_BB) {
+            // The user has specifically asked for CUBE_BB
             cube_bb = true;
         } else {
+            // The user has specifically asked for PER_LAYER_BB
             VTR_ASSERT_SAFE(place_bb_mode == PER_LAYER_BB);
             cube_bb = false;
         }

From cea3c3e82570f9c4aa9d501f80a60d0bd9391ef9 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 27 Oct 2023 21:06:05 -0400
Subject: [PATCH 226/257] remove redundant resizings

---
 vpr/src/place/place.cpp | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 201f0a83271..8396ad97d66 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -2841,9 +2841,9 @@ static void get_bb_from_scratch(ClusterNetId net_id,
     num_on_edges.ymax = ymax_edge;
 }
 
-/* This routine finds the bounding box of each net from scratch (i.e.   *
- * from only the block location information).  It updates both the       *
- * coordinate and number of pins on each edge information.  It           *
+/* This routine finds the bounding box of each net from scratch (i.e. from only the block location  *
+ * information) when the bounding box is of type per-layer.  It updates the coordinates, the number  *
+ * of pins on each edge, and the number of sinks on each layer.  It                                  *
  * should only be called when the bounding box information is not valid. */
 static void get_layer_bb_from_scratch(ClusterNetId net_id,
                                       std::vector<t_2D_bb>& num_on_edges,
@@ -2851,9 +2851,6 @@ static void get_layer_bb_from_scratch(ClusterNetId net_id,
                                       std::vector<int>& layer_pin_sink_count) {
     auto& device_ctx = g_vpr_ctx.device();
     const int num_layers = device_ctx.grid.get_num_layers();
-    num_on_edges.resize(num_layers, t_2D_bb());
-    coords.resize(num_layers, t_2D_bb());
-    layer_pin_sink_count.resize(num_layers, 0);
     std::vector<int> xmin(num_layers, OPEN);
     std::vector<int> xmax(num_layers, OPEN);
     std::vector<int> ymin(num_layers, OPEN);

From 84d512d05909987f1c0157919c860d7ae0155cdf Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gamil.com>
Date: Tue, 31 Oct 2023 07:25:09 -0400
Subject: [PATCH 227/257] comment on cross-layer link delay field

---
 vpr/src/place/place_delay_model.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vpr/src/place/place_delay_model.h b/vpr/src/place/place_delay_model.h
index 2e2574904bc..b10eae12204 100644
--- a/vpr/src/place/place_delay_model.h
+++ b/vpr/src/place/place_delay_model.h
@@ -144,6 +144,7 @@ class OverrideDelayModel : public PlaceDelayModel {
 
   private:
     std::unique_ptr<DeltaDelayModel> base_delay_model_;
+    /* Minimum delay of cross-layer connections */
     float cross_layer_delay_;
     bool is_flat_;
 

From 2d4ab8f8e8a72b6943d64a1a661b3bc10771e9bc Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 3 Nov 2023 09:15:19 -0400
Subject: [PATCH 228/257] remove an unused variable

---
 vpr/src/place/median_move_generator.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index c335c8faeef..df89e96e089 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -280,8 +280,6 @@ static bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, int xo
     auto& device_ctx = g_vpr_ctx.device();
     auto& place_move_ctx = g_placer_ctx.move();
 
-    bool is_multi_layer = (device_ctx.grid.get_num_layers() > 1);
-
     xnew = std::max(std::min<int>(xnew, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
     ynew = std::max(std::min<int>(ynew, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
     xold = std::max(std::min<int>(xold, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels

From d49596261f45ee3fca023dcffb3fad48d15d24bb Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 3 Nov 2023 09:22:57 -0400
Subject: [PATCH 229/257] remove layer parameter from get_bb_incrementally

---
 vpr/src/place/median_move_generator.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index df89e96e089..c7740865372 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -5,7 +5,7 @@
 #include "placer_globals.h"
 #include "move_utils.h"
 
-static bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, int xold, int yold, int xnew, int ynew, int layer);
+static bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, int xold, int yold, int xnew, int ynew);
 
 static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb& bb_coord_new, ClusterBlockId block_id, bool& skip_net);
 
@@ -86,7 +86,6 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
             yold = place_ctx.block_locs[bnum].loc.y + physical_tile_type(bnum)->pin_height_offset[pnum];
             xold = std::max(std::min(xold, (int)device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
             yold = std::max(std::min(yold, (int)device_ctx.grid.height() - 2), 1); //-2 for no perim channels
-            int block_layer = place_ctx.block_locs[bnum].loc.layer;
 
             //To calulate the bb incrementally while excluding the moving block
             //assume that the moving block is moved to a non-critical coord of the bb
@@ -102,7 +101,7 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
                 ynew = net_bb_coords.ymin;
             }
 
-            if (!get_bb_incrementally(net_id, coords, xold, yold, xnew, ynew, block_layer)) {
+            if (!get_bb_incrementally(net_id, coords, xold, yold, xnew, ynew)) {
                 get_bb_from_scratch_excluding_block(net_id, coords, b_from, skip_net);
                 if (skip_net)
                     continue;
@@ -274,7 +273,7 @@ static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb& bb_co
  * the pins always lie on the outside of the bounding box.            *
  * The x and y coordinates are the pin's x and y coordinates.         */
 /* IO blocks are considered to be one cell in for simplicity.         */
-static bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, int xold, int yold, int xnew, int ynew, int /* layer */) {
+static bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, int xold, int yold, int xnew, int ynew) {
     //TODO: account for multiple physical pin instances per logical pin
 
     auto& device_ctx = g_vpr_ctx.device();

From 1eda7f3229be86947bbc52c3d5e11f33933dc757 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 3 Nov 2023 09:47:35 -0400
Subject: [PATCH 230/257] comment on union_2d_bb functions

---
 vpr/src/place/move_utils.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h
index 50bdbd76998..f9369acd4f7 100644
--- a/vpr/src/place/move_utils.h
+++ b/vpr/src/place/move_utils.h
@@ -337,8 +337,21 @@ int find_free_layer(t_logical_block_type_ptr logical_block, const t_pl_loc& loc)
 
 int get_random_layer(t_logical_block_type_ptr logical_block);
 
+/**
+ * @brief Iterate over all layers and get the maximum x and y over the layers that have a valid value. Set the layer min and max
+ * based on the layers that have a valid BB.
+ * @param tbb_vec
+ * @return 3D bounding box
+ */
 t_bb union_2d_bb(const std::vector<t_2D_bb>& tbb_vec);
 
+/**
+ * @brief Iterate over all layers and get the maximum x and y over the layers that have a valid value. Create the "num_edge" in a similar way. This data structure
+ * stores how many blocks are on each edge of the BB. Set the layer min and max based on the layers that have a valid BB.
+ * @param num_edge_vec
+ * @param bb_vec
+ * @return num_edge, 3D bb
+ */
 std::pair<t_bb, t_bb> union_2d_bb_incr(const std::vector<t_2D_bb>& num_edge_vec,
                                        const std::vector<t_2D_bb>& bb_vec);
 

From 1256995a2ba6c9165c7c98a895e8710c5c8d3f87 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 3 Nov 2023 10:37:30 -0400
Subject: [PATCH 231/257] set bb_coord_new to zero if there isn't any sink on
 that layer

---
 vpr/src/place/place.cpp | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 8396ad97d66..22816e8d128 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -3216,11 +3216,18 @@ static void get_non_updateable_layer_bb(ClusterNetId net_id,
      * clip to 1 in both directions as well (since minimum channel index *
      * is 0).  See route_common.cpp for a channel diagram.               */
     for (int layer_num = 0; layer_num < num_layers; layer_num++) {
-        bb_coord_new[layer_num].xmin = max(min<int>(xmin[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
-        bb_coord_new[layer_num].ymin = max(min<int>(ymin[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels
-        bb_coord_new[layer_num].xmax = max(min<int>(xmax[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
-        bb_coord_new[layer_num].ymax = max(min<int>(ymax[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels
         bb_coord_new[layer_num].layer_num = layer_num;
+        if (num_sink_layer[layer_num] == 0) {
+            bb_coord_new[layer_num].xmin = OPEN;
+            bb_coord_new[layer_num].ymin = OPEN;
+            bb_coord_new[layer_num].xmax = OPEN;
+            bb_coord_new[layer_num].ymax = OPEN;
+        } else {
+            bb_coord_new[layer_num].xmin = max(min<int>(xmin[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
+            bb_coord_new[layer_num].ymin = max(min<int>(ymin[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels
+            bb_coord_new[layer_num].xmax = max(min<int>(xmax[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
+            bb_coord_new[layer_num].ymax = max(min<int>(ymax[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels
+        }
     }
 }
 

From b08492ae3a863a31e64032bc00d800236d3fc4ed Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 3 Nov 2023 10:45:29 -0400
Subject: [PATCH 232/257] initialize xmin,etc with src locs

---
 vpr/src/place/place.cpp | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 22816e8d128..a52572d312c 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -3161,10 +3161,7 @@ static void get_non_updateable_layer_bb(ClusterNetId net_id,
     auto& device_ctx = g_vpr_ctx.device();
     int num_layers = device_ctx.grid.get_num_layers();
     num_sink_layer = std::vector<int>(num_layers, 0);
-    std::vector<int> xmin(num_layers, OPEN);
-    std::vector<int> ymin(num_layers, OPEN);
-    std::vector<int> xmax(num_layers, OPEN);
-    std::vector<int> ymax(num_layers, OPEN);
+
     int pnum;
 
     auto& cluster_ctx = g_vpr_ctx.clustering();
@@ -3178,12 +3175,10 @@ static void get_non_updateable_layer_bb(ClusterNetId net_id,
     int src_y = place_ctx.block_locs[bnum].loc.y
                 + physical_tile_type(bnum)->pin_height_offset[pnum];
 
-    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
-        xmin[layer_num] = src_x;
-        ymin[layer_num] = src_y;
-        xmax[layer_num] = src_x;
-        ymax[layer_num] = src_y;
-    }
+    std::vector<int> xmin(num_layers, src_x);
+    std::vector<int> ymin(num_layers, src_y);
+    std::vector<int> xmax(num_layers, src_x);
+    std::vector<int> ymax(num_layers, src_y);
 
     for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) {
         bnum = cluster_ctx.clb_nlist.pin_block(pin_id);

From 10990d13a6b32a9aa89084dba7c0062c3bd832a2 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 3 Nov 2023 11:07:42 -0400
Subject: [PATCH 233/257] remove the multi_layer parameter from
 SimpleRLMoveGenerator constructor

---
 vpr/src/place/RL_agent_util.cpp         | 9 ++++-----
 vpr/src/place/simpleRL_move_generator.h | 4 ++--
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/vpr/src/place/RL_agent_util.cpp b/vpr/src/place/RL_agent_util.cpp
index a2177d4ec5b..424e6076e5d 100644
--- a/vpr/src/place/RL_agent_util.cpp
+++ b/vpr/src/place/RL_agent_util.cpp
@@ -2,7 +2,6 @@
 #include "manual_move_generator.h"
 
 void create_move_generators(std::unique_ptr<MoveGenerator>& move_generator, std::unique_ptr<MoveGenerator>& move_generator2, const t_placer_opts& placer_opts, int move_lim) {
-    bool is_multi_layer = (g_vpr_ctx.device().grid.get_num_layers() > 1);
 
     if (placer_opts.RL_agent_placement == false) {
         if (placer_opts.place_algorithm.is_timing_driven()) {
@@ -62,13 +61,13 @@ void create_move_generators(std::unique_ptr<MoveGenerator>& move_generator, std:
                                                                             placer_opts.place_agent_epsilon);
             }
             karmed_bandit_agent1->set_step(placer_opts.place_agent_gamma, move_lim);
-            move_generator = std::make_unique<SimpleRLMoveGenerator>(karmed_bandit_agent1, is_multi_layer);
+            move_generator = std::make_unique<SimpleRLMoveGenerator>(karmed_bandit_agent1);
             //agent's 2nd state
             karmed_bandit_agent2 = std::make_unique<EpsilonGreedyAgent>(num_2nd_state_avail_moves,
                                                                         e_agent_space::MOVE_TYPE,
                                                                         placer_opts.place_agent_epsilon);
             karmed_bandit_agent2->set_step(placer_opts.place_agent_gamma, move_lim);
-            move_generator2 = std::make_unique<SimpleRLMoveGenerator>(karmed_bandit_agent2, is_multi_layer);
+            move_generator2 = std::make_unique<SimpleRLMoveGenerator>(karmed_bandit_agent2);
         } else {
             std::unique_ptr<SoftmaxAgent> karmed_bandit_agent1, karmed_bandit_agent2;
             //agent's 1st state
@@ -82,12 +81,12 @@ void create_move_generators(std::unique_ptr<MoveGenerator>& move_generator, std:
                                                                       e_agent_space::MOVE_TYPE);
             }
             karmed_bandit_agent1->set_step(placer_opts.place_agent_gamma, move_lim);
-            move_generator = std::make_unique<SimpleRLMoveGenerator>(karmed_bandit_agent1, is_multi_layer);
+            move_generator = std::make_unique<SimpleRLMoveGenerator>(karmed_bandit_agent1);
             //agent's 2nd state
             karmed_bandit_agent2 = std::make_unique<SoftmaxAgent>(num_2nd_state_avail_moves,
                                                                   e_agent_space::MOVE_TYPE);
             karmed_bandit_agent2->set_step(placer_opts.place_agent_gamma, move_lim);
-            move_generator2 = std::make_unique<SimpleRLMoveGenerator>(karmed_bandit_agent2, is_multi_layer);
+            move_generator2 = std::make_unique<SimpleRLMoveGenerator>(karmed_bandit_agent2);
         }
     }
 }
diff --git a/vpr/src/place/simpleRL_move_generator.h b/vpr/src/place/simpleRL_move_generator.h
index 347714010ea..e695529d084 100644
--- a/vpr/src/place/simpleRL_move_generator.h
+++ b/vpr/src/place/simpleRL_move_generator.h
@@ -217,7 +217,7 @@ class SimpleRLMoveGenerator : public MoveGenerator {
      */
     template<class T,
              class = typename std::enable_if<std::is_same<T, EpsilonGreedyAgent>::value || std::is_same<T, SoftmaxAgent>::value>::type>
-    explicit SimpleRLMoveGenerator(std::unique_ptr<T>& agent, bool is_multi_layer);
+    explicit SimpleRLMoveGenerator(std::unique_ptr<T>& agent);
 
     // Updates affected_blocks with the proposed move, while respecting the current rlim
     e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* criticalities) override;
@@ -227,7 +227,7 @@ class SimpleRLMoveGenerator : public MoveGenerator {
 };
 
 template<class T, class>
-SimpleRLMoveGenerator::SimpleRLMoveGenerator(std::unique_ptr<T>& agent, bool /*is_multi_layer*/) {
+SimpleRLMoveGenerator::SimpleRLMoveGenerator(std::unique_ptr<T>& agent) {
     avail_moves.resize((int)e_move_type::NUMBER_OF_AUTO_MOVES);
 
     avail_moves[(int)e_move_type::UNIFORM] = std::make_unique<UniformMoveGenerator>();

From b109b9f032ebe56bf1fd3345ef3e68d1e3f92964 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 3 Nov 2023 11:08:31 -0400
Subject: [PATCH 234/257] remove uniform_inter_layer_move_generator

---
 .../uniform_inter_layer_move_generator.cpp    | 45 -------------------
 .../uniform_inter_layer_move_generator.h      | 18 --------
 2 files changed, 63 deletions(-)
 delete mode 100644 vpr/src/place/uniform_inter_layer_move_generator.cpp
 delete mode 100644 vpr/src/place/uniform_inter_layer_move_generator.h

diff --git a/vpr/src/place/uniform_inter_layer_move_generator.cpp b/vpr/src/place/uniform_inter_layer_move_generator.cpp
deleted file mode 100644
index e7b41f32acc..00000000000
--- a/vpr/src/place/uniform_inter_layer_move_generator.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-#include "uniform_inter_layer_move_generator.h"
-#include "globals.h"
-#include "place_constraints.h"
-#include "move_utils.h"
-
-e_create_move UniformInterLayerMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float /*rlim*/, const t_placer_opts& placer_opts, const PlacerCriticalities* /*criticalities*/) {
-    // If this moved is called, we know that there are at least two layers.
-    VTR_ASSERT(g_vpr_ctx.device().grid.get_num_layers() > 1);
-    //Find a movable block based on blk_type
-    ClusterBlockId b_from = propose_block_to_move(placer_opts, proposed_action.logical_blk_type_index, false, nullptr, nullptr);
-
-    if (!b_from) { //No movable block found
-        return e_create_move::ABORT;
-    }
-
-    auto& place_ctx = g_vpr_ctx.placement();
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-
-    t_pl_loc from = place_ctx.block_locs[b_from].loc;
-    auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from);
-    auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer});
-    VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type));
-
-    const auto& block_compressed_grid = g_vpr_ctx.placement().compressed_block_grids[cluster_from_type->index];
-
-    const auto& compatible_layers = block_compressed_grid.get_layer_nums();
-
-    if (compatible_layers.size() < 2) {
-        return e_create_move::ABORT;
-    }
-
-    int to_layer = compatible_layers[vtr::irand((int)compatible_layers.size() - 1)];
-
-    t_pl_loc to = from;
-    to.layer = to_layer;
-
-    e_create_move create_move = ::create_move(blocks_affected, b_from, to);
-
-    //Check that all the blocks affected by the move would still be in a legal floorplan region after the swap
-    if (!floorplan_legal(blocks_affected)) {
-        return e_create_move::ABORT;
-    }
-
-    return create_move;
-}
\ No newline at end of file
diff --git a/vpr/src/place/uniform_inter_layer_move_generator.h b/vpr/src/place/uniform_inter_layer_move_generator.h
deleted file mode 100644
index 403fb97a67f..00000000000
--- a/vpr/src/place/uniform_inter_layer_move_generator.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef VTR_UNIFORM_INTER_LAYER_MOVE_GENERATOR_H
-#define VTR_UNIFORM_INTER_LAYER_MOVE_GENERATOR_H
-
-#include "move_generator.h"
-#include "timing_place.h"
-
-/**
- * @brief Uniform inter-layer move generator
- *
- * randomly picks a from_block with equal probabilities for all blocks, and then moves it randomly within
- * a range limit centered on from_block in the compressed block grid space
- */
-
-class UniformInterLayerMoveGenerator : public MoveGenerator {
-    e_create_move propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& /*placer_opts*/, const PlacerCriticalities* /*criticalities*/) override;
-};
-
-#endif //VTR_UNIFORM_INTER_LAYER_MOVE_GENERATOR_H

From 93fb9d7bb27f223943136239f7285637f9c1b5f3 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 3 Nov 2023 11:15:30 -0400
Subject: [PATCH 235/257] fix a bug - comment on weighted_move layer

---
 vpr/src/place/simpleRL_move_generator.h          | 1 -
 vpr/src/place/weighted_median_move_generator.cpp | 2 ++
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/vpr/src/place/simpleRL_move_generator.h b/vpr/src/place/simpleRL_move_generator.h
index e695529d084..de108313023 100644
--- a/vpr/src/place/simpleRL_move_generator.h
+++ b/vpr/src/place/simpleRL_move_generator.h
@@ -8,7 +8,6 @@
 #include "uniform_move_generator.h"
 #include "critical_uniform_move_generator.h"
 #include "centroid_move_generator.h"
-#include "uniform_inter_layer_move_generator.h"
 
 /**
  * @brief KArmedBanditAgent is the base class for RL agents that target the k-armed bandit problems
diff --git a/vpr/src/place/weighted_median_move_generator.cpp b/vpr/src/place/weighted_median_move_generator.cpp
index bf6923bec4c..0d40d6f2442 100644
--- a/vpr/src/place/weighted_median_move_generator.cpp
+++ b/vpr/src/place/weighted_median_move_generator.cpp
@@ -120,6 +120,8 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
     w_median_point.x = (limit_coords.xmin + limit_coords.xmax) / 2;
     w_median_point.y = (limit_coords.ymin + limit_coords.ymax) / 2;
 
+    // If multiple layers are available, we would choose the median layer, otherwise the same layer (layer #0) as the from_loc would be chosen
+    //#TODO: Since we are now only considering 2 layers, the layer with maximum number of sinks should be chosen. we need to update it to get the true median
     if (is_multi_layer) {
         int layer_num = std::distance(layer_blk_cnt.begin(), std::max_element(layer_blk_cnt.begin(), layer_blk_cnt.end()));
         w_median_point.layer = layer_num;

From 977d98e08db29391082161f19d7528d045748f86 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 3 Nov 2023 11:40:13 -0400
Subject: [PATCH 236/257] comment on why we need to decreament number of sinks

---
 vpr/src/place/weighted_median_move_generator.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/vpr/src/place/weighted_median_move_generator.cpp b/vpr/src/place/weighted_median_move_generator.cpp
index 0d40d6f2442..285a374945b 100644
--- a/vpr/src/place/weighted_median_move_generator.cpp
+++ b/vpr/src/place/weighted_median_move_generator.cpp
@@ -76,10 +76,12 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
         place_move_ctx.X_coord.insert(place_move_ctx.X_coord.end(), ceil(coords.xmax.criticality * CRIT_MULT_FOR_W_MEDIAN), coords.xmax.edge);
         place_move_ctx.Y_coord.insert(place_move_ctx.Y_coord.end(), ceil(coords.ymin.criticality * CRIT_MULT_FOR_W_MEDIAN), coords.ymin.edge);
         place_move_ctx.Y_coord.insert(place_move_ctx.Y_coord.end(), ceil(coords.ymax.criticality * CRIT_MULT_FOR_W_MEDIAN), coords.ymax.edge);
+        // If multiple layers are available, we need to keep track of how many sinks are in each layer.
         if (is_multi_layer) {
             for (int layer_num = 0; layer_num < num_layers; layer_num++) {
                 layer_blk_cnt[layer_num] += place_move_ctx.num_sink_pin_layer[net_id][layer_num];
             }
+            // If the pin under consideration is of type sink, it is counted in place_move_ctx.num_sink_pin_layer, and we don't want to consider the moving pins
             if(cluster_ctx.clb_nlist.pin_type(pin_id) != PinType::DRIVER) {
                 VTR_ASSERT(layer_blk_cnt[from.layer] > 0);
                 layer_blk_cnt[from.layer]--;

From 015de7bb8f0f7d2cbb34c1b5406fc2fda5825f5c Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 3 Nov 2023 19:54:41 -0400
Subject: [PATCH 237/257] update has_path_to_sink to accomodate for sw block
 inter-layer connections

---
 vpr/src/route/connection_router.cpp | 20 ++++++--------------
 vpr/src/route/connection_router.h   |  3 +++
 2 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp
index c87c1edf68c..4dd5bd653ac 100644
--- a/vpr/src/route/connection_router.cpp
+++ b/vpr/src/route/connection_router.cpp
@@ -8,27 +8,19 @@
 static inline bool has_path_to_sink(const t_rr_graph_view& rr_nodes,
                                     const RRGraphView* rr_graph,
                                     RRNodeId from_node,
-                                    RRNodeId sink_node) {
-    // ASSUMPTION: Only OPINs can connect to other layers
+                                    RRNodeId sink_node,
+                                    bool is_inter_layer_opin_connection) {
 
     int sink_layer = rr_graph->node_layer(sink_node);
 
-    if (rr_graph->node_layer(from_node) == sink_layer || rr_graph->node_type(from_node) == SOURCE) {
+    if (rr_graph->node_layer(from_node) == sink_layer || rr_graph->node_type(from_node) == SOURCE || !is_inter_layer_opin_connection) {
         return true;
     } else if (rr_graph->node_type(from_node) == CHANX || rr_graph->node_type(from_node) == CHANY || rr_graph->node_type(from_node) == IPIN) {
         return false;
     } else {
-        VTR_ASSERT(rr_graph->node_type(from_node) == OPIN);
+        VTR_ASSERT(rr_graph->node_type(from_node) == OPIN && is_inter_layer_opin_connection);
         auto edges = rr_nodes.edge_range(from_node);
 
-        //        for (RREdgeId from_edge : edges) {
-        //            RRNodeId to_node = rr_nodes.edge_sink_node(from_edge);
-        //            rr_nodes.prefetch_node(to_node);
-        //
-        //            int switch_idx = rr_nodes.edge_switch(from_edge);
-        //            VTR_PREFETCH(&rr_switch_inf_[switch_idx], 0, 0);
-        //        }
-
         for (RREdgeId from_edge : edges) {
             RRNodeId to_node = rr_nodes.edge_sink_node(from_edge);
             if (rr_graph->node_layer(to_node) == sink_layer) {
@@ -978,7 +970,7 @@ void ConnectionRouter<Heap>::add_route_tree_to_heap(
     /* Pre-order depth-first traversal */
     // IPINs and SINKS are not re_expanded
     if (rt_node.re_expand) {
-        if (target_node.is_valid() && !has_path_to_sink(rr_nodes_, rr_graph_, RRNodeId(rt_node.inode), RRNodeId(target_node))) {
+        if (target_node.is_valid() && !has_path_to_sink(rr_nodes_, rr_graph_, RRNodeId(rt_node.inode), RRNodeId(target_node), only_opin_inter_layer)) {
             return;
         }
         add_route_tree_node_to_heap(rt_node,
@@ -1134,7 +1126,7 @@ t_bb ConnectionRouter<Heap>::add_high_fanout_route_tree_to_heap(
                         continue;
                 }
 
-                if (!has_path_to_sink(rr_nodes_, rr_graph_, RRNodeId(rt_node.inode), target_node)) {
+                if (!has_path_to_sink(rr_nodes_, rr_graph_, RRNodeId(rt_node.inode), target_node, only_opin_inter_layer)) {
                     continue;
                 }
                 // Put the node onto the heap
diff --git a/vpr/src/route/connection_router.h b/vpr/src/route/connection_router.h
index 5834e852409..093ab8fed83 100644
--- a/vpr/src/route/connection_router.h
+++ b/vpr/src/route/connection_router.h
@@ -47,6 +47,7 @@ class ConnectionRouter : public ConnectionRouterInterface {
         , router_debug_(false) {
         heap_.init_heap(grid);
         heap_.set_prune_limit(rr_nodes_.size(), kHeapPruneFactor * rr_nodes_.size());
+        only_opin_inter_layer = (grid.get_num_layers() > 1) && inter_layer_connections_limited_to_opin(*rr_graph);
     }
 
     // Clear's the modified list.  Should be called after reset_path_costs
@@ -286,6 +287,8 @@ class ConnectionRouter : public ConnectionRouterInterface {
     HeapImplementation heap_;
     bool router_debug_;
 
+    bool only_opin_inter_layer;
+
     // The path manager for RCV, keeps track of the route tree as a set, also manages the allocation of the heap types
     PathManager rcv_path_manager;
 };

From dca4108260b08b2aa1a9c06fab853d58ee739fb1 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 3 Nov 2023 19:58:47 -0400
Subject: [PATCH 238/257] forward declaration of static functions

---
 vpr/src/route/connection_router.cpp | 162 +++++++++++++++-------------
 1 file changed, 89 insertions(+), 73 deletions(-)

diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp
index 4dd5bd653ac..f9b796eed7b 100644
--- a/vpr/src/route/connection_router.cpp
+++ b/vpr/src/route/connection_router.cpp
@@ -5,84 +5,21 @@
 #include "bucket.h"
 #include "rr_graph_fwd.h"
 
-static inline bool has_path_to_sink(const t_rr_graph_view& rr_nodes,
-                                    const RRGraphView* rr_graph,
-                                    RRNodeId from_node,
-                                    RRNodeId sink_node,
-                                    bool is_inter_layer_opin_connection) {
+static bool has_path_to_sink(const t_rr_graph_view& rr_nodes,
+                             const RRGraphView* rr_graph,
+                             RRNodeId from_node,
+                             RRNodeId sink_node,
+                             bool is_inter_layer_opin_connection);
 
-    int sink_layer = rr_graph->node_layer(sink_node);
+static bool relevant_node_to_target(const RRGraphView* rr_graph,
+                                    RRNodeId node_to_add,
+                                    RRNodeId target_node);
 
-    if (rr_graph->node_layer(from_node) == sink_layer || rr_graph->node_type(from_node) == SOURCE || !is_inter_layer_opin_connection) {
-        return true;
-    } else if (rr_graph->node_type(from_node) == CHANX || rr_graph->node_type(from_node) == CHANY || rr_graph->node_type(from_node) == IPIN) {
-        return false;
-    } else {
-        VTR_ASSERT(rr_graph->node_type(from_node) == OPIN && is_inter_layer_opin_connection);
-        auto edges = rr_nodes.edge_range(from_node);
-
-        for (RREdgeId from_edge : edges) {
-            RRNodeId to_node = rr_nodes.edge_sink_node(from_edge);
-            if (rr_graph->node_layer(to_node) == sink_layer) {
-                return true;
-            }
-        }
-        return false;
-    }
-}
-
-static inline bool relevant_node_to_target(const RRGraphView* rr_graph,
-                                           RRNodeId node_to_add,
-                                           RRNodeId target_node) {
-    VTR_ASSERT(rr_graph->node_type(target_node) == t_rr_type::SINK);
-    auto node_to_add_type = rr_graph->node_type(node_to_add);
-    if (node_to_add_type == t_rr_type::OPIN || node_to_add_type == t_rr_type::SOURCE || node_to_add_type == t_rr_type::CHANX || node_to_add_type == t_rr_type::CHANY || node_to_add_type == SINK) {
-        return true;
-    } else if (node_in_same_physical_tile(node_to_add, target_node)) {
-        VTR_ASSERT(node_to_add_type == IPIN);
-        return true;
-    }
-    return false;
-}
-
-inline void update_router_stats(const DeviceContext& device_ctx,
+static void update_router_stats(const DeviceContext& device_ctx,
                                 const RRGraphView* rr_graph,
                                 RouterStats* router_stats,
                                 RRNodeId rr_node_id,
-                                bool is_push) {
-    if (is_push) {
-        router_stats->heap_pushes++;
-    } else {
-        router_stats->heap_pops++;
-    }
-
-    auto node_type = rr_graph->node_type(rr_node_id);
-    VTR_ASSERT(node_type != NUM_RR_TYPES);
-    t_physical_tile_type_ptr physical_type = device_ctx.grid.get_physical_type({rr_graph->node_xlow(rr_node_id),
-                                                                                rr_graph->node_ylow(rr_node_id),
-                                                                                rr_graph->node_layer(rr_node_id)});
-
-    if (is_inter_cluster_node(physical_type,
-                              node_type,
-                              rr_graph->node_ptc_num(rr_node_id))) {
-        if (is_push) {
-            router_stats->inter_cluster_node_pushes++;
-            router_stats->inter_cluster_node_type_cnt_pushes[node_type]++;
-        } else {
-            router_stats->inter_cluster_node_pops++;
-            router_stats->inter_cluster_node_type_cnt_pops[node_type]++;
-        }
-
-    } else {
-        if (is_push) {
-            router_stats->intra_cluster_node_pushes++;
-            router_stats->intra_cluster_node_type_cnt_pushes[node_type]++;
-        } else {
-            router_stats->intra_cluster_node_pops++;
-            router_stats->intra_cluster_node_type_cnt_pops[node_type]++;
-        }
-    }
-}
+                                bool is_push);
 
 /** return tuple <found_path, retry_with_full_bb, cheapest> */
 template<typename Heap>
@@ -1175,6 +1112,85 @@ t_bb ConnectionRouter<Heap>::add_high_fanout_route_tree_to_heap(
     return bounding_box;
 }
 
+static inline bool has_path_to_sink(const t_rr_graph_view& rr_nodes,
+                                    const RRGraphView* rr_graph,
+                                    RRNodeId from_node,
+                                    RRNodeId sink_node,
+                                    bool is_inter_layer_opin_connection) {
+
+    int sink_layer = rr_graph->node_layer(sink_node);
+
+    if (rr_graph->node_layer(from_node) == sink_layer || rr_graph->node_type(from_node) == SOURCE || !is_inter_layer_opin_connection) {
+        return true;
+    } else if (rr_graph->node_type(from_node) == CHANX || rr_graph->node_type(from_node) == CHANY || rr_graph->node_type(from_node) == IPIN) {
+        return false;
+    } else {
+        VTR_ASSERT(rr_graph->node_type(from_node) == OPIN && is_inter_layer_opin_connection);
+        auto edges = rr_nodes.edge_range(from_node);
+
+        for (RREdgeId from_edge : edges) {
+            RRNodeId to_node = rr_nodes.edge_sink_node(from_edge);
+            if (rr_graph->node_layer(to_node) == sink_layer) {
+                return true;
+            }
+        }
+        return false;
+    }
+}
+
+static inline bool relevant_node_to_target(const RRGraphView* rr_graph,
+                                           RRNodeId node_to_add,
+                                           RRNodeId target_node) {
+    VTR_ASSERT(rr_graph->node_type(target_node) == t_rr_type::SINK);
+    auto node_to_add_type = rr_graph->node_type(node_to_add);
+    if (node_to_add_type == t_rr_type::OPIN || node_to_add_type == t_rr_type::SOURCE || node_to_add_type == t_rr_type::CHANX || node_to_add_type == t_rr_type::CHANY || node_to_add_type == SINK) {
+        return true;
+    } else if (node_in_same_physical_tile(node_to_add, target_node)) {
+        VTR_ASSERT(node_to_add_type == IPIN);
+        return true;
+    }
+    return false;
+}
+
+static inline void update_router_stats(const DeviceContext& device_ctx,
+                                       const RRGraphView* rr_graph,
+                                       RouterStats* router_stats,
+                                       RRNodeId rr_node_id,
+                                       bool is_push) {
+    if (is_push) {
+        router_stats->heap_pushes++;
+    } else {
+        router_stats->heap_pops++;
+    }
+
+    auto node_type = rr_graph->node_type(rr_node_id);
+    VTR_ASSERT(node_type != NUM_RR_TYPES);
+    t_physical_tile_type_ptr physical_type = device_ctx.grid.get_physical_type({rr_graph->node_xlow(rr_node_id),
+                                                                                rr_graph->node_ylow(rr_node_id),
+                                                                                rr_graph->node_layer(rr_node_id)});
+
+    if (is_inter_cluster_node(physical_type,
+                              node_type,
+                              rr_graph->node_ptc_num(rr_node_id))) {
+        if (is_push) {
+            router_stats->inter_cluster_node_pushes++;
+            router_stats->inter_cluster_node_type_cnt_pushes[node_type]++;
+        } else {
+            router_stats->inter_cluster_node_pops++;
+            router_stats->inter_cluster_node_type_cnt_pops[node_type]++;
+        }
+
+    } else {
+        if (is_push) {
+            router_stats->intra_cluster_node_pushes++;
+            router_stats->intra_cluster_node_type_cnt_pushes[node_type]++;
+        } else {
+            router_stats->intra_cluster_node_pops++;
+            router_stats->intra_cluster_node_type_cnt_pops[node_type]++;
+        }
+    }
+}
+
 std::unique_ptr<ConnectionRouterInterface> make_connection_router(e_heap_type heap_type,
                                                                   const DeviceGrid& grid,
                                                                   const RouterLookahead& router_lookahead,

From 0a1c49920085c8ccb22b7ece59652cac23f8b9db Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Sat, 4 Nov 2023 11:06:29 -0400
Subject: [PATCH 239/257] comment on has_path_to_sink

---
 vpr/src/route/connection_router.cpp | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp
index f9b796eed7b..4780f2204f3 100644
--- a/vpr/src/route/connection_router.cpp
+++ b/vpr/src/route/connection_router.cpp
@@ -5,6 +5,16 @@
 #include "bucket.h"
 #include "rr_graph_fwd.h"
 
+/**
+ * @brief This function is relevant when the architecture is 3D. If inter-layer connections are only from OPINs (determined by is_inter_layer_opin_connection),
+ * then nodes (other than OPINs) which are on a different layer than the sink's layer don't need to be pushed back to the heap.
+ * @param rr_nodes
+ * @param rr_graph
+ * @param from_node
+ * @param sink_node
+ * @param is_inter_layer_opin_connection It is true if the architecture is 3D and inter-layer connections are only from OPINs.
+ * @return
+ */
 static bool has_path_to_sink(const t_rr_graph_view& rr_nodes,
                              const RRGraphView* rr_graph,
                              RRNodeId from_node,

From b8617f4fa989f579aa7b51afa8f0292dcbd94a80 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Sat, 4 Nov 2023 11:10:03 -0400
Subject: [PATCH 240/257] comment on the case that BB should cover all layers

---
 vpr/src/route/router_delay_profiling.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vpr/src/route/router_delay_profiling.cpp b/vpr/src/route/router_delay_profiling.cpp
index 3add7d02962..eac8fdf28c4 100644
--- a/vpr/src/route/router_delay_profiling.cpp
+++ b/vpr/src/route/router_delay_profiling.cpp
@@ -58,6 +58,7 @@ bool RouterDelayProfiler::calculate_delay(RRNodeId source_node,
     bounding_box.xmax = device_ctx.grid.width() + 1;
     bounding_box.ymin = 0;
     bounding_box.ymax = device_ctx.grid.height() + 1;
+    // If layer num is not specified, it means the BB should cover all layers
     if (layer_num == OPEN) {
         bounding_box.layer_min = 0;
         bounding_box.layer_max = device_ctx.grid.get_num_layers() - 1;

From 78be619cf12b4d9291f061ed30dc22cb7482e85c Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Sat, 4 Nov 2023 11:47:56 -0400
Subject: [PATCH 241/257] comment on router delay profiler calculate_delay

---
 vpr/src/route/router_delay_profiling.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/vpr/src/route/router_delay_profiling.h b/vpr/src/route/router_delay_profiling.h
index 3540d8d29ca..88ce11605e5 100644
--- a/vpr/src/route/router_delay_profiling.h
+++ b/vpr/src/route/router_delay_profiling.h
@@ -13,6 +13,19 @@ class RouterDelayProfiler {
     RouterDelayProfiler(const Netlist<>& net_list,
                         const RouterLookahead* lookahead,
                         bool is_flat);
+
+    /**
+     * @brief Returns true as long as it found some way to hook up this net, even if that
+     * way resulted in overuse of resources (congestion).  If there is no way
+     * to route this net, even ignoring congestion, it returns false.  In this
+     * case the rr_graph is disconnected and you can give up.
+     * @param source_node
+     * @param sink_node
+     * @param router_opts 
+     * @param net_delay
+     * @param layer_num
+     * @return
+     */
     bool calculate_delay(RRNodeId source_node,
                          RRNodeId sink_node,
                          const t_router_opts& router_opts,

From 8135d805a0067fe9158227225293a00192d77e5b Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Sat, 4 Nov 2023 16:44:38 -0400
Subject: [PATCH 242/257] comment on get_cost_from_src_opin

---
 vpr/src/route/router_delay_profiling.h | 2 +-
 vpr/src/route/router_lookahead_map.cpp | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/vpr/src/route/router_delay_profiling.h b/vpr/src/route/router_delay_profiling.h
index 88ce11605e5..11d8eb25f1d 100644
--- a/vpr/src/route/router_delay_profiling.h
+++ b/vpr/src/route/router_delay_profiling.h
@@ -21,7 +21,7 @@ class RouterDelayProfiler {
      * case the rr_graph is disconnected and you can give up.
      * @param source_node
      * @param sink_node
-     * @param router_opts 
+     * @param router_opts
      * @param net_delay
      * @param layer_num
      * @return
diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp
index ffb58ad95a6..5f5f8e886f2 100644
--- a/vpr/src/route/router_lookahead_map.cpp
+++ b/vpr/src/route/router_lookahead_map.cpp
@@ -255,6 +255,14 @@ static void store_min_cost_to_sinks(std::unordered_map<int, std::unordered_map<i
  */
 static void min_global_cost_map(vtr::NdMatrix<util::Cost_Entry, 3>& internal_opin_global_cost_map);
 
+/**
+ * @brief Iterate over all of the wire segments accessible from the SOURCE/OPIN (stored in src_opin_delay_map) and return the minimum cost (congestion and delay) across them to the sink
+ * @param src_opin_delay_map
+ * @param layer_num
+ * @param delta_x
+ * @param delta_y
+ * @return (delay, congestion)
+ */
 static std::pair<float, float> get_cost_from_src_opin(const std::map<int, util::t_reachable_wire_inf>& src_opin_delay_map,
                                                       int layer_num,
                                                       int delta_x,

From 1879400441d674d9924850ce6f238e1d6dcb9b37 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Sat, 4 Nov 2023 17:07:41 -0400
Subject: [PATCH 243/257] comment on compute_router_src_opin_lookahead

---
 vpr/src/route/router_lookahead_map_utils.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/vpr/src/route/router_lookahead_map_utils.h b/vpr/src/route/router_lookahead_map_utils.h
index 8eeae8357b9..0245208fdf7 100644
--- a/vpr/src/route/router_lookahead_map_utils.h
+++ b/vpr/src/route/router_lookahead_map_utils.h
@@ -291,6 +291,11 @@ typedef std::vector<std::unordered_map<int, Cost_Entry>> t_ipin_primitive_sink_d
 // and the tile's IPIN. If there are many connections to the same IPIN, the one with the minimum delay is selected.
 typedef std::vector<std::vector<std::vector<t_reachable_wire_inf>>> t_chan_ipins_delays;
 
+/**
+ * @brief For each tile, iterate over its OPINs and store which segment types are accessible from each OPIN
+ * @param is_flat
+ * @return (segments accessible on the same layer, segments accessible on other layer)
+ */
 std::pair<t_src_opin_delays, t_src_opin_inter_layer_delays> compute_router_src_opin_lookahead(bool is_flat);
 
 t_chan_ipins_delays compute_router_chan_ipin_lookahead();

From 122ab4dc2cd33aacfc401ee3c021f038cb23feee Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Sat, 4 Nov 2023 17:13:48 -0400
Subject: [PATCH 244/257] remove redundant min func

---
 vpr/src/util/vpr_utils.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp
index 22bae088a22..b200a06ba7f 100644
--- a/vpr/src/util/vpr_utils.cpp
+++ b/vpr/src/util/vpr_utils.cpp
@@ -2514,9 +2514,13 @@ float get_min_cross_layer_delay(const std::vector<t_arch_switch_inf>& arch_switc
                                 const int wire_to_ipin_arch_sw_id) {
     float min_delay = std::numeric_limits<float>::max();
 
+    // Check whether the inter-layer switch type for connection block is defined. If it is,
+    // get the delay of it.
     if (wire_to_ipin_arch_sw_id != OPEN) {
-        min_delay = std::min(min_delay, arch_switch_inf[wire_to_ipin_arch_sw_id].Tdel());
+        min_delay = arch_switch_inf[wire_to_ipin_arch_sw_id].Tdel();
     }
+
+    // Iterate over inter-layer switch types of segments to find the minimum delay
     for (const auto& seg_inf : segment_inf) {
         int cross_layer_sw_arch_id = seg_inf.arch_opin_between_dice_switch;
         if (cross_layer_sw_arch_id != OPEN) {

From 17074d3daf6e6a1f10477433ee96fc75c1937bad Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Sat, 4 Nov 2023 17:18:16 -0400
Subject: [PATCH 245/257] comment on get_min_cross_layer_delay

---
 vpr/src/util/vpr_utils.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/vpr/src/util/vpr_utils.h b/vpr/src/util/vpr_utils.h
index e213a4cb733..75842967cd1 100644
--- a/vpr/src/util/vpr_utils.h
+++ b/vpr/src/util/vpr_utils.h
@@ -311,6 +311,14 @@ t_arch_switch_inf create_internal_arch_sw(float delay);
 
 void add_pb_child_to_list(std::list<const t_pb*>& pb_list, const t_pb* parent_pb);
 
+/**
+ * @brief Iterate over all inter-layer switch types and return the minimum delay among them.
+ * Useful for the router lookahead to have some estimate of the cost of crossing a layer.
+ * @param arch_switch_inf
+ * @param segment_inf
+ * @param wire_to_ipin_arch_sw_id
+ * @return
+ */
 float get_min_cross_layer_delay(const std::vector<t_arch_switch_inf>& arch_switch_inf,
                                 const std::vector<t_segment_inf>& segment_inf,
                                 const int wire_to_ipin_arch_sw_id);

From f45c4d276878e66c42677d9c34f74737a5b98751 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Sat, 4 Nov 2023 18:05:19 -0400
Subject: [PATCH 246/257] centroid loc is not always valid

---
 vpr/src/place/centroid_move_generator.cpp          | 5 +++--
 vpr/src/place/weighted_centroid_move_generator.cpp | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/vpr/src/place/centroid_move_generator.cpp b/vpr/src/place/centroid_move_generator.cpp
index 309d4f91855..f1316701998 100644
--- a/vpr/src/place/centroid_move_generator.cpp
+++ b/vpr/src/place/centroid_move_generator.cpp
@@ -37,9 +37,10 @@ e_create_move CentroidMoveGenerator::propose_move(t_pl_blocks_to_be_moved& block
 
     /* Calculate the centroid location*/
     calculate_centroid_loc(b_from, false, centroid, nullptr);
-    VTR_ASSERT_SAFE(centroid.layer >= 0);
 
-    to.layer = centroid.layer;
+    // Centroid location is not necessarily a valid location, and the downstream code expects a valid
+    // layer for the "to" location. So if the layer is not valid, we set it to the same layer as the from loc.
+    to.layer = (centroid.layer < 0) ? from.layer : centroid.layer;
     /* Find a location near the weighted centroid_loc */
     if (!find_to_loc_centroid(cluster_from_type, from, centroid, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
diff --git a/vpr/src/place/weighted_centroid_move_generator.cpp b/vpr/src/place/weighted_centroid_move_generator.cpp
index 91ccfa0bc76..d33b6fa2ebe 100644
--- a/vpr/src/place/weighted_centroid_move_generator.cpp
+++ b/vpr/src/place/weighted_centroid_move_generator.cpp
@@ -37,9 +37,10 @@ e_create_move WeightedCentroidMoveGenerator::propose_move(t_pl_blocks_to_be_move
 
     /* Calculate the weighted centroid */
     calculate_centroid_loc(b_from, true, centroid, criticalities);
-    VTR_ASSERT_SAFE(centroid.layer >= 0);
 
-    to.layer = centroid.layer;
+    // Centroid location is not necessarily a valid location, and the downstream code expects a valid
+    // layer for the "to" location. So if the layer is not valid, we set it to the same layer as the from loc.
+    to.layer = (centroid.layer < 0) ? from.layer : centroid.layer;
     if (!find_to_loc_centroid(cluster_from_type, from, centroid, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
     }

From b2c9e71549201f645db7c89674ea57ad81375d87 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Mon, 6 Nov 2023 08:33:29 -0500
Subject: [PATCH 247/257] make format

---
 vpr/src/base/read_options.cpp                    | 1 -
 vpr/src/place/RL_agent_util.cpp                  | 1 -
 vpr/src/place/median_move_generator.cpp          | 8 ++++----
 vpr/src/place/place.cpp                          | 3 ---
 vpr/src/place/weighted_median_move_generator.cpp | 2 +-
 vpr/src/route/connection_router.cpp              | 1 -
 6 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index eb31f168862..32929c4fc9a 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -2061,7 +2061,6 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg
         .choices({"auto_bb", "cube_bb", "per_layer_bb"})
         .show_in(argparse::ShowIn::HELP_ONLY);
 
-
     place_grp.add_argument<bool, ParseOnOff>(args.RL_agent_placement, "--RL_agent_placement")
         .help(
             "Uses a Reinforcement Learning (RL) agent in choosing the appropiate move type in placement."
diff --git a/vpr/src/place/RL_agent_util.cpp b/vpr/src/place/RL_agent_util.cpp
index 424e6076e5d..c0ee94cc7ce 100644
--- a/vpr/src/place/RL_agent_util.cpp
+++ b/vpr/src/place/RL_agent_util.cpp
@@ -2,7 +2,6 @@
 #include "manual_move_generator.h"
 
 void create_move_generators(std::unique_ptr<MoveGenerator>& move_generator, std::unique_ptr<MoveGenerator>& move_generator2, const t_placer_opts& placer_opts, int move_lim) {
-
     if (placer_opts.RL_agent_placement == false) {
         if (placer_opts.place_algorithm.is_timing_driven()) {
             VTR_LOG("Using static probabilities for choosing each move type\n");
diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index c7740865372..d91c3a76974 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -77,7 +77,7 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
                 union_bb = union_2d_bb(place_move_ctx.layer_bb_coords[net_id]);
             }
 
-            const auto& net_bb_coords = cube_bb ? place_move_ctx.bb_coords[net_id]: union_bb;
+            const auto& net_bb_coords = cube_bb ? place_move_ctx.bb_coords[net_id] : union_bb;
             //use the incremental update of the bb
             bnum = cluster_ctx.clb_nlist.pin_block(pin_id);
             pnum = tile_pin_index(pin_id);
@@ -118,7 +118,7 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
             }
             // If the pin under consideration is of type sink, it shouldn't be added to layer_blk_cnt since the block
             // is moving
-            if(cluster_ctx.clb_nlist.pin_type(pin_id) == PinType::SINK) {
+            if (cluster_ctx.clb_nlist.pin_type(pin_id) == PinType::SINK) {
                 VTR_ASSERT_SAFE(layer_blk_cnt[from_layer] > 0);
                 layer_blk_cnt[from_layer]--;
             }
@@ -295,8 +295,8 @@ static bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, int xo
     /* In this move, we use a 3D bounding box. Thus, if per-layer BB is used by placer, we need to take a union of BBs and use that for the rest of
      * operations in this move
      */
-    const t_bb& curr_bb_edge = cube_bb ? place_move_ctx.bb_num_on_edges[net_id]: union_bb_edge;
-    const t_bb& curr_bb_coord = cube_bb ? place_move_ctx.bb_coords[net_id]: union_bb;
+    const t_bb& curr_bb_edge = cube_bb ? place_move_ctx.bb_num_on_edges[net_id] : union_bb_edge;
+    const t_bb& curr_bb_coord = cube_bb ? place_move_ctx.bb_coords[net_id] : union_bb;
 
     /* Check if I can update the bounding box incrementally. */
 
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index a52572d312c..4e74ce0f8b5 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -582,7 +582,6 @@ void try_place(const Netlist<>& net_list,
     float first_crit_exponent, first_rlim, first_t;
     int first_move_lim;
 
-
     t_placer_costs costs(placer_opts.place_algorithm);
 
     tatum::TimingPathInfo critical_path;
@@ -1533,7 +1532,6 @@ static void update_move_nets(int num_nets_affected,
     auto& cluster_ctx = g_vpr_ctx.clustering();
     auto& place_move_ctx = g_placer_ctx.mutable_move();
 
-
     for (int inet_affected = 0; inet_affected < num_nets_affected;
          inet_affected++) {
         ClusterNetId net_id = ts_nets_to_update[inet_affected];
@@ -1946,7 +1944,6 @@ static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode,
     }
 
     return cube_bb;
-
 }
 
 /**
diff --git a/vpr/src/place/weighted_median_move_generator.cpp b/vpr/src/place/weighted_median_move_generator.cpp
index 285a374945b..b94ef3da60e 100644
--- a/vpr/src/place/weighted_median_move_generator.cpp
+++ b/vpr/src/place/weighted_median_move_generator.cpp
@@ -82,7 +82,7 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
                 layer_blk_cnt[layer_num] += place_move_ctx.num_sink_pin_layer[net_id][layer_num];
             }
             // If the pin under consideration if of type sink, it is counted in place_move_ctx.num_sink_pin_layer, and we don't want to consider the moving pins
-            if(cluster_ctx.clb_nlist.pin_type(pin_id) != PinType::DRIVER) {
+            if (cluster_ctx.clb_nlist.pin_type(pin_id) != PinType::DRIVER) {
                 VTR_ASSERT(layer_blk_cnt[from.layer] > 0);
                 layer_blk_cnt[from.layer]--;
             }
diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp
index 4780f2204f3..b99fb60b650 100644
--- a/vpr/src/route/connection_router.cpp
+++ b/vpr/src/route/connection_router.cpp
@@ -1127,7 +1127,6 @@ static inline bool has_path_to_sink(const t_rr_graph_view& rr_nodes,
                                     RRNodeId from_node,
                                     RRNodeId sink_node,
                                     bool is_inter_layer_opin_connection) {
-
     int sink_layer = rr_graph->node_layer(sink_node);
 
     if (rr_graph->node_layer(from_node) == sink_layer || rr_graph->node_type(from_node) == SOURCE || !is_inter_layer_opin_connection) {

From 9b1c60d8dfb9cf486d6e72ba975f4c59e2f71403 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 7 Nov 2023 14:16:36 -0500
Subject: [PATCH 248/257] experiment: change std vec to array for num sink on
 each layer per net

---
 vpr/src/place/place.cpp        | 70 ++++++++++++++++++----------------
 vpr/src/place/placer_context.h |  2 +-
 2 files changed, 38 insertions(+), 34 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 4e74ce0f8b5..6e7400cf96f 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -150,7 +150,7 @@ static vtr::NdMatrix<float, 2> chany_place_cost_fac({0, 0}); //[0...device_ctx.g
 /* [0...cluster_ctx.clb_nlist.nets().size()-1] */
 static vtr::vector<ClusterNetId, t_bb> ts_bb_edge_new, ts_bb_coord_new;
 static vtr::vector<ClusterNetId, std::vector<t_2D_bb>> layer_ts_bb_edge_new, layer_ts_bb_coord_new;
-static vtr::vector<ClusterNetId, std::vector<int>> ts_layer_sink_pin_count;
+static vtr::vector<ClusterNetId, std::array<int, 5>> ts_layer_sink_pin_count;
 static std::vector<ClusterNetId> ts_nets_to_update;
 
 /* These file-scoped variables keep track of the number of swaps       *
@@ -356,16 +356,16 @@ static e_move_result assess_swap(double delta_c, double t);
 
 static void get_non_updateable_bb(ClusterNetId net_id,
                                   t_bb& bb_coord_new,
-                                  std::vector<int>& num_sink_pin_layer);
+                                  std::array<int, 5>& num_sink_pin_layer);
 
 static void get_non_updateable_layer_bb(ClusterNetId net_id,
                                         std::vector<t_2D_bb>& bb_coord_new,
-                                        std::vector<int>& num_sink_layer);
+                                        std::array<int, 5>& num_sink_layer);
 
 static void update_bb(ClusterNetId net_id,
                       t_bb& bb_edge_new,
                       t_bb& bb_coord_new,
-                      std::vector<int>& num_sink_pin_layer_new,
+                      std::array<int, 5>& num_sink_pin_layer_new,
                       t_physical_tile_loc pin_old_loc,
                       t_physical_tile_loc pin_new_loc,
                       bool src_pin);
@@ -373,7 +373,7 @@ static void update_bb(ClusterNetId net_id,
 static void update_layer_bb(ClusterNetId net_id,
                             std::vector<t_2D_bb>& bb_edge_new,
                             std::vector<t_2D_bb>& bb_coord_new,
-                            std::vector<int>& bb_pin_sink_count_new,
+                            std::array<int, 5>& bb_pin_sink_count_new,
                             t_physical_tile_loc pin_old_loc,
                             t_physical_tile_loc pin_new_loc,
                             bool is_output_pin);
@@ -383,7 +383,7 @@ static inline void update_bb_same_layer(ClusterNetId net_id,
                                         const t_physical_tile_loc& pin_new_loc,
                                         const std::vector<t_2D_bb>& curr_bb_edge,
                                         const std::vector<t_2D_bb>& curr_bb_coord,
-                                        std::vector<int>& bb_pin_sink_count_new,
+                                        std::array<int, 5>& bb_pin_sink_count_new,
                                         std::vector<t_2D_bb>& bb_edge_new,
                                         std::vector<t_2D_bb>& bb_coord_new);
 
@@ -392,21 +392,21 @@ static inline void update_bb_layer_changed(ClusterNetId net_id,
                                            const t_physical_tile_loc& pin_new_loc,
                                            const std::vector<t_2D_bb>& curr_bb_edge,
                                            const std::vector<t_2D_bb>& curr_bb_coord,
-                                           std::vector<int>& bb_pin_sink_count_new,
+                                           std::array<int, 5>& bb_pin_sink_count_new,
                                            std::vector<t_2D_bb>& bb_edge_new,
                                            std::vector<t_2D_bb>& bb_coord_new);
 
 static void update_bb_pin_sink_count(ClusterNetId net_id,
                                      const t_physical_tile_loc& pin_old_loc,
                                      const t_physical_tile_loc& pin_new_loc,
-                                     const std::vector<int>& curr_layer_pin_sink_count,
-                                     std::vector<int>& bb_pin_sink_count_new,
+                                     const std::array<int, 5>& curr_layer_pin_sink_count,
+                                     std::array<int, 5>& bb_pin_sink_count_new,
                                      bool is_output_pin);
 
 static inline void update_bb_edge(ClusterNetId net_id,
                                   std::vector<t_2D_bb>& bb_edge_new,
                                   std::vector<t_2D_bb>& bb_coord_new,
-                                  std::vector<int>& bb_layer_pin_sink_count,
+                                  std::array<int, 5>& bb_layer_pin_sink_count,
                                   const int& old_num_block_on_edge,
                                   const int& old_edge_coord,
                                   int& new_num_block_on_edge,
@@ -455,23 +455,23 @@ static double get_net_cost(ClusterNetId net_id, const t_bb& bbptr);
 
 static double get_net_layer_cost(ClusterNetId /* net_id */,
                                  const std::vector<t_2D_bb>& bbptr,
-                                 const std::vector<int>& layer_pin_sink_count);
+                                 const std::array<int, 5>& layer_pin_sink_count);
 
 static void get_bb_from_scratch(ClusterNetId net_id,
                                 t_bb& coords,
                                 t_bb& num_on_edges,
-                                std::vector<int>& num_sink_pin_layer);
+                                std::array<int, 5>& num_sink_pin_layer);
 
 static void get_layer_bb_from_scratch(ClusterNetId net_id,
                                       std::vector<t_2D_bb>& num_on_edges,
                                       std::vector<t_2D_bb>& coords,
-                                      std::vector<int>& layer_pin_sink_count);
+                                      std::array<int, 5>& layer_pin_sink_count);
 
 static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bbptr);
 
 static double get_net_layer_wirelength_estimate(ClusterNetId /* net_id */,
                                                 const std::vector<t_2D_bb>& bbptr,
-                                                const std::vector<int>& layer_pin_sink_count);
+                                                const std::array<int, 5>& layer_pin_sink_count);
 
 static void free_try_swap_arrays();
 
@@ -2646,7 +2646,9 @@ static void alloc_and_load_placement_structs(float place_cost_exp,
         place_move_ctx.layer_bb_coords.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
     }
 
-    place_move_ctx.num_sink_pin_layer.resize(num_nets, std::vector<int>(num_layers, 0));
+    std::for_each(place_move_ctx.num_sink_pin_layer.begin(), place_move_ctx.num_sink_pin_layer.end(), [](auto& arr) {
+        std::fill(arr.begin(), arr.end(), 0);
+    });
 
     /* Used to store costs for moves not yet made and to indicate when a net's   *
      * cost has been recomputed. proposed_net_cost[inet] < 0 means net's cost hasn't *
@@ -2720,7 +2722,9 @@ static void alloc_and_load_try_swap_structs(const bool cube_bb) {
         layer_ts_bb_edge_new.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
         layer_ts_bb_coord_new.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
     }
-    ts_layer_sink_pin_count.resize(num_nets, std::vector<int>(num_layers, OPEN));
+    std::for_each(ts_layer_sink_pin_count.begin(), ts_layer_sink_pin_count.end(), [](auto& arr) {
+        std::fill(arr.begin(), arr.end(), 0);
+    });
     ts_nets_to_update.resize(num_nets, ClusterNetId::INVALID());
 
     auto& place_ctx = g_vpr_ctx.mutable_placement();
@@ -2746,7 +2750,7 @@ static void free_try_swap_structs() {
 static void get_bb_from_scratch(ClusterNetId net_id,
                                 t_bb& coords,
                                 t_bb& num_on_edges,
-                                std::vector<int>& num_sink_pin_layer) {
+                                std::array<int, 5>& num_sink_pin_layer) {
     int pnum, x, y, pin_layer, xmin, xmax, ymin, ymax;
     int xmin_edge, xmax_edge, ymin_edge, ymax_edge;
 
@@ -2845,7 +2849,7 @@ static void get_bb_from_scratch(ClusterNetId net_id,
 static void get_layer_bb_from_scratch(ClusterNetId net_id,
                                       std::vector<t_2D_bb>& num_on_edges,
                                       std::vector<t_2D_bb>& coords,
-                                      std::vector<int>& layer_pin_sink_count) {
+                                      std::array<int, 5>& layer_pin_sink_count) {
     auto& device_ctx = g_vpr_ctx.device();
     const int num_layers = device_ctx.grid.get_num_layers();
     std::vector<int> xmin(num_layers, OPEN);
@@ -2988,7 +2992,7 @@ static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bbptr
 
 static double get_net_layer_wirelength_estimate(ClusterNetId /* net_id */,
                                                 const std::vector<t_2D_bb>& bbptr,
-                                                const std::vector<int>& layer_pin_sink_count) {
+                                                const std::array<int, 5>& layer_pin_sink_count) {
     /* WMF: Finds the estimate of wirelength due to one net by looking at   *
      * its coordinate bounding box.                                         */
 
@@ -3046,7 +3050,7 @@ static double get_net_cost(ClusterNetId net_id, const t_bb& bbptr) {
 
 static double get_net_layer_cost(ClusterNetId /* net_id */,
                                  const std::vector<t_2D_bb>& bbptr,
-                                 const std::vector<int>& layer_pin_sink_count) {
+                                 const std::array<int, 5>& layer_pin_sink_count) {
     /* Finds the cost due to one net by looking at its coordinate bounding  *
      * box.                                                                 */
 
@@ -3087,7 +3091,7 @@ static double get_net_layer_cost(ClusterNetId /* net_id */,
  * the pins always lie on the outside of the bounding box.            */
 static void get_non_updateable_bb(ClusterNetId net_id,
                                   t_bb& bb_coord_new,
-                                  std::vector<int>& num_sink_pin_layer) {
+                                  std::array<int, 5>& num_sink_pin_layer) {
     //TODO: account for multiple physical pin instances per logical pin
 
     int xmax, ymax, xmin, ymin, x, y, layer;
@@ -3152,12 +3156,12 @@ static void get_non_updateable_bb(ClusterNetId net_id,
 
 static void get_non_updateable_layer_bb(ClusterNetId net_id,
                                         std::vector<t_2D_bb>& bb_coord_new,
-                                        std::vector<int>& num_sink_layer) {
+                                        std::array<int, 5>& num_sink_layer) {
     //TODO: account for multiple physical pin instances per logical pin
 
     auto& device_ctx = g_vpr_ctx.device();
     int num_layers = device_ctx.grid.get_num_layers();
-    num_sink_layer = std::vector<int>(num_layers, 0);
+   std::fill(num_sink_layer.begin(), num_sink_layer.end(), 0);
 
     int pnum;
 
@@ -3226,7 +3230,7 @@ static void get_non_updateable_layer_bb(ClusterNetId net_id,
 static void update_bb(ClusterNetId net_id,
                       t_bb& bb_edge_new,
                       t_bb& bb_coord_new,
-                      std::vector<int>& num_sink_pin_layer_new,
+                      std::array<int, 5>& num_sink_pin_layer_new,
                       t_physical_tile_loc pin_old_loc,
                       t_physical_tile_loc pin_new_loc,
                       bool src_pin) {
@@ -3245,7 +3249,7 @@ static void update_bb(ClusterNetId net_id,
     //TODO: account for multiple physical pin instances per logical pin
     const t_bb *curr_bb_edge, *curr_bb_coord;
 
-    const std::vector<int>* curr_num_sink_pin_layer;
+    const std::array<int, 5>* curr_num_sink_pin_layer;
 
     auto& device_ctx = g_vpr_ctx.device();
     auto& place_move_ctx = g_placer_ctx.move();
@@ -3441,7 +3445,7 @@ static void update_bb(ClusterNetId net_id,
 static void update_layer_bb(ClusterNetId net_id,
                             std::vector<t_2D_bb>& bb_edge_new,
                             std::vector<t_2D_bb>& bb_coord_new,
-                            std::vector<int>& bb_pin_sink_count_new,
+                            std::array<int, 5>& bb_pin_sink_count_new,
                             t_physical_tile_loc pin_old_loc,
                             t_physical_tile_loc pin_new_loc,
                             bool is_output_pin) {
@@ -3459,7 +3463,7 @@ static void update_layer_bb(ClusterNetId net_id,
     /* IO blocks are considered to be one cell in for simplicity.         */
     //TODO: account for multiple physical pin instances per logical pin
     const std::vector<t_2D_bb>*curr_bb_edge, *curr_bb_coord;
-    const std::vector<int>* curr_layer_pin_sink_count;
+    const std::array<int, 5>* curr_layer_pin_sink_count;
 
     auto& device_ctx = g_vpr_ctx.device();
     auto& place_move_ctx = g_placer_ctx.move();
@@ -3532,7 +3536,7 @@ static inline void update_bb_same_layer(ClusterNetId net_id,
                                         const t_physical_tile_loc& pin_new_loc,
                                         const std::vector<t_2D_bb>& curr_bb_edge,
                                         const std::vector<t_2D_bb>& curr_bb_coord,
-                                        std::vector<int>& bb_pin_sink_count_new,
+                                        std::array<int, 5>& bb_pin_sink_count_new,
                                         std::vector<t_2D_bb>& bb_edge_new,
                                         std::vector<t_2D_bb>& bb_coord_new) {
     int x_old = pin_old_loc.x;
@@ -3644,7 +3648,7 @@ static inline void update_bb_layer_changed(ClusterNetId net_id,
                                            const t_physical_tile_loc& pin_new_loc,
                                            const std::vector<t_2D_bb>& curr_bb_edge,
                                            const std::vector<t_2D_bb>& curr_bb_coord,
-                                           std::vector<int>& bb_pin_sink_count_new,
+                                           std::array<int, 5>& bb_pin_sink_count_new,
                                            std::vector<t_2D_bb>& bb_edge_new,
                                            std::vector<t_2D_bb>& bb_coord_new) {
     int x_old = pin_old_loc.x;
@@ -3717,11 +3721,11 @@ static inline void update_bb_layer_changed(ClusterNetId net_id,
 static void update_bb_pin_sink_count(ClusterNetId /* net_id */,
                                      const t_physical_tile_loc& pin_old_loc,
                                      const t_physical_tile_loc& pin_new_loc,
-                                     const std::vector<int>& curr_layer_pin_sink_count,
-                                     std::vector<int>& bb_pin_sink_count_new,
+                                     const std::array<int, 5>& curr_layer_pin_sink_count,
+                                     std::array<int, 5>& bb_pin_sink_count_new,
                                      bool is_output_pin) {
     VTR_ASSERT(curr_layer_pin_sink_count[pin_old_loc.layer_num] > 0 || is_output_pin == 1);
-    bb_pin_sink_count_new = curr_layer_pin_sink_count;
+    std::copy(curr_layer_pin_sink_count.begin(), curr_layer_pin_sink_count.end(), bb_pin_sink_count_new.begin());
     if (!is_output_pin) {
         bb_pin_sink_count_new[pin_old_loc.layer_num] -= 1;
         bb_pin_sink_count_new[pin_new_loc.layer_num] += 1;
@@ -3731,7 +3735,7 @@ static void update_bb_pin_sink_count(ClusterNetId /* net_id */,
 static inline void update_bb_edge(ClusterNetId net_id,
                                   std::vector<t_2D_bb>& bb_edge_new,
                                   std::vector<t_2D_bb>& bb_coord_new,
-                                  std::vector<int>& bb_layer_pin_sink_count,
+                                  std::array<int, 5>& bb_layer_pin_sink_count,
                                   const int& old_num_block_on_edge,
                                   const int& old_edge_coord,
                                   int& new_num_block_on_edge,
diff --git a/vpr/src/place/placer_context.h b/vpr/src/place/placer_context.h
index 266a407dab8..e82e331da3b 100644
--- a/vpr/src/place/placer_context.h
+++ b/vpr/src/place/placer_context.h
@@ -104,7 +104,7 @@ struct PlacerMoveContext : public Context {
     vtr::vector<ClusterNetId, std::vector<t_2D_bb>> layer_bb_coords;
 
     // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each layer ()
-    vtr::vector<ClusterNetId, std::vector<int>> num_sink_pin_layer;
+    vtr::vector<ClusterNetId, std::array<int, 5>> num_sink_pin_layer;
 
     // The first range limit calculated by the anneal
     float first_rlim;

From 65fb00782e34fbd6ecf611c792aa49fb0c07fef2 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Tue, 7 Nov 2023 14:55:01 -0500
Subject: [PATCH 249/257] initialize the num_sink ds to number of nets

---
 vpr/src/place/place.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 6e7400cf96f..3e303806d43 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -2646,6 +2646,7 @@ static void alloc_and_load_placement_structs(float place_cost_exp,
         place_move_ctx.layer_bb_coords.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
     }
 
+    place_move_ctx.num_sink_pin_layer.resize(num_nets);
     std::for_each(place_move_ctx.num_sink_pin_layer.begin(), place_move_ctx.num_sink_pin_layer.end(), [](auto& arr) {
         std::fill(arr.begin(), arr.end(), 0);
     });
@@ -2722,6 +2723,7 @@ static void alloc_and_load_try_swap_structs(const bool cube_bb) {
         layer_ts_bb_edge_new.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
         layer_ts_bb_coord_new.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
     }
+    ts_layer_sink_pin_count.resize(num_nets);
     std::for_each(ts_layer_sink_pin_count.begin(), ts_layer_sink_pin_count.end(), [](auto& arr) {
         std::fill(arr.begin(), arr.end(), 0);
     });

From aa0b5e98fb665af60b97b8fb826586f649d96bf0 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 9 Nov 2023 08:00:44 -0500
Subject: [PATCH 250/257] initialize num_sink per layer to OPEN instead of zero

---
 vpr/src/place/place.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 3e303806d43..4a6cc4dd7b3 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -2648,7 +2648,7 @@ static void alloc_and_load_placement_structs(float place_cost_exp,
 
     place_move_ctx.num_sink_pin_layer.resize(num_nets);
     std::for_each(place_move_ctx.num_sink_pin_layer.begin(), place_move_ctx.num_sink_pin_layer.end(), [](auto& arr) {
-        std::fill(arr.begin(), arr.end(), 0);
+        std::fill(arr.begin(), arr.end(), OPEN);
     });
 
     /* Used to store costs for moves not yet made and to indicate when a net's   *
@@ -2725,7 +2725,7 @@ static void alloc_and_load_try_swap_structs(const bool cube_bb) {
     }
     ts_layer_sink_pin_count.resize(num_nets);
     std::for_each(ts_layer_sink_pin_count.begin(), ts_layer_sink_pin_count.end(), [](auto& arr) {
-        std::fill(arr.begin(), arr.end(), 0);
+        std::fill(arr.begin(), arr.end(), OPEN);
     });
     ts_nets_to_update.resize(num_nets, ClusterNetId::INVALID());
 

From d18fc4f77aae0b8e1b8445ceb50fb6b2e4515b38 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 9 Nov 2023 08:14:03 -0500
Subject: [PATCH 251/257] define max_num_layers and change place.cpp to use
 that

---
 vpr/src/base/vpr_types.h |  8 ++++++
 vpr/src/place/place.cpp  | 58 ++++++++++++++++++++--------------------
 2 files changed, 37 insertions(+), 29 deletions(-)

diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 4912addd2eb..2784c5e63da 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -65,6 +65,14 @@
 
 //#define VERBOSE //Prints additional intermediate data
 
+/*
+ * Compile-time upper bound on the number of layers in the device grid.
+ * Per-net, per-layer data such as `num_sink_pin_layer` in the placer context is stored in a fixed-size
+ * std::array of this length instead of a dynamically allocated std::vector, which avoids the extra pointer
+ * chasing and poor cache locality of a separate allocation per net. Layer indices must be less than this value.
+ */
+constexpr int MAX_NUM_LAYERS = 2;
+
 /**
  * @brief For update_screen. Denotes importance of update.
  *
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 4a6cc4dd7b3..dd34b5d20d2 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -150,7 +150,7 @@ static vtr::NdMatrix<float, 2> chany_place_cost_fac({0, 0}); //[0...device_ctx.g
 /* [0...cluster_ctx.clb_nlist.nets().size()-1] */
 static vtr::vector<ClusterNetId, t_bb> ts_bb_edge_new, ts_bb_coord_new;
 static vtr::vector<ClusterNetId, std::vector<t_2D_bb>> layer_ts_bb_edge_new, layer_ts_bb_coord_new;
-static vtr::vector<ClusterNetId, std::array<int, 5>> ts_layer_sink_pin_count;
+static vtr::vector<ClusterNetId, std::array<int, MAX_NUM_LAYERS>> ts_layer_sink_pin_count;
 static std::vector<ClusterNetId> ts_nets_to_update;
 
 /* These file-scoped variables keep track of the number of swaps       *
@@ -356,16 +356,16 @@ static e_move_result assess_swap(double delta_c, double t);
 
 static void get_non_updateable_bb(ClusterNetId net_id,
                                   t_bb& bb_coord_new,
-                                  std::array<int, 5>& num_sink_pin_layer);
+                                  std::array<int, MAX_NUM_LAYERS>& num_sink_pin_layer);
 
 static void get_non_updateable_layer_bb(ClusterNetId net_id,
                                         std::vector<t_2D_bb>& bb_coord_new,
-                                        std::array<int, 5>& num_sink_layer);
+                                        std::array<int, MAX_NUM_LAYERS>& num_sink_layer);
 
 static void update_bb(ClusterNetId net_id,
                       t_bb& bb_edge_new,
                       t_bb& bb_coord_new,
-                      std::array<int, 5>& num_sink_pin_layer_new,
+                      std::array<int, MAX_NUM_LAYERS>& num_sink_pin_layer_new,
                       t_physical_tile_loc pin_old_loc,
                       t_physical_tile_loc pin_new_loc,
                       bool src_pin);
@@ -373,7 +373,7 @@ static void update_bb(ClusterNetId net_id,
 static void update_layer_bb(ClusterNetId net_id,
                             std::vector<t_2D_bb>& bb_edge_new,
                             std::vector<t_2D_bb>& bb_coord_new,
-                            std::array<int, 5>& bb_pin_sink_count_new,
+                            std::array<int, MAX_NUM_LAYERS>& bb_pin_sink_count_new,
                             t_physical_tile_loc pin_old_loc,
                             t_physical_tile_loc pin_new_loc,
                             bool is_output_pin);
@@ -383,7 +383,7 @@ static inline void update_bb_same_layer(ClusterNetId net_id,
                                         const t_physical_tile_loc& pin_new_loc,
                                         const std::vector<t_2D_bb>& curr_bb_edge,
                                         const std::vector<t_2D_bb>& curr_bb_coord,
-                                        std::array<int, 5>& bb_pin_sink_count_new,
+                                        std::array<int, MAX_NUM_LAYERS>& bb_pin_sink_count_new,
                                         std::vector<t_2D_bb>& bb_edge_new,
                                         std::vector<t_2D_bb>& bb_coord_new);
 
@@ -392,21 +392,21 @@ static inline void update_bb_layer_changed(ClusterNetId net_id,
                                            const t_physical_tile_loc& pin_new_loc,
                                            const std::vector<t_2D_bb>& curr_bb_edge,
                                            const std::vector<t_2D_bb>& curr_bb_coord,
-                                           std::array<int, 5>& bb_pin_sink_count_new,
+                                           std::array<int, MAX_NUM_LAYERS>& bb_pin_sink_count_new,
                                            std::vector<t_2D_bb>& bb_edge_new,
                                            std::vector<t_2D_bb>& bb_coord_new);
 
 static void update_bb_pin_sink_count(ClusterNetId net_id,
                                      const t_physical_tile_loc& pin_old_loc,
                                      const t_physical_tile_loc& pin_new_loc,
-                                     const std::array<int, 5>& curr_layer_pin_sink_count,
-                                     std::array<int, 5>& bb_pin_sink_count_new,
+                                     const std::array<int, MAX_NUM_LAYERS>& curr_layer_pin_sink_count,
+                                     std::array<int, MAX_NUM_LAYERS>& bb_pin_sink_count_new,
                                      bool is_output_pin);
 
 static inline void update_bb_edge(ClusterNetId net_id,
                                   std::vector<t_2D_bb>& bb_edge_new,
                                   std::vector<t_2D_bb>& bb_coord_new,
-                                  std::array<int, 5>& bb_layer_pin_sink_count,
+                                  std::array<int, MAX_NUM_LAYERS>& bb_layer_pin_sink_count,
                                   const int& old_num_block_on_edge,
                                   const int& old_edge_coord,
                                   int& new_num_block_on_edge,
@@ -455,23 +455,23 @@ static double get_net_cost(ClusterNetId net_id, const t_bb& bbptr);
 
 static double get_net_layer_cost(ClusterNetId /* net_id */,
                                  const std::vector<t_2D_bb>& bbptr,
-                                 const std::array<int, 5>& layer_pin_sink_count);
+                                 const std::array<int, MAX_NUM_LAYERS>& layer_pin_sink_count);
 
 static void get_bb_from_scratch(ClusterNetId net_id,
                                 t_bb& coords,
                                 t_bb& num_on_edges,
-                                std::array<int, 5>& num_sink_pin_layer);
+                                std::array<int, MAX_NUM_LAYERS>& num_sink_pin_layer);
 
 static void get_layer_bb_from_scratch(ClusterNetId net_id,
                                       std::vector<t_2D_bb>& num_on_edges,
                                       std::vector<t_2D_bb>& coords,
-                                      std::array<int, 5>& layer_pin_sink_count);
+                                      std::array<int, MAX_NUM_LAYERS>& layer_pin_sink_count);
 
 static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bbptr);
 
 static double get_net_layer_wirelength_estimate(ClusterNetId /* net_id */,
                                                 const std::vector<t_2D_bb>& bbptr,
-                                                const std::array<int, 5>& layer_pin_sink_count);
+                                                const std::array<int, MAX_NUM_LAYERS>& layer_pin_sink_count);
 
 static void free_try_swap_arrays();
 
@@ -2752,7 +2752,7 @@ static void free_try_swap_structs() {
 static void get_bb_from_scratch(ClusterNetId net_id,
                                 t_bb& coords,
                                 t_bb& num_on_edges,
-                                std::array<int, 5>& num_sink_pin_layer) {
+                                std::array<int, MAX_NUM_LAYERS>& num_sink_pin_layer) {
     int pnum, x, y, pin_layer, xmin, xmax, ymin, ymax;
     int xmin_edge, xmax_edge, ymin_edge, ymax_edge;
 
@@ -2851,7 +2851,7 @@ static void get_bb_from_scratch(ClusterNetId net_id,
 static void get_layer_bb_from_scratch(ClusterNetId net_id,
                                       std::vector<t_2D_bb>& num_on_edges,
                                       std::vector<t_2D_bb>& coords,
-                                      std::array<int, 5>& layer_pin_sink_count) {
+                                      std::array<int, MAX_NUM_LAYERS>& layer_pin_sink_count) {
     auto& device_ctx = g_vpr_ctx.device();
     const int num_layers = device_ctx.grid.get_num_layers();
     std::vector<int> xmin(num_layers, OPEN);
@@ -2994,7 +2994,7 @@ static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bbptr
 
 static double get_net_layer_wirelength_estimate(ClusterNetId /* net_id */,
                                                 const std::vector<t_2D_bb>& bbptr,
-                                                const std::array<int, 5>& layer_pin_sink_count) {
+                                                const std::array<int, MAX_NUM_LAYERS>& layer_pin_sink_count) {
     /* WMF: Finds the estimate of wirelength due to one net by looking at   *
      * its coordinate bounding box.                                         */
 
@@ -3052,7 +3052,7 @@ static double get_net_cost(ClusterNetId net_id, const t_bb& bbptr) {
 
 static double get_net_layer_cost(ClusterNetId /* net_id */,
                                  const std::vector<t_2D_bb>& bbptr,
-                                 const std::array<int, 5>& layer_pin_sink_count) {
+                                 const std::array<int, MAX_NUM_LAYERS>& layer_pin_sink_count) {
     /* Finds the cost due to one net by looking at its coordinate bounding  *
      * box.                                                                 */
 
@@ -3093,7 +3093,7 @@ static double get_net_layer_cost(ClusterNetId /* net_id */,
  * the pins always lie on the outside of the bounding box.            */
 static void get_non_updateable_bb(ClusterNetId net_id,
                                   t_bb& bb_coord_new,
-                                  std::array<int, 5>& num_sink_pin_layer) {
+                                  std::array<int, MAX_NUM_LAYERS>& num_sink_pin_layer) {
     //TODO: account for multiple physical pin instances per logical pin
 
     int xmax, ymax, xmin, ymin, x, y, layer;
@@ -3158,7 +3158,7 @@ static void get_non_updateable_bb(ClusterNetId net_id,
 
 static void get_non_updateable_layer_bb(ClusterNetId net_id,
                                         std::vector<t_2D_bb>& bb_coord_new,
-                                        std::array<int, 5>& num_sink_layer) {
+                                        std::array<int, MAX_NUM_LAYERS>& num_sink_layer) {
     //TODO: account for multiple physical pin instances per logical pin
 
     auto& device_ctx = g_vpr_ctx.device();
@@ -3232,7 +3232,7 @@ static void get_non_updateable_layer_bb(ClusterNetId net_id,
 static void update_bb(ClusterNetId net_id,
                       t_bb& bb_edge_new,
                       t_bb& bb_coord_new,
-                      std::array<int, 5>& num_sink_pin_layer_new,
+                      std::array<int, MAX_NUM_LAYERS>& num_sink_pin_layer_new,
                       t_physical_tile_loc pin_old_loc,
                       t_physical_tile_loc pin_new_loc,
                       bool src_pin) {
@@ -3251,7 +3251,7 @@ static void update_bb(ClusterNetId net_id,
     //TODO: account for multiple physical pin instances per logical pin
     const t_bb *curr_bb_edge, *curr_bb_coord;
 
-    const std::array<int, 5>* curr_num_sink_pin_layer;
+    const std::array<int, MAX_NUM_LAYERS>* curr_num_sink_pin_layer;
 
     auto& device_ctx = g_vpr_ctx.device();
     auto& place_move_ctx = g_placer_ctx.move();
@@ -3447,7 +3447,7 @@ static void update_bb(ClusterNetId net_id,
 static void update_layer_bb(ClusterNetId net_id,
                             std::vector<t_2D_bb>& bb_edge_new,
                             std::vector<t_2D_bb>& bb_coord_new,
-                            std::array<int, 5>& bb_pin_sink_count_new,
+                            std::array<int, MAX_NUM_LAYERS>& bb_pin_sink_count_new,
                             t_physical_tile_loc pin_old_loc,
                             t_physical_tile_loc pin_new_loc,
                             bool is_output_pin) {
@@ -3465,7 +3465,7 @@ static void update_layer_bb(ClusterNetId net_id,
     /* IO blocks are considered to be one cell in for simplicity.         */
     //TODO: account for multiple physical pin instances per logical pin
     const std::vector<t_2D_bb>*curr_bb_edge, *curr_bb_coord;
-    const std::array<int, 5>* curr_layer_pin_sink_count;
+    const std::array<int, MAX_NUM_LAYERS>* curr_layer_pin_sink_count;
 
     auto& device_ctx = g_vpr_ctx.device();
     auto& place_move_ctx = g_placer_ctx.move();
@@ -3538,7 +3538,7 @@ static inline void update_bb_same_layer(ClusterNetId net_id,
                                         const t_physical_tile_loc& pin_new_loc,
                                         const std::vector<t_2D_bb>& curr_bb_edge,
                                         const std::vector<t_2D_bb>& curr_bb_coord,
-                                        std::array<int, 5>& bb_pin_sink_count_new,
+                                        std::array<int, MAX_NUM_LAYERS>& bb_pin_sink_count_new,
                                         std::vector<t_2D_bb>& bb_edge_new,
                                         std::vector<t_2D_bb>& bb_coord_new) {
     int x_old = pin_old_loc.x;
@@ -3650,7 +3650,7 @@ static inline void update_bb_layer_changed(ClusterNetId net_id,
                                            const t_physical_tile_loc& pin_new_loc,
                                            const std::vector<t_2D_bb>& curr_bb_edge,
                                            const std::vector<t_2D_bb>& curr_bb_coord,
-                                           std::array<int, 5>& bb_pin_sink_count_new,
+                                           std::array<int, MAX_NUM_LAYERS>& bb_pin_sink_count_new,
                                            std::vector<t_2D_bb>& bb_edge_new,
                                            std::vector<t_2D_bb>& bb_coord_new) {
     int x_old = pin_old_loc.x;
@@ -3723,8 +3723,8 @@ static inline void update_bb_layer_changed(ClusterNetId net_id,
 static void update_bb_pin_sink_count(ClusterNetId /* net_id */,
                                      const t_physical_tile_loc& pin_old_loc,
                                      const t_physical_tile_loc& pin_new_loc,
-                                     const std::array<int, 5>& curr_layer_pin_sink_count,
-                                     std::array<int, 5>& bb_pin_sink_count_new,
+                                     const std::array<int, MAX_NUM_LAYERS>& curr_layer_pin_sink_count,
+                                     std::array<int, MAX_NUM_LAYERS>& bb_pin_sink_count_new,
                                      bool is_output_pin) {
     VTR_ASSERT(curr_layer_pin_sink_count[pin_old_loc.layer_num] > 0 || is_output_pin == 1);
     std::copy(curr_layer_pin_sink_count.begin(), curr_layer_pin_sink_count.end(), bb_pin_sink_count_new.begin());
@@ -3737,7 +3737,7 @@ static void update_bb_pin_sink_count(ClusterNetId /* net_id */,
 static inline void update_bb_edge(ClusterNetId net_id,
                                   std::vector<t_2D_bb>& bb_edge_new,
                                   std::vector<t_2D_bb>& bb_coord_new,
-                                  std::array<int, 5>& bb_layer_pin_sink_count,
+                                  std::array<int, MAX_NUM_LAYERS>& bb_layer_pin_sink_count,
                                   const int& old_num_block_on_edge,
                                   const int& old_edge_coord,
                                   int& new_num_block_on_edge,

From c5178e46837a059367fb77c620e2ac5f53f4f6e6 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 9 Nov 2023 08:14:35 -0500
Subject: [PATCH 252/257] change placer context to use max_num_layer const

---
 vpr/src/place/placer_context.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpr/src/place/placer_context.h b/vpr/src/place/placer_context.h
index e82e331da3b..68ff6b5b183 100644
--- a/vpr/src/place/placer_context.h
+++ b/vpr/src/place/placer_context.h
@@ -104,7 +104,7 @@ struct PlacerMoveContext : public Context {
     vtr::vector<ClusterNetId, std::vector<t_2D_bb>> layer_bb_coords;
 
     // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each layer ()
-    vtr::vector<ClusterNetId, std::array<int, 5>> num_sink_pin_layer;
+    vtr::vector<ClusterNetId, std::array<int, MAX_NUM_LAYERS>> num_sink_pin_layer;
 
     // The first range limit calculated by the anneal
     float first_rlim;

From 114012ad9590f33672f6d48bbad256dfa876aaef Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 9 Nov 2023 08:24:09 -0500
Subject: [PATCH 253/257] add an assertion to check the maximum number of
 layers

---
 vpr/src/base/vpr_api.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp
index 9f379f84e42..8d5764fc9e4 100644
--- a/vpr/src/base/vpr_api.cpp
+++ b/vpr/src/base/vpr_api.cpp
@@ -452,6 +452,8 @@ void vpr_create_device_grid(const t_vpr_setup& vpr_setup, const t_arch& Arch) {
     float target_device_utilization = vpr_setup.PackerOpts.target_device_utilization;
     device_ctx.grid = create_device_grid(vpr_setup.device_layout, Arch.grid_layouts, num_type_instances, target_device_utilization);
 
+    VTR_ASSERT_MSG(device_ctx.grid.get_num_layers() < MAX_NUM_LAYERS, "Number of layers should be less than MAX_NUM_LAYERS. If you need more layers, please increase the value of MAX_NUM_LAYERS in vpr_types.h");
+
     /*
      *Report on the device
      */

From eb71943a3ad6c4c242902c354838918920a2e2c2 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 9 Nov 2023 08:37:17 -0500
Subject: [PATCH 254/257] fix a typo in number of layers

---
 vpr/src/base/vpr_api.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp
index 8d5764fc9e4..65519d5775f 100644
--- a/vpr/src/base/vpr_api.cpp
+++ b/vpr/src/base/vpr_api.cpp
@@ -452,7 +452,7 @@ void vpr_create_device_grid(const t_vpr_setup& vpr_setup, const t_arch& Arch) {
     float target_device_utilization = vpr_setup.PackerOpts.target_device_utilization;
     device_ctx.grid = create_device_grid(vpr_setup.device_layout, Arch.grid_layouts, num_type_instances, target_device_utilization);
 
-    VTR_ASSERT_MSG(device_ctx.grid.get_num_layers() < MAX_NUM_LAYERS, "Number of layers should be less than MAX_NUM_LAYERS. If you need more layers, please increase the value of MAX_NUM_LAYERS in vpr_types.h");
+    VTR_ASSERT_MSG(device_ctx.grid.get_num_layers() <= MAX_NUM_LAYERS, "Number of layers should be less than MAX_NUM_LAYERS. If you need more layers, please increase the value of MAX_NUM_LAYERS in vpr_types.h");
 
     /*
      *Report on the device

From cce3bc60fc25848d456b76fd0e27735f9a438205 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Thu, 9 Nov 2023 18:22:46 -0500
Subject: [PATCH 255/257] change vector of arrays to 2D matrix

---
 vpr/src/place/median_move_generator.cpp       |   2 +-
 vpr/src/place/place.cpp                       | 155 ++++++++++--------
 vpr/src/place/placer_context.h                |   2 +-
 .../place/weighted_median_move_generator.cpp  |   2 +-
 4 files changed, 92 insertions(+), 69 deletions(-)

diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp
index d91c3a76974..324d0cd3e44 100644
--- a/vpr/src/place/median_move_generator.cpp
+++ b/vpr/src/place/median_move_generator.cpp
@@ -114,7 +114,7 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_
         place_move_ctx.Y_coord.push_back(coords.ymax);
         if (is_multi_layer) {
             for (int layer_num = 0; layer_num < num_layers; layer_num++) {
-                layer_blk_cnt[layer_num] += place_move_ctx.num_sink_pin_layer[net_id][layer_num];
+                layer_blk_cnt[layer_num] += place_move_ctx.num_sink_pin_layer[size_t(net_id)][layer_num];
             }
             // If the pin under consideration is of type sink, it shouldn't be added to layer_blk_cnt since the block
             // is moving
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index dd34b5d20d2..becaf426b5e 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -150,7 +150,7 @@ static vtr::NdMatrix<float, 2> chany_place_cost_fac({0, 0}); //[0...device_ctx.g
 /* [0...cluster_ctx.clb_nlist.nets().size()-1] */
 static vtr::vector<ClusterNetId, t_bb> ts_bb_edge_new, ts_bb_coord_new;
 static vtr::vector<ClusterNetId, std::vector<t_2D_bb>> layer_ts_bb_edge_new, layer_ts_bb_coord_new;
-static vtr::vector<ClusterNetId, std::array<int, MAX_NUM_LAYERS>> ts_layer_sink_pin_count;
+static vtr::Matrix<int> ts_layer_sink_pin_count;
 static std::vector<ClusterNetId> ts_nets_to_update;
 
 /* These file-scoped variables keep track of the number of swaps       *
@@ -356,16 +356,16 @@ static e_move_result assess_swap(double delta_c, double t);
 
 static void get_non_updateable_bb(ClusterNetId net_id,
                                   t_bb& bb_coord_new,
-                                  std::array<int, MAX_NUM_LAYERS>& num_sink_pin_layer);
+                                  vtr::NdMatrixProxy<int, 1> num_sink_pin_layer);
 
 static void get_non_updateable_layer_bb(ClusterNetId net_id,
                                         std::vector<t_2D_bb>& bb_coord_new,
-                                        std::array<int, MAX_NUM_LAYERS>& num_sink_layer);
+                                        vtr::NdMatrixProxy<int, 1> num_sink_layer);
 
 static void update_bb(ClusterNetId net_id,
                       t_bb& bb_edge_new,
                       t_bb& bb_coord_new,
-                      std::array<int, MAX_NUM_LAYERS>& num_sink_pin_layer_new,
+                      vtr::NdMatrixProxy<int, 1> num_sink_pin_layer_new,
                       t_physical_tile_loc pin_old_loc,
                       t_physical_tile_loc pin_new_loc,
                       bool src_pin);
@@ -373,7 +373,7 @@ static void update_bb(ClusterNetId net_id,
 static void update_layer_bb(ClusterNetId net_id,
                             std::vector<t_2D_bb>& bb_edge_new,
                             std::vector<t_2D_bb>& bb_coord_new,
-                            std::array<int, MAX_NUM_LAYERS>& bb_pin_sink_count_new,
+                            vtr::NdMatrixProxy<int, 1> bb_pin_sink_count_new,
                             t_physical_tile_loc pin_old_loc,
                             t_physical_tile_loc pin_new_loc,
                             bool is_output_pin);
@@ -383,7 +383,7 @@ static inline void update_bb_same_layer(ClusterNetId net_id,
                                         const t_physical_tile_loc& pin_new_loc,
                                         const std::vector<t_2D_bb>& curr_bb_edge,
                                         const std::vector<t_2D_bb>& curr_bb_coord,
-                                        std::array<int, MAX_NUM_LAYERS>& bb_pin_sink_count_new,
+                                        vtr::NdMatrixProxy<int, 1> bb_pin_sink_count_new,
                                         std::vector<t_2D_bb>& bb_edge_new,
                                         std::vector<t_2D_bb>& bb_coord_new);
 
@@ -392,21 +392,21 @@ static inline void update_bb_layer_changed(ClusterNetId net_id,
                                            const t_physical_tile_loc& pin_new_loc,
                                            const std::vector<t_2D_bb>& curr_bb_edge,
                                            const std::vector<t_2D_bb>& curr_bb_coord,
-                                           std::array<int, MAX_NUM_LAYERS>& bb_pin_sink_count_new,
+                                           vtr::NdMatrixProxy<int, 1> bb_pin_sink_count_new,
                                            std::vector<t_2D_bb>& bb_edge_new,
                                            std::vector<t_2D_bb>& bb_coord_new);
 
 static void update_bb_pin_sink_count(ClusterNetId net_id,
                                      const t_physical_tile_loc& pin_old_loc,
                                      const t_physical_tile_loc& pin_new_loc,
-                                     const std::array<int, MAX_NUM_LAYERS>& curr_layer_pin_sink_count,
-                                     std::array<int, MAX_NUM_LAYERS>& bb_pin_sink_count_new,
+                                     const vtr::NdMatrixProxy<int, 1> curr_layer_pin_sink_count,
+                                     vtr::NdMatrixProxy<int, 1> bb_pin_sink_count_new,
                                      bool is_output_pin);
 
 static inline void update_bb_edge(ClusterNetId net_id,
                                   std::vector<t_2D_bb>& bb_edge_new,
                                   std::vector<t_2D_bb>& bb_coord_new,
-                                  std::array<int, MAX_NUM_LAYERS>& bb_layer_pin_sink_count,
+                                  vtr::NdMatrixProxy<int, 1> bb_layer_pin_sink_count,
                                   const int& old_num_block_on_edge,
                                   const int& old_edge_coord,
                                   int& new_num_block_on_edge,
@@ -455,23 +455,23 @@ static double get_net_cost(ClusterNetId net_id, const t_bb& bbptr);
 
 static double get_net_layer_cost(ClusterNetId /* net_id */,
                                  const std::vector<t_2D_bb>& bbptr,
-                                 const std::array<int, MAX_NUM_LAYERS>& layer_pin_sink_count);
+                                 const vtr::NdMatrixProxy<int, 1> layer_pin_sink_count);
 
 static void get_bb_from_scratch(ClusterNetId net_id,
                                 t_bb& coords,
                                 t_bb& num_on_edges,
-                                std::array<int, MAX_NUM_LAYERS>& num_sink_pin_layer);
+                                vtr::NdMatrixProxy<int, 1> num_sink_pin_layer);
 
 static void get_layer_bb_from_scratch(ClusterNetId net_id,
                                       std::vector<t_2D_bb>& num_on_edges,
                                       std::vector<t_2D_bb>& coords,
-                                      std::array<int, MAX_NUM_LAYERS>& layer_pin_sink_count);
+                                      vtr::NdMatrixProxy<int, 1> layer_pin_sink_count);
 
 static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bbptr);
 
 static double get_net_layer_wirelength_estimate(ClusterNetId /* net_id */,
                                                 const std::vector<t_2D_bb>& bbptr,
-                                                const std::array<int, MAX_NUM_LAYERS>& layer_pin_sink_count);
+                                                const vtr::NdMatrixProxy<int, 1> layer_pin_sink_count);
 
 static void free_try_swap_arrays();
 
@@ -1541,7 +1541,11 @@ static void update_move_nets(int num_nets_affected,
         } else {
             place_move_ctx.layer_bb_coords[net_id] = layer_ts_bb_coord_new[net_id];
         }
-        place_move_ctx.num_sink_pin_layer[net_id] = ts_layer_sink_pin_count[net_id];
+
+        for (int layer_num = 0; layer_num < g_vpr_ctx.device().grid.get_num_layers(); layer_num++) {
+            place_move_ctx.num_sink_pin_layer[size_t(net_id)][layer_num] =
+                ts_layer_sink_pin_count[size_t(net_id)][layer_num];
+        }
 
         if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET) {
             if (cube_bb) {
@@ -2028,7 +2032,7 @@ static int find_affected_nets_and_update_costs(
         } else {
             proposed_net_cost[net_id] = get_net_layer_cost(net_id,
                                                            layer_ts_bb_coord_new[net_id],
-                                                           ts_layer_sink_pin_count[net_id]);
+                                                           ts_layer_sink_pin_count[size_t(net_id)]);
         }
 
         bb_delta_c += proposed_net_cost[net_id] - net_cost[net_id];
@@ -2070,7 +2074,7 @@ static void update_net_bb(const ClusterNetId net,
         if (bb_updated_before[net] == NOT_UPDATED_YET) { //Only once per-net
             get_non_updateable_bb(net,
                                   ts_bb_coord_new[net],
-                                  ts_layer_sink_pin_count[net]);
+                                  ts_layer_sink_pin_count[size_t(net)]);
         }
     } else {
         //For large nets, update bounding box incrementally
@@ -2093,7 +2097,7 @@ static void update_net_bb(const ClusterNetId net,
         update_bb(net,
                   ts_bb_edge_new[net],
                   ts_bb_coord_new[net],
-                  ts_layer_sink_pin_count[net],
+                  ts_layer_sink_pin_count[size_t(net)],
                   pin_old_loc,
                   pin_new_loc,
                   src_pin);
@@ -2113,7 +2117,7 @@ static void update_net_layer_bb(const ClusterNetId net,
         if (bb_updated_before[net] == NOT_UPDATED_YET) { //Only once per-net
             get_non_updateable_layer_bb(net,
                                         layer_ts_bb_coord_new[net],
-                                        ts_layer_sink_pin_count[net]);
+                                        ts_layer_sink_pin_count[size_t(net)]);
         }
     } else {
         //For large nets, update bounding box incrementally
@@ -2136,7 +2140,7 @@ static void update_net_layer_bb(const ClusterNetId net,
         update_layer_bb(net,
                         layer_ts_bb_edge_new[net],
                         layer_ts_bb_coord_new[net],
-                        ts_layer_sink_pin_count[net],
+                        ts_layer_sink_pin_count[size_t(net)],
                         pin_old_loc,
                         pin_new_loc,
                         pin_dir == e_pin_type::DRIVER);
@@ -2507,11 +2511,11 @@ static double comp_bb_cost(e_cost_methods method) {
                 get_bb_from_scratch(net_id,
                                     place_move_ctx.bb_coords[net_id],
                                     place_move_ctx.bb_num_on_edges[net_id],
-                                    place_move_ctx.num_sink_pin_layer[net_id]);
+                                    place_move_ctx.num_sink_pin_layer[size_t(net_id)]);
             } else {
                 get_non_updateable_bb(net_id,
                                       place_move_ctx.bb_coords[net_id],
-                                      place_move_ctx.num_sink_pin_layer[net_id]);
+                                      place_move_ctx.num_sink_pin_layer[size_t(net_id)]);
             }
 
             net_cost[net_id] = get_net_cost(net_id, place_move_ctx.bb_coords[net_id]);
@@ -2544,21 +2548,21 @@ static double comp_layer_bb_cost(e_cost_methods method) {
                 get_layer_bb_from_scratch(net_id,
                                           place_move_ctx.layer_bb_num_on_edges[net_id],
                                           place_move_ctx.layer_bb_coords[net_id],
-                                          place_move_ctx.num_sink_pin_layer[net_id]);
+                                          place_move_ctx.num_sink_pin_layer[size_t(net_id)]);
             } else {
                 get_non_updateable_layer_bb(net_id,
                                             place_move_ctx.layer_bb_coords[net_id],
-                                            place_move_ctx.num_sink_pin_layer[net_id]);
+                                            place_move_ctx.num_sink_pin_layer[size_t(net_id)]);
             }
 
             net_cost[net_id] = get_net_layer_cost(net_id,
                                                   place_move_ctx.layer_bb_coords[net_id],
-                                                  place_move_ctx.num_sink_pin_layer[net_id]);
+                                                  place_move_ctx.num_sink_pin_layer[size_t(net_id)]);
             cost += net_cost[net_id];
             if (method == CHECK)
                 expected_wirelength += get_net_layer_wirelength_estimate(net_id,
                                                                          place_move_ctx.layer_bb_coords[net_id],
-                                                                         place_move_ctx.num_sink_pin_layer[net_id]);
+                                                                         place_move_ctx.num_sink_pin_layer[size_t(net_id)]);
         }
     }
 
@@ -2646,10 +2650,11 @@ static void alloc_and_load_placement_structs(float place_cost_exp,
         place_move_ctx.layer_bb_coords.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
     }
 
-    place_move_ctx.num_sink_pin_layer.resize(num_nets);
-    std::for_each(place_move_ctx.num_sink_pin_layer.begin(), place_move_ctx.num_sink_pin_layer.end(), [](auto& arr) {
-        std::fill(arr.begin(), arr.end(), OPEN);
-    });
+    place_move_ctx.num_sink_pin_layer.resize({num_nets, size_t(num_layers)});
+    // Initialize every [net][layer] entry of the matrix that was just resized (not ts_layer_sink_pin_count) to OPEN.
+    for (size_t flat_idx = 0; flat_idx < place_move_ctx.num_sink_pin_layer.size(); flat_idx++) {
+        place_move_ctx.num_sink_pin_layer.get(flat_idx) = OPEN;
+    }
 
     /* Used to store costs for moves not yet made and to indicate when a net's   *
      * cost has been recomputed. proposed_net_cost[inet] < 0 means net's cost hasn't *
@@ -2693,7 +2698,7 @@ static void free_placement_structs(const t_placer_opts& placer_opts, const t_noc
     vtr::release_memory(place_move_ctx.layer_bb_num_on_edges);
     vtr::release_memory(place_move_ctx.layer_bb_coords);
 
-    vtr::release_memory(place_move_ctx.num_sink_pin_layer);
+    place_move_ctx.num_sink_pin_layer.clear();
 
     vtr::release_memory(bb_updated_before);
 
@@ -2723,10 +2728,13 @@ static void alloc_and_load_try_swap_structs(const bool cube_bb) {
         layer_ts_bb_edge_new.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
         layer_ts_bb_coord_new.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
     }
-    ts_layer_sink_pin_count.resize(num_nets);
-    std::for_each(ts_layer_sink_pin_count.begin(), ts_layer_sink_pin_count.end(), [](auto& arr) {
-        std::fill(arr.begin(), arr.end(), OPEN);
-    });
+
+    ts_layer_sink_pin_count.resize({num_nets, size_t(num_layers)});
+    for (size_t flat_idx = 0; flat_idx < ts_layer_sink_pin_count.size(); flat_idx++) {
+        auto& elem = ts_layer_sink_pin_count.get(flat_idx);
+        elem = OPEN;
+    }
+
     ts_nets_to_update.resize(num_nets, ClusterNetId::INVALID());
 
     auto& place_ctx = g_vpr_ctx.mutable_placement();
@@ -2738,7 +2746,7 @@ static void free_try_swap_structs() {
     vtr::release_memory(ts_bb_coord_new);
     vtr::release_memory(layer_ts_bb_edge_new);
     vtr::release_memory(layer_ts_bb_coord_new);
-    vtr::release_memory(ts_layer_sink_pin_count);
+    ts_layer_sink_pin_count.clear();
     vtr::release_memory(ts_nets_to_update);
 
     auto& place_ctx = g_vpr_ctx.mutable_placement();
@@ -2752,7 +2760,7 @@ static void free_try_swap_structs() {
 static void get_bb_from_scratch(ClusterNetId net_id,
                                 t_bb& coords,
                                 t_bb& num_on_edges,
-                                std::array<int, MAX_NUM_LAYERS>& num_sink_pin_layer) {
+                                vtr::NdMatrixProxy<int, 1> num_sink_pin_layer) {
     int pnum, x, y, pin_layer, xmin, xmax, ymin, ymax;
     int xmin_edge, xmax_edge, ymin_edge, ymax_edge;
 
@@ -2781,7 +2789,9 @@ static void get_bb_from_scratch(ClusterNetId net_id,
     xmax_edge = 1;
     ymax_edge = 1;
 
-    std::fill(num_sink_pin_layer.begin(), num_sink_pin_layer.end(), 0);
+    for (int layer_num = 0; layer_num < grid.get_num_layers(); layer_num++) {
+        num_sink_pin_layer[layer_num] = 0;
+    }
 
     for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) {
         bnum = cluster_ctx.clb_nlist.pin_block(pin_id);
@@ -2851,7 +2861,7 @@ static void get_bb_from_scratch(ClusterNetId net_id,
 static void get_layer_bb_from_scratch(ClusterNetId net_id,
                                       std::vector<t_2D_bb>& num_on_edges,
                                       std::vector<t_2D_bb>& coords,
-                                      std::array<int, MAX_NUM_LAYERS>& layer_pin_sink_count) {
+                                      vtr::NdMatrixProxy<int, 1> layer_pin_sink_count) {
     auto& device_ctx = g_vpr_ctx.device();
     const int num_layers = device_ctx.grid.get_num_layers();
     std::vector<int> xmin(num_layers, OPEN);
@@ -2994,7 +3004,7 @@ static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bbptr
 
 static double get_net_layer_wirelength_estimate(ClusterNetId /* net_id */,
                                                 const std::vector<t_2D_bb>& bbptr,
-                                                const std::array<int, MAX_NUM_LAYERS>& layer_pin_sink_count) {
+                                                const vtr::NdMatrixProxy<int, 1> layer_pin_sink_count) {
     /* WMF: Finds the estimate of wirelength due to one net by looking at   *
      * its coordinate bounding box.                                         */
 
@@ -3052,7 +3062,7 @@ static double get_net_cost(ClusterNetId net_id, const t_bb& bbptr) {
 
 static double get_net_layer_cost(ClusterNetId /* net_id */,
                                  const std::vector<t_2D_bb>& bbptr,
-                                 const std::array<int, MAX_NUM_LAYERS>& layer_pin_sink_count) {
+                                 const vtr::NdMatrixProxy<int, 1> layer_pin_sink_count) {
     /* Finds the cost due to one net by looking at its coordinate bounding  *
      * box.                                                                 */
 
@@ -3093,7 +3103,7 @@ static double get_net_layer_cost(ClusterNetId /* net_id */,
  * the pins always lie on the outside of the bounding box.            */
 static void get_non_updateable_bb(ClusterNetId net_id,
                                   t_bb& bb_coord_new,
-                                  std::array<int, MAX_NUM_LAYERS>& num_sink_pin_layer) {
+                                  vtr::NdMatrixProxy<int, 1> num_sink_pin_layer) {
     //TODO: account for multiple physical pin instances per logical pin
 
     int xmax, ymax, xmin, ymin, x, y, layer;
@@ -3116,7 +3126,9 @@ static void get_non_updateable_bb(ClusterNetId net_id,
     xmax = x;
     ymax = y;
 
-    std::fill(num_sink_pin_layer.begin(), num_sink_pin_layer.end(), 0);
+    for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) {
+        num_sink_pin_layer[layer_num] = 0;
+    }
 
     for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) {
         bnum = cluster_ctx.clb_nlist.pin_block(pin_id);
@@ -3158,12 +3170,14 @@ static void get_non_updateable_bb(ClusterNetId net_id,
 
 static void get_non_updateable_layer_bb(ClusterNetId net_id,
                                         std::vector<t_2D_bb>& bb_coord_new,
-                                        std::array<int, MAX_NUM_LAYERS>& num_sink_layer) {
+                                        vtr::NdMatrixProxy<int, 1> num_sink_layer) {
     //TODO: account for multiple physical pin instances per logical pin
 
     auto& device_ctx = g_vpr_ctx.device();
     int num_layers = device_ctx.grid.get_num_layers();
-   std::fill(num_sink_layer.begin(), num_sink_layer.end(), 0);
+    for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) {
+        num_sink_layer[layer_num] = 0;
+    }
 
     int pnum;
 
@@ -3232,7 +3246,7 @@ static void get_non_updateable_layer_bb(ClusterNetId net_id,
 static void update_bb(ClusterNetId net_id,
                       t_bb& bb_edge_new,
                       t_bb& bb_coord_new,
-                      std::array<int, MAX_NUM_LAYERS>& num_sink_pin_layer_new,
+                      vtr::NdMatrixProxy<int, 1> num_sink_pin_layer_new,
                       t_physical_tile_loc pin_old_loc,
                       t_physical_tile_loc pin_new_loc,
                       bool src_pin) {
@@ -3251,8 +3265,6 @@ static void update_bb(ClusterNetId net_id,
     //TODO: account for multiple physical pin instances per logical pin
     const t_bb *curr_bb_edge, *curr_bb_coord;
 
-    const std::array<int, MAX_NUM_LAYERS>* curr_num_sink_pin_layer;
-
     auto& device_ctx = g_vpr_ctx.device();
     auto& place_move_ctx = g_placer_ctx.move();
 
@@ -3267,17 +3279,21 @@ static void update_bb(ClusterNetId net_id,
     if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
         /* The net had been updated from scratch, DO NOT update again! */
         return;
-    } else if (bb_updated_before[net_id] == NOT_UPDATED_YET) {
+    }
+
+    vtr::NdMatrixProxy<int, 1> curr_num_sink_pin_layer = (bb_updated_before[net_id] == NOT_UPDATED_YET) ?
+                                                                                                        place_move_ctx.num_sink_pin_layer[size_t(net_id)] :
+                                                                                                        num_sink_pin_layer_new;
+
+    if (bb_updated_before[net_id] == NOT_UPDATED_YET) {
         /* The net had NOT been updated before, could use the old values */
         curr_bb_edge = &place_move_ctx.bb_num_on_edges[net_id];
         curr_bb_coord = &place_move_ctx.bb_coords[net_id];
-        curr_num_sink_pin_layer = &place_move_ctx.num_sink_pin_layer[net_id];
         bb_updated_before[net_id] = UPDATED_ONCE;
     } else {
         /* The net had been updated before, must use the new values */
         curr_bb_coord = &bb_coord_new;
         curr_bb_edge = &bb_edge_new;
-        curr_num_sink_pin_layer = &num_sink_pin_layer_new;
     }
 
     /* Check if I can update the bounding box incrementally. */
@@ -3429,12 +3445,14 @@ static void update_bb(ClusterNetId net_id,
     /* Now account for the layer motion. */
     if (num_layers > 1) {
         /* We need to update it only if multiple layers are available */
-        num_sink_pin_layer_new = (*curr_num_sink_pin_layer);
+        for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+            num_sink_pin_layer_new[layer_num] = curr_num_sink_pin_layer[layer_num];
+        }
         if (!src_pin) {
             /* if src pin is being moved, we don't need to update this data structure */
             if (pin_old_loc.layer_num != pin_new_loc.layer_num) {
-                num_sink_pin_layer_new[pin_old_loc.layer_num] = (*curr_num_sink_pin_layer)[pin_old_loc.layer_num] - 1;
-                num_sink_pin_layer_new[pin_new_loc.layer_num] = (*curr_num_sink_pin_layer)[pin_new_loc.layer_num] + 1;
+                num_sink_pin_layer_new[pin_old_loc.layer_num] = (curr_num_sink_pin_layer)[pin_old_loc.layer_num] - 1;
+                num_sink_pin_layer_new[pin_new_loc.layer_num] = (curr_num_sink_pin_layer)[pin_new_loc.layer_num] + 1;
             }
         }
     }
@@ -3447,7 +3465,7 @@ static void update_bb(ClusterNetId net_id,
 static void update_layer_bb(ClusterNetId net_id,
                             std::vector<t_2D_bb>& bb_edge_new,
                             std::vector<t_2D_bb>& bb_coord_new,
-                            std::array<int, MAX_NUM_LAYERS>& bb_pin_sink_count_new,
+                            vtr::NdMatrixProxy<int, 1> bb_pin_sink_count_new,
                             t_physical_tile_loc pin_old_loc,
                             t_physical_tile_loc pin_new_loc,
                             bool is_output_pin) {
@@ -3465,7 +3483,6 @@ static void update_layer_bb(ClusterNetId net_id,
     /* IO blocks are considered to be one cell in for simplicity.         */
     //TODO: account for multiple physical pin instances per logical pin
     const std::vector<t_2D_bb>*curr_bb_edge, *curr_bb_coord;
-    const std::array<int, MAX_NUM_LAYERS>* curr_layer_pin_sink_count;
 
     auto& device_ctx = g_vpr_ctx.device();
     auto& place_move_ctx = g_placer_ctx.move();
@@ -3479,17 +3496,21 @@ static void update_layer_bb(ClusterNetId net_id,
     if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
         /* The net had been updated from scratch, DO NOT update again! */
         return;
-    } else if (bb_updated_before[net_id] == NOT_UPDATED_YET) {
+    }
+
+    const vtr::NdMatrixProxy<int, 1> curr_layer_pin_sink_count = (bb_updated_before[net_id] == NOT_UPDATED_YET) ?
+                                                                                                                place_move_ctx.num_sink_pin_layer[size_t(net_id)] :
+                                                                                                                bb_pin_sink_count_new;
+
+    if (bb_updated_before[net_id] == NOT_UPDATED_YET) {
         /* The net had NOT been updated before, could use the old values */
         curr_bb_edge = &place_move_ctx.layer_bb_num_on_edges[net_id];
         curr_bb_coord = &place_move_ctx.layer_bb_coords[net_id];
-        curr_layer_pin_sink_count = &place_move_ctx.num_sink_pin_layer[net_id];
         bb_updated_before[net_id] = UPDATED_ONCE;
     } else {
         /* The net had been updated before, must use the new values */
         curr_bb_edge = &bb_edge_new;
         curr_bb_coord = &bb_coord_new;
-        curr_layer_pin_sink_count = &bb_pin_sink_count_new;
     }
 
     /* Check if I can update the bounding box incrementally. */
@@ -3497,7 +3518,7 @@ static void update_layer_bb(ClusterNetId net_id,
     update_bb_pin_sink_count(net_id,
                              pin_old_loc,
                              pin_new_loc,
-                             *curr_layer_pin_sink_count,
+                             curr_layer_pin_sink_count,
                              bb_pin_sink_count_new,
                              is_output_pin);
 
@@ -3538,7 +3559,7 @@ static inline void update_bb_same_layer(ClusterNetId net_id,
                                         const t_physical_tile_loc& pin_new_loc,
                                         const std::vector<t_2D_bb>& curr_bb_edge,
                                         const std::vector<t_2D_bb>& curr_bb_coord,
-                                        std::array<int, MAX_NUM_LAYERS>& bb_pin_sink_count_new,
+                                        vtr::NdMatrixProxy<int, 1> bb_pin_sink_count_new,
                                         std::vector<t_2D_bb>& bb_edge_new,
                                         std::vector<t_2D_bb>& bb_coord_new) {
     int x_old = pin_old_loc.x;
@@ -3650,7 +3671,7 @@ static inline void update_bb_layer_changed(ClusterNetId net_id,
                                            const t_physical_tile_loc& pin_new_loc,
                                            const std::vector<t_2D_bb>& curr_bb_edge,
                                            const std::vector<t_2D_bb>& curr_bb_coord,
-                                           std::array<int, MAX_NUM_LAYERS>& bb_pin_sink_count_new,
+                                           vtr::NdMatrixProxy<int, 1> bb_pin_sink_count_new,
                                            std::vector<t_2D_bb>& bb_edge_new,
                                            std::vector<t_2D_bb>& bb_coord_new) {
     int x_old = pin_old_loc.x;
@@ -3723,11 +3744,13 @@ static inline void update_bb_layer_changed(ClusterNetId net_id,
 static void update_bb_pin_sink_count(ClusterNetId /* net_id */,
                                      const t_physical_tile_loc& pin_old_loc,
                                      const t_physical_tile_loc& pin_new_loc,
-                                     const std::array<int, MAX_NUM_LAYERS>& curr_layer_pin_sink_count,
-                                     std::array<int, MAX_NUM_LAYERS>& bb_pin_sink_count_new,
+                                     const vtr::NdMatrixProxy<int, 1> curr_layer_pin_sink_count,
+                                     vtr::NdMatrixProxy<int, 1> bb_pin_sink_count_new,
                                      bool is_output_pin) {
     VTR_ASSERT(curr_layer_pin_sink_count[pin_old_loc.layer_num] > 0 || is_output_pin == 1);
-    std::copy(curr_layer_pin_sink_count.begin(), curr_layer_pin_sink_count.end(), bb_pin_sink_count_new.begin());
+    for (int layer_num = 0; layer_num < g_vpr_ctx.device().grid.get_num_layers(); layer_num++) {
+        bb_pin_sink_count_new[layer_num] = curr_layer_pin_sink_count[layer_num];
+    }
     if (!is_output_pin) {
         bb_pin_sink_count_new[pin_old_loc.layer_num] -= 1;
         bb_pin_sink_count_new[pin_new_loc.layer_num] += 1;
@@ -3737,7 +3760,7 @@ static void update_bb_pin_sink_count(ClusterNetId /* net_id */,
 static inline void update_bb_edge(ClusterNetId net_id,
                                   std::vector<t_2D_bb>& bb_edge_new,
                                   std::vector<t_2D_bb>& bb_coord_new,
-                                  std::array<int, MAX_NUM_LAYERS>& bb_layer_pin_sink_count,
+                                  vtr::NdMatrixProxy<int, 1> bb_layer_pin_sink_count,
                                   const int& old_num_block_on_edge,
                                   const int& old_edge_coord,
                                   int& new_num_block_on_edge,
diff --git a/vpr/src/place/placer_context.h b/vpr/src/place/placer_context.h
index 68ff6b5b183..f5e56bbf37f 100644
--- a/vpr/src/place/placer_context.h
+++ b/vpr/src/place/placer_context.h
@@ -104,7 +104,7 @@ struct PlacerMoveContext : public Context {
     vtr::vector<ClusterNetId, std::vector<t_2D_bb>> layer_bb_coords;
 
     // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each layer ()
-    vtr::vector<ClusterNetId, std::array<int, MAX_NUM_LAYERS>> num_sink_pin_layer;
+    vtr::Matrix<int> num_sink_pin_layer;
 
     // The first range limit calculated by the anneal
     float first_rlim;
diff --git a/vpr/src/place/weighted_median_move_generator.cpp b/vpr/src/place/weighted_median_move_generator.cpp
index b94ef3da60e..2d343cd3347 100644
--- a/vpr/src/place/weighted_median_move_generator.cpp
+++ b/vpr/src/place/weighted_median_move_generator.cpp
@@ -79,7 +79,7 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
         // If multile layers are available, I need to keep track of how many sinks are in each layer.
         if (is_multi_layer) {
             for (int layer_num = 0; layer_num < num_layers; layer_num++) {
-                layer_blk_cnt[layer_num] += place_move_ctx.num_sink_pin_layer[net_id][layer_num];
+                layer_blk_cnt[layer_num] += place_move_ctx.num_sink_pin_layer[size_t(net_id)][layer_num];
             }
             // If the pin under consideration if of type sink, it is counted in place_move_ctx.num_sink_pin_layer, and we don't want to consider the moving pins
             if (cluster_ctx.clb_nlist.pin_type(pin_id) != PinType::DRIVER) {

From afbadb35722612ffe53c6001f4f117840a85546c Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 10 Nov 2023 09:35:08 -0500
Subject: [PATCH 256/257] don't assign OPEN to bb coords when a layer has no
 sinks

---
 vpr/src/place/place.cpp | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index becaf426b5e..5d6e8ee20c1 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -3229,17 +3229,10 @@ static void get_non_updateable_layer_bb(ClusterNetId net_id,
      * is 0).  See route_common.cpp for a channel diagram.               */
     for (int layer_num = 0; layer_num < num_layers; layer_num++) {
         bb_coord_new[layer_num].layer_num = layer_num;
-        if (num_sink_layer[layer_num] == 0) {
-            bb_coord_new[layer_num].xmin = OPEN;
-            bb_coord_new[layer_num].ymin = OPEN;
-            bb_coord_new[layer_num].xmax = OPEN;
-            bb_coord_new[layer_num].ymax = OPEN;
-        } else {
-            bb_coord_new[layer_num].xmin = max(min<int>(xmin[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
-            bb_coord_new[layer_num].ymin = max(min<int>(ymin[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels
-            bb_coord_new[layer_num].xmax = max(min<int>(xmax[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
-            bb_coord_new[layer_num].ymax = max(min<int>(ymax[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels
-        }
+        bb_coord_new[layer_num].xmin = max(min<int>(xmin[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
+        bb_coord_new[layer_num].ymin = max(min<int>(ymin[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels
+        bb_coord_new[layer_num].xmax = max(min<int>(xmax[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
+        bb_coord_new[layer_num].ymax = max(min<int>(ymax[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels
     }
 }
 

From 48a1b423bda1d66ed0d89ace349381d7852d1287 Mon Sep 17 00:00:00 2001
From: amin1377 <amin1377.mohaghegh@gmail.com>
Date: Fri, 10 Nov 2023 09:37:47 -0500
Subject: [PATCH 257/257] make format

---
 vpr/src/place/place.cpp | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 5d6e8ee20c1..51dfce9ee32 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -1543,8 +1543,7 @@ static void update_move_nets(int num_nets_affected,
         }
 
         for (int layer_num = 0; layer_num < g_vpr_ctx.device().grid.get_num_layers(); layer_num++) {
-            place_move_ctx.num_sink_pin_layer[size_t(net_id)][layer_num] =
-                ts_layer_sink_pin_count[size_t(net_id)][layer_num];
+            place_move_ctx.num_sink_pin_layer[size_t(net_id)][layer_num] = ts_layer_sink_pin_count[size_t(net_id)][layer_num];
         }
 
         if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET) {
@@ -3274,9 +3273,7 @@ static void update_bb(ClusterNetId net_id,
         return;
     }
 
-    vtr::NdMatrixProxy<int, 1> curr_num_sink_pin_layer = (bb_updated_before[net_id] == NOT_UPDATED_YET) ?
-                                                                                                        place_move_ctx.num_sink_pin_layer[size_t(net_id)] :
-                                                                                                        num_sink_pin_layer_new;
+    vtr::NdMatrixProxy<int, 1> curr_num_sink_pin_layer = (bb_updated_before[net_id] == NOT_UPDATED_YET) ? place_move_ctx.num_sink_pin_layer[size_t(net_id)] : num_sink_pin_layer_new;
 
     if (bb_updated_before[net_id] == NOT_UPDATED_YET) {
         /* The net had NOT been updated before, could use the old values */
@@ -3491,9 +3488,7 @@ static void update_layer_bb(ClusterNetId net_id,
         return;
     }
 
-    const vtr::NdMatrixProxy<int, 1> curr_layer_pin_sink_count = (bb_updated_before[net_id] == NOT_UPDATED_YET) ?
-                                                                                                                place_move_ctx.num_sink_pin_layer[size_t(net_id)] :
-                                                                                                                bb_pin_sink_count_new;
+    const vtr::NdMatrixProxy<int, 1> curr_layer_pin_sink_count = (bb_updated_before[net_id] == NOT_UPDATED_YET) ? place_move_ctx.num_sink_pin_layer[size_t(net_id)] : bb_pin_sink_count_new;
 
     if (bb_updated_before[net_id] == NOT_UPDATED_YET) {
         /* The net had NOT been updated before, could use the old values */