diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h
index 422fb107535..4845ff3aca8 100644
--- a/libs/libarchfpga/src/physical_types.h
+++ b/libs/libarchfpga/src/physical_types.h
@@ -924,10 +924,10 @@ struct t_logical_block_type {
     std::vector<t_physical_tile_type_ptr> equivalent_tiles; ///>List of physical tiles at which one could
                                                             ///>place this type of netlist block.
 
-    std::unordered_map<int, t_pb_graph_pin*> pin_logical_num_to_pb_pin_mapping;                   /* pin_logical_num_to_pb_pin_mapping[pin logical number] -> pb_graph_pin ptr} */
-    std::unordered_map<const t_pb_graph_pin*, int> primitive_pb_pin_to_logical_class_num_mapping; /* primitive_pb_pin_to_logical_class_num_mapping[pb_graph_pin ptr] -> class logical number */
-    std::vector<t_class> primitive_logical_class_inf;                                             /* primitive_logical_class_inf[class_logical_number] -> class */
-    std::unordered_map<const t_pb_graph_node*, t_class_range> pb_graph_node_class_range;
+    std::unordered_map<int, t_pb_graph_pin*> pin_logical_num_to_pb_pin_mapping;                    /* pin_logical_num_to_pb_pin_mapping[pin logical number] -> pb_graph_pin ptr} */
+    std::unordered_map<const t_pb_graph_pin*, int> primitive_pb_pin_to_logical_class_num_mapping;  /* primitive_pb_pin_to_logical_class_num_mapping[pb_graph_pin ptr] -> class logical number */
+    std::vector<t_class> primitive_logical_class_inf;                                              /* primitive_logical_class_inf[class_logical_number] -> class */
+    std::unordered_map<const t_pb_graph_node*, t_class_range> primitive_pb_graph_node_class_range; /* primitive_pb_graph_node_class_range[primitive_pb_graph_node ptr] -> class range for that primitive*/
 
     // Is this t_logical_block_type empty?
     bool is_empty() const;
@@ -1239,6 +1239,12 @@ class t_pb_graph_node {
 
     int placement_index;
 
+    /*
+     * There is a root-level pb_graph_node assigned to each logical type. Each logical type can contain multiple primitives.
+     * If this pb_graph_node is a primitive, it is assigned a number that is unique within its logical block type; otherwise it stays OPEN.
+     */
+    int primitive_num = OPEN;
+
     /* Contains a collection of mode indices that cannot be used as they produce conflicts during VPR packing stage
      *
      * Illegal modes do arise when children of a graph_node do have inconsistent `edge_modes` with respect to
diff --git a/libs/libarchfpga/src/physical_types_util.cpp b/libs/libarchfpga/src/physical_types_util.cpp
index f6a7732ca8a..43a0fbc54da 100644
--- a/libs/libarchfpga/src/physical_types_util.cpp
+++ b/libs/libarchfpga/src/physical_types_util.cpp
@@ -965,7 +965,7 @@ t_class_range get_pb_graph_node_class_physical_range(t_physical_tile_type_ptr /*
                                                      const t_pb_graph_node* pb_graph_node) {
     VTR_ASSERT(pb_graph_node->is_primitive());
 
-    t_class_range class_range = logical_block->pb_graph_node_class_range.at(pb_graph_node);
+    t_class_range class_range = logical_block->primitive_pb_graph_node_class_range.at(pb_graph_node);
     int logical_block_class_offset = sub_tile->primitive_class_range[sub_tile_relative_cap].at(logical_block).low;
 
     class_range.low += logical_block_class_offset;
diff --git a/libs/libvtrutil/src/vtr_vec_id_set.h b/libs/libvtrutil/src/vtr_vec_id_set.h
index 7207225932c..10dc10e0f0a 100644
--- a/libs/libvtrutil/src/vtr_vec_id_set.h
+++ b/libs/libvtrutil/src/vtr_vec_id_set.h
@@ -2,6 +2,7 @@
 #define VTR_SET_H
 
 #include <vector>
+#include <algorithm>
 
 namespace vtr {
 
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 162bc53692b..7e99e5444b3 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -1919,4 +1919,10 @@ void free_pack_molecules(t_pack_molecule* list_of_pack_molecules);
  */
 void free_cluster_placement_stats(t_cluster_placement_stats* cluster_placement_stats);
 
+struct pair_hash { // Hash functor for a std::pair of ClusterBlockIds (usable as the hasher of an unordered container keyed by such pairs)
+    std::size_t operator()(const std::pair<ClusterBlockId, ClusterBlockId>& p) const noexcept {
+        return std::hash<ClusterBlockId>()(p.first) ^ (std::hash<ClusterBlockId>()(p.second) << 1); // shifting the second hash makes the combination order-sensitive
+    }
+};
+
 #endif
diff --git a/vpr/src/pack/pb_type_graph.cpp b/vpr/src/pack/pb_type_graph.cpp
index c1cbed18430..3b7c272cfc4 100644
--- a/vpr/src/pack/pb_type_graph.cpp
+++ b/vpr/src/pack/pb_type_graph.cpp
@@ -50,7 +50,8 @@ static void alloc_and_load_pb_graph(t_pb_graph_node* pb_graph_node,
                                     const int index,
                                     const int flat_index,
                                     bool load_power_structures,
-                                    int& pin_count_in_cluster);
+                                    int& pin_count_in_cluster,
+                                    int& primitive_num);
 
 static void alloc_and_load_pb_graph_pin_sinks(t_pb_graph_node* pb_graph_node);
 
@@ -153,13 +154,15 @@ void alloc_and_load_all_pb_graphs(bool load_power_structures, bool is_flat) {
         if (type.pb_type) {
             type.pb_graph_head = new t_pb_graph_node();
             int pin_count_in_cluster = 0;
+            int primitive_num = 0;
             alloc_and_load_pb_graph(type.pb_graph_head,
                                     nullptr,
                                     type.pb_type,
                                     0,
                                     0,
                                     load_power_structures,
-                                    pin_count_in_cluster);
+                                    pin_count_in_cluster,
+                                    primitive_num);
             type.pb_graph_head->total_pb_pins = pin_count_in_cluster;
             load_pin_classes_in_pb_graph_head(type.pb_graph_head);
             if (is_flat) {
@@ -233,7 +236,8 @@ static void alloc_and_load_pb_graph(t_pb_graph_node* pb_graph_node,
                                     const int index,
                                     const int flat_index,
                                     bool load_power_structures,
-                                    int& pin_count_in_cluster) {
+                                    int& pin_count_in_cluster,
+                                    int& primitive_num) {
     int i, j, k, i_input, i_output, i_clockport;
 
     pb_graph_node->placement_index = index;
@@ -350,6 +354,11 @@ static void alloc_and_load_pb_graph(t_pb_graph_node* pb_graph_node,
         pb_graph_node->pb_node_power->transistor_cnt_pb_children = 0.;
     }
 
+    if (pb_graph_node->is_primitive()) {
+        pb_graph_node->primitive_num = primitive_num;
+        primitive_num++;
+    }
+
     /* Allocate and load child nodes for each mode and create interconnect in each mode */
 
     pb_graph_node->child_pb_graph_nodes = (t_pb_graph_node***)vtr::calloc(pb_type->num_modes, sizeof(t_pb_graph_node**));
@@ -368,7 +377,8 @@ static void alloc_and_load_pb_graph(t_pb_graph_node* pb_graph_node,
                                         k,
                                         child_flat_index,
                                         load_power_structures,
-                                        pin_count_in_cluster);
+                                        pin_count_in_cluster,
+                                        primitive_num);
             }
         }
     }
@@ -384,6 +394,7 @@ static void alloc_and_load_pb_graph(t_pb_graph_node* pb_graph_node,
                                          load_power_structures);
     }
 
+
     // update the total number of primitives of that type
     if (pb_graph_node->is_primitive()) {
         int total_count = 1;
@@ -549,8 +560,8 @@ static void add_primitive_logical_classes(t_logical_block_type* logical_block) {
             }
             num_added_classes += add_port_logical_classes(logical_block, pb_graph_pins, num_ports, num_pins);
         }
-        logical_block->pb_graph_node_class_range.insert(std::make_pair(pb_graph_node, t_class_range(first_class_num,
-                                                                                                    first_class_num + num_added_classes - 1)));
+        logical_block->primitive_pb_graph_node_class_range.insert(std::make_pair(pb_graph_node, t_class_range(first_class_num,
+                                                                                                              first_class_num + num_added_classes - 1)));
     }
 }
 
diff --git a/vpr/src/pack/re_cluster.cpp b/vpr/src/pack/re_cluster.cpp
index f4df575453c..a1b48d0e083 100644
--- a/vpr/src/pack/re_cluster.cpp
+++ b/vpr/src/pack/re_cluster.cpp
@@ -200,9 +200,9 @@ bool swap_two_molecules(t_pack_molecule* molecule_1,
     }
 
     t_pb* clb_pb_1 = cluster_ctx.clb_nlist.block_pb(clb_1);
-    std::string clb_pb_1_name = (std::string)clb_pb_1->name;
+    std::string clb_pb_1_name = static_cast<std::string>(clb_pb_1->name);
     t_pb* clb_pb_2 = cluster_ctx.clb_nlist.block_pb(clb_2);
-    std::string clb_pb_2_name = (std::string)clb_pb_2->name;
+    std::string clb_pb_2_name = static_cast<std::string>(clb_pb_2->name);
 
     //remove the molecule from its current cluster
     remove_mol_from_cluster(molecule_1, molecule_1_size, clb_1, clb_1_atoms, false, old_1_router_data);
diff --git a/vpr/src/pack/re_cluster_util.cpp b/vpr/src/pack/re_cluster_util.cpp
index 287dabbe78c..1e23ec468b8 100644
--- a/vpr/src/pack/re_cluster_util.cpp
+++ b/vpr/src/pack/re_cluster_util.cpp
@@ -1,5 +1,4 @@
 #include "re_cluster_util.h"
-
 #include "clustered_netlist_utils.h"
 #include "cluster_util.h"
 #include "cluster_router.h"
@@ -8,7 +7,6 @@
 #include "initial_placement.h"
 #include "read_netlist.h"
 
-
 // The name suffix of the new block (if exists)
 // This suffix is useful in preventing duplicate high-level cluster block names
 const char* name_suffix = "_m";
diff --git a/vpr/src/place/move_transactions.cpp b/vpr/src/place/move_transactions.cpp
index c433b63303a..f36dd4d5e39 100644
--- a/vpr/src/place/move_transactions.cpp
+++ b/vpr/src/place/move_transactions.cpp
@@ -128,6 +128,7 @@ void clear_move_blocks(t_pl_blocks_to_be_moved& blocks_affected) {
 
     //For run-time, we just reset num_moved_blocks to zero, but do not free the blocks_affected
     //array to avoid memory allocation
+
     blocks_affected.num_moved_blocks = 0;
 
     blocks_affected.affected_pins.clear();
diff --git a/vpr/src/place/move_transactions.h b/vpr/src/place/move_transactions.h
index d987dc9a4d1..27dd2b1b3c6 100644
--- a/vpr/src/place/move_transactions.h
+++ b/vpr/src/place/move_transactions.h
@@ -9,12 +9,17 @@
  * old_loc: the location the block is moved from                *
  * new_loc: the location the block is moved to                  */
 struct t_pl_moved_block {
+    t_pl_moved_block() = default; // kept so the struct stays default-constructible alongside the constructor below
+    t_pl_moved_block(ClusterBlockId block_num_, const t_pl_loc& old_loc_, const t_pl_loc& new_loc_)
+        : block_num(block_num_)
+        , old_loc(old_loc_)
+        , new_loc(new_loc_) {} // member-wise initialization only; no other work is done
     ClusterBlockId block_num;
     t_pl_loc old_loc;
     t_pl_loc new_loc;
 };
 
-/* Stores the list of blocks to be moved in a swap during       *
+/* Stores the list of cluster blocks to be moved in a swap during       *
  * placement.                                                   *
  * Store the information on the blocks to be moved in a swap during     *
  * placement, in the form of array of structs instead of struct with    *
@@ -29,7 +34,7 @@ struct t_pl_moved_block {
  *                incrementally invalidate parts of the timing  *
  *                graph.                                        */
 struct t_pl_blocks_to_be_moved {
-    t_pl_blocks_to_be_moved(size_t max_blocks)
+    explicit t_pl_blocks_to_be_moved(size_t max_blocks)
         : moved_blocks(max_blocks) {}
 
     int num_moved_blocks = 0;
diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h
index 80b274b40a3..ce599492358 100644
--- a/vpr/src/place/move_utils.h
+++ b/vpr/src/place/move_utils.h
@@ -95,6 +95,14 @@ void report_aborted_moves();
 
 e_create_move create_move(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to);
 
+/**
+ * @brief Find the blocks that will be affected by a move of b_from to the location "to"
+ * @param blocks_affected Loaded by this routine and returned via reference; it lists the blocks etc. moved
+ * @param b_from Id of the cluster-level block to be moved
+ * @param to Where b_from will be moved to
+ * @return e_block_move_result ABORT if either of the moving blocks is already stored, either of the blocks is fixed, the "to" location is not
+ * compatible, etc. INVERT if the "from" block is a single block and the "to" block is a macro. VALID otherwise.
+ */
 e_block_move_result find_affected_blocks(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to);
 
 e_block_move_result record_single_block_swap(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to);
@@ -109,6 +117,12 @@ e_block_move_result record_macro_move(t_pl_blocks_to_be_moved& blocks_affected,
 e_block_move_result identify_macro_self_swap_affected_macros(std::vector<int>& macros, const int imacro, t_pl_offset swap_offset);
 e_block_move_result record_macro_self_swaps(t_pl_blocks_to_be_moved& blocks_affected, const int imacro, t_pl_offset swap_offset);
 
+/**
+ * @brief Check whether the "to" location is legal for the given "blk"
+ * @param blk Id of the cluster-level block being checked
+ * @param to Candidate location for blk
+ * @return True if this would be a legal move, false otherwise
+ */
 bool is_legal_swap_to_location(ClusterBlockId blk, t_pl_loc to);
 
 std::set<t_pl_loc> determine_locations_emptied_by_move(t_pl_blocks_to_be_moved& blocks_affected);
diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
new file mode 100644
index 00000000000..ad0f643ceb4
--- /dev/null
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -0,0 +1,2280 @@
+/**
+ * @file net_cost_handler.cpp
+ * @brief This file contains the implementation of functions used to update placement cost when a new move is proposed/committed.
+ * 
+ * VPR placement cost consists of three terms which represent wirelength, timing, and NoC cost. 
+ * 
+ * To get an estimation of the wirelength of each net, the Half Perimeter Wire Length (HPWL) approach is used. In this approach, 
+ * half of the perimeter of the bounding box which contains all terminals of the net is multiplied by a correction factor, 
+ * and the resulting number is considered as an estimation of the wirelength of the net.
+ * 
+ * Currently, we have two types of bounding boxes: 3D bounding box (or Cube BB) and per-layer bounding box. 
+ * If the FPGA grid is a 2D structure, a Cube bounding box is used, which will always have the z direction equal to 1. For 3D architectures, 
+ * the user can specify the type of bounding box. If no type is specified, the RR graph is analyzed. If all inter-die connections happen from OPINs, 
+ * the Cube bounding box is chosen; otherwise, the per-layer bounding box is chosen. In the Cube bounding box, when a net is stretched across multiple layers, 
+ * the edges of the bounding box are determined by all of the blocks on all layers. 
+ * When the per-layer bounding box is used, a separate bounding box for each layer is created, and the wirelength estimation for each layer is calculated. 
+ * To get the total wirelength of a net, the wirelength estimation on all layers is summed up. For more details, please refer to Amin Mohaghegh's MASc thesis. 
+ * 
+ * For timing estimation, the placement delay model is used. For 2D architectures, you can think of the placement delay model as a 2D array indexed by dx and dy. 
+ * To get a delay estimation of a connection (from a source to a sink), first, dx and dy between these two points should be calculated, 
+ * and these two numbers are the indices to access this 2D array. By default, the placement delay model is created by iterating over the router lookahead 
+ * to get the minimum cost for each dx and dy.
+ * 
+ * @date July 12, 2024
+ */
+#include "net_cost_handler.h"
+#include "globals.h"
+#include "placer_globals.h"
+#include "move_utils.h"
+#include "place_timing_update.h"
+#include "noc_place_utils.h"
+#include "vtr_math.h"
+
+using std::max;
+using std::min;
+
+/**
+ * @brief Update state of a net's bounding box while a move is being evaluated.
+ * NOTE(review): described as "stored as char", but no underlying type is specified on the enum -- confirm intent.
+ */
+enum class NetUpdateState {
+    NOT_UPDATED_YET,
+    UPDATED_ONCE,
+    GOT_FROM_SCRATCH
+};
+
+/**
+ * @brief Tolerance for round-off error when the total cost computed from scratch is
+ * checked against the incrementally updated one. 0.01 means that there is a 1% error tolerance.
+ */
+#define ERROR_TOL .01
+
+const int MAX_FANOUT_CROSSING_COUNT = 50; // number of entries in the cross_count table below
+
+/**
+ * @brief Crossing counts for nets with different #'s of pins.  From
+ * ICCAD 94 pp. 690 - 695 (with linear interpolation applied by me).
+ * The bounding box of a net is multiplied by this factor to better
+ * estimate the wire length of higher fanout nets. Each entry is the
+ * correction factor for the fanout index-1
+ */
+static const float cross_count[MAX_FANOUT_CROSSING_COUNT] = {/* [0..49] */ 1.0, 1.0, 1.0, 1.0828,
+                                      1.1536, 1.2206, 1.2823, 1.3385, 1.3991, 1.4493, 1.4974, 1.5455, 1.5937,
+                                      1.6418, 1.6899, 1.7304, 1.7709, 1.8114, 1.8519, 1.8924, 1.9288, 1.9652,
+                                      2.0015, 2.0379, 2.0743, 2.1061, 2.1379, 2.1698, 2.2016, 2.2334, 2.2646,
+                                      2.2958, 2.3271, 2.3583, 2.3895, 2.4187, 2.4479, 2.4772, 2.5064, 2.5356,
+                                      2.5610, 2.5864, 2.6117, 2.6371, 2.6625, 2.6887, 2.7148, 2.7410, 2.7671,
+                                      2.7933};
+
+/**
+ * @brief Matrices below are used to precompute the inverse of the average
+ * number of tracks per channel between [subhigh] and [sublow].  Access
+ * them as chan?_place_cost_fac[subhigh][sublow].  They are used to
+ * speed up the computation of the cost function that takes the length
+ * of the net bounding box in each dimension, divided by the average
+ * number of tracks in that direction; for other cost functions they
+ * will never be used. Both are constructed here with dimensions {0, 0}.
+ */
+static vtr::NdMatrix<float, 2> chanx_place_cost_fac({0, 0}); // [0...device_ctx.grid.width()-2]
+static vtr::NdMatrix<float, 2> chany_place_cost_fac({0, 0}); // [0...device_ctx.grid.height()-2]
+
+/**
+ * @brief net_cost: cost of a net; proposed_net_cost: temporary cost of a net used during move assessment.
+ * We also use negative cost values in proposed_net_cost as a flag to indicate that
+ * the cost of a net has not yet been updated.
+ */
+static vtr::vector<ClusterNetId, double> net_cost, proposed_net_cost;
+
+/**
+ * @brief Flag array to indicate whether the specific bounding box has been updated
+ * in this particular swap or not. If it has been updated before, the code
+ * must use the updated data, instead of the out-of-date data passed into the
+ * subroutine, particularly used in try_swap(). The value NOT_UPDATED_YET
+ * indicates that the net has not been updated before, UPDATED_ONCE indicates
+ * that the net has been updated once; if it is going to be updated again, the
+ * values from the previous update must be used. GOT_FROM_SCRATCH is only
+ * applicable for nets larger than SMALL_NETS and it indicates that the
+ * particular bounding box is not incrementally updated, and hence the
+ * bounding box was computed from scratch, so the bounding box would definitely be
+ * right, DO NOT update again.
+ */
+static vtr::vector<ClusterNetId, NetUpdateState> bb_updated_before; // [0...cluster_ctx.clb_nlist.nets().size()-1]
+
+/* The following arrays are used by the try_swap function for speed.   */
+
+/**
+ * The wire length estimation is based on the bounding box of the net. In the case of the 2D architecture,
+ * we use a 3D BB with the z-dimension (layer) set to 1. In the case of 3D architecture, there are 2 types of bounding box:
+ * 3D and per-layer. The type is determined at the beginning of the placement and stored in the placement context.
+ *
+ *
+ * If the bounding box is of the type 3D, ts_bb_coord_new and ts_bb_edge_new are used. Otherwise, layer_ts_bb_edge_new and
+ * layer_ts_bb_coord_new are used.
+ */
+
+/* [0...cluster_ctx.clb_nlist.nets().size()-1] -> 3D bounding box*/
+static vtr::vector<ClusterNetId, t_bb> ts_bb_coord_new, ts_bb_edge_new;
+/* [0...cluster_ctx.clb_nlist.nets().size()-1][0...num_layers-1] -> 2D bounding box on a layer*/
+static vtr::vector<ClusterNetId, std::vector<t_2D_bb>> layer_ts_bb_edge_new, layer_ts_bb_coord_new;
+/* [0...cluster_ctx.clb_nlist.nets().size()-1][0...num_layers-1] -> number of sink pins on a layer*/
+static vtr::Matrix<int> ts_layer_sink_pin_count;
+/* [0...num_affected_nets] -> net_id of the affected nets */
+static std::vector<ClusterNetId> ts_nets_to_update;
+
+/**
+ * @param net ID of the net whose driver block is checked
+ * @param num_blocks,moved_blocks The moving blocks; presumably only the first num_blocks entries of moved_blocks are in use -- TODO confirm
+ * @return True if the driver block of the net is among the moving blocks
+ */
+static bool driven_by_moved_block(const ClusterNetId net,
+                                  const int num_blocks,
+                                  const std::vector<t_pl_moved_block>& moved_blocks);
+/**
+ * @brief Update the bounding box (3D) of the net connected to blk_pin. The old and new locations of the pin are
+ * stored in pl_moved_block. The updated bounding box will be stored in ts data structures. Do not update the net
+ * cost here since it should only be updated once per net, not once per pin.
+ * @param net ID of the net connected to blk_pin
+ * @param blk ID of the moving block (presumably the block that blk_pin belongs to -- confirm)
+ * @param blk_pin ID of the moving pin connected to the net
+ * @param pl_moved_block Holds the old and new locations used for the update
+ */
+static void update_net_bb(const ClusterNetId& net,
+                          const ClusterBlockId& blk,
+                          const ClusterPinId& blk_pin,
+                          const t_pl_moved_block& pl_moved_block);
+
+/**
+ * @brief Calculate the new connection delay and timing cost of all the
+ * sink pins affected by moving a specific pin to a new location. Also
+ * calculates the total change in the timing cost.
+ * @param delay_model Placement delay model (presumably used to compute the new connection delays)
+ * @param criticalities Timing criticalities of the connections
+ * @param net ID of the net (presumably the net that "pin" is on -- confirm)
+ * @param pin ID of the moving pin
+ * @param affected_pins Updated by this routine to store the sink pins whose delays are changed due to moving the block
+ * @param delta_timing_cost Computed by this routine and returned by reference.
+ * @param is_src_moving True if "pin" is a sink pin and its driver is among the moving blocks
+ */
+static void update_td_delta_costs(const PlaceDelayModel* delay_model,
+                                  const PlacerCriticalities& criticalities,
+                                  const ClusterNetId net,
+                                  const ClusterPinId pin,
+                                  std::vector<ClusterPinId>& affected_pins,
+                                  double& delta_timing_cost,
+                                  bool is_src_moving);
+
+/**
+ * @brief If "net" is not already stored as an affected net, record it in ts_nets_to_update and increment num_affected_nets
+ * @param net ID of a net affected by a move
+ * @param num_affected_nets Incremented if this is a new net affected, and returned via reference.
+ */
+static void record_affected_net(const ClusterNetId net, int& num_affected_nets);
+
+/**
+ * @brief Call suitable function based on the bounding box type to update the bounding box of the net connected to pin_id. Also,
+ * call the function to update timing information if the placement algorithm is timing-driven.
+ * @param place_algorithm Placement algorithm
+ * @param delay_model Timing delay model used by placer
+ * @param criticalities Connections timing criticalities
+ * @param blk_id Block ID of the block that the moving pin belongs to.
+ * @param pin_id Pin ID of the moving pin
+ * @param moving_blk_inf Data structure that holds information, e.g., old location and new location, about the moving block
+ * @param affected_pins Netlist pins which are affected, in terms of placement cost, by the proposed move.
+ * @param timing_delta_c Timing cost change based on the proposed move
+ * @param num_affected_nets Index of the first free element of ts_nets_to_update. If a new net is added, it should be incremented.
+ * @param is_src_moving Is the moving pin the source of a net.
+ */
+static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm,
+                                               const PlaceDelayModel* delay_model,
+                                               const PlacerCriticalities* criticalities,
+                                               const ClusterBlockId& blk_id,
+                                               const ClusterPinId& pin_id,
+                                               const t_pl_moved_block& moving_blk_inf,
+                                               std::vector<ClusterPinId>& affected_pins,
+                                               double& timing_delta_c,
+                                               int& num_affected_nets,
+                                               bool is_src_moving);
+                                            
+/**
+ * @brief Update the 3D bounding box of "net_id" incrementally based on the old and new locations of a pin on that net
+ * @details Updates the bounding box of a net by storing its coordinates in the bb_coord_new data structure and the number of blocks on
+ * each edge in the bb_edge_new data structure. This routine should only be called for large nets, since it has some overhead relative to
+ * just doing a brute force bounding box calculation. The bounding box coordinate and edge information for net_id must be valid before this
+ * routine is called. Currently assumes channels on both sides of the CLBs forming the edges of the bounding box can be used. Essentially,
+ * I am assuming the pins always lie on the outside of the bounding box. The x and y coordinates are the pin's x and y coordinates.
+ * IO blocks are considered to be one cell, for simplicity.
+ * @param net_id ID of the net whose bounding box is updated
+ * @param bb_edge_new Number of blocks on the edges of the bounding box
+ * @param bb_coord_new Coordinates of the bounding box
+ * @param num_sink_pin_layer_new Number of sinks of the given net on each layer
+ * @param pin_old_loc The old location of the moving pin
+ * @param pin_new_loc The new location of the moving pin
+ * @param src_pin Is the moving pin driving the net
+ */
+static void update_bb(ClusterNetId net_id,
+                      t_bb& bb_edge_new,
+                      t_bb& bb_coord_new,
+                      vtr::NdMatrixProxy<int, 1> num_sink_pin_layer_new,
+                      t_physical_tile_loc pin_old_loc,
+                      t_physical_tile_loc pin_new_loc,
+                      bool src_pin);
+
+/**
+ * @brief Calculate the 3D bounding box of "net_id" from scratch (based on the block locations stored in place_ctx) and
+ * store it in bb_coord_new
+ * @param net_id ID of the net for which the bounding box is requested
+ * @param bb_coord_new Computed by this function and returned by reference.
+ * @param num_sink_pin_layer Stores the number of sink pins of "net_id" on each layer
+ */
+static void get_non_updatable_bb(ClusterNetId net_id,
+                                  t_bb& bb_coord_new,
+                                  vtr::NdMatrixProxy<int, 1> num_sink_pin_layer);
+
+
+/**
+ * @brief Update the bounding box (per-layer) of the net connected to blk_pin. The old and new locations of the pin are
+ * stored in pl_moved_block. The updated bounding box will be stored in ts data structures.
+ * @details Finds the bounding box of a net and stores its coordinates in the bb_coord_new
+ * data structure.  This routine should only be called for small nets, since it does not
+ * determine enough information for the bounding box to be updated incrementally later.
+ * Currently assumes channels on both sides of the CLBs forming the edges of the bounding box
+ * can be used.  Essentially, I am assuming the pins always lie on the outside of the bounding box.
+ * NOTE(review): bb_coord_new is not a parameter of this function -- confirm that this @details text belongs here.
+ * @param net ID of the net for which the bounding box is requested
+ * @param blk ID of the moving block
+ * @param blk_pin ID of the pin connected to the net
+ * @param pl_moved_block Placement info (old and new locations) used for the update
+ */
+static void update_net_layer_bb(const ClusterNetId& net,
+                                const ClusterBlockId& blk,
+                                const ClusterPinId& blk_pin,
+                                const t_pl_moved_block& pl_moved_block);
+
+/**
+ * @brief Calculate the per-layer bounding box of "net_id" from scratch (based on the block locations stored in place_ctx) and
+ * store it in bb_coord_new
+ * @param net_id ID of the net for which the bounding box is requested
+ * @param bb_coord_new Computed by this function and returned by reference.
+ * @param num_sink_layer Stores the number of sink pins of "net_id" on each layer
+ */
+static void get_non_updatable_layer_bb(ClusterNetId net_id,
+                                       std::vector<t_2D_bb>& bb_coord_new,
+                                       vtr::NdMatrixProxy<int, 1> num_sink_layer);
+
+
+/**
+ * @brief Update the per-layer bounding box of "net_id" incrementally based on the old and new locations of a pin on that net
+ * @details Updates the bounding box of a net by storing its coordinates in the bb_coord_new data structure and the number of blocks on
+ * each edge in the bb_edge_new data structure. This routine should only be called for large nets, since it has some overhead relative to
+ * just doing a brute force bounding box calculation. The bounding box coordinate and edge information for net_id must be valid before this
+ * routine is called. Currently assumes channels on both sides of the CLBs forming the edges of the bounding box can be used. Essentially,
+ * I am assuming the pins always lie on the outside of the bounding box. The x and y coordinates are the pin's x and y coordinates.
+ * IO blocks are considered to be one cell, for simplicity.
+ * @param net_id ID of the net whose bounding box is updated
+ * @param bb_edge_new Number of blocks on the edges of the bounding box
+ * @param bb_coord_new Coordinates of the bounding box
+ * @param bb_pin_sink_count_new Number of sinks of the given net on each layer
+ * @param pin_old_loc The old location of the moving pin
+ * @param pin_new_loc The new location of the moving pin
+ * @param is_output_pin Is the moving pin of the type output
+ */
+static void update_layer_bb(ClusterNetId net_id,
+                            std::vector<t_2D_bb>& bb_edge_new,
+                            std::vector<t_2D_bb>& bb_coord_new,
+                            vtr::NdMatrixProxy<int, 1> bb_pin_sink_count_new,
+                            t_physical_tile_loc pin_old_loc,
+                            t_physical_tile_loc pin_new_loc,
+                            bool is_output_pin);
+
+/**
+ * @brief This function is called in update_layer_bb to update the net's bounding box incrementally if
+ * the pin under consideration changes layer.
+ * @param net_id ID of the net which the moving pin belongs to
+ * @param pin_old_loc Old location of the moving pin
+ * @param pin_new_loc New location of the moving pin
+ * @param curr_bb_edge The current known number of blocks of the net on bounding box edges
+ * @param curr_bb_coord The current known bounding box of the net
+ * @param bb_pin_sink_count_new The updated number of net's sinks on each layer
+ * @param bb_edge_new The new bb edge calculated by this function
+ * @param bb_coord_new The new bb calculated by this function
+ */
+static inline void update_bb_layer_changed(ClusterNetId net_id,
+                                           const t_physical_tile_loc& pin_old_loc,
+                                           const t_physical_tile_loc& pin_new_loc,
+                                           const std::vector<t_2D_bb>& curr_bb_edge,
+                                           const std::vector<t_2D_bb>& curr_bb_coord,
+                                           vtr::NdMatrixProxy<int, 1> bb_pin_sink_count_new,
+                                           std::vector<t_2D_bb>& bb_edge_new,
+                                           std::vector<t_2D_bb>& bb_coord_new);
+                                
+/**
+ * @brief Calculate the per-layer BB of a large net from scratch and update coord, edge, and num_sink_pin_layer data structures.
+ * @details This routine finds the bounding box of each net from scratch when the bounding box is of type per-layer (i.e. from 
+ * only the block location information). It updates the coordinate, number of pins on each edge information, and the 
+ * number of sinks on each layer. It should only be called when the bounding box information is not valid.
+ * @param net_id ID of the net which the moving pin belongs to
+ * @param num_on_edges Net's number of blocks on the edges of the bounding box. It is calculated in this function.
+ * @param coords Bounding box coordinates of the net. It is calculated in this function
+ * @param layer_pin_sink_count Net's number of sinks on each layer, calculated in this function.
+ */
+static void get_layer_bb_from_scratch(ClusterNetId net_id,
+                                      std::vector<t_2D_bb>& num_on_edges,
+                                      std::vector<t_2D_bb>& coords,
+                                      vtr::NdMatrixProxy<int, 1> layer_pin_sink_count);
+
+/**
+ * @brief Given the per-layer BB, calculate the wire-length cost of the net on each layer
+ * and return the sum of the costs
+ * @param net_id ID of the net which cost is requested
+ * @param bb Per-layer bounding box of the net
+ * @param layer_pin_sink_count Number of sinks of the net on each layer
+ * @return Wirelength cost of the net
+ */
+static double get_net_layer_bb_wire_cost(ClusterNetId /* net_id */,
+                                 const std::vector<t_2D_bb>& bb,
+                                 const vtr::NdMatrixProxy<int, 1> layer_pin_sink_count);
+
+/**
+ * @brief Given the per-layer BB, calculate the wire-length estimate of the net on each layer
+ * and return the sum of the lengths
+ * @param net_id ID of the net which wirelength estimate is requested
+ * @param bb Per-layer bounding box of the net
+ * @param layer_pin_sink_count Number of sinks of the net on each layer
+ * @return Wirelength estimate of the net
+ */
+static double get_net_wirelength_from_layer_bb(ClusterNetId /* net_id */,
+                                                const std::vector<t_2D_bb>& bb,
+                                                const vtr::NdMatrixProxy<int, 1> layer_pin_sink_count);
+
+/**
+ * @brief This function is called in update_layer_bb to update the net's bounding box incrementally if
+ * the pin under consideration is not changing layer.
+ * @param net_id ID of the net which the moving pin belongs to
+ * @param pin_old_loc Old location of the moving pin
+ * @param pin_new_loc New location of the moving pin
+ * @param curr_bb_edge The current known number of blocks of the net on bounding box edges
+ * @param curr_bb_coord The current known bounding box of the net
+ * @param bb_pin_sink_count_new The updated number of net's sinks on each layer
+ * @param bb_edge_new The new bb edge calculated by this function
+ * @param bb_coord_new The new bb calculated by this function
+ */
+static inline void update_bb_same_layer(ClusterNetId net_id,
+                                        const t_physical_tile_loc& pin_old_loc,
+                                        const t_physical_tile_loc& pin_new_loc,
+                                        const std::vector<t_2D_bb>& curr_bb_edge,
+                                        const std::vector<t_2D_bb>& curr_bb_coord,
+                                        vtr::NdMatrixProxy<int, 1> bb_pin_sink_count_new,
+                                        std::vector<t_2D_bb>& bb_edge_new,
+                                        std::vector<t_2D_bb>& bb_coord_new);
+
+/**
+ * @brief If the moving pin is of type SINK, update bb_pin_sink_count_new which stores the number of sink pins on each layer of "net_id"
+ * @param pin_old_loc Old location of the moving pin
+ * @param pin_new_loc New location of the moving pin
+ * @param curr_layer_pin_sink_count The current known number of sinks of the net on each layer
+ * @param bb_pin_sink_count_new The updated number of net's sinks on each layer
+ * @param is_output_pin Is the moving pin of the type output
+ */
+static void update_bb_pin_sink_count(const t_physical_tile_loc& pin_old_loc,
+                                     const t_physical_tile_loc& pin_new_loc,
+                                     const vtr::NdMatrixProxy<int, 1> curr_layer_pin_sink_count,
+                                     vtr::NdMatrixProxy<int, 1> bb_pin_sink_count_new,
+                                     bool is_output_pin);
+
+/**
+ * @brief Update the data structure for large nets that keep track of
+ * the number of blocks on each edge of the bounding box. If the moving block
+ * is the only block on one of the edges, the bounding box is calculated from scratch.
+ * Since this function is used for large nets, it updates the bounding box incrementally.
+ * @param net_id ID of the net which the moving pin belongs to
+ * @param bb_edge_new The new bb edge calculated by this function
+ * @param bb_coord_new The new bb calculated by this function
+ * @param bb_layer_pin_sink_count The updated number of net's sinks on each layer
+ * @param old_num_block_on_edge The current known number of blocks of the net on the bounding box edge being updated
+ * @param old_edge_coord The current known coordinate of the bounding box edge being updated
+ * @param new_num_block_on_edge The new number of blocks on that edge, calculated by this function
+ * @param new_edge_coord The new coordinate of that edge, calculated by this function
+ *
+ */
+static inline void update_bb_edge(ClusterNetId net_id,
+                                  std::vector<t_2D_bb>& bb_edge_new,
+                                  std::vector<t_2D_bb>& bb_coord_new,
+                                  vtr::NdMatrixProxy<int, 1> bb_layer_pin_sink_count,
+                                  const int& old_num_block_on_edge,
+                                  const int& old_edge_coord,
+                                  int& new_num_block_on_edge,
+                                  int& new_edge_coord);
+
+/**
+ * @brief When BB is being updated incrementally, the pin is moving to a new layer, and the BB is of the type "per-layer",
+ * use this function to update the BB on the new layer.
+ * @param new_pin_loc New location of the pin
+ * @param bb_edge_old bb_edge prior to moving the pin
+ * @param bb_coord_old bb_coord prior to moving the pin
+ * @param bb_edge_new New bb edge calculated by this function
+ * @param bb_coord_new new bb coord calculated by this function
+ */
+static void add_block_to_bb(const t_physical_tile_loc& new_pin_loc,
+                            const t_2D_bb& bb_edge_old,
+                            const t_2D_bb& bb_coord_old,
+                            t_2D_bb& bb_edge_new,
+                            t_2D_bb& bb_coord_new);
+
+/**
+ * @brief Calculate the 3D BB of a large net from scratch and update coord, edge, and num_sink_pin_layer data structures.
+ * @details This routine finds the bounding box of each net from scratch (i.e. from only the block location information).  It updates both the       
+ * coordinate and number of pins on each edge information. It should only be called when the bounding box 
+ * information is not valid.
+ * @param net_id ID of the net which the moving pin belongs to
+ * @param coords Bounding box coordinates of the net. It is calculated in this function
+ * @param num_on_edges Net's number of blocks on the edges of the bounding box. It is calculated in this function.
+ * @param num_sink_pin_layer Net's number of sinks on each layer, calculated in this function.
+ */
+static void get_bb_from_scratch(ClusterNetId net_id,
+                                t_bb& coords,
+                                t_bb& num_on_edges,
+                                vtr::NdMatrixProxy<int, 1> num_sink_pin_layer);
+
+/**
+ * @brief Given the 3D BB, calculate the wire-length cost of the net
+ * @param net_id ID of the net which cost is requested
+ * @param bb Bounding box of the net
+ * @return Wirelength cost of the net
+ */
+static double get_net_cost(ClusterNetId net_id, const t_bb& bb);
+
+
+
+/**
+ * @brief Given the 3D BB, calculate the wire-length estimate of the net
+ * @param net_id ID of the net which wirelength estimate is requested
+ * @param bb Bounding box of the net
+ * @return Wirelength estimate of the net
+ */
+static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bb);
+
+
+
+/**
+ * @brief To mitigate round-off errors, every once in a while, the costs of nets are summed up from scratch.
+ * This function is called to do that for bb cost. It doesn't calculate the BBs from scratch, it would only add the costs again.
+ * @return Total bb (wirelength) cost for the placement
+ */
+static double recompute_bb_cost();
+
+/**
+ * @brief To get the wirelength cost/est, BB perimeter is multiplied by a factor to approximately correct for the half-perimeter 
+ * bounding box wirelength's underestimate of wiring for nets with fanout greater than 2.
+ * @param fanout Fanout of the net for which the correction factor is requested
+ * @return Multiplicative wirelength correction factor
+ */
+static double wirelength_crossing_count(size_t fanout);
+
+/**
+ * @brief Calculates the total bb (wirelength) cost change that would result from moving the blocks
+ * indicated in the blocks_affected data structure, and reports it through bb_delta_c (the function itself returns void).
+ * @param num_affected_nets Number of valid elements in ts_bb_coord_new 
+ * @param bb_delta_c Cost difference after and before moving the block
+ */
+static void set_bb_delta_cost(const int num_affected_nets, double& bb_delta_c);
+
+/******************************* End of Function definitions ************************************/
+
+/**
+ * @brief Checks whether 'net' is driven by one of the moved blocks.
+ * @param net Net whose driver block is looked up
+ * @param num_blocks Number of leading entries of 'moved_blocks' that are inspected
+ * @param moved_blocks Blocks taking part in the move
+ * @return true if the driver block of 'net' equals the block_num of any inspected entry
+ */
+static bool driven_by_moved_block(const ClusterNetId net,
+                                  const int num_blocks,
+                                  const std::vector<t_pl_moved_block>& moved_blocks) {
+    const auto& nlist = g_vpr_ctx.clustering().clb_nlist;
+    const ClusterBlockId driver_blk = nlist.net_driver_block(net);
+
+    for (int idx = 0; idx < num_blocks; ++idx) {
+        if (driver_blk == moved_blocks[idx].block_num) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+/**
+ * @brief Refreshes the proposed (cube) bounding box of 'net' after one of its pins moved.
+ * @details Nets with fewer than SMALL_NET sinks are recomputed by brute force, at most once
+ * per net (only while the net is still marked NOT_UPDATED_YET). Larger nets are
+ * updated incrementally through update_bb().
+ * @param net Net whose bounding box is updated
+ * @param blk Block that owns the moving pin
+ * @param blk_pin The moving pin
+ * @param pl_moved_block Old and new location of the moved block
+ */
+static void update_net_bb(const ClusterNetId& net,
+                          const ClusterBlockId& blk,
+                          const ClusterPinId& blk_pin,
+                          const t_pl_moved_block& pl_moved_block) {
+    const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist;
+
+    if (clb_nlist.net_sinks(net).size() < SMALL_NET) {
+        // Small net: a brute-force recomputation is faster, and is needed only once per net.
+        if (bb_updated_before[net] == NetUpdateState::NOT_UPDATED_YET) {
+            get_non_updatable_bb(net,
+                                 ts_bb_coord_new[net],
+                                 ts_layer_sink_pin_count[size_t(net)]);
+        }
+        return;
+    }
+
+    // Large net: update the bounding box incrementally.
+    const int tile_pin = tile_pin_index(blk_pin);
+    const bool is_driver_pin = (clb_nlist.pin_type(blk_pin) == PinType::DRIVER);
+
+    t_physical_tile_type_ptr tile_type = physical_tile_type(blk);
+    const int x_offset = tile_type->pin_width_offset[tile_pin];
+    const int y_offset = tile_type->pin_height_offset[tile_pin];
+
+    // Pin position = block position shifted by the pin's offset inside the tile.
+    t_physical_tile_loc old_pin_loc = {pl_moved_block.old_loc.x + x_offset,
+                                       pl_moved_block.old_loc.y + y_offset,
+                                       pl_moved_block.old_loc.layer};
+    t_physical_tile_loc new_pin_loc = {pl_moved_block.new_loc.x + x_offset,
+                                       pl_moved_block.new_loc.y + y_offset,
+                                       pl_moved_block.new_loc.layer};
+
+    update_bb(net,
+              ts_bb_edge_new[net],
+              ts_bb_coord_new[net],
+              ts_layer_sink_pin_count[size_t(net)],
+              old_pin_loc,
+              new_pin_loc,
+              is_driver_pin);
+}
+
+/**
+ * @brief Refreshes the proposed per-layer bounding boxes of 'net' after one of its pins moved.
+ * @details Nets with fewer than SMALL_NET sinks are recomputed by brute force, at most once
+ * per net (only while the net is still marked NOT_UPDATED_YET). Larger nets are
+ * updated incrementally through update_layer_bb().
+ * @param net Net whose per-layer bounding boxes are updated
+ * @param blk Block that owns the moving pin
+ * @param blk_pin The moving pin
+ * @param pl_moved_block Old and new location of the moved block
+ */
+static void update_net_layer_bb(const ClusterNetId& net,
+                                const ClusterBlockId& blk,
+                                const ClusterPinId& blk_pin,
+                                const t_pl_moved_block& pl_moved_block) {
+    const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist;
+
+    if (clb_nlist.net_sinks(net).size() < SMALL_NET) {
+        // Small net: a brute-force recomputation is faster, and is needed only once per net.
+        if (bb_updated_before[net] == NetUpdateState::NOT_UPDATED_YET) {
+            get_non_updatable_layer_bb(net,
+                                       layer_ts_bb_coord_new[net],
+                                       ts_layer_sink_pin_count[size_t(net)]);
+        }
+        return;
+    }
+
+    // Large net: update the bounding boxes incrementally.
+    const int tile_pin = tile_pin_index(blk_pin);
+
+    t_physical_tile_type_ptr tile_type = physical_tile_type(blk);
+    const int x_offset = tile_type->pin_width_offset[tile_pin];
+    const int y_offset = tile_type->pin_height_offset[tile_pin];
+
+    // Here the pin direction comes from the physical tile pin, not from the netlist pin type.
+    const auto pin_direction = get_pin_type_from_pin_physical_num(tile_type, tile_pin);
+
+    // Pin position = block position shifted by the pin's offset inside the tile.
+    t_physical_tile_loc old_pin_loc = {pl_moved_block.old_loc.x + x_offset,
+                                       pl_moved_block.old_loc.y + y_offset,
+                                       pl_moved_block.old_loc.layer};
+    t_physical_tile_loc new_pin_loc = {pl_moved_block.new_loc.x + x_offset,
+                                       pl_moved_block.new_loc.y + y_offset,
+                                       pl_moved_block.new_loc.layer};
+
+    update_layer_bb(net,
+                    layer_ts_bb_edge_new[net],
+                    layer_ts_bb_coord_new[net],
+                    ts_layer_sink_pin_count[size_t(net)],
+                    old_pin_loc,
+                    new_pin_loc,
+                    pin_direction == e_pin_type::DRIVER);
+}
+
+/**
+ * @brief Accumulates into delta_timing_cost the timing-cost change caused by moving 'pin' of 'net'.
+ * @details Assumes the blocks have already been moved to their proposed locations; otherwise
+ * comp_td_single_connection_delay() cannot produce an up-to-date delay estimate.
+ *
+ * - Driver pin: every sink connection of the net is re-evaluated.
+ * - Sink pin: only this connection is re-evaluated, and only when the net's driver is
+ *   not moving as well (is_src_moving == false). When the driver also moves, the
+ *   connection is handled through the driver pin, so evaluating it again here would
+ *   double count its effect on delta_timing_cost.
+ *
+ * Connections whose delay is unchanged are skipped (this can happen, for example, with a
+ * dx/dy delay model when a sink keeps the same dx/dy to its driver). Criticalities stay
+ * unchanged during a swap attempt, so an unchanged delay means an unchanged cost. Skipping
+ * also keeps the number of invalidated timing edges low for incremental STA.
+ *
+ * @param delay_model Delay model used to estimate the connection delay
+ * @param criticalities Source of per-connection criticalities
+ * @param net Net the moving pin belongs to
+ * @param pin The moving pin
+ * @param affected_pins Sink pins whose proposed delay/cost changed are appended here
+ * @param delta_timing_cost Running timing cost delta; the change of each updated connection is added
+ * @param is_src_moving Whether the driver of 'net' is on a moved block
+ */
+static void update_td_delta_costs(const PlaceDelayModel* delay_model,
+                                  const PlacerCriticalities& criticalities,
+                                  const ClusterNetId net,
+                                  const ClusterPinId pin,
+                                  std::vector<ClusterPinId>& affected_pins,
+                                  double& delta_timing_cost,
+                                  bool is_src_moving) {
+    const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist;
+
+    const auto& connection_delay = g_placer_ctx.timing().connection_delay;
+    auto& connection_timing_cost = g_placer_ctx.mutable_timing().connection_timing_cost;
+    auto& proposed_connection_delay = g_placer_ctx.mutable_timing().proposed_connection_delay;
+    auto& proposed_connection_timing_cost = g_placer_ctx.mutable_timing().proposed_connection_timing_cost;
+
+    // Re-evaluates one connection of 'net'. Returns false (touching nothing) when the
+    // delay is unchanged; otherwise stores the proposed delay/cost, adds the cost
+    // difference to delta_timing_cost and returns true.
+    auto propose_connection = [&](int ipin) -> bool {
+        const float new_delay = comp_td_single_connection_delay(delay_model, net, ipin);
+        if (new_delay == connection_delay[net][ipin]) {
+            return false;
+        }
+
+        proposed_connection_delay[net][ipin] = new_delay;
+        proposed_connection_timing_cost[net][ipin] = criticalities.criticality(net, ipin) * new_delay;
+        delta_timing_cost += proposed_connection_timing_cost[net][ipin]
+                             - connection_timing_cost[net][ipin];
+        return true;
+    };
+
+    if (clb_nlist.pin_type(pin) == PinType::DRIVER) {
+        /* Net driver on a moved block: revisit every sink connection (net pin 0 is skipped). */
+        for (size_t ipin = 1; ipin < clb_nlist.net_pins(net).size(); ++ipin) {
+            if (propose_connection(static_cast<int>(ipin))) {
+                /* Record this connection's sink pin as affected */
+                affected_pins.push_back(clb_nlist.net_pin(net, ipin));
+            }
+        }
+        return;
+    }
+
+    /* Net sink on a moved block */
+    VTR_ASSERT_SAFE(clb_nlist.pin_type(pin) == PinType::SINK);
+
+    if (is_src_moving) {
+        /* The net's driver is moving too; this connection is handled via the driver pin. */
+        return;
+    }
+
+    /* Index of this sink within its net */
+    const int sink_ipin = clb_nlist.pin_net_index(pin);
+    if (propose_connection(sink_ipin)) {
+        affected_pins.push_back(pin);
+    }
+}
+
+/**
+ * @brief Records 'net' as affected by the current move, at most once.
+ * @details A negative proposed_net_cost[net] means the net has not been marked yet.
+ * In that case the net is appended to ts_nets_to_update, num_affected_nets is
+ * incremented, and proposed_net_cost[net] is set to 1 as the "already marked" flag.
+ * @param net Net to record
+ * @param num_affected_nets Number of nets recorded so far; incremented when 'net' is newly recorded
+ */
+static void record_affected_net(const ClusterNetId net,
+                                int& num_affected_nets) {
+    const bool not_marked_yet = proposed_net_cost[net] < 0.;
+    if (!not_marked_yet) {
+        return;
+    }
+
+    /* Append the net to the list of nets to update. */
+    ts_nets_to_update[num_affected_nets] = net;
+    ++num_affected_nets;
+
+    /* Flag to say we've marked this net. */
+    proposed_net_cost[net] = 1.;
+}
+
+/**
+ * @brief Updates the net bookkeeping for one pin of a moved block.
+ * @details Looks up the net attached to pin_id. Ignored nets are skipped entirely.
+ * Otherwise the net is recorded as affected, its proposed bounding box is refreshed
+ * (cube or per-layer, selected by the placement context's cube_bb flag) and, when the
+ * place algorithm is timing driven, the timing cost delta is updated.
+ * @param place_algorithm Placement algorithm; queried for is_timing_driven()
+ * @param delay_model Delay model forwarded to update_td_delta_costs()
+ * @param criticalities Criticalities; dereferenced only when the algorithm is timing driven
+ * @param blk_id Block that owns the moving pin
+ * @param pin_id The moving pin
+ * @param moving_blk_inf Old and new location of the moved block
+ * @param affected_pins Forwarded to update_td_delta_costs(), which appends affected sink pins
+ * @param timing_delta_c Running timing cost delta, forwarded to update_td_delta_costs()
+ * @param num_affected_nets Number of nets recorded so far; may be incremented
+ * @param is_src_moving Whether the driver of the pin's net is on a moved block
+ */
+static void update_net_info_on_pin_move(const t_place_algorithm& place_algorithm,
+                                        const PlaceDelayModel* delay_model,
+                                        const PlacerCriticalities* criticalities,
+                                        const ClusterBlockId& blk_id,
+                                        const ClusterPinId& pin_id,
+                                        const t_pl_moved_block& moving_blk_inf,
+                                        std::vector<ClusterPinId>& affected_pins,
+                                        double& timing_delta_c,
+                                        int& num_affected_nets,
+                                        bool is_src_moving) {
+    const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist;
+    const ClusterNetId net_id = clb_nlist.pin_net(pin_id);
+    VTR_ASSERT_SAFE_MSG(net_id,
+                        "Only valid nets should be found in compressed netlist block pins");
+
+    if (clb_nlist.net_is_ignored(net_id)) {
+        //TODO: Do we require anything special here for global nets?
+        //"Global nets are assumed to span the whole chip, and do not affect costs."
+        return;
+    }
+
+    /* Record the net as affected by this move */
+    record_affected_net(net_id, num_affected_nets);
+
+    /* Refresh the proposed bounding box in whichever representation is active. */
+    if (g_vpr_ctx.placement().cube_bb) {
+        update_net_bb(net_id, blk_id, pin_id, moving_blk_inf);
+    } else {
+        update_net_layer_bb(net_id, blk_id, pin_id, moving_blk_inf);
+    }
+
+    if (!place_algorithm.is_timing_driven()) {
+        return;
+    }
+
+    /* Determine the change in connection delay and timing cost. */
+    update_td_delta_costs(delay_model,
+                          *criticalities,
+                          net_id,
+                          pin_id,
+                          affected_pins,
+                          timing_delta_c,
+                          is_src_moving);
+}
+
+/**
+ * @brief Computes the (cube) bounding box of a net from scratch, without edge counts.
+ * @details Walks the driver pin and every sink pin of the net, using each block's placed
+ * location plus the pin's width/height offset inside its tile. The number of sinks
+ * on each layer is recounted as well. The resulting coordinates are clipped before
+ * being written to bb_coord_new.
+ * @param net_id Net whose bounding box is computed
+ * @param bb_coord_new Receives the clipped bounding box coordinates
+ * @param num_sink_pin_layer Reset to zero for every layer, then filled with the per-layer sink count
+ */
+static void get_non_updatable_bb(ClusterNetId net_id,
+                                 t_bb& bb_coord_new,
+                                 vtr::NdMatrixProxy<int, 1> num_sink_pin_layer) {
+    //TODO: account for multiple physical pin instances per logical pin
+
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+    auto& place_ctx = g_vpr_ctx.placement();
+    auto& device_ctx = g_vpr_ctx.device();
+
+    // The box starts as the single point occupied by the driver pin (net pin 0).
+    const ClusterBlockId driver_blk = cluster_ctx.clb_nlist.net_driver_block(net_id);
+    const int driver_pnum = net_pin_to_tile_pin_index(net_id, 0);
+
+    int xmin = place_ctx.block_locs[driver_blk].loc.x
+               + physical_tile_type(driver_blk)->pin_width_offset[driver_pnum];
+    int ymin = place_ctx.block_locs[driver_blk].loc.y
+               + physical_tile_type(driver_blk)->pin_height_offset[driver_pnum];
+    int layer_min = place_ctx.block_locs[driver_blk].loc.layer;
+    int xmax = xmin;
+    int ymax = ymin;
+    int layer_max = layer_min;
+
+    // Restart the per-layer sink counters.
+    for (int ilayer = 0; ilayer < device_ctx.grid.get_num_layers(); ++ilayer) {
+        num_sink_pin_layer[ilayer] = 0;
+    }
+
+    // Grow the box over every sink pin and count the sinks on each layer.
+    for (auto sink_pin : cluster_ctx.clb_nlist.net_sinks(net_id)) {
+        const ClusterBlockId sink_blk = cluster_ctx.clb_nlist.pin_block(sink_pin);
+        const int sink_pnum = tile_pin_index(sink_pin);
+
+        const int x = place_ctx.block_locs[sink_blk].loc.x
+                      + physical_tile_type(sink_blk)->pin_width_offset[sink_pnum];
+        const int y = place_ctx.block_locs[sink_blk].loc.y
+                      + physical_tile_type(sink_blk)->pin_height_offset[sink_pnum];
+        const int layer = place_ctx.block_locs[sink_blk].loc.layer;
+
+        // Each min/max pair starts equal, so min <= max always holds and at most
+        // one bound of a pair changes per pin.
+        xmin = min<int>(xmin, x);
+        xmax = max<int>(xmax, x);
+        ymin = min<int>(ymin, y);
+        ymax = max<int>(ymax, y);
+        layer_min = min<int>(layer_min, layer);
+        layer_max = max<int>(layer_max, layer);
+
+        num_sink_pin_layer[layer]++;
+    }
+
+    /* There are no channels beyond device_ctx.grid.width() - 2 and
+     * device_ctx.grid.height() - 2, so x and y are clipped to that. Because the
+     * channel immediately below and immediately to the left of the bounding box
+     * is always included, x and y are also clipped to 1 (the minimum channel
+     * index is 0). Layers are clipped to [0, number of layers - 1].
+     * See route_common.cpp for a channel diagram. */
+    auto clip = [](int value, int upper, int lower) {
+        return max(min<int>(value, upper), lower);
+    };
+
+    bb_coord_new.xmin = clip(xmin, device_ctx.grid.width() - 2, 1);  //-2 for no perim channels
+    bb_coord_new.ymin = clip(ymin, device_ctx.grid.height() - 2, 1); //-2 for no perim channels
+    bb_coord_new.layer_min = clip(layer_min, device_ctx.grid.get_num_layers() - 1, 0);
+    bb_coord_new.xmax = clip(xmax, device_ctx.grid.width() - 2, 1);  //-2 for no perim channels
+    bb_coord_new.ymax = clip(ymax, device_ctx.grid.height() - 2, 1); //-2 for no perim channels
+    bb_coord_new.layer_max = clip(layer_max, device_ctx.grid.get_num_layers() - 1, 0);
+}
+
+/**
+ * @brief Computes the per-layer bounding boxes of a net from scratch, without edge counts.
+ * @details The box of every layer starts as the driver pin's x/y position. Each sink pin
+ * then grows only the box of the layer its block is placed on, and is counted in
+ * that layer's sink count. The resulting coordinates are clipped before being
+ * written to bb_coord_new.
+ * @param net_id Net whose per-layer bounding boxes are computed
+ * @param bb_coord_new Receives, per layer, the layer number and the clipped bounding box coordinates
+ * @param num_sink_layer Reset to zero for every layer, then filled with the per-layer sink count
+ */
+static void get_non_updatable_layer_bb(ClusterNetId net_id,
+                                       std::vector<t_2D_bb>& bb_coord_new,
+                                       vtr::NdMatrixProxy<int, 1> num_sink_layer) {
+    //TODO: account for multiple physical pin instances per logical pin
+
+    auto& device_ctx = g_vpr_ctx.device();
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+    auto& place_ctx = g_vpr_ctx.placement();
+
+    const int num_layers = device_ctx.grid.get_num_layers();
+
+    // Restart the per-layer sink counters.
+    for (int ilayer = 0; ilayer < num_layers; ++ilayer) {
+        num_sink_layer[ilayer] = 0;
+    }
+
+    // The driver pin (net pin 0) seeds the box of every layer.
+    const ClusterBlockId driver_blk = cluster_ctx.clb_nlist.net_driver_block(net_id);
+    const int driver_pnum = net_pin_to_tile_pin_index(net_id, 0);
+
+    const int src_x = place_ctx.block_locs[driver_blk].loc.x
+                      + physical_tile_type(driver_blk)->pin_width_offset[driver_pnum];
+    const int src_y = place_ctx.block_locs[driver_blk].loc.y
+                      + physical_tile_type(driver_blk)->pin_height_offset[driver_pnum];
+
+    std::vector<int> xmin(num_layers, src_x);
+    std::vector<int> ymin(num_layers, src_y);
+    std::vector<int> xmax(num_layers, src_x);
+    std::vector<int> ymax(num_layers, src_y);
+
+    // Grow the box of the layer each sink sits on, and count the sink there.
+    for (auto sink_pin : cluster_ctx.clb_nlist.net_sinks(net_id)) {
+        const ClusterBlockId sink_blk = cluster_ctx.clb_nlist.pin_block(sink_pin);
+        const int sink_pnum = tile_pin_index(sink_pin);
+
+        const int x = place_ctx.block_locs[sink_blk].loc.x
+                      + physical_tile_type(sink_blk)->pin_width_offset[sink_pnum];
+        const int y = place_ctx.block_locs[sink_blk].loc.y
+                      + physical_tile_type(sink_blk)->pin_height_offset[sink_pnum];
+        const int sink_layer = place_ctx.block_locs[sink_blk].loc.layer;
+
+        num_sink_layer[sink_layer]++;
+
+        // Each min/max pair starts equal, so min <= max always holds and at most
+        // one bound of a pair changes per pin.
+        xmin[sink_layer] = min<int>(xmin[sink_layer], x);
+        xmax[sink_layer] = max<int>(xmax[sink_layer], x);
+        ymin[sink_layer] = min<int>(ymin[sink_layer], y);
+        ymax[sink_layer] = max<int>(ymax[sink_layer], y);
+    }
+
+    /* There are no channels beyond device_ctx.grid.width() - 2 and
+     * device_ctx.grid.height() - 2, so x and y are clipped to that. Because the
+     * channel immediately below and immediately to the left of the bounding box
+     * is always included, x and y are also clipped to 1 (the minimum channel
+     * index is 0). See route_common.cpp for a channel diagram. */
+    auto clip = [](int value, int upper, int lower) {
+        return max(min<int>(value, upper), lower);
+    };
+
+    for (int ilayer = 0; ilayer < num_layers; ++ilayer) {
+        t_2D_bb& layer_bb = bb_coord_new[ilayer];
+        layer_bb.layer_num = ilayer;
+        layer_bb.xmin = clip(xmin[ilayer], device_ctx.grid.width() - 2, 1);  //-2 for no perim channels
+        layer_bb.ymin = clip(ymin[ilayer], device_ctx.grid.height() - 2, 1); //-2 for no perim channels
+        layer_bb.xmax = clip(xmax[ilayer], device_ctx.grid.width() - 2, 1);  //-2 for no perim channels
+        layer_bb.ymax = clip(ymax[ilayer], device_ctx.grid.height() - 2, 1); //-2 for no perim channels
+    }
+}
+
+static void update_bb(ClusterNetId net_id,
+                      t_bb& bb_edge_new,
+                      t_bb& bb_coord_new,
+                      vtr::NdMatrixProxy<int, 1> num_sink_pin_layer_new,
+                      t_physical_tile_loc pin_old_loc,
+                      t_physical_tile_loc pin_new_loc,
+                      bool src_pin) {
+    //TODO: account for multiple physical pin instances per logical pin
+    const t_bb *curr_bb_edge, *curr_bb_coord;
+
+    auto& device_ctx = g_vpr_ctx.device();
+    auto& place_move_ctx = g_placer_ctx.move();
+
+    const int num_layers = device_ctx.grid.get_num_layers();
+
+    pin_new_loc.x = max(min<int>(pin_new_loc.x, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
+    pin_new_loc.y = max(min<int>(pin_new_loc.y, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
+    pin_new_loc.layer_num = max(min<int>(pin_new_loc.layer_num, device_ctx.grid.get_num_layers() - 1), 0);
+    pin_old_loc.x = max(min<int>(pin_old_loc.x, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
+    pin_old_loc.y = max(min<int>(pin_old_loc.y, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
+    pin_old_loc.layer_num = max(min<int>(pin_old_loc.layer_num, device_ctx.grid.get_num_layers() - 1), 0);
+
+    /* Check if the net had been updated before. */
+    if (bb_updated_before[net_id] == NetUpdateState::GOT_FROM_SCRATCH) {
+        /* The net had been updated from scratch, DO NOT update again! */
+        return;
+    }
+
+    vtr::NdMatrixProxy<int, 1> curr_num_sink_pin_layer = (bb_updated_before[net_id] == NetUpdateState::NOT_UPDATED_YET) ? 
+    place_move_ctx.num_sink_pin_layer[size_t(net_id)] : num_sink_pin_layer_new;
+
+    if (bb_updated_before[net_id] == NetUpdateState::NOT_UPDATED_YET) {
+        /* The net had NOT been updated before, could use the old values */
+        curr_bb_edge = &place_move_ctx.bb_num_on_edges[net_id];
+        curr_bb_coord = &place_move_ctx.bb_coords[net_id];
+        bb_updated_before[net_id] = NetUpdateState::UPDATED_ONCE;
+    } else {
+        /* The net had been updated before, must use the new values */
+        curr_bb_coord = &bb_coord_new;
+        curr_bb_edge = &bb_edge_new;
+    }
+
+    /* Check if I can update the bounding box incrementally. */
+
+    if (pin_new_loc.x < pin_old_loc.x) { /* Move to left. */
+
+        /* Update the xmax fields for coordinates and number of edges first. */
+
+        if (pin_old_loc.x == curr_bb_coord->xmax) { /* Old position at xmax. */
+            if (curr_bb_edge->xmax == 1) {
+                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer_new);
+                bb_updated_before[net_id] = NetUpdateState::GOT_FROM_SCRATCH;
+                return;
+            } else {
+                bb_edge_new.xmax = curr_bb_edge->xmax - 1;
+                bb_coord_new.xmax = curr_bb_coord->xmax;
+            }
+        } else { /* Move to left, old position was not at xmax. */
+            bb_coord_new.xmax = curr_bb_coord->xmax;
+            bb_edge_new.xmax = curr_bb_edge->xmax;
+        }
+
+        /* Now do the xmin fields for coordinates and number of edges. */
+
+        if (pin_new_loc.x < curr_bb_coord->xmin) { /* Moved past xmin */
+            bb_coord_new.xmin = pin_new_loc.x;
+            bb_edge_new.xmin = 1;
+        } else if (pin_new_loc.x == curr_bb_coord->xmin) { /* Moved to xmin */
+            bb_coord_new.xmin = pin_new_loc.x;
+            bb_edge_new.xmin = curr_bb_edge->xmin + 1;
+        } else { /* Xmin unchanged. */
+            bb_coord_new.xmin = curr_bb_coord->xmin;
+            bb_edge_new.xmin = curr_bb_edge->xmin;
+        }
+        /* End of move to left case. */
+
+    } else if (pin_new_loc.x > pin_old_loc.x) { /* Move to right. */
+
+        /* Update the xmin fields for coordinates and number of edges first. */
+
+        if (pin_old_loc.x == curr_bb_coord->xmin) { /* Old position at xmin. */
+            if (curr_bb_edge->xmin == 1) {
+                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer_new);
+                bb_updated_before[net_id] = NetUpdateState::GOT_FROM_SCRATCH;
+                return;
+            } else {
+                bb_edge_new.xmin = curr_bb_edge->xmin - 1;
+                bb_coord_new.xmin = curr_bb_coord->xmin;
+            }
+        } else { /* Move to right, old position was not at xmin. */
+            bb_coord_new.xmin = curr_bb_coord->xmin;
+            bb_edge_new.xmin = curr_bb_edge->xmin;
+        }
+
+        /* Now do the xmax fields for coordinates and number of edges. */
+
+        if (pin_new_loc.x > curr_bb_coord->xmax) { /* Moved past xmax. */
+            bb_coord_new.xmax = pin_new_loc.x;
+            bb_edge_new.xmax = 1;
+        } else if (pin_new_loc.x == curr_bb_coord->xmax) { /* Moved to xmax */
+            bb_coord_new.xmax = pin_new_loc.x;
+            bb_edge_new.xmax = curr_bb_edge->xmax + 1;
+        } else { /* Xmax unchanged. */
+            bb_coord_new.xmax = curr_bb_coord->xmax;
+            bb_edge_new.xmax = curr_bb_edge->xmax;
+        }
+        /* End of move to right case. */
+
+    } else { /* pin_new_loc.x == pin_old_loc.x -- no x motion. */
+        bb_coord_new.xmin = curr_bb_coord->xmin;
+        bb_coord_new.xmax = curr_bb_coord->xmax;
+        bb_edge_new.xmin = curr_bb_edge->xmin;
+        bb_edge_new.xmax = curr_bb_edge->xmax;
+    }
+
+    /* Now account for the y-direction motion. */
+
+    if (pin_new_loc.y < pin_old_loc.y) { /* Move down. */
+
+        /* Update the ymax fields for coordinates and number of edges first. */
+
+        if (pin_old_loc.y == curr_bb_coord->ymax) { /* Old position at ymax. */
+            if (curr_bb_edge->ymax == 1) {
+                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer_new);
+                bb_updated_before[net_id] = NetUpdateState::GOT_FROM_SCRATCH;
+                return;
+            } else {
+                bb_edge_new.ymax = curr_bb_edge->ymax - 1;
+                bb_coord_new.ymax = curr_bb_coord->ymax;
+            }
+        } else { /* Move down, old postion was not at ymax. */
+            bb_coord_new.ymax = curr_bb_coord->ymax;
+            bb_edge_new.ymax = curr_bb_edge->ymax;
+        }
+
+        /* Now do the ymin fields for coordinates and number of edges. */
+
+        if (pin_new_loc.y < curr_bb_coord->ymin) { /* Moved past ymin */
+            bb_coord_new.ymin = pin_new_loc.y;
+            bb_edge_new.ymin = 1;
+        } else if (pin_new_loc.y == curr_bb_coord->ymin) { /* Moved to ymin */
+            bb_coord_new.ymin = pin_new_loc.y;
+            bb_edge_new.ymin = curr_bb_edge->ymin + 1;
+        } else { /* ymin unchanged. */
+            bb_coord_new.ymin = curr_bb_coord->ymin;
+            bb_edge_new.ymin = curr_bb_edge->ymin;
+        }
+        /* End of move down case. */
+
+    } else if (pin_new_loc.y > pin_old_loc.y) { /* Moved up. */
+
+        /* Update the ymin fields for coordinates and number of edges first. */
+
+        if (pin_old_loc.y == curr_bb_coord->ymin) { /* Old position at ymin. */
+            if (curr_bb_edge->ymin == 1) {
+                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer_new);
+                bb_updated_before[net_id] = NetUpdateState::GOT_FROM_SCRATCH;
+                return;
+            } else {
+                bb_edge_new.ymin = curr_bb_edge->ymin - 1;
+                bb_coord_new.ymin = curr_bb_coord->ymin;
+            }
+        } else { /* Moved up, old position was not at ymin. */
+            bb_coord_new.ymin = curr_bb_coord->ymin;
+            bb_edge_new.ymin = curr_bb_edge->ymin;
+        }
+
+        /* Now do the ymax fields for coordinates and number of edges. */
+
+        if (pin_new_loc.y > curr_bb_coord->ymax) { /* Moved past ymax. */
+            bb_coord_new.ymax = pin_new_loc.y;
+            bb_edge_new.ymax = 1;
+        } else if (pin_new_loc.y == curr_bb_coord->ymax) { /* Moved to ymax */
+            bb_coord_new.ymax = pin_new_loc.y;
+            bb_edge_new.ymax = curr_bb_edge->ymax + 1;
+        } else { /* ymax unchanged. */
+            bb_coord_new.ymax = curr_bb_coord->ymax;
+            bb_edge_new.ymax = curr_bb_edge->ymax;
+        }
+        /* End of move up case. */
+
+    } else { /* pin_new_loc.y == yold -- no y motion. */
+        bb_coord_new.ymin = curr_bb_coord->ymin;
+        bb_coord_new.ymax = curr_bb_coord->ymax;
+        bb_edge_new.ymin = curr_bb_edge->ymin;
+        bb_edge_new.ymax = curr_bb_edge->ymax;
+    }
+
+    /* Now account for the layer motion. */
+    if (num_layers > 1) {
+        /* We need to update it only if multiple layers are available */
+        for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+            num_sink_pin_layer_new[layer_num] = curr_num_sink_pin_layer[layer_num];
+        }
+        if (!src_pin) {
+            /* if src pin is being moved, we don't need to update this data structure */
+            if (pin_old_loc.layer_num != pin_new_loc.layer_num) {
+                num_sink_pin_layer_new[pin_old_loc.layer_num] = (curr_num_sink_pin_layer)[pin_old_loc.layer_num] - 1;
+                num_sink_pin_layer_new[pin_new_loc.layer_num] = (curr_num_sink_pin_layer)[pin_new_loc.layer_num] + 1;
+            }
+        }
+
+        if (pin_new_loc.layer_num < pin_old_loc.layer_num) {
+            if (pin_old_loc.layer_num == curr_bb_coord->layer_max) {
+                if (curr_bb_edge->layer_max == 1) {
+                    get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer_new);
+                    bb_updated_before[net_id] = NetUpdateState::GOT_FROM_SCRATCH;
+                    return;
+                } else {
+                    bb_edge_new.layer_max = curr_bb_edge->layer_max - 1;
+                    bb_coord_new.layer_max = curr_bb_coord->layer_max;
+                }
+            } else {
+                bb_coord_new.layer_max = curr_bb_coord->layer_max;
+                bb_edge_new.layer_max = curr_bb_edge->layer_max;
+            }
+
+
+            if (pin_new_loc.layer_num < curr_bb_coord->layer_min) {
+                bb_coord_new.layer_min = pin_new_loc.layer_num;
+                bb_edge_new.layer_min = 1;
+            } else if (pin_new_loc.layer_num == curr_bb_coord->layer_min) {
+                bb_coord_new.layer_min = pin_new_loc.layer_num;
+                bb_edge_new.layer_min = curr_bb_edge->layer_min + 1;
+            } else {
+                bb_coord_new.layer_min = curr_bb_coord->layer_min;
+                bb_edge_new.layer_min = curr_bb_edge->layer_min;
+            }
+
+        } else if (pin_new_loc.layer_num > pin_old_loc.layer_num) {
+
+
+            if (pin_old_loc.layer_num == curr_bb_coord->layer_min) {
+                if (curr_bb_edge->layer_min == 1) {
+                    get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer_new);
+                    bb_updated_before[net_id] = NetUpdateState::GOT_FROM_SCRATCH;
+                    return;
+                } else {
+                    bb_edge_new.layer_min = curr_bb_edge->layer_min - 1;
+                    bb_coord_new.layer_min = curr_bb_coord->layer_min;
+                }
+            } else {
+                bb_coord_new.layer_min = curr_bb_coord->layer_min;
+                bb_edge_new.layer_min = curr_bb_edge->layer_min;
+            }
+
+            if (pin_new_loc.layer_num > curr_bb_coord->layer_max) {
+                bb_coord_new.layer_max = pin_new_loc.layer_num;
+                bb_edge_new.layer_max = 1;
+            } else if (pin_new_loc.layer_num == curr_bb_coord->layer_max) {
+                bb_coord_new.layer_max = pin_new_loc.layer_num;
+                bb_edge_new.layer_max = curr_bb_edge->layer_max + 1;
+            } else {
+                bb_coord_new.layer_max = curr_bb_coord->layer_max;
+                bb_edge_new.layer_max = curr_bb_edge->layer_max;
+            }
+
+
+        } else {//pin_new_loc.layer_num == pin_old_loc.layer_num
+            bb_coord_new.layer_min = curr_bb_coord->layer_min;
+            bb_coord_new.layer_max = curr_bb_coord->layer_max;
+            bb_edge_new.layer_min = curr_bb_edge->layer_min;
+            bb_edge_new.layer_max = curr_bb_edge->layer_max;
+        }
+
+    } else {// num_layers == 1
+        bb_coord_new.layer_min = curr_bb_coord->layer_min;
+        bb_coord_new.layer_max = curr_bb_coord->layer_max;
+        bb_edge_new.layer_min = curr_bb_edge->layer_min;
+        bb_edge_new.layer_max = curr_bb_edge->layer_max;
+    }
+
+    if (bb_updated_before[net_id] == NetUpdateState::NOT_UPDATED_YET) {
+        bb_updated_before[net_id] = NetUpdateState::UPDATED_ONCE;
+    }
+}
+
+/**
+ * @brief Incrementally updates the per-layer bounding boxes of a net after one of its pins moved.
+ *
+ * On the first update of a net the starting state is read from the placer move context;
+ * on later updates it is the "_new" data produced by the earlier call. Nothing is done
+ * when the boxes of the net were already recomputed from scratch.
+ *
+ * @param net_id                 Net whose bounding boxes are updated.
+ * @param bb_edge_new            [in/out] Per-layer number of pins on each bounding-box edge.
+ * @param bb_coord_new           [in/out] Per-layer bounding-box coordinates.
+ * @param bb_pin_sink_count_new  [in/out] Per-layer sink-pin counts.
+ * @param pin_old_loc            Pin location before the move (by value: it is clamped locally).
+ * @param pin_new_loc            Pin location after the move (by value: it is clamped locally).
+ * @param is_output_pin          Forwarded to update_bb_pin_sink_count(), which only copies
+ *                               the sink counts when this is true.
+ */
+static void update_layer_bb(ClusterNetId net_id,
+                            std::vector<t_2D_bb>& bb_edge_new,
+                            std::vector<t_2D_bb>& bb_coord_new,
+                            vtr::NdMatrixProxy<int, 1> bb_pin_sink_count_new,
+                            t_physical_tile_loc pin_old_loc,
+                            t_physical_tile_loc pin_new_loc,
+                            bool is_output_pin) {
+    auto& device_ctx = g_vpr_ctx.device();
+    auto& place_move_ctx = g_placer_ctx.move();
+
+    /* Check if the net had been updated before. */
+    if (bb_updated_before[net_id] == NetUpdateState::GOT_FROM_SCRATCH) {
+        /* The net had been updated from scratch, DO NOT update again! */
+        return;
+    }
+
+    pin_new_loc.x = max(min<int>(pin_new_loc.x, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
+    pin_new_loc.y = max(min<int>(pin_new_loc.y, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
+    pin_old_loc.x = max(min<int>(pin_old_loc.x, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
+    pin_old_loc.y = max(min<int>(pin_old_loc.y, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
+
+    /* Evaluated once: the state is promoted below, so it must be sampled before that. */
+    const bool first_update = (bb_updated_before[net_id] == NetUpdateState::NOT_UPDATED_YET);
+
+    const vtr::NdMatrixProxy<int, 1> curr_layer_pin_sink_count = first_update ? place_move_ctx.num_sink_pin_layer[size_t(net_id)] : bb_pin_sink_count_new;
+
+    const std::vector<t_2D_bb>*curr_bb_edge, *curr_bb_coord;
+    if (first_update) {
+        /* The net had NOT been updated before, could use the old values */
+        curr_bb_edge = &place_move_ctx.layer_bb_num_on_edges[net_id];
+        curr_bb_coord = &place_move_ctx.layer_bb_coords[net_id];
+        bb_updated_before[net_id] = NetUpdateState::UPDATED_ONCE;
+    } else {
+        /* The net had been updated before, must use the new values */
+        curr_bb_edge = &bb_edge_new;
+        curr_bb_coord = &bb_coord_new;
+    }
+
+    update_bb_pin_sink_count(pin_old_loc,
+                             pin_new_loc,
+                             curr_layer_pin_sink_count,
+                             bb_pin_sink_count_new,
+                             is_output_pin);
+
+    /* Start from the current boxes; the helpers below only touch the edges that change. */
+    bb_edge_new = *curr_bb_edge;
+    bb_coord_new = *curr_bb_coord;
+
+    /* Check if I can update the bounding box incrementally. */
+    if (pin_old_loc.layer_num != pin_new_loc.layer_num) {
+        update_bb_layer_changed(net_id,
+                                pin_old_loc,
+                                pin_new_loc,
+                                *curr_bb_edge,
+                                *curr_bb_coord,
+                                bb_pin_sink_count_new,
+                                bb_edge_new,
+                                bb_coord_new);
+    } else {
+        update_bb_same_layer(net_id,
+                             pin_old_loc,
+                             pin_new_loc,
+                             *curr_bb_edge,
+                             *curr_bb_coord,
+                             bb_pin_sink_count_new,
+                             bb_edge_new,
+                             bb_coord_new);
+    }
+
+    /* No state fix-up is needed here: the net was marked UPDATED_ONCE above, and the
+     * helpers can only change that to GOT_FROM_SCRATCH. */
+}
+
+/**
+ * @brief Updates the bounding box of a single layer for a pin that moved within that layer.
+ *
+ * For each axis, the edge the pin moved away from loses one pin (through update_bb_edge(),
+ * which may instead recompute everything from scratch, in which case this function returns
+ * immediately), and the edge the pin moved towards is extended or gains one pin.
+ *
+ * @param net_id                 Net being updated.
+ * @param pin_old_loc            Pin location before the move.
+ * @param pin_new_loc            Pin location after the move (same layer as pin_old_loc).
+ * @param curr_bb_edge           Current per-layer edge pin counts.
+ * @param curr_bb_coord          Current per-layer bounding-box coordinates.
+ * @param bb_pin_sink_count_new  Per-layer sink counts, forwarded to update_bb_edge().
+ * @param bb_edge_new            [in/out] Updated per-layer edge pin counts.
+ * @param bb_coord_new           [in/out] Updated per-layer bounding-box coordinates.
+ */
+static inline void update_bb_same_layer(ClusterNetId net_id,
+                                        const t_physical_tile_loc& pin_old_loc,
+                                        const t_physical_tile_loc& pin_new_loc,
+                                        const std::vector<t_2D_bb>& curr_bb_edge,
+                                        const std::vector<t_2D_bb>& curr_bb_coord,
+                                        vtr::NdMatrixProxy<int, 1> bb_pin_sink_count_new,
+                                        std::vector<t_2D_bb>& bb_edge_new,
+                                        std::vector<t_2D_bb>& bb_coord_new) {
+    const int x_old = pin_old_loc.x;
+    const int y_old = pin_old_loc.y;
+    const int x_new = pin_new_loc.x;
+    const int y_new = pin_new_loc.y;
+
+    const int layer_num = pin_old_loc.layer_num;
+    VTR_ASSERT_SAFE(layer_num == pin_new_loc.layer_num);
+
+    /* Views on the only layer that is affected by this move. */
+    const t_2D_bb& old_edge = curr_bb_edge[layer_num];
+    const t_2D_bb& old_coord = curr_bb_coord[layer_num];
+    t_2D_bb& new_edge = bb_edge_new[layer_num];
+    t_2D_bb& new_coord = bb_coord_new[layer_num];
+
+    /* Takes the pin off one edge. Returns true when that forced a from-scratch recompute. */
+    auto leave_edge = [&](const int& old_count, const int& old_pos, int& new_count, int& new_pos) -> bool {
+        update_bb_edge(net_id,
+                       bb_edge_new,
+                       bb_coord_new,
+                       bb_pin_sink_count_new,
+                       old_count,
+                       old_pos,
+                       new_count,
+                       new_pos);
+        return bb_updated_before[net_id] == NetUpdateState::GOT_FROM_SCRATCH;
+    };
+
+    /* x-direction motion. */
+    if (x_new < x_old) { /* Move to left. */
+        if (x_old == old_coord.xmax
+            && leave_edge(old_edge.xmax, old_coord.xmax, new_edge.xmax, new_coord.xmax)) {
+            return;
+        }
+
+        if (x_new < old_coord.xmin) { /* Moved past xmin. */
+            new_edge.xmin = 1;
+            new_coord.xmin = x_new;
+        } else if (x_new == old_coord.xmin) { /* Moved to xmin. */
+            new_edge.xmin = old_edge.xmin + 1;
+            new_coord.xmin = old_coord.xmin;
+        }
+    } else if (x_new > x_old) { /* Move to right. */
+        if (x_old == old_coord.xmin
+            && leave_edge(old_edge.xmin, old_coord.xmin, new_edge.xmin, new_coord.xmin)) {
+            return;
+        }
+
+        if (x_new > old_coord.xmax) { /* Moved past xmax. */
+            new_edge.xmax = 1;
+            new_coord.xmax = x_new;
+        } else if (x_new == old_coord.xmax) { /* Moved to xmax. */
+            new_edge.xmax = old_edge.xmax + 1;
+            new_coord.xmax = old_coord.xmax;
+        }
+    }
+
+    /* y-direction motion. */
+    if (y_new < y_old) { /* Move down. */
+        if (y_old == old_coord.ymax
+            && leave_edge(old_edge.ymax, old_coord.ymax, new_edge.ymax, new_coord.ymax)) {
+            return;
+        }
+
+        if (y_new < old_coord.ymin) { /* Moved past ymin. */
+            new_edge.ymin = 1;
+            new_coord.ymin = y_new;
+        } else if (y_new == old_coord.ymin) { /* Moved to ymin. */
+            new_edge.ymin = old_edge.ymin + 1;
+            new_coord.ymin = old_coord.ymin;
+        }
+    } else if (y_new > y_old) { /* Move up. */
+        if (y_old == old_coord.ymin
+            && leave_edge(old_edge.ymin, old_coord.ymin, new_edge.ymin, new_coord.ymin)) {
+            return;
+        }
+
+        if (y_new > old_coord.ymax) { /* Moved past ymax. */
+            new_edge.ymax = 1;
+            new_coord.ymax = y_new;
+        } else if (y_new == old_coord.ymax) { /* Moved to ymax. */
+            new_edge.ymax = old_edge.ymax + 1;
+            new_coord.ymax = old_coord.ymax;
+        }
+    }
+}
+
+/**
+ * @brief Updates the per-layer bounding boxes of a net when a pin moves from one layer to another.
+ *
+ * This function is called when the per-layer bounding box is used and the moving block
+ * changes layer, so the boxes of both the "from" and the "to" layer need updating.
+ * The pin is first removed from the box of its old layer: every edge of that box the pin
+ * was on loses one pin through update_bb_edge(). If the pin was the only one on such an
+ * edge, update_bb_edge() recomputes the boxes from scratch and marks the net
+ * GOT_FROM_SCRATCH, and this function returns at once. Afterwards add_block_to_bb()
+ * accounts for the pin on the new layer.
+ *
+ * The four edges of the old box are tested independently: in a degenerate box
+ * (xmin == xmax or ymin == ymax) the pin is on both opposite edges at the same time and
+ * both counts have to be decremented.
+ *
+ * @param net_id                 Net being updated.
+ * @param pin_old_loc            Pin location before the move.
+ * @param pin_new_loc            Pin location after the move (on a different layer).
+ * @param curr_bb_edge           Current per-layer edge pin counts.
+ * @param curr_bb_coord          Current per-layer bounding-box coordinates.
+ * @param bb_pin_sink_count_new  Per-layer sink counts, forwarded to update_bb_edge().
+ * @param bb_edge_new            [in/out] Updated per-layer edge pin counts.
+ * @param bb_coord_new           [in/out] Updated per-layer bounding-box coordinates.
+ */
+static inline void update_bb_layer_changed(ClusterNetId net_id,
+                                           const t_physical_tile_loc& pin_old_loc,
+                                           const t_physical_tile_loc& pin_new_loc,
+                                           const std::vector<t_2D_bb>& curr_bb_edge,
+                                           const std::vector<t_2D_bb>& curr_bb_coord,
+                                           vtr::NdMatrixProxy<int, 1> bb_pin_sink_count_new,
+                                           std::vector<t_2D_bb>& bb_edge_new,
+                                           std::vector<t_2D_bb>& bb_coord_new) {
+    const int x_old = pin_old_loc.x;
+    const int y_old = pin_old_loc.y;
+
+    const int old_layer_num = pin_old_loc.layer_num;
+    const int new_layer_num = pin_new_loc.layer_num;
+    VTR_ASSERT_SAFE(old_layer_num != new_layer_num);
+
+    const t_2D_bb& old_layer_edge = curr_bb_edge[old_layer_num];
+    const t_2D_bb& old_layer_coord = curr_bb_coord[old_layer_num];
+
+    /* Takes the pin off one edge of the old layer's box. Returns true when that forced a
+     * from-scratch recompute, after which no incremental work may be done on this net. */
+    auto remove_from_edge = [&](const int& old_count, const int& old_pos, int& new_count, int& new_pos) -> bool {
+        update_bb_edge(net_id,
+                       bb_edge_new,
+                       bb_coord_new,
+                       bb_pin_sink_count_new,
+                       old_count,
+                       old_pos,
+                       new_count,
+                       new_pos);
+        return bb_updated_before[net_id] == NetUpdateState::GOT_FROM_SCRATCH;
+    };
+
+    /* Remove the pin from the old layer. No "else" between opposite edges: xmin may equal xmax. */
+    if (x_old == old_layer_coord.xmax
+        && remove_from_edge(old_layer_edge.xmax,
+                            old_layer_coord.xmax,
+                            bb_edge_new[old_layer_num].xmax,
+                            bb_coord_new[old_layer_num].xmax)) {
+        return;
+    }
+    if (x_old == old_layer_coord.xmin
+        && remove_from_edge(old_layer_edge.xmin,
+                            old_layer_coord.xmin,
+                            bb_edge_new[old_layer_num].xmin,
+                            bb_coord_new[old_layer_num].xmin)) {
+        return;
+    }
+
+    /* Same for y: ymin may equal ymax. */
+    if (y_old == old_layer_coord.ymax
+        && remove_from_edge(old_layer_edge.ymax,
+                            old_layer_coord.ymax,
+                            bb_edge_new[old_layer_num].ymax,
+                            bb_coord_new[old_layer_num].ymax)) {
+        return;
+    }
+    if (y_old == old_layer_coord.ymin
+        && remove_from_edge(old_layer_edge.ymin,
+                            old_layer_coord.ymin,
+                            bb_edge_new[old_layer_num].ymin,
+                            bb_coord_new[old_layer_num].ymin)) {
+        return;
+    }
+
+    /* Add the pin to the new layer. */
+    add_block_to_bb(pin_new_loc,
+                    curr_bb_edge[new_layer_num],
+                    curr_bb_coord[new_layer_num],
+                    bb_edge_new[new_layer_num],
+                    bb_coord_new[new_layer_num]);
+}
+
+/**
+ * @brief Produces the per-layer sink-pin counts of a net after one pin moved.
+ *
+ * The current counts are copied for every layer of the device grid; unless the moved pin
+ * is an output pin, one sink is then taken from the old layer and given to the new layer.
+ *
+ * @param pin_old_loc                Pin location before the move.
+ * @param pin_new_loc                Pin location after the move.
+ * @param curr_layer_pin_sink_count  Current per-layer sink counts.
+ * @param bb_pin_sink_count_new      [out] Updated per-layer sink counts.
+ * @param is_output_pin              True when the moved pin is an output pin, in which case
+ *                                   the counts are copied unchanged.
+ */
+static void update_bb_pin_sink_count(const t_physical_tile_loc& pin_old_loc,
+                                     const t_physical_tile_loc& pin_new_loc,
+                                     const vtr::NdMatrixProxy<int, 1> curr_layer_pin_sink_count,
+                                     vtr::NdMatrixProxy<int, 1> bb_pin_sink_count_new,
+                                     bool is_output_pin) {
+    VTR_ASSERT_SAFE(curr_layer_pin_sink_count[pin_old_loc.layer_num] > 0 || is_output_pin);
+
+    const int total_layers = g_vpr_ctx.device().grid.get_num_layers();
+    for (int layer_num = 0; layer_num < total_layers; ++layer_num) {
+        bb_pin_sink_count_new[layer_num] = curr_layer_pin_sink_count[layer_num];
+    }
+
+    if (is_output_pin) {
+        return;
+    }
+
+    /* A sink changed place: shift one count from its old layer to its new one. */
+    bb_pin_sink_count_new[pin_old_loc.layer_num] -= 1;
+    bb_pin_sink_count_new[pin_new_loc.layer_num] += 1;
+}
+
+/**
+ * @brief Removes one pin from a bounding-box edge.
+ *
+ * When other pins remain on the edge, the edge keeps its coordinate and its pin count drops
+ * by one. When the departing pin was the only one on the edge, the per-layer bounding boxes
+ * of the net are recomputed from scratch and the net is marked GOT_FROM_SCRATCH; the two
+ * "new_*" outputs are then not written by this function.
+ *
+ * @param net_id                   Net being updated.
+ * @param bb_edge_new              [in/out] Per-layer edge pin counts (rewritten on a recompute).
+ * @param bb_coord_new             [in/out] Per-layer coordinates (rewritten on a recompute).
+ * @param bb_layer_pin_sink_count  Per-layer sink counts (rewritten on a recompute).
+ * @param old_num_block_on_edge    Pin count of the edge before the pin left.
+ * @param old_edge_coord           Coordinate of the edge before the pin left.
+ * @param new_num_block_on_edge    [out] Pin count of the edge after the pin left.
+ * @param new_edge_coord           [out] Coordinate of the edge after the pin left.
+ */
+static inline void update_bb_edge(ClusterNetId net_id,
+                                  std::vector<t_2D_bb>& bb_edge_new,
+                                  std::vector<t_2D_bb>& bb_coord_new,
+                                  vtr::NdMatrixProxy<int, 1> bb_layer_pin_sink_count,
+                                  const int& old_num_block_on_edge,
+                                  const int& old_edge_coord,
+                                  int& new_num_block_on_edge,
+                                  int& new_edge_coord) {
+    if (old_num_block_on_edge != 1) {
+        /* The edge stays where it is, with one pin less on it. */
+        new_num_block_on_edge = old_num_block_on_edge - 1;
+        new_edge_coord = old_edge_coord;
+        return;
+    }
+
+    /* The last pin on this edge left: the new edge position is unknown, so start over. */
+    get_layer_bb_from_scratch(net_id,
+                              bb_edge_new,
+                              bb_coord_new,
+                              bb_layer_pin_sink_count);
+    bb_updated_before[net_id] = NetUpdateState::GOT_FROM_SCRATCH;
+}
+
+/**
+ * @brief Accounts for a pin that arrived on a layer in that layer's bounding box.
+ *
+ * This is used only for the box of the new layer when a block moves there from another
+ * layer, so only the effect of the additional pin on the four edges is evaluated: an edge
+ * the pin lies beyond is moved to the pin (with a count of 1), an edge the pin lies exactly
+ * on gets one more pin, and any other edge is left as it is in the "new" structures.
+ *
+ * @param new_pin_loc   Location of the arriving pin.
+ * @param bb_edge_old   Edge pin counts of the layer before the pin arrived.
+ * @param bb_coord_old  Coordinates of the layer's box before the pin arrived.
+ * @param bb_edge_new   [in/out] Edge pin counts after the pin arrived.
+ * @param bb_coord_new  [in/out] Coordinates of the layer's box after the pin arrived.
+ */
+static void add_block_to_bb(const t_physical_tile_loc& new_pin_loc,
+                            const t_2D_bb& bb_edge_old,
+                            const t_2D_bb& bb_coord_old,
+                            t_2D_bb& bb_edge_new,
+                            t_2D_bb& bb_coord_new) {
+    const int pin_x = new_pin_loc.x;
+    const int pin_y = new_pin_loc.y;
+
+    /* Right edge. */
+    if (pin_x == bb_coord_old.xmax) {
+        bb_edge_new.xmax = bb_edge_old.xmax + 1;
+    } else if (pin_x > bb_coord_old.xmax) {
+        bb_edge_new.xmax = 1;
+        bb_coord_new.xmax = pin_x;
+    }
+
+    /* Left edge. */
+    if (pin_x == bb_coord_old.xmin) {
+        bb_edge_new.xmin = bb_edge_old.xmin + 1;
+    } else if (pin_x < bb_coord_old.xmin) {
+        bb_edge_new.xmin = 1;
+        bb_coord_new.xmin = pin_x;
+    }
+
+    /* Top edge. */
+    if (pin_y == bb_coord_old.ymax) {
+        bb_edge_new.ymax = bb_edge_old.ymax + 1;
+    } else if (pin_y > bb_coord_old.ymax) {
+        bb_edge_new.ymax = 1;
+        bb_coord_new.ymax = pin_y;
+    }
+
+    /* Bottom edge. */
+    if (pin_y == bb_coord_old.ymin) {
+        bb_edge_new.ymin = bb_edge_old.ymin + 1;
+    } else if (pin_y < bb_coord_old.ymin) {
+        bb_edge_new.ymin = 1;
+        bb_coord_new.ymin = pin_y;
+    }
+}
+
+/**
+ * @brief Computes the 3D bounding box of a net from the current block locations.
+ *
+ * The box starts at the driver pin and is grown over all sink pins. Pin x/y positions are
+ * clamped to 1..grid.width()-2 and 1..grid.height()-2, and pin layers to
+ * 0..grid.get_num_layers()-1, before they are used.
+ *
+ * @param net_id              Net whose bounding box is computed.
+ * @param coords              [out] Min/max x, y and layer of the box.
+ * @param num_on_edges        [out] Number of pins (driver included) lying on each face of the box.
+ * @param num_sink_pin_layer  [out] Number of sink pins on each layer.
+ */
+static void get_bb_from_scratch(ClusterNetId net_id,
+                                t_bb& coords,
+                                t_bb& num_on_edges,
+                                vtr::NdMatrixProxy<int, 1> num_sink_pin_layer) {
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+    auto& place_ctx = g_vpr_ctx.placement();
+    auto& device_ctx = g_vpr_ctx.device();
+    auto& grid = device_ctx.grid;
+
+    /* Folds one pin position into the running [lo, hi] range of an axis and keeps the
+     * number of pins on each end of the range up to date. */
+    auto absorb = [](int pos, int& lo, int& hi, int& lo_count, int& hi_count) {
+        if (pos == lo) {
+            lo_count++;
+        }
+        if (pos == hi) { /* Recall that lo could equal hi -- don't use else */
+            hi_count++;
+        } else if (pos < lo) {
+            lo = pos;
+            lo_count = 1;
+        } else if (pos > hi) {
+            hi = pos;
+            hi_count = 1;
+        }
+    };
+
+    /* The driver pin seeds the box. */
+    ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id);
+    int pnum = net_pin_to_tile_pin_index(net_id, 0);
+    VTR_ASSERT_SAFE(pnum >= 0);
+    int x = place_ctx.block_locs[bnum].loc.x
+            + physical_tile_type(bnum)->pin_width_offset[pnum];
+    int y = place_ctx.block_locs[bnum].loc.y
+            + physical_tile_type(bnum)->pin_height_offset[pnum];
+    int pin_layer = place_ctx.block_locs[bnum].loc.layer;
+
+    x = max(min<int>(x, grid.width() - 2), 1);
+    y = max(min<int>(y, grid.height() - 2), 1);
+    pin_layer = max(min<int>(pin_layer, grid.get_num_layers() - 1), 0);
+
+    int xmin = x, xmax = x;
+    int ymin = y, ymax = y;
+    int layer_min = pin_layer, layer_max = pin_layer;
+
+    int xmin_edge = 1, xmax_edge = 1;
+    int ymin_edge = 1, ymax_edge = 1;
+    int layer_min_edge = 1, layer_max_edge = 1;
+
+    for (int layer_num = 0; layer_num < grid.get_num_layers(); layer_num++) {
+        num_sink_pin_layer[layer_num] = 0;
+    }
+
+    /* Grow the box over every sink pin. */
+    for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) {
+        bnum = cluster_ctx.clb_nlist.pin_block(pin_id);
+        pnum = tile_pin_index(pin_id);
+        x = place_ctx.block_locs[bnum].loc.x
+            + physical_tile_type(bnum)->pin_width_offset[pnum];
+        y = place_ctx.block_locs[bnum].loc.y
+            + physical_tile_type(bnum)->pin_height_offset[pnum];
+        pin_layer = place_ctx.block_locs[bnum].loc.layer;
+
+        /* Code below counts IO blocks as being within the 1..grid.width()-2, 1..grid.height()-2 clb array. *
+         * This is because channels do not go out of the 0..grid.width()-2, 0..grid.height()-2 range, and   *
+         * I always take all channels impinging on the bounding box to be within   *
+         * that bounding box.  Hence, this "movement" of IO blocks does not affect *
+         * which channels are included within the bounding box, and it             *
+         * simplifies the code a lot.                                              */
+
+        x = max(min<int>(x, grid.width() - 2), 1);  //-2 for no perim channels
+        y = max(min<int>(y, grid.height() - 2), 1); //-2 for no perim channels
+        pin_layer = max(min<int>(pin_layer, grid.get_num_layers() - 1), 0);
+
+        absorb(x, xmin, xmax, xmin_edge, xmax_edge);
+        absorb(y, ymin, ymax, ymin_edge, ymax_edge);
+        absorb(pin_layer, layer_min, layer_max, layer_min_edge, layer_max_edge);
+
+        num_sink_pin_layer[pin_layer]++;
+    }
+
+    /* Copy the coordinates and number on edges information into the proper   *
+     * structures.                                                            */
+    coords.xmin = xmin;
+    coords.xmax = xmax;
+    coords.ymin = ymin;
+    coords.ymax = ymax;
+    coords.layer_min = layer_min;
+    coords.layer_max = layer_max;
+    VTR_ASSERT_DEBUG(layer_min >= 0 && layer_min < device_ctx.grid.get_num_layers());
+    VTR_ASSERT_DEBUG(layer_max >= 0 && layer_max < device_ctx.grid.get_num_layers());
+
+    num_on_edges.xmin = xmin_edge;
+    num_on_edges.xmax = xmax_edge;
+    num_on_edges.ymin = ymin_edge;
+    num_on_edges.ymax = ymax_edge;
+    num_on_edges.layer_min = layer_min_edge;
+    num_on_edges.layer_max = layer_max_edge;
+}
+
+/**
+ * @brief Computes the per-layer bounding boxes of a net from the current block locations.
+ *
+ * The box of every layer starts at the (clamped) x/y position of the driver pin and is
+ * grown over the sink pins located on that layer. Pin x/y positions are clamped to
+ * 1..grid.width()-2 and 1..grid.height()-2 before they are used.
+ *
+ * @param net_id                Net whose bounding boxes are computed.
+ * @param num_on_edges          [out] Per layer, the number of pins lying on each edge of the box.
+ * @param coords                [out] Per layer, the coordinates of the box.
+ * @param layer_pin_sink_count  [out] Number of sink pins on each layer.
+ */
+static void get_layer_bb_from_scratch(ClusterNetId net_id,
+                                      std::vector<t_2D_bb>& num_on_edges,
+                                      std::vector<t_2D_bb>& coords,
+                                      vtr::NdMatrixProxy<int, 1> layer_pin_sink_count) {
+    auto& device_ctx = g_vpr_ctx.device();
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+    auto& place_ctx = g_vpr_ctx.placement();
+    auto& grid = device_ctx.grid;
+    const int num_layers = device_ctx.grid.get_num_layers();
+
+    /* Folds one pin position into the running [lo, hi] range of an axis and keeps the
+     * number of pins on each end of the range up to date. */
+    auto absorb = [](int pos, int& lo, int& hi, int& lo_count, int& hi_count) {
+        if (pos == lo) {
+            lo_count++;
+        }
+        if (pos == hi) { /* Recall that lo could equal hi -- don't use else */
+            hi_count++;
+        } else if (pos < lo) {
+            lo = pos;
+            lo_count = 1;
+        } else if (pos > hi) {
+            hi = pos;
+            hi_count = 1;
+        }
+    };
+
+    ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id);
+    int pnum_src = net_pin_to_tile_pin_index(net_id, 0);
+    VTR_ASSERT_SAFE(pnum_src >= 0);
+    int x_src = place_ctx.block_locs[bnum].loc.x
+                + physical_tile_type(bnum)->pin_width_offset[pnum_src];
+    int y_src = place_ctx.block_locs[bnum].loc.y
+                + physical_tile_type(bnum)->pin_height_offset[pnum_src];
+
+    x_src = max(min<int>(x_src, grid.width() - 2), 1);
+    y_src = max(min<int>(y_src, grid.height() - 2), 1);
+
+    // TODO: Currently we are assuming that crossing can only happen from OPIN. Because of that,
+    // when per-layer bounding box is used, we want the bounding box on each layer to also include
+    // the location of source since the connection on each layer starts from that location.
+    std::vector<int> xmin(num_layers, x_src);
+    std::vector<int> xmax(num_layers, x_src);
+    std::vector<int> ymin(num_layers, y_src);
+    std::vector<int> ymax(num_layers, y_src);
+    std::vector<int> xmin_edge(num_layers, 1);
+    std::vector<int> xmax_edge(num_layers, 1);
+    std::vector<int> ymin_edge(num_layers, 1);
+    std::vector<int> ymax_edge(num_layers, 1);
+
+    std::vector<int> num_sink_pin_layer(num_layers, 0);
+
+    for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) {
+        bnum = cluster_ctx.clb_nlist.pin_block(pin_id);
+        int pnum = tile_pin_index(pin_id);
+        int layer = place_ctx.block_locs[bnum].loc.layer;
+        VTR_ASSERT_SAFE(layer >= 0 && layer < num_layers);
+        num_sink_pin_layer[layer]++;
+        int x = place_ctx.block_locs[bnum].loc.x
+                + physical_tile_type(bnum)->pin_width_offset[pnum];
+        int y = place_ctx.block_locs[bnum].loc.y
+                + physical_tile_type(bnum)->pin_height_offset[pnum];
+
+        /* Code below counts IO blocks as being within the 1..grid.width()-2, 1..grid.height()-2 clb array. *
+         * This is because channels do not go out of the 0..grid.width()-2, 0..grid.height()-2 range, and   *
+         * I always take all channels impinging on the bounding box to be within   *
+         * that bounding box.  Hence, this "movement" of IO blocks does not affect *
+         * which channels are included within the bounding box, and it             *
+         * simplifies the code a lot.                                              */
+
+        x = max(min<int>(x, grid.width() - 2), 1);  //-2 for no perim channels
+        y = max(min<int>(y, grid.height() - 2), 1); //-2 for no perim channels
+
+        /* Only the box of the layer this sink is on is affected. */
+        absorb(x, xmin[layer], xmax[layer], xmin_edge[layer], xmax_edge[layer]);
+        absorb(y, ymin[layer], ymax[layer], ymin_edge[layer], ymax_edge[layer]);
+    }
+
+    /* Copy the coordinates and number on edges information into the proper   *
+     * structures.                                                            */
+    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+        layer_pin_sink_count[layer_num] = num_sink_pin_layer[layer_num];
+
+        t_2D_bb& layer_coords = coords[layer_num];
+        layer_coords.xmin = xmin[layer_num];
+        layer_coords.xmax = xmax[layer_num];
+        layer_coords.ymin = ymin[layer_num];
+        layer_coords.ymax = ymax[layer_num];
+        layer_coords.layer_num = layer_num;
+
+        t_2D_bb& layer_edges = num_on_edges[layer_num];
+        layer_edges.xmin = xmin_edge[layer_num];
+        layer_edges.xmax = xmax_edge[layer_num];
+        layer_edges.ymin = ymin_edge[layer_num];
+        layer_edges.ymax = ymax_edge[layer_num];
+        layer_edges.layer_num = layer_num;
+    }
+}
+
+static double get_net_cost(ClusterNetId net_id, const t_bb& bb) {
+    /* Returns the wiring cost of net net_id, computed from the extents of  *
+     * its bounding box bb (used when a single cube bounding box is kept).  */
+
+    double ncost, crossing;
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+
+    crossing = wirelength_crossing_count( // correction factor looked up by the net's total pin count
+        cluster_ctx.clb_nlist.net_pins(net_id).size());
+
+    /* Could insert a check for xmin == xmax.  In that case, assume  *
+     * connection will be made with no bends and hence no x-cost.    *
+     * Same thing for y-cost.                                        */
+
+    /* Cost = bounding box span * crossing count * channel factor,   *
+     * evaluated for the x direction first, then y, and summed.      */
+
+    ncost = (bb.xmax - bb.xmin + 1) * crossing
+            * chanx_place_cost_fac[bb.ymax][bb.ymin - 1]; // NOTE(review): relies on bb.ymin >= 1 -- confirm against the bounding box builders
+
+    ncost += (bb.ymax - bb.ymin + 1) * crossing
+             * chany_place_cost_fac[bb.xmax][bb.xmin - 1]; // NOTE(review): likewise relies on bb.xmin >= 1
+
+    return (ncost);
+}
+
+static double get_net_layer_bb_wire_cost(ClusterNetId /* net_id */,
+                                 const std::vector<t_2D_bb>& bb,
+                                 const vtr::NdMatrixProxy<int, 1> layer_pin_sink_count) {
+    /* Returns the wiring cost of one net as the sum, over every layer that *
+     * holds at least one sink, of the cost of that layer's bounding box.   */
+
+    double ncost = 0.;
+    double crossing = 0.;
+    int num_layers = g_vpr_ctx.device().grid.get_num_layers();
+
+    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+        VTR_ASSERT(layer_pin_sink_count[layer_num] != OPEN); // OPEN marks an invalid (unset) sink count
+        if (layer_pin_sink_count[layer_num] == 0) {
+            continue; // no sinks on this layer, so it adds nothing to the cost
+        }
+        /*
+        Adjust the bounding box half perimeter by the wirelength correction
+        factor based on terminal count, which is 1 for the source + the number
+        of sinks on this layer.
+        */
+        crossing = wirelength_crossing_count(layer_pin_sink_count[layer_num] + 1);
+
+        /* Could insert a check for xmin == xmax.  In that case, assume  *
+         * connection will be made with no bends and hence no x-cost.    *
+         * Same thing for y-cost.                                        */
+
+        /* Cost = bounding box span * crossing count * channel factor,   *
+         * evaluated for the x direction first, then y, and summed.      */
+
+        ncost += (bb[layer_num].xmax - bb[layer_num].xmin + 1) * crossing
+                 * chanx_place_cost_fac[bb[layer_num].ymax][bb[layer_num].ymin - 1]; // NOTE(review): relies on ymin >= 1 -- confirm against the bb builders
+
+        ncost += (bb[layer_num].ymax - bb[layer_num].ymin + 1) * crossing
+                 * chany_place_cost_fac[bb[layer_num].xmax][bb[layer_num].xmin - 1]; // NOTE(review): likewise relies on xmin >= 1
+    }
+
+    return (ncost);
+}
+
+static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bb) { // wirelength estimate of net_id from its bounding box bb
+    double ncost, crossing;
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+
+    crossing = wirelength_crossing_count( // correction factor looked up by the net's total pin count
+        cluster_ctx.clb_nlist.net_pins(net_id).size());
+
+    /* Could insert a check for xmin == xmax.  In that case, assume  *
+     * connection will be made with no bends and hence no x-length.  *
+     * Same thing for the y direction.                               */
+
+    /* Estimate = bounding box span * crossing count.  Unlike the    *
+     * cost routines, no channel factor is applied.  Do x, then y.   */
+
+    ncost = (bb.xmax - bb.xmin + 1) * crossing;
+
+    ncost += (bb.ymax - bb.ymin + 1) * crossing;
+
+    return (ncost);
+}
+
+static double get_net_wirelength_from_layer_bb(ClusterNetId /* net_id */,
+                                                const std::vector<t_2D_bb>& bb,
+                                                const vtr::NdMatrixProxy<int, 1> layer_pin_sink_count) {
+    /* Returns the wirelength estimate of one net: the sum, over every layer *
+     * holding at least one sink, of that layer's bounding box estimate.     */
+
+    double ncost = 0.;
+    double crossing = 0.;
+    int num_layers = g_vpr_ctx.device().grid.get_num_layers();
+
+    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+        VTR_ASSERT_SAFE (layer_pin_sink_count[layer_num] != OPEN); // OPEN marks an invalid (unset) sink count
+        if (layer_pin_sink_count[layer_num] == 0) {
+            continue; // no sinks on this layer, so it adds nothing to the estimate
+        }
+        crossing = wirelength_crossing_count(layer_pin_sink_count[layer_num] + 1); // +1 accounts for the source terminal
+
+        /* Could insert a check for xmin == xmax.  In that case, assume  *
+         * connection will be made with no bends and hence no x-length.  *
+         * Same thing for the y direction.                               */
+
+        /* Estimate = bounding box span * crossing count.  Unlike the    *
+         * cost routines, no channel factor is applied.  Do x, then y.   */
+
+        ncost += (bb[layer_num].xmax - bb[layer_num].xmin + 1) * crossing;
+
+        ncost += (bb[layer_num].ymax - bb[layer_num].ymin + 1) * crossing;
+    }
+
+    return (ncost);
+}
+
+static double recompute_bb_cost() { // returns the sum of the stored net_cost of every non-ignored net
+    double cost = 0;
+
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+
+    for (auto net_id : cluster_ctx.clb_nlist.nets()) {       /* for each net ... */
+        if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { /* Do only if not ignored. */
+            /* Only the stored per-net costs are re-summed; no bounding box is rebuilt here. */
+            cost += net_cost[net_id];
+        }
+    }
+
+    return (cost);
+}
+
+static double wirelength_crossing_count(size_t fanout) {
+    /* Returns the expected "crossing count" of a net with `fanout` pins.   *
+     * Linearly extrapolated above MAX_FANOUT_CROSSING_COUNT pins.          */
+
+    if (fanout > MAX_FANOUT_CROSSING_COUNT) {
+        return 2.7933 + 0.02616 * (fanout - MAX_FANOUT_CROSSING_COUNT);
+    } else {
+        return cross_count[fanout - 1]; // NOTE(review): fanout == 0 would wrap this unsigned index -- callers must pass >= 1
+    }
+}
+
+static void set_bb_delta_cost(const int num_affected_nets, double& bb_delta_c) {
+    /* Computes the proposed cost of each affected net and adds its change over net_cost to bb_delta_c. */
+    const auto& cube_bb = g_vpr_ctx.placement().cube_bb; // loop-invariant: looked up once instead of once per net
+    for (int inet_affected = 0; inet_affected < num_affected_nets; inet_affected++) {
+        ClusterNetId net_id = ts_nets_to_update[inet_affected];
+
+        if (cube_bb) {
+            proposed_net_cost[net_id] = get_net_cost(net_id,
+                                                     ts_bb_coord_new[net_id]);
+        } else {
+            proposed_net_cost[net_id] = get_net_layer_bb_wire_cost(net_id,
+                                                           layer_ts_bb_coord_new[net_id],
+                                                           ts_layer_sink_pin_count[size_t(net_id)]);
+        }
+
+        bb_delta_c += proposed_net_cost[net_id] - net_cost[net_id];
+    }
+}
+
+int find_affected_nets_and_update_costs(
+    const t_place_algorithm& place_algorithm,
+    const PlaceDelayModel* delay_model,
+    const PlacerCriticalities* criticalities,
+    t_pl_blocks_to_be_moved& blocks_affected,
+    double& bb_delta_c,
+    double& timing_delta_c) {
+    VTR_ASSERT_SAFE(bb_delta_c == 0.); // both deltas are accumulated into, so they must start at zero
+    VTR_ASSERT_SAFE(timing_delta_c == 0.);
+    auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist;
+
+    int num_affected_nets = 0; // handed to update_net_info_on_pin_move() below and returned to the caller
+
+    /* Visit every block that is part of this move. */
+    for (int iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) {
+        const auto& moving_block_inf = blocks_affected.moved_blocks[iblk];
+        auto& affected_pins = blocks_affected.affected_pins;
+        ClusterBlockId blk = blocks_affected.moved_blocks[iblk].block_num;
+
+        /* Visit every pin of the moved block and update the info of the net it is on. */
+        for (ClusterPinId blk_pin : clb_nlist.block_pins(blk)) {
+            bool is_src_moving = false; // only evaluated for sink pins; stays false for any other pin type
+            if (clb_nlist.pin_type(blk_pin) == PinType::SINK) {
+                ClusterNetId net_id = clb_nlist.pin_net(blk_pin);
+                is_src_moving = driven_by_moved_block(net_id, // presumably true when the net's driver is on a moved block -- TODO confirm
+                                                      blocks_affected.num_moved_blocks,
+                                                      blocks_affected.moved_blocks);
+            }
+            update_net_info_on_pin_move(place_algorithm,
+                                        delay_model,
+                                        criticalities,
+                                        blk,
+                                        blk_pin,
+                                        moving_block_inf,
+                                        affected_pins,
+                                        timing_delta_c,
+                                        num_affected_nets,
+                                        is_src_moving);
+        }
+    }
+
+    /* Now update the bounding box costs (since the net bounding     *
+     * boxes are up-to-date). The cost is only updated once per net. */
+    set_bb_delta_cost(num_affected_nets, bb_delta_c);
+
+    return num_affected_nets;
+}
+
+double comp_bb_cost(e_cost_methods method) { // rebuilds every non-ignored net's bounding box and cost; returns the summed cost
+    double cost = 0;
+    double expected_wirelength = 0.0; // only accumulated and logged when method == CHECK
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+    auto& place_move_ctx = g_placer_ctx.mutable_move();
+
+    for (auto net_id : cluster_ctx.clb_nlist.nets()) {       /* for each net ... */
+        if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { /* Do only if not ignored. */
+            /* Nets with fewer than SMALL_NET sinks, and every net when method is  *
+             * CHECK, use the bounding box routine that keeps no edge counts.      */
+            if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET
+                && method == NORMAL) {
+                get_bb_from_scratch(net_id,
+                                    place_move_ctx.bb_coords[net_id],
+                                    place_move_ctx.bb_num_on_edges[net_id],
+                                    place_move_ctx.num_sink_pin_layer[size_t(net_id)]);
+            } else {
+                get_non_updatable_bb(net_id,
+                                     place_move_ctx.bb_coords[net_id],
+                                     place_move_ctx.num_sink_pin_layer[size_t(net_id)]);
+            }
+
+            net_cost[net_id] = get_net_cost(net_id, place_move_ctx.bb_coords[net_id]); // stored per net as well as summed
+            cost += net_cost[net_id];
+            if (method == CHECK)
+                expected_wirelength += get_net_wirelength_estimate(net_id, place_move_ctx.bb_coords[net_id]);
+        }
+    }
+
+    if (method == CHECK) {
+        VTR_LOG("\n");
+        VTR_LOG("BB estimate of min-dist (placement) wire length: %.0f\n",
+                expected_wirelength);
+    }
+    return cost;
+}
+
+double comp_layer_bb_cost(e_cost_methods method) { // per-layer variant: rebuilds each non-ignored net's layer bounding boxes and cost
+    double cost = 0;
+    double expected_wirelength = 0.0; // only accumulated and logged when method == CHECK
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+    auto& place_move_ctx = g_placer_ctx.mutable_move();
+
+    for (auto net_id : cluster_ctx.clb_nlist.nets()) {       /* for each net ... */
+        if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { /* Do only if not ignored. */
+            /* Nets with fewer than SMALL_NET sinks, and every net when method is  *
+             * CHECK, use the bounding box routine that keeps no edge counts.      */
+            if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET
+                && method == NORMAL) {
+                get_layer_bb_from_scratch(net_id,
+                                          place_move_ctx.layer_bb_num_on_edges[net_id],
+                                          place_move_ctx.layer_bb_coords[net_id],
+                                          place_move_ctx.num_sink_pin_layer[size_t(net_id)]);
+            } else {
+                get_non_updatable_layer_bb(net_id,
+                                           place_move_ctx.layer_bb_coords[net_id],
+                                           place_move_ctx.num_sink_pin_layer[size_t(net_id)]);
+            }
+
+            net_cost[net_id] = get_net_layer_bb_wire_cost(net_id, // stored per net as well as summed
+                                                  place_move_ctx.layer_bb_coords[net_id],
+                                                  place_move_ctx.num_sink_pin_layer[size_t(net_id)]);
+            cost += net_cost[net_id];
+            if (method == CHECK)
+                expected_wirelength += get_net_wirelength_from_layer_bb(net_id,
+                                                                         place_move_ctx.layer_bb_coords[net_id],
+                                                                         place_move_ctx.num_sink_pin_layer[size_t(net_id)]);
+        }
+    }
+
+    if (method == CHECK) {
+        VTR_LOG("\n");
+        VTR_LOG("BB estimate of min-dist (placement) wire length: %.0f\n",
+                expected_wirelength);
+    }
+    return cost;
+}
+
+void update_move_nets(int num_nets_affected,
+                      const bool cube_bb) {
+    /* Commit the proposed bounding boxes, sink counts and costs of the affected nets, then reset their flags. */
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+    auto& place_move_ctx = g_placer_ctx.mutable_move();
+    const int num_layers = g_vpr_ctx.device().grid.get_num_layers(); // loop-invariant, so read once instead of per net and layer
+
+    for (int inet_affected = 0; inet_affected < num_nets_affected; inet_affected++) {
+        ClusterNetId net_id = ts_nets_to_update[inet_affected];
+
+        if (cube_bb) {
+            place_move_ctx.bb_coords[net_id] = ts_bb_coord_new[net_id];
+        } else {
+            place_move_ctx.layer_bb_coords[net_id] = layer_ts_bb_coord_new[net_id];
+        }
+
+        for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+            place_move_ctx.num_sink_pin_layer[size_t(net_id)][layer_num] = ts_layer_sink_pin_count[size_t(net_id)][layer_num];
+        }
+
+        /* Edge counts are only committed for nets with at least SMALL_NET sinks. */
+        if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET) {
+            if (cube_bb) {
+                place_move_ctx.bb_num_on_edges[net_id] = ts_bb_edge_new[net_id];
+            } else {
+                place_move_ctx.layer_bb_num_on_edges[net_id] = layer_ts_bb_edge_new[net_id];
+            }
+        }
+
+        net_cost[net_id] = proposed_net_cost[net_id];
+
+        /* negative proposed_net_cost value is acting as a flag to mean not computed yet. */
+        proposed_net_cost[net_id] = -1;
+        bb_updated_before[net_id] = NetUpdateState::NOT_UPDATED_YET;
+    }
+}
+
+void reset_move_nets(int num_nets_affected) {
+    /* Clear the per-net flags of the first num_nets_affected entries of ts_nets_to_update. */
+    for (int inet_affected = 0; inet_affected < num_nets_affected;
+         inet_affected++) {
+        ClusterNetId net_id = ts_nets_to_update[inet_affected];
+        proposed_net_cost[net_id] = -1; // a negative value flags "not computed yet"
+        bb_updated_before[net_id] = NetUpdateState::NOT_UPDATED_YET;
+    }
+}
+
+void recompute_costs_from_scratch(const t_placer_opts& placer_opts,
+                                const t_noc_opts& noc_opts,
+                                const PlaceDelayModel* delay_model,
+                                const PlacerCriticalities* criticalities,
+                                t_placer_costs* costs) { // re-sums each cost term, checks it against the value in *costs, then stores it
+    auto check_and_print_cost = [](double new_cost, // reports a VPR_ERROR when the two values are not close per vtr::isclose
+                                   double old_cost,
+                                   const std::string& cost_name) {
+        if (!vtr::isclose(new_cost, old_cost, ERROR_TOL, 0.)) {
+            std::string msg = vtr::string_fmt(
+                "in recompute_costs_from_scratch: new_%s = %g, old %s = %g, ERROR_TOL = %g\n",
+                cost_name.c_str(), new_cost, cost_name.c_str(), old_cost, ERROR_TOL);
+            VPR_ERROR(VPR_ERROR_PLACE, msg.c_str());
+        }
+    };
+
+    double new_bb_cost = recompute_bb_cost();
+    check_and_print_cost(new_bb_cost, costs->bb_cost, "bb_cost");
+    costs->bb_cost = new_bb_cost;
+
+    if (placer_opts.place_algorithm.is_timing_driven()) {
+        double new_timing_cost = 0.;
+        comp_td_costs(delay_model, *criticalities, &new_timing_cost);
+        check_and_print_cost(new_timing_cost, costs->timing_cost, "timing_cost");
+        costs->timing_cost = new_timing_cost; // note: costs->cost is not touched on this branch
+    } else {
+        VTR_ASSERT(placer_opts.place_algorithm == BOUNDING_BOX_PLACE);
+        costs->cost = new_bb_cost * costs->bb_cost_norm;
+    }
+
+    if (noc_opts.noc) {
+        NocCostTerms new_noc_cost;
+        recompute_noc_costs(new_noc_cost);
+
+        check_and_print_cost(new_noc_cost.aggregate_bandwidth, // aggregate bandwidth is always checked, with no minimum threshold
+                             costs->noc_cost_terms.aggregate_bandwidth,
+                             "noc_aggregate_bandwidth");
+        costs->noc_cost_terms.aggregate_bandwidth = new_noc_cost.aggregate_bandwidth;
+
+        // only check if the recomputed cost and the current noc latency cost are within the error tolerance if the cost is above 1 picosecond.
+        // Otherwise, there is no need to check (we expect the latency cost to be above the threshold of 1 picosecond)
+        if (new_noc_cost.latency > MIN_EXPECTED_NOC_LATENCY_COST) {
+            check_and_print_cost(new_noc_cost.latency,
+                                 costs->noc_cost_terms.latency,
+                                 "noc_latency_cost");
+        }
+        costs->noc_cost_terms.latency = new_noc_cost.latency;
+
+        if (new_noc_cost.latency_overrun > MIN_EXPECTED_NOC_LATENCY_COST) { // same threshold as the latency term
+            check_and_print_cost(new_noc_cost.latency_overrun,
+                                 costs->noc_cost_terms.latency_overrun,
+                                 "noc_latency_overrun_cost");
+        }
+        costs->noc_cost_terms.latency_overrun = new_noc_cost.latency_overrun;
+
+        if (new_noc_cost.congestion > MIN_EXPECTED_NOC_CONGESTION_COST) { // congestion is only checked above its own threshold
+            check_and_print_cost(new_noc_cost.congestion,
+                                 costs->noc_cost_terms.congestion,
+                                 "noc_congestion_cost");
+        }
+        costs->noc_cost_terms.congestion = new_noc_cost.congestion;
+    }
+}
+
+void alloc_and_load_chan_w_factors_for_place_cost(float place_cost_exp) {
+    /* Allocates and loads the chanx_place_cost_fac and chany_place_cost_fac *
+     * arrays with the inverse of the average number of tracks per channel   *
+     * between [subhigh] and [sublow].  This is only useful for the cost     *
+     * function that takes the length of the net bounding box in each        *
+     * dimension divided by the average number of tracks in that direction.  *
+     * For other cost functions, you don't have to bother calling this       *
+     * routine; when using the cost function described above, however, you   *
+     * must always call this routine after you call init_chan and before     *
+     * you do any placement cost determination.  The place_cost_exp factor   *
+     * specifies to what power the width of the channel should be taken --   *
+     * larger numbers make narrower channels more expensive.                 */
+
+    auto& device_ctx = g_vpr_ctx.device();
+
+    /* 
+    Access arrays below as chan?_place_cost_fac[subhigh][sublow]. Since subhigh must be greater than or 
+    equal to sublow, we will only access the lower half of a matrix, but we allocate the whole matrix anyway 
+    for simplicity so we can use the vtr utility matrix functions.
+    */
+
+    chanx_place_cost_fac.resize({device_ctx.grid.height(), device_ctx.grid.height() + 1});
+    chany_place_cost_fac.resize({device_ctx.grid.width(), device_ctx.grid.width() + 1});
+
+    /* First compute the number of tracks between channel high and channel *
+     * low, inclusive, in an efficient manner.                             */
+
+    chanx_place_cost_fac[0][0] = device_ctx.chan_width.x_list[0];
+
+    for (size_t high = 1; high < device_ctx.grid.height(); high++) {
+        chanx_place_cost_fac[high][high] = device_ctx.chan_width.x_list[high];
+        for (size_t low = 0; low < high; low++) {
+            chanx_place_cost_fac[high][low] = chanx_place_cost_fac[high - 1][low]
+                                              + device_ctx.chan_width.x_list[high];
+        }
+    }
+
+    /* Now compute the inverse of the average number of tracks per channel *
+     * between high and low.  The cost function divides by the average     *
+     * number of tracks per channel, so by storing the inverse I convert   *
+     * this to a faster multiplication.  Take this final number to the     *
+     * place_cost_exp power -- numbers other than one mean this is no      *
+     * longer a simple "average number of tracks"; it is some power of     *
+     * that, allowing greater penalization of narrow channels.             */
+
+    for (size_t high = 0; high < device_ctx.grid.height(); high++)
+        for (size_t low = 0; low <= high; low++) {
+            /* Since we will divide the wiring cost by the average channel *
+             * capacity between high and low, having only 0 width channels *
+             * will result in infinite wiring capacity normalization       *
+             * factor, and extremely bad placer behaviour. Hence we change *
+             * this to a small (1 track) channel capacity instead.         */
+            if (chanx_place_cost_fac[high][low] == 0.0f) {
+                VTR_LOG_WARN("CHANX place cost fac is 0 at %zu %zu\n", high, low); // high/low are size_t, so %zu (not %d) is required
+                chanx_place_cost_fac[high][low] = 1.0f;
+            }
+
+            chanx_place_cost_fac[high][low] = (high - low + 1.)
+                                              / chanx_place_cost_fac[high][low];
+            chanx_place_cost_fac[high][low] = pow(
+                (double)chanx_place_cost_fac[high][low],
+                (double)place_cost_exp);
+        }
+
+    /* Now do the same thing for the y-directed channels.  First get the  *
+     * number of tracks between channel high and channel low, inclusive.  */
+
+    chany_place_cost_fac[0][0] = device_ctx.chan_width.y_list[0];
+
+    for (size_t high = 1; high < device_ctx.grid.width(); high++) {
+        chany_place_cost_fac[high][high] = device_ctx.chan_width.y_list[high];
+        for (size_t low = 0; low < high; low++) {
+            chany_place_cost_fac[high][low] = chany_place_cost_fac[high - 1][low]
+                                              + device_ctx.chan_width.y_list[high];
+        }
+    }
+
+    /* Now compute the inverse of the average number of tracks per channel *
+     * between high and low.  Take to specified power.                     */
+
+    for (size_t high = 0; high < device_ctx.grid.width(); high++)
+        for (size_t low = 0; low <= high; low++) {
+            /* Since we will divide the wiring cost by the average channel *
+             * capacity between high and low, having only 0 width channels *
+             * will result in infinite wiring capacity normalization       *
+             * factor, and extremely bad placer behaviour. Hence we change *
+             * this to a small (1 track) channel capacity instead.         */
+            if (chany_place_cost_fac[high][low] == 0.0f) {
+                VTR_LOG_WARN("CHANY place cost fac is 0 at %zu %zu\n", high, low); // high/low are size_t, so %zu (not %d) is required
+                chany_place_cost_fac[high][low] = 1.0f;
+            }
+
+            chany_place_cost_fac[high][low] = (high - low + 1.)
+                                              / chany_place_cost_fac[high][low];
+            chany_place_cost_fac[high][low] = pow(
+                (double)chany_place_cost_fac[high][low],
+                (double)place_cost_exp);
+        }
+}
+
+void free_chan_w_factors_for_place_cost () { // clears both channel cost factor matrices
+    chanx_place_cost_fac.clear();
+    chany_place_cost_fac.clear();
+}
+
+void init_place_move_structs(size_t num_nets) { // sizes the per-net cost and update-state vectors for num_nets nets
+    net_cost.resize(num_nets, -1.);
+    proposed_net_cost.resize(num_nets, -1.);
+    /* proposed_net_cost stores costs for moves not yet made and indicates when a  *
+     * net's cost has been recomputed: proposed_net_cost[inet] < 0 means the net's *
+     * cost hasn't been recomputed.                                                */
+    bb_updated_before.resize(num_nets, NetUpdateState::NOT_UPDATED_YET);
+}
+
+void free_place_move_structs() { // releases the vectors sized by init_place_move_structs()
+    vtr::release_memory(net_cost);
+    vtr::release_memory(proposed_net_cost);
+    vtr::release_memory(bb_updated_before);
+}
+
+void init_try_swap_net_cost_structs(size_t num_nets, bool cube_bb) { // sizes the temporary per-net move storage for num_nets nets
+    const int num_layers = g_vpr_ctx.device().grid.get_num_layers();
+
+    if (cube_bb) { // only the storage for the bounding box mode in use is sized
+        ts_bb_edge_new.resize(num_nets, t_bb());
+        ts_bb_coord_new.resize(num_nets, t_bb());
+    } else {
+        VTR_ASSERT_SAFE(!cube_bb); // always holds on this branch
+        layer_ts_bb_edge_new.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
+        layer_ts_bb_coord_new.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
+    }
+
+    /* This initializes the whole matrix to OPEN, which is an invalid value. */
+    ts_layer_sink_pin_count.resize({num_nets, size_t(num_layers)}, OPEN);
+
+    ts_nets_to_update.resize(num_nets, ClusterNetId::INVALID());
+}
+
+void free_try_swap_net_cost_structs() { // releases the storage sized by init_try_swap_net_cost_structs(), for both bounding box modes
+    vtr::release_memory(ts_bb_edge_new);
+    vtr::release_memory(ts_bb_coord_new);
+    vtr::release_memory(layer_ts_bb_edge_new);
+    vtr::release_memory(layer_ts_bb_coord_new);
+    ts_layer_sink_pin_count.clear();
+    vtr::release_memory(ts_nets_to_update);
+}
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
new file mode 100644
index 00000000000..57c64cadca5
--- /dev/null
+++ b/vpr/src/place/net_cost_handler.h
@@ -0,0 +1,150 @@
+#pragma once
+#include "place_delay_model.h"
+#include "timing_place.h"
+#include "move_transactions.h"
+#include "place_util.h"
+
+/**
+ * @brief The method used to calculate placement cost
+ * @details Used by comp_bb_cost() and comp_layer_bb_cost().  NORMAL means use the method that generates updatable bounding boxes for speed.
+ * CHECK means compute all bounding boxes from scratch using a very simple routine to allow checks
+ * of the other costs.
+ * NORMAL: Compute cost efficiently using incremental techniques.
+ * CHECK: Brute-force cost computation; useful to validate the more complex incremental cost update code.
+ */
+enum e_cost_methods {
+    NORMAL,
+    CHECK
+};
+
+/**
+ * @brief Find all the nets and pins affected by this swap and update costs.
+ *
+ * Find all the nets affected by this swap and update the bounding box (wiring)
+ * costs. This cost function doesn't depend on the timing info.
+ *
+ * Find all the connections affected by this swap and update the timing cost.
+ * For a connection to be affected, it not only needs to be on or driven by
+ * a block, but it also needs to have its delay changed. Otherwise, it will
+ * not be added to the affected_pins structure.
+ *
+ * For more, see update_td_delta_costs().
+ *
+ * The timing costs are calculated by getting the new connection delays,
+ * multiplied by the connection criticalities returned by the timing
+ * analyzer. These timing costs are stored in the proposed_* data structures.
+ *
+ * The change in the bounding box cost is stored in `bb_delta_c`.
+ * The change in the timing cost is stored in `timing_delta_c`.
+ * 
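+ * @param place_algorithm Forwarded to update_net_info_on_pin_move() for every pin of a moved block.
+ * @param delay_model Forwarded to update_net_info_on_pin_move().
+ * @param criticalities Forwarded to update_net_info_on_pin_move().
+ * @param blocks_affected The moved blocks; its affected_pins member is passed on to update_net_info_on_pin_move().
+ * @param bb_delta_c Must be 0 on entry; receives the change in the bounding box cost.
+ * @param timing_delta_c Must be 0 on entry; receives the change in the timing cost.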
+ * @return The number of affected nets.
+ */
+int find_affected_nets_and_update_costs(
+    const t_place_algorithm& place_algorithm,
+    const PlaceDelayModel* delay_model,
+    const PlacerCriticalities* criticalities,
+    t_pl_blocks_to_be_moved& blocks_affected,
+    double& bb_delta_c,
+    double& timing_delta_c);
+
+/**
+ * @brief Finds the bb cost from scratch (based on 3D BB).  
+ * Done only when the placement has been radically changed 
+ * (i.e. after initial placement). Otherwise find the cost 
+ * change incrementally. If method is NORMAL, we find
+ * bounding boxes that are updatable for the larger nets.
+ * If method is CHECK, all bounding boxes are found via the
+ * get_non_updatable_bb routine, to provide a cost which can be
+ * used to check the correctness of the other routine.
+ * @param method NORMAL or CHECK (see e_cost_methods).
+ * @return The bounding box cost of the placement, computed by the 3D method.
+ */
+double comp_bb_cost(e_cost_methods method);
+
+/**
+ * @brief Finds the bb cost from scratch (based on per-layer BB).  
+ * Done only when the placement has been radically changed 
+ * (i.e. after initial placement). Otherwise find the cost change 
+ * incrementally.  If method is NORMAL, we find bounding boxes
+ * that are updatable for the larger nets.  If method is CHECK, all
+ * bounding boxes are found via the get_non_updatable_layer_bb routine, to provide
+ * a cost which can be used to check the correctness of the other routine.
+ * @param method NORMAL or CHECK (see e_cost_methods).
+ * @return The placement bounding box cost, computed by the per layer method.
+ */
+double comp_layer_bb_cost(e_cost_methods method);
+
+/**
+ * @brief update net cost data structures (in placer context and net_cost in .cpp file) and reset flags (proposed_net_cost and bb_updated_before).
+ * @param num_nets_affected The number of nets affected by the move. It is used to determine the index up to which elements in ts_nets_to_update are valid.
+ * @param cube_bb True if we should use the 3D bounding box (cube_bb), false otherwise.
+ */
+void update_move_nets(int num_nets_affected,
+                      const bool cube_bb);
+
+/**
+ * @brief Reset the net cost function flags (proposed_net_cost and bb_updated_before)
+ * @param num_nets_affected The number of leading entries of ts_nets_to_update whose flags are reset.
+ */
+void reset_move_nets(int num_nets_affected);
+
+/**
+ * @brief re-calculates different terms of the cost function (wire-length, timing, NoC) and update "costs" accordingly. It is important to note that
+ * in this function bounding box and connection delays are not calculated from scratch. However, it iterates over all nets and connections and updates 
+ * their costs by a complete summation, rather than incrementally.
+ * @param placer_opts
+ * @param noc_opts
+ * @param delay_model
+ * @param criticalities
+ * @param costs passed by reference and computed by this routine (i.e. returned by reference)
+ */
+void recompute_costs_from_scratch(const t_placer_opts& placer_opts,
+                                const t_noc_opts& noc_opts,
+                                const PlaceDelayModel* delay_model,
+                                const PlacerCriticalities* criticalities,
+                                t_placer_costs* costs);
+
+/**
+ * @brief Allocates and loads the chanx_place_cost_fac and chany_place_cost_fac
+ * arrays with the inverse of the average number of tracks per channel
+ * between [subhigh] and [sublow].
+ * @param place_cost_exp It is an exponent to which you take the average inverse channel 
+ * capacity; a higher value would favour wider channels more over narrower channels during placement (usually we use 1).
+ */
+void alloc_and_load_chan_w_factors_for_place_cost(float place_cost_exp);
+
+/**
+ * @brief Frees the chanx_place_cost_fac and chany_place_cost_fac arrays.
+ */
+void free_chan_w_factors_for_place_cost ();
+
+/**
+ * @brief Resize net_cost, proposed_net_cost, and  bb_updated_before data structures to accommodate all nets.
+ * @param num_nets Number of nets in the netlist (clustered currently) that the placement engine uses.
+ */
+void init_place_move_structs(size_t num_nets);
+
+/**
+ * @brief Free net_cost, proposed_net_cost, and  bb_updated_before data structures.
+ */
+void free_place_move_structs();
+
+/**
+ * @brief Resize temporary storage data structures needed to determine which nets are affected by a move and data needed per net 
+ * about where their terminals are in order to quickly (incrementally) update their wirelength costs. These data structures are  
+ * (layer_)ts_bb_edge_new, (layer_)ts_bb_coord_new, ts_layer_sink_pin_count, and ts_nets_to_update.
+ * @param num_nets Number of nets in the netlist used by the placement engine (currently clustered netlist)
+ * @param cube_bb True if the 3D bounding box should be used, false otherwise.
+ */
+void init_try_swap_net_cost_structs(size_t num_nets, bool cube_bb);
+
+/**
+ * @brief Free (layer_)ts_bb_edge_new, (layer_)ts_bb_coord_new, ts_layer_sink_pin_count, and ts_nets_to_update data structures.
+ */
+void free_try_swap_net_cost_structs();
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index a38d5f442d9..4e7f448c34b 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -55,6 +55,7 @@
 #include "VprTimingGraphResolver.h"
 #include "timing_util.h"
 #include "timing_info.h"
+#include "concrete_timing_info.h"
 #include "tatum/echo_writer.hpp"
 #include "tatum/TimingReporter.hpp"
 
@@ -64,12 +65,12 @@
 
 #include "clustered_netlist_utils.h"
 
-#include "re_cluster.h"
-#include "re_cluster_util.h"
 #include "cluster_placement.h"
 
 #include "noc_place_utils.h"
 
+#include "net_cost_handler.h"
+
 /*  define the RL agent's reward function factor constant. This factor controls the weight of bb cost *
  *  compared to the timing cost in the agent's reward function. The reward is calculated as           *
  * -1*(1.5-REWARD_BB_TIMING_RELATIVE_WEIGHT)*timing_cost + (1+REWARD_BB_TIMING_RELATIVE_WEIGHT)*bb_cost)
@@ -97,63 +98,11 @@ static constexpr double ERROR_TOL = .01;
  * variables round-offs check.                                            */
 static constexpr int MAX_MOVES_BEFORE_RECOMPUTE = 500000;
 
-/* Flags for the states of the bounding box.                              *
- * Stored as char for memory efficiency.                                  */
-#define NOT_UPDATED_YET 'N'
-#define UPDATED_ONCE 'U'
-#define GOT_FROM_SCRATCH 'S'
-
-/* For comp_cost.  NORMAL means use the method that generates updatable   *
- * bounding boxes for speed.  CHECK means compute all bounding boxes from *
- * scratch using a very simple routine to allow checks of the other       *
- * costs.                                   
- */
-
-enum e_cost_methods {
-    NORMAL,
-    CHECK
-};
-
 constexpr float INVALID_DELAY = std::numeric_limits<float>::quiet_NaN();
 constexpr float INVALID_COST = std::numeric_limits<double>::quiet_NaN();
 
 /********************** Variables local to place.c ***************************/
 
-/* Cost of a net, and a temporary cost of a net used during move assessment. */
-static vtr::vector<ClusterNetId, double> net_cost, proposed_net_cost;
-
-/* [0...cluster_ctx.clb_nlist.nets().size()-1]                                               *
- * A flag array to indicate whether the specific bounding box has been updated   *
- * in this particular swap or not. If it has been updated before, the code       *
- * must use the updated data, instead of the out-of-date data passed into the    *
- * subroutine, particularly used in try_swap(). The value NOT_UPDATED_YET        *
- * indicates that the net has not been updated before, UPDATED_ONCE indicated    *
- * that the net has been updated once, if it is going to be updated again, the   *
- * values from the previous update must be used. GOT_FROM_SCRATCH is only        *
- * applicable for nets larger than SMALL_NETS and it indicates that the          *
- * particular bounding box cannot be updated incrementally before, hence the     *
- * bounding box is got from scratch, so the bounding box would definitely be     *
- * right, DO NOT update again.                                                   */
-static vtr::vector<ClusterNetId, char> bb_updated_before;
-
-/* The arrays below are used to precompute the inverse of the average   *
- * number of tracks per channel between [subhigh] and [sublow].  Access *
- * them as chan?_place_cost_fac[subhigh][sublow].  They are used to     *
- * speed up the computation of the cost function that takes the length  *
- * of the net bounding box in each dimension, divided by the average    *
- * number of tracks in that direction; for other cost functions they    *
- * will never be used.                                                  *
- */
-static vtr::NdMatrix<float, 2> chanx_place_cost_fac({0, 0}); //[0...device_ctx.grid.width()-2]
-static vtr::NdMatrix<float, 2> chany_place_cost_fac({0, 0}); //[0...device_ctx.grid.height()-2]
-
-/* The following arrays are used by the try_swap function for speed.   */
-/* [0...cluster_ctx.clb_nlist.nets().size()-1] */
-static vtr::vector<ClusterNetId, t_bb> ts_bb_edge_new, ts_bb_coord_new;
-static vtr::vector<ClusterNetId, std::vector<t_2D_bb>> layer_ts_bb_edge_new, layer_ts_bb_coord_new;
-static vtr::Matrix<int> ts_layer_sink_pin_count;
-static std::vector<ClusterNetId> ts_nets_to_update;
-
 /* These file-scoped variables keep track of the number of swaps       *
  * rejected, accepted or aborted. The total number of swap attempts    *
  * is the sum of the three number.                                     */
@@ -162,19 +111,6 @@ static int num_swap_accepted = 0;
 static int num_swap_aborted = 0;
 static int num_ts_called = 0;
 
-/* Expected crossing counts for nets with different #'s of pins.  From *
- * ICCAD 94 pp. 690 - 695 (with linear interpolation applied by me).   *
- * Multiplied to bounding box of a net to better estimate wire length  *
- * for higher fanout nets. Each entry is the correction factor for the *
- * fanout index-1                                                      */
-static const float cross_count[50] = {/* [0..49] */ 1.0, 1.0, 1.0, 1.0828,
-                                      1.1536, 1.2206, 1.2823, 1.3385, 1.3991, 1.4493, 1.4974, 1.5455, 1.5937,
-                                      1.6418, 1.6899, 1.7304, 1.7709, 1.8114, 1.8519, 1.8924, 1.9288, 1.9652,
-                                      2.0015, 2.0379, 2.0743, 2.1061, 2.1379, 2.1698, 2.2016, 2.2334, 2.2646,
-                                      2.2958, 2.3271, 2.3583, 2.3895, 2.4187, 2.4479, 2.4772, 2.5064, 2.5356,
-                                      2.5610, 2.5864, 2.6117, 2.6371, 2.6625, 2.6887, 2.7148, 2.7410, 2.7671,
-                                      2.7933};
-
 std::unique_ptr<FILE, decltype(&vtr::fclose)> f_move_stats_file(nullptr,
                                                                 vtr::fclose);
 
@@ -275,19 +211,6 @@ static void free_try_swap_structs();
 
 static void free_placement_structs(const t_placer_opts& placer_opts, const t_noc_opts& noc_opts);
 
-static void alloc_and_load_for_fast_cost_update(float place_cost_exp);
-
-static void free_fast_cost_update();
-
-static double comp_bb_cost(e_cost_methods method);
-
-static double comp_layer_bb_cost(e_cost_methods method);
-
-static void update_move_nets(int num_nets_affected,
-                             const bool cube_bb);
-
-static void reset_move_nets(int num_nets_affected);
-
 static e_move_result try_swap(const t_annealing_state* state,
                               t_placer_costs* costs,
                               MoveGenerator& move_generator,
@@ -337,8 +260,6 @@ static float starting_t(const t_annealing_state* state,
 
 static int count_connections();
 
-static double recompute_bb_cost();
-
 static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected);
 
 static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected);
@@ -348,132 +269,14 @@ static void invalidate_affected_connections(
     NetPinTimingInvalidator* pin_tedges_invalidator,
     TimingInfo* timing_info);
 
-static bool driven_by_moved_block(const ClusterNetId net,
-                                  const t_pl_blocks_to_be_moved& blocks_affected);
-
 static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks);
 
 static e_move_result assess_swap(double delta_c, double t);
 
-static void get_non_updateable_bb(ClusterNetId net_id,
-                                  t_bb& bb_coord_new,
-                                  vtr::NdMatrixProxy<int, 1> num_sink_pin_layer);
-
-static void get_non_updateable_layer_bb(ClusterNetId net_id,
-                                        std::vector<t_2D_bb>& bb_coord_new,
-                                        vtr::NdMatrixProxy<int, 1> num_sink_layer);
-
-static void update_bb(ClusterNetId net_id,
-                      t_bb& bb_edge_new,
-                      t_bb& bb_coord_new,
-                      vtr::NdMatrixProxy<int, 1> num_sink_pin_layer_new,
-                      t_physical_tile_loc pin_old_loc,
-                      t_physical_tile_loc pin_new_loc,
-                      bool src_pin);
-
-static void update_layer_bb(ClusterNetId net_id,
-                            std::vector<t_2D_bb>& bb_edge_new,
-                            std::vector<t_2D_bb>& bb_coord_new,
-                            vtr::NdMatrixProxy<int, 1> bb_pin_sink_count_new,
-                            t_physical_tile_loc pin_old_loc,
-                            t_physical_tile_loc pin_new_loc,
-                            bool is_output_pin);
-
-static inline void update_bb_same_layer(ClusterNetId net_id,
-                                        const t_physical_tile_loc& pin_old_loc,
-                                        const t_physical_tile_loc& pin_new_loc,
-                                        const std::vector<t_2D_bb>& curr_bb_edge,
-                                        const std::vector<t_2D_bb>& curr_bb_coord,
-                                        vtr::NdMatrixProxy<int, 1> bb_pin_sink_count_new,
-                                        std::vector<t_2D_bb>& bb_edge_new,
-                                        std::vector<t_2D_bb>& bb_coord_new);
-
-static inline void update_bb_layer_changed(ClusterNetId net_id,
-                                           const t_physical_tile_loc& pin_old_loc,
-                                           const t_physical_tile_loc& pin_new_loc,
-                                           const std::vector<t_2D_bb>& curr_bb_edge,
-                                           const std::vector<t_2D_bb>& curr_bb_coord,
-                                           vtr::NdMatrixProxy<int, 1> bb_pin_sink_count_new,
-                                           std::vector<t_2D_bb>& bb_edge_new,
-                                           std::vector<t_2D_bb>& bb_coord_new);
-
-static void update_bb_pin_sink_count(ClusterNetId net_id,
-                                     const t_physical_tile_loc& pin_old_loc,
-                                     const t_physical_tile_loc& pin_new_loc,
-                                     const vtr::NdMatrixProxy<int, 1> curr_layer_pin_sink_count,
-                                     vtr::NdMatrixProxy<int, 1> bb_pin_sink_count_new,
-                                     bool is_output_pin);
-
-static inline void update_bb_edge(ClusterNetId net_id,
-                                  std::vector<t_2D_bb>& bb_edge_new,
-                                  std::vector<t_2D_bb>& bb_coord_new,
-                                  vtr::NdMatrixProxy<int, 1> bb_layer_pin_sink_count,
-                                  const int& old_num_block_on_edge,
-                                  const int& old_edge_coord,
-                                  int& new_num_block_on_edge,
-                                  int& new_edge_coord);
-
-static void add_block_to_bb(const t_physical_tile_loc& new_pin_loc,
-                            const t_2D_bb& bb_edge_old,
-                            const t_2D_bb& bb_coord_old,
-                            t_2D_bb& bb_edge_new,
-                            t_2D_bb& bb_coord_new);
-
-static int find_affected_nets_and_update_costs(
-    const t_place_algorithm& place_algorithm,
-    const PlaceDelayModel* delay_model,
-    const PlacerCriticalities* criticalities,
-    t_pl_blocks_to_be_moved& blocks_affected,
-    double& bb_delta_c,
-    double& timing_delta_c);
-
-static void record_affected_net(const ClusterNetId net, int& num_affected_nets);
-
-static void update_net_bb(const ClusterNetId net,
-                          const t_pl_blocks_to_be_moved& blocks_affected,
-                          int iblk,
-                          const ClusterBlockId blk,
-                          const ClusterPinId blk_pin);
-
-static void update_net_layer_bb(const ClusterNetId net,
-                                const t_pl_blocks_to_be_moved& blocks_affected,
-                                int iblk,
-                                const ClusterBlockId blk,
-                                const ClusterPinId blk_pin);
-
-static void update_td_delta_costs(const PlaceDelayModel* delay_model,
-                                  const PlacerCriticalities& criticalities,
-                                  const ClusterNetId net,
-                                  const ClusterPinId pin,
-                                  t_pl_blocks_to_be_moved& blocks_affected,
-                                  double& delta_timing_cost);
-
 static void update_placement_cost_normalization_factors(t_placer_costs* costs, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts);
 
 static double get_total_cost(t_placer_costs* costs, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts);
 
-static double get_net_cost(ClusterNetId net_id, const t_bb& bbptr);
-
-static double get_net_layer_cost(ClusterNetId /* net_id */,
-                                 const std::vector<t_2D_bb>& bbptr,
-                                 const vtr::NdMatrixProxy<int, 1> layer_pin_sink_count);
-
-static void get_bb_from_scratch(ClusterNetId net_id,
-                                t_bb& coords,
-                                t_bb& num_on_edges,
-                                vtr::NdMatrixProxy<int, 1> num_sink_pin_layer);
-
-static void get_layer_bb_from_scratch(ClusterNetId net_id,
-                                      std::vector<t_2D_bb>& num_on_edges,
-                                      std::vector<t_2D_bb>& coords,
-                                      vtr::NdMatrixProxy<int, 1> layer_pin_sink_count);
-
-static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bbptr);
-
-static double get_net_layer_wirelength_estimate(ClusterNetId /* net_id */,
-                                                const std::vector<t_2D_bb>& bbptr,
-                                                const vtr::NdMatrixProxy<int, 1> layer_pin_sink_count);
-
 static void free_try_swap_arrays();
 
 static void outer_loop_update_timing_info(const t_placer_opts& placer_opts,
@@ -507,12 +310,6 @@ static void placement_inner_loop(const t_annealing_state* state,
                                  MoveTypeStat& move_type_stat,
                                  float timing_bb_factor);
 
-static void recompute_costs_from_scratch(const t_placer_opts& placer_opts,
-                                         const t_noc_opts& noc_opts,
-                                         const PlaceDelayModel* delay_model,
-                                         const PlacerCriticalities* criticalities,
-                                         t_placer_costs* costs);
-
 static void generate_post_place_timing_reports(const t_placer_opts& placer_opts,
                                                const t_analysis_opts& analysis_opts,
                                                const SetupTimingInfo& timing_info,
@@ -1314,13 +1111,6 @@ static void placement_inner_loop(const t_annealing_state* state,
             }
             inner_crit_iter_count++;
         }
-#ifdef VERBOSE
-        VTR_LOG("t = %g  cost = %g   bb_cost = %g timing_cost = %g move = %d\n",
-                state->t, costs->cost, costs->bb_cost, costs->timing_cost, inner_iter);
-        if (fabs((costs->bb_cost) - comp_bb_cost(CHECK)) > (costs->bb_cost) * ERROR_TOL)
-            VPR_ERROR(VPR_ERROR_PLACE, "bb_cost is %g, comp_bb_cost is %g\n", costs->bb_cost, comp_bb_cost(CHECK));
-            //"fabs((*bb_cost) - comp_bb_cost(CHECK)) > (*bb_cost) * ERROR_TOL");
-#endif
 
         /* Lines below prevent too much round-off error from accumulating
          * in the cost over many iterations (due to incremental updates).
@@ -1355,70 +1145,6 @@ static void placement_inner_loop(const t_annealing_state* state,
     stats->calc_iteration_stats(*costs, state->move_lim);
 }
 
-static void recompute_costs_from_scratch(const t_placer_opts& placer_opts,
-                                         const t_noc_opts& noc_opts,
-                                         const PlaceDelayModel* delay_model,
-                                         const PlacerCriticalities* criticalities,
-                                         t_placer_costs* costs) {
-    auto check_and_print_cost = [](double new_cost,
-                                   double old_cost,
-                                   const std::string& cost_name) {
-        if (!vtr::isclose(new_cost, old_cost, ERROR_TOL, 0.)) {
-            std::string msg = vtr::string_fmt(
-                "in recompute_costs_from_scratch: new_%s = %g, old %s = %g, ERROR_TOL = %g\n",
-                cost_name.c_str(), new_cost, cost_name.c_str(), old_cost, ERROR_TOL);
-            VPR_ERROR(VPR_ERROR_PLACE, msg.c_str());
-        }
-    };
-
-    double new_bb_cost = recompute_bb_cost();
-    check_and_print_cost(new_bb_cost, costs->bb_cost, "bb_cost");
-    costs->bb_cost = new_bb_cost;
-
-    if (placer_opts.place_algorithm.is_timing_driven()) {
-        double new_timing_cost = 0.;
-        comp_td_costs(delay_model, *criticalities, &new_timing_cost);
-        check_and_print_cost(new_timing_cost, costs->timing_cost, "timing_cost");
-        costs->timing_cost = new_timing_cost;
-    } else {
-        VTR_ASSERT(placer_opts.place_algorithm == BOUNDING_BOX_PLACE);
-        costs->cost = new_bb_cost * costs->bb_cost_norm;
-    }
-
-    if (noc_opts.noc) {
-        NocCostTerms new_noc_cost;
-        recompute_noc_costs(new_noc_cost);
-
-        check_and_print_cost(new_noc_cost.aggregate_bandwidth,
-                             costs->noc_cost_terms.aggregate_bandwidth,
-                             "noc_aggregate_bandwidth");
-        costs->noc_cost_terms.aggregate_bandwidth = new_noc_cost.aggregate_bandwidth;
-
-        // only check if the recomputed cost and the current noc latency cost are within the error tolerance if the cost is above 1 picosecond.
-        // Otherwise, there is no need to check (we expect the latency cost to be above the threshold of 1 picosecond)
-        if (new_noc_cost.latency > MIN_EXPECTED_NOC_LATENCY_COST) {
-            check_and_print_cost(new_noc_cost.latency,
-                                 costs->noc_cost_terms.latency,
-                                 "noc_latency_cost");
-        }
-        costs->noc_cost_terms.latency = new_noc_cost.latency;
-
-        if (new_noc_cost.latency_overrun > MIN_EXPECTED_NOC_LATENCY_COST) {
-            check_and_print_cost(new_noc_cost.latency_overrun,
-                                 costs->noc_cost_terms.latency_overrun,
-                                 "noc_latency_overrun_cost");
-        }
-        costs->noc_cost_terms.latency_overrun = new_noc_cost.latency_overrun;
-
-        if (new_noc_cost.congestion > MIN_EXPECTED_NOC_CONGESTION_COST) {
-            check_and_print_cost(new_noc_cost.congestion,
-                                 costs->noc_cost_terms.congestion,
-                                 "noc_congestion_cost");
-        }
-        costs->noc_cost_terms.congestion = new_noc_cost.congestion;
-    }
-}
-
 /*only count non-global connections */
 static int count_connections() {
     int count = 0;
@@ -1519,52 +1245,6 @@ static float starting_t(const t_annealing_state* state,
     return init_temp;
 }
 
-static void update_move_nets(int num_nets_affected,
-                             const bool cube_bb) {
-    /* update net cost functions and reset flags. */
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-    auto& place_move_ctx = g_placer_ctx.mutable_move();
-
-    for (int inet_affected = 0; inet_affected < num_nets_affected;
-         inet_affected++) {
-        ClusterNetId net_id = ts_nets_to_update[inet_affected];
-
-        if (cube_bb) {
-            place_move_ctx.bb_coords[net_id] = ts_bb_coord_new[net_id];
-        } else {
-            place_move_ctx.layer_bb_coords[net_id] = layer_ts_bb_coord_new[net_id];
-        }
-
-        for (int layer_num = 0; layer_num < g_vpr_ctx.device().grid.get_num_layers(); layer_num++) {
-            place_move_ctx.num_sink_pin_layer[size_t(net_id)][layer_num] = ts_layer_sink_pin_count[size_t(net_id)][layer_num];
-        }
-
-        if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET) {
-            if (cube_bb) {
-                place_move_ctx.bb_num_on_edges[net_id] = ts_bb_edge_new[net_id];
-            } else {
-                place_move_ctx.layer_bb_num_on_edges[net_id] = layer_ts_bb_edge_new[net_id];
-            }
-        }
-
-        net_cost[net_id] = proposed_net_cost[net_id];
-
-        /* negative proposed_net_cost value is acting as a flag. */
-        proposed_net_cost[net_id] = -1;
-        bb_updated_before[net_id] = NOT_UPDATED_YET;
-    }
-}
-
-static void reset_move_nets(int num_nets_affected) {
-    /* Reset the net cost function flags first. */
-    for (int inet_affected = 0; inet_affected < num_nets_affected;
-         inet_affected++) {
-        ClusterNetId net_id = ts_nets_to_update[inet_affected];
-        proposed_net_cost[net_id] = -1;
-        bb_updated_before[net_id] = NOT_UPDATED_YET;
-    }
-}
-
 /**
  * @brief Pick some block and moves it to another spot.
  *
@@ -1663,7 +1343,7 @@ static e_move_result try_swap(const t_annealing_state* state,
     }
     LOG_MOVE_STATS_PROPOSED(t, blocks_affected);
 
-    VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tBefore move Place cost %f, bb_cost %f, timing cost %f\n", costs->cost, costs->bb_cost, costs->timing_cost);
+    VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tBefore move Place cost %e, bb_cost %e, timing cost %e\n", costs->cost, costs->bb_cost, costs->timing_cost);
 
     e_move_result move_outcome = e_move_result::ABORTED;
 
@@ -1735,8 +1415,8 @@ static e_move_result try_swap(const t_annealing_state* state,
             /* Take delta_c as a combination of timing and wiring cost. In
              * addition to `timing_tradeoff`, we normalize the cost values */
             VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug,
-                           "\t\tMove bb_delta_c %f, bb_cost_norm %f, timing_tradeoff %f, "
-                           "timing_delta_c %f, timing_cost_norm %f\n",
+                           "\t\tMove bb_delta_c %e, bb_cost_norm %e, timing_tradeoff %f, "
+                           "timing_delta_c %e, timing_cost_norm %e\n",
                            bb_delta_c,
                            costs->bb_cost_norm,
                            timing_tradeoff,
@@ -1748,8 +1428,7 @@ static e_move_result try_swap(const t_annealing_state* state,
         } else {
             VTR_ASSERT_SAFE(place_algorithm == BOUNDING_BOX_PLACE);
             VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug,
-                           "\t\tMove bb_delta_c %f, bb_cost_norm %f, timing_tradeoff %f, "
-                           "timing_delta_c %f, timing_cost_norm %f\n",
+                           "\t\tMove bb_delta_c %e, bb_cost_norm %e\n",
                            bb_delta_c,
                            costs->bb_cost_norm);
             delta_c = bb_delta_c * costs->bb_cost_norm;
@@ -1908,7 +1587,7 @@ static e_move_result try_swap(const t_annealing_state* state,
     // greatly slow the placer, but can debug some issues.
     check_place(*costs, delay_model, criticalities, place_algorithm, noc_opts);
 #endif
-    VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tAfter move Place cost %f, bb_cost %f, timing cost %f\n", costs->cost, costs->bb_cost, costs->timing_cost);
+    VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tAfter move Place cost %e, bb_cost %e, timing cost %e\n", costs->cost, costs->bb_cost, costs->timing_cost);
     return move_outcome;
 }
 
@@ -1943,297 +1622,6 @@ static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode,
     return cube_bb;
 }
 
-/**
- * @brief Find all the nets and pins affected by this swap and update costs.
- *
- * Find all the nets affected by this swap and update the bounding box (wiring)
- * costs. This cost function doesn't depend on the timing info.
- *
- * Find all the connections affected by this swap and update the timing cost.
- * For a connection to be affected, it not only needs to be on or driven by
- * a block, but it also needs to have its delay changed. Otherwise, it will
- * not be added to the affected_pins structure.
- *
- * For more, see update_td_delta_costs().
- *
- * The timing costs are calculated by getting the new connection delays,
- * multiplied by the connection criticalities returned by the timing
- * analyzer. These timing costs are stored in the proposed_* data structures.
- *
- * The change in the bounding box cost is stored in `bb_delta_c`.
- * The change in the timing cost is stored in `timing_delta_c`.
- *
- * @return The number of affected nets.
- */
-static int find_affected_nets_and_update_costs(
-    const t_place_algorithm& place_algorithm,
-    const PlaceDelayModel* delay_model,
-    const PlacerCriticalities* criticalities,
-    t_pl_blocks_to_be_moved& blocks_affected,
-    double& bb_delta_c,
-    double& timing_delta_c) {
-    VTR_ASSERT_SAFE(bb_delta_c == 0.);
-    VTR_ASSERT_SAFE(timing_delta_c == 0.);
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-
-    int num_affected_nets = 0;
-
-    const auto& cube_bb = g_vpr_ctx.placement().cube_bb;
-
-    /* Go through all the blocks moved. */
-    for (int iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) {
-        ClusterBlockId blk = blocks_affected.moved_blocks[iblk].block_num;
-
-        /* Go through all the pins in the moved block. */
-        for (ClusterPinId blk_pin : cluster_ctx.clb_nlist.block_pins(blk)) {
-            ClusterNetId net_id = cluster_ctx.clb_nlist.pin_net(blk_pin);
-            VTR_ASSERT_SAFE_MSG(net_id,
-                                "Only valid nets should be found in compressed netlist block pins");
-
-            if (cluster_ctx.clb_nlist.net_is_ignored(net_id))
-                //TODO: Do we require anyting special here for global nets?
-                //"Global nets are assumed to span the whole chip, and do not effect costs."
-                continue;
-
-            /* Record effected nets */
-            record_affected_net(net_id, num_affected_nets);
-
-            /* Update the net bounding boxes. */
-            if (cube_bb) {
-                update_net_bb(net_id, blocks_affected, iblk, blk, blk_pin);
-            } else {
-                update_net_layer_bb(net_id, blocks_affected, iblk, blk, blk_pin);
-            }
-
-            if (place_algorithm.is_timing_driven()) {
-                /* Determine the change in connection delay and timing cost. */
-                update_td_delta_costs(delay_model, *criticalities, net_id,
-                                      blk_pin, blocks_affected, timing_delta_c);
-            }
-        }
-    }
-
-    /* Now update the bounding box costs (since the net bounding     *
-     * boxes are up-to-date). The cost is only updated once per net. */
-    for (int inet_affected = 0; inet_affected < num_affected_nets;
-         inet_affected++) {
-        ClusterNetId net_id = ts_nets_to_update[inet_affected];
-
-        if (cube_bb) {
-            proposed_net_cost[net_id] = get_net_cost(net_id,
-                                                     ts_bb_coord_new[net_id]);
-        } else {
-            proposed_net_cost[net_id] = get_net_layer_cost(net_id,
-                                                           layer_ts_bb_coord_new[net_id],
-                                                           ts_layer_sink_pin_count[size_t(net_id)]);
-        }
-
-        bb_delta_c += proposed_net_cost[net_id] - net_cost[net_id];
-    }
-
-    return num_affected_nets;
-}
-
-///@brief Record effected nets.
-static void record_affected_net(const ClusterNetId net,
-                                int& num_affected_nets) {
-    /* Record effected nets. */
-    if (proposed_net_cost[net] < 0.) {
-        /* Net not marked yet. */
-        ts_nets_to_update[num_affected_nets] = net;
-        num_affected_nets++;
-
-        /* Flag to say we've marked this net. */
-        proposed_net_cost[net] = 1.;
-    }
-}
-
-/**
- * @brief Update the net bounding boxes.
- *
- * Do not update the net cost here since it should only
- * be updated once per net, not once per pin.
- */
-static void update_net_bb(const ClusterNetId net,
-                          const t_pl_blocks_to_be_moved& blocks_affected,
-                          int iblk,
-                          const ClusterBlockId blk,
-                          const ClusterPinId blk_pin) {
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-
-    if (cluster_ctx.clb_nlist.net_sinks(net).size() < SMALL_NET) {
-        //For small nets brute-force bounding box update is faster
-
-        if (bb_updated_before[net] == NOT_UPDATED_YET) { //Only once per-net
-            get_non_updateable_bb(net,
-                                  ts_bb_coord_new[net],
-                                  ts_layer_sink_pin_count[size_t(net)]);
-        }
-    } else {
-        //For large nets, update bounding box incrementally
-        int iblk_pin = tile_pin_index(blk_pin);
-        bool src_pin = cluster_ctx.clb_nlist.pin_type(blk_pin) == PinType::DRIVER;
-
-        t_physical_tile_type_ptr blk_type = physical_tile_type(blk);
-        int pin_width_offset = blk_type->pin_width_offset[iblk_pin];
-        int pin_height_offset = blk_type->pin_height_offset[iblk_pin];
-
-        //Incremental bounding box update
-        t_physical_tile_loc pin_old_loc(
-            blocks_affected.moved_blocks[iblk].old_loc.x + pin_width_offset,
-            blocks_affected.moved_blocks[iblk].old_loc.y + pin_height_offset,
-            blocks_affected.moved_blocks[iblk].old_loc.layer);
-        t_physical_tile_loc pin_new_loc(
-            blocks_affected.moved_blocks[iblk].new_loc.x + pin_width_offset,
-            blocks_affected.moved_blocks[iblk].new_loc.y + pin_height_offset,
-            blocks_affected.moved_blocks[iblk].new_loc.layer);
-        update_bb(net,
-                  ts_bb_edge_new[net],
-                  ts_bb_coord_new[net],
-                  ts_layer_sink_pin_count[size_t(net)],
-                  pin_old_loc,
-                  pin_new_loc,
-                  src_pin);
-    }
-}
-
-static void update_net_layer_bb(const ClusterNetId net,
-                                const t_pl_blocks_to_be_moved& blocks_affected,
-                                int iblk,
-                                const ClusterBlockId blk,
-                                const ClusterPinId blk_pin) {
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-
-    if (cluster_ctx.clb_nlist.net_sinks(net).size() < SMALL_NET) {
-        //For small nets brute-force bounding box update is faster
-
-        if (bb_updated_before[net] == NOT_UPDATED_YET) { //Only once per-net
-            get_non_updateable_layer_bb(net,
-                                        layer_ts_bb_coord_new[net],
-                                        ts_layer_sink_pin_count[size_t(net)]);
-        }
-    } else {
-        //For large nets, update bounding box incrementally
-        int iblk_pin = tile_pin_index(blk_pin);
-
-        t_physical_tile_type_ptr blk_type = physical_tile_type(blk);
-        int pin_width_offset = blk_type->pin_width_offset[iblk_pin];
-        int pin_height_offset = blk_type->pin_height_offset[iblk_pin];
-
-        //Incremental bounding box update
-        t_physical_tile_loc pin_old_loc(
-            blocks_affected.moved_blocks[iblk].old_loc.x + pin_width_offset,
-            blocks_affected.moved_blocks[iblk].old_loc.y + pin_height_offset,
-            blocks_affected.moved_blocks[iblk].old_loc.layer);
-        t_physical_tile_loc pin_new_loc(
-            blocks_affected.moved_blocks[iblk].new_loc.x + pin_width_offset,
-            blocks_affected.moved_blocks[iblk].new_loc.y + pin_height_offset,
-            blocks_affected.moved_blocks[iblk].new_loc.layer);
-        auto pin_dir = get_pin_type_from_pin_physical_num(blk_type, iblk_pin);
-        update_layer_bb(net,
-                        layer_ts_bb_edge_new[net],
-                        layer_ts_bb_coord_new[net],
-                        ts_layer_sink_pin_count[size_t(net)],
-                        pin_old_loc,
-                        pin_new_loc,
-                        pin_dir == e_pin_type::DRIVER);
-    }
-}
-
-/**
- * @brief Calculate the new connection delay and timing cost of all the
- *        sink pins affected by moving a specific pin to a new location.
- *        Also calculates the total change in the timing cost.
- *
- * Assumes that the blocks have been moved to the proposed new locations.
- * Otherwise, the routine comp_td_single_connection_delay() will not be
- * able to calculate the most up to date connection delay estimation value.
- *
- * If the moved pin is a driver pin, then all the sink connections that are
- * driven by this driver pin are considered.
- *
- * If the moved pin is a sink pin, then it is the only pin considered. But
- * in some cases, the sink is already accounted for if it is also driven
- * by a driver pin located on a moved block. Computing it again would double
- * count its affect on the total timing cost change (delta_timing_cost).
- *
- * It is possible for some connections to have unchanged delays. For instance,
- * if we are using a dx/dy delay model, this could occur if a sink pin moved
- * to a new position with the same dx/dy from its net's driver pin.
- *
- * We skip these connections with unchanged delay values as their delay need
- * not be updated. Their timing costs also do not require any update, since
- * the criticalities values are always kept stale/unchanged during an block
- * swap attempt. (Unchanged Delay * Unchanged Criticality = Unchanged Cost)
- *
- * This is also done to minimize the number of timing node/edge invalidations
- * for incremental static timing analysis (incremental STA).
- */
-static void update_td_delta_costs(const PlaceDelayModel* delay_model,
-                                  const PlacerCriticalities& criticalities,
-                                  const ClusterNetId net,
-                                  const ClusterPinId pin,
-                                  t_pl_blocks_to_be_moved& blocks_affected,
-                                  double& delta_timing_cost) {
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-
-    const auto& connection_delay = g_placer_ctx.timing().connection_delay;
-    auto& connection_timing_cost = g_placer_ctx.mutable_timing().connection_timing_cost;
-    auto& proposed_connection_delay = g_placer_ctx.mutable_timing().proposed_connection_delay;
-    auto& proposed_connection_timing_cost = g_placer_ctx.mutable_timing().proposed_connection_timing_cost;
-
-    if (cluster_ctx.clb_nlist.pin_type(pin) == PinType::DRIVER) {
-        /* This pin is a net driver on a moved block. */
-        /* Recompute all point to point connection delays for the net sinks. */
-        for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net).size();
-             ipin++) {
-            float temp_delay = comp_td_single_connection_delay(delay_model, net,
-                                                               ipin);
-            /* If the delay hasn't changed, do not mark this pin as affected */
-            if (temp_delay == connection_delay[net][ipin]) {
-                continue;
-            }
-
-            /* Calculate proposed delay and cost values */
-            proposed_connection_delay[net][ipin] = temp_delay;
-
-            proposed_connection_timing_cost[net][ipin] = criticalities.criticality(net, ipin) * temp_delay;
-            delta_timing_cost += proposed_connection_timing_cost[net][ipin]
-                                 - connection_timing_cost[net][ipin];
-
-            /* Record this connection in blocks_affected.affected_pins */
-            ClusterPinId sink_pin = cluster_ctx.clb_nlist.net_pin(net, ipin);
-            blocks_affected.affected_pins.push_back(sink_pin);
-        }
-    } else {
-        /* This pin is a net sink on a moved block */
-        VTR_ASSERT_SAFE(cluster_ctx.clb_nlist.pin_type(pin) == PinType::SINK);
-
-        /* Check if this sink's net is driven by a moved block */
-        if (!driven_by_moved_block(net, blocks_affected)) {
-            /* Get the sink pin index in the net */
-            int ipin = cluster_ctx.clb_nlist.pin_net_index(pin);
-
-            float temp_delay = comp_td_single_connection_delay(delay_model, net,
-                                                               ipin);
-            /* If the delay hasn't changed, do not mark this pin as affected */
-            if (temp_delay == connection_delay[net][ipin]) {
-                return;
-            }
-
-            /* Calculate proposed delay and cost values */
-            proposed_connection_delay[net][ipin] = temp_delay;
-
-            proposed_connection_timing_cost[net][ipin] = criticalities.criticality(net, ipin) * temp_delay;
-            delta_timing_cost += proposed_connection_timing_cost[net][ipin]
-                                 - connection_timing_cost[net][ipin];
-
-            /* Record this connection in blocks_affected.affected_pins */
-            blocks_affected.affected_pins.push_back(pin);
-        }
-    }
-}
-
 /**
  * @brief Updates all the cost normalization factors during the outer
  * loop iteration of the placement. At each temperature change, these
@@ -2350,7 +1738,7 @@ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks) {
 
 static e_move_result assess_swap(double delta_c, double t) {
     /* Returns: 1 -> move accepted, 0 -> rejected. */
-    VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tTemperature is: %f delta_c is %f\n", t, delta_c);
+    VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tTemperature is: %e delta_c is %e\n", t, delta_c);
     if (delta_c <= 0) {
         VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is accepted(delta_c < 0)\n");
         return ACCEPTED;
@@ -2371,25 +1759,6 @@ static e_move_result assess_swap(double delta_c, double t) {
     return REJECTED;
 }
 
-static double recompute_bb_cost() {
-    /* Recomputes the cost to eliminate roundoff that may have accrued.  *
-     * This routine does as little work as possible to compute this new  *
-     * cost.                                                             */
-
-    double cost = 0;
-
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-
-    for (auto net_id : cluster_ctx.clb_nlist.nets()) {       /* for each net ... */
-        if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { /* Do only if not ignored. */
-            /* Bounding boxes don't have to be recomputed; they're correct. */
-            cost += net_cost[net_id];
-        }
-    }
-
-    return (cost);
-}
-
 /**
  * @brief Update the connection_timing_cost values from the temporary
  *        values for all connections that have/haven't changed.
@@ -2466,107 +1835,6 @@ static void invalidate_affected_connections(
     }
 }
 
-//Returns true if 'net' is driven by one of the blocks in 'blocks_affected'
-static bool driven_by_moved_block(const ClusterNetId net,
-                                  const t_pl_blocks_to_be_moved& blocks_affected) {
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-
-    ClusterBlockId net_driver_block = cluster_ctx.clb_nlist.net_driver_block(
-        net);
-    for (int iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) {
-        if (net_driver_block == blocks_affected.moved_blocks[iblk].block_num) {
-            return true;
-        }
-    }
-    return false;
-}
-
-/* Finds the cost from scratch.  Done only when the placement   *
- * has been radically changed (i.e. after initial placement).   *
- * Otherwise find the cost change incrementally.  If method     *
- * check is NORMAL, we find bounding boxes that are updateable  *
- * for the larger nets.  If method is CHECK, all bounding boxes *
- * are found via the non_updateable_bb routine, to provide a    *
- * cost which can be used to check the correctness of the       *
- * other routine.                                               */
-static double comp_bb_cost(e_cost_methods method) {
-    double cost = 0;
-    double expected_wirelength = 0.0;
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-    auto& place_move_ctx = g_placer_ctx.mutable_move();
-
-    for (auto net_id : cluster_ctx.clb_nlist.nets()) {       /* for each net ... */
-        if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { /* Do only if not ignored. */
-            /* Small nets don't use incremental updating on their bounding boxes, *
-             * so they can use a fast bounding box calculator.                    */
-            if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET
-                && method == NORMAL) {
-                get_bb_from_scratch(net_id,
-                                    place_move_ctx.bb_coords[net_id],
-                                    place_move_ctx.bb_num_on_edges[net_id],
-                                    place_move_ctx.num_sink_pin_layer[size_t(net_id)]);
-            } else {
-                get_non_updateable_bb(net_id,
-                                      place_move_ctx.bb_coords[net_id],
-                                      place_move_ctx.num_sink_pin_layer[size_t(net_id)]);
-            }
-
-            net_cost[net_id] = get_net_cost(net_id, place_move_ctx.bb_coords[net_id]);
-            cost += net_cost[net_id];
-            if (method == CHECK)
-                expected_wirelength += get_net_wirelength_estimate(net_id, place_move_ctx.bb_coords[net_id]);
-        }
-    }
-
-    if (method == CHECK) {
-        VTR_LOG("\n");
-        VTR_LOG("BB estimate of min-dist (placement) wire length: %.0f\n",
-                expected_wirelength);
-    }
-    return cost;
-}
-
-static double comp_layer_bb_cost(e_cost_methods method) {
-    double cost = 0;
-    double expected_wirelength = 0.0;
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-    auto& place_move_ctx = g_placer_ctx.mutable_move();
-
-    for (auto net_id : cluster_ctx.clb_nlist.nets()) {       /* for each net ... */
-        if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { /* Do only if not ignored. */
-            /* Small nets don't use incremental updating on their bounding boxes, *
-             * so they can use a fast bounding box calculator.                    */
-            if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET
-                && method == NORMAL) {
-                get_layer_bb_from_scratch(net_id,
-                                          place_move_ctx.layer_bb_num_on_edges[net_id],
-                                          place_move_ctx.layer_bb_coords[net_id],
-                                          place_move_ctx.num_sink_pin_layer[size_t(net_id)]);
-            } else {
-                get_non_updateable_layer_bb(net_id,
-                                            place_move_ctx.layer_bb_coords[net_id],
-                                            place_move_ctx.num_sink_pin_layer[size_t(net_id)]);
-            }
-
-            net_cost[net_id] = get_net_layer_cost(net_id,
-                                                  place_move_ctx.layer_bb_coords[net_id],
-                                                  place_move_ctx.num_sink_pin_layer[size_t(net_id)]);
-            cost += net_cost[net_id];
-            if (method == CHECK)
-                expected_wirelength += get_net_layer_wirelength_estimate(net_id,
-                                                                         place_move_ctx.layer_bb_coords[net_id],
-                                                                         place_move_ctx.num_sink_pin_layer[size_t(net_id)]);
-        }
-    }
-
-    if (method == CHECK) {
-        VTR_LOG("\n");
-        VTR_LOG("BB estimate of min-dist (placement) wire length: %.0f\n",
-                expected_wirelength);
-    }
-    return cost;
-}
-
 /* Allocates the major structures needed only by the placer, primarily for *
  * computing costs quickly and such.                                       */
 static void alloc_and_load_placement_structs(float place_cost_exp,
@@ -2631,8 +1899,7 @@ static void alloc_and_load_placement_structs(float place_cost_exp,
         }
     }
 
-    net_cost.resize(num_nets, -1.);
-    proposed_net_cost.resize(num_nets, -1.);
+    init_place_move_structs(num_nets);
 
     if (cube_bb) {
         place_move_ctx.bb_coords.resize(num_nets, t_bb());
@@ -2644,17 +1911,12 @@ static void alloc_and_load_placement_structs(float place_cost_exp,
     }
 
     place_move_ctx.num_sink_pin_layer.resize({num_nets, size_t(num_layers)});
-    for (size_t flat_idx = 0; flat_idx < ts_layer_sink_pin_count.size(); flat_idx++) {
-        auto& elem = ts_layer_sink_pin_count.get(flat_idx);
+    for (size_t flat_idx = 0; flat_idx < place_move_ctx.num_sink_pin_layer.size(); flat_idx++) {
+        auto& elem = place_move_ctx.num_sink_pin_layer.get(flat_idx);
         elem = OPEN;
     }
 
-    /* Used to store costs for moves not yet made and to indicate when a net's   *
-     * cost has been recomputed. proposed_net_cost[inet] < 0 means net's cost hasn't *
-     * been recomputed.                                                          */
-    bb_updated_before.resize(num_nets, NOT_UPDATED_YET);
-
-    alloc_and_load_for_fast_cost_update(place_cost_exp);
+    alloc_and_load_chan_w_factors_for_place_cost (place_cost_exp);
 
     alloc_and_load_try_swap_structs(cube_bb);
 
@@ -2683,8 +1945,9 @@ static void free_placement_structs(const t_placer_opts& placer_opts, const t_noc
 
     free_placement_macros_structs();
 
-    vtr::release_memory(net_cost);
-    vtr::release_memory(proposed_net_cost);
+    free_place_move_structs();
+
+    vtr::release_memory(place_move_ctx.bb_coords);
     vtr::release_memory(place_move_ctx.bb_num_on_edges);
     vtr::release_memory(place_move_ctx.bb_coords);
 
@@ -2693,9 +1956,7 @@ static void free_placement_structs(const t_placer_opts& placer_opts, const t_noc
 
     place_move_ctx.num_sink_pin_layer.clear();
 
-    vtr::release_memory(bb_updated_before);
-
-    free_fast_cost_update();
+    free_chan_w_factors_for_place_cost ();
 
     free_try_swap_structs();
 
@@ -2711,1313 +1972,19 @@ static void alloc_and_load_try_swap_structs(const bool cube_bb) {
 
     size_t num_nets = cluster_ctx.clb_nlist.nets().size();
 
-    const int num_layers = g_vpr_ctx.device().grid.get_num_layers();
-
-    if (cube_bb) {
-        ts_bb_edge_new.resize(num_nets, t_bb());
-        ts_bb_coord_new.resize(num_nets, t_bb());
-    } else {
-        VTR_ASSERT_SAFE(!cube_bb);
-        layer_ts_bb_edge_new.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
-        layer_ts_bb_coord_new.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
-    }
-
-    ts_layer_sink_pin_count.resize({num_nets, size_t(num_layers)});
-    for (size_t flat_idx = 0; flat_idx < ts_layer_sink_pin_count.size(); flat_idx++) {
-        auto& elem = ts_layer_sink_pin_count.get(flat_idx);
-        elem = OPEN;
-    }
-
-    ts_nets_to_update.resize(num_nets, ClusterNetId::INVALID());
+    init_try_swap_net_cost_structs(num_nets, cube_bb);
 
     auto& place_ctx = g_vpr_ctx.mutable_placement();
     place_ctx.compressed_block_grids = create_compressed_block_grids();
 }
 
 static void free_try_swap_structs() {
-    vtr::release_memory(ts_bb_edge_new);
-    vtr::release_memory(ts_bb_coord_new);
-    vtr::release_memory(layer_ts_bb_edge_new);
-    vtr::release_memory(layer_ts_bb_coord_new);
-    ts_layer_sink_pin_count.clear();
-    vtr::release_memory(ts_nets_to_update);
+    free_try_swap_net_cost_structs();
 
     auto& place_ctx = g_vpr_ctx.mutable_placement();
     vtr::release_memory(place_ctx.compressed_block_grids);
 }
 
-/* This routine finds the bounding box of each net from scratch (i.e.   *
- * from only the block location information).  It updates both the       *
- * coordinate and number of pins on each edge information.  It           *
- * should only be called when the bounding box information is not valid. */
-static void get_bb_from_scratch(ClusterNetId net_id,
-                                t_bb& coords,
-                                t_bb& num_on_edges,
-                                vtr::NdMatrixProxy<int, 1> num_sink_pin_layer) {
-    int pnum, x, y, pin_layer, xmin, xmax, ymin, ymax, layer_min, layer_max;
-    int xmin_edge, xmax_edge, ymin_edge, ymax_edge, layer_min_edge, layer_max_edge;
-
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-    auto& place_ctx = g_vpr_ctx.placement();
-    auto& device_ctx = g_vpr_ctx.device();
-    auto& grid = device_ctx.grid;
-
-    ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id);
-    pnum = net_pin_to_tile_pin_index(net_id, 0);
-    VTR_ASSERT(pnum >= 0);
-    x = place_ctx.block_locs[bnum].loc.x
-        + physical_tile_type(bnum)->pin_width_offset[pnum];
-    y = place_ctx.block_locs[bnum].loc.y
-        + physical_tile_type(bnum)->pin_height_offset[pnum];
-    pin_layer = place_ctx.block_locs[bnum].loc.layer;
-
-    x = max(min<int>(x, grid.width() - 2), 1);
-    y = max(min<int>(y, grid.height() - 2), 1);
-    pin_layer = max(min<int>(pin_layer, grid.get_num_layers() - 1), 0);
-
-    xmin = x;
-    ymin = y;
-    layer_min = pin_layer;
-    xmax = x;
-    ymax = y;
-    layer_max = pin_layer;
-
-    xmin_edge = 1;
-    ymin_edge = 1;
-    layer_min_edge = 1;
-    xmax_edge = 1;
-    ymax_edge = 1;
-    layer_max_edge = 1;
-
-    for (int layer_num = 0; layer_num < grid.get_num_layers(); layer_num++) {
-        num_sink_pin_layer[layer_num] = 0;
-    }
-
-    for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) {
-        bnum = cluster_ctx.clb_nlist.pin_block(pin_id);
-        pnum = tile_pin_index(pin_id);
-        x = place_ctx.block_locs[bnum].loc.x
-            + physical_tile_type(bnum)->pin_width_offset[pnum];
-        y = place_ctx.block_locs[bnum].loc.y
-            + physical_tile_type(bnum)->pin_height_offset[pnum];
-        pin_layer = place_ctx.block_locs[bnum].loc.layer;
-
-        /* Code below counts IO blocks as being within the 1..grid.width()-2, 1..grid.height()-2 clb array. *
-         * This is because channels do not go out of the 0..grid.width()-2, 0..grid.height()-2 range, and   *
-         * I always take all channels impinging on the bounding box to be within   *
-         * that bounding box.  Hence, this "movement" of IO blocks does not affect *
-         * the which channels are included within the bounding box, and it         *
-         * simplifies the code a lot.                                              */
-
-        x = max(min<int>(x, grid.width() - 2), 1);  //-2 for no perim channels
-        y = max(min<int>(y, grid.height() - 2), 1); //-2 for no perim channels
-        pin_layer = max(min<int>(pin_layer, grid.get_num_layers() - 1), 0);
-
-        if (x == xmin) {
-            xmin_edge++;
-        }
-        if (x == xmax) { /* Recall that xmin could equal xmax -- don't use else */
-            xmax_edge++;
-        } else if (x < xmin) {
-            xmin = x;
-            xmin_edge = 1;
-        } else if (x > xmax) {
-            xmax = x;
-            xmax_edge = 1;
-        }
-
-        if (y == ymin) {
-            ymin_edge++;
-        }
-        if (y == ymax) {
-            ymax_edge++;
-        } else if (y < ymin) {
-            ymin = y;
-            ymin_edge = 1;
-        } else if (y > ymax) {
-            ymax = y;
-            ymax_edge = 1;
-        }
-
-        if (pin_layer == layer_min) {
-            layer_min_edge++;
-        }
-        if (pin_layer == layer_max) {
-            layer_max_edge++;
-        } else if (pin_layer < layer_min) {
-            layer_min = pin_layer;
-            layer_min_edge = 1;
-        } else if (pin_layer > layer_max) {
-            layer_max = pin_layer;
-            layer_max_edge = 1;
-        }
-
-        num_sink_pin_layer[pin_layer]++;
-    }
-
-    /* Copy the coordinates and number on edges information into the proper   *
-     * structures.                                                            */
-    coords.xmin = xmin;
-    coords.xmax = xmax;
-    coords.ymin = ymin;
-    coords.ymax = ymax;
-    coords.layer_min = layer_min;
-    coords.layer_max = layer_max;
-    VTR_ASSERT_DEBUG(layer_min >= 0 && layer_min < device_ctx.grid.get_num_layers());
-    VTR_ASSERT_DEBUG(layer_max >= 0 && layer_max < device_ctx.grid.get_num_layers());
-
-
-    num_on_edges.xmin = xmin_edge;
-    num_on_edges.xmax = xmax_edge;
-    num_on_edges.ymin = ymin_edge;
-    num_on_edges.ymax = ymax_edge;
-    num_on_edges.layer_min = layer_min_edge;
-    num_on_edges.layer_max = layer_max_edge;
-}
-
-/* This routine finds the bounding box of each net from scratch when the bounding box is of type per-layer (i.e.   *
- * from only the block location information).  It updates the       *
- * coordinate, number of pins on each edge information, and the number of sinks on each layer.  It           *
- * should only be called when the bounding box information is not valid. */
-static void get_layer_bb_from_scratch(ClusterNetId net_id,
-                                      std::vector<t_2D_bb>& num_on_edges,
-                                      std::vector<t_2D_bb>& coords,
-                                      vtr::NdMatrixProxy<int, 1> layer_pin_sink_count) {
-    auto& device_ctx = g_vpr_ctx.device();
-    const int num_layers = device_ctx.grid.get_num_layers();
-    std::vector<int> xmin(num_layers, OPEN);
-    std::vector<int> xmax(num_layers, OPEN);
-    std::vector<int> ymin(num_layers, OPEN);
-    std::vector<int> ymax(num_layers, OPEN);
-    std::vector<int> xmin_edge(num_layers, OPEN);
-    std::vector<int> xmax_edge(num_layers, OPEN);
-    std::vector<int> ymin_edge(num_layers, OPEN);
-    std::vector<int> ymax_edge(num_layers, OPEN);
-
-    std::vector<int> num_sink_pin_layer(num_layers, 0);
-
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-    auto& place_ctx = g_vpr_ctx.placement();
-    auto& grid = device_ctx.grid;
-
-    ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id);
-    int pnum_src = net_pin_to_tile_pin_index(net_id, 0);
-    VTR_ASSERT(pnum_src >= 0);
-    int x_src = place_ctx.block_locs[bnum].loc.x
-                + physical_tile_type(bnum)->pin_width_offset[pnum_src];
-    int y_src = place_ctx.block_locs[bnum].loc.y
-                + physical_tile_type(bnum)->pin_height_offset[pnum_src];
-
-    x_src = max(min<int>(x_src, grid.width() - 2), 1);
-    y_src = max(min<int>(y_src, grid.height() - 2), 1);
-
-    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
-        xmin[layer_num] = x_src;
-        ymin[layer_num] = y_src;
-        xmax[layer_num] = x_src;
-        ymax[layer_num] = y_src;
-        xmin_edge[layer_num] = 1;
-        ymin_edge[layer_num] = 1;
-        xmax_edge[layer_num] = 1;
-        ymax_edge[layer_num] = 1;
-    }
-
-    for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) {
-        bnum = cluster_ctx.clb_nlist.pin_block(pin_id);
-        int pnum = tile_pin_index(pin_id);
-        int layer = place_ctx.block_locs[bnum].loc.layer;
-        VTR_ASSERT(layer >= 0 && layer < num_layers);
-        num_sink_pin_layer[layer]++;
-        int x = place_ctx.block_locs[bnum].loc.x
-                + physical_tile_type(bnum)->pin_width_offset[pnum];
-        int y = place_ctx.block_locs[bnum].loc.y
-                + physical_tile_type(bnum)->pin_height_offset[pnum];
-
-        /* Code below counts IO blocks as being within the 1..grid.width()-2, 1..grid.height()-2 clb array. *
-         * This is because channels do not go out of the 0..grid.width()-2, 0..grid.height()-2 range, and   *
-         * I always take all channels impinging on the bounding box to be within   *
-         * that bounding box.  Hence, this "movement" of IO blocks does not affect *
-         * the which channels are included within the bounding box, and it         *
-         * simplifies the code a lot.                                              */
-
-        x = max(min<int>(x, grid.width() - 2), 1);  //-2 for no perim channels
-        y = max(min<int>(y, grid.height() - 2), 1); //-2 for no perim channels
-
-        if (x == xmin[layer]) {
-            xmin_edge[layer]++;
-        }
-        if (x == xmax[layer]) { /* Recall that xmin could equal xmax -- don't use else */
-            xmax_edge[layer]++;
-        } else if (x < xmin[layer]) {
-            xmin[layer] = x;
-            xmin_edge[layer] = 1;
-        } else if (x > xmax[layer]) {
-            xmax[layer] = x;
-            xmax_edge[layer] = 1;
-        }
-
-        if (y == ymin[layer]) {
-            ymin_edge[layer]++;
-        }
-        if (y == ymax[layer]) {
-            ymax_edge[layer]++;
-        } else if (y < ymin[layer]) {
-            ymin[layer] = y;
-            ymin_edge[layer] = 1;
-        } else if (y > ymax[layer]) {
-            ymax[layer] = y;
-            ymax_edge[layer] = 1;
-        }
-    }
-
-    /* Copy the coordinates and number on edges information into the proper   *
-     * structures.                                                            */
-    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
-        layer_pin_sink_count[layer_num] = num_sink_pin_layer[layer_num];
-        coords[layer_num].xmin = xmin[layer_num];
-        coords[layer_num].xmax = xmax[layer_num];
-        coords[layer_num].ymin = ymin[layer_num];
-        coords[layer_num].ymax = ymax[layer_num];
-        coords[layer_num].layer_num = layer_num;
-
-        num_on_edges[layer_num].xmin = xmin_edge[layer_num];
-        num_on_edges[layer_num].xmax = xmax_edge[layer_num];
-        num_on_edges[layer_num].ymin = ymin_edge[layer_num];
-        num_on_edges[layer_num].ymax = ymax_edge[layer_num];
-        num_on_edges[layer_num].layer_num = layer_num;
-    }
-}
-
-static double wirelength_crossing_count(size_t fanout) {
-    /* Get the expected "crossing count" of a net, based on its number *
-     * of pins.  Extrapolate for very large nets.                      */
-
-    if (fanout > 50) {
-        return 2.7933 + 0.02616 * (fanout - 50);
-    } else {
-        return cross_count[fanout - 1];
-    }
-}
-
-static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bbptr) {
-    /* WMF: Finds the estimate of wirelength due to one net by looking at   *
-     * its coordinate bounding box.                                         */
-
-    double ncost, crossing;
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-
-    crossing = wirelength_crossing_count(
-        cluster_ctx.clb_nlist.net_pins(net_id).size());
-
-    /* Could insert a check for xmin == xmax.  In that case, assume  *
-     * connection will be made with no bends and hence no x-cost.    *
-     * Same thing for y-cost.                                        */
-
-    /* Cost = wire length along channel * cross_count / average      *
-     * channel capacity.   Do this for x, then y direction and add.  */
-
-    ncost = (bbptr.xmax - bbptr.xmin + 1) * crossing;
-
-    ncost += (bbptr.ymax - bbptr.ymin + 1) * crossing;
-
-    return (ncost);
-}
-
-static double get_net_layer_wirelength_estimate(ClusterNetId /* net_id */,
-                                                const std::vector<t_2D_bb>& bbptr,
-                                                const vtr::NdMatrixProxy<int, 1> layer_pin_sink_count) {
-    /* WMF: Finds the estimate of wirelength due to one net by looking at   *
-     * its coordinate bounding box.                                         */
-
-    double ncost = 0.;
-    double crossing = 0.;
-    int num_layers = g_vpr_ctx.device().grid.get_num_layers();
-
-    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
-        VTR_ASSERT(layer_pin_sink_count[layer_num] != OPEN);
-        if (layer_pin_sink_count[layer_num] == 0) {
-            continue;
-        }
-        crossing = wirelength_crossing_count(layer_pin_sink_count[layer_num] + 1);
-
-        /* Could insert a check for xmin == xmax.  In that case, assume  *
-         * connection will be made with no bends and hence no x-cost.    *
-         * Same thing for y-cost.                                        */
-
-        /* Cost = wire length along channel * cross_count / average      *
-         * channel capacity.   Do this for x, then y direction and add.  */
-
-        ncost += (bbptr[layer_num].xmax - bbptr[layer_num].xmin + 1) * crossing;
-
-        ncost += (bbptr[layer_num].ymax - bbptr[layer_num].ymin + 1) * crossing;
-    }
-
-    return (ncost);
-}
-
-static double get_net_cost(ClusterNetId net_id, const t_bb& bbptr) {
-    /* Finds the cost due to one net by looking at its coordinate bounding  *
-     * box.                                                                 */
-
-    double ncost, crossing;
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-
-    crossing = wirelength_crossing_count(
-        cluster_ctx.clb_nlist.net_pins(net_id).size());
-
-    /* Could insert a check for xmin == xmax.  In that case, assume  *
-     * connection will be made with no bends and hence no x-cost.    *
-     * Same thing for y-cost.                                        */
-
-    /* Cost = wire length along channel * cross_count / average      *
-     * channel capacity.   Do this for x, then y direction and add.  */
-
-    ncost = (bbptr.xmax - bbptr.xmin + 1) * crossing
-            * chanx_place_cost_fac[bbptr.ymax][bbptr.ymin - 1];
-
-    ncost += (bbptr.ymax - bbptr.ymin + 1) * crossing
-             * chany_place_cost_fac[bbptr.xmax][bbptr.xmin - 1];
-
-    return (ncost);
-}
-
-static double get_net_layer_cost(ClusterNetId /* net_id */,
-                                 const std::vector<t_2D_bb>& bbptr,
-                                 const vtr::NdMatrixProxy<int, 1> layer_pin_sink_count) {
-    /* Finds the cost due to one net by looking at its coordinate bounding  *
-     * box.                                                                 */
-
-    double ncost = 0.;
-    double crossing = 0.;
-    int num_layers = g_vpr_ctx.device().grid.get_num_layers();
-
-    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
-        VTR_ASSERT(layer_pin_sink_count[layer_num] != OPEN);
-        if (layer_pin_sink_count[layer_num] == 0) {
-            continue;
-        }
-        crossing = wirelength_crossing_count(layer_pin_sink_count[layer_num] + 1);
-
-        /* Could insert a check for xmin == xmax.  In that case, assume  *
-         * connection will be made with no bends and hence no x-cost.    *
-         * Same thing for y-cost.                                        */
-
-        /* Cost = wire length along channel * cross_count / average      *
-         * channel capacity.   Do this for x, then y direction and add.  */
-
-        ncost += (bbptr[layer_num].xmax - bbptr[layer_num].xmin + 1) * crossing
-                 * chanx_place_cost_fac[bbptr[layer_num].ymax][bbptr[layer_num].ymin - 1];
-
-        ncost += (bbptr[layer_num].ymax - bbptr[layer_num].ymin + 1) * crossing
-                 * chany_place_cost_fac[bbptr[layer_num].xmax][bbptr[layer_num].xmin - 1];
-    }
-
-    return (ncost);
-}
-
-/* Finds the bounding box of a net and stores its coordinates in the  *
- * bb_coord_new data structure.  This routine should only be called   *
- * for small nets, since it does not determine enough information for *
- * the bounding box to be updated incrementally later.                *
- * Currently assumes channels on both sides of the CLBs forming the   *
- * edges of the bounding box can be used.  Essentially, I am assuming *
- * the pins always lie on the outside of the bounding box.            */
-static void get_non_updateable_bb(ClusterNetId net_id,
-                                  t_bb& bb_coord_new,
-                                  vtr::NdMatrixProxy<int, 1> num_sink_pin_layer) {
-    //TODO: account for multiple physical pin instances per logical pin
-
-    int xmax, ymax, layer_max, xmin, ymin, layer_min, x, y, layer;
-    int pnum;
-
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-    auto& place_ctx = g_vpr_ctx.placement();
-    auto& device_ctx = g_vpr_ctx.device();
-
-    ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id);
-    pnum = net_pin_to_tile_pin_index(net_id, 0);
-
-    x = place_ctx.block_locs[bnum].loc.x
-        + physical_tile_type(bnum)->pin_width_offset[pnum];
-    y = place_ctx.block_locs[bnum].loc.y
-        + physical_tile_type(bnum)->pin_height_offset[pnum];
-    layer = place_ctx.block_locs[bnum].loc.layer;
-
-    xmin = x;
-    ymin = y;
-    layer_min = layer;
-    xmax = x;
-    ymax = y;
-    layer_max = layer;
-
-    for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) {
-        num_sink_pin_layer[layer_num] = 0;
-    }
-
-    for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) {
-        bnum = cluster_ctx.clb_nlist.pin_block(pin_id);
-        pnum = tile_pin_index(pin_id);
-        x = place_ctx.block_locs[bnum].loc.x
-            + physical_tile_type(bnum)->pin_width_offset[pnum];
-        y = place_ctx.block_locs[bnum].loc.y
-            + physical_tile_type(bnum)->pin_height_offset[pnum];
-        layer = place_ctx.block_locs[bnum].loc.layer;
-
-        if (x < xmin) {
-            xmin = x;
-        } else if (x > xmax) {
-            xmax = x;
-        }
-
-        if (y < ymin) {
-            ymin = y;
-        } else if (y > ymax) {
-            ymax = y;
-        }
-
-        if (layer < layer_min) {
-            layer_min = layer;
-        } else if (layer > layer_max) {
-            layer_max = layer;
-        }
-
-        num_sink_pin_layer[layer]++;
-    }
-
-    /* Now I've found the coordinates of the bounding box.  There are no *
-     * channels beyond device_ctx.grid.width()-2 and                     *
-     * device_ctx.grid.height() - 2, so I want to clip to that.  As well,*
-     * since I'll always include the channel immediately below and the   *
-     * channel immediately to the left of the bounding box, I want to    *
-     * clip to 1 in both directions as well (since minimum channel index *
-     * is 0).  See route_common.cpp for a channel diagram.               */
-
-    bb_coord_new.xmin = max(min<int>(xmin, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
-    bb_coord_new.ymin = max(min<int>(ymin, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
-    bb_coord_new.layer_min = max(min<int>(layer_min, device_ctx.grid.get_num_layers() - 1), 0);
-    bb_coord_new.xmax = max(min<int>(xmax, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
-    bb_coord_new.ymax = max(min<int>(ymax, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
-    bb_coord_new.layer_max = max(min<int>(layer_max, device_ctx.grid.get_num_layers() - 1), 0);
-}
-
-static void get_non_updateable_layer_bb(ClusterNetId net_id,
-                                        std::vector<t_2D_bb>& bb_coord_new,
-                                        vtr::NdMatrixProxy<int, 1> num_sink_layer) {
-    //TODO: account for multiple physical pin instances per logical pin
-
-    auto& device_ctx = g_vpr_ctx.device();
-    int num_layers = device_ctx.grid.get_num_layers();
-    for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) {
-        num_sink_layer[layer_num] = 0;
-    }
-
-    int pnum;
-
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-    auto& place_ctx = g_vpr_ctx.placement();
-
-    ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id);
-    pnum = net_pin_to_tile_pin_index(net_id, 0);
-
-    int src_x = place_ctx.block_locs[bnum].loc.x
-                + physical_tile_type(bnum)->pin_width_offset[pnum];
-    int src_y = place_ctx.block_locs[bnum].loc.y
-                + physical_tile_type(bnum)->pin_height_offset[pnum];
-
-    std::vector<int> xmin(num_layers, src_x);
-    std::vector<int> ymin(num_layers, src_y);
-    std::vector<int> xmax(num_layers, src_x);
-    std::vector<int> ymax(num_layers, src_y);
-
-    for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) {
-        bnum = cluster_ctx.clb_nlist.pin_block(pin_id);
-        pnum = tile_pin_index(pin_id);
-        int x = place_ctx.block_locs[bnum].loc.x
-                + physical_tile_type(bnum)->pin_width_offset[pnum];
-        int y = place_ctx.block_locs[bnum].loc.y
-                + physical_tile_type(bnum)->pin_height_offset[pnum];
-
-        int layer_num = place_ctx.block_locs[bnum].loc.layer;
-        num_sink_layer[layer_num]++;
-        if (x < xmin[layer_num]) {
-            xmin[layer_num] = x;
-        } else if (x > xmax[layer_num]) {
-            xmax[layer_num] = x;
-        }
-
-        if (y < ymin[layer_num]) {
-            ymin[layer_num] = y;
-        } else if (y > ymax[layer_num]) {
-            ymax[layer_num] = y;
-        }
-    }
-
-    /* Now I've found the coordinates of the bounding box.  There are no *
-     * channels beyond device_ctx.grid.width()-2 and                     *
-     * device_ctx.grid.height() - 2, so I want to clip to that.  As well,*
-     * since I'll always include the channel immediately below and the   *
-     * channel immediately to the left of the bounding box, I want to    *
-     * clip to 1 in both directions as well (since minimum channel index *
-     * is 0).  See route_common.cpp for a channel diagram.               */
-    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
-        bb_coord_new[layer_num].layer_num = layer_num;
-        bb_coord_new[layer_num].xmin = max(min<int>(xmin[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
-        bb_coord_new[layer_num].ymin = max(min<int>(ymin[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels
-        bb_coord_new[layer_num].xmax = max(min<int>(xmax[layer_num], device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
-        bb_coord_new[layer_num].ymax = max(min<int>(ymax[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels
-    }
-}
-
-static void update_bb(ClusterNetId net_id,
-                      t_bb& bb_edge_new,
-                      t_bb& bb_coord_new,
-                      vtr::NdMatrixProxy<int, 1> num_sink_pin_layer_new,
-                      t_physical_tile_loc pin_old_loc,
-                      t_physical_tile_loc pin_new_loc,
-                      bool src_pin) {
-    /* Updates the bounding box of a net by storing its coordinates in    *
-     * the bb_coord_new data structure and the number of blocks on each   *
-     * edge in the bb_edge_new data structure.  This routine should only  *
-     * be called for large nets, since it has some overhead relative to   *
-     * just doing a brute force bounding box calculation.  The bounding   *
-     * box coordinate and edge information for inet must be valid before  *
-     * this routine is called.                                            *
-     * Currently assumes channels on both sides of the CLBs forming the   *
-     * edges of the bounding box can be used.  Essentially, I am assuming *
-     * the pins always lie on the outside of the bounding box.            *
-     * The x and y coordinates are the pin's x and y coordinates.         */
-    /* IO blocks are considered to be one cell in for simplicity.         */
-    //TODO: account for multiple physical pin instances per logical pin
-    const t_bb *curr_bb_edge, *curr_bb_coord;
-
-    auto& device_ctx = g_vpr_ctx.device();
-    auto& place_move_ctx = g_placer_ctx.move();
-
-    const int num_layers = device_ctx.grid.get_num_layers();
-
-    pin_new_loc.x = max(min<int>(pin_new_loc.x, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
-    pin_new_loc.y = max(min<int>(pin_new_loc.y, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
-    pin_new_loc.layer_num = max(min<int>(pin_new_loc.layer_num, device_ctx.grid.get_num_layers() - 1), 0);
-    pin_old_loc.x = max(min<int>(pin_old_loc.x, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
-    pin_old_loc.y = max(min<int>(pin_old_loc.y, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
-    pin_old_loc.layer_num = max(min<int>(pin_old_loc.layer_num, device_ctx.grid.get_num_layers() - 1), 0);
-
-    /* Check if the net had been updated before. */
-    if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
-        /* The net had been updated from scratch, DO NOT update again! */
-        return;
-    }
-
-    vtr::NdMatrixProxy<int, 1> curr_num_sink_pin_layer = (bb_updated_before[net_id] == NOT_UPDATED_YET) ? place_move_ctx.num_sink_pin_layer[size_t(net_id)] : num_sink_pin_layer_new;
-
-    if (bb_updated_before[net_id] == NOT_UPDATED_YET) {
-        /* The net had NOT been updated before, could use the old values */
-        curr_bb_edge = &place_move_ctx.bb_num_on_edges[net_id];
-        curr_bb_coord = &place_move_ctx.bb_coords[net_id];
-        bb_updated_before[net_id] = UPDATED_ONCE;
-    } else {
-        /* The net had been updated before, must use the new values */
-        curr_bb_coord = &bb_coord_new;
-        curr_bb_edge = &bb_edge_new;
-    }
-
-    /* Check if I can update the bounding box incrementally. */
-
-    if (pin_new_loc.x < pin_old_loc.x) { /* Move to left. */
-
-        /* Update the xmax fields for coordinates and number of edges first. */
-
-        if (pin_old_loc.x == curr_bb_coord->xmax) { /* Old position at xmax. */
-            if (curr_bb_edge->xmax == 1) {
-                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer_new);
-                bb_updated_before[net_id] = GOT_FROM_SCRATCH;
-                return;
-            } else {
-                bb_edge_new.xmax = curr_bb_edge->xmax - 1;
-                bb_coord_new.xmax = curr_bb_coord->xmax;
-            }
-        } else { /* Move to left, old position was not at xmax. */
-            bb_coord_new.xmax = curr_bb_coord->xmax;
-            bb_edge_new.xmax = curr_bb_edge->xmax;
-        }
-
-        /* Now do the xmin fields for coordinates and number of edges. */
-
-        if (pin_new_loc.x < curr_bb_coord->xmin) { /* Moved past xmin */
-            bb_coord_new.xmin = pin_new_loc.x;
-            bb_edge_new.xmin = 1;
-        } else if (pin_new_loc.x == curr_bb_coord->xmin) { /* Moved to xmin */
-            bb_coord_new.xmin = pin_new_loc.x;
-            bb_edge_new.xmin = curr_bb_edge->xmin + 1;
-        } else { /* Xmin unchanged. */
-            bb_coord_new.xmin = curr_bb_coord->xmin;
-            bb_edge_new.xmin = curr_bb_edge->xmin;
-        }
-        /* End of move to left case. */
-
-    } else if (pin_new_loc.x > pin_old_loc.x) { /* Move to right. */
-
-        /* Update the xmin fields for coordinates and number of edges first. */
-
-        if (pin_old_loc.x == curr_bb_coord->xmin) { /* Old position at xmin. */
-            if (curr_bb_edge->xmin == 1) {
-                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer_new);
-                bb_updated_before[net_id] = GOT_FROM_SCRATCH;
-                return;
-            } else {
-                bb_edge_new.xmin = curr_bb_edge->xmin - 1;
-                bb_coord_new.xmin = curr_bb_coord->xmin;
-            }
-        } else { /* Move to right, old position was not at xmin. */
-            bb_coord_new.xmin = curr_bb_coord->xmin;
-            bb_edge_new.xmin = curr_bb_edge->xmin;
-        }
-
-        /* Now do the xmax fields for coordinates and number of edges. */
-
-        if (pin_new_loc.x > curr_bb_coord->xmax) { /* Moved past xmax. */
-            bb_coord_new.xmax = pin_new_loc.x;
-            bb_edge_new.xmax = 1;
-        } else if (pin_new_loc.x == curr_bb_coord->xmax) { /* Moved to xmax */
-            bb_coord_new.xmax = pin_new_loc.x;
-            bb_edge_new.xmax = curr_bb_edge->xmax + 1;
-        } else { /* Xmax unchanged. */
-            bb_coord_new.xmax = curr_bb_coord->xmax;
-            bb_edge_new.xmax = curr_bb_edge->xmax;
-        }
-        /* End of move to right case. */
-
-    } else { /* pin_new_loc.x == pin_old_loc.x -- no x motion. */
-        bb_coord_new.xmin = curr_bb_coord->xmin;
-        bb_coord_new.xmax = curr_bb_coord->xmax;
-        bb_edge_new.xmin = curr_bb_edge->xmin;
-        bb_edge_new.xmax = curr_bb_edge->xmax;
-    }
-
-    /* Now account for the y-direction motion. */
-
-    if (pin_new_loc.y < pin_old_loc.y) { /* Move down. */
-
-        /* Update the ymax fields for coordinates and number of edges first. */
-
-        if (pin_old_loc.y == curr_bb_coord->ymax) { /* Old position at ymax. */
-            if (curr_bb_edge->ymax == 1) {
-                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer_new);
-                bb_updated_before[net_id] = GOT_FROM_SCRATCH;
-                return;
-            } else {
-                bb_edge_new.ymax = curr_bb_edge->ymax - 1;
-                bb_coord_new.ymax = curr_bb_coord->ymax;
-            }
-        } else { /* Move down, old postion was not at ymax. */
-            bb_coord_new.ymax = curr_bb_coord->ymax;
-            bb_edge_new.ymax = curr_bb_edge->ymax;
-        }
-
-        /* Now do the ymin fields for coordinates and number of edges. */
-
-        if (pin_new_loc.y < curr_bb_coord->ymin) { /* Moved past ymin */
-            bb_coord_new.ymin = pin_new_loc.y;
-            bb_edge_new.ymin = 1;
-        } else if (pin_new_loc.y == curr_bb_coord->ymin) { /* Moved to ymin */
-            bb_coord_new.ymin = pin_new_loc.y;
-            bb_edge_new.ymin = curr_bb_edge->ymin + 1;
-        } else { /* ymin unchanged. */
-            bb_coord_new.ymin = curr_bb_coord->ymin;
-            bb_edge_new.ymin = curr_bb_edge->ymin;
-        }
-        /* End of move down case. */
-
-    } else if (pin_new_loc.y > pin_old_loc.y) { /* Moved up. */
-
-        /* Update the ymin fields for coordinates and number of edges first. */
-
-        if (pin_old_loc.y == curr_bb_coord->ymin) { /* Old position at ymin. */
-            if (curr_bb_edge->ymin == 1) {
-                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer_new);
-                bb_updated_before[net_id] = GOT_FROM_SCRATCH;
-                return;
-            } else {
-                bb_edge_new.ymin = curr_bb_edge->ymin - 1;
-                bb_coord_new.ymin = curr_bb_coord->ymin;
-            }
-        } else { /* Moved up, old position was not at ymin. */
-            bb_coord_new.ymin = curr_bb_coord->ymin;
-            bb_edge_new.ymin = curr_bb_edge->ymin;
-        }
-
-        /* Now do the ymax fields for coordinates and number of edges. */
-
-        if (pin_new_loc.y > curr_bb_coord->ymax) { /* Moved past ymax. */
-            bb_coord_new.ymax = pin_new_loc.y;
-            bb_edge_new.ymax = 1;
-        } else if (pin_new_loc.y == curr_bb_coord->ymax) { /* Moved to ymax */
-            bb_coord_new.ymax = pin_new_loc.y;
-            bb_edge_new.ymax = curr_bb_edge->ymax + 1;
-        } else { /* ymax unchanged. */
-            bb_coord_new.ymax = curr_bb_coord->ymax;
-            bb_edge_new.ymax = curr_bb_edge->ymax;
-        }
-        /* End of move up case. */
-
-    } else { /* pin_new_loc.y == yold -- no y motion. */
-        bb_coord_new.ymin = curr_bb_coord->ymin;
-        bb_coord_new.ymax = curr_bb_coord->ymax;
-        bb_edge_new.ymin = curr_bb_edge->ymin;
-        bb_edge_new.ymax = curr_bb_edge->ymax;
-    }
-
-    /* Now account for the layer motion. */
-    if (num_layers > 1) {
-        /* We need to update it only if multiple layers are available */
-        for (int layer_num = 0; layer_num < num_layers; layer_num++) {
-            num_sink_pin_layer_new[layer_num] = curr_num_sink_pin_layer[layer_num];
-        }
-        if (!src_pin) {
-            /* if src pin is being moved, we don't need to update this data structure */
-            if (pin_old_loc.layer_num != pin_new_loc.layer_num) {
-                num_sink_pin_layer_new[pin_old_loc.layer_num] = (curr_num_sink_pin_layer)[pin_old_loc.layer_num] - 1;
-                num_sink_pin_layer_new[pin_new_loc.layer_num] = (curr_num_sink_pin_layer)[pin_new_loc.layer_num] + 1;
-            }
-        }
-
-        if (pin_new_loc.layer_num < pin_old_loc.layer_num) {
-            if (pin_old_loc.layer_num == curr_bb_coord->layer_max) {
-                if (curr_bb_edge->layer_max == 1) {
-                    get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer_new);
-                    bb_updated_before[net_id] = GOT_FROM_SCRATCH;
-                    return;
-                } else {
-                    bb_edge_new.layer_max = curr_bb_edge->layer_max - 1;
-                    bb_coord_new.layer_max = curr_bb_coord->layer_max;
-                }
-            } else {
-                bb_coord_new.layer_max = curr_bb_coord->layer_max;
-                bb_edge_new.layer_max = curr_bb_edge->layer_max;
-            }
-
-
-            if (pin_new_loc.layer_num < curr_bb_coord->layer_min) {
-                bb_coord_new.layer_min = pin_new_loc.layer_num;
-                bb_edge_new.layer_min = 1;
-            } else if (pin_new_loc.layer_num == curr_bb_coord->layer_min) {
-                bb_coord_new.layer_min = pin_new_loc.layer_num;
-                bb_edge_new.layer_min = curr_bb_edge->layer_min + 1;
-            } else {
-                bb_coord_new.layer_min = curr_bb_coord->layer_min;
-                bb_edge_new.layer_min = curr_bb_edge->layer_min;
-            }
-
-        } else if (pin_new_loc.layer_num > pin_old_loc.layer_num) {
-
-
-            if (pin_old_loc.layer_num == curr_bb_coord->layer_min) {
-                if (curr_bb_edge->layer_min == 1) {
-                    get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer_new);
-                    bb_updated_before[net_id] = GOT_FROM_SCRATCH;
-                    return;
-                } else {
-                    bb_edge_new.layer_min = curr_bb_edge->layer_min - 1;
-                    bb_coord_new.layer_min = curr_bb_coord->layer_min;
-                }
-            } else {
-                bb_coord_new.layer_min = curr_bb_coord->layer_min;
-                bb_edge_new.layer_min = curr_bb_edge->layer_min;
-            }
-
-            if (pin_new_loc.layer_num > curr_bb_coord->layer_max) {
-                bb_coord_new.layer_max = pin_new_loc.layer_num;
-                bb_edge_new.layer_max = 1;
-            } else if (pin_new_loc.layer_num == curr_bb_coord->layer_max) {
-                bb_coord_new.layer_max = pin_new_loc.layer_num;
-                bb_edge_new.layer_max = curr_bb_edge->layer_max + 1;
-            } else {
-                bb_coord_new.layer_max = curr_bb_coord->layer_max;
-                bb_edge_new.layer_max = curr_bb_edge->layer_max;
-            }
-
-
-        } else {
-            bb_coord_new.layer_min = curr_bb_coord->layer_min;
-            bb_coord_new.layer_max = curr_bb_coord->layer_max;
-            bb_edge_new.layer_min = curr_bb_edge->layer_min;
-            bb_edge_new.layer_max = curr_bb_edge->layer_max;
-        }
-
-    } else {
-        bb_coord_new.layer_min = curr_bb_coord->layer_min;
-        bb_coord_new.layer_max = curr_bb_coord->layer_max;
-        bb_edge_new.layer_min = curr_bb_edge->layer_min;
-        bb_edge_new.layer_max = curr_bb_edge->layer_max;
-    }
-
-    if (bb_updated_before[net_id] == NOT_UPDATED_YET) {
-        bb_updated_before[net_id] = UPDATED_ONCE;
-    }
-}
-
-static void update_layer_bb(ClusterNetId net_id,
-                            std::vector<t_2D_bb>& bb_edge_new,
-                            std::vector<t_2D_bb>& bb_coord_new,
-                            vtr::NdMatrixProxy<int, 1> bb_pin_sink_count_new,
-                            t_physical_tile_loc pin_old_loc,
-                            t_physical_tile_loc pin_new_loc,
-                            bool is_output_pin) {
-    /* Updates the bounding box of a net by storing its coordinates in    *
-     * the bb_coord_new data structure and the number of blocks on each   *
-     * edge in the bb_edge_new data structure.  This routine should only  *
-     * be called for large nets, since it has some overhead relative to   *
-     * just doing a brute force bounding box calculation.  The bounding   *
-     * box coordinate and edge information for inet must be valid before  *
-     * this routine is called.                                            *
-     * Currently assumes channels on both sides of the CLBs forming the   *
-     * edges of the bounding box can be used.  Essentially, I am assuming *
-     * the pins always lie on the outside of the bounding box.            *
-     * The x and y coordinates are the pin's x and y coordinates.         */
-    /* IO blocks are considered to be one cell in for simplicity.         */
-    //TODO: account for multiple physical pin instances per logical pin
-    const std::vector<t_2D_bb>*curr_bb_edge, *curr_bb_coord;
-
-    auto& device_ctx = g_vpr_ctx.device();
-    auto& place_move_ctx = g_placer_ctx.move();
-
-    pin_new_loc.x = max(min<int>(pin_new_loc.x, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
-    pin_new_loc.y = max(min<int>(pin_new_loc.y, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
-    pin_old_loc.x = max(min<int>(pin_old_loc.x, device_ctx.grid.width() - 2), 1);  //-2 for no perim channels
-    pin_old_loc.y = max(min<int>(pin_old_loc.y, device_ctx.grid.height() - 2), 1); //-2 for no perim channels
-
-    /* Check if the net had been updated before. */
-    if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
-        /* The net had been updated from scratch, DO NOT update again! */
-        return;
-    }
-
-    const vtr::NdMatrixProxy<int, 1> curr_layer_pin_sink_count = (bb_updated_before[net_id] == NOT_UPDATED_YET) ? place_move_ctx.num_sink_pin_layer[size_t(net_id)] : bb_pin_sink_count_new;
-
-    if (bb_updated_before[net_id] == NOT_UPDATED_YET) {
-        /* The net had NOT been updated before, could use the old values */
-        curr_bb_edge = &place_move_ctx.layer_bb_num_on_edges[net_id];
-        curr_bb_coord = &place_move_ctx.layer_bb_coords[net_id];
-        bb_updated_before[net_id] = UPDATED_ONCE;
-    } else {
-        /* The net had been updated before, must use the new values */
-        curr_bb_edge = &bb_edge_new;
-        curr_bb_coord = &bb_coord_new;
-    }
-
-    /* Check if I can update the bounding box incrementally. */
-
-    update_bb_pin_sink_count(net_id,
-                             pin_old_loc,
-                             pin_new_loc,
-                             curr_layer_pin_sink_count,
-                             bb_pin_sink_count_new,
-                             is_output_pin);
-
-    int layer_old = pin_old_loc.layer_num;
-    int layer_new = pin_new_loc.layer_num;
-    bool layer_changed = (layer_old != layer_new);
-
-    bb_edge_new = *curr_bb_edge;
-    bb_coord_new = *curr_bb_coord;
-
-    if (layer_changed) {
-        update_bb_layer_changed(net_id,
-                                pin_old_loc,
-                                pin_new_loc,
-                                *curr_bb_edge,
-                                *curr_bb_coord,
-                                bb_pin_sink_count_new,
-                                bb_edge_new,
-                                bb_coord_new);
-    } else {
-        update_bb_same_layer(net_id,
-                             pin_old_loc,
-                             pin_new_loc,
-                             *curr_bb_edge,
-                             *curr_bb_coord,
-                             bb_pin_sink_count_new,
-                             bb_edge_new,
-                             bb_coord_new);
-    }
-
-    if (bb_updated_before[net_id] == NOT_UPDATED_YET) {
-        bb_updated_before[net_id] = UPDATED_ONCE;
-    }
-}
-
-static inline void update_bb_same_layer(ClusterNetId net_id,
-                                        const t_physical_tile_loc& pin_old_loc,
-                                        const t_physical_tile_loc& pin_new_loc,
-                                        const std::vector<t_2D_bb>& curr_bb_edge,
-                                        const std::vector<t_2D_bb>& curr_bb_coord,
-                                        vtr::NdMatrixProxy<int, 1> bb_pin_sink_count_new,
-                                        std::vector<t_2D_bb>& bb_edge_new,
-                                        std::vector<t_2D_bb>& bb_coord_new) {
-    int x_old = pin_old_loc.x;
-    int x_new = pin_new_loc.x;
-
-    int y_old = pin_old_loc.y;
-    int y_new = pin_new_loc.y;
-
-    int layer_num = pin_old_loc.layer_num;
-    VTR_ASSERT_SAFE(layer_num == pin_new_loc.layer_num);
-
-    if (x_new < x_old) {
-        if (x_old == curr_bb_coord[layer_num].xmax) {
-            update_bb_edge(net_id,
-                           bb_edge_new,
-                           bb_coord_new,
-                           bb_pin_sink_count_new,
-                           curr_bb_edge[layer_num].xmax,
-                           curr_bb_coord[layer_num].xmax,
-                           bb_edge_new[layer_num].xmax,
-                           bb_coord_new[layer_num].xmax);
-            if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
-                return;
-            }
-        }
-
-        if (x_new < curr_bb_coord[layer_num].xmin) {
-            bb_edge_new[layer_num].xmin = 1;
-            bb_coord_new[layer_num].xmin = x_new;
-        } else if (x_new == curr_bb_coord[layer_num].xmin) {
-            bb_edge_new[layer_num].xmin = curr_bb_edge[layer_num].xmin + 1;
-            bb_coord_new[layer_num].xmin = curr_bb_coord[layer_num].xmin;
-        }
-
-    } else if (x_new > x_old) {
-        if (x_old == curr_bb_coord[layer_num].xmin) {
-            update_bb_edge(net_id,
-                           bb_edge_new,
-                           bb_coord_new,
-                           bb_pin_sink_count_new,
-                           curr_bb_edge[layer_num].xmin,
-                           curr_bb_coord[layer_num].xmin,
-                           bb_edge_new[layer_num].xmin,
-                           bb_coord_new[layer_num].xmin);
-            if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
-                return;
-            }
-        }
-
-        if (x_new > curr_bb_coord[layer_num].xmax) {
-            bb_edge_new[layer_num].xmax = 1;
-            bb_coord_new[layer_num].xmax = x_new;
-        } else if (x_new == curr_bb_coord[layer_num].xmax) {
-            bb_edge_new[layer_num].xmax = curr_bb_edge[layer_num].xmax + 1;
-            bb_coord_new[layer_num].xmax = curr_bb_coord[layer_num].xmax;
-        }
-    }
-
-    if (y_new < y_old) {
-        if (y_old == curr_bb_coord[layer_num].ymax) {
-            update_bb_edge(net_id,
-                           bb_edge_new,
-                           bb_coord_new,
-                           bb_pin_sink_count_new,
-                           curr_bb_edge[layer_num].ymax,
-                           curr_bb_coord[layer_num].ymax,
-                           bb_edge_new[layer_num].ymax,
-                           bb_coord_new[layer_num].ymax);
-            if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
-                return;
-            }
-        }
-
-        if (y_new < curr_bb_coord[layer_num].ymin) {
-            bb_edge_new[layer_num].ymin = 1;
-            bb_coord_new[layer_num].ymin = y_new;
-        } else if (y_new == curr_bb_coord[layer_num].ymin) {
-            bb_edge_new[layer_num].ymin = curr_bb_edge[layer_num].ymin + 1;
-            bb_coord_new[layer_num].ymin = curr_bb_coord[layer_num].ymin;
-        }
-
-    } else if (y_new > y_old) {
-        if (y_old == curr_bb_coord[layer_num].ymin) {
-            update_bb_edge(net_id,
-                           bb_edge_new,
-                           bb_coord_new,
-                           bb_pin_sink_count_new,
-                           curr_bb_edge[layer_num].ymin,
-                           curr_bb_coord[layer_num].ymin,
-                           bb_edge_new[layer_num].ymin,
-                           bb_coord_new[layer_num].ymin);
-            if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
-                return;
-            }
-        }
-
-        if (y_new > curr_bb_coord[layer_num].ymax) {
-            bb_edge_new[layer_num].ymax = 1;
-            bb_coord_new[layer_num].ymax = y_new;
-        } else if (y_new == curr_bb_coord[layer_num].ymax) {
-            bb_edge_new[layer_num].ymax = curr_bb_edge[layer_num].ymax + 1;
-            bb_coord_new[layer_num].ymax = curr_bb_coord[layer_num].ymax;
-        }
-    }
-}
-
-static inline void update_bb_layer_changed(ClusterNetId net_id,
-                                           const t_physical_tile_loc& pin_old_loc,
-                                           const t_physical_tile_loc& pin_new_loc,
-                                           const std::vector<t_2D_bb>& curr_bb_edge,
-                                           const std::vector<t_2D_bb>& curr_bb_coord,
-                                           vtr::NdMatrixProxy<int, 1> bb_pin_sink_count_new,
-                                           std::vector<t_2D_bb>& bb_edge_new,
-                                           std::vector<t_2D_bb>& bb_coord_new) {
-    int x_old = pin_old_loc.x;
-
-    int y_old = pin_old_loc.y;
-
-    int old_layer_num = pin_old_loc.layer_num;
-    int new_layer_num = pin_new_loc.layer_num;
-    VTR_ASSERT_SAFE(old_layer_num != new_layer_num);
-
-    if (x_old == curr_bb_coord[old_layer_num].xmax) {
-        update_bb_edge(net_id,
-                       bb_edge_new,
-                       bb_coord_new,
-                       bb_pin_sink_count_new,
-                       curr_bb_edge[old_layer_num].xmax,
-                       curr_bb_coord[old_layer_num].xmax,
-                       bb_edge_new[old_layer_num].xmax,
-                       bb_coord_new[old_layer_num].xmax);
-        if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
-            return;
-        }
-    } else if (x_old == curr_bb_coord[old_layer_num].xmin) {
-        update_bb_edge(net_id,
-                       bb_edge_new,
-                       bb_coord_new,
-                       bb_pin_sink_count_new,
-                       curr_bb_edge[old_layer_num].xmin,
-                       curr_bb_coord[old_layer_num].xmin,
-                       bb_edge_new[old_layer_num].xmin,
-                       bb_coord_new[old_layer_num].xmin);
-        if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
-            return;
-        }
-    }
-
-    if (y_old == curr_bb_coord[old_layer_num].ymax) {
-        update_bb_edge(net_id,
-                       bb_edge_new,
-                       bb_coord_new,
-                       bb_pin_sink_count_new,
-                       curr_bb_edge[old_layer_num].ymax,
-                       curr_bb_coord[old_layer_num].ymax,
-                       bb_edge_new[old_layer_num].ymax,
-                       bb_coord_new[old_layer_num].ymax);
-        if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
-            return;
-        }
-    } else if (y_old == curr_bb_coord[old_layer_num].ymin) {
-        update_bb_edge(net_id,
-                       bb_edge_new,
-                       bb_coord_new,
-                       bb_pin_sink_count_new,
-                       curr_bb_edge[old_layer_num].ymin,
-                       curr_bb_coord[old_layer_num].ymin,
-                       bb_edge_new[old_layer_num].ymin,
-                       bb_coord_new[old_layer_num].ymin);
-        if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) {
-            return;
-        }
-    }
-
-    add_block_to_bb(pin_new_loc,
-                    curr_bb_edge[new_layer_num],
-                    curr_bb_coord[new_layer_num],
-                    bb_edge_new[new_layer_num],
-                    bb_coord_new[new_layer_num]);
-}
-
-static void update_bb_pin_sink_count(ClusterNetId /* net_id */,
-                                     const t_physical_tile_loc& pin_old_loc,
-                                     const t_physical_tile_loc& pin_new_loc,
-                                     const vtr::NdMatrixProxy<int, 1> curr_layer_pin_sink_count,
-                                     vtr::NdMatrixProxy<int, 1> bb_pin_sink_count_new,
-                                     bool is_output_pin) {
-    VTR_ASSERT(curr_layer_pin_sink_count[pin_old_loc.layer_num] > 0 || is_output_pin == 1);
-    for (int layer_num = 0; layer_num < g_vpr_ctx.device().grid.get_num_layers(); layer_num++) {
-        bb_pin_sink_count_new[layer_num] = curr_layer_pin_sink_count[layer_num];
-    }
-    if (!is_output_pin) {
-        bb_pin_sink_count_new[pin_old_loc.layer_num] -= 1;
-        bb_pin_sink_count_new[pin_new_loc.layer_num] += 1;
-    }
-}
-
-static inline void update_bb_edge(ClusterNetId net_id,
-                                  std::vector<t_2D_bb>& bb_edge_new,
-                                  std::vector<t_2D_bb>& bb_coord_new,
-                                  vtr::NdMatrixProxy<int, 1> bb_layer_pin_sink_count,
-                                  const int& old_num_block_on_edge,
-                                  const int& old_edge_coord,
-                                  int& new_num_block_on_edge,
-                                  int& new_edge_coord) {
-    if (old_num_block_on_edge == 1) {
-        get_layer_bb_from_scratch(net_id,
-                                  bb_edge_new,
-                                  bb_coord_new,
-                                  bb_layer_pin_sink_count);
-        bb_updated_before[net_id] = GOT_FROM_SCRATCH;
-        return;
-    } else {
-        new_num_block_on_edge = old_num_block_on_edge - 1;
-        new_edge_coord = old_edge_coord;
-    }
-}
-
-static void add_block_to_bb(const t_physical_tile_loc& new_pin_loc,
-                            const t_2D_bb& bb_edge_old,
-                            const t_2D_bb& bb_coord_old,
-                            t_2D_bb& bb_edge_new,
-                            t_2D_bb& bb_coord_new) {
-    int x_new = new_pin_loc.x;
-    int y_new = new_pin_loc.y;
-
-    if (x_new > bb_coord_old.xmax) {
-        bb_edge_new.xmax = 1;
-        bb_coord_new.xmax = x_new;
-    } else if (x_new == bb_coord_old.xmax) {
-        bb_edge_new.xmax = bb_edge_old.xmax + 1;
-    }
-
-    if (x_new < bb_coord_old.xmin) {
-        bb_edge_new.xmin = 1;
-        bb_coord_new.xmin = x_new;
-    } else if (x_new == bb_coord_old.xmin) {
-        bb_edge_new.xmin = bb_edge_old.xmin + 1;
-    }
-
-    if (y_new > bb_coord_old.ymax) {
-        bb_edge_new.ymax = 1;
-        bb_coord_new.ymax = y_new;
-    } else if (y_new == bb_coord_old.ymax) {
-        bb_edge_new.ymax = bb_edge_old.ymax + 1;
-    }
-
-    if (y_new < bb_coord_old.ymin) {
-        bb_edge_new.ymin = 1;
-        bb_coord_new.ymin = y_new;
-    } else if (y_new == bb_coord_old.ymin) {
-        bb_edge_new.ymin = bb_edge_old.ymin + 1;
-    }
-}
-
-static void free_fast_cost_update() {
-    chanx_place_cost_fac.clear();
-    chany_place_cost_fac.clear();
-}
-
-static void alloc_and_load_for_fast_cost_update(float place_cost_exp) {
-    /* Allocates and loads the chanx_place_cost_fac and chany_place_cost_fac *
-     * arrays with the inverse of the average number of tracks per channel   *
-     * between [subhigh] and [sublow].  This is only useful for the cost     *
-     * function that takes the length of the net bounding box in each        *
-     * dimension divided by the average number of tracks in that direction.  *
-     * For other cost functions, you don't have to bother calling this       *
-     * routine; when using the cost function described above, however, you   *
-     * must always call this routine after you call init_chan and before     *
-     * you do any placement cost determination.  The place_cost_exp factor   *
-     * specifies to what power the width of the channel should be taken --   *
-     * larger numbers make narrower channels more expensive.                 */
-
-    auto& device_ctx = g_vpr_ctx.device();
-
-    /* Access arrays below as chan?_place_cost_fac[subhigh][sublow].  Since   *
-     * subhigh must be greater than or equal to sublow, we only need to       *
-     * allocate storage for the lower half of a matrix.                       */
-
-    //chanx_place_cost_fac = new float*[(device_ctx.grid.height())];
-    //for (size_t i = 0; i < device_ctx.grid.height(); i++)
-    //    chanx_place_cost_fac[i] = new float[(i + 1)];
-
-    //chany_place_cost_fac = new float*[(device_ctx.grid.width() + 1)];
-    //for (size_t i = 0; i < device_ctx.grid.width(); i++)
-    //    chany_place_cost_fac[i] = new float[(i + 1)];
-
-    chanx_place_cost_fac.resize({device_ctx.grid.height(), device_ctx.grid.height() + 1});
-    chany_place_cost_fac.resize({device_ctx.grid.width(), device_ctx.grid.width() + 1});
-
-    /* First compute the number of tracks between channel high and channel *
-     * low, inclusive, in an efficient manner.                             */
-
-    chanx_place_cost_fac[0][0] = device_ctx.chan_width.x_list[0];
-
-    for (size_t high = 1; high < device_ctx.grid.height(); high++) {
-        chanx_place_cost_fac[high][high] = device_ctx.chan_width.x_list[high];
-        for (size_t low = 0; low < high; low++) {
-            chanx_place_cost_fac[high][low] = chanx_place_cost_fac[high - 1][low]
-                                              + device_ctx.chan_width.x_list[high];
-        }
-    }
-
-    /* Now compute the inverse of the average number of tracks per channel *
-     * between high and low.  The cost function divides by the average     *
-     * number of tracks per channel, so by storing the inverse I convert   *
-     * this to a faster multiplication.  Take this final number to the     *
-     * place_cost_exp power -- numbers other than one mean this is no      *
-     * longer a simple "average number of tracks"; it is some power of     *
-     * that, allowing greater penalization of narrow channels.             */
-
-    for (size_t high = 0; high < device_ctx.grid.height(); high++)
-        for (size_t low = 0; low <= high; low++) {
-            /* Since we will divide the wiring cost by the average channel *
-             * capacity between high and low, having only 0 width channels *
-             * will result in infinite wiring capacity normalization       *
-             * factor, and extremely bad placer behaviour. Hence we change *
-             * this to a small (1 track) channel capacity instead.         */
-            if (chanx_place_cost_fac[high][low] == 0.0f) {
-                VTR_LOG_WARN("CHANX place cost fac is 0 at %d %d\n", high, low);
-                chanx_place_cost_fac[high][low] = 1.0f;
-            }
-
-            chanx_place_cost_fac[high][low] = (high - low + 1.)
-                                              / chanx_place_cost_fac[high][low];
-            chanx_place_cost_fac[high][low] = pow(
-                (double)chanx_place_cost_fac[high][low],
-                (double)place_cost_exp);
-        }
-
-    /* Now do the same thing for the y-directed channels.  First get the  *
-     * number of tracks between channel high and channel low, inclusive.  */
-
-    chany_place_cost_fac[0][0] = device_ctx.chan_width.y_list[0];
-
-    for (size_t high = 1; high < device_ctx.grid.width(); high++) {
-        chany_place_cost_fac[high][high] = device_ctx.chan_width.y_list[high];
-        for (size_t low = 0; low < high; low++) {
-            chany_place_cost_fac[high][low] = chany_place_cost_fac[high - 1][low]
-                                              + device_ctx.chan_width.y_list[high];
-        }
-    }
-
-    /* Now compute the inverse of the average number of tracks per channel *
-     * between high and low.  Take to specified power.                     */
-
-    for (size_t high = 0; high < device_ctx.grid.width(); high++)
-        for (size_t low = 0; low <= high; low++) {
-            /* Since we will divide the wiring cost by the average channel *
-             * capacity between high and low, having only 0 width channels *
-             * will result in infinite wiring capacity normalization       *
-             * factor, and extremely bad placer behaviour. Hence we change *
-             * this to a small (1 track) channel capacity instead.         */
-            if (chany_place_cost_fac[high][low] == 0.0f) {
-                VTR_LOG_WARN("CHANY place cost fac is 0 at %d %d\n", high, low);
-                chany_place_cost_fac[high][low] = 1.0f;
-            }
-
-            chany_place_cost_fac[high][low] = (high - low + 1.)
-                                              / chany_place_cost_fac[high][low];
-            chany_place_cost_fac[high][low] = pow(
-                (double)chany_place_cost_fac[high][low],
-                (double)place_cost_exp);
-        }
-}
-
 static void check_place(const t_placer_costs& costs,
                         const PlaceDelayModel* delay_model,
                         const PlacerCriticalities* criticalities,
diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h
index d65d460446c..36c544ef344 100644
--- a/vpr/src/place/place_util.h
+++ b/vpr/src/place/place_util.h
@@ -383,5 +383,4 @@ inline bool is_loc_on_chip(t_physical_tile_loc loc) {
  *        require to check for all legality constraints.
  */
 bool macro_can_be_placed(t_pl_macro pl_macro, t_pl_loc head_pos, bool check_all_legality);
-
 #endif
diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp
index 220690cee21..6f47cf100cb 100644
--- a/vpr/src/util/vpr_utils.cpp
+++ b/vpr/src/util/vpr_utils.cpp
@@ -434,8 +434,8 @@ static AtomPinId find_atom_pin_for_pb_route_id(ClusterBlockId clb, int pb_route_
     return AtomPinId::INVALID();
 }
 
-/* Return the net pin which drive the CLB input connected to sink_pb_pin_id, or nullptr if none (i.e. driven internally)
- *   clb: Block in which the the sink pin is located on
+/* Return the net pin which drives the CLB input connected to sink_pb_pin_id, or nullptr if none (i.e. driven internally)
+ *   clb: Block on which the sink pin is located
  *   sink_pb_pin_id: The physical pin index of the sink pin on the block
  *
  *  Returns a tuple containing