diff --git a/vpr/src/analytical_place/analytical_solver.cpp b/vpr/src/analytical_place/analytical_solver.cpp
index d085caa7d7d..5e0c3009766 100644
--- a/vpr/src/analytical_place/analytical_solver.cpp
+++ b/vpr/src/analytical_place/analytical_solver.cpp
@@ -236,41 +236,17 @@ void QPHybridSolver::init_linear_system() {
     A_sparse.setFromTriplets(tripletList.begin(), tripletList.end());
 }
 
-/**
- * @brief Helper method to update the linear system with anchors to the current
- *        partial placement.
- *
- * For each moveable block (with row = i) in the netlist:
- *      A[i][i] = A[i][i] + coeff_pseudo_anchor;
- *      b[i] = b[i] + pos[block(i)] * coeff_pseudo_anchor;
- * Where coeff_pseudo_anchor grows with each iteration.
- *
- * This is basically a fast way of adding a connection between all moveable
- * blocks in the netlist and their target fixed placement location.
- *
- * See add_connection_to_system.
- *
- *  @param A_sparse_diff    The ceofficient matrix to update.
- *  @param b_x_diff         The x-dimension constant vector to update.
- *  @param b_y_diff         The y-dimension constant vector to update.
- *  @param p_placement      The location the moveable blocks should be anchored
- *                          to.
- *  @param num_moveable_blocks  The number of moveable blocks in the netlist.
- *  @param row_id_to_blk_id     Lookup for the row id from the APBlock Id.
- *  @param iteration        The current iteration of the Global Placer.
- */
-static inline void update_linear_system_with_anchors(Eigen::SparseMatrix<double>& A_sparse_diff,
-                                                     Eigen::VectorXd& b_x_diff,
-                                                     Eigen::VectorXd& b_y_diff,
-                                                     PartialPlacement& p_placement,
-                                                     size_t num_moveable_blocks,
-                                                     vtr::vector<APRowId, APBlockId> row_id_to_blk_id,
-                                                     unsigned iteration) {
+void QPHybridSolver::update_linear_system_with_anchors(
+    Eigen::SparseMatrix<double>& A_sparse_diff,
+    Eigen::VectorXd& b_x_diff,
+    Eigen::VectorXd& b_y_diff,
+    PartialPlacement& p_placement,
+    unsigned iteration) {
     // Anchor weights grow exponentially with iteration.
-    double coeff_pseudo_anchor = 0.01 * std::exp((double)iteration / 5);
-    for (size_t row_id_idx = 0; row_id_idx < num_moveable_blocks; row_id_idx++) {
+    double coeff_pseudo_anchor = anchor_weight_mult_ * std::exp((double)iteration / anchor_weight_exp_fac_);
+    for (size_t row_id_idx = 0; row_id_idx < num_moveable_blocks_; row_id_idx++) {
         APRowId row_id = APRowId(row_id_idx);
-        APBlockId blk_id = row_id_to_blk_id[row_id];
+        APBlockId blk_id = row_id_to_blk_id_[row_id];
         double pseudo_w = coeff_pseudo_anchor;
         A_sparse_diff.coeffRef(row_id_idx, row_id_idx) += pseudo_w;
         b_x_diff(row_id_idx) += pseudo_w * p_placement.block_x_locs[blk_id];
@@ -289,8 +265,7 @@ void QPHybridSolver::solve(unsigned iteration, PartialPlacement& p_placement) {
     //                         anchor-points (fixed block positions).
     if (iteration != 0) {
         update_linear_system_with_anchors(A_sparse_diff, b_x_diff, b_y_diff,
-                                          p_placement, num_moveable_blocks_,
-                                          row_id_to_blk_id_, iteration);
+                                          p_placement, iteration);
     }
     // Verify that the constant vectors are valid.
     VTR_ASSERT_DEBUG(!b_x_diff.hasNaN() && "b_x has NaN!");
diff --git a/vpr/src/analytical_place/analytical_solver.h b/vpr/src/analytical_place/analytical_solver.h
index bab26ab3b9d..02e5bafd8b1 100644
--- a/vpr/src/analytical_place/analytical_solver.h
+++ b/vpr/src/analytical_place/analytical_solver.h
@@ -155,6 +155,21 @@ class QPHybridSolver : public AnalyticalSolver {
     /// sparse.
     static constexpr size_t star_num_pins_threshold = 3;
 
+    // The following constants are used to configure the anchor weighting.
+    // The weights of anchors grow exponentially each iteration by the following
+    // function:
+    //      anchor_w = anchor_weight_mult_ * e^(iter / anchor_weight_exp_fac_)
+    // The numbers below were empirically found to work well.
+
+    /// @brief Multiplier for the anchor weight. The smaller this number is, the
+    ///        weaker the anchors will be at the start.
+    static constexpr double anchor_weight_mult_ = 0.001;
+
+    /// @brief Factor for controlling the growth of the exponential term in the
+    ///        weight factor function. Larger numbers will cause the anchor
+    ///        weights to grow slower.
+    static constexpr double anchor_weight_exp_fac_ = 5.0;
+
     /**
      * @brief Initializes the linear system of Ax = b_x and Ay = b_y based on
      *        the APNetlist and the fixed APBlock locations.
@@ -165,6 +180,35 @@ class QPHybridSolver : public AnalyticalSolver {
      */
     void init_linear_system();
 
+    /**
+     * @brief Helper method to update the linear system with anchors to the
+     *        current partial placement.
+     *
+     * For each moveable block (with row = i) in the netlist:
+     *      A[i][i] = A[i][i] + coeff_pseudo_anchor;
+     *      b[i] = b[i] + pos[block(i)] * coeff_pseudo_anchor;
+     * Where coeff_pseudo_anchor grows with each iteration.
+     *
+     * This is basically a fast way of adding a connection between all moveable
+     * blocks in the netlist and their target fixed placement location.
+     *
+     * See add_connection_to_system.
+     *
+     *  @param A_sparse_diff    The coefficient matrix to update.
+     *  @param b_x_diff         The x-dimension constant vector to update.
+     *  @param b_y_diff         The y-dimension constant vector to update.
+     *  @param p_placement      The location the moveable blocks should be
+     *                          anchored to.
+     *  @param iteration        The current iteration of the Global Placer.
+     *                          The anchor weight grows exponentially with
+     *                          this value.
+     */
+    void update_linear_system_with_anchors(Eigen::SparseMatrix<double>& A_sparse_diff,
+                                           Eigen::VectorXd& b_x_diff,
+                                           Eigen::VectorXd& b_y_diff,
+                                           PartialPlacement& p_placement,
+                                           unsigned iteration);
+
     // The following variables represent the linear system without any anchor
     // points. These are filled in the constructor and never modified.
     // When the anchor-points are taken into consideration, the diagonal of the
diff --git a/vpr/src/analytical_place/flat_placement_bins.h b/vpr/src/analytical_place/flat_placement_bins.h
index 89cd8900eb8..e94ed958312 100644
--- a/vpr/src/analytical_place/flat_placement_bins.h
+++ b/vpr/src/analytical_place/flat_placement_bins.h
@@ -111,7 +111,6 @@ class FlatPlacementBins {
     inline const vtr::Rect<double>& bin_region(FlatPlacementBinId bin_id) const {
         VTR_ASSERT(bin_id.is_valid());
         return bin_region_[bin_id];
-        ;
     }
 
     /**
diff --git a/vpr/src/analytical_place/flat_placement_density_manager.cpp b/vpr/src/analytical_place/flat_placement_density_manager.cpp
index 11209e19759..f4c37b191c8 100644
--- a/vpr/src/analytical_place/flat_placement_density_manager.cpp
+++ b/vpr/src/analytical_place/flat_placement_density_manager.cpp
@@ -80,6 +80,7 @@ FlatPlacementDensityManager::FlatPlacementDensityManager(const APNetlist& ap_net
                 auto tile_type = device_grid.get_physical_type(tile_loc);
                 int tw = tile_type->width;
                 int th = tile_type->height;
+                VTR_ASSERT_SAFE(tw != 0 && th != 0);
                 vtr::Rect<double> new_bin_region(vtr::Point<double>(x, y),
                                                  vtr::Point<double>(x + tw,
                                                                     y + th));
@@ -162,6 +163,10 @@ void FlatPlacementDensityManager::remove_block_from_bin(APBlockId blk_id,
 }
 
 void FlatPlacementDensityManager::import_placement_into_bins(const PartialPlacement& p_placement) {
+    // Empty the bins such that all blocks are no longer within the bins.
+    empty_bins();
+
+    // Insert each block in the netlist into their bin based on their placement.
     // TODO: Maybe import the fixed block locations in the constructor and then
     //       only import the moveable block locations.
     for (APBlockId blk_id : ap_netlist_.blocks()) {
@@ -215,9 +220,9 @@ void FlatPlacementDensityManager::empty_bins() {
     // Reset all of the bins and their utilizations.
     for (FlatPlacementBinId bin_id : bins_.bins()) {
         bins_.remove_all_blocks_from_bin(bin_id);
-        bin_utilization_[bin_id] = PrimitiveVector();
-        bin_overfill_[bin_id] = calc_bin_overfill(bin_utilization_[bin_id], bin_capacity_[bin_id]);
-        bin_underfill_[bin_id] = calc_bin_underfill(bin_utilization_[bin_id], bin_capacity_[bin_id]);
+        bin_utilization_[bin_id].clear();
+        bin_overfill_[bin_id].clear();
+        bin_underfill_[bin_id] = bin_capacity_[bin_id];
     }
     // Once all the bins are reset, all bins should be empty; therefore no bins
     // are overfilled.
diff --git a/vpr/src/analytical_place/flat_placement_density_manager.h b/vpr/src/analytical_place/flat_placement_density_manager.h
index d2038bbe34a..ad3977589c1 100644
--- a/vpr/src/analytical_place/flat_placement_density_manager.h
+++ b/vpr/src/analytical_place/flat_placement_density_manager.h
@@ -185,6 +185,9 @@ class FlatPlacementDensityManager {
      * @brief Import the given flat placement into the bins.
      *
      * This will place AP blocks into the bins that they are placed over.
+     *
+     * This will reset the bins before importing the placement. Anything inside
+     * the bins will be removed.
      */
     void import_placement_into_bins(const PartialPlacement& p_placement);
 
diff --git a/vpr/src/analytical_place/flat_placement_mass_calculator.cpp b/vpr/src/analytical_place/flat_placement_mass_calculator.cpp
index a7e34120357..c99aaf29339 100644
--- a/vpr/src/analytical_place/flat_placement_mass_calculator.cpp
+++ b/vpr/src/analytical_place/flat_placement_mass_calculator.cpp
@@ -234,6 +234,7 @@ static void print_capacities(const std::vector<PrimitiveVector>& logical_block_t
         VTR_LOG("\n");
     }
     VTR_LOG("\n");
+    // TODO: Print the masses of each model.
 }
 
 FlatPlacementMassCalculator::FlatPlacementMassCalculator(const APNetlist& ap_netlist,
diff --git a/vpr/src/analytical_place/global_placer.cpp b/vpr/src/analytical_place/global_placer.cpp
index 4c51e454f18..0bcbb8f7052 100644
--- a/vpr/src/analytical_place/global_placer.cpp
+++ b/vpr/src/analytical_place/global_placer.cpp
@@ -13,12 +13,16 @@
 #include "analytical_solver.h"
 #include "ap_flow_enums.h"
 #include "ap_netlist.h"
+#include "ap_netlist_fwd.h"
 #include "atom_netlist.h"
 #include "device_grid.h"
+#include "flat_placement_bins.h"
 #include "flat_placement_density_manager.h"
+#include "globals.h"
 #include "partial_legalizer.h"
 #include "partial_placement.h"
 #include "physical_types.h"
+#include "primitive_vector.h"
 #include "vpr_error.h"
 #include "vtr_log.h"
 #include "vtr_time.h"
@@ -90,9 +94,74 @@ SimPLGlobalPlacer::SimPLGlobalPlacer(e_partial_legalizer partial_legalizer_type,
     partial_legalizer_ = make_partial_legalizer(partial_legalizer_type,
                                                 ap_netlist_,
                                                 density_manager_,
+                                                prepacker,
                                                 log_verbosity_);
 }
 
+/**
+ * @brief Helper method to print the statistics on the given partial placement.
+ */
+static void print_placement_stats(const PartialPlacement& p_placement,
+                                  const APNetlist& ap_netlist,
+                                  FlatPlacementDensityManager& density_manager) {
+    // Print the placement HPWL
+    VTR_LOG("\tPlacement HPWL: %f\n", p_placement.get_hpwl(ap_netlist));
+
+    // Print density information. The placement is re-imported into the bins
+    // first (modifying density_manager) so the data matches p_placement.
+    density_manager.import_placement_into_bins(p_placement);
+
+    // Print the number of overfilled bins.
+    size_t num_overfilled_bins = density_manager.get_overfilled_bins().size();
+    VTR_LOG("\tNumber of overfilled bins: %zu\n", num_overfilled_bins);
+
+    // Print the average overfill
+    float total_overfill = 0.0f;
+    for (FlatPlacementBinId bin_id : density_manager.get_overfilled_bins()) {
+        total_overfill += density_manager.get_bin_overfill(bin_id).manhattan_norm();
+    }
+    float avg_overfill = 0.0f;
+    if (num_overfilled_bins != 0)
+        avg_overfill = total_overfill / static_cast<float>(num_overfilled_bins);
+    VTR_LOG("\tAverage overfill magnitude: %f\n", avg_overfill);
+
+    // Print the number of overfilled tiles per type.
+    const auto& physical_tile_types = g_vpr_ctx.device().physical_tile_types;
+    const auto& device_grid = g_vpr_ctx.device().grid;
+    std::vector<unsigned> overfilled_tiles_by_type(physical_tile_types.size(), 0);
+    for (FlatPlacementBinId bin_id : density_manager.get_overfilled_bins()) {
+        const auto& bin_region = density_manager.flat_placement_bins().bin_region(bin_id);
+        auto tile_loc = t_physical_tile_loc((int)bin_region.xmin(),
+                                            (int)bin_region.ymin(),
+                                            0);
+        auto tile_type = device_grid.get_physical_type(tile_loc);
+        overfilled_tiles_by_type[tile_type->index]++;
+    }
+    VTR_LOG("\tOverfilled bins by tile type:\n");
+    for (size_t type_idx = 0; type_idx < physical_tile_types.size(); type_idx++) {
+        VTR_LOG("\t\t%10s: %u\n",
+                physical_tile_types[type_idx].name.c_str(),
+                overfilled_tiles_by_type[type_idx]);
+    }
+
+    // Count the blocks in overfilled bins whose mass does not fit within the
+    // capacity of the bin they were placed in (i.e. an incompatible bin).
+    unsigned num_misplaced_blocks = 0;
+    for (FlatPlacementBinId bin_id : density_manager.get_overfilled_bins()) {
+        for (APBlockId ap_blk_id : density_manager.flat_placement_bins().bin_contained_blocks(bin_id)) {
+            // Get the blk mass and project it onto the capacity of its bin.
+            PrimitiveVector blk_mass = density_manager.mass_calculator().get_block_mass(ap_blk_id);
+            PrimitiveVector projected_mass = blk_mass;
+            projected_mass.project(density_manager.get_bin_capacity(bin_id));
+            // If the projected mass does not match the block's mass, this
+            // implies that this block does not belong in this bin.
+            if (projected_mass != blk_mass)
+                num_misplaced_blocks++;
+        }
+    }
+    VTR_LOG("\tNumber of blocks in an incompatible bin: %u\n", num_misplaced_blocks);
+}
+
 /**
  * @brief Helper method to print the header of the per-iteration status updates
  *        of the global placer.
@@ -177,6 +246,13 @@ PartialPlacement SimPLGlobalPlacer::place() {
         if (hpwl_relative_gap < target_hpwl_relative_gap_)
             break;
     }
+
+    // Print some statistics on the final placement.
+    VTR_LOG("Placement after Global Placement:\n");
+    print_placement_stats(p_placement,
+                          ap_netlist_,
+                          *density_manager_);
+
     // Return the placement from the final iteration.
     // TODO: investigate saving the best solution found so far. It should be
     //       cheap to save a copy of the PartialPlacement object.
diff --git a/vpr/src/analytical_place/global_placer.h b/vpr/src/analytical_place/global_placer.h
index 9a120c8e5a8..196de86220c 100644
--- a/vpr/src/analytical_place/global_placer.h
+++ b/vpr/src/analytical_place/global_placer.h
@@ -116,7 +116,8 @@ class SimPLGlobalPlacer : public GlobalPlacer {
     ///        lower-bound placements. The placer will stop if the difference
     ///        between the two bounds, normalized to the upper-bound, is smaller
     ///        than this number.
-    static constexpr double target_hpwl_relative_gap_ = 0.10;
+    ///        This number was empirically found to work well.
+    static constexpr double target_hpwl_relative_gap_ = 0.05;
 
     /// @brief The solver which generates the lower-bound placement.
     std::unique_ptr<AnalyticalSolver> solver_;
diff --git a/vpr/src/analytical_place/model_grouper.cpp b/vpr/src/analytical_place/model_grouper.cpp
new file mode 100644
index 00000000000..0aca963c96a
--- /dev/null
+++ b/vpr/src/analytical_place/model_grouper.cpp
@@ -0,0 +1,184 @@
+/**
+ * @file
+ * @author  Alex Singer
+ * @date    March 2025
+ * @brief   Implementation of a model grouper class which groups models together
+ *          which must be legalized together in a flat placement.
+ */
+
+#include "model_grouper.h"
+#include <queue>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+#include "cad_types.h"
+#include "logic_types.h"
+#include "prepack.h"
+#include "vtr_assert.h"
+#include "vtr_log.h"
+
+/**
+ * @brief Recursive helper function which gets the models in the given pattern
+ *        block and in every pattern block reachable through its connections.
+ *
+ *  @param pattern_block
+ *      The pattern block to get the models of. May be nullptr (ignored).
+ *  @param models
+ *      A set of the model indices found so far; updated in place.
+ *  @param block_visited
+ *      A vector of flags, indexed by pattern block id, to signify which
+ *      blocks have been visited.
+ */
+static void get_pattern_models_recurr(t_pack_pattern_block* pattern_block,
+                                      std::unordered_set<int>& models,
+                                      std::vector<bool>& block_visited) {
+    // If the pattern block is invalid or this block has been visited, return.
+    if (pattern_block == nullptr || block_visited[pattern_block->block_id]) {
+        return;
+    }
+
+    // Mark this block as visited and insert its model index into the set.
+    block_visited[pattern_block->block_id] = true;
+    models.insert(pattern_block->pb_type->model->index);
+
+    // Walk the linked list of connections, recursing into both endpoints.
+    t_pack_pattern_connections* connection = pattern_block->connections;
+    while (connection != nullptr) {
+        get_pattern_models_recurr(connection->from_block, models, block_visited);
+        get_pattern_models_recurr(connection->to_block, models, block_visited);
+        connection = connection->next;
+    }
+}
+
+/**
+ * @brief Entry point into the recursive function above. Returns the set of
+ *        model indices found by traversing the pattern from its root block.
+ */
+static std::unordered_set<int> get_pattern_models(const t_pack_patterns& pack_pattern) {
+    std::unordered_set<int> models_in_pattern;
+
+    // One visited flag per pattern block (num_blocks of them), all false.
+    std::vector<bool> block_visited(pack_pattern.num_blocks, false);
+    // Begin the recursion with the root block.
+    get_pattern_models_recurr(pack_pattern.root_block, models_in_pattern, block_visited);
+
+    return models_in_pattern;
+}
+
+ModelGrouper::ModelGrouper(const Prepacker& prepacker,
+                           t_model* user_models,
+                           t_model* library_models,
+                           int log_verbosity) {
+    /**
+     * Group the models together based on their pack patterns. If model A and
+     * model B form a pattern, and model B and model C form a pattern, then
+     * models A, B, and C are in a group together.
+     *
+     * An efficient way to find this is to represent this problem as a graph,
+     * where each node is a model and each edge is a relationship where a model
+     * is in a pack pattern with another model. We can then perform BFS to find
+     * the connected sub-graphs which will be the groups.
+     */
+
+    // Count the models and record each model's name by its index.
+    // TODO: Clean up the models vectors in VTR.
+    std::unordered_map<int, char*> model_name;
+    unsigned num_models = 0;
+    t_model* model = library_models;
+    while (model != nullptr) {
+        model_name[model->index] = model->name;
+        num_models++;
+        model = model->next;
+    }
+    model = user_models;
+    while (model != nullptr) {
+        model_name[model->index] = model->name;
+        num_models++;
+        model = model->next;
+    }
+
+    // Create an adjacency list for the edges. An edge is formed where two
+    // models share a pack pattern together.
+    std::vector<std::unordered_set<int>> adj_list(num_models);
+    for (const t_pack_patterns& pack_pattern : prepacker.get_all_pack_patterns()) {
+        // Get the models within this pattern.
+        auto models_in_pattern = get_pattern_models(pack_pattern);
+        VTR_ASSERT_SAFE(!models_in_pattern.empty());
+
+        // Debug print the models within the pattern.
+        if (log_verbosity >= 20) {
+            VTR_LOG("Pattern: %s\n\t", pack_pattern.name);
+            for (int model_idx : models_in_pattern) {
+                VTR_LOG("%s ", model_name[model_idx]);
+            }
+            VTR_LOG("\n");
+        }
+
+        // Connect each of the models to the first model in the pattern. Since
+        // we only care if there exist a path from each model to another, we do
+        // not need to connect the models in a clique.
+        int first_model_idx = *models_in_pattern.begin();
+        for (int model_idx : models_in_pattern) {
+            adj_list[model_idx].insert(first_model_idx);
+            adj_list[first_model_idx].insert(model_idx);
+        }
+    }
+
+    // Perform BFS to group the models.
+    VTR_LOGV(log_verbosity >= 20,
+             "Finding model groups...\n");
+    std::queue<int> node_queue;
+    model_group_id_.resize(num_models, ModelGroupId::INVALID());
+    for (int model_idx = 0; model_idx < (int)num_models; model_idx++) {
+        // If this model is already in a group, skip it.
+        if (model_group_id_[model_idx].is_valid()) {
+            VTR_LOGV(log_verbosity >= 20,
+                     "\t(%d -> %zu)\n", model_idx, size_t(model_group_id_[model_idx]));
+            continue;
+        }
+
+        ModelGroupId group_id = ModelGroupId(group_ids_.size());
+        // Put the model in this group and push to the queue.
+        model_group_id_[model_idx] = group_id;
+        node_queue.push(model_idx);
+
+        while (!node_queue.empty()) {
+            // Pop a node from the queue, and explore its neighbors.
+            int node_model_idx = node_queue.front();
+            node_queue.pop();
+            for (int neighbor_model_idx : adj_list[node_model_idx]) {
+                // If this neighbor is already in this group, skip it.
+                if (model_group_id_[neighbor_model_idx].is_valid()) {
+                    VTR_ASSERT_SAFE(model_group_id_[neighbor_model_idx] == group_id);
+                    continue;
+                }
+                // Put the neighbor in this group and push it to the queue.
+                model_group_id_[neighbor_model_idx] = group_id;
+                node_queue.push(neighbor_model_idx);
+            }
+        }
+
+        VTR_LOGV(log_verbosity >= 20,
+                 "\t(%d -> %zu)\n", model_idx, size_t(model_group_id_[model_idx]));
+        group_ids_.push_back(group_id);
+    }
+
+    // Create a lookup between each group and the models it contains.
+    groups_.resize(groups().size());
+    for (int model_idx = 0; model_idx < (int)num_models; model_idx++) {
+        groups_[model_group_id_[model_idx]].push_back(model_idx);
+    }
+
+    // Debug printing for each group.
+    if (log_verbosity >= 20) {
+        for (ModelGroupId group_id : groups()) {
+            const std::vector<int>& group = groups_[group_id];
+            VTR_LOG("Group %zu:\n", size_t(group_id));
+            VTR_LOG("\tSize = %zu\n", group.size());
+            VTR_LOG("\tContained models:\n");
+            for (int model_idx : group) {
+                VTR_LOG("\t\t%s\n", model_name[model_idx]);
+            }
+        }
+    }
+}
diff --git a/vpr/src/analytical_place/model_grouper.h b/vpr/src/analytical_place/model_grouper.h
new file mode 100644
index 00000000000..d5a9113d6c1
--- /dev/null
+++ b/vpr/src/analytical_place/model_grouper.h
@@ -0,0 +1,115 @@
+/**
+ * @file
+ * @author  Alex Singer
+ * @date    March 2025
+ * @brief   Declaration of a model grouper class which groups together models
+ *          that must be legalized together in a flat placement.
+ */
+
+#pragma once
+
+#include <vector>
+#include "vtr_assert.h"
+#include "vtr_range.h"
+#include "vtr_strong_id.h"
+#include "vtr_vector.h"
+#include "vtr_vector_map.h"
+
+// Forward declarations.
+class Prepacker;
+struct t_model;
+
+/// @brief Tag for the ModelGroupId
+struct model_group_id_tag;
+
+/// @brief A unique ID of a group of models created by the ModelGrouper class.
+typedef vtr::StrongId<model_group_id_tag, size_t> ModelGroupId;
+
+/**
+ * @brief A manager class for grouping together models that must be legalized
+ *        together in a flat placement due to how they form molecules with each
+ *        other.
+ *
+ * When performing legalization of a flat placement, it is desirable to split
+ * the problem into independent legalization problems. We cannot place all of
+ * the blocks of different model types independently since some blocks are made
+ * of multiple different types of models. We wish to find the minimum number of
+ * models that we need to legalize at the same time.
+ *
+ * This class groups models together based on the pack patterns that they can
+ * form in the prepacker. If model A and model B can form a pack pattern, and
+ * model B and model C can form a pack pattern, then models A, B, and C form a
+ * group and must be legalized together.
+ *
+ * This class also manages what models each group contains and the group of each
+ * model, where the user can use IDs to get relevant information.
+ */
+class ModelGrouper {
+  public:
+    // Iterator for the model group IDs
+    typedef typename vtr::vector_map<ModelGroupId, ModelGroupId>::const_iterator group_iterator;
+
+    // Range for the model group IDs
+    typedef typename vtr::Range<group_iterator> group_range;
+
+  public:
+    ModelGrouper() = delete;
+
+    /**
+     * @brief Constructor for the model grouper class. Groups are formed here.
+     *
+     *  @param prepacker
+     *      The prepacker used to create molecules in the flat placement. This
+     *      provides the pack patterns for forming the groups.
+     *  @param user_models
+     *      Linked list of user-provided models.
+     *  @param library_models
+     *      Linked list of library models.
+     *  @param log_verbosity
+     *      The verbosity of log messages in the grouper class.
+     */
+    ModelGrouper(const Prepacker& prepacker,
+                 t_model* user_models,
+                 t_model* library_models,
+                 int log_verbosity);
+
+    /**
+     * @brief Returns a list of all valid group IDs.
+     */
+    inline group_range groups() const {
+        return vtr::make_range(group_ids_.begin(), group_ids_.end());
+    }
+
+    /**
+     * @brief Gets the group ID of the given model (index must be in range).
+     */
+    inline ModelGroupId get_model_group_id(int model_index) const {
+        VTR_ASSERT_SAFE_MSG(model_index >= 0 && model_index < (int)model_group_id_.size(),
+                            "Model index outside of range for model_group_id_");
+        ModelGroupId group_id = model_group_id_[model_index];
+        VTR_ASSERT_SAFE_MSG(group_id.is_valid(),
+                            "Model is not in a group");
+        return group_id;
+    }
+
+    /**
+     * @brief Gets the models in the given group.
+     */
+    inline const std::vector<int>& get_models_in_group(ModelGroupId group_id) const {
+        VTR_ASSERT_SAFE_MSG(group_id.is_valid(),
+                            "Invalid group id");
+        VTR_ASSERT_SAFE_MSG(groups_[group_id].size() != 0,
+                            "Group is empty");
+        return groups_[group_id];
+    }
+
+  private:
+    /// @brief List of all group IDs.
+    vtr::vector_map<ModelGroupId, ModelGroupId> group_ids_;
+
+    /// @brief A lookup between models and the group ID that contains them.
+    std::vector<ModelGroupId> model_group_id_;
+
+    /// @brief A lookup between each group ID and the models in that group.
+    vtr::vector<ModelGroupId, std::vector<int>> groups_;
+};
diff --git a/vpr/src/analytical_place/partial_legalizer.cpp b/vpr/src/analytical_place/partial_legalizer.cpp
index f91f66b74e0..3d5d8dd25e9 100644
--- a/vpr/src/analytical_place/partial_legalizer.cpp
+++ b/vpr/src/analytical_place/partial_legalizer.cpp
@@ -11,6 +11,8 @@
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
+#include <functional>
+#include <iterator>
 #include <limits>
 #include <memory>
 #include <queue>
@@ -23,22 +25,27 @@
 #include "flat_placement_density_manager.h"
 #include "flat_placement_mass_calculator.h"
 #include "globals.h"
+#include "model_grouper.h"
 #include "partial_placement.h"
 #include "physical_types.h"
+#include "prepack.h"
 #include "primitive_vector.h"
 #include "vpr_context.h"
 #include "vpr_error.h"
 #include "vtr_assert.h"
 #include "vtr_geometry.h"
 #include "vtr_log.h"
+#include "vtr_math.h"
 #include "vtr_prefix_sum.h"
 #include "vtr_strong_id.h"
+#include "vtr_time.h"
 #include "vtr_vector.h"
 #include "vtr_vector_map.h"
 
 std::unique_ptr<PartialLegalizer> make_partial_legalizer(e_partial_legalizer legalizer_type,
                                                          const APNetlist& netlist,
                                                          std::shared_ptr<FlatPlacementDensityManager> density_manager,
+                                                         const Prepacker& prepacker,
                                                          int log_verbosity) {
     // Based on the partial legalizer type passed in, build the partial legalizer.
     switch (legalizer_type) {
@@ -49,6 +56,7 @@ std::unique_ptr<PartialLegalizer> make_partial_legalizer(e_partial_legalizer leg
         case e_partial_legalizer::BI_PARTITIONING:
             return std::make_unique<BiPartitioningPartialLegalizer>(netlist,
                                                                     density_manager,
+                                                                    prepacker,
                                                                     log_verbosity);
         default:
             VPR_FATAL_ERROR(VPR_ERROR_AP,
@@ -592,8 +600,6 @@ static void print_flow_based_legalizer_status(size_t iteration,
 void FlowBasedLegalizer::legalize(PartialPlacement& p_placement) {
     VTR_LOGV(log_verbosity_ >= 10, "Running Flow-Based Legalizer\n");
 
-    // Reset the bins from the previous iteration and prepare for this iteration.
-    density_manager_->empty_bins();
     // Import the partial placement into bins.
     density_manager_->import_placement_into_bins(p_placement);
     // Verify that the placement was imported correctly.
@@ -696,104 +702,375 @@ void FlowBasedLegalizer::legalize(PartialPlacement& p_placement) {
     density_manager_->export_placement_from_bins(p_placement);
 }
 
-// This namespace contains enums and classes used for bi-partitioning.
-namespace {
+PerModelPrefixSum2D::PerModelPrefixSum2D(const FlatPlacementDensityManager& density_manager,
+                                         t_model* user_models,
+                                         t_model* library_models,
+                                         std::function<float(int, size_t, size_t)> lookup) {
+    // Get the number of models in the architecture.
+    // TODO: We really need to clean up how models are stored in VPR...
+    t_model* cur = user_models;
+    int num_models = 0;
+    while (cur != nullptr) {
+        num_models++;
+        cur = cur->next;
+    }
+    cur = library_models;
+    while (cur != nullptr) {
+        num_models++;
+        cur = cur->next;
+    }
 
-/**
- * @brief Enum for the direction of a partition.
- */
-enum class e_partition_dir {
-    VERTICAL,
-    HORIZONTAL
-};
+    // Get the size that the prefix sums should be.
+    size_t width, height, layers;
+    std::tie(width, height, layers) = density_manager.get_overall_placeable_region_size();
 
-/**
- * @brief Spatial window used to spread the blocks contained within.
- *
- * This window's region is identified and grown until it has enough space to
- * accomodate the blocks stored within. This window is then successivly
- * partitioned until it is small enough (blocks are not too dense).
- */
-struct SpreadingWindow {
-    /// @brief The blocks contained within this window.
-    std::vector<APBlockId> contained_blocks;
+    // Create each of the prefix sums.
+    model_prefix_sum_.resize(num_models);
+    for (int model_index = 0; model_index < num_models; model_index++) {
+        model_prefix_sum_[model_index] = vtr::PrefixSum2D<float>(
+            width,
+            height,
+            [&](size_t x, size_t y) {
+                return lookup(model_index, x, y);
+            });
+    }
+}
 
-    /// @brief The 2D region of space that this window covers.
-    vtr::Rect<double> region;
-};
+// Returns the prefix-sum total of one model dimension over `region`.
+float PerModelPrefixSum2D::get_model_sum(int model_index, const vtr::Rect<double>& region) const {
+    VTR_ASSERT_SAFE(model_index >= 0 && model_index < static_cast<int>(model_prefix_sum_.size()));
+    // 1 is subtracted from the max corner; presumably get_sum takes inclusive bounds (confirm).
+    return model_prefix_sum_[model_index].get_sum(region.xmin(),
+                                                  region.ymin(),
+                                                  region.xmax() - 1,
+                                                  region.ymax() - 1);
+}
 
-} // namespace
+// Collects the region sum of every listed model into one PrimitiveVector.
+PrimitiveVector PerModelPrefixSum2D::get_sum(const std::vector<int>& model_indices, const vtr::Rect<double>& region) const {
+    PrimitiveVector region_sums;
+    for (const int idx : model_indices) {
+        VTR_ASSERT_SAFE(region_sums.get_dim_val(idx) == 0.0f); // dimension is expected to still be zero
+        region_sums.set_dim_val(idx, get_model_sum(idx, region));
+    }
+    return region_sums;
+}
 
 BiPartitioningPartialLegalizer::BiPartitioningPartialLegalizer(
     const APNetlist& netlist,
     std::shared_ptr<FlatPlacementDensityManager> density_manager,
+    const Prepacker& prepacker,
     int log_verbosity)
     : PartialLegalizer(netlist, log_verbosity)
-    , density_manager_(density_manager) {}
+    , density_manager_(density_manager)
+    , model_grouper_(prepacker,
+                     g_vpr_ctx.device().arch->models,
+                     g_vpr_ctx.device().arch->model_library,
+                     log_verbosity) {
+    // Compute the capacity prefix sum. Capacity is assumed to not change
+    // between iterations of the partial legalizer.
+    capacity_prefix_sum_ = PerModelPrefixSum2D(
+        *density_manager,
+        g_vpr_ctx.device().arch->models,
+        g_vpr_ctx.device().arch->model_library,
+        [&](int model_index, size_t x, size_t y) {
+            // Get the bin at this grid location.
+            FlatPlacementBinId bin_id = density_manager_->get_bin(x, y, 0);
+            // Get the capacity of the bin for this model.
+            float cap = density_manager_->get_bin_capacity(bin_id).get_dim_val(model_index);
+            VTR_ASSERT_SAFE(cap >= 0.0f);
+            // Bins may be large, but the prefix sum assumes a 1x1 grid of
+            // values. Normalize by the area of the bin to turn this into
+            // a 1x1 bin equivalent.
+            const vtr::Rect<double>& bin_region = density_manager_->flat_placement_bins().bin_region(bin_id);
+            float bin_area = bin_region.width() * bin_region.height();
+            VTR_ASSERT_SAFE(!vtr::isclose(bin_area, 0.f));
+            return cap / bin_area;
+        });
+}
+
+// Imports p_placement into the bins, spreads every model group that has an
+// overfilled bin over non-overlapping windows, then exports the result back.
+void BiPartitioningPartialLegalizer::legalize(PartialPlacement& p_placement) {
+    VTR_LOGV(log_verbosity_ >= 10, "Running Bi-Partitioning Legalizer\n");
+
+    // Prepare the density manager.
+    density_manager_->import_placement_into_bins(p_placement);
+
+    // Quick return. If there are no overfilled bins, there is nothing to spread.
+    if (density_manager_->get_overfilled_bins().size() == 0) {
+        VTR_LOGV(log_verbosity_ >= 10, "No overfilled bins. Nothing to legalize.\n");
+        return;
+    }
+
+    // Logs the number of overfilled bins and their average overfill. `stage`
+    // ("before" / "after") is spliced into the first message.
+    // FIXME: Make this a method in the density manager class.
+    auto log_overfill_status = [this](const char* stage) {
+        size_t num_overfilled_bins = density_manager_->get_overfilled_bins().size();
+        VTR_LOG("\tNumber of overfilled bins %s legalization: %zu\n", stage, num_overfilled_bins);
+        float total_overfill = 0.f;
+        for (FlatPlacementBinId overfilled_bin_id : density_manager_->get_overfilled_bins()) {
+            total_overfill += density_manager_->get_bin_overfill(overfilled_bin_id).manhattan_norm();
+        }
+        // Spreading may leave no overfilled bins; report 0 instead of 0/0 (NaN).
+        float avg_overfill = 0.f;
+        if (num_overfilled_bins != 0)
+            avg_overfill = total_overfill / static_cast<float>(num_overfilled_bins);
+        VTR_LOG("\t\tAverage overfill per overfilled bin: %f\n", avg_overfill);
+    };
+    if (log_verbosity_ >= 10)
+        log_overfill_status("before");
+
+    // 1) Identify the groups that need to be spread
+    std::unordered_set<ModelGroupId> groups_to_spread;
+    for (FlatPlacementBinId overfilled_bin_id : density_manager_->get_overfilled_bins()) {
+        // Get the overfilled models in this bin.
+        const PrimitiveVector& overfill = density_manager_->get_bin_overfill(overfilled_bin_id);
+        std::vector<int> overfilled_models = overfill.get_non_zero_dims();
+        // For each model, insert its group into the set. Set will handle dupes.
+        for (int model_index : overfilled_models) {
+            groups_to_spread.insert(model_grouper_.get_model_group_id(model_index));
+        }
+    }
+
+    // 2) For each group, identify non-overlapping windows and spread
+    vtr::Timer runtime_timer;
+    float window_identification_time = 0.0f;
+    float window_spreading_time = 0.0f;
+    for (ModelGroupId group_id : groups_to_spread) {
+        // group_id is an ID type, not a size_t; convert it explicitly for "%zu".
+        VTR_LOGV(log_verbosity_ >= 10, "\tSpreading group %zu\n", size_t(group_id));
+        // Identify non-overlapping spreading windows.
+        float window_identification_start_time = runtime_timer.elapsed_sec();
+        auto non_overlapping_windows = identify_non_overlapping_windows(group_id);
+        window_identification_time += runtime_timer.elapsed_sec() - window_identification_start_time;
+        // Each group in the set has an overfilled bin, so a window is expected.
+        VTR_ASSERT(non_overlapping_windows.size() != 0);
+
+        // Spread the blocks over the non-overlapping windows.
+        float window_spreading_start_time = runtime_timer.elapsed_sec();
+        spread_over_windows(non_overlapping_windows, p_placement, group_id);
+        window_spreading_time += runtime_timer.elapsed_sec() - window_spreading_start_time;
+    }
+
+    if (log_verbosity_ >= 10) {
+        log_overfill_status("after");
+        VTR_LOG("\tTime spent identifying windows: %g\n", window_identification_time);
+        VTR_LOG("\tTime spent spreading windows: %g\n", window_spreading_time);
+    }
+
+    // Export the legalized placement to the partial placement.
+    density_manager_->export_placement_from_bins(p_placement);
+}
+
+// Builds the spreading windows for one model group: cluster the overfilled
+// bins, grow a window around each cluster, merge overlaps, then pull blocks in.
+std::vector<SpreadingWindow> BiPartitioningPartialLegalizer::identify_non_overlapping_windows(ModelGroupId group_id) {
+    // Step 1: cluster the overfilled bins of this group. This makes creating
+    //         the minimum spanning windows more efficient.
+    const auto bin_clusters = get_overfilled_bin_clusters(group_id);
+
+    // Step 2: create one minimum window per cluster, sized such that there is
+    //         enough space in the window for the atoms inside.
+    std::vector<SpreadingWindow> spreading_windows = get_min_windows_around_clusters(bin_clusters, group_id);
+
+    // Step 3: merge any windows that overlap (updates the vector in place).
+    merge_overlapping_windows(spreading_windows);
+
+    // TODO: Investigate shrinking the windows.
+
+    // Step 4: move the blocks out of their bins and into the windows.
+    move_blocks_into_windows(spreading_windows, group_id);
+    return spreading_windows;
+}
 
 /**
- * @brief Identify spreading windows which contain overfilled bins on the device
- *        and do not overlap.
+ * @brief Helper method to check if the given PrimitiveVector has any values
+ *        in the model dimensions in the given group.
  *
- * This process is split into 3 stages:
- *      1) Identify overfilled bins and grow windows around them. These windows
- *         will grow until there is just enough space to accomodate the blocks
- *         within the window (capacity of the window is larger than the utilization).
- *      2) Merge overlapping windows.
- *      3) Move the blocks within these window regions from their bins into
- *         their windows. This updates the current utilization of bins, making
- *         spreading easier.
+ * This method assumes the vector is non-negative. If the vector had any negative
+ * dimensions, it does not make sense to ask if it is in the group or not.
  */
-static std::vector<SpreadingWindow> identify_non_overlapping_windows(
-    const APNetlist& netlist,
-    FlatPlacementDensityManager& density_manager) {
-    // Identify overfilled bins
-    const std::unordered_set<FlatPlacementBinId>& overfilled_bins = density_manager.get_overfilled_bins();
-
-    // Create a prefix sum for the capacity.
-    // We will need to get the capacity of 2D regions of the device very often
-    // in the algorithm below. This greatly improves the time complexity.
-    // TODO: This should not change between iterations of spreading. This can
-    //       be moved to the constructor.
-    size_t width, height, layers;
-    std::tie(width, height, layers) = density_manager.get_overall_placeable_region_size();
-    vtr::PrefixSum2D<float> capacity_prefix_sum(width, height, [&](size_t x, size_t y) {
-        FlatPlacementBinId bin_id = density_manager.get_bin(x, y, 0);
-        // For now we take the L1 norm of the bin divided by its area.
-        // The L1 norm is just a count of the number of primitives that
-        // can fit into the bin (without caring for primitive type). We
-        // divide by area such that large bins (1x4 for example) get
-        // normalized to 1x1 regions.
-        const vtr::Rect<double>& bin_region = density_manager.flat_placement_bins().bin_region(bin_id);
-        float bin_area = bin_region.width() * bin_region.height();
-        return density_manager.get_bin_capacity(bin_id).manhattan_norm() / bin_area;
-    });
+static bool is_vector_in_group(const PrimitiveVector& vec,
+                               ModelGroupId group_id,
+                               const ModelGrouper& model_grouper) {
+    // Asking about group membership only makes sense for non-negative vectors.
+    VTR_ASSERT_SAFE(vec.is_non_negative());
+    // A single non-zero dimension among the group's models is sufficient.
+    for (const int group_model : model_grouper.get_models_in_group(group_id)) {
+        if (vec.get_dim_val(group_model) != 0.0f)
+            return true;
+    }
+    return false;
+}
 
-    // Create a prefix sum for the utilization.
-    // The utilization of the bins will change between routing iterations, so
-    // this prefix sum must be recomputed.
-    vtr::PrefixSum2D<float> utilization_prefix_sum(width, height, [&](size_t x, size_t y) {
-        FlatPlacementBinId bin_id = density_manager.get_bin(x, y, 0);
-        // This is computed the same way as the capacity prefix sum above.
-        const vtr::Rect<double>& bin_region = density_manager.flat_placement_bins().bin_region(bin_id);
-        float bin_area = bin_region.width() * bin_region.height();
-        return density_manager.get_bin_utilization(bin_id).manhattan_norm() / bin_area;
-    });
+/**
+ * @brief Returns true if the given overfilled bin is overfilled in at least
+ *        one model dimension belonging to the given model group.
+ *
+ * Only the bin's overfill is inspected. Whether the bin has capacity for the
+ * group's models plays no role here.
+ */
+static bool is_overfilled_bin_in_group(FlatPlacementBinId overfilled_bin_id,
+                                       ModelGroupId group_id,
+                                       const FlatPlacementDensityManager& density_manager,
+                                       const ModelGrouper& model_grouper) {
+    const PrimitiveVector& overfill = density_manager.get_bin_overfill(overfilled_bin_id);
+    VTR_ASSERT_SAFE(overfill.is_non_zero());
+    return is_vector_in_group(overfill, group_id, model_grouper);
+}
+
+/**
+ * @brief Returns true if the mass of the given AP block is non-zero in at
+ *        least one model dimension of the given model group.
+ */
+static bool is_block_in_group(APBlockId blk_id,
+                              ModelGroupId group_id,
+                              const FlatPlacementDensityManager& density_manager,
+                              const ModelGrouper& model_grouper) {
+    // The block's mass comes from the density manager's mass calculator.
+    const auto& mass_calculator = density_manager.mass_calculator();
+    return is_vector_in_group(mass_calculator.get_block_mass(blk_id), group_id, model_grouper);
+}
+
+std::vector<FlatPlacementBinCluster> BiPartitioningPartialLegalizer::get_overfilled_bin_clusters(
+    ModelGroupId group_id) {
+    // Cluster this group's overfilled bins with a BFS; nearby bins share a cluster.
+    std::vector<FlatPlacementBinCluster> overfilled_bin_clusters;
+    // Best known distance (in bins) to an overfilled bin; -1 means not yet visited.
+    vtr::vector<FlatPlacementBinId, int> dist(density_manager_->flat_placement_bins().bins().size(), -1);
+    for (FlatPlacementBinId overfilled_bin_id : density_manager_->get_overfilled_bins()) {
+        // If this bin is not overfilled with the models in the group, skip.
+        if (!is_overfilled_bin_in_group(overfilled_bin_id,
+                                        group_id,
+                                        *density_manager_,
+                                        model_grouper_)) {
+            continue;
+        }
+        // If this bin is already in a cluster, skip.
+        if (dist[overfilled_bin_id] != -1)
+            continue;
+        dist[overfilled_bin_id] = 0;
+        // Collect nearby bins into a vector. This bin seeds the new cluster.
+        FlatPlacementBinCluster nearby_bins;
+        nearby_bins.push_back(overfilled_bin_id);
+        // Create a queue and insert the overfilled bin into it.
+        std::queue<FlatPlacementBinId> bin_queue;
+        bin_queue.push(overfilled_bin_id);
+        while (!bin_queue.empty()) {
+            // Pop a bin from queue.
+            FlatPlacementBinId bin_node = bin_queue.front();
+            bin_queue.pop();
+            // If the node is farther from an overfilled bin than the max gap,
+            // do not explore its neighbors.
+            if (dist[bin_node] > max_bin_cluster_gap_)
+                continue;
+            // Explore the neighbors of this bin.
+            for (FlatPlacementBinId neighbor : get_direct_neighbors_of_bin(bin_node, *density_manager_)) {
+                int neighbor_dist = dist[bin_node] + 1;
+                // If this neighbor has already been explored with an equal or
+                // better distance, do not explore it again.
+                if (dist[neighbor] != -1 && dist[neighbor] <= neighbor_dist)
+                    continue;
+                // If the neighbor is an overfilled bin that we care about, add
+                // it to the list of nearby bins and set its distance to 0.
+                if (density_manager_->bin_is_overfilled(neighbor)
+                    && is_overfilled_bin_in_group(neighbor, group_id, *density_manager_, model_grouper_)) {
+                    nearby_bins.push_back(neighbor);
+                    dist[neighbor] = 0;
+                } else {
+                    dist[neighbor] = neighbor_dist;
+                }
+                // Enqueue the neighbor.
+                bin_queue.push(neighbor);
+            }
+        }
+
+        // Move the cluster into the vector of overfilled bin clusters.
+        overfilled_bin_clusters.push_back(std::move(nearby_bins));
+    }
+
+    return overfilled_bin_clusters;
+}
 
-    // 1) For each of the overfilled bins, create and store a minimum window.
-    // TODO: This is a very simple algorithm which currently only uses the number
-    //       of primitives within the regions, not the primitive types. Need to
-    //       investigate this further.
+/**
+ * @brief Decides whether a region is overfilled for a set of models.
+ *
+ * The region counts as overfilled as soon as one of the given models has a
+ * summed utilization strictly greater than its summed capacity.
+ *
+ *  @param region                   The region to query both prefix sums over.
+ *  @param capacity_prefix_sum      Per-model prefix sum of capacity.
+ *  @param utilization_prefix_sum   Per-model prefix sum of utilization.
+ *  @param model_indices            The models (dimensions) to check.
+ */
+static bool is_region_overfilled(const vtr::Rect<double>& region,
+                                 const PerModelPrefixSum2D& capacity_prefix_sum,
+                                 const PerModelPrefixSum2D& utilization_prefix_sum,
+                                 const std::vector<int>& model_indices) {
+    for (const int model : model_indices) {
+        const float model_capacity = capacity_prefix_sum.get_model_sum(model, region);
+        const float model_utilization = utilization_prefix_sum.get_model_sum(model, region);
+        // TODO: Look into adding some head room to account for rounding.
+        const bool model_overfilled = model_utilization > model_capacity;
+        if (model_overfilled)
+            return true;
+    }
+
+    // No model exceeded its capacity (equal utilization is still legal).
+    return false;
+}
+
+std::vector<SpreadingWindow> BiPartitioningPartialLegalizer::get_min_windows_around_clusters(
+    const std::vector<FlatPlacementBinCluster>& overfilled_bin_clusters,
+    ModelGroupId group_id) {
     // TODO: Currently, we greedily grow the region by 1 in all directions until
     //       the capacity is larger than the utilization. This may not produce
     //       the minimum window. Should investigate "touching-up" the windows.
+    // FIXME: It may be a good idea to sort the bins by their overfill here. Then
+    //        we can check for overlap as we go.
+
+    // Get the width, height, and number of layers for the spreading region.
+    // This is used by the growing part of this routine to prevent the windows
+    // from outgrowing the device.
+    size_t width, height, layers;
+    std::tie(width, height, layers) = density_manager_->get_overall_placeable_region_size();
+
+    // Precompute a prefix sum for the current utilization of each 1x1 region
+    // of the device. This needs to be recomputed every time the bins are
+    // modified, so it is recomputed here.
+    PerModelPrefixSum2D utilization_prefix_sum(
+        *density_manager_,
+        g_vpr_ctx.device().arch->models,
+        g_vpr_ctx.device().arch->model_library,
+        [&](int model_index, size_t x, size_t y) {
+            FlatPlacementBinId bin_id = density_manager_->get_bin(x, y, 0);
+            // This is computed the same way as the capacity prefix sum above.
+            const vtr::Rect<double>& bin_region = density_manager_->flat_placement_bins().bin_region(bin_id);
+            float bin_area = bin_region.width() * bin_region.height();
+            float util = density_manager_->get_bin_utilization(bin_id).get_dim_val(model_index);
+            VTR_ASSERT_SAFE(util >= 0.0f);
+            return util / bin_area;
+        });
+
+    // Create windows for each overfilled bin cluster.
     std::vector<SpreadingWindow> windows;
-    for (FlatPlacementBinId bin_id : overfilled_bins) {
-        // Create a new window for this bin.
+    for (const std::vector<FlatPlacementBinId>& overfilled_bin_cluster : overfilled_bin_clusters) {
+        // Create a new window for this cluster of bins.
         SpreadingWindow new_window;
-        // Initialize the region to the region of the bin.
-        new_window.region = density_manager.flat_placement_bins().bin_region(bin_id);
+
+        // Set the region of the window to the bounding box of the cluster of bins.
+        size_t num_bins_in_cluster = overfilled_bin_cluster.size();
+        VTR_ASSERT_SAFE(num_bins_in_cluster != 0);
         vtr::Rect<double>& region = new_window.region;
+        region = density_manager_->flat_placement_bins().bin_region(overfilled_bin_cluster[0]);
+        for (size_t i = 1; i < num_bins_in_cluster; i++) {
+            region = vtr::bounding_box(region,
+                                       density_manager_->flat_placement_bins().bin_region(overfilled_bin_cluster[i]));
+        }
+
+        // Grow the region until it is just large enough to not overfill
         while (true) {
             // Grow the region by 1 on all sides.
             double new_xmin = std::clamp<double>(region.xmin() - 1.0, 0.0, width);
@@ -807,28 +1084,25 @@ static std::vector<SpreadingWindow> identify_non_overlapping_windows(
                 break;
             }
 
-            // If the utilization is lower than the capacity, stop growing.
             region.set_xmin(new_xmin);
             region.set_xmax(new_xmax);
             region.set_ymin(new_ymin);
             region.set_ymax(new_ymax);
-            float region_capacity = capacity_prefix_sum.get_sum(region.xmin(),
-                                                                region.ymin(),
-                                                                region.xmax() - 1,
-                                                                region.ymax() - 1);
-
-            float region_utilization = utilization_prefix_sum.get_sum(region.xmin(),
-                                                                      region.ymin(),
-                                                                      region.xmax() - 1,
-                                                                      region.ymax() - 1);
-            if (region_utilization < region_capacity)
+
+            // If the region is no longer overfilled, stop growing.
+            if (!is_region_overfilled(region, capacity_prefix_sum_, utilization_prefix_sum, model_grouper_.get_models_in_group(group_id)))
                 break;
         }
         // Insert this window into the list of windows.
         windows.emplace_back(std::move(new_window));
     }
 
-    // 2) Merge overlapping bins and store into new array.
+    return windows;
+}
+
+void BiPartitioningPartialLegalizer::merge_overlapping_windows(
+    std::vector<SpreadingWindow>& windows) {
+    // Merge overlapping windows.
     // TODO: This is a very basic merging process which will identify the
     //       minimum region containing both windows; however, after merging it
     //       is very likely that this window will now be too large. Need to
@@ -877,7 +1151,14 @@ static std::vector<SpreadingWindow> identify_non_overlapping_windows(
         non_overlapping_windows.emplace_back(std::move(windows[i]));
     }
 
-    // 3) Move the blocks out of their bins and into the windows.
+    // Store the results into the input window.
+    windows = std::move(non_overlapping_windows);
+}
+
+void BiPartitioningPartialLegalizer::move_blocks_into_windows(
+    std::vector<SpreadingWindow>& non_overlapping_windows,
+    ModelGroupId group_id) {
+    // Move the blocks from their bins into the windows that should contain them.
     // TODO: It may be good for debugging to check if the windows have nothing
     //       to move. This may indicate a problem (overfilled bins of fixed
     //       blocks, overlapping windows, etc.).
@@ -891,49 +1172,56 @@ static std::vector<SpreadingWindow> identify_non_overlapping_windows(
         for (size_t x = lower_x; x <= upper_x; x++) {
             for (size_t y = lower_y; y <= upper_y; y++) {
                 // Get all of the movable blocks from the bin.
-                FlatPlacementBinId bin_id = density_manager.get_bin(x, y, 0);
                 std::vector<APBlockId> moveable_blks;
-                moveable_blks.reserve(density_manager.flat_placement_bins().bin_contained_blocks(bin_id).size());
-                for (APBlockId blk_id : density_manager.flat_placement_bins().bin_contained_blocks(bin_id)) {
-                    if (netlist.block_mobility(blk_id) == APBlockMobility::MOVEABLE)
-                        moveable_blks.push_back(blk_id);
+                FlatPlacementBinId bin_id = density_manager_->get_bin(x, y, 0);
+                const auto& bin_contained_blocks = density_manager_->flat_placement_bins().bin_contained_blocks(bin_id);
+                moveable_blks.reserve(bin_contained_blocks.size());
+                for (APBlockId blk_id : bin_contained_blocks) {
+                    // If this block is not moveable, do not move it.
+                    if (netlist_.block_mobility(blk_id) != APBlockMobility::MOVEABLE)
+                        continue;
+                    // If this block is not in the group, do not move it.
+                    if (!is_block_in_group(blk_id, group_id, *density_manager_, model_grouper_))
+                        continue;
+
+                    moveable_blks.push_back(blk_id);
                 }
                 // Remove the moveable blocks from their bins and store into
                 // the windows.
                 for (APBlockId blk_id : moveable_blks) {
-                    density_manager.remove_block_from_bin(blk_id, bin_id);
+                    density_manager_->remove_block_from_bin(blk_id, bin_id);
                     window.contained_blocks.push_back(blk_id);
                 }
             }
         }
     }
-
-    return non_overlapping_windows;
 }
 
-void BiPartitioningPartialLegalizer::legalize(PartialPlacement& p_placement) {
-    VTR_LOGV(log_verbosity_ >= 10, "Running Bi-Partitioning Legalizer\n");
-
-    // Prepare the density manager.
-    density_manager_->empty_bins();
-    density_manager_->import_placement_into_bins(p_placement);
-
-    // Quick return. If there are no overfilled bins, there is nothing to spread.
-    if (density_manager_->get_overfilled_bins().size() == 0) {
-        VTR_LOGV(log_verbosity_ >= 10, "No overfilled bins. Nothing to legalize.\n");
-        return;
+void BiPartitioningPartialLegalizer::spread_over_windows(std::vector<SpreadingWindow>& non_overlapping_windows,
+                                                         const PartialPlacement& p_placement,
+                                                         ModelGroupId group_id) {
+    if (log_verbosity_ >= 10) {
+        VTR_LOG("\tIdentified %zu non-overlapping spreading windows.\n",
+                non_overlapping_windows.size());
+
+        if (log_verbosity_ >= 20) {
+            for (const SpreadingWindow& window : non_overlapping_windows) {
+                VTR_LOG("\t\t[(%.1f, %.1f), (%.1f, %.1f)]\n",
+                        window.region.xmin(), window.region.ymin(),
+                        window.region.xmax(), window.region.ymax());
+                PrimitiveVector window_capacity = capacity_prefix_sum_.get_sum(model_grouper_.get_models_in_group(group_id),
+                                                                               window.region);
+                VTR_LOG("\t\t\tCapacity: %f\n",
+                        window_capacity.manhattan_norm());
+                VTR_LOG("\t\t\tNumber of contained blocks: %zu\n",
+                        window.contained_blocks.size());
+            }
+        }
     }
 
-    // Identify non-overlapping spreading windows.
-    std::vector<SpreadingWindow> initial_windows = identify_non_overlapping_windows(netlist_, *density_manager_);
-    VTR_ASSERT(initial_windows.size() != 0);
-    VTR_LOGV(log_verbosity_ >= 10,
-             "\tIdentified %zu non-overlapping spreading windows.\n",
-             initial_windows.size());
-
     // Insert the windows into a queue for spreading.
     std::queue<SpreadingWindow> window_queue;
-    for (SpreadingWindow& window : initial_windows) {
+    for (SpreadingWindow& window : non_overlapping_windows) {
         window_queue.push(std::move(window));
     }
 
@@ -971,101 +1259,256 @@ void BiPartitioningPartialLegalizer::legalize(PartialPlacement& p_placement) {
         }
 
         // 2) Partition the window.
-        // Select the partition direction.
-        // To keep it simple, we partition the direction which would cut the
-        // region the most.
-        // TODO: Should explore making the partition line based on the capacity
-        //       of the two partitioned regions. We may want to cut the
-        //       region in half such that the mass of the atoms contained within
-        //       the two future regions is equal.
-        e_partition_dir partition_dir = e_partition_dir::VERTICAL;
-        if (window.region.height() > window.region.width())
-            partition_dir = e_partition_dir::HORIZONTAL;
-
-        // To keep it simple, just cut the space in half.
-        // TODO: Should investigate other cutting techniques. Cutting perfectly
-        //       in half may not be the most efficient technique.
-        SpreadingWindow lower_window;
-        SpreadingWindow upper_window;
-        if (partition_dir == e_partition_dir::VERTICAL) {
-            // Find the x-coordinate of a cut line directly in the middle of the
-            // region. We floor this to prevent fractional cut lines.
-            double pivot_x = std::floor((window.region.xmin() + window.region.xmax()) / 2.0);
-
-            // Cut the region at this cut line.
-            lower_window.region = vtr::Rect<double>(vtr::Point<double>(window.region.xmin(),
-                                                                       window.region.ymin()),
-                                                    vtr::Point<double>(pivot_x,
-                                                                       window.region.ymax()));
-
-            upper_window.region = vtr::Rect<double>(vtr::Point<double>(pivot_x,
-                                                                       window.region.ymin()),
-                                                    vtr::Point<double>(window.region.xmax(),
-                                                                       window.region.ymax()));
-        } else {
-            VTR_ASSERT(partition_dir == e_partition_dir::HORIZONTAL);
-            // Similarly in the y direction, find the non-fractional y coordinate
-            // to make a horizontal cut.
-            double pivot_y = std::floor((window.region.ymin() + window.region.ymax()) / 2.0);
-
-            // Then cut the window.
-            lower_window.region = vtr::Rect<double>(vtr::Point<double>(window.region.xmin(),
-                                                                       window.region.ymin()),
-                                                    vtr::Point<double>(window.region.xmax(),
-                                                                       pivot_y));
-
-            upper_window.region = vtr::Rect<double>(vtr::Point<double>(window.region.xmin(),
-                                                                       pivot_y),
-                                                    vtr::Point<double>(window.region.xmax(),
-                                                                       window.region.ymax()));
-        }
+        auto partitioned_window = partition_window(window);
 
         // 3) Partition the blocks.
-        // For now, just evenly partition the blocks based on their solved
-        // positions.
-        // TODO: This is a huge simplification. We do not even know if the lower
-        //       partition has space for the blocks that want to be on that side!
-        //       Instead of just using x/y position, we also need to take into
-        //       account the mass of the blocks and ensure that there is enough
-        //       capacity for the given block's mass. One idea is to partition
-        //       the blocks using this basic approach and then fixing up any
-        //       blocks that should not be on the given side (due to type or
-        //       capacity constraints).
-        if (partition_dir == e_partition_dir::VERTICAL) {
-            // Sort the blocks in the window by the x coordinate.
-            std::sort(window.contained_blocks.begin(), window.contained_blocks.end(), [&](APBlockId a, APBlockId b) {
-                return p_placement.block_x_locs[a] < p_placement.block_x_locs[b];
-            });
+        partition_blocks_in_window(window, partitioned_window, group_id, p_placement);
 
-        } else {
-            VTR_ASSERT(partition_dir == e_partition_dir::HORIZONTAL);
-            // Sort the blocks in the window by the y coordinate.
-            std::sort(window.contained_blocks.begin(), window.contained_blocks.end(), [&](APBlockId a, APBlockId b) {
-                return p_placement.block_y_locs[a] < p_placement.block_y_locs[b];
-            });
+        // 4) Enqueue the new windows.
+        window_queue.push(std::move(partitioned_window.lower_window));
+        window_queue.push(std::move(partitioned_window.upper_window));
+
+        // Pop the top element off the queue. This will invalidate the window
+        // object.
+        window_queue.pop();
+    }
+
+    if (log_verbosity_ >= 10) {
+        VTR_LOG("\t%zu finalized windows.\n",
+                finished_windows.size());
+
+        if (log_verbosity_ >= 30) {
+            for (const SpreadingWindow& window : finished_windows) {
+                VTR_LOG("\t\t[(%.1f, %.1f), (%.1f, %.1f)]\n",
+                        window.region.xmin(), window.region.ymin(),
+                        window.region.xmax(), window.region.ymax());
+                PrimitiveVector window_capacity = capacity_prefix_sum_.get_sum(model_grouper_.get_models_in_group(group_id),
+                                                                               window.region);
+                VTR_LOG("\t\t\tCapacity: %f\n",
+                        window_capacity.manhattan_norm());
+                VTR_LOG("\t\t\tNumber of contained blocks: %zu\n",
+                        window.contained_blocks.size());
+            }
         }
+    }
+
+    // Move the blocks into the bins.
+    move_blocks_out_of_windows(finished_windows);
+
+    // Verify that the bins are valid after moving blocks back from windows.
+    VTR_ASSERT_SAFE(density_manager_->verify());
+}
+
+PartitionedWindow BiPartitioningPartialLegalizer::partition_window(SpreadingWindow& window) {
+    PartitionedWindow partitioned_window;
+
+    // Select the partition direction.
+    // To keep it simple, we cut across the longer side of the region: a
+    // vertical cut unless the region is strictly taller than it is wide.
+    // TODO: Should explore making the partition line based on the capacity
+    //       of the two partitioned regions. We may want to cut the
+    //       region in half such that the mass of the atoms contained within
+    //       the two future regions is equal.
+    partitioned_window.partition_dir = e_partition_dir::VERTICAL;
+    if (window.region.height() > window.region.width())
+        partitioned_window.partition_dir = e_partition_dir::HORIZONTAL;
+
+    // To keep it simple, just cut the space in half.
+    // TODO: Should investigate other cutting techniques. Cutting perfectly
+    //       in half may not be the most efficient technique.
+    SpreadingWindow& lower_window = partitioned_window.lower_window;
+    SpreadingWindow& upper_window = partitioned_window.upper_window;
+    partitioned_window.pivot_pos = 0.f;
+    if (partitioned_window.partition_dir == e_partition_dir::VERTICAL) {
+        // Find the x-coordinate of a cut line directly in the middle of the
+        // region. We floor this to prevent fractional cut lines.
+        double pivot_x = std::floor((window.region.xmin() + window.region.xmax()) / 2.0);
+
+        // Cut the region at this line; both halves share the pivot_x edge.
+        lower_window.region = vtr::Rect<double>(vtr::Point<double>(window.region.xmin(),
+                                                                   window.region.ymin()),
+                                                vtr::Point<double>(pivot_x,
+                                                                   window.region.ymax()));
+
+        upper_window.region = vtr::Rect<double>(vtr::Point<double>(pivot_x,
+                                                                   window.region.ymin()),
+                                                vtr::Point<double>(window.region.xmax(),
+                                                                   window.region.ymax()));
+        partitioned_window.pivot_pos = pivot_x;
+    } else {
+        VTR_ASSERT(partitioned_window.partition_dir == e_partition_dir::HORIZONTAL);
+        // Similarly in the y direction, find the non-fractional y coordinate
+        // to make a horizontal cut.
+        double pivot_y = std::floor((window.region.ymin() + window.region.ymax()) / 2.0);
+
+        // Then cut the window; both halves share the pivot_y edge.
+        lower_window.region = vtr::Rect<double>(vtr::Point<double>(window.region.xmin(),
+                                                                   window.region.ymin()),
+                                                vtr::Point<double>(window.region.xmax(),
+                                                                   pivot_y));
+
+        upper_window.region = vtr::Rect<double>(vtr::Point<double>(window.region.xmin(),
+                                                                   pivot_y),
+                                                vtr::Point<double>(window.region.xmax(),
+                                                                   window.region.ymax()));
+        partitioned_window.pivot_pos = pivot_y;
+    }
+
+    return partitioned_window;
+}
+
+void BiPartitioningPartialLegalizer::partition_blocks_in_window(
+    SpreadingWindow& window,
+    PartitionedWindow& partitioned_window,
+    ModelGroupId group_id,
+    const PartialPlacement& p_placement) {
+    // Distribute window.contained_blocks between the lower and upper sub-windows.
+    SpreadingWindow& lower_window = partitioned_window.lower_window;
+    SpreadingWindow& upper_window = partitioned_window.upper_window;
+
+    // Get the capacity of each window partition.
+    const std::vector<int>& model_indices = model_grouper_.get_models_in_group(group_id);
+    PrimitiveVector lower_window_capacity = capacity_prefix_sum_.get_sum(model_indices,
+                                                                         lower_window.region);
+    PrimitiveVector upper_window_capacity = capacity_prefix_sum_.get_sum(model_indices,
+                                                                         upper_window.region);
+
+    // Due to the division by the area, we may get numerical underflows /
+    // overflows which accumulate. If they accumulate in the positive
+    // direction, it is not a big deal; but in the negative direction it
+    // will cause problems with the algorithm below. Clamp any negative
+    // numbers to 0.
+    lower_window_capacity.relu();
+    upper_window_capacity.relu();
+    PrimitiveVector lower_window_underfill = lower_window_capacity;
+    PrimitiveVector upper_window_underfill = upper_window_capacity;
+    VTR_ASSERT_SAFE(lower_window_underfill.is_non_negative());
+    VTR_ASSERT_SAFE(upper_window_underfill.is_non_negative());
+
+    // FIXME: We need to take into account the current utilization of the
+    //        fixed blocks... We need to take into account that they are there.
+    //        Currently we assume the underfill is the capacity
+    //        Without this, we may overfill blocks which have fixed blocks in
+    //        them.
+
+    // If the lower window has no space, put all of the blocks in the upper window.
+    // NOTE: We give some room due to numerical overflows from the prefix sum.
+    if (lower_window_underfill.manhattan_norm() < 0.01f) {
+        upper_window.contained_blocks = std::move(window.contained_blocks);
+        return;
+    }
+    // If the upper window has no space, put all of the blocks in the lower window.
+    if (upper_window_underfill.manhattan_norm() < 0.01f) {
+        lower_window.contained_blocks = std::move(window.contained_blocks);
+        return;
+    }
 
-        // Find the pivot block position.
-        size_t pivot = window.contained_blocks.size() / 2;
+    // Reserve space in each of the windows to make insertion faster.
+    upper_window.contained_blocks.reserve(window.contained_blocks.size());
+    lower_window.contained_blocks.reserve(window.contained_blocks.size());
+
+    // Sort the blocks and get the pivot index. The pivot index is the index in
+    // the windows contained block which decides which sub-window the block
+    // wants to be in. The blocks at indices [0, pivot) want to be in the lower
+    // window, blocks at indices [pivot, num_blks) want to be in the upper window.
+    // This want is based on the solved positions of the blocks.
+    size_t pivot;
+    if (partitioned_window.partition_dir == e_partition_dir::VERTICAL) {
+        // Sort the blocks in the window by the x coordinate.
+        std::sort(window.contained_blocks.begin(), window.contained_blocks.end(), [&](APBlockId a, APBlockId b) {
+            return p_placement.block_x_locs[a] < p_placement.block_x_locs[b];
+        });
+        auto upper = std::upper_bound(window.contained_blocks.begin(),
+                                      window.contained_blocks.end(),
+                                      partitioned_window.pivot_pos,
+                                      [&](double value, APBlockId blk_id) {
+                                          return value < p_placement.block_x_locs[blk_id];
+                                      });
+        pivot = std::distance(window.contained_blocks.begin(), upper);
+    } else {
+        VTR_ASSERT(partitioned_window.partition_dir == e_partition_dir::HORIZONTAL);
+        // Sort the blocks in the window by the y coordinate.
+        std::sort(window.contained_blocks.begin(), window.contained_blocks.end(), [&](APBlockId a, APBlockId b) {
+            return p_placement.block_y_locs[a] < p_placement.block_y_locs[b];
+        });
+        auto upper = std::upper_bound(window.contained_blocks.begin(),
+                                      window.contained_blocks.end(),
+                                      partitioned_window.pivot_pos,
+                                      [&](double value, APBlockId blk_id) {
+                                          return value < p_placement.block_y_locs[blk_id];
+                                      });
+        pivot = std::distance(window.contained_blocks.begin(), upper);
+    }
 
-        // Copy the blocks to the windows based on the pivot.
-        for (size_t i = 0; i < pivot; i++) {
+    // Try to place the blocks that want to be in the lower window from lower
+    // to upper.
+    std::vector<APBlockId> unplaced_blocks;
+    for (size_t i = 0; i < pivot; i++) {
+        const PrimitiveVector& blk_mass = density_manager_->mass_calculator().get_block_mass(window.contained_blocks[i]);
+        VTR_ASSERT_SAFE(lower_window_underfill.is_non_negative());
+        // Try to put the blk in the window.
+        lower_window_underfill -= blk_mass;
+        if (lower_window_underfill.is_non_negative())
+            // If the underfill is not negative, then we can add it to the window.
             lower_window.contained_blocks.push_back(window.contained_blocks[i]);
+        else {
+            // If the underfill went negative, undo the addition and mark this
+            // block as unplaced.
+            lower_window_underfill += blk_mass;
+            unplaced_blocks.push_back(window.contained_blocks[i]);
         }
-        for (size_t i = pivot; i < window.contained_blocks.size(); i++) {
+    }
+    // Try to place the blocks that want to be in the upper window from upper
+    // to lower.
+    // NOTE: This needs to be an int in case the pivot is 0.
+    for (int i = static_cast<int>(window.contained_blocks.size()) - 1; i >= static_cast<int>(pivot); i--) {
+        const PrimitiveVector& blk_mass = density_manager_->mass_calculator().get_block_mass(window.contained_blocks[i]);
+        VTR_ASSERT_SAFE(upper_window_underfill.is_non_negative());
+        upper_window_underfill -= blk_mass;
+        if (upper_window_underfill.is_non_negative())
             upper_window.contained_blocks.push_back(window.contained_blocks[i]);
+        else {
+            upper_window_underfill += blk_mass;
+            unplaced_blocks.push_back(window.contained_blocks[i]);
         }
+    }
 
-        // 4) Enqueue the new windows.
-        window_queue.push(std::move(lower_window));
-        window_queue.push(std::move(upper_window));
-
-        // Pop the top element off the queue. This will invalidate the window
-        // object.
-        window_queue.pop();
+    // Handle the unplaced blocks.
+    // To handle these blocks, we will try to balance the overfill in both
+    // windows. To do this we sort the unplaced blocks by largest mass to
+    // smallest mass. Then we place each block in the window with the highest
+    // underfill.
+    std::sort(unplaced_blocks.begin(),
+              unplaced_blocks.end(),
+              [&](APBlockId a, APBlockId b) {
+                  const auto& blk_a_mass = density_manager_->mass_calculator().get_block_mass(a);
+                  const auto& blk_b_mass = density_manager_->mass_calculator().get_block_mass(b);
+                  return blk_a_mass.manhattan_norm() > blk_b_mass.manhattan_norm();
+              });
+    for (APBlockId blk_id : unplaced_blocks) {
+        // Project a copy of each window's underfill onto the mass (the running
+        // underfill must stay intact) to compare only dimensions the mass uses.
+        const PrimitiveVector& blk_mass = density_manager_->mass_calculator().get_block_mass(blk_id);
+        PrimitiveVector projected_lower_window_underfill = lower_window_underfill;
+        projected_lower_window_underfill.project(blk_mass);
+        PrimitiveVector projected_upper_window_underfill = upper_window_underfill;
+        projected_upper_window_underfill.project(blk_mass);
+        // Put the block in the window with a higher underfill. This tries to
+        // balance the overfill as much as possible. This works even if the
+        // overfill becomes negative.
+        if (projected_lower_window_underfill.manhattan_norm() >= projected_upper_window_underfill.manhattan_norm()) {
+            lower_window.contained_blocks.push_back(blk_id);
+            lower_window_underfill -= blk_mass;
+        } else {
+            upper_window.contained_blocks.push_back(blk_id);
+            upper_window_underfill -= blk_mass;
+        }
     }
+}
+
+void BiPartitioningPartialLegalizer::move_blocks_out_of_windows(
+    std::vector<SpreadingWindow>& finished_windows) {
 
-    // Move the blocks into the bins.
     for (const SpreadingWindow& window : finished_windows) {
         // Get the bin at the center of the window.
         vtr::Point<double> center = get_center_of_rect(window.region);
@@ -1079,10 +1522,4 @@ void BiPartitioningPartialLegalizer::legalize(PartialPlacement& p_placement) {
             density_manager_->insert_block_into_bin(blk_id, bin_id);
         }
     }
-
-    // Verify that the bins are valid before export.
-    VTR_ASSERT(density_manager_->verify());
-
-    // Export the legalized placement to the partial placement.
-    density_manager_->export_placement_from_bins(p_placement);
 }
diff --git a/vpr/src/analytical_place/partial_legalizer.h b/vpr/src/analytical_place/partial_legalizer.h
index 2921465fae3..a6cf5d30bc9 100644
--- a/vpr/src/analytical_place/partial_legalizer.h
+++ b/vpr/src/analytical_place/partial_legalizer.h
@@ -13,12 +13,16 @@
 
 #pragma once
 
+#include <functional>
 #include <memory>
 #include <vector>
 #include "ap_netlist_fwd.h"
 #include "flat_placement_bins.h"
 #include "flat_placement_density_manager.h"
+#include "model_grouper.h"
 #include "primitive_vector.h"
+#include "vtr_geometry.h"
+#include "vtr_prefix_sum.h"
 #include "vtr_vector.h"
 
 // Forward declarations
@@ -90,6 +94,7 @@ class PartialLegalizer {
 std::unique_ptr<PartialLegalizer> make_partial_legalizer(e_partial_legalizer legalizer_type,
                                                          const APNetlist& netlist,
                                                          std::shared_ptr<FlatPlacementDensityManager> density_manager,
+                                                         const Prepacker& prepacker,
                                                          int log_verbosity);
 
 /**
@@ -240,6 +245,97 @@ class FlowBasedLegalizer : public PartialLegalizer {
     void legalize(PartialPlacement& p_placement) final;
 };
 
+/**
+ * @brief A cluster of flat placement bins.
+ */
+using FlatPlacementBinCluster = std::vector<FlatPlacementBinId>;
+
+/**
+ * @brief Enum for the direction of a partition.
+ */
+enum class e_partition_dir {
+    VERTICAL,  ///< Cut at a fixed x position; yields left / right partitions.
+    HORIZONTAL ///< Cut at a fixed y position; yields bottom / top partitions.
+};
+
+/**
+ * @brief Spatial window used to spread the blocks contained within.
+ *
+ * This window's region is identified and grown until it has enough space to
+ * accommodate the blocks stored within. This window is then successively
+ * partitioned until it is small enough (blocks are not too dense).
+ */
+struct SpreadingWindow {
+    /// @brief The blocks contained within this window.
+    std::vector<APBlockId> contained_blocks;
+
+    /// @brief The 2D region of space that this window covers.
+    vtr::Rect<double> region;
+};
+
+/**
+ * @brief Struct to hold the information from partitioning a window. Contains
+ *        the two window partitions and some information about how they were
+ *        generated.
+ */
+struct PartitionedWindow {
+    /// @brief The direction of the partition.
+    e_partition_dir partition_dir;
+
+    /// @brief The x (vertical) or y (horizontal) position the parent window was split at.
+    double pivot_pos;
+
+    /// @brief The lower window. This is the left partition when the direction
+    ///        is vertical, and the bottom partition when the direction is
+    ///        horizontal.
+    SpreadingWindow lower_window;
+
+    /// @brief The upper window. This is the right partition when the direction
+    ///        is vertical, and the top partition when the direction is
+    ///        horizontal.
+    SpreadingWindow upper_window;
+};
+
+/**
+ * @brief Wrapper class around the prefix sum class which creates a prefix sum
+ *        for each model type and has helper methods for getting the sums over
+ *        regions.
+ */
+class PerModelPrefixSum2D {
+  public:
+    PerModelPrefixSum2D() = default;
+
+    /**
+     * @brief Construct prefix sums for each of the models in the architecture.
+     *
+     * Uses the density manager to get the size of the placeable region.
+     *
+     * The lookup is a callable used to populate the prefix sum. It is given
+     * the model index, x, and y of an entry and returns that entry's value.
+     */
+    PerModelPrefixSum2D(const FlatPlacementDensityManager& density_manager,
+                        t_model* user_models,
+                        t_model* library_models,
+                        std::function<float(int, size_t, size_t)> lookup);
+
+    /**
+     * @brief Get the sum for a given model over the given region.
+     */
+    float get_model_sum(int model_index,
+                        const vtr::Rect<double>& region) const;
+
+    /**
+     * @brief Get the multi-dimensional sum over the given model indices over
+     *        the given region.
+     */
+    PrimitiveVector get_sum(const std::vector<int>& model_indices,
+                            const vtr::Rect<double>& region) const;
+
+  private:
+    /// @brief Per-Model Prefix Sums
+    std::vector<vtr::PrefixSum2D<float>> model_prefix_sum_;
+};
+
 /**
  * @brief A bi-paritioning spreading full legalizer.
  *
@@ -258,6 +354,19 @@ class FlowBasedLegalizer : public PartialLegalizer {
  *          GPlace3.0: https://doi.org/10.1145/3233244
  */
 class BiPartitioningPartialLegalizer : public PartialLegalizer {
+  private:
+    /// @brief The maximum gap between overfilled bins we can have in a flat
+    ///        placement bin cluster. For example, if this is set to 1, we will
+    ///        allow two overfilled bins to be clustered together if they only
+    ///        have 1 non-overfilled bin of gap between them.
+    /// The rationale behind this is that it allows us to predict that the windows
+    /// created for each cluster will overlap if they are within some gap distance.
+    /// Increasing this number too much may cluster bins together too much and
+    /// create large windows; decreasing this number will put more pressure on
+    /// the window generation code, which can increase window size and runtime.
+    /// TODO: Should this be distance instead of number of bins?
+    static constexpr int max_bin_cluster_gap_ = 1;
+
   public:
     /**
      * @brief Constructor for the bi-partitioning partial legalizer.
@@ -267,6 +376,7 @@ class BiPartitioningPartialLegalizer : public PartialLegalizer {
      */
     BiPartitioningPartialLegalizer(const APNetlist& netlist,
                                    std::shared_ptr<FlatPlacementDensityManager> density_manager,
+                                   const Prepacker& prepacker,
                                    int log_verbosity);
 
     /**
@@ -278,8 +388,130 @@ class BiPartitioningPartialLegalizer : public PartialLegalizer {
      */
     void legalize(PartialPlacement& p_placement) final;
 
+  private:
+    // ========================================================================
+    //      Identifying spreading windows
+    // ========================================================================
+
+    /**
+     * @brief Identify spreading windows which contain overfilled bins in the
+     *        given model group on the device and do not overlap.
+     *
+     * This process is split into 4 stages:
+     *      1) Overfilled bins are identified and clustered.
+     *      2) Grow windows around the overfilled bin clusters. These windows
+     *         will grow until there is just enough space to accommodate the blocks
+     *         within the window (capacity of the window is larger than the utilization).
+     *      3) Merge overlapping windows.
+     *      4) Move the blocks within these window regions from their bins into
+     *         their windows. This updates the current utilization of bins, making
+     *         spreading easier.
+     *
+     * We identify non-overlapping windows for different model groups independently
+     * for a few reasons:
+     *  - Each model group, by design, can be spread independent of each other.
+     *    This reduces the problem size by the number of groups.
+     *  - Without model groups, one block placed on the wrong side of the chip
+     *    may create a window the size of the entire chip! This would rip up and
+     *    spread all the blocks in the chip, which is very expensive.
+     *  - This allows us to ignore block models which are already in legal
+     *    positions.
+     */
+    std::vector<SpreadingWindow> identify_non_overlapping_windows(ModelGroupId group_id);
+
+    /**
+     * @brief Identifies clusters of overfilled bins for the given model group.
+     *
+     * This locates clusters of overfilled bins which are within a given
+     * distance from each other.
+     */
+    std::vector<FlatPlacementBinCluster> get_overfilled_bin_clusters(ModelGroupId group_id);
+
+    /**
+     * @brief Creates and grows minimum spanning windows around the given
+     *        overfilled bin clusters.
+     *
+     * Here, minimum means that the windows are just large enough such that the
+     * capacity of the bins within the window is larger than the utilization for
+     * the given model group.
+     */
+    std::vector<SpreadingWindow> get_min_windows_around_clusters(
+        const std::vector<FlatPlacementBinCluster>& overfilled_bin_clusters,
+        ModelGroupId group_id);
+
+    /**
+     * @brief Merges overlapping windows in the given vector of windows.
+     *
+     * The resulting merged windows is stored in the given windows object.
+     */
+    void merge_overlapping_windows(std::vector<SpreadingWindow>& windows);
+
+    /**
+     * @brief Moves the blocks out of their bins and into their window.
+     *
+     * Only blocks in the given model group will be moved.
+     */
+    void move_blocks_into_windows(std::vector<SpreadingWindow>& non_overlapping_windows,
+                                  ModelGroupId group_id);
+
+    // ========================================================================
+    //      Spreading blocks over windows
+    // ========================================================================
+
+    /**
+     * @brief Spread the blocks over each of the given non-overlapping windows.
+     *
+     * The partial placement solution from the solver is used to decide which
+     * window partition to put a block into. The model group this window is
+     * spreading over can make it more efficient to make decisions.
+     */
+    void spread_over_windows(std::vector<SpreadingWindow>& non_overlapping_windows,
+                             const PartialPlacement& p_placement,
+                             ModelGroupId group_id);
+
+    /**
+     * @brief Partition the given window into two sub-windows.
+     *
+     * We return extra information about how the window was created; for example,
+     * the direction of the partition (vertical / horizontal) and the position
+     * of the cut.
+     */
+    PartitionedWindow partition_window(SpreadingWindow& window);
+
+    /**
+     * @brief Partition the blocks in the given window into the partitioned
+     *        windows.
+     *
+     * This is kept separate from splitting the physical window region for
+     * cleanliness. After this point, the window will not have any atoms in
+     * it.
+     */
+    void partition_blocks_in_window(SpreadingWindow& window,
+                                    PartitionedWindow& partitioned_window,
+                                    ModelGroupId group_id,
+                                    const PartialPlacement& p_placement);
+
+    /**
+     * @brief Move the blocks out of the given windows and put them back into
+     *        the correct bin according to the window that contains them.
+     */
+    void move_blocks_out_of_windows(std::vector<SpreadingWindow>& finished_windows);
+
   private:
     /// @brief The density manager which manages the capacity and utilization
     ///        of regions of the device.
     std::shared_ptr<FlatPlacementDensityManager> density_manager_;
+
+    /// @brief Grouper object which handles grouping together models which must
+    ///        be spread together. Models are grouped based on the pack patterns
+    ///        that they can form with each other.
+    ModelGrouper model_grouper_;
+
+    /// @brief The prefix sum for the capacity of the device, as given by the
+    ///        density manager. We will need to get the capacity of 2D regions
+    ///        of the device very often for this partial legalizer. This data
+    ///        structure greatly improves the time complexity of this operation.
+    ///
+    /// This is populated in the constructor and not modified.
+    PerModelPrefixSum2D capacity_prefix_sum_;
 };
diff --git a/vpr/src/analytical_place/primitive_vector.h b/vpr/src/analytical_place/primitive_vector.h
index 3297d417915..d76ae8b509d 100644
--- a/vpr/src/analytical_place/primitive_vector.h
+++ b/vpr/src/analytical_place/primitive_vector.h
@@ -10,8 +10,11 @@
 
 #pragma once
 
+#include <cmath>
 #include <cstdlib>
 #include <unordered_map>
+#include <vector>
+#include "vtr_log.h"
 
 /**
  * @brief A sparse vector class to store an M-dimensional quantity of primitives
@@ -48,9 +51,24 @@ class PrimitiveVector {
      * This is a common enough feature to use its own setter.
      */
     inline void add_val_to_dim(float val, size_t dim) {
-        if (data_.count(dim) == 0)
-            data_[dim] = 0.f;
-        data_[dim] += val;
+        auto it = data_.find(dim);
+        if (it == data_.end())
+            data_.insert({dim, val});
+        else {
+            it->second += val;
+        }
+    }
+
+    /**
+     * @brief Subtract the value from the given dimension.
+     */
+    inline void subtract_val_from_dim(float val, size_t dim) {
+        auto it = data_.find(dim);
+        if (it == data_.end())
+            data_.insert({dim, -1.0f * val});
+        else {
+            it->second -= val;
+        }
     }
 
     /**
@@ -104,19 +122,26 @@ class PrimitiveVector {
      */
     inline PrimitiveVector& operator+=(const PrimitiveVector& rhs) {
         for (const auto& p : rhs.data_) {
-            float dim_val = get_dim_val(p.first);
-            set_dim_val(p.first, dim_val + p.second);
+            add_val_to_dim(p.second, p.first);
         }
         return *this;
     }
 
+    /**
+     * @brief Element-wise addition of this with rhs.
+     */
+    inline PrimitiveVector operator+(const PrimitiveVector& rhs) const {
+        PrimitiveVector res = *this;
+        res += rhs;
+        return res;
+    }
+
     /**
      * @brief Element-wise de-accumulation of rhs into this.
      */
     inline PrimitiveVector& operator-=(const PrimitiveVector& rhs) {
         for (const auto& p : rhs.data_) {
-            float dim_val = get_dim_val(p.first);
-            set_dim_val(p.first, dim_val - p.second);
+            subtract_val_from_dim(p.second, p.first);
         }
         return *this;
     }
@@ -140,6 +165,25 @@ class PrimitiveVector {
         return *this;
     }
 
+    /**
+     * @brief Element-wise division with a scalar.
+     */
+    inline PrimitiveVector& operator/=(float rhs) {
+        for (auto& p : data_) {
+            p.second /= rhs;
+        }
+        return *this;
+    }
+
+    /**
+     * @brief Element-wise division with a scalar.
+     */
+    inline PrimitiveVector operator/(float rhs) const {
+        PrimitiveVector res = *this;
+        res /= rhs;
+        return res;
+    }
+
     /**
      * @brief Returns true if any dimension of this vector is less than any
      *        dimension of rhs; false otherwise.
@@ -168,12 +212,11 @@ class PrimitiveVector {
      * is positive, it will not change.
      */
     inline void relu() {
-        for (auto& p : data_) {
-            // TODO: Should remove the zero elements from the map to improve
-            //       efficiency.
-            if (p.second < 0.f)
-                p.second = 0.f;
-        }
+        std::erase_if(data_, [](const auto& p) {
+            // Erase non-positive entries rather than zeroing them. Taking the
+            // element by const auto& avoids a temporary pair copy per call.
+            return p.second <= 0.0f;
+        });
     }
 
     /**
@@ -234,12 +277,36 @@ class PrimitiveVector {
     inline void project(const PrimitiveVector& dir) {
         // For each dimension of this vector, if that dimension is zero in dir
         // set the dimension to zero.
+        std::erase_if(data_, [&dir](const auto& p) {
+            return dir.get_dim_val(p.first) == 0.0f; // erased, not zeroed
+        });
+    }
+
+    /**
+     * @brief Gets the dimensions of this vector whose stored value is non-zero.
+     */
+    inline std::vector<int> get_non_zero_dims() const {
+        std::vector<int> non_zero_dims;
         for (auto& p : data_) {
-            // TODO: Instead of zeroing the dimension, it should be removed
-            //       from the map.
-            if (dir.get_dim_val(p.first) == 0.f)
-                p.second = 0.f;
+            if (p.second != 0.0f)
+                non_zero_dims.push_back(static_cast<int>(p.first));
         }
+        return non_zero_dims;
+    }
+
+    /**
+     * @brief Returns true if this and other do not share any non-zero dimensions.
+     */
+    inline bool are_dims_disjoint(const PrimitiveVector& other) const {
+        for (const auto& p : other.data_) {
+            // If this and other both have a non-zero value in the same
+            // dimension, then their dimensions are not disjoint.
+            if (p.second != 0.0f && get_dim_val(p.first) != 0.0f) {
+                return false;
+            }
+        }
+        // No non-zero dimension is shared, so the dimensions are disjoint.
+        return true;
     }
 
     /**
@@ -268,4 +335,13 @@ class PrimitiveVector {
         }
         return res;
     }
+
+    /**
+     * @brief Debug printing method.
+     */
+    inline void print() const {
+        for (const auto& p : data_) {
+            VTR_LOG("(%zu, %f)\n", p.first, p.second);
+        }
+    }
 };
diff --git a/vpr/src/pack/appack_context.h b/vpr/src/pack/appack_context.h
index 9ec11fb3273..fac548d6360 100644
--- a/vpr/src/pack/appack_context.h
+++ b/vpr/src/pack/appack_context.h
@@ -62,14 +62,16 @@ struct t_appack_options {
     // We use the following gain attenuation function:
     //      attenuation = { 1 - (quad_fac * d)^2    if d < dist_th
     //                    { 1 / sqrt(d - sqrt_offset)  if d >= dist_th
+    // The numbers below were empirically found to work well.
+
     // Distance threshold which decides when to use quadratic decay or inverted
     // sqrt decay. If the distance is less than this threshold, quadratic decay
     // is used. Inverted sqrt is used otherwise.
-    float dist_th = 1.0f;
+    float dist_th = 5.0f;
     // Horizontal offset to the inverted sqrt decay.
-    float sqrt_offset = -2.9f;
+    float sqrt_offset = -1.1f;
     // Scaling factor for the quadratic decay term.
-    float quad_fac = 0.7f;
+    float quad_fac = 0.1543f;
 
     // =========== Candidate selection distance ============================ //
     // When selecting candidates, what distance from the cluster will we
diff --git a/vpr/src/pack/prepack.h b/vpr/src/pack/prepack.h
index e521908d251..5222046ddb6 100644
--- a/vpr/src/pack/prepack.h
+++ b/vpr/src/pack/prepack.h
@@ -286,6 +286,13 @@ class Prepacker {
         return chain_info_.size();
     }
 
+    /**
+     * @brief Get a list of all the pack patterns in the architecture.
+     */
+    inline const std::vector<t_pack_patterns>& get_all_pack_patterns() const {
+        return list_of_pack_patterns;
+    }
+
   private:
     /**
      * Pre-pack atoms in netlist to molecules
diff --git a/vpr/test/test_ap_primitive_vector.cpp b/vpr/test/test_ap_primitive_vector.cpp
index 7a29334e939..425f4e20f35 100644
--- a/vpr/test/test_ap_primitive_vector.cpp
+++ b/vpr/test/test_ap_primitive_vector.cpp
@@ -8,6 +8,7 @@
  * PrimitiveVector object are working as expected.
  */
 
+#include <algorithm>
 #include "catch2/catch_test_macros.hpp"
 #include "primitive_vector.h"
 
@@ -310,6 +311,60 @@ TEST_CASE("test_ap_primitive_vector_verify", "[vpr_ap]") {
         res = PrimitiveVector::max(vec2, vec1);
         REQUIRE(res == golden);
     }
+
+    SECTION("Test more operators and methods") {
+        PrimitiveVector vec1, vec2;
+
+        // Subtract value from dimension
+        vec1.set_dim_val(0, 5.f);
+        vec1.subtract_val_from_dim(3.f, 0);
+        REQUIRE(vec1.get_dim_val(0) == 2.f);
+
+        // Element-wise addition operator
+        vec1.clear();
+        vec1.set_dim_val(0, 1.f);
+        vec1.set_dim_val(1, 2.f);
+        vec2.clear();
+        vec2.set_dim_val(0, 3.f);
+        vec2.set_dim_val(1, 4.f);
+        PrimitiveVector vec_sum = vec1 + vec2;
+        REQUIRE(vec_sum.get_dim_val(0) == 4.f);
+        REQUIRE(vec_sum.get_dim_val(1) == 6.f);
+
+        // Element-wise division operator
+        vec1.clear();
+        vec1.set_dim_val(0, 10.f);
+        vec1.set_dim_val(1, 20.f);
+        vec1 /= 2.f;
+        REQUIRE(vec1.get_dim_val(0) == 5.f);
+        REQUIRE(vec1.get_dim_val(1) == 10.f);
+
+        // Element-wise division operator (const)
+        vec1.clear();
+        vec1.set_dim_val(0, 10.f);
+        vec1.set_dim_val(1, 20.f);
+        PrimitiveVector vec_div = vec1 / 2.f;
+        REQUIRE(vec_div.get_dim_val(0) == 5.f);
+        REQUIRE(vec_div.get_dim_val(1) == 10.f);
+
+        // Get non-zero dimensions
+        vec1.clear();
+        vec1.set_dim_val(0, 1.f);
+        vec1.set_dim_val(2, 3.f);
+        std::vector<int> non_zero_dims = vec1.get_non_zero_dims();
+        REQUIRE(std::find(non_zero_dims.begin(), non_zero_dims.end(), 0) != non_zero_dims.end());
+        REQUIRE(std::find(non_zero_dims.begin(), non_zero_dims.end(), 2) != non_zero_dims.end());
+        REQUIRE(std::find(non_zero_dims.begin(), non_zero_dims.end(), 1) == non_zero_dims.end());
+
+        // Test vectors with disjoint dimensions
+        vec1.clear();
+        vec2.clear();
+        vec1.set_dim_val(0, 1.f);
+        vec2.set_dim_val(1, 2.f);
+        REQUIRE(vec1.are_dims_disjoint(vec2));
+        vec2.set_dim_val(0, 3.f);
+        REQUIRE(!vec1.are_dims_disjoint(vec2));
+    }
 }
 
 } // namespace
diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/bipartitioning_global_placer/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/bipartitioning_global_placer/config/golden_results.txt
index 54b30cafac6..f132845c781 100644
--- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/bipartitioning_global_placer/config/golden_results.txt
+++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/bipartitioning_global_placer/config/golden_results.txt
@@ -1,5 +1,5 @@
- arch	  circuit	  script_params	  vtr_flow_elapsed_time	  vtr_max_mem_stage	  vtr_max_mem	  error	  odin_synth_time	  max_odin_mem	  parmys_synth_time	  max_parmys_mem	  abc_depth	  abc_synth_time	  abc_cec_time	  abc_sec_time	  max_abc_mem	  ace_time	  max_ace_mem	  num_clb	  num_io	  num_memories	  num_mult	  vpr_status	  vpr_revision	  vpr_build_info	  vpr_compiler	  vpr_compiled	  hostname	  rundir	  max_vpr_mem	  num_primary_inputs	  num_primary_outputs	  num_pre_packed_nets	  num_pre_packed_blocks	  num_netlist_clocks	  num_post_packed_nets	  num_post_packed_blocks	  device_width	  device_height	  device_grid_tiles	  device_limiting_resources	  device_name	  pack_mem	  pack_time	  placed_wirelength_est	  total_swap	  accepted_swap	  rejected_swap	  aborted_swap	  place_mem	  place_time	  place_quench_time	  placed_CPD_est	  placed_setup_TNS_est	  placed_setup_WNS_est	  placed_geomean_nonvirtual_intradomain_critical_path_delay_est	  place_delay_matrix_lookup_time	  place_quench_timing_analysis_time	  place_quench_sta_time	  place_total_timing_analysis_time	  place_total_sta_time	  ap_mem	  ap_time	  ap_full_legalizer_mem	  ap_full_legalizer_time	  routed_wirelength	  avg_routed_wirelength	  routed_wiresegment	  avg_routed_wiresegment	  total_nets_routed	  total_connections_routed	  total_heap_pushes	  total_heap_pops	  logic_block_area_total	  logic_block_area_used	  routing_area_total	  routing_area_per_tile	  crit_path_route_success_iteration	  num_rr_graph_nodes	  num_rr_graph_edges	  collapsed_nodes	  critical_path_delay	  geomean_nonvirtual_intradomain_critical_path_delay	  setup_TNS	  setup_WNS	  hold_TNS	  hold_WNS	  create_rr_graph_time	  create_intra_cluster_rr_graph_time	  adding_internal_edges	  route_mem	  crit_path_route_time	  crit_path_total_timing_analysis_time	  crit_path_total_sta_time	  router_lookahead_mem	  tile_lookahead_computation_time	  router_lookahead_computation_time	 
- k6_frac_N10_40nm.xml	  apex4.pre-vpr.blif	  common	  6.15	  vpr	  74.52 MiB	  	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  86	  9	  -1	  -1	  success	  v8.0.0-12210-g8eeb433b5-dirty	  release VTR_ASSERT_LEVEL=3	  GNU 13.2.0 on Linux-6.8.0-49-generic x86_64	  2025-03-01T22:46:27	  srivatsan-Precision-Tower-5810	  /home/alex/vtr-verilog-to-routing	  76312	  9	  19	  897	  28	  0	  768	  114	  16	  16	  256	  -1	  mcnc_medium	  -1	  -1	  7446	  10050	  1525	  5847	  2678	  74.5 MiB	  1.98	  0.01	  5.22187	  -85.9445	  -5.22187	  nan	  0.05	  0.00204197	  0.00165471	  0.0922108	  0.0778153	  74.5 MiB	  1.98	  74.5 MiB	  1.87	  12280	  16.0104	  3195	  4.16558	  8207	  35340	  1711962	  391448	  1.05632e+07	  4.63488e+06	  1.26944e+06	  4958.75	  35	  28900	  206586	  -1	  5.82297	  nan	  -93.0212	  -5.82297	  0	  0	  0.19	  -1	  -1	  74.5 MiB	  0.60	  0.268738	  0.231571	  74.5 MiB	  -1	  0.05	 
- k6_frac_N10_40nm.xml	  des.pre-vpr.blif	  common	  2.87	  vpr	  75.93 MiB	  	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  53	  256	  -1	  -1	  success	  v8.0.0-12210-g8eeb433b5-dirty	  release VTR_ASSERT_LEVEL=3	  GNU 13.2.0 on Linux-6.8.0-49-generic x86_64	  2025-03-01T22:46:27	  srivatsan-Precision-Tower-5810	  /home/alex/vtr-verilog-to-routing	  77752	  256	  245	  954	  501	  0	  711	  554	  22	  22	  484	  -1	  mcnc_large	  -1	  -1	  8904	  66500	  1807	  14947	  49746	  75.9 MiB	  0.88	  0.01	  4.19633	  -806.67	  -4.19633	  nan	  0.07	  0.00209601	  0.00184942	  0.0749397	  0.0672821	  75.9 MiB	  0.88	  75.9 MiB	  0.87	  12620	  17.7496	  3382	  4.75668	  3608	  8619	  480767	  96513	  2.15576e+07	  2.85638e+06	  1.49107e+06	  3080.73	  15	  47664	  245996	  -1	  4.54897	  nan	  -867.702	  -4.54897	  0	  0	  0.22	  -1	  -1	  75.9 MiB	  0.19	  0.170591	  0.156391	  75.9 MiB	  -1	  0.07	 
- k6_frac_N10_40nm.xml	  ex1010.pre-vpr.blif	  common	  19.24	  vpr	  103.73 MiB	  	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  289	  10	  -1	  -1	  success	  v8.0.0-12210-g8eeb433b5-dirty	  release VTR_ASSERT_LEVEL=3	  GNU 13.2.0 on Linux-6.8.0-49-generic x86_64	  2025-03-01T22:46:27	  srivatsan-Precision-Tower-5810	  /home/alex/vtr-verilog-to-routing	  106224	  10	  10	  2659	  20	  0	  2320	  309	  22	  22	  484	  -1	  mcnc_large	  -1	  -1	  33337	  60861	  15622	  40285	  4954	  103.7 MiB	  7.37	  0.03	  7.08906	  -67.526	  -7.08906	  nan	  0.15	  0.00509718	  0.00406142	  0.35604	  0.28949	  103.7 MiB	  7.37	  103.7 MiB	  7.07	  48698	  20.9905	  12433	  5.35905	  17466	  71913	  3700066	  508136	  2.15576e+07	  1.55754e+07	  3.51389e+06	  7260.09	  20	  64568	  594370	  -1	  7.09981	  nan	  -68.5294	  -7.09981	  0	  0	  0.63	  -1	  -1	  103.7 MiB	  1.13	  0.742979	  0.632564	  103.7 MiB	  -1	  0.15	 
- k6_frac_N10_40nm.xml	  seq.pre-vpr.blif	  common	  5.15	  vpr	  75.82 MiB	  	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  85	  41	  -1	  -1	  success	  v8.0.0-12210-g8eeb433b5-dirty	  release VTR_ASSERT_LEVEL=3	  GNU 13.2.0 on Linux-6.8.0-49-generic x86_64	  2025-03-01T22:46:27	  srivatsan-Precision-Tower-5810	  /home/alex/vtr-verilog-to-routing	  77640	  41	  35	  1006	  76	  0	  827	  161	  16	  16	  256	  -1	  mcnc_medium	  -1	  -1	  8073	  13708	  1574	  6075	  6059	  75.8 MiB	  1.93	  0.01	  5.2078	  -150.175	  -5.2078	  nan	  0.05	  0.00241319	  0.00198256	  0.0910059	  0.0770604	  75.8 MiB	  1.93	  75.8 MiB	  1.81	  13112	  15.8549	  3429	  4.14631	  6281	  26105	  949531	  164260	  1.05632e+07	  4.58099e+06	  1.26944e+06	  4958.75	  19	  28900	  206586	  -1	  5.48717	  nan	  -159.221	  -5.48717	  0	  0	  0.19	  -1	  -1	  75.8 MiB	  0.33	  0.222488	  0.193946	  75.8 MiB	  -1	  0.05	 
+ arch	  circuit	  script_params	  vtr_flow_elapsed_time	  vtr_max_mem_stage	  vtr_max_mem	  error	  odin_synth_time	  max_odin_mem	  parmys_synth_time	  max_parmys_mem	  abc_depth	  abc_synth_time	  abc_cec_time	  abc_sec_time	  max_abc_mem	  ace_time	  max_ace_mem	  num_clb	  num_io	  num_memories	  num_mult	  vpr_status	  vpr_revision	  vpr_build_info	  vpr_compiler	  vpr_compiled	  hostname	  rundir	  max_vpr_mem	  num_primary_inputs	  num_primary_outputs	  num_pre_packed_nets	  num_pre_packed_blocks	  num_netlist_clocks	  num_post_packed_nets	  num_post_packed_blocks	  device_width	  device_height	  device_grid_tiles	  device_limiting_resources	  device_name	  pack_mem	  pack_time	  initial_placed_wirelength_est	  placed_wirelength_est	  total_swap	  accepted_swap	  rejected_swap	  aborted_swap	  place_mem	  place_time	  place_quench_time	  initial_placed_CPD_est	  placed_CPD_est	  placed_setup_TNS_est	  placed_setup_WNS_est	  placed_geomean_nonvirtual_intradomain_critical_path_delay_est	  place_delay_matrix_lookup_time	  place_quench_timing_analysis_time	  place_quench_sta_time	  place_total_timing_analysis_time	  place_total_sta_time	  ap_mem	  ap_time	  ap_full_legalizer_mem	  ap_full_legalizer_time	  routed_wirelength	  avg_routed_wirelength	  routed_wiresegment	  avg_routed_wiresegment	  total_nets_routed	  total_connections_routed	  total_heap_pushes	  total_heap_pops	  logic_block_area_total	  logic_block_area_used	  routing_area_total	  routing_area_per_tile	  crit_path_route_success_iteration	  num_rr_graph_nodes	  num_rr_graph_edges	  collapsed_nodes	  critical_path_delay	  geomean_nonvirtual_intradomain_critical_path_delay	  setup_TNS	  setup_WNS	  hold_TNS	  hold_WNS	  create_rr_graph_time	  create_intra_cluster_rr_graph_time	  adding_internal_edges	  route_mem	  crit_path_route_time	  crit_path_total_timing_analysis_time	  crit_path_total_sta_time	  router_lookahead_mem	  tile_lookahead_computation_time	  router_lookahead_computation_time	 
+ k6_frac_N10_40nm.xml	  apex4.pre-vpr.blif	  common	  4.94	  vpr	  74.77 MiB	  	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  80	  9	  -1	  -1	  success	  v8.0.0-12284-g0a886e4da-dirty	  release VTR_ASSERT_LEVEL=3	  GNU 13.2.0 on Linux-6.8.0-49-generic x86_64	  2025-03-19T20:42:32	  srivatsan-Precision-Tower-5810	  /home/alex/vtr-verilog-to-routing	  76564	  9	  19	  897	  28	  0	  624	  108	  16	  16	  256	  -1	  mcnc_medium	  -1	  -1	  10315	  6596	  9617	  1559	  5516	  2542	  74.8 MiB	  1.83	  0.01	  6.75959	  5.07271	  -83.5391	  -5.07271	  nan	  0.05	  0.00162447	  0.001265	  0.077793	  0.0643277	  74.8 MiB	  1.83	  74.8 MiB	  1.37	  11052	  17.7400	  2817	  4.52167	  5101	  22566	  851127	  138852	  1.05632e+07	  4.31152e+06	  1.26944e+06	  4958.75	  19	  28900	  206586	  -1	  5.37355	  nan	  -88.7113	  -5.37355	  0	  0	  0.20	  -1	  -1	  74.8 MiB	  0.27	  0.190594	  0.164391	  74.8 MiB	  -1	  0.05	 
+ k6_frac_N10_40nm.xml	  des.pre-vpr.blif	  common	  2.43	  vpr	  75.06 MiB	  	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  59	  256	  -1	  -1	  success	  v8.0.0-12284-g0a886e4da-dirty	  release VTR_ASSERT_LEVEL=3	  GNU 13.2.0 on Linux-6.8.0-49-generic x86_64	  2025-03-19T20:42:32	  srivatsan-Precision-Tower-5810	  /home/alex/vtr-verilog-to-routing	  76860	  256	  245	  954	  501	  0	  589	  560	  22	  22	  484	  -1	  mcnc_large	  -1	  -1	  10234	  7797	  51314	  1070	  11670	  38574	  75.1 MiB	  0.67	  0.01	  6.53248	  4.02447	  -785.149	  -4.02447	  nan	  0.07	  0.00226809	  0.00205398	  0.0618196	  0.0559082	  75.1 MiB	  0.67	  75.1 MiB	  0.37	  10533	  17.8829	  2862	  4.85908	  2507	  5465	  336298	  76364	  2.15576e+07	  3.17975e+06	  1.49107e+06	  3080.73	  19	  47664	  245996	  -1	  4.35047	  nan	  -842.961	  -4.35047	  0	  0	  0.22	  -1	  -1	  75.1 MiB	  0.19	  0.175627	  0.161726	  75.1 MiB	  -1	  0.07	 
+ k6_frac_N10_40nm.xml	  ex1010.pre-vpr.blif	  common	  18.05	  vpr	  102.53 MiB	  	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  283	  10	  -1	  -1	  success	  v8.0.0-12284-g0a886e4da-dirty	  release VTR_ASSERT_LEVEL=3	  GNU 13.2.0 on Linux-6.8.0-49-generic x86_64	  2025-03-19T20:42:32	  srivatsan-Precision-Tower-5810	  /home/alex/vtr-verilog-to-routing	  104988	  10	  10	  2659	  20	  0	  1537	  303	  22	  22	  484	  -1	  mcnc_large	  -1	  -1	  38269	  26758	  56238	  15119	  35900	  5219	  102.5 MiB	  7.03	  0.02	  10.0331	  6.59208	  -63.1998	  -6.59208	  nan	  0.16	  0.00681329	  0.00553283	  0.410131	  0.342368	  102.5 MiB	  7.03	  102.5 MiB	  4.80	  40340	  26.2459	  10213	  6.64476	  10566	  57669	  2722491	  354615	  2.15576e+07	  1.5252e+07	  3.51389e+06	  7260.09	  18	  64568	  594370	  -1	  6.59758	  nan	  -64.3078	  -6.59758	  0	  0	  0.64	  -1	  -1	  102.5 MiB	  0.98	  0.800154	  0.691255	  102.5 MiB	  -1	  0.16	 
+ k6_frac_N10_40nm.xml	  seq.pre-vpr.blif	  common	  4.80	  vpr	  75.61 MiB	  	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  87	  41	  -1	  -1	  success	  v8.0.0-12284-g0a886e4da-dirty	  release VTR_ASSERT_LEVEL=3	  GNU 13.2.0 on Linux-6.8.0-49-generic x86_64	  2025-03-19T20:42:32	  srivatsan-Precision-Tower-5810	  /home/alex/vtr-verilog-to-routing	  77428	  41	  35	  1006	  76	  0	  667	  163	  16	  16	  256	  -1	  mcnc_medium	  -1	  -1	  11495	  7037	  12623	  1276	  5735	  5612	  75.6 MiB	  1.80	  0.01	  6.34209	  4.94158	  -140.443	  -4.94158	  nan	  0.05	  0.00182801	  0.00144126	  0.0708206	  0.0592281	  75.6 MiB	  1.80	  75.6 MiB	  1.33	  11301	  16.9430	  2961	  4.43928	  4738	  21343	  723412	  125961	  1.05632e+07	  4.68878e+06	  1.26944e+06	  4958.75	  18	  28900	  206586	  -1	  5.29948	  nan	  -148.755	  -5.29948	  0	  0	  0.19	  -1	  -1	  75.6 MiB	  0.26	  0.191646	  0.16645	  75.6 MiB	  -1	  0.05	 
diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/none_detailed_placer/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/none_detailed_placer/config/golden_results.txt
index 787b532b0b7..6597f69926e 100644
--- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/none_detailed_placer/config/golden_results.txt
+++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/none_detailed_placer/config/golden_results.txt
@@ -1,4 +1,4 @@
-arch	circuit	script_params	vtr_flow_elapsed_time	vtr_max_mem_stage	vtr_max_mem	error	odin_synth_time	max_odin_mem	parmys_synth_time	max_parmys_mem	abc_depth	abc_synth_time	abc_cec_time	abc_sec_time	max_abc_mem	ace_time	max_ace_mem	num_clb	num_io	num_memories	num_mult	vpr_status	vpr_revision	vpr_build_info	vpr_compiler	vpr_compiled	hostname	rundir	max_vpr_mem	num_primary_inputs	num_primary_outputs	num_pre_packed_nets	num_pre_packed_blocks	num_netlist_clocks	num_post_packed_nets	num_post_packed_blocks	device_width	device_height	device_grid_tiles	device_limiting_resources	device_name	pack_mem	pack_time	placed_wirelength_est	total_swap	accepted_swap	rejected_swap	aborted_swap	place_mem	place_time	place_quench_time	placed_CPD_est	placed_setup_TNS_est	placed_setup_WNS_est	placed_geomean_nonvirtual_intradomain_critical_path_delay_est	place_delay_matrix_lookup_time	place_quench_timing_analysis_time	place_quench_sta_time	place_total_timing_analysis_time	place_total_sta_time	ap_mem	ap_time	ap_full_legalizer_mem	ap_full_legalizer_time	routed_wirelength	avg_routed_wirelength	routed_wiresegment	avg_routed_wiresegment	total_nets_routed	total_connections_routed	total_heap_pushes	total_heap_pops	logic_block_area_total	logic_block_area_used	routing_area_total	routing_area_per_tile	crit_path_route_success_iteration	num_rr_graph_nodes	num_rr_graph_edges	collapsed_nodes	critical_path_delay	geomean_nonvirtual_intradomain_critical_path_delay	setup_TNS	setup_WNS	hold_TNS	hold_WNS	create_rr_graph_time	create_intra_cluster_rr_graph_time	adding_internal_edges	route_mem	crit_path_route_time	crit_path_total_timing_analysis_time	crit_path_total_sta_time	router_lookahead_mem	tile_lookahead_computation_time	router_lookahead_computation_time	
-k6_frac_N10_40nm.xml	apex4.pre-vpr.blif	common	4.74	vpr	74.21 MiB		-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	86	9	-1	-1	success	v8.0.0-12241-g26615cb38	release VTR_ASSERT_LEVEL=3	GNU 13.2.0 on Linux-6.8.0-49-generic x86_64	2025-03-12T19:05:19	srivatsan-Precision-Tower-5810	/home/alex/vtr-verilog-to-routing	75996	9	19	897	28	0	768	114	16	16	256	-1	mcnc_medium	-1	-1	-1	-1	-1	-1	-1	74.2 MiB	1.66	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	74.2 MiB	1.66	74.2 MiB	1.55	17094	22.2868	4573	5.96219	5603	20605	894991	145381	1.05632e+07	4.63488e+06	1.26944e+06	4958.75	18	28900	206586	-1	6.8999	nan	-108.582	-6.8999	0	0	0.19	-1	-1	74.2 MiB	0.30	0.11634	0.103759	74.2 MiB	-1	0.05	
-k6_frac_N10_40nm.xml	des.pre-vpr.blif	common	2.23	vpr	74.90 MiB		-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	53	256	-1	-1	success	v8.0.0-12241-g26615cb38	release VTR_ASSERT_LEVEL=3	GNU 13.2.0 on Linux-6.8.0-49-generic x86_64	2025-03-12T19:05:19	srivatsan-Precision-Tower-5810	/home/alex/vtr-verilog-to-routing	76700	256	245	954	501	0	711	554	22	22	484	-1	mcnc_large	-1	-1	-1	-1	-1	-1	-1	74.9 MiB	0.48	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	74.9 MiB	0.48	74.9 MiB	0.47	14934	21.0042	3961	5.57103	3454	8241	562985	107042	2.15576e+07	2.85638e+06	1.49107e+06	3080.73	15	47664	245996	-1	5.95192	nan	-973.234	-5.95192	0	0	0.22	-1	-1	74.9 MiB	0.20	0.0953982	0.0888954	74.9 MiB	-1	0.07	
-k6_frac_N10_40nm.xml	seq.pre-vpr.blif	common	4.68	vpr	75.69 MiB		-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	85	41	-1	-1	success	v8.0.0-12241-g26615cb38	release VTR_ASSERT_LEVEL=3	GNU 13.2.0 on Linux-6.8.0-49-generic x86_64	2025-03-12T19:05:19	srivatsan-Precision-Tower-5810	/home/alex/vtr-verilog-to-routing	77504	41	35	1006	76	0	827	161	16	16	256	-1	mcnc_medium	-1	-1	-1	-1	-1	-1	-1	75.7 MiB	1.57	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	75.7 MiB	1.57	75.7 MiB	1.44	19170	23.1802	5187	6.27207	6058	23325	1081692	174542	1.05632e+07	4.58099e+06	1.26944e+06	4958.75	18	28900	206586	-1	6.76552	nan	-194.633	-6.76552	0	0	0.20	-1	-1	75.7 MiB	0.35	0.128528	0.115002	75.7 MiB	-1	0.06	
+ arch	  circuit	  script_params	  vtr_flow_elapsed_time	  vtr_max_mem_stage	  vtr_max_mem	  error	  odin_synth_time	  max_odin_mem	  parmys_synth_time	  max_parmys_mem	  abc_depth	  abc_synth_time	  abc_cec_time	  abc_sec_time	  max_abc_mem	  ace_time	  max_ace_mem	  num_clb	  num_io	  num_memories	  num_mult	  vpr_status	  vpr_revision	  vpr_build_info	  vpr_compiler	  vpr_compiled	  hostname	  rundir	  max_vpr_mem	  num_primary_inputs	  num_primary_outputs	  num_pre_packed_nets	  num_pre_packed_blocks	  num_netlist_clocks	  num_post_packed_nets	  num_post_packed_blocks	  device_width	  device_height	  device_grid_tiles	  device_limiting_resources	  device_name	  pack_mem	  pack_time	  initial_placed_wirelength_est	  placed_wirelength_est	  total_swap	  accepted_swap	  rejected_swap	  aborted_swap	  place_mem	  place_time	  place_quench_time	  initial_placed_CPD_est	  placed_CPD_est	  placed_setup_TNS_est	  placed_setup_WNS_est	  placed_geomean_nonvirtual_intradomain_critical_path_delay_est	  place_delay_matrix_lookup_time	  place_quench_timing_analysis_time	  place_quench_sta_time	  place_total_timing_analysis_time	  place_total_sta_time	  ap_mem	  ap_time	  ap_full_legalizer_mem	  ap_full_legalizer_time	  routed_wirelength	  avg_routed_wirelength	  routed_wiresegment	  avg_routed_wiresegment	  total_nets_routed	  total_connections_routed	  total_heap_pushes	  total_heap_pops	  logic_block_area_total	  logic_block_area_used	  routing_area_total	  routing_area_per_tile	  crit_path_route_success_iteration	  num_rr_graph_nodes	  num_rr_graph_edges	  collapsed_nodes	  critical_path_delay	  geomean_nonvirtual_intradomain_critical_path_delay	  setup_TNS	  setup_WNS	  hold_TNS	  hold_WNS	  create_rr_graph_time	  create_intra_cluster_rr_graph_time	  adding_internal_edges	  route_mem	  crit_path_route_time	  crit_path_total_timing_analysis_time	  crit_path_total_sta_time	  router_lookahead_mem	  tile_lookahead_computation_time	  router_lookahead_computation_time	 
+ k6_frac_N10_40nm.xml	  apex4.pre-vpr.blif	  common	  4.57	  vpr	  74.60 MiB	  	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  80	  9	  -1	  -1	  success	  v8.0.0-12284-g0a886e4da-dirty	  release VTR_ASSERT_LEVEL=3	  GNU 13.2.0 on Linux-6.8.0-49-generic x86_64	  2025-03-19T20:42:32	  srivatsan-Precision-Tower-5810	  /home/alex/vtr-verilog-to-routing	  76392	  9	  19	  897	  28	  0	  624	  108	  16	  16	  256	  -1	  mcnc_medium	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  74.6 MiB	  1.65	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  74.6 MiB	  1.65	  74.6 MiB	  1.48	  14371	  23.0674	  3784	  6.07384	  4075	  16657	  665456	  103737	  1.05632e+07	  4.31152e+06	  1.26944e+06	  4958.75	  17	  28900	  206586	  -1	  6.63192	  nan	  -103.794	  -6.63192	  0	  0	  0.19	  -1	  -1	  74.6 MiB	  0.27	  0.120295	  0.107523	  74.6 MiB	  -1	  0.05	 
+ k6_frac_N10_40nm.xml	  des.pre-vpr.blif	  common	  1.93	  vpr	  75.64 MiB	  	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  59	  256	  -1	  -1	  success	  v8.0.0-12284-g0a886e4da-dirty	  release VTR_ASSERT_LEVEL=3	  GNU 13.2.0 on Linux-6.8.0-49-generic x86_64	  2025-03-19T20:42:32	  srivatsan-Precision-Tower-5810	  /home/alex/vtr-verilog-to-routing	  77456	  256	  245	  954	  501	  0	  589	  560	  22	  22	  484	  -1	  mcnc_large	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  75.6 MiB	  0.38	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  75.6 MiB	  0.38	  75.6 MiB	  0.37	  12828	  21.7793	  3449	  5.85569	  2290	  4763	  363294	  72848	  2.15576e+07	  3.17975e+06	  1.49107e+06	  3080.73	  12	  47664	  245996	  -1	  6.32147	  nan	  -1032.91	  -6.32147	  0	  0	  0.22	  -1	  -1	  75.6 MiB	  0.16	  0.08541	  0.0798207	  75.6 MiB	  -1	  0.07	 
+ k6_frac_N10_40nm.xml	  seq.pre-vpr.blif	  common	  4.34	  vpr	  75.52 MiB	  	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  87	  41	  -1	  -1	  success	  v8.0.0-12284-g0a886e4da-dirty	  release VTR_ASSERT_LEVEL=3	  GNU 13.2.0 on Linux-6.8.0-49-generic x86_64	  2025-03-19T20:42:32	  srivatsan-Precision-Tower-5810	  /home/alex/vtr-verilog-to-routing	  77332	  41	  35	  1006	  76	  0	  667	  163	  16	  16	  256	  -1	  mcnc_medium	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  75.5 MiB	  1.46	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  75.5 MiB	  1.46	  75.5 MiB	  1.27	  15928	  23.8801	  4303	  6.45127	  4201	  18009	  720686	  116311	  1.05632e+07	  4.68878e+06	  1.26944e+06	  4958.75	  16	  28900	  206586	  -1	  6.42149	  nan	  -177.756	  -6.42149	  0	  0	  0.20	  -1	  -1	  75.5 MiB	  0.28	  0.122598	  0.110096	  75.5 MiB	  -1	  0.05