diff --git a/vpr/src/analytical_place/analytical_solver.cpp b/vpr/src/analytical_place/analytical_solver.cpp
index 35b65f4062..cf518ec1b0 100644
--- a/vpr/src/analytical_place/analytical_solver.cpp
+++ b/vpr/src/analytical_place/analytical_solver.cpp
@@ -21,8 +21,11 @@
 #include "flat_placement_types.h"
 #include "partial_placement.h"
 #include "ap_netlist.h"
+#include "place_delay_model.h"
+#include "timing_info.h"
 #include "vpr_error.h"
 #include "vtr_assert.h"
+#include "vtr_math.h"
 #include "vtr_time.h"
 #include "vtr_vector.h"
 
@@ -49,6 +52,7 @@ std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_ap_analytical_solver
                                                          const DeviceGrid& device_grid,
                                                          const AtomNetlist& atom_netlist,
                                                          const PreClusterTimingManager& pre_cluster_timing_manager,
+                                                         std::shared_ptr<PlaceDelayModel> place_delay_model,
                                                          float ap_timing_tradeoff,
                                                          unsigned num_threads,
                                                          int log_verbosity) {
@@ -81,6 +85,7 @@ std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_ap_analytical_solver
             (void)device_grid;
             (void)atom_netlist;
             (void)pre_cluster_timing_manager;
+            (void)place_delay_model;
             (void)ap_timing_tradeoff;
             (void)log_verbosity;
             VPR_FATAL_ERROR(VPR_ERROR_AP,
@@ -93,6 +98,7 @@ std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_ap_analytical_solver
                                                device_grid,
                                                atom_netlist,
                                                pre_cluster_timing_manager,
+                                               place_delay_model,
                                                ap_timing_tradeoff,
                                                log_verbosity);
 #else
@@ -110,7 +116,6 @@ std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_ap_analytical_solver
 
 AnalyticalSolver::AnalyticalSolver(const APNetlist& netlist,
                                    const AtomNetlist& atom_netlist,
-                                   const PreClusterTimingManager& pre_cluster_timing_manager,
                                    const DeviceGrid& device_grid,
                                    float ap_timing_tradeoff,
                                    int log_verbosity)
@@ -160,40 +165,6 @@ AnalyticalSolver::AnalyticalSolver(const APNetlist& netlist,
         current_row_id++;
         num_moveable_blocks_++;
     }
-
-    update_net_weights(pre_cluster_timing_manager);
-}
-
-void AnalyticalSolver::update_net_weights(const PreClusterTimingManager& pre_cluster_timing_manager) {
-    // If the pre-cluster timing manager has not been initialized (i.e. timing
-    // analysis is off), no need to update.
-    if (!pre_cluster_timing_manager.is_valid())
-        return;
-
-    // For each of the nets, update the net weights.
-    for (APNetId net_id : netlist_.nets()) {
-        // Note: To save time, we do not compute the weights of nets that we
-        //       do not care about for AP. This leaves their weights at 1.0 just
-        //       in case they are accidentally used.
-        if (netlist_.net_is_ignored(net_id))
-            continue;
-
-        AtomNetId atom_net_id = netlist_.net_atom_net(net_id);
-        VTR_ASSERT_SAFE(atom_net_id.is_valid());
-
-        float crit = pre_cluster_timing_manager.calc_net_setup_criticality(atom_net_id, atom_netlist_);
-
-        // When optimizing for WL, the net weights are just set to 1 (meaning
-        // that we want to minimize the WL of nets).
-        // When optimizing for timing, the net weights are set to the timing
-        // criticality, which is based on the lowest slack of any edge belonging
-        // to this net.
-        // The intuition is that we care more about shrinking the wirelength of
-        // more critical connections than less critical ones.
-        // Use the AP timing trade-off term to linearly interpolate between these
-        // weighting terms.
-        net_weights_[net_id] = ap_timing_tradeoff_ * crit + (1.0f - ap_timing_tradeoff_);
-    }
 }
 
 #ifdef EIGEN_INSTALLED
@@ -524,6 +495,41 @@ void QPHybridSolver::store_solution_into_placement(const Eigen::VectorXd& x_soln
     }
 }
 
+void QPHybridSolver::update_net_weights(const PreClusterTimingManager& pre_cluster_timing_manager) {
+    // The quadratic solver adds timing to the objective through a basic
+    // per-net weighting scheme.
+
+    // Nothing to update when the pre-cluster timing manager has not been
+    // initialized (i.e. timing analysis is off).
+    if (!pre_cluster_timing_manager.is_valid())
+        return;
+
+    // The wirelength share of the weight is identical for every net.
+    const auto wl_term = 1.0f - ap_timing_tradeoff_;
+
+    // Re-weight each net that AP cares about.
+    for (APNetId net_id : netlist_.nets()) {
+        // Ignored nets are skipped to save time; their weights are simply
+        // left as they were.
+        if (netlist_.net_is_ignored(net_id))
+            continue;
+
+        AtomNetId atom_net = netlist_.net_atom_net(net_id);
+        VTR_ASSERT_SAFE(atom_net.is_valid());
+
+        // Setup criticality of the net, as computed by the timing manager.
+        float net_crit = pre_cluster_timing_manager.calc_net_setup_criticality(atom_net, atom_netlist_);
+
+        // Use the AP timing trade-off term to linearly interpolate between:
+        //  - a weight of 1 (pure wirelength: minimize the WL of all nets), and
+        //  - the net's timing criticality (pure timing: shrinking the
+        //    wirelength of more critical nets matters more than shrinking
+        //    the wirelength of less critical ones).
+        //      weight = tradeoff * crit + (1 - tradeoff)
+        net_weights_[net_id] = ap_timing_tradeoff_ * net_crit + wl_term;
+    }
+}
+
 void QPHybridSolver::print_statistics() {
     VTR_LOG("QP-Hybrid Solver Statistics:\n");
     VTR_LOG("\tTotal number of CG iterations: %u\n", total_num_cg_iters_);
@@ -645,7 +651,7 @@ void B2BSolver::b2b_solve_loop(unsigned iteration, PartialPlacement& p_placement
 
         // Set up the linear system, including anchor points.
         float build_linear_system_start_time = runtime_timer.elapsed_sec();
-        init_linear_system(p_placement);
+        init_linear_system(p_placement, iteration);
         if (iteration != 0)
             update_linear_system_with_anchors(iteration);
         total_time_spent_building_linear_system_ += runtime_timer.elapsed_sec() - build_linear_system_start_time;
@@ -850,7 +856,180 @@ void B2BSolver::add_connection_to_system(APBlockId first_blk_id,
     }
 }
 
-void B2BSolver::init_linear_system(PartialPlacement& p_placement) {
+// Estimate the derivative of delay with respect to distance (x and y) using Finite Differences.
+std::pair<double, double> B2BSolver::get_delay_derivative(APBlockId driver_blk,
+                                                          APBlockId sink_blk,
+                                                          const PartialPlacement& p_placement) {
+
+    // Get the flat distance from the driver block to the sink block.
+    // NOTE: Here we take the magnitude of the difference since we assume that
+    //       the delay is symmetric (same delay regardless if you are going left
+    //       or right for example). This simplifies the code below somewhat by having
+    //       us only focus on the positive axis.
+    float flat_dx = std::abs(p_placement.block_x_locs[sink_blk] - p_placement.block_x_locs[driver_blk]);
+    float flat_dy = std::abs(p_placement.block_y_locs[sink_blk] - p_placement.block_y_locs[driver_blk]);
+
+    // TODO: Handle 3D FPGAs for this method.
+    int layer_num = 0;
+    VTR_ASSERT_SAFE_MSG(p_placement.block_layer_nums[driver_blk] == layer_num && p_placement.block_layer_nums[sink_blk] == layer_num,
+                        "3D FPGAs not supported yet in the B2B solver");
+
+    // Get the physical tile location of the legalized driver block. The PlaceDelayModel
+    // may use this position to determine the physical tile the wire is coming from.
+    // When the placement is being solved, the driver may be moved to a physical tile
+    // which cannot implement any wires and the delays become infinite. By using the
+    // legalized position of the driver block, we ensure that the delays always exist
+    // (assuming the partial legalizer only places blocks in locations that a block
+    // can be implemented, which it currently does).
+    t_physical_tile_loc driver_block_loc(block_x_locs_legalized[driver_blk],
+                                         block_y_locs_legalized[driver_blk],
+                                         layer_num);
+
+    // Get the physical tile location of the sink block, relative to the driver block.
+    // Based on the current implementation of the PlaceDelayModel, the location of this
+    // block does not actually matter, only the difference in x and y position is used.
+    // Hence, it is ok if this position is off the device, so long as the difference
+    // in x/y is not larger than the width/height of the device.
+    t_physical_tile_loc sink_block_loc(driver_block_loc.x + flat_dx,
+                                       driver_block_loc.y + flat_dy,
+                                       layer_num);
+
+    int tile_dx = sink_block_loc.x - driver_block_loc.x;
+    int tile_dy = sink_block_loc.y - driver_block_loc.y;
+    VTR_ASSERT_SAFE(tile_dx < (int)device_grid_width_);
+    VTR_ASSERT_SAFE(tile_dy < (int)device_grid_height_);
+
+    // Get the delay of a wire going from the given driver block location to the
+    // given sink block location. This should only use the physical tile type of
+    // the driver block location and the dx / dy of the positions to compute
+    // delay.
+    float current_edge_delay = place_delay_model_->delay(driver_block_loc,
+                                                         0 /*from_pin*/,
+                                                         sink_block_loc,
+                                                         0 /*to_pin*/);
+
+    // Get the delays of going from the driver block to the blocks directly
+    // surrounding the sink block (one tile above, below, left, and right).
+    // These will be used to compute the derivative.
+    t_physical_tile_loc right_block_loc(sink_block_loc.x + 1,
+                                        sink_block_loc.y,
+                                        sink_block_loc.layer_num);
+    t_physical_tile_loc left_block_loc(sink_block_loc.x - 1,
+                                       sink_block_loc.y,
+                                       sink_block_loc.layer_num);
+    t_physical_tile_loc upper_block_loc(sink_block_loc.x,
+                                        sink_block_loc.y + 1,
+                                        sink_block_loc.layer_num);
+    t_physical_tile_loc lower_block_loc(sink_block_loc.x,
+                                        sink_block_loc.y - 1,
+                                        sink_block_loc.layer_num);
+
+    float right_edge_delay = place_delay_model_->delay(driver_block_loc,
+                                                       0 /*from_pin*/,
+                                                       right_block_loc,
+                                                       0 /*to_pin*/);
+    float left_edge_delay = place_delay_model_->delay(driver_block_loc,
+                                                      0 /*from_pin*/,
+                                                      left_block_loc,
+                                                      0 /*to_pin*/);
+    float upper_edge_delay = place_delay_model_->delay(driver_block_loc,
+                                                       0 /*from_pin*/,
+                                                       upper_block_loc,
+                                                       0 /*to_pin*/);
+    float lower_edge_delay = place_delay_model_->delay(driver_block_loc,
+                                                       0 /*from_pin*/,
+                                                       lower_block_loc,
+                                                       0 /*to_pin*/);
+
+    // Use Finite Differences to compute the instantaneous derivative of delay
+    // with respect to tile position at this current distance from the driver
+    // block to the sink block.
+    //
+    // Finite Differences are used to compute the derivative of a discrete
+    // function at a given point.
+    //
+    // To compute the derivative of a discrete function at a point, we get the
+    // difference in delay one tile ahead of the current point (the forward
+    // difference) and one tile behind the current point (the backward difference).
+    // We can then approximate the derivative by averaging the forward and backward
+    // differences to get what is called the central difference.
+    float forward_difference_x = right_edge_delay - current_edge_delay;
+    float backward_difference_x = current_edge_delay - left_edge_delay;
+    float central_difference_x = (forward_difference_x + backward_difference_x) / 2.0f;
+
+    float forward_difference_y = upper_edge_delay - current_edge_delay;
+    float backward_difference_y = current_edge_delay - lower_edge_delay;
+    float central_difference_y = (forward_difference_y + backward_difference_y) / 2.0f;
+
+    // Set the resulting derivative to be equal to the central difference.
+    float d_delay_x = central_difference_x;
+    float d_delay_y = central_difference_y;
+
+    // For approximating the derivative of our PlaceDelayModel, there is a special
+    // case when the distance between the driver and sink are 0 in x or y. Since
+    // our delay models are symmetric, the forward and backward difference will
+    // be equal in magnitude and opposite. This means the central difference will
+    // be 0. This is not good since it would cause the objective to ignore the
+    // delay of blocks within the same cluster (making them more incentivized to
+    // not be in the same cluster together). To prevent this, we set the derivative
+    // to be the forward difference in that case. It must be the forward difference
+    // since the backward difference will likely be negative. This basically sets
+    // the derivative to be the penalty for putting the driver and sink in different
+    // tiles.
+    if (tile_dx == 0) {
+        VTR_ASSERT_SAFE_MSG(forward_difference_x == -1.0f * backward_difference_x,
+                            "Delay model expected to be symmetric");
+        d_delay_x = forward_difference_x;
+    }
+    if (tile_dy == 0) {
+        VTR_ASSERT_SAFE_MSG(forward_difference_y == -1.0f * backward_difference_y,
+                            "Delay model expected to be symmetric");
+        d_delay_y = forward_difference_y;
+    }
+
+    return std::make_pair(d_delay_x, d_delay_y);
+}
+
+std::pair<double, double> B2BSolver::get_delay_normalization_facs(APBlockId driver_blk) {
+    // Normalization factors are needed for the delays along connections. A
+    // simple choice is 1 over the delay of leaving a tile, computed separately
+    // for x and y. This should remove the time units without changing the
+    // value too much.
+
+    // As when calculating the derivative, the legalized position of the driver
+    // block is used to estimate the delay from that block type.
+    t_physical_tile_loc driver_loc(block_x_locs_legalized[driver_blk],
+                                   block_y_locs_legalized[driver_blk],
+                                   0 /*layer_num*/);
+
+    // Get the delay of exiting the block to the tile one over in x and in y.
+    t_physical_tile_loc right_loc(driver_loc.x + 1, driver_loc.y, driver_loc.layer_num);
+    t_physical_tile_loc upper_loc(driver_loc.x, driver_loc.y + 1, driver_loc.layer_num);
+    double exit_delay_x = place_delay_model_->delay(driver_loc,
+                                                    0 /*from_pin*/,
+                                                    right_loc,
+                                                    0 /*to_pin*/);
+    double exit_delay_y = place_delay_model_->delay(driver_loc,
+                                                    0 /*from_pin*/,
+                                                    upper_loc,
+                                                    0 /*to_pin*/);
+
+    // These delays are expected to be non-negative.
+    VTR_ASSERT_SAFE(exit_delay_x >= 0.0);
+    VTR_ASSERT_SAFE(exit_delay_y >= 0.0);
+
+    // Dividing by a 0 delay would make the factor infinite. This should not be
+    // hit, but as a safety, swap a near-0 delay for 1e-9 (on the order of a
+    // nanosecond) so that infinities cannot enter the objective by mistake.
+    if (vtr::isclose(exit_delay_x, 0.0))
+        exit_delay_x = 1e-9;
+    if (vtr::isclose(exit_delay_y, 0.0))
+        exit_delay_y = 1e-9;
+
+    return {1.0 / exit_delay_x, 1.0 / exit_delay_y};
+}
+
+void B2BSolver::init_linear_system(PartialPlacement& p_placement, unsigned iteration) {
     // Reset the linear system
     A_sparse_x = Eigen::SparseMatrix<double>(num_moveable_blocks_, num_moveable_blocks_);
     A_sparse_y = Eigen::SparseMatrix<double>(num_moveable_blocks_, num_moveable_blocks_);
@@ -871,7 +1050,10 @@ void B2BSolver::init_linear_system(PartialPlacement& p_placement) {
         size_t num_pins = netlist_.net_pins(net_id).size();
         VTR_ASSERT_SAFE_MSG(num_pins > 1, "net must have at least 2 pins");
 
-        double net_w = net_weights_[net_id];
+        // ====================================================================
+        // Wirelength Connections
+        // ====================================================================
+        double wl_net_w = (1.0f - ap_timing_tradeoff_) * net_weights_[net_id];
 
         // Find the bounding blocks
         APNetBounds net_bounds = get_unique_net_bounds(net_id, p_placement, netlist_);
@@ -883,19 +1065,85 @@ void B2BSolver::init_linear_system(PartialPlacement& p_placement) {
         for (APPinId pin_id : netlist_.net_pins(net_id)) {
             APBlockId blk_id = netlist_.pin_block(pin_id);
             if (blk_id != net_bounds.max_x_blk && blk_id != net_bounds.min_x_blk) {
-                add_connection_to_system(blk_id, net_bounds.max_x_blk, num_pins, net_w, p_placement.block_x_locs, triplet_list_x, b_x);
-                add_connection_to_system(blk_id, net_bounds.min_x_blk, num_pins, net_w, p_placement.block_x_locs, triplet_list_x, b_x);
+                add_connection_to_system(blk_id, net_bounds.max_x_blk, num_pins, wl_net_w, p_placement.block_x_locs, triplet_list_x, b_x);
+                add_connection_to_system(blk_id, net_bounds.min_x_blk, num_pins, wl_net_w, p_placement.block_x_locs, triplet_list_x, b_x);
             }
             if (blk_id != net_bounds.max_y_blk && blk_id != net_bounds.min_y_blk) {
-                add_connection_to_system(blk_id, net_bounds.max_y_blk, num_pins, net_w, p_placement.block_y_locs, triplet_list_y, b_y);
-                add_connection_to_system(blk_id, net_bounds.min_y_blk, num_pins, net_w, p_placement.block_y_locs, triplet_list_y, b_y);
+                add_connection_to_system(blk_id, net_bounds.max_y_blk, num_pins, wl_net_w, p_placement.block_y_locs, triplet_list_y, b_y);
+                add_connection_to_system(blk_id, net_bounds.min_y_blk, num_pins, wl_net_w, p_placement.block_y_locs, triplet_list_y, b_y);
             }
         }
 
         // Connect the bounds to each other. Its just easier to put these here
         // instead of in the for loop above.
-        add_connection_to_system(net_bounds.max_x_blk, net_bounds.min_x_blk, num_pins, net_w, p_placement.block_x_locs, triplet_list_x, b_x);
-        add_connection_to_system(net_bounds.max_y_blk, net_bounds.min_y_blk, num_pins, net_w, p_placement.block_y_locs, triplet_list_y, b_y);
+        add_connection_to_system(net_bounds.max_x_blk, net_bounds.min_x_blk, num_pins, wl_net_w, p_placement.block_x_locs, triplet_list_x, b_x);
+        add_connection_to_system(net_bounds.max_y_blk, net_bounds.min_y_blk, num_pins, wl_net_w, p_placement.block_y_locs, triplet_list_y, b_y);
+
+        // ====================================================================
+        // Timing Connections
+        // ====================================================================
+        // Only add timing connection if timing analysis is on and we are not
+        // in the first iteration. The current timing flow needs legalized
+        // positions to compute the delay derivative, which do not exist until
+        // the next iteration. Its fine to do one wirelength driven iteration first.
+        if (pre_cluster_timing_manager_.is_valid() && iteration != 0) {
+            // Create connections from each driver pin to each of its sink pins.
+            // This will incentivize shrinking the distance from drivers to sinks
+            // of connections which would improve the timing.
+            APPinId driver_pin = netlist_.net_driver(net_id);
+            APBlockId driver_blk = netlist_.pin_block(driver_pin);
+            for (APPinId net_pin : netlist_.net_pins(net_id)) {
+                if (net_pin == driver_pin)
+                    continue;
+                APBlockId sink_blk = netlist_.pin_block(net_pin);
+
+                // Get the instantaneous derivative of delay at the given distance
+                // from driver to sink. This will provide a value which is higher
+                // if the tradeoff between delay and wirelength is better, and
+                // lower when the tradeoff between delay and wirelength is worse.
+                auto [d_delay_x, d_delay_y] = get_delay_derivative(driver_blk,
+                                                                   sink_blk,
+                                                                   p_placement);
+
+                // Since the delay between two blocks may not monotonically increase
+                // (it may go down with distance due to different length wires), it
+                // is possible for the derivative of delay to be negative. The weight
+                // terms in this formulation should not be negative to prevent infinite
+                // answers. To prevent this, clamp the derivative to 0.
+                // TODO: If this is negative, it means that the sink should try to move
+                //       away from the driver. Perhaps add an anchor point to pull the
+                //       sink away.
+                if (d_delay_x < 0)
+                    d_delay_x = 0;
+                if (d_delay_y < 0)
+                    d_delay_y = 0;
+
+                // The units for delay is in seconds; however the units for
+                // the wirelength term is in tile. To ensure the units match,
+                // we need to normalize away the time units. Get normalization
+                // factors to remove the time units.
+                auto [delay_x_norm, delay_y_norm] = get_delay_normalization_facs(driver_blk);
+
+                // Get the criticality of this timing edge from driver to sink.
+                double crit = pre_cluster_timing_manager_.get_timing_info().setup_pin_criticality(netlist_.pin_atom_pin(net_pin));
+
+                // Set the weight of the connection from driver to sink equal to:
+                //      weight_tradeoff_terms * (1 + crit) * d_delay * delay_norm
+                // The intuition is that we want the solver to shrink the distance
+                // from drivers to sinks (which would improve timing) for edges
+                // with the best tradeoff between delay and wire, with a focus
+                // on the more critical edges.
+                double timing_net_w = ap_timing_tradeoff_ * net_weights_[net_id] * timing_slope_fac_ * (1.0 + crit);
+
+                add_connection_to_system(driver_blk, sink_blk,
+                                         2 /*num_pins*/, timing_net_w * d_delay_x * delay_x_norm,
+                                         p_placement.block_x_locs, triplet_list_x, b_x);
+
+                add_connection_to_system(driver_blk, sink_blk,
+                                         2 /*num_pins*/, timing_net_w * d_delay_y * delay_y_norm,
+                                         p_placement.block_y_locs, triplet_list_y, b_y);
+            }
+        }
     }
 
     // Build the sparse connectivity matrices from the triplets.
@@ -956,6 +1204,13 @@ void B2BSolver::store_solution_into_placement(Eigen::VectorXd& x_soln,
     }
 }
 
+void B2BSolver::update_net_weights(const PreClusterTimingManager& pre_cluster_timing_manager) {
+    // Intentionally a no-op: the B2B solver does not currently do anything
+    // with the net weights here.
+    // TODO: Eventually should investigate updating the net weights.
+    (void)pre_cluster_timing_manager;
+}
+
 void B2BSolver::print_statistics() {
     VTR_LOG("B2B Solver Statistics:\n");
     VTR_LOG("\tTotal number of CG iterations: %u\n", total_num_cg_iters_);
diff --git a/vpr/src/analytical_place/analytical_solver.h b/vpr/src/analytical_place/analytical_solver.h
index cd14660dc2..0fd7407a95 100644
--- a/vpr/src/analytical_place/analytical_solver.h
+++ b/vpr/src/analytical_place/analytical_solver.h
@@ -11,6 +11,7 @@
 #include "ap_flow_enums.h"
 #include "ap_netlist.h"
 #include "device_grid.h"
+#include "place_delay_model.h"
 #include "vtr_strong_id.h"
 #include "vtr_vector.h"
 
@@ -62,7 +63,6 @@ class AnalyticalSolver {
      */
     AnalyticalSolver(const APNetlist& netlist,
                      const AtomNetlist& atom_netlist,
-                     const PreClusterTimingManager& pre_cluster_timing_manager,
                      const DeviceGrid& device_grid,
                      float ap_timing_tradeoff,
                      int log_verbosity);
@@ -101,7 +101,7 @@ class AnalyticalSolver {
      *  @param pre_cluster_timing_manager
      *      The timing manager which manages the criticalities of the nets.
      */
-    void update_net_weights(const PreClusterTimingManager& pre_cluster_timing_manager);
+    virtual void update_net_weights(const PreClusterTimingManager& pre_cluster_timing_manager) = 0;
 
   protected:
     /// @brief The APNetlist the solver is optimizing over. It is implied that
@@ -168,6 +168,7 @@ std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_ap_analytical_solver
                                                          const DeviceGrid& device_grid,
                                                          const AtomNetlist& atom_netlist,
                                                          const PreClusterTimingManager& pre_cluster_timing_manager,
+                                                         std::shared_ptr<PlaceDelayModel> place_delay_model,
                                                          float ap_timing_tradeoff,
                                                          unsigned num_threads,
                                                          int log_verbosity);
@@ -331,13 +332,18 @@ class QPHybridSolver : public AnalyticalSolver {
                    int log_verbosity)
         : AnalyticalSolver(netlist,
                            atom_netlist,
-                           pre_cluster_timing_manager,
                            device_grid,
                            ap_timing_tradeoff,
                            log_verbosity) {
+        // Update the net weights. These net weights are used when the linear
+        // system is initialized.
+        update_net_weights(pre_cluster_timing_manager);
+
         // Initializing the linear system only depends on the netlist and fixed
         // block locations. Both are provided by the netlist, allowing this to
         // be initialized in the constructor.
+        // TODO: Investigate re-initializing the linear system every so often
+        //       given changes in the net weights / timing.
         init_linear_system();
 
         // Initialize the guesses for the first iteration.
@@ -366,6 +372,14 @@ class QPHybridSolver : public AnalyticalSolver {
      */
     void solve(unsigned iteration, PartialPlacement& p_placement) final;
 
+    /**
+     * @brief Update the net weights according to the criticality of the nets.
+     *
+     *  @param pre_cluster_timing_manager
+     *      The timing manager which manages the criticalities of the nets.
+     */
+    void update_net_weights(const PreClusterTimingManager& pre_cluster_timing_manager) final;
+
     /**
      * @brief Print statistics of the solver.
      */
@@ -463,19 +477,26 @@ class B2BSolver : public AnalyticalSolver {
     ///        weights to grow slower.
     static constexpr double anchor_weight_exp_fac_ = 5.0;
 
+    /// @brief Factor for controlling the strength of the timing term in the
+    ///        objective relative to the wirelength term. By increasing this
+    ///        number, the solver will focus more on timing and less on wirelength.
+    static constexpr double timing_slope_fac_ = 0.75;
+
   public:
     B2BSolver(const APNetlist& ap_netlist,
               const DeviceGrid& device_grid,
               const AtomNetlist& atom_netlist,
               const PreClusterTimingManager& pre_cluster_timing_manager,
+              std::shared_ptr<PlaceDelayModel> place_delay_model,
               float ap_timing_tradeoff,
               int log_verbosity)
         : AnalyticalSolver(ap_netlist,
                            atom_netlist,
-                           pre_cluster_timing_manager,
                            device_grid,
                            ap_timing_tradeoff,
-                           log_verbosity) {}
+                           log_verbosity)
+        , pre_cluster_timing_manager_(pre_cluster_timing_manager)
+        , place_delay_model_(place_delay_model) {}
 
     /**
      * @brief Perform an iteration of the B2B solver, storing the result into
@@ -503,6 +524,13 @@ class B2BSolver : public AnalyticalSolver {
      */
     void solve(unsigned iteration, PartialPlacement& p_placement) final;
 
+    /**
+     * @brief Update the net weights. Currently a no-op for the B2B solver.
+     *
+     * TODO: Investigate weighting by some factor of fanout.
+     */
+    void update_net_weights(const PreClusterTimingManager& pre_cluster_timing_manager) final;
+
     /**
      * @brief Print overall statistics on this solver.
      *
@@ -569,6 +597,38 @@ class B2BSolver : public AnalyticalSolver {
                                   std::vector<Eigen::Triplet<double>>& triplet_list,
                                   Eigen::VectorXd& b);
 
+    /**
+     * @brief Get the instantaneous derivative of delay for the given driver
+     *        and sink pair.
+     *
+     * The instantaneous derivative gives the amount delay would increase or
+     * decrease for a change in distance. This is passed into the objective
+     * function to help guide the solver to trading off timing and wirelength.
+     *
+     *  @param driver_blk
+     *      The driver block for the edge to get the derivative of.
+     *  @param sink_blk
+     *      The sink block for the edge to get the derivative of.
+     *  @param p_placement
+     *      The current placement of the AP blocks. Used to get the current
+     *      distance from the driver to the sink.
+     *
+     *  @return The instantaneous derivative of delay with respect to distance
+     *          in the x and y dimensions respectively.
+     */
+    std::pair<double, double> get_delay_derivative(APBlockId driver_blk,
+                                                   APBlockId sink_blk,
+                                                   const PartialPlacement& p_placement);
+
+    /**
+     * @brief Get normalization factors (for x and y respectively) to normalize
+     *        away time units out of the objective.
+     *
+     *  @param driver_blk
+     *      The driver block of the edge to normalize the objective for.
+     */
+    std::pair<double, double> get_delay_normalization_facs(APBlockId driver_blk);
+
     /**
      * @brief Initializes the linear system with the given partial placement.
      *
@@ -580,7 +640,7 @@ class B2BSolver : public AnalyticalSolver {
      * This will set the connectivity matrices (A) and constant vectors (b) to
      * be solved by B2B.
      */
-    void init_linear_system(PartialPlacement& p_placement);
+    void init_linear_system(PartialPlacement& p_placement, unsigned iteration);
 
     /**
      * @brief Updates the linear system with anchor-blocks from the legalized
@@ -642,6 +702,14 @@ class B2BSolver : public AnalyticalSolver {
     ///        loop so far. This includes creating the CG solver object and
     ///        actually solving for a solution.
     float total_time_spent_solving_linear_system_ = 0.0f;
+
+    /// @brief Timing manager object used for calculating the criticality of
+    ///        edges in the graph.
+    const PreClusterTimingManager& pre_cluster_timing_manager_;
+
+    /// @brief The place delay model used for calculating the delay between
+    ///        two tiles on the FPGA. Used for computing the timing terms.
+    std::shared_ptr<PlaceDelayModel> place_delay_model_;
 };
 
 #endif // EIGEN_INSTALLED
diff --git a/vpr/src/analytical_place/global_placer.cpp b/vpr/src/analytical_place/global_placer.cpp
index 79221963e7..ebc00cd9c0 100644
--- a/vpr/src/analytical_place/global_placer.cpp
+++ b/vpr/src/analytical_place/global_placer.cpp
@@ -95,6 +95,7 @@ SimPLGlobalPlacer::SimPLGlobalPlacer(e_ap_analytical_solver analytical_solver_ty
                                      device_grid,
                                      atom_netlist,
                                      pre_cluster_timing_manager_,
+                                     place_delay_model_,
                                      ap_timing_tradeoff,
                                      num_threads,
                                      log_verbosity_);