verilog-to-routing · vaughnbetz · Apr 11, 2025 · Apr 3, 2025
diff --git a/doc/src/vpr/command_line_usage.rst b/doc/src/vpr/command_line_usage.rst
@@ -1253,6 +1253,15 @@ Analytical Placement is generally split into three stages:
 
     **Default:** ``annealer``
 
+.. option:: --ap_timing_tradeoff <float>
+
+    Controls the trade-off between wirelength (HPWL) and delay minimization in the AP flow.
+
+    A value of 0.0 makes the AP flow focus completely on wirelength minimization,
+    a value of 1.0 makes it focus completely on timing optimization, and values in between blend the two.
+
+    **Default:** ``0.5``
+
 .. option:: --ap_verbosity <int>
 
     Controls the verbosity of the AP flow output.

diff --git a/vpr/src/analytical_place/analytical_placement_flow.cpp b/vpr/src/analytical_place/analytical_placement_flow.cpp
@@ -7,9 +7,11 @@
 
 #include "analytical_placement_flow.h"
 #include <memory>
+#include "PreClusterTimingManager.h"
 #include "analytical_solver.h"
 #include "ap_netlist.h"
 #include "atom_netlist.h"
+#include "cluster_util.h"
 #include "detailed_placer.h"
 #include "full_legalizer.h"
 #include "gen_ap_netlist_from_atoms.h"
@@ -120,6 +122,7 @@ static PartialPlacement run_global_placer(const t_ap_opts& ap_opts,
                                           const AtomNetlist& atom_nlist,
                                           const APNetlist& ap_netlist,
                                           const Prepacker& prepacker,
+                                          const PreClusterTimingManager& pre_cluster_timing_manager,
                                           const DeviceContext& device_ctx) {
     if (g_vpr_ctx.atom().flat_placement_info().valid) {
         VTR_LOG("Flat Placement is provided in the AP flow, skipping the Global Placement.\n");
@@ -139,6 +142,8 @@ static PartialPlacement run_global_placer(const t_ap_opts& ap_opts,
                                                                          device_ctx.grid,
                                                                          device_ctx.logical_block_types,
                                                                          device_ctx.physical_tile_types,
+                                                                         pre_cluster_timing_manager,
+                                                                         ap_opts.ap_timing_tradeoff,
                                                                          ap_opts.log_verbosity);
         return global_placer->place();
     }
@@ -163,12 +168,25 @@ void run_analytical_placement_flow(t_vpr_setup& vpr_setup) {
                                                      constraints);
     print_ap_netlist_stats(ap_netlist);
 
+    // Pre-compute the pre-clustering timing delays. This object will be passed
+    // into the global placer and the full legalizer to make them timing driven.
+    PreClusterTimingManager pre_cluster_timing_manager(vpr_setup.PackerOpts.timing_driven,
+                                                       atom_nlist,
+                                                       g_vpr_ctx.atom().lookup(),
+                                                       prepacker,
+                                                       vpr_setup.PackerOpts.timing_update_type,
+                                                       *device_ctx.arch,
+                                                       vpr_setup.RoutingArch,
+                                                       vpr_setup.PackerOpts.device_layout,
+                                                       vpr_setup.AnalysisOpts);
+
     // Run the Global Placer.
     const t_ap_opts& ap_opts = vpr_setup.APOpts;
     PartialPlacement p_placement = run_global_placer(ap_opts,
                                                      atom_nlist,
                                                      ap_netlist,
                                                      prepacker,
+                                                     pre_cluster_timing_manager,
                                                      device_ctx);
 
     // Verify that the partial placement is valid before running the full
@@ -185,6 +203,7 @@ void run_analytical_placement_flow(t_vpr_setup& vpr_setup) {
                                                                         ap_netlist,
                                                                         atom_nlist,
                                                                         prepacker,
+                                                                        pre_cluster_timing_manager,
                                                                         vpr_setup,
                                                                         *device_ctx.arch,
                                                                         device_ctx.grid);

diff --git a/vpr/src/analytical_place/analytical_solver.cpp b/vpr/src/analytical_place/analytical_solver.cpp
@@ -13,6 +13,9 @@
 #include <memory>
 #include <utility>
 #include <vector>
+#include "PreClusterTimingManager.h"
+#include "atom_netlist.h"
+#include "atom_netlist_fwd.h"
 #include "device_grid.h"
 #include "flat_placement_types.h"
 #include "partial_placement.h"
@@ -42,23 +45,39 @@
 std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_ap_analytical_solver solver_type,
                                                          const APNetlist& netlist,
                                                          const DeviceGrid& device_grid,
+                                                         const AtomNetlist& atom_netlist,
+                                                         const PreClusterTimingManager& pre_cluster_timing_manager,
+                                                         float ap_timing_tradeoff,
                                                          int log_verbosity) {
     // Based on the solver type passed in, build the solver.
     switch (solver_type) {
         case e_ap_analytical_solver::QP_Hybrid:
 #ifdef EIGEN_INSTALLED
-            return std::make_unique<QPHybridSolver>(netlist, device_grid, log_verbosity);
+            return std::make_unique<QPHybridSolver>(netlist,
+                                                    device_grid,
+                                                    atom_netlist,
+                                                    pre_cluster_timing_manager,
+                                                    ap_timing_tradeoff,
+                                                    log_verbosity);
 #else
             (void)netlist;
             (void)device_grid;
+            (void)atom_netlist;
+            (void)pre_cluster_timing_manager;
+            (void)ap_timing_tradeoff;
             (void)log_verbosity;
             VPR_FATAL_ERROR(VPR_ERROR_AP,
                             "QP Hybrid Solver requires the Eigen library");
             break;
 #endif // EIGEN_INSTALLED
         case e_ap_analytical_solver::LP_B2B:
 #ifdef EIGEN_INSTALLED
-            return std::make_unique<B2BSolver>(netlist, device_grid, log_verbosity);
+            return std::make_unique<B2BSolver>(netlist,
+                                               device_grid,
+                                               atom_netlist,
+                                               pre_cluster_timing_manager,
+                                               ap_timing_tradeoff,
+                                               log_verbosity);
 #else
             VPR_FATAL_ERROR(VPR_ERROR_AP,
                             "LP B2B Solver requires the Eigen library");
@@ -72,10 +91,15 @@ std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_ap_analytical_solver
     return nullptr;
 }
 
-AnalyticalSolver::AnalyticalSolver(const APNetlist& netlist, int log_verbosity)
+AnalyticalSolver::AnalyticalSolver(const APNetlist& netlist,
+                                   const AtomNetlist& atom_netlist,
+                                   const PreClusterTimingManager& pre_cluster_timing_manager,
+                                   float ap_timing_tradeoff,
+                                   int log_verbosity)
     : netlist_(netlist)
     , blk_id_to_row_id_(netlist.blocks().size(), APRowId::INVALID())
     , row_id_to_blk_id_(netlist.blocks().size(), APBlockId::INVALID())
+    , net_weights_(netlist.nets().size(), 1.0f)
     , log_verbosity_(log_verbosity) {
     // Get the number of moveable blocks in the netlist and create a unique
     // row ID from [0, num_moveable_blocks) for each moveable block in the
@@ -94,6 +118,21 @@ AnalyticalSolver::AnalyticalSolver(const APNetlist& netlist, int log_verbosity)
         current_row_id++;
         num_moveable_blocks_++;
     }
+
+    if (pre_cluster_timing_manager.is_valid()) {
+        for (APNetId net_id : netlist.nets()) {
+            // Get the atom net associated with the given AP net. When
+            // constructing the AP netlist, we happen to set the name of each
+            // AP net to the same name as the atom net that generated it!
+            // TODO: Create a proper lookup structure to go from the AP Netlist
+            //       back to the Atom Netlist.
+            AtomNetId atom_net_id = atom_netlist.find_net(netlist.net_name(net_id));
+            VTR_ASSERT(atom_net_id.is_valid());
+            float crit = pre_cluster_timing_manager.calc_net_setup_criticality(atom_net_id, atom_netlist);
+
+            net_weights_[net_id] = ap_timing_tradeoff * crit + (1.0f - ap_timing_tradeoff);
+        }
+    }
 }
 
 #ifdef EIGEN_INSTALLED
@@ -201,12 +240,15 @@ void QPHybridSolver::init_linear_system() {
     for (APNetId net_id : netlist_.nets()) {
         size_t num_pins = netlist_.net_pins(net_id).size();
         VTR_ASSERT_DEBUG(num_pins > 1);
+
+        double net_weight = net_weights_[net_id];
+
         if (num_pins > star_num_pins_threshold) {
             // Create a star node and connect each block in the net to the star
             // node.
             // Using the weight from FastPlace
             // TODO: Investigate other weight terms.
-            double w = static_cast<double>(num_pins) / static_cast<double>(num_pins - 1);
+            double w = net_weight * static_cast<double>(num_pins) / static_cast<double>(num_pins - 1);
             size_t star_node_id = num_moveable_blocks_ + star_node_offset;
             for (APPinId pin_id : netlist_.net_pins(net_id)) {
                 APBlockId blk_id = netlist_.pin_block(pin_id);
@@ -220,7 +262,7 @@ void QPHybridSolver::init_linear_system() {
             // exactly once to every other block in the net.
             // Using the weight from FastPlace
             // TODO: Investigate other weight terms.
-            double w = 1.0 / static_cast<double>(num_pins - 1);
+            double w = net_weight * 1.0 / static_cast<double>(num_pins - 1);
             for (size_t ipin_idx = 0; ipin_idx < num_pins; ipin_idx++) {
                 APPinId first_pin_id = netlist_.net_pin(net_id, ipin_idx);
                 APBlockId first_blk_id = netlist_.pin_block(first_pin_id);
@@ -638,6 +680,7 @@ static inline APNetBounds get_unique_net_bounds(APNetId net_id,
 void B2BSolver::add_connection_to_system(APBlockId first_blk_id,
                                          APBlockId second_blk_id,
                                          size_t num_pins,
+                                         double net_w,
                                          const vtr::vector<APBlockId, double>& blk_locs,
                                          std::vector<Eigen::Triplet<double>>& triplet_list,
                                          Eigen::VectorXd& b) {
@@ -660,7 +703,7 @@ void B2BSolver::add_connection_to_system(APBlockId first_blk_id,
     // The denominator of weight is zero, which causes infinity term in the matrix. Another way of
     // interpreting epsilon is the minimum distance two nodes are considered to be in placement.
     double dist = std::max(std::abs(blk_locs[first_blk_id] - blk_locs[second_blk_id]), distance_epsilon_);
-    double w = (2.0 / static_cast<double>(num_pins - 1)) * (1.0 / dist);
+    double w = net_w * (2.0 / static_cast<double>(num_pins - 1)) * (1.0 / dist);
 
     // Update the connectivity matrix and the constant vector.
     // This is similar to how connections are added for the quadratic formulation.
@@ -696,6 +739,8 @@ void B2BSolver::init_linear_system(PartialPlacement& p_placement) {
         size_t num_pins = netlist_.net_pins(net_id).size();
         VTR_ASSERT_SAFE_MSG(num_pins > 1, "net must have at least 2 pins");
 
+        double net_w = net_weights_[net_id];
+
         // Find the bounding blocks
         APNetBounds net_bounds = get_unique_net_bounds(net_id, p_placement, netlist_);
 
@@ -706,19 +751,19 @@ void B2BSolver::init_linear_system(PartialPlacement& p_placement) {
         for (APPinId pin_id : netlist_.net_pins(net_id)) {
             APBlockId blk_id = netlist_.pin_block(pin_id);
             if (blk_id != net_bounds.max_x_blk && blk_id != net_bounds.min_x_blk) {
-                add_connection_to_system(blk_id, net_bounds.max_x_blk, num_pins, p_placement.block_x_locs, triplet_list_x, b_x);
-                add_connection_to_system(blk_id, net_bounds.min_x_blk, num_pins, p_placement.block_x_locs, triplet_list_x, b_x);
+                add_connection_to_system(blk_id, net_bounds.max_x_blk, num_pins, net_w, p_placement.block_x_locs, triplet_list_x, b_x);
+                add_connection_to_system(blk_id, net_bounds.min_x_blk, num_pins, net_w, p_placement.block_x_locs, triplet_list_x, b_x);
             }
             if (blk_id != net_bounds.max_y_blk && blk_id != net_bounds.min_y_blk) {
-                add_connection_to_system(blk_id, net_bounds.max_y_blk, num_pins, p_placement.block_y_locs, triplet_list_y, b_y);
-                add_connection_to_system(blk_id, net_bounds.min_y_blk, num_pins, p_placement.block_y_locs, triplet_list_y, b_y);
+                add_connection_to_system(blk_id, net_bounds.max_y_blk, num_pins, net_w, p_placement.block_y_locs, triplet_list_y, b_y);
+                add_connection_to_system(blk_id, net_bounds.min_y_blk, num_pins, net_w, p_placement.block_y_locs, triplet_list_y, b_y);
             }
         }
 
         // Connect the bounds to each other. Its just easier to put these here
         // instead of in the for loop above.
-        add_connection_to_system(net_bounds.max_x_blk, net_bounds.min_x_blk, num_pins, p_placement.block_x_locs, triplet_list_x, b_x);
-        add_connection_to_system(net_bounds.max_y_blk, net_bounds.min_y_blk, num_pins, p_placement.block_y_locs, triplet_list_y, b_y);
+        add_connection_to_system(net_bounds.max_x_blk, net_bounds.min_x_blk, num_pins, net_w, p_placement.block_x_locs, triplet_list_x, b_x);
+        add_connection_to_system(net_bounds.max_y_blk, net_bounds.min_y_blk, num_pins, net_w, p_placement.block_y_locs, triplet_list_y, b_y);
     }
 
     // Build the sparse connectivity matrices from the triplets.

diff --git a/vpr/src/analytical_place/analytical_solver.h b/vpr/src/analytical_place/analytical_solver.h
@@ -31,6 +31,8 @@
 // Forward declarations
 class PartialPlacement;
 class APNetlist;
+class AtomNetlist;
+class PreClusterTimingManager;
 
 /**
  * @brief A strong ID for the rows in a matrix used during solving.
@@ -60,7 +62,11 @@ class AnalyticalSolver {
      * Initializes the internal data members of the base class which are useful
      * for all solvers.
      */
-    AnalyticalSolver(const APNetlist& netlist, int log_verbosity);
+    AnalyticalSolver(const APNetlist& netlist,
+                     const AtomNetlist& atom_netlist,
+                     const PreClusterTimingManager& pre_cluster_timing_manager,
+                     float ap_timing_tradeoff,
+                     int log_verbosity);
 
     /**
      * @brief Run an iteration of the solver using the given partial placement
@@ -113,6 +119,12 @@ class AnalyticalSolver {
     ///        solver.
     vtr::vector<APRowId, APBlockId> row_id_to_blk_id_;
 
+    /// @brief The base weight of each net in the AP netlist. This weight can
+    ///        be used to make the solver more interested in some nets over
+    ///        others. These weights can be any positive value, but are often
+    ///        between 0 and 1.
+    vtr::vector<APNetId, float> net_weights_;
+
     /// @brief The verbosity of log messages in the Analytical Solver.
     int log_verbosity_;
 };
@@ -123,6 +135,9 @@ class AnalyticalSolver {
 std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_ap_analytical_solver solver_type,
                                                          const APNetlist& netlist,
                                                          const DeviceGrid& device_grid,
+                                                         const AtomNetlist& atom_netlist,
+                                                         const PreClusterTimingManager& pre_cluster_timing_manager,
+                                                         float ap_timing_tradeoff,
                                                          int log_verbosity);
 
 // The Eigen library is used to solve matrix equations in the following solvers.
@@ -278,8 +293,11 @@ class QPHybridSolver : public AnalyticalSolver {
      */
     QPHybridSolver(const APNetlist& netlist,
                    const DeviceGrid& device_grid,
+                   const AtomNetlist& atom_netlist,
+                   const PreClusterTimingManager& pre_cluster_timing_manager,
+                   float ap_timing_tradeoff,
                    int log_verbosity)
-        : AnalyticalSolver(netlist, log_verbosity) {
+        : AnalyticalSolver(netlist, atom_netlist, pre_cluster_timing_manager, ap_timing_tradeoff, log_verbosity) {
         // Initializing the linear system only depends on the netlist and fixed
         // block locations. Both are provided by the netlist, allowing this to
         // be initialized in the constructor.
@@ -411,8 +429,11 @@ class B2BSolver : public AnalyticalSolver {
   public:
     B2BSolver(const APNetlist& ap_netlist,
               const DeviceGrid& device_grid,
+              const AtomNetlist& atom_netlist,
+              const PreClusterTimingManager& pre_cluster_timing_manager,
+              float ap_timing_tradeoff,
               int log_verbosity)
-        : AnalyticalSolver(ap_netlist, log_verbosity)
+        : AnalyticalSolver(ap_netlist, atom_netlist, pre_cluster_timing_manager, ap_timing_tradeoff, log_verbosity)
         , device_grid_width_(device_grid.width())
         , device_grid_height_(device_grid.height()) {}
 
@@ -503,6 +524,7 @@ class B2BSolver : public AnalyticalSolver {
     void add_connection_to_system(APBlockId first_blk_id,
                                   APBlockId second_blk_id,
                                   size_t num_pins,
+                                  double net_w,
                                   const vtr::vector<APBlockId, double>& blk_locs,
                                   std::vector<Eigen::Triplet<double>>& triplet_list,
                                   Eigen::VectorXd& b);