[AP] Created AP Netlist and Fixed Bug in B2B

AlexandreSinger · AlexandreSinger · commit 74c7c7ac1f09 · 2024-07-16T16:22:56.000-04:00
Instead of iterating over the AtomNetlist, created our own netlist
object, which is a vector of vectors. This could be made much more
efficient by using the Netlist class as a base class; however, doing
it this way for now to make it easier.

In the process of implementing this feature, fixed a bug in the B2B
formulation which was causing it not to converge.
diff --git a/vpr/src/place/analytical_placement/AnalyticalSolver.cpp b/vpr/src/place/analytical_placement/AnalyticalSolver.cpp
@@ -9,16 +9,12 @@
 #include <limits>
 #include <memory>
 #include <random>
-#include <unordered_set>
 #include <utility>
 #include <vector>
 #include "PartialPlacement.h"
-#include "atom_netlist.h"
 #include "globals.h"
-#include "partition_region.h"
 #include "vpr_context.h"
 #include "vpr_error.h"
-#include "vpr_types.h"
 #include "vtr_assert.h"
 
 std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_analytical_solver solver_type) {
@@ -96,11 +92,9 @@ static inline void populate_hybrid_matrix(Eigen::SparseMatrix<double> &A_sparse,
                                           Eigen::VectorXd &b_y,
                                           PartialPlacement& p_placement) {
     size_t num_star_nodes = 0;
-    const AtomNetlist& netlist = p_placement.atom_netlist;
-    for (AtomNetId net_id : netlist.nets()) {
-        if (p_placement.net_is_ignored_for_placement(net_id))
-            continue;
-        if (netlist.net_pins(net_id).size() > 3)
+    for (const std::vector<size_t> &net : p_placement.ap_netlist) {
+        size_t num_pins = net.size();
+        if (num_pins > 3)
             num_star_nodes++;
     }
 
@@ -109,28 +103,19 @@ static inline void populate_hybrid_matrix(Eigen::SparseMatrix<double> &A_sparse,
     b_x = Eigen::VectorXd::Zero(num_moveable_nodes + num_star_nodes);
     b_y = Eigen::VectorXd::Zero(num_moveable_nodes + num_star_nodes);
 
-    const AtomContext& atom_ctx = g_vpr_ctx.atom();
-
+    size_t num_nets = p_placement.ap_netlist.size();
     std::vector<Eigen::Triplet<double>> tripletList;
-    tripletList.reserve(num_moveable_nodes * netlist.nets().size());
+    tripletList.reserve(num_moveable_nodes * num_nets);
 
     size_t star_node_offset = 0;
-    // FIXME: Instead of iterating over the whole nelist and reverse looking up
-    //        it may make more sense to pre-compute the netlist.
-    for (AtomNetId net_id : netlist.nets()) {
-        if (p_placement.net_is_ignored_for_placement(net_id))
-            continue;
-        int num_pins = netlist.net_pins(net_id).size();
+    for (const std::vector<size_t> &net : p_placement.ap_netlist) {
+        size_t num_pins = net.size();
         VTR_ASSERT(num_pins > 1);
         if (num_pins > 3) {
-            // FIXME: THIS WAS DIRECTLY COPIED FROM THE STAR FORMULATION. MOVE TO OWN FUNCTION.
-            //          (with the exeption of the star node offset).
             // Using the weight from FastPlace
             double w = static_cast<double>(num_pins) / static_cast<double>(num_pins - 1);
             size_t star_node_id = num_moveable_nodes + star_node_offset;
-            for (AtomPinId pin_id : netlist.net_pins(net_id)) {
-                AtomBlockId blk_id = netlist.pin_block(pin_id);
-                size_t node_id = p_placement.get_node_id_from_blk(blk_id, atom_ctx.atom_molecules);
+            for (size_t node_id : net) {
                 // Note: the star node is always moveable
                 if (p_placement.is_moveable_node(node_id)) {
                     tripletList.emplace_back(star_node_id, star_node_id, w);
@@ -145,19 +130,13 @@ static inline void populate_hybrid_matrix(Eigen::SparseMatrix<double> &A_sparse,
             }
             star_node_offset++;
         } else {
-            // FIXME: THIS WAS DIRECTLY COPIED FROM THE CLIQUE FORMULATION. MOVE TO OWN FUNCTION.
             // Using the weight from FastPlace
             double w = 1.0 / static_cast<double>(num_pins - 1);
 
-            for (int ipin = 0; ipin < num_pins; ipin++) {
-                // FIXME: Is it possible for two pins to be connected to the same block?
-                //        I am wondering if this doesnt matter because it would appear as tho
-                //        this block really wants to be connected lol.
-                AtomBlockId first_block_id = netlist.net_pin_block(net_id, ipin);
-                size_t first_node_id = p_placement.get_node_id_from_blk(first_block_id, atom_ctx.atom_molecules);
-                for (int jpin = ipin + 1; jpin < num_pins; jpin++) {
-                    AtomBlockId second_block_id = netlist.net_pin_block(net_id, jpin);
-                    size_t second_node_id = p_placement.get_node_id_from_blk(second_block_id, atom_ctx.atom_molecules);
+            for (size_t inode_idx = 0; inode_idx < num_pins; inode_idx++) {
+                size_t first_node_id = net[inode_idx];
+                for (size_t jnode_idx = inode_idx + 1; jnode_idx < num_pins; jnode_idx++) {
+                    size_t second_node_id = net[jnode_idx];
                     // Make sure that the first node is moveable. This makes creating the connection easier.
                     if (!p_placement.is_moveable_node(first_node_id)) {
                         if (!p_placement.is_moveable_node(second_node_id)) {
@@ -268,7 +247,7 @@ void B2BSolver::initialize_placement_least_dense(PartialPlacement &p_placement)
 }
 
 // This function return the two nodes on the bound of a netlist, (max, min)
-std::pair<size_t, size_t> B2BSolver::boundNode(std::vector<size_t>& node_ids, std::vector<double>& node_locs){
+std::pair<size_t, size_t> B2BSolver::boundNode(const std::vector<size_t>& node_ids, const std::vector<double>& node_locs){
     auto compare = [&node_locs](size_t a, size_t b) {
         return node_locs[a] < node_locs[b];
     };
@@ -279,54 +258,36 @@ std::pair<size_t, size_t> B2BSolver::boundNode(std::vector<size_t>& node_ids, st
 }
 
 void B2BSolver::populate_matrix(PartialPlacement &p_placement) {
-    const AtomContext& atom_ctx = g_vpr_ctx.atom();
-    const AtomNetlist& netlist = p_placement.atom_netlist;
     // Resetting As bs
     A_sparse_x = Eigen::SparseMatrix<double>(A_sparse_x.rows(), A_sparse_x.cols());
     A_sparse_y = Eigen::SparseMatrix<double>(A_sparse_y.rows(), A_sparse_y.cols());
     // A_sparse_x.setZero();
     // A_sparse_y.setZero();
+    size_t num_nets = p_placement.ap_netlist.size();
     std::vector<Eigen::Triplet<double>> tripletList_x;
-    tripletList_x.reserve(p_placement.num_moveable_nodes * netlist.nets().size());
+    tripletList_x.reserve(p_placement.num_moveable_nodes * num_nets);
     std::vector<Eigen::Triplet<double>> tripletList_y;
-    tripletList_y.reserve(p_placement.num_moveable_nodes * netlist.nets().size());
+    tripletList_y.reserve(p_placement.num_moveable_nodes * num_nets);
     b_x = Eigen::VectorXd::Zero(p_placement.num_moveable_nodes);
     b_y = Eigen::VectorXd::Zero(p_placement.num_moveable_nodes);
 
-    for (AtomNetId net_id : netlist.nets()) {
-        if (p_placement.net_is_ignored_for_placement(net_id))
-            continue;
-
-        int num_pins = netlist.net_pins(net_id).size();
+    for (const std::vector<size_t> &net : p_placement.ap_netlist) {
+        int num_pins = net.size();
         VTR_ASSERT(num_pins > 1 && "net least has at least 2 pins");
-        
-        std::vector<size_t> node_ids;
-        for (AtomPinId pin_id : netlist.net_pins(net_id)) {
-            AtomBlockId blk_id = netlist.pin_block(pin_id);
-            size_t node_id = p_placement.get_node_id_from_blk(blk_id, atom_ctx.atom_molecules);
-            node_ids.push_back(node_id);
-        }
-        // remove duplicated node, they are there becaues of prepacked molecules.
-        // FIXME: duplicate exists because atoms are packed in to molecules so some edges are now hidden.
-        // We can create our own netlist class to resolve this problem.
-        std::set<size_t> node_ids_set(node_ids.begin(), node_ids.end());
-        std::vector<size_t> node_ids_no_duplicate(node_ids_set.begin(), node_ids_set.end());
-        
-        if (node_ids_no_duplicate.size() <= 1){
-            continue;
-        }
-        // TODO: do this in a for loop instead of creating vectors
-        auto [maxXId, minXId] = boundNode(node_ids_no_duplicate, p_placement.node_loc_x);
-        auto [maxYId, minYId] = boundNode(node_ids_no_duplicate, p_placement.node_loc_y);
+
+        // TODO: do this in a single for loop. Will likely be more efficient than
+        //       iterating 4 times.
+        auto [maxXId, minXId] = boundNode(net, p_placement.node_loc_x);
+        auto [maxYId, minYId] = boundNode(net, p_placement.node_loc_y);
         // assign arbitrary node as bound node when they are all equal
         // TODO: although deterministic, investigate other ways to break ties.
         if (maxXId == minXId) {
-            maxXId = node_ids_no_duplicate[0];
-            minXId = node_ids_no_duplicate[1];
+            maxXId = net[0];
+            minXId = net[1];
         }
         if (maxYId == minYId) {
-            maxYId = node_ids_no_duplicate[0];
-            minYId = node_ids_no_duplicate[1];
+            maxYId = net[0];
+            minYId = net[1];
         }
         auto add_node = [&](size_t first_node_id, size_t second_node_id, unsigned num_nodes, bool is_x){
             if (!p_placement.is_moveable_node(first_node_id)) {
@@ -366,20 +327,19 @@ void B2BSolver::populate_matrix(PartialPlacement &p_placement) {
                 }
             }
         };
-        // TODO: when adding custom netlist, also modify here.
-        size_t num_nodes = node_ids_no_duplicate.size();
-        for (size_t node_id = 0; node_id < num_nodes; node_id++) {
+
+        for (size_t node_id : net) {
             if (node_id != maxXId && node_id != minXId) {
-                add_node(node_id, maxXId, num_nodes, true);
-                add_node(node_id, minXId, num_nodes, true);
+                add_node(node_id, maxXId, num_pins, true);
+                add_node(node_id, minXId, num_pins, true);
             } 
             if (node_id != maxYId && node_id != minYId) {
-                add_node(node_id, maxYId, num_nodes, false);
-                add_node(node_id, minYId, num_nodes, false);
+                add_node(node_id, maxYId, num_pins, false);
+                add_node(node_id, minYId, num_pins, false);
             } 
         }
-        add_node(maxXId, minXId, num_nodes, true);
-        add_node(maxYId, minYId, num_nodes, false);
+        add_node(maxXId, minXId, num_pins, true);
+        add_node(maxYId, minYId, num_pins, false);
     }
     A_sparse_x.setFromTriplets(tripletList_x.begin(), tripletList_x.end());
     A_sparse_y.setFromTriplets(tripletList_y.begin(), tripletList_y.end());
@@ -388,8 +348,11 @@ void B2BSolver::populate_matrix(PartialPlacement &p_placement) {
 // This function adds anchors for legalized solution. Anchors are treated as fixed node,
 // each connecting to a movable node. Number of nodes in a anchor net is always 2.
 void B2BSolver::populate_matrix_anchor(PartialPlacement& p_placement, unsigned iteration) {
-    double coeff_pseudo_anchor = 0.001 * std::exp((double)iteration/29.0);
+    // double coeff_pseudo_anchor = 0.001 * std::exp((double)iteration/29.0);
     // double coeff_pseudo_anchor = std::exp((double)iteration/1.0);
+
+    // Using alpha from the SimPL paper
+    double coeff_pseudo_anchor = 0.01 * (1.0 + static_cast<double>(iteration));
     for (size_t i = 0; i < p_placement.num_moveable_nodes; i++){
         // Anchor node are always 2 pins.
         double pseudo_w_x = coeff_pseudo_anchor*2.0/std::max(std::abs(p_placement.node_loc_x[i] - node_loc_x_legalized[i]), epsilon);
@@ -479,4 +442,4 @@ void B2BSolver::solve(unsigned iteration, PartialPlacement &p_placement) {
     // store solved position in data structure of this class
     // node_loc_x_solved = p_placement.node_loc_x;
     // node_loc_y_solved = p_placement.node_loc_y;
-}
+}
diff --git a/vpr/src/place/analytical_placement/AnalyticalSolver.h b/vpr/src/place/analytical_placement/AnalyticalSolver.h
@@ -45,7 +45,7 @@ class B2BSolver : public AnalyticalSolver {
         void initialize_placement_least_dense(PartialPlacement &p_placement);
         void populate_matrix(PartialPlacement &p_placement);
         void populate_matrix_anchor(PartialPlacement& p_placement, unsigned iteration);
-        std::pair<size_t, size_t> boundNode(std::vector<size_t> &node_id, std::vector<double> &node_loc);
+        std::pair<size_t, size_t> boundNode(const std::vector<size_t> &node_id, const std::vector<double> &node_loc);
         
         static inline const double epsilon = 1e-6;
         static inline const unsigned inner_iterations = 30;
@@ -63,4 +63,4 @@ class B2BSolver : public AnalyticalSolver {
         std::vector<double> node_loc_y_solved;
         std::vector<double> node_loc_x_legalized;
         std::vector<double> node_loc_y_legalized;
-};
+};
diff --git a/vpr/src/place/analytical_placement/PartialPlacement.cpp b/vpr/src/place/analytical_placement/PartialPlacement.cpp
@@ -20,12 +20,16 @@ PartialPlacement::PartialPlacement(const AtomNetlist& netlist,
 
     const AtomContext& atom_ctx = g_vpr_ctx.atom();
 
+    std::vector<std::unordered_set<t_pack_molecule*>> interesting_nets;
+    interesting_nets.reserve(atom_netlist.nets().size());
+
     // Collect the unique moveable and fixed molecules from the netlist.
     std::unordered_set<t_pack_molecule*> moveable_mols;
     std::unordered_set<t_pack_molecule*> fixed_mols;
     for (const AtomNetId& net_id : atom_netlist.nets()) {
         if (net_is_ignored_for_placement(net_id))
             continue;
+        std::unordered_set<t_pack_molecule*> net_mols;
         for (const AtomPinId& pin_id : atom_netlist.net_pins(net_id)) {
             AtomBlockId blk_id = atom_netlist.pin_block(pin_id);
             // Get the molecule for this block.
@@ -35,7 +39,9 @@ PartialPlacement::PartialPlacement(const AtomNetlist& netlist,
                 fixed_mols.insert(mol);
             else
                 moveable_mols.insert(mol);
+            net_mols.insert(mol);
         }
+        interesting_nets.push_back(std::move(net_mols));
     }
 
     // Ensure that no fixed molecules are moveable (safety check)
@@ -74,6 +80,35 @@ PartialPlacement::PartialPlacement(const AtomNetlist& netlist,
         node_loc_y[fixed_node_id] = fixed_blocks_y[fixed_blk_id];
     }
 
+    // Create the AP Netlist
+    // FIXME: We should experiment with having duplicate pins! It is possible
+    //        that a block would have multiple pin inputs connected to the same
+    //        net. This should give that block more power...
+    for (const std::unordered_set<t_pack_molecule*> &mols : interesting_nets) {
+        // If the number of molecules in a net is 1 or less, we do not care.
+        // This can happen when a LUT + FF are packed together and now the
+        // net connects to itself.
+        if (mols.size() <= 1)
+            continue;
+        // If the molecules connected by a net are all fixed, then we do not care
+        // about this net.
+        bool is_all_fixed = true;
+        for (t_pack_molecule *mol : mols) {
+            if (moveable_mols.find(mol) != moveable_mols.end()) {
+                is_all_fixed = false;
+                break;
+            }
+        }
+        if (is_all_fixed)
+            continue;
+        // Insert these nodes into the AP Netlist.
+        std::vector<size_t> net_nodes;
+        net_nodes.reserve(mols.size());
+        for (t_pack_molecule *mol : mols) {
+            net_nodes.push_back(mol_to_node_id[mol]);
+        }
+        ap_netlist.emplace_back(std::move(net_nodes));
+    }
 }
 
 double PartialPlacement::get_HPWL() {
@@ -83,6 +118,11 @@ double PartialPlacement::get_HPWL() {
         // FIXME: Confirm if this should be here.
         if (net_is_ignored_for_placement(atom_netlist, net_id))
             continue;
+        // FIXME: This is not necessarily correct for solving HPWL. The x and y
+        //        positions should be clamped to the grid coordinates.
+        //        However, this may make it harder to debug since we lose the
+        //        granularity of the HPWL. Perhaps create an option to get a
+        //        clamped / unclamped version.
         double min_x = std::numeric_limits<double>::max();
         double max_x = std::numeric_limits<double>::lowest();
         double min_y = std::numeric_limits<double>::max();
@@ -226,4 +266,4 @@ void PartialPlacement::unicode_art(){
     }
     VTR_LOG("unicode_art end\n");
     fflush(stderr);
-}
+}
diff --git a/vpr/src/place/analytical_placement/PartialPlacement.h b/vpr/src/place/analytical_placement/PartialPlacement.h
@@ -64,9 +64,21 @@ class PartialPlacement {
         VTR_LOG("Number of moveable nodes: %zu\n", num_moveable_nodes);
         VTR_LOG("Number of fixed nodes: %zu\n", num_nodes - num_moveable_nodes);
         VTR_LOG("Number of total nodes: %zu\n", num_nodes);
+        VTR_LOG("Number of AP nets: %zu\n", ap_netlist.size());
     }
 
     const AtomNetlist& atom_netlist;
+    // Analytical Placement-specific Netlist
+    // This is a netlist containing the nets that AP cares about, using the node
+    // type that AP uses. It also removes duplicate nodes from the net. This
+    // will not include all nets.
+    // Nets which are ignored:
+    //  - nets "ignored for placement", see net_is_ignored_for_placement
+    //  - nets that only connect to 1 (or less) nodes
+    //  - nets that do not contain any moveable nodes
+    // TODO: Eventually we should use the actual Netlist class to contain this
+    // information. vector of vectors is inefficient and may be dangerous.
+    std::vector<std::vector<size_t>> ap_netlist;
     std::map<t_pack_molecule*, size_t> mol_to_node_id;
     std::vector<t_pack_molecule*> node_id_to_mol;
     std::vector<double> node_loc_x;
diff --git a/vpr/src/place/analytical_placement/analytical_placement_flow.cpp b/vpr/src/place/analytical_placement/analytical_placement_flow.cpp
@@ -80,25 +80,25 @@ void run_analytical_placement_flow() {
 
     // Set up the partial placement object
     PartialPlacement p_placement = PartialPlacement(atom_netlist, fixed_blocks, fixed_blocks_x, fixed_blocks_y);
+    p_placement.print_stats();
     // Solve the QP problem
     std::unique_ptr<AnalyticalSolver> solver = make_analytical_solver(e_analytical_solver::B2B);
     // This for loop always starts at iteration 0
-    for (unsigned iteration = 0; iteration < 300; iteration++) {
+    for (unsigned iteration = 0; iteration < 100; iteration++) {
         VTR_LOG("iteration: %ld\n", iteration);
         solver->solve(iteration, p_placement);
         VTR_ASSERT(p_placement.is_valid_partial_placement() && "placement not valid after solve!");
-        p_placement.print_stats();
         double post_solve_hpwl = p_placement.get_HPWL();
         VTR_LOG("HPWL: %f\n", post_solve_hpwl);
         // Partial legalization using cut spreading algorithm
         FlowBasedLegalizer().legalize(p_placement);
         VTR_ASSERT(p_placement.is_valid_partial_placement() && "placement not valid after legalize!");
         double post_legalize_hpwl = p_placement.get_HPWL();
         VTR_LOG("Post-Legalized HPWL: %f\n", post_legalize_hpwl);
-        // if(std::abs(post_solve_hpwl - post_legalize_hpwl) < 20){
-        //     VTR_LOG("ended because of convergence\n");
-        //     break;
-        // }
+        if(std::abs(post_solve_hpwl - post_legalize_hpwl) < 20){
+            VTR_LOG("ended because of convergence\n");
+            break;
+        }
         // p_placement.unicode_art();
     }
     FullLegalizer().legalize(p_placement);