verilog-to-routing · vaughnbetz · Nov 11, 2024 · Nov 4, 2024 · Nov 5, 2024 · Nov 5, 2024
diff --git a/libs/libvtrutil/src/vtr_ndoffsetmatrix.h b/libs/libvtrutil/src/vtr_ndoffsetmatrix.h
@@ -2,6 +2,7 @@
 #define VTR_ND_OFFSET_MATRIX_H
 #include <array>
 #include <memory>
+#include <algorithm>
 
 #include "vtr_assert.h"
 
@@ -309,9 +310,8 @@ class NdOffsetMatrixBase {
 
     ///@brief Swap two NdOffsetMatrixBase objects
     friend void swap(NdOffsetMatrixBase<T, N>& m1, NdOffsetMatrixBase<T, N>& m2) {
-        using std::swap;
-        swap(m1.dim_ranges_, m2.dim_ranges_);
-        swap(m1.data_, m2.data_);
+        std::swap(m1.dim_ranges_, m2.dim_ranges_); // qualified call: ADL is bypassed, only std::swap overloads are considered
+        std::swap(m1.data_, m2.data_); // NOTE(review): assumes data_ needs no custom (ADL-found) swap -- confirm
     }
 
   private:
@@ -441,7 +441,9 @@ class NdOffsetMatrix<T, 1> : public NdOffsetMatrixBase<T, 1> {
         VTR_ASSERT_SAFE_MSG(index >= this->dim_ranges_[0].begin_index(), "Index out of range (below dimension minimum)");
         VTR_ASSERT_SAFE_MSG(index < this->dim_ranges_[0].end_index(), "Index out of range (above dimension maximum)");
 
-        return this->data_[index];
+        int effective_index = index - this->dim_ranges_[0].begin_index();
+
+        return this->data_[effective_index];
     }
 
     ///@brief Access an element (mutable)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
@@ -153,81 +153,36 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
 }
 
 void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_(float place_cost_exp) {
-    auto& device_ctx = g_vpr_ctx.device();
+    const auto& device_ctx = g_vpr_ctx.device();
 
     const int grid_height = device_ctx.grid.height();
     const int grid_width = device_ctx.grid.width();
 
     /* Access arrays below as chan?_place_cost_fac_(subhigh, sublow). Since subhigh must be greater than or
      * equal to sublow, we will only access the lower half of a matrix, but we allocate the whole matrix anyway
      * for simplicity, so we can use the vtr utility matrix functions. */
-    chanx_place_cost_fac_ = vtr::NdOffsetMatrix<float, 2>({{{-1, grid_height}, {-1, grid_height}}});
-    chany_place_cost_fac_ = vtr::NdOffsetMatrix<float, 2>({{{-1, grid_width}, {-1, grid_width}}});
+    acc_chanx_width_ = vtr::NdOffsetMatrix<int, 1>({{{-2, grid_height}}});
+    acc_chany_width_ = vtr::NdOffsetMatrix<int, 1>({{{-2, grid_width}}});
 
     // First compute the number of tracks between channel high and channel low, inclusive.
-    chanx_place_cost_fac_[-1][-1] = 0;
-
-    for (int high = 0; high < grid_height; high++) {
-        chanx_place_cost_fac_[high][high] = (float)device_ctx.chan_width.x_list[high];
-        for (int low = -1; low < high; low++) {
-            chanx_place_cost_fac_[high][low] = chanx_place_cost_fac_[high - 1][low] + (float)device_ctx.chan_width.x_list[high];
+    acc_chanx_width_[-2] = 0;
+    acc_chanx_width_[-1] = 1;
+    for (int y = 0; y < grid_height; y++) {
+        acc_chanx_width_[y] = acc_chanx_width_[y - 1] + device_ctx.chan_width.x_list[y];
+        if (acc_chanx_width_[y] == acc_chanx_width_[y - 1]) {
+            acc_chanx_width_[y]++;
         }
     }
 
-    /* Now compute the inverse of the average number of tracks per channel *
-     * between high and low. The cost function divides by the average      *
-     * number of tracks per channel, so by storing the inverse I convert   *
-     * this to a faster multiplication.  Take this final number to the     *
-     * place_cost_exp power -- numbers other than one mean this is no      *
-     * longer a simple "average number of tracks"; it is some power of     *
-     * that, allowing greater penalization of narrow channels.             */
-    for (int high = -1; high < grid_height; high++) {
-        for (int low = -1; low <= high; low++) {
-            /* Since we will divide the wiring cost by the average channel *
-             * capacity between high and low, having only 0 width channels *
-             * will result in infinite wiring capacity normalization       *
-             * factor, and extremely bad placer behaviour. Hence we change *
-             * this to a small (1 track) channel capacity instead.         */
-            if (chanx_place_cost_fac_[high][low] == 0.0f) {
-                VTR_LOG_WARN("CHANX place cost fac is 0 at %d %d\n", high, low);
-                chanx_place_cost_fac_[high][low] = 1.0f;
-            }
-
-            chanx_place_cost_fac_[high][low] = (high - low + 1.) / chanx_place_cost_fac_[high][low];
-            chanx_place_cost_fac_[high][low] = pow((double)chanx_place_cost_fac_[high][low], (double)place_cost_exp);
+    acc_chany_width_[-2] = 0;
+    acc_chany_width_[-1] = 1;
+    for (int x = 0; x < grid_width; x++) {
+        acc_chany_width_[x] = acc_chany_width_[x - 1] + device_ctx.chan_width.y_list[x];
+        if (acc_chany_width_[x] == acc_chany_width_[x - 1]) {
+            acc_chany_width_[x]++;
         }
     }
 
-    /* Now do the same thing for the y-directed channels.  First get the
-     * number of tracks between channel high and channel low, inclusive. */
-    chany_place_cost_fac_[-1][-1] = 0;
-
-    for (int high = 0; high < grid_width; high++) {
-        chany_place_cost_fac_[high][high] = device_ctx.chan_width.y_list[high];
-        for (int low = -1; low < high; low++) {
-            chany_place_cost_fac_[high][low] = chany_place_cost_fac_[high - 1][low] + device_ctx.chan_width.y_list[high];
-        }
-    }
-
-    /* Now compute the inverse of the average number of tracks per channel
-     * between high and low.  Take to specified power. */
-    for (int high = -1; high < grid_width; high++) {
-        for (int low = -1; low <= high; low++) {
-            /* Since we will divide the wiring cost by the average channel *
-             * capacity between high and low, having only 0 width channels *
-             * will result in infinite wiring capacity normalization       *
-             * factor, and extremely bad placer behaviour. Hence we change *
-             * this to a small (1 track) channel capacity instead.         */
-            if (chany_place_cost_fac_[high][low] == 0.0f) {
-                VTR_LOG_WARN("CHANY place cost fac is 0 at %d %d\n", high, low);
-                chany_place_cost_fac_[high][low] = 1.0f;
-            }
-
-            chany_place_cost_fac_[high][low] = (high - low + 1.) / chany_place_cost_fac_[high][low];
-            chany_place_cost_fac_[high][low] = pow((double)chany_place_cost_fac_[high][low], (double)place_cost_exp);
-        }
-    }
-
     if (device_ctx.grid.get_num_layers() > 1) {
         alloc_and_load_for_fast_vertical_cost_update_(place_cost_exp);
     }
@@ -1475,15 +1430,17 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) {
      */
 
     double ncost;
-    ncost = (bb.xmax - bb.xmin + 1) * crossing * chanx_place_cost_fac_[bb.ymax][bb.ymin - 1];
-    ncost += (bb.ymax - bb.ymin + 1) * crossing * chany_place_cost_fac_[bb.xmax][bb.xmin - 1];
+    const auto [chanx_cost_fac, chany_cost_fac] = get_chan_place_fac_(bb);
+    ncost = (bb.xmax - bb.xmin + 1) * crossing * chanx_cost_fac;
+    ncost += (bb.ymax - bb.ymin + 1) * crossing * chany_cost_fac;
     if (is_multi_layer) {
         ncost += (bb.layer_max - bb.layer_min) * crossing * chanz_place_cost_fac_[bb.xmax][bb.ymax][bb.xmin][bb.ymin];
     }
 
     return ncost;
 }
 
+
 double NetCostHandler::get_net_per_layer_bb_cost_(ClusterNetId net_id , bool use_ts) {
     const auto& move_ctx = placer_state_.move();
 
@@ -1520,11 +1477,10 @@ double NetCostHandler::get_net_per_layer_bb_cost_(ClusterNetId net_id , bool use
          * chan?_place_cost_fac_ objects can handle -1 indices internally.
          */
 
-        ncost += (bb[layer_num].xmax - bb[layer_num].xmin + 1) * crossing
-                 * chanx_place_cost_fac_[bb[layer_num].ymax][bb[layer_num].ymin - 1];
+        const auto[chanx_cost_fac, chany_cost_fac] = get_chan_place_fac_(bb[layer_num]);
+        ncost += (bb[layer_num].xmax - bb[layer_num].xmin + 1) * crossing * chanx_cost_fac;
 
-        ncost += (bb[layer_num].ymax - bb[layer_num].ymin + 1) * crossing
-                 * chany_place_cost_fac_[bb[layer_num].xmax][bb[layer_num].xmin - 1];
+        ncost += (bb[layer_num].ymax - bb[layer_num].ymin + 1) * crossing * chany_cost_fac;
     }
 
     return ncost;

diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
@@ -193,8 +193,9 @@ class NetCostHandler {
      * number of tracks in that direction; for other cost functions they
      * will never be used.
      */
-    vtr::NdOffsetMatrix<float, 2> chanx_place_cost_fac_; // [-1...device_ctx.grid.width()-1]
-    vtr::NdOffsetMatrix<float, 2> chany_place_cost_fac_; // [-1...device_ctx.grid.height()-1]
+    vtr::NdOffsetMatrix<int, 1> acc_chanx_width_; // [-2...device_ctx.grid.height()-1]
+    vtr::NdOffsetMatrix<int, 1> acc_chany_width_; // [-2...device_ctx.grid.width()-1]
+
     /**
       @brief This data structure functions similarly to the matrices described above 
       but is applied to 3D connections linking different FPGA layers. It is used in the 
@@ -511,4 +512,32 @@ class NetCostHandler {
      */
     double get_net_wirelength_from_layer_bb_(ClusterNetId net_id);
 
+    /**
+     * @brief Compute the x- and y-channel cost factors for a bounding box.
+     *
+     * Each factor is the inverse of the average channel width over the channels
+     * spanned by the bounding box, raised to placer_opts_.place_cost_exp.
+     * The accumulated-width arrays are read as prefix sums: acc[high] - acc[low - 2]
+     * is the accumulated width of channels low-1..high, i.e. (high - low + 2) channels.
+     *
+     * @tparam BBT Bounding box type exposing xmin, xmax, ymin and ymax.
+     * @param bb Bounding box; ymin - 2 and xmin - 2 must be valid indices into
+     *           acc_chanx_width_ and acc_chany_width_ respectively.
+     * @return Pair of {chanx factor, chany factor}.
+     */
+    template<typename BBT>
+    std::pair<double, double> get_chan_place_fac_(const BBT& bb) const {
+        const double cost_exp = static_cast<double>(placer_opts_.place_cost_exp);
+
+        // Inverse average width of channels low-1..high, sharpened by cost_exp.
+        const auto sharpened_inverse_average = [cost_exp](const auto& acc_width, int low, int high) {
+            const int total_width = acc_width[high] - acc_width[low - 2];
+            return std::pow((high - low + 2.0) / total_width, cost_exp);
+        };
+
+        // chanx widths are accumulated along y, chany widths along x.
+        return {sharpened_inverse_average(acc_chanx_width_, bb.ymin, bb.ymax),
+                sharpened_inverse_average(acc_chany_width_, bb.xmin, bb.xmax)};
+    }
+
 };