diff --git a/libs/libvtrutil/src/vtr_prefix_sum.h b/libs/libvtrutil/src/vtr_prefix_sum.h
new file mode 100644
index 00000000000..030cce9d3d3
--- /dev/null
+++ b/libs/libvtrutil/src/vtr_prefix_sum.h
@@ -0,0 +1,283 @@
+/**
+ * @file
+ * @author  Alex Singer
+ * @date    February 2025
+ * @brief   Definition of the Prefix Sum class which enables O(1) time-complexity
+ *          sums over regions of an unchanging grid of values.
+ */
+
+#pragma once
+
+#include <functional>
+#include <vector>
+#include "vtr_assert.h"
+#include "vtr_ndmatrix.h"
+
+namespace vtr {
+
+/**
+ * @brief 1D Prefix Sum manager class.
+ *
+ * Given an array of values, it may be necessary to find the sum of values
+ * within a contiguous sub-section of the array. If this operation needs to be
+ * performed many times, this may be expensive in runtime to calculate.
+ *
+ * If the array of values does not change, we can create a prefix sum which will
+ * allow us to get the sum of values in some contiguous sub-section of the array
+ * in O(1) time, instead of O(k) time where k is the number of values in the
+ * sub-section.
+ *
+ * This class has a space complexity of O(l) where l is the length of the array
+ * of values.
+ *
+ *
+ * Static Array of Values Example (values stored in a vector):
+ *
+ * std::vector<float> vals = {...};
+ *
+ * // Build the Prefix Sum
+ * vtr::PrefixSum1D<float> prefix_sum(vals);
+ *
+ * // Compute the sum of the values between index 3 and 7 of the array (inclusive)
+ * float sum = prefix_sum.get_sum(3, 7);
+ *
+ *
+ * Dynamic Vector of Values Example (values derived at runtime):
+ *
+ * // Build the Prefix Sum using a lambda
+ * vtr::PrefixSum1D<float> prefix_sum(length, [&](size_t x) {
+ *      // This lambda returns the value that would be in the array at index x.
+ *      return static_cast<float>(x * x);
+ * });
+ *
+ * // Compute the sum of the values between index 0 and 5 of the array (inclusive)
+ * float sum = prefix_sum.get_sum(0, 5);
+ */
+template<typename T>
+class PrefixSum1D {
+public:
+    PrefixSum1D() = default;
+
+    /**
+     * @brief Construct the 1D prefix sum.
+     *
+     * This pre-computes the sums of values in the array, making it faster to
+     * get the sum of sub-regions of the array later.
+     *
+     * This constructor has a time complexity of O(length).
+     *
+     *  @param length
+     *          The length of the array to make a prefix sum of.
+     *  @param lookup
+     *          A lambda function which will return the value in the array at
+     *          the given x index. This is a lambda to allow a prefix sum to be
+     *          created, even if the values in the array are not stored in a
+     *          vector (may be computed on the spot).
+     *  @param zero
+     *          What is zero for this data type. For most basic data types (like
+     *          int, float, etc.) this parameter can be ignored; for more complex
+     *          data classes (like multi-dimensional vectors) this is necessary
+     *          to be passed in.
+     */
+    PrefixSum1D(size_t length, std::function<T(size_t)> lookup, T zero = T())
+            : prefix_sum_(length + 1, zero) {
+        // The first value in the prefix sum is already initialized to zero.
+        // Fill in the rest: the prefix sum at position x is the sum of all
+        // values in the original array from 0 to x - 1.
+        for (size_t x = 1; x < length + 1; x++) {
+            prefix_sum_[x] = prefix_sum_[x - 1] + lookup(x - 1);
+        }
+    }
+
+    /**
+     * @brief Construct the 1D prefix sum from a vector (read only during construction).
+     */
+    PrefixSum1D(const std::vector<T>& vals, T zero = T())
+            : PrefixSum1D(vals.size(),
+                          [&](size_t x) noexcept {
+                            return vals[x];
+                          },
+                          zero) {}
+
+    /**
+     * @brief Get the sum of all values in the original array of values between
+     *        lower_x and upper_x (inclusive).
+     *
+     * Inclusive means that the sum will include the values at lower_x and
+     * upper_x.
+     *
+     * This method has O(1) time complexity.
+     */
+    T get_sum(size_t lower_x, size_t upper_x) const {
+        // Safety asserts. Emptiness is checked first so "size() - 1" cannot wrap.
+        VTR_ASSERT_SAFE_MSG(!prefix_sum_.empty(), "prefix sum is empty");
+        VTR_ASSERT_SAFE_MSG(lower_x <= upper_x, "lower_x is larger than upper_x");
+        VTR_ASSERT_SAFE_MSG(lower_x < prefix_sum_.size() - 1, "lower_x out of range");
+        VTR_ASSERT_SAFE_MSG(upper_x < prefix_sum_.size() - 1, "upper_x out of range");
+
+        // The sum of the region lower_x to upper_x inclusive is equal to
+        //      - The sum from 0 to upper_x
+        //      - Minus the sum from 0 to lower_x - 1
+        // Note: These are all offset by 1 since the first value is zero. This
+        //       saves us from having to do bound checking.
+        return prefix_sum_[upper_x + 1] - prefix_sum_[lower_x];
+    }
+
+private:
+    /**
+     * @brief The 1D prefix sum of the original array of values.
+     *
+     * Index x of the prefix sum contains the sum of all values in the original
+     * array from 0 to x - 1. The first value in this array is 0. By setting the
+     * first value in the array to 0, we can avoid bound checking. This data
+     * structure has the special property that the sum of any sub-array can be
+     * computed in O(1) time.
+     */
+    std::vector<T> prefix_sum_;
+};
+
+/**
+ * @brief 2D Prefix Sum manager class.
+ *
+ * Given a 2D grid of values, it may be necessary to find the sum of values
+ * within some rectangular sub-region of that grid. If this operation needs to
+ * be performed many times, this may be expensive in runtime to calculate.
+ *
+ * If the grid of values does not change, we can create a prefix sum which will
+ * allow us to get the sum of values in some rectangular sub-region of the
+ * grid in O(1) time, instead of O(k) time where k is the number of values
+ * in the region.
+ *
+ * This class has a space complexity of O(w * h) where w and h are the width
+ * and height of the grid of values.
+ *
+ *
+ * Static Matrix of Values Example (values stored in a matrix):
+ *
+ * vtr::NdMatrix<float, 2> vals({w, h});
+ *
+ * // ... Initialize vals
+ *
+ * // Build the Prefix Sum
+ * vtr::PrefixSum2D<float> prefix_sum(vals);
+ *
+ * // Compute the sum of the rectangular region from (1, 2) to (3, 4) inclusive.
+ * float sum = prefix_sum.get_sum(1, 2, 3, 4);
+ *
+ *
+ * Dynamic Matrix of Values Example (values derived at runtime):
+ *
+ * // Build the Prefix Sum using a lambda
+ * vtr::PrefixSum2D<float> prefix_sum(w, h, [&](size_t x, size_t y) {
+ *      // This lambda returns the value that would be in the matrix at (x, y)
+ *      return (x + y) / 2.f;
+ * });
+ *
+ * // Compute the sum of the rectangular region from (0, 4) to (3, 5) inclusive.
+ * float sum = prefix_sum.get_sum(0, 4, 3, 5);
+ */
+template<typename T>
+class PrefixSum2D {
+public:
+    PrefixSum2D() = default;
+
+    /**
+     * @brief Construct the 2D prefix sum.
+     *
+     * This pre-computes the sums of values in the grid, making it faster to
+     * get the sum of sub-regions of the grid later.
+     *
+     * This constructor has a time complexity of O(w * h).
+     *
+     *  @param w
+     *          The width of the grid of values to make a prefix sum over.
+     *  @param h
+     *          The height of the grid of values to make a prefix sum over.
+     *  @param lookup
+     *          A lambda function which will return the value in the grid at the
+     *          given x, y position. This is a lambda to allow a prefix sum to
+     *          be created, even if the values in the grid are not stored in
+     *          a matrix (may be computed at runtime).
+     *  @param zero
+     *          What is zero for this data type. For most basic data types (like
+     *          int, float, etc.) this parameter can be ignored; for more complex
+     *          data classes (like multi-dimensional vectors) this is necessary
+     *          to be passed in.
+     */
+    PrefixSum2D(size_t w, size_t h, std::function<T(size_t, size_t)> lookup, T zero = T())
+            : prefix_sum_({w + 1, h + 1}, zero) {
+        // The first row and first column are already initialized to zero.
+        // Fill in the rest: the prefix sum at position (x, y) is the sum of
+        // all values in the original matrix in the rectangle from (0, 0) to
+        // (x - 1, y - 1) inclusive.
+        for (size_t x = 1; x < w + 1; x++) {
+            for (size_t y = 1; y < h + 1; y++) {
+                prefix_sum_[x][y] = prefix_sum_[x - 1][y] +
+                                    prefix_sum_[x][y - 1] +
+                                    lookup(x - 1, y - 1) -
+                                    prefix_sum_[x - 1][y - 1];
+            }
+        }
+    }
+
+    /**
+     * @brief Constructs a 2D prefix sum from a 2D grid of values.
+     */
+    PrefixSum2D(const vtr::NdMatrix<T, 2>& vals, T zero = T())
+            : PrefixSum2D(vals.dim_size(0),
+                          vals.dim_size(1),
+                          [&](size_t x, size_t y) {
+                            return vals[x][y];
+                          },
+                          zero) {}
+
+    /**
+     * @brief Get the sum of all values in the original grid of values between
+     *        x = [lower_x, upper_x] and y = [lower_y, upper_y].
+     *
+     * This sum is inclusive, so it also sums the values at (upper_x, upper_y).
+     *
+     * This method has O(1) time complexity.
+     */
+    T get_sum(size_t lower_x, size_t lower_y, size_t upper_x, size_t upper_y) const {
+        // Safety asserts. Emptiness is checked first so "dim_size() - 1" cannot wrap.
+        VTR_ASSERT_SAFE_MSG(prefix_sum_.dim_size(0) > 0 && prefix_sum_.dim_size(1) > 0, "prefix sum is empty");
+        VTR_ASSERT_SAFE_MSG(lower_x <= upper_x, "lower_x is larger than upper_x");
+        VTR_ASSERT_SAFE_MSG(lower_y <= upper_y, "lower_y is larger than upper_y");
+        VTR_ASSERT_SAFE_MSG(lower_x < prefix_sum_.dim_size(0) - 1, "lower_x out of range");
+        VTR_ASSERT_SAFE_MSG(upper_x < prefix_sum_.dim_size(0) - 1, "upper_x out of range");
+        VTR_ASSERT_SAFE_MSG(lower_y < prefix_sum_.dim_size(1) - 1, "lower_y out of range");
+        VTR_ASSERT_SAFE_MSG(upper_y < prefix_sum_.dim_size(1) - 1, "upper_y out of range");
+
+        // The sum of the region (lower_x, lower_y) to (upper_x, upper_y)
+        // inclusive is equal to:
+        //      - The sum of the region (0, 0) to (upper_x, upper_y)
+        //      - Minus the sum of the region (0, 0) to (lower_x - 1, upper_y)
+        //          - Remove the part left of the region
+        //      - Minus the sum of the region (0, 0) to (upper_x, lower_y - 1)
+        //          - Remove the part below the region
+        //      - Plus the sum of the region (0, 0) to (lower_x - 1, lower_y - 1)
+        //          - Add back on the lower-left corner which was subtracted twice.
+        // Note: all of these are offset by 1 since the first row and column
+        //       are all zeros. This allows us to avoid bounds checking when
+        //       lower_x or lower_y are 0.
+        return prefix_sum_[upper_x + 1][upper_y + 1] - prefix_sum_[lower_x][upper_y + 1]
+                                                     - prefix_sum_[upper_x + 1][lower_y]
+                                                     + prefix_sum_[lower_x][lower_y];
+    }
+
+private:
+    /**
+     * @brief The 2D prefix sum of the original grid of values.
+     *
+     * Position (x, y) of the prefix sum contains the sum of all values in the
+     * rectangle (0, 0) -> (x - 1, y - 1) inclusive. The first row and column
+     * are all zeros. By setting these to zero, we can avoid bound checking.
+     * This data structure has the special property that the sum of any
+     * rectangular region can be computed in O(1) time.
+     */
+    vtr::NdMatrix<T, 2> prefix_sum_;
+};
+
+} // namespace vtr
+
diff --git a/libs/libvtrutil/test/test_prefix_sum.cpp b/libs/libvtrutil/test/test_prefix_sum.cpp
new file mode 100644
index 00000000000..6babecf9167
--- /dev/null
+++ b/libs/libvtrutil/test/test_prefix_sum.cpp
@@ -0,0 +1,99 @@
+/**
+ * @file
+ * @author  Alex Singer
+ * @date    February 2025
+ * @brief   Test cases for the Prefix Sum class in vtr_util.
+ */
+
+#include "catch2/catch_test_macros.hpp"
+
+#include "vtr_ndmatrix.h"
+#include "vtr_prefix_sum.h"
+
+using namespace Catch;
+
+TEST_CASE("PrefixSum1D", "[vtr_prefix_sum/PrefixSum1D]") {
+    // Input array that the prefix sum is built over.
+    const std::vector<float> vals = {1.f, 7.f, 2.f, 2.f, 5.f, 6.f, 1.f, 9.f, 1.f, 3.f};
+
+    // Build the prefix sum directly from the vector.
+    vtr::PrefixSum1D<float> prefix_sum(vals);
+
+    // Every single-element range [i, i] must reproduce the original value.
+    SECTION("construction") {
+        for (size_t i = 0; i < vals.size(); ++i) {
+            REQUIRE(prefix_sum.get_sum(i, i) == vals[i]);
+        }
+    }
+
+    // Reference total, accumulated directly from the input values.
+    float expected_total = 0.f;
+    for (const float val : vals) {
+        expected_total += val;
+    }
+
+    // Spot-check the full range and a few hand-computed sub-ranges.
+    SECTION("get_sum") {
+        REQUIRE(prefix_sum.get_sum(0, vals.size() - 1) == expected_total);
+        REQUIRE(prefix_sum.get_sum(0, 2) == 10.f);
+        REQUIRE(prefix_sum.get_sum(7, 9) == 13.f);
+        REQUIRE(prefix_sum.get_sum(2, 5) == 15.f);
+    }
+}
+
+TEST_CASE("PrefixSum2D", "[vtr_prefix_sum/PrefixSum2D]") {
+    // The grid under test, drawn with x growing to the right and y growing
+    // upwards, so that the bottom-left entry is vals[0][0]:
+    //
+    //      y=3 [ 1 3 9 2 ]
+    //      y=2 [ 2 4 0 8 ]
+    //      y=1 [ 3 7 1 3 ]
+    //      y=0 [ 5 6 9 2 ]
+    //
+    // rows_by_y[y][x] lists the same values with the y = 0 row first.
+    constexpr size_t grid_dim = 4;
+    const float rows_by_y[grid_dim][grid_dim] = {
+        {5.f, 6.f, 9.f, 2.f},
+        {3.f, 7.f, 1.f, 3.f},
+        {2.f, 4.f, 0.f, 8.f},
+        {1.f, 3.f, 9.f, 2.f},
+    };
+
+    // Copy the table into the matrix, which is indexed as [x][y].
+    vtr::NdMatrix<float, 2> vals({grid_dim, grid_dim});
+    for (size_t y = 0; y < grid_dim; ++y) {
+        for (size_t x = 0; x < grid_dim; ++x) {
+            vals[x][y] = rows_by_y[y][x];
+        }
+    }
+
+    // Build the prefix sum directly from the matrix.
+    vtr::PrefixSum2D<float> prefix_sum(vals);
+
+    // Every 1x1 region must reproduce the original value at that position.
+    SECTION("construction") {
+        for (size_t col = 0; col < grid_dim; ++col) {
+            for (size_t row = 0; row < grid_dim; ++row) {
+                REQUIRE(prefix_sum.get_sum(col, row, col, row) == vals[col][row]);
+            }
+        }
+    }
+
+    // Reference total, accumulated directly from the grid values.
+    float expected_total = 0.f;
+    for (size_t col = 0; col < grid_dim; ++col) {
+        for (size_t row = 0; row < grid_dim; ++row) {
+            expected_total += vals[col][row];
+        }
+    }
+
+    // Spot-check the whole grid and a few hand-computed rectangles.
+    SECTION("get_sum") {
+        REQUIRE(prefix_sum.get_sum(0, 0, 3, 3) == expected_total);
+        REQUIRE(prefix_sum.get_sum(1, 1, 2, 2) == 12.f);
+        REQUIRE(prefix_sum.get_sum(0, 0, 3, 0) == 22.f);
+        REQUIRE(prefix_sum.get_sum(0, 0, 0, 3) == 11.f);
+        REQUIRE(prefix_sum.get_sum(1, 2, 2, 3) == 16.f);
+    }
+}
+
diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 7e4d36cf54c..cebdcd1762e 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -33,8 +33,8 @@
 #include "place_timing_update.h"
 #include "vtr_math.h"
 #include "vtr_ndmatrix.h"
-#include "vtr_ndoffsetmatrix.h"
 #include "PlacerCriticalities.h"
+#include "vtr_prefix_sum.h"
 
 #include <array>
 
@@ -154,8 +154,8 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
 void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_() {
     const auto& device_ctx = g_vpr_ctx.device();
 
-    const int grid_height = (int)device_ctx.grid.height();
-    const int grid_width = (int)device_ctx.grid.width();
+    const size_t grid_height = device_ctx.grid.height();
+    const size_t grid_width = device_ctx.grid.width();
 
     /* These arrays contain accumulative channel width between channel zero and
      * the channel specified by the given index. The accumulated channel width
@@ -165,37 +165,28 @@ void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_() {
      *      acc_chan?_width_[high] - acc_chan?_width_[low - 1]
      * This returns the total number of tracks between channels 'low' and 'high',
      * including tracks in these channels.
-     *
-     * Channel -1 doesn't exist, so we can say it has zero tracks. We need to be able
-     * to access these arrays with index -1 to handle cases where the lower channel is 0.
      */
-    acc_chanx_width_ = vtr::NdOffsetMatrix<int, 1>({{{-1, grid_height}}});
-    acc_chany_width_ = vtr::NdOffsetMatrix<int, 1>({{{-1, grid_width}}});
-
-    // initialize the first element (index -1) with zero
-    acc_chanx_width_[-1] = 0;
-    for (int y = 0; y < grid_height; y++) {
-        acc_chanx_width_[y] = acc_chanx_width_[y - 1] + device_ctx.chan_width.x_list[y];
+    acc_chanx_width_ = vtr::PrefixSum1D<int>(grid_height, [&](size_t y) noexcept {
+        int chan_x_width = device_ctx.chan_width.x_list[y];
 
         /* If the number of tracks in a channel is zero, two consecutive elements take the same
          * value. This can lead to a division by zero in get_chanxy_cost_fac_(). To avoid this
          * potential issue, we assume that the channel width is at least 1.
          */
-        if (acc_chanx_width_[y] == acc_chanx_width_[y - 1]) {
-            acc_chanx_width_[y]++;
-        }
-    }
+        if (chan_x_width == 0)
+            return 1;
 
-    // initialize the first element (index -1) with zero
-    acc_chany_width_[-1] = 0;
-    for (int x = 0; x < grid_width; x++) {
-        acc_chany_width_[x] = acc_chany_width_[x - 1] + device_ctx.chan_width.y_list[x];
+        return chan_x_width;
+    });
+    acc_chany_width_ = vtr::PrefixSum1D<int>(grid_width, [&](size_t x) noexcept {
+        int chan_y_width = device_ctx.chan_width.y_list[x];
 
         // to avoid a division by zero
-        if (acc_chany_width_[x] == acc_chany_width_[x - 1]) {
-            acc_chany_width_[x]++;
-        }
-    }
+        if (chan_y_width == 0)
+            return 1;
+
+        return chan_y_width;
+    });
     
     if (is_multi_layer_) {
         alloc_and_load_for_fast_vertical_cost_update_();
@@ -209,8 +200,6 @@ void NetCostHandler::alloc_and_load_for_fast_vertical_cost_update_() {
     const size_t grid_height = device_ctx.grid.height();
     const size_t grid_width = device_ctx.grid.width();
 
-    acc_tile_num_inter_die_conn_ = vtr::NdMatrix<int, 2>({grid_width, grid_height}, 0);
-
     vtr::NdMatrix<float, 2> tile_num_inter_die_conn({grid_width, grid_height}, 0.);         
 
     /*
@@ -255,26 +244,11 @@ void NetCostHandler::alloc_and_load_for_fast_vertical_cost_update_() {
     }
 
     // Step 2: Calculate prefix sum of the inter-die connectivity up to and including the channel at (x, y).
-    acc_tile_num_inter_die_conn_[0][0] = tile_num_inter_die_conn[0][0];
-    // Initialize the first row and column
-    for (size_t x = 1; x < device_ctx.grid.width(); x++) {
-        acc_tile_num_inter_die_conn_[x][0] = acc_tile_num_inter_die_conn_[x-1][0] +
-                                             tile_num_inter_die_conn[x][0];
-    }
-
-    for (size_t y = 1; y < device_ctx.grid.height(); y++) {
-        acc_tile_num_inter_die_conn_[0][y] = acc_tile_num_inter_die_conn_[0][y-1] +
-                                             tile_num_inter_die_conn[0][y];
-    }
-    
-    for (size_t x_high = 1; x_high < device_ctx.grid.width(); x_high++) {
-        for (size_t y_high = 1; y_high < device_ctx.grid.height(); y_high++) {
-            acc_tile_num_inter_die_conn_[x_high][y_high] = acc_tile_num_inter_die_conn_[x_high-1][y_high] +
-                                                           acc_tile_num_inter_die_conn_[x_high][y_high-1] +
-                                                           tile_num_inter_die_conn[x_high][y_high] -
-                                                           acc_tile_num_inter_die_conn_[x_high-1][y_high-1];
-        }
-    }
+    acc_tile_num_inter_die_conn_ = vtr::PrefixSum2D<int>(grid_width,
+                                                         grid_height,
+                                                         [&](size_t x, size_t y) {
+                                                            return (int)tile_num_inter_die_conn[x][y];
+                                                         });
 }
 
 std::pair<double, double> NetCostHandler::comp_bb_cost(e_cost_methods method) {
@@ -1504,22 +1478,10 @@ double NetCostHandler::get_net_wirelength_from_layer_bb_(ClusterNetId net_id) {
 }
 
 float NetCostHandler::get_chanz_cost_factor_(const t_bb& bb) {
-    int num_inter_dir_conn;
-
-    if (bb.xmin == 0 && bb.ymin == 0) {
-        num_inter_dir_conn = acc_tile_num_inter_die_conn_[bb.xmax][bb.ymax];
-    } else if (bb.xmin == 0) {
-        num_inter_dir_conn = acc_tile_num_inter_die_conn_[bb.xmax][bb.ymax] -
-                             acc_tile_num_inter_die_conn_[bb.xmax][bb.ymin-1];
-    } else if (bb.ymin == 0) {
-        num_inter_dir_conn = acc_tile_num_inter_die_conn_[bb.xmax][bb.ymax] -
-                             acc_tile_num_inter_die_conn_[bb.xmin-1][bb.ymax];
-    } else {
-        num_inter_dir_conn = acc_tile_num_inter_die_conn_[bb.xmax][bb.ymax] -
-                             acc_tile_num_inter_die_conn_[bb.xmin-1][bb.ymax] -
-                             acc_tile_num_inter_die_conn_[bb.xmax][bb.ymin-1] +
-                             acc_tile_num_inter_die_conn_[bb.xmin-1][bb.ymin-1];
-    }
+    int num_inter_dir_conn = acc_tile_num_inter_die_conn_.get_sum(bb.xmin,
+                                                                  bb.ymin,
+                                                                  bb.xmax,
+                                                                  bb.ymax);
     
     float z_cost_factor;
     if (num_inter_dir_conn == 0) {
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index 8049cd42c64..ffc04f6f3c5 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -9,7 +9,7 @@
 #include "place_delay_model.h"
 #include "move_transactions.h"
 #include "place_util.h"
-#include "vtr_ndoffsetmatrix.h"
+#include "vtr_prefix_sum.h"
 
 #include <functional>
 
@@ -197,8 +197,8 @@ class NetCostHandler {
      * number of tracks in that direction; for other cost functions they
      * will never be used.
      */
-    vtr::NdOffsetMatrix<int, 1> acc_chanx_width_; // [-1...device_ctx.grid.width()-1]
-    vtr::NdOffsetMatrix<int, 1> acc_chany_width_; // [-1...device_ctx.grid.height()-1]
+    vtr::PrefixSum1D<int> acc_chanx_width_; // [0..device_ctx.grid.height()-1]
+    vtr::PrefixSum1D<int> acc_chany_width_; // [0..device_ctx.grid.width()-1]
 
     /**
      * @brief The matrix below is used to calculate a chanz_place_cost_fac based on the average channel width in 
@@ -208,7 +208,7 @@ class NetCostHandler {
      * (x=0,y=0) to (x,y). Given this, we can compute the average number of inter-die connections over a (xlow,ylow) to (xhigh,yhigh) 
      * region in O(1) (by adding and subtracting 4 entries)
      */
-    vtr::NdMatrix<int, 2> acc_tile_num_inter_die_conn_; // [0..grid_width-1][0..grid_height-1]
+    vtr::PrefixSum2D<int> acc_tile_num_inter_die_conn_; // [0..grid_width-1][0..grid_height-1]
 
 
   private:
@@ -526,10 +526,10 @@ class NetCostHandler {
      */
     template<typename BBT>
     std::pair<double, double> get_chanxy_cost_fac_(const BBT& bb) {
-        const int total_chanx_width = acc_chanx_width_[bb.ymax] - acc_chanx_width_[bb.ymin - 1];
+        const int total_chanx_width = acc_chanx_width_.get_sum(bb.ymin, bb.ymax);
         const double inverse_average_chanx_width = (bb.ymax - bb.ymin + 1.0) / total_chanx_width;
 
-        const int total_chany_width = acc_chany_width_[bb.xmax] - acc_chany_width_[bb.xmin - 1];
+        const int total_chany_width = acc_chany_width_.get_sum(bb.xmin, bb.xmax);
         const double inverse_average_chany_width = (bb.xmax - bb.xmin + 1.0) / total_chany_width;
 
         return {inverse_average_chanx_width, inverse_average_chany_width};