Skip to content

Commit 2fd0bdc

Browse files
[VTR][Util] Added Prefix Sum Class
A prefix sum is a classic data structure for quickly finding the sum of any sub-region of a fixed N-dimensional array of values. It pre-computes the partial sums of regions within the array and uses that information later to save time. It is currently used in the net_cost_handler to pre-compute the number of inter-die connections, and it can be useful in other algorithms as well. For example, in a Partial Legalizer in an AP flow, it can allow for quick lookups into the capacity and utilization of regions on the FPGA. This commit moves the Prefix Sum class from the net_cost_handler into the VTR Util library.
1 parent 5a22313 commit 2fd0bdc

File tree

4 files changed

+413
-69
lines changed

4 files changed

+413
-69
lines changed

libs/libvtrutil/src/vtr_prefix_sum.h

Lines changed: 283 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,283 @@
1+
/**
2+
* @file
3+
* @author Alex Singer
4+
* @date February 2025
5+
* @brief Definition of the Prefix Sum class which enables O(1) time-complexity
6+
* sums over regions of an unchanging grid of values.
7+
*/
8+
9+
#pragma once
10+
11+
#include <functional>
12+
#include <vector>
13+
#include "vtr_assert.h"
14+
#include "vtr_ndmatrix.h"
15+
16+
namespace vtr {
17+
18+
/**
19+
* @brief 1D Prefix Sum manager class.
20+
*
21+
* Given an array of values, it may be necessary to find the sum of values
22+
* within a continuous sub-section of the array. If this operation needs to be
23+
* performed many times, this may be expensive in runtime to calculate.
24+
*
25+
* If the array of values does not change, we can create a prefix sum which will
26+
* allow us to get the sum of values in some continuous sub-section of the array
27+
* in O(1) time, instead of O(k) time where k is the number of values in the
28+
* sub-section.
29+
*
30+
* This class has a space complexity of O(l) where l is the length of the array
31+
* of values.
32+
*
33+
*
34+
* Static Array of Values Example (values stored in a vector):
35+
*
36+
* std::vector<float> vals = {...};
37+
*
38+
* // Build the Prefix Sum
39+
* vtr::PrefixSum1D<float> prefix_sum(vals);
40+
*
41+
* // Compute the sum of the values between index 3 and 7 of the array (inclusive)
42+
* float sum = prefix_sum.get_sum(3, 7);
43+
*
44+
*
45+
* Dynamic Vector of Values Example (values derived at runtime):
46+
*
47+
* // Build the Prefix Sum using a lambda
48+
* vtr::PrefixSum1D<float> prefix_sum(length, [&](size_t x) {
49+
* // This lambda returns the value that would be in the array at index x.
50+
* return static_cast<float>(x * x);
51+
* });
52+
*
53+
* // Compute the sum of the values between index 0 and 5 of the array (inclusive)
54+
* float sum = prefix_sum.get_sum(0, 5);
55+
*/
56+
template<typename T>
57+
class PrefixSum1D {
58+
public:
59+
PrefixSum1D() = default;
60+
61+
/**
62+
* @brief Construct the 1D prefix sum.
63+
*
64+
* This pre-computes the sums of values in the array, making it faster to
65+
* get the sum of sub-regions of the array later.
66+
*
67+
* This constructor has a time complexity of O(length)
68+
*
69+
* @param length
70+
* The length of the array to a make a prefix sum of.
71+
* @param lookup
72+
* A lambda function which will return the value in the array at
73+
* the given x index. This is a lambda to allow a prefix sum to be
74+
* created, even if the values in the array are not stored in a
75+
* vector (may be computed on the spot).
76+
* @param zero
77+
* What is zero for this data type. For most basic data types (like
78+
* int float, etc.) this parameter can be ignored; for more complex
79+
* data classes (like multi-dimensional vectors) this is necessary
80+
* to be passed in.
81+
*/
82+
PrefixSum1D(size_t length, std::function<T(size_t)> lookup, T zero = T())
83+
: prefix_sum_(length + 1, zero) {
84+
// The first value in the prefix sum is already initialized to 0.
85+
86+
// Initialize the prefix sum. The prefix sum at position x is the sum
87+
// of all values in the original array from 0 to x - 1.
88+
for (size_t x = 1; x < length + 1; x++) {
89+
prefix_sum_[x] = prefix_sum_[x - 1] + lookup(x - 1);
90+
}
91+
}
92+
93+
/**
94+
* @brief Construct the 1D prefix sum from a vector.
95+
*/
96+
PrefixSum1D(std::vector<T> vals, T zero = T())
97+
: PrefixSum1D(vals.size(),
98+
[&](size_t x) noexcept {
99+
return vals[x];
100+
},
101+
zero) {}
102+
103+
/**
104+
* @brief Get the sum of all values in the original array of values between
105+
* lower_x and upper_x (inclusive).
106+
*
107+
* Inclusive means that the sum will include the values at lower_x and
108+
* upper_x.
109+
*
110+
* This method has O(1) time complexity.
111+
*/
112+
T get_sum(size_t lower_x, size_t upper_x) const {
113+
// Some safety asserts.
114+
VTR_ASSERT_SAFE_MSG(lower_x <= upper_x, "lower_x is larger than upper_x");
115+
VTR_ASSERT_SAFE_MSG(lower_x < prefix_sum_.size() - 1, "lower_x out of range");
116+
VTR_ASSERT_SAFE_MSG(upper_x < prefix_sum_.size() - 1, "upper_x out of range");
117+
118+
// The sum of the region lower_x to upper_x inclusive is equal to
119+
// - The sum from 0 to upper_x
120+
// - Minus the sum from 0 to lower_x - 1
121+
// Note: These are all offset by 1 since the first value is zero. This
122+
// saves us from having to do bound checking.
123+
return prefix_sum_[upper_x + 1] - prefix_sum_[lower_x];
124+
}
125+
126+
private:
127+
/**
128+
* @brief The 1D prefix sum of the original array of values.
129+
*
130+
* Index x of the prefix sum contains the sum of all values in the original
131+
* array from 0 to x - 1. The first value in this array is 0. By setting the
132+
* first value in the array to 0, we can avoid bound checking. This data
133+
* structure has the special property that the sum of any sub-array can be
134+
* computed in O(1) time.
135+
*/
136+
std::vector<T> prefix_sum_;
137+
};
138+
139+
/**
140+
* @brief 2D Prefix Sum manager class.
141+
*
142+
* Given a 2D grid of values, it may be necessary to find the sum of values
143+
* within some rectangular sub-region of that grid. If this operation needs to
144+
* be performed many times, this may be expensive in runtime to calculate.
145+
*
146+
* If the grid of values does not change, we can create a prefix sum which will
147+
* allow us to get the sum of values in some rectangular sub-region of the
148+
* grid in O(1) time, instead of O(k) time where k is the number of values
149+
* in the region.
150+
*
151+
* This class has a space complexity of O(w * h) where w and h are the width
152+
* and height of the grid of values.
153+
*
154+
*
155+
* Static Matrix of Values Example (values stored in a matrix):
156+
*
157+
* vtr::NdMatrix<float, 2> vals({w, h});
158+
*
159+
* // ... Initialize vals
160+
*
161+
* // Build the Prefix Sum
162+
* vtr::PrefixSum2D<float> prefix_sum(vals);
163+
*
164+
* // Compute the sum of the rectangular region from (1, 2) to (3, 4) inclusive.
165+
* float sum = prefix_sum.get_sum(1, 2, 3, 4);
166+
*
167+
*
168+
* Dynamic Matrix of Values Example (values derived at runtime):
169+
*
170+
* // Build the Prefix Sum using a lambda
171+
* vtr::PrefixSum2D<float> prefix_sum(w, h, [&](size_t x, size_t y) {
172+
* // This lambda returns the value that would be in the matrix at (x, y)
173+
* return (x + y) / 2.f;
174+
* });
175+
*
176+
* // Compute the sum of the rectangular region from (0, 4) to (3, 5) inclusive.
177+
* float sum = prefix_sum.get_sum(0, 4, 3, 5);
178+
*/
179+
template<typename T>
180+
class PrefixSum2D {
181+
public:
182+
PrefixSum2D() = default;
183+
184+
/**
185+
* @brief Construct the 2D prefix sum.
186+
*
187+
* This pre-computes the sums of values in the grid, making it faster to
188+
* get the sum of sub-regions of the grid later.
189+
*
190+
* This constructor has a time complexity of O(w * h).
191+
*
192+
* @param w
193+
* The width of the grid of values to make a prefix sum over.
194+
* @param h
195+
* The height of the grid of values to make a prefix sum over.
196+
* @param lookup
197+
* A lambda function which will return the value in the grid at the
198+
* given x, y position. This is a lambda to allow a prefix sum to
199+
* be created, even if the values in the grid are not stored in
200+
* a matrix (may be computed at runtime).
201+
* @param zero
202+
* What is zero for this data type. For most basic data types (like
203+
* int, float, etc.) this parameter can be ignored; for more complex
204+
* data classes (like multi-dimensional vectors) this is necessary
205+
* to be passed in.
206+
*/
207+
PrefixSum2D(size_t w, size_t h, std::function<T(size_t, size_t)> lookup, T zero = T())
208+
: prefix_sum_({w + 1, h + 1}, zero) {
209+
// The first row and first column should already be initialized to zero.
210+
211+
// Initialize the prefix sum. The prefix sum at position (x, y) is the
212+
// sum of all values in the original matrix in the rectangle from (0, 0)
213+
// to (x - 1, y - 1) inclusive.
214+
for (size_t x = 1; x < w + 1; x++) {
215+
for (size_t y = 1; y < h + 1; y++) {
216+
prefix_sum_[x][y] = prefix_sum_[x - 1][y] +
217+
prefix_sum_[x][y - 1] +
218+
lookup(x - 1, y - 1) -
219+
prefix_sum_[x - 1][y - 1];
220+
}
221+
}
222+
}
223+
224+
/**
225+
* @brief Constructs a 2D prefix sum from a 2D grid of values.
226+
*/
227+
PrefixSum2D(const vtr::NdMatrix<T, 2>& vals, T zero = T())
228+
: PrefixSum2D(vals.dim_size(0),
229+
vals.dim_size(1),
230+
[&](size_t x, size_t y) {
231+
return vals[x][y];
232+
},
233+
zero) {}
234+
235+
/**
236+
* @brief Get the sum of all values in the original grid of values between
237+
* x = [lower_x, upper_x] and y = [lower_y, upper_y].
238+
*
239+
* This sum is inclusive, so it also sums the values at (upper_x, upper_y).
240+
*
241+
* This method has O(1) time complexity.
242+
*/
243+
T get_sum(size_t lower_x, size_t lower_y, size_t upper_x, size_t upper_y) const {
244+
// Some safety asserts.
245+
VTR_ASSERT_SAFE_MSG(lower_x <= upper_x, "lower_x is larger than upper_x");
246+
VTR_ASSERT_SAFE_MSG(lower_y <= upper_y, "lower_y is larger than upper_y");
247+
VTR_ASSERT_SAFE_MSG(lower_x < prefix_sum_.dim_size(0) - 1, "lower_x out of range");
248+
VTR_ASSERT_SAFE_MSG(upper_x < prefix_sum_.dim_size(0) - 1, "upper_x out of range");
249+
VTR_ASSERT_SAFE_MSG(lower_y < prefix_sum_.dim_size(1) - 1, "lower_y out of range");
250+
VTR_ASSERT_SAFE_MSG(upper_y < prefix_sum_.dim_size(1) - 1, "upper_y out of range");
251+
252+
// The sum of the region (lower_x, lower_y) to (upper_x, upper_y)
253+
// inclusive is equal to:
254+
// - The sum of the region (0, 0) to (upper_x, upper_y)
255+
// - Minus the sum of the region (0, 0) to (lower_x - 1, upper_y)
256+
// - Remove the part below the region
257+
// - Minus the sum of the region (0, 0) to (upper_x, lower_y - 1)
258+
// - Remove the part left of the region
259+
// - Plus the sum of the region (0, 0) to (lower_x - 1, lower_y - 1)
260+
// - Add back on the lower-left corner which was subtracted twice.
261+
// Note: all of these are offset by 1 since the first row and column
262+
// are all zeros. This allows us to avoid bounds checking when
263+
// lower_x or lower_y are 0.
264+
return prefix_sum_[upper_x + 1][upper_y + 1] - prefix_sum_[lower_x][upper_y + 1]
265+
- prefix_sum_[upper_x + 1][lower_y]
266+
+ prefix_sum_[lower_x][lower_y];
267+
}
268+
269+
private:
270+
/**
271+
* @brief The 2D prefix sum of the original grid of values.
272+
*
273+
* Position (x, y) of the prefix sum contains the sum of all values in the
274+
* rectangle (0, 0) -> (x - 1, y - 1) inclusive. The first row and column
275+
* are all zeros. By setting these to zero, we can avoid bound checking.
276+
* This data structure has the special property that the sum of any
277+
* rectangular region can be computed in O(1) time.
278+
*/
279+
vtr::NdMatrix<T, 2> prefix_sum_;
280+
};
281+
282+
} // namespace vtr
283+
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
/**
2+
* @file
3+
* @author Alex Singer
4+
* @date February 2025
5+
* @brief Test cases for the Prefix Sum class in vtr_util.
6+
*/
7+
8+
#include "catch2/catch_test_macros.hpp"
9+
10+
#include "vtr_ndmatrix.h"
11+
#include "vtr_prefix_sum.h"
12+
13+
using namespace Catch;
14+
15+
TEST_CASE("PrefixSum1D", "[vtr_prefix_sum/PrefixSum1D]") {
    // Input array that the prefix sum is built over.
    std::vector<float> vals = {1.f, 7.f, 2.f, 2.f, 5.f, 6.f, 1.f, 9.f, 1.f, 3.f};

    // Build the prefix sum under test.
    vtr::PrefixSum1D<float> prefix_sum(vals);

    // Reference total of the whole array, accumulated directly in index order.
    float sum_of_all_vals = 0.f;
    for (float val : vals) {
        sum_of_all_vals += val;
    }

    // Every single-element range must give back the original element.
    SECTION("construction") {
        for (size_t x = 0; x < vals.size(); x++) {
            const float sum_val = prefix_sum.get_sum(x, x);
            REQUIRE(sum_val == vals[x]);
        }
    }

    // Spot-check the full range, a prefix, a suffix, and an interior range.
    SECTION("get_sum") {
        REQUIRE(prefix_sum.get_sum(0, vals.size() - 1) == sum_of_all_vals);
        REQUIRE(prefix_sum.get_sum(0, 2) == 10.f);
        REQUIRE(prefix_sum.get_sum(7, 9) == 13.f);
        REQUIRE(prefix_sum.get_sum(2, 5) == 15.f);
    }
}
43+
44+
TEST_CASE("PrefixSum2D", "[vtr_prefix_sum/PrefixSum2D]") {
    // The 4x4 grid under test, written the way it is drawn: the first row
    // below is the top of the grid (y = 3) and the last row is the bottom
    // (y = 0); x increases from left to right.
    const float rows_top_to_bottom[4][4] = {
        {1.f, 3.f, 9.f, 2.f},
        {2.f, 4.f, 0.f, 8.f},
        {3.f, 7.f, 1.f, 3.f},
        {5.f, 6.f, 9.f, 2.f},
    };

    // Copy the picture into the matrix, indexed as vals[x][y].
    vtr::NdMatrix<float, 2> vals({4, 4});
    for (size_t y = 0; y < 4; y++) {
        for (size_t x = 0; x < 4; x++) {
            vals[x][y] = rows_top_to_bottom[3 - y][x];
        }
    }

    // Build the prefix sum under test.
    vtr::PrefixSum2D<float> prefix_sum(vals);

    // Reference total of the whole grid, accumulated directly.
    float sum_of_all_vals = 0.f;
    for (size_t x = 0; x < 4; x++) {
        for (size_t y = 0; y < 4; y++) {
            sum_of_all_vals += vals[x][y];
        }
    }

    // Every 1x1 region must give back the original cell value.
    SECTION("construction") {
        for (size_t x = 0; x < 4; x++) {
            for (size_t y = 0; y < 4; y++) {
                const float sum_val = prefix_sum.get_sum(x, y, x, y);
                REQUIRE(sum_val == vals[x][y]);
            }
        }
    }

    // Spot-check the full grid, an interior square, the bottom row, the
    // left column, and a region touching the top edge.
    SECTION("get_sum") {
        REQUIRE(prefix_sum.get_sum(0, 0, 3, 3) == sum_of_all_vals);
        REQUIRE(prefix_sum.get_sum(1, 1, 2, 2) == 12.f);
        REQUIRE(prefix_sum.get_sum(0, 0, 3, 0) == 22.f);
        REQUIRE(prefix_sum.get_sum(0, 0, 0, 3) == 11.f);
        REQUIRE(prefix_sum.get_sum(1, 2, 2, 3) == 16.f);
    }
}
99+

0 commit comments

Comments
 (0)