Skip to content

Commit 7396d6f

Browse files
committed
lookahead: enhanced node sampling method
Signed-off-by: Dusty DeWeese <[email protected]> Signed-off-by: Alessandro Comodi <[email protected]> Signed-off-by: Keith Rothman <[email protected]>
1 parent 1b90651 commit 7396d6f

File tree

2 files changed

+264
-0
lines changed

2 files changed

+264
-0
lines changed
Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
#include "router_lookahead_sampling.h"

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <map>
#include <tuple>
#include <utility>
#include <vector>

#include "globals.h"
#include "vtr_math.h"
#include "vtr_geometry.h"
#include "vtr_time.h"
9+
10+
// Sampling is based on an N x N grid of starting windows, where N = SAMPLE_GRID_SIZE.
static constexpr int SAMPLE_GRID_SIZE{2};

// Quantiles (like percentiles, but expressed in the range 0-1) of the segment
// count, used as the selection criterion: locations with higher, but not
// extreme, counts of each segment type are chosen.
static constexpr double kSamplingCountLowerQuantile{0.5};
static constexpr double kSamplingCountUpperQuantile{0.7};
17+
18+
// also known as the L1 norm
19+
static int manhattan_distance(const vtr::Point<int>& a, const vtr::Point<int>& b) {
20+
return abs(b.x() - a.x()) + abs(b.y() - a.y());
21+
}
22+
23+
// the smallest bounding box containing a node
24+
static vtr::Rect<int> bounding_box_for_node(int node_ind) {
25+
auto& device_ctx = g_vpr_ctx.device();
26+
auto& rr_graph = device_ctx.rr_nodes;
27+
int x = rr_graph.node_xlow(RRNodeId(node_ind));
28+
int y = rr_graph.node_ylow(RRNodeId(node_ind));
29+
30+
return vtr::Rect<int>(vtr::Point<int>(x, y));
31+
}
32+
33+
// Rectangle spanned by the samples of `bounding_box` at grid positions
// (sx, sy) and (sx + 1, sy + 1) for a grid with `n` divisions.
// NOTE(review): `sample` is declared outside this file section; presumably it
// is the uniform-grid sampler from vtr_geometry -- confirm.
static vtr::Rect<int> sample_window(const vtr::Rect<int>& bounding_box, int sx, int sy, int n) {
    const auto lower_corner = sample(bounding_box, sx, sy, n);
    const auto upper_corner = sample(bounding_box, sx + 1, sy + 1, n);
    return vtr::Rect<int>(lower_corner, upper_corner);
}
37+
38+
// Collects every location (x, y) inside `window` whose entry in `counts` lies
// in the inclusive range [min_count, max_count], and returns those locations
// sorted by increasing Manhattan distance from `sample(window, 1, 1, 2)`
// (presumably the centre of the window -- confirm against vtr_geometry).
//
// Each returned SamplePoint has its location set and an empty node list.
// Asserts that min_count <= max_count.
static std::vector<SamplePoint> choose_points(const vtr::Matrix<int>& counts,
                                              const vtr::Rect<int>& window,
                                              int min_count,
                                              int max_count) {
    VTR_ASSERT(min_count <= max_count);

    std::vector<SamplePoint> candidates;
    for (int y = window.ymin(); y < window.ymax(); y++) {
        for (int x = window.xmin(); x < window.xmax(); x++) {
            const bool too_few = counts[x][y] < min_count;
            const bool too_many = counts[x][y] > max_count;
            if (too_few || too_many) continue;

            candidates.push_back(SamplePoint{/* .location = */ vtr::Point<int>(x, y),
                                             /* .nodes = */ {}});
        }
    }

    const vtr::Point<int> center = sample(window, 1, 1, 2);

    // nearest-to-centre first
    auto closer_to_center = [&center](const SamplePoint& lhs, const SamplePoint& rhs) {
        const int lhs_distance = manhattan_distance(lhs.location, center);
        const int rhs_distance = manhattan_distance(rhs.location, center);
        return lhs_distance < rhs_distance;
    };
    std::sort(candidates.begin(), candidates.end(), closer_to_center);

    return candidates;
}
63+
64+
// Returns the smallest count `c` present in `histogram` such that at least
// ceil(total * ratio) locations have a count <= c, where `total` is the number
// of locations recorded in the histogram.
//
// histogram: map from segment count to the number of locations having that count.
// ratio:     requested quantile, nominally in the range [0, 1].
//
// Returns 0 for an empty histogram. A ratio <= 0 yields the smallest count.
// If the threshold exceeds the population (ratio > 1, or rounding of the
// float product for very large totals) the largest count is returned rather
// than 0, which is never a valid key of a non-empty histogram built from
// positive counts.
static int quantile(const std::map<int, int>& histogram, float ratio) {
    if (histogram.empty()) {
        return 0;
    }

    int total = 0;
    for (const auto& entry : histogram) {
        total += entry.second;
    }

    // number of locations that still have to be covered to reach the quantile
    int remaining = std::ceil(total * ratio);
    for (const auto& entry : histogram) {
        remaining -= entry.second;
        if (remaining <= 0) {
            return entry.first;
        }
    }

    // threshold was larger than the whole population: clamp to the largest count
    return histogram.rbegin()->first;
}
82+
83+
// select a good number of segments to find
84+
static std::map<int, int> count_histogram(const vtr::Rect<int>& box, const vtr::Matrix<int>& counts) {
85+
std::map<int, int> histogram;
86+
for (int y = box.ymin(); y < box.ymax(); y++) {
87+
for (int x = box.xmin(); x < box.xmax(); x++) {
88+
int count = counts[x][y];
89+
if (count > 0) {
90+
++histogram[count];
91+
}
92+
}
93+
}
94+
return histogram;
95+
}
96+
97+
// Used to calculate each region's `order`.
//
// A space-filling curve orders the regions so that nearby points stay close
// in the ordering. A Hilbert curve might be better, but a Morton (Z-order)
// curve is easy to compute because it only interleaves binary bits.
//
// This function spreads the 32 bits of `x` over the even bit positions of a
// 64-bit value (bit k of `x` ends up at bit 2k; odd bits are zero), so that
// the X and Y dimensions can then be OR'ed together after shifting one of them.
static uint64_t interleave(uint32_t x) {
    // masks applied after shifting by 16, 8, 4, 2 and 1 bits respectively
    static const uint64_t kSpreadMasks[] = {
        0x0000ffff0000ffff,
        0x00ff00ff00ff00ff,
        0x0f0f0f0f0f0f0f0f,
        0x3333333333333333,
        0x5555555555555555,
    };

    uint64_t spread = x;
    int shift = 16;
    for (uint64_t mask : kSpreadMasks) {
        spread = (spread ^ (spread << shift)) & mask;
        shift >>= 1;
    }
    return spread;
}
113+
114+
// for each segment type, find the nearest nodes to an equally spaced grid of points
115+
// within the bounding box for that segment type
116+
std::vector<SampleRegion> find_sample_regions(int num_segments) {
117+
vtr::ScopedStartFinishTimer timer("finding sample regions");
118+
std::vector<SampleRegion> sample_regions;
119+
auto& device_ctx = g_vpr_ctx.device();
120+
auto& rr_nodes = device_ctx.rr_nodes;
121+
std::vector<vtr::Matrix<int>> segment_counts(num_segments);
122+
123+
// compute bounding boxes for each segment type
124+
std::vector<vtr::Rect<int>> bounding_box_for_segment(num_segments, vtr::Rect<int>());
125+
for (size_t i = 0; i < rr_nodes.size(); i++) {
126+
auto& node = rr_nodes[i];
127+
if (node.type() != CHANX && node.type() != CHANY) continue;
128+
if (node.capacity() == 0 || node.num_edges() == 0) continue;
129+
int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index;
130+
131+
VTR_ASSERT(seg_index != OPEN);
132+
VTR_ASSERT(seg_index < num_segments);
133+
134+
bounding_box_for_segment[seg_index].expand_bounding_box(bounding_box_for_node(i));
135+
}
136+
137+
// initialize counts
138+
for (int seg = 0; seg < num_segments; seg++) {
139+
const auto& box = bounding_box_for_segment[seg];
140+
segment_counts[seg] = vtr::Matrix<int>({size_t(box.width()), size_t(box.height())}, 0);
141+
}
142+
143+
// count sample points
144+
for (size_t i = 0; i < rr_nodes.size(); i++) {
145+
auto& node = rr_nodes[i];
146+
if (node.type() != CHANX && node.type() != CHANY) continue;
147+
if (node.capacity() == 0 || node.num_edges() == 0) continue;
148+
int x = rr_nodes.node_xlow(RRNodeId(i));
149+
int y = rr_nodes.node_ylow(RRNodeId(i));
150+
151+
int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index;
152+
segment_counts[seg_index][x][y] += 1;
153+
154+
VTR_ASSERT(seg_index != OPEN);
155+
VTR_ASSERT(seg_index < num_segments);
156+
}
157+
158+
// select sample points
159+
for (int i = 0; i < num_segments; i++) {
160+
const auto& counts = segment_counts[i];
161+
const auto& bounding_box = bounding_box_for_segment[i];
162+
if (bounding_box.empty()) continue;
163+
for (int y = 0; y < SAMPLE_GRID_SIZE; y++) {
164+
for (int x = 0; x < SAMPLE_GRID_SIZE; x++) {
165+
vtr::Rect<int> window = sample_window(bounding_box, x, y, SAMPLE_GRID_SIZE);
166+
if (window.empty()) continue;
167+
168+
auto histogram = count_histogram(window, segment_counts[i]);
169+
SampleRegion region = {
170+
/* .segment_type = */ i,
171+
/* .grid_location = */ vtr::Point<int>(x, y),
172+
/* .points = */ choose_points(counts, window, quantile(histogram, kSamplingCountLowerQuantile), quantile(histogram, kSamplingCountUpperQuantile)),
173+
/* .order = */ 0};
174+
if (!region.points.empty()) {
175+
/* In order to improve caching, the list of sample points are
176+
* sorted to keep points that are nearby on the Euclidean plane also
177+
* nearby in the vector of sample points.
178+
*
179+
* This means subsequent expansions on the same thread are likely
180+
* to cover a similar set of nodes, so they are more likely to be
181+
* cached. This improves performance by about 7%, which isn't a lot,
182+
* but not a bad improvement for a few lines of code. */
183+
vtr::Point<int> location = region.points[0].location;
184+
185+
// interleave bits of X and Y for a Z-curve ordering.
186+
region.order = interleave(location.x()) | (interleave(location.y()) << 1);
187+
188+
sample_regions.push_back(region);
189+
}
190+
}
191+
}
192+
}
193+
194+
// sort regions
195+
std::sort(sample_regions.begin(), sample_regions.end(),
196+
[](const SampleRegion& a, const SampleRegion& b) {
197+
return a.order < b.order;
198+
});
199+
200+
// build an index of sample points on segment type and location
201+
std::map<std::tuple<int, int, int>, SamplePoint*> sample_point_index;
202+
for (auto& region : sample_regions) {
203+
for (auto& point : region.points) {
204+
sample_point_index[std::make_tuple(region.segment_type, point.location.x(), point.location.y())] = &point;
205+
}
206+
}
207+
208+
// collect the node indices for each segment type at the selected sample points
209+
for (size_t i = 0; i < rr_nodes.size(); i++) {
210+
auto& node = rr_nodes[i];
211+
if (node.type() != CHANX && node.type() != CHANY) continue;
212+
if (node.capacity() == 0 || node.num_edges() == 0) continue;
213+
214+
int x = rr_nodes.node_xlow(RRNodeId(i));
215+
int y = rr_nodes.node_ylow(RRNodeId(i));
216+
217+
int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index;
218+
219+
VTR_ASSERT(seg_index != OPEN);
220+
VTR_ASSERT(seg_index < num_segments);
221+
222+
auto point = sample_point_index.find(std::make_tuple(seg_index, x, y));
223+
if (point != sample_point_index.end()) {
224+
point->second->nodes.push_back(i);
225+
}
226+
}
227+
228+
return sample_regions;
229+
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#ifndef ROUTER_LOOKAHEAD_SAMPLING_H
2+
#define ROUTER_LOOKAHEAD_SAMPLING_H
3+
4+
#include <vector>
5+
#include "vtr_geometry.h"
6+
#include "globals.h"
7+
8+
// a sample point for a segment type, contains all segments at the VPR location
9+
struct SamplePoint {
10+
// canonical location
11+
vtr::Point<int> location;
12+
13+
// nodes to expand
14+
std::vector<ssize_t> nodes;
15+
};
16+
17+
struct SampleRegion {
18+
// all nodes in `points' have this segment type
19+
int segment_type;
20+
21+
// location on the sample grid
22+
vtr::Point<int> grid_location;
23+
24+
// locations to try
25+
// The computation will keep expanding each of the points
26+
// until a number of paths (segment -> connection box) are found.
27+
std::vector<SamplePoint> points;
28+
29+
// used to sort the regions to improve caching
30+
uint64_t order;
31+
};
32+
33+
// Finds the sample regions for the segment types 0 .. num_segments - 1.
// Each returned region carries its segment type, its position on the sample
// grid, the chosen sample points (with the nodes to expand at each point)
// and the sort key by which the returned vector is ordered.
std::vector<SampleRegion> find_sample_regions(int num_segments);
34+
35+
#endif

0 commit comments

Comments
 (0)