Skip to content

Commit 1086783

Browse files
[AP] Analytical Solver
This introduces the Analytical Solver class to the AP flow. This is an integral part of the Global Placement stage and what gives Analytical Placement its name. This PR introduces the QP_HYBRID analytical solver which uses a hybrid Clique and Star net model to optimize the quadratic HPWL objective. The code is designed to allow for other analytical solvers to be implemented and interchanged without issue. A B2B solver will be coming in a future PR.
1 parent ea8695a commit 1086783

File tree

10 files changed

+537
-16
lines changed

10 files changed

+537
-16
lines changed

vpr/src/analytical_place/analytical_placement_flow.cpp

Lines changed: 44 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
*/
77

88
#include "analytical_placement_flow.h"
9+
#include <memory>
10+
#include "analytical_solver.h"
911
#include "ap_netlist.h"
1012
#include "atom_netlist.h"
1113
#include "full_legalizer.h"
@@ -19,6 +21,40 @@
1921
#include "vtr_assert.h"
2022
#include "vtr_time.h"
2123

24+
/**
25+
* @brief A helper method to log statistics on the APNetlist.
26+
*/
27+
static void print_ap_netlist_stats(const APNetlist& netlist) {
28+
// Get the number of moveable and fixed blocks
29+
size_t num_moveable_blocks = 0;
30+
size_t num_fixed_blocks = 0;
31+
for (APBlockId blk_id : netlist.blocks()) {
32+
if (netlist.block_mobility(blk_id) == APBlockMobility::MOVEABLE)
33+
num_moveable_blocks++;
34+
else
35+
num_fixed_blocks++;
36+
}
37+
// Get the fanout information of nets
38+
size_t highest_fanout = 0;
39+
float average_fanout = 0.f;
40+
for (APNetId net_id : netlist.nets()) {
41+
size_t net_fanout = netlist.net_pins(net_id).size();
42+
if (net_fanout > highest_fanout)
43+
highest_fanout = net_fanout;
44+
average_fanout += static_cast<float>(net_fanout);
45+
}
46+
average_fanout /= static_cast<float>(netlist.nets().size());
47+
// Print the statistics
48+
VTR_LOG("Analytical Placement Netlist Statistics:\n");
49+
VTR_LOG("\tBlocks: %zu\n", netlist.blocks().size());
50+
VTR_LOG("\t\tMoveable Blocks: %zu\n", num_moveable_blocks);
51+
VTR_LOG("\t\tFixed Blocks: %zu\n", num_fixed_blocks);
52+
VTR_LOG("\tNets: %zu\n", netlist.nets().size());
53+
VTR_LOG("\t\tAverage Fanout: %.2f\n", average_fanout);
54+
VTR_LOG("\t\tHighest Fanout: %zu\n", highest_fanout);
55+
VTR_LOG("\tPins: %zu\n", netlist.pins().size());
56+
}
57+
2258
void run_analytical_placement_flow(t_vpr_setup& vpr_setup) {
2359
(void)vpr_setup;
2460
// Start an overall timer for the Analytical Placement flow.
@@ -38,22 +74,19 @@ void run_analytical_placement_flow(t_vpr_setup& vpr_setup) {
3874
APNetlist ap_netlist = gen_ap_netlist_from_atoms(atom_nlist,
3975
prepacker,
4076
constraints);
77+
print_ap_netlist_stats(ap_netlist);
4178

4279
// Run the Global Placer
43-
// For now, just put all the moveable blocks at the center of the device
44-
// grid. This will be replaced later. This is just for testing.
80+
// For now, just runs the solver.
4581
PartialPlacement p_placement(ap_netlist);
82+
std::unique_ptr<AnalyticalSolver> solver = make_analytical_solver(e_analytical_solver::QP_HYBRID,
83+
ap_netlist);
84+
solver->solve(0, p_placement);
85+
86+
// Verify that the partial placement is valid before running the full
87+
// legalizer.
4688
const size_t device_width = device_ctx.grid.width();
4789
const size_t device_height = device_ctx.grid.height();
48-
double device_center_x = static_cast<double>(device_width) / 2.0;
49-
double device_center_y = static_cast<double>(device_height) / 2.0;
50-
for (APBlockId ap_blk_id : ap_netlist.blocks()) {
51-
if (ap_netlist.block_mobility(ap_blk_id) != APBlockMobility::MOVEABLE)
52-
continue;
53-
// If the APBlock is moveable, put it on the center for the device.
54-
p_placement.block_x_locs[ap_blk_id] = device_center_x;
55-
p_placement.block_y_locs[ap_blk_id] = device_center_y;
56-
}
5790
VTR_ASSERT(p_placement.verify(ap_netlist,
5891
device_width,
5992
device_height,
Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
/**
2+
* @file
3+
* @author Alex Singer and Robert Luo
4+
* @date October 2024
5+
* @brief The definitions of the analytical solvers used in the AP flow and
6+
* their base class.
7+
*/
8+
9+
#include "analytical_solver.h"
10+
#include <Eigen/src/SparseCore/SparseMatrix.h>
11+
#include <Eigen/SVD>
12+
#include <Eigen/Sparse>
13+
#include <Eigen/Eigenvalues>
14+
#include <Eigen/IterativeLinearSolvers>
15+
#include <cstddef>
16+
#include <cstdio>
17+
#include <memory>
18+
#include <utility>
19+
#include <vector>
20+
#include "partial_placement.h"
21+
#include "ap_netlist.h"
22+
#include "vpr_error.h"
23+
#include "vtr_assert.h"
24+
#include "vtr_vector.h"
25+
26+
/**
 * @brief Factory which constructs the analytical solver of the requested type.
 *
 *  @param solver_type  The kind of analytical solver to build.
 *  @param netlist      The APNetlist handed to the solver's constructor.
 *
 *  @return An owning pointer to the newly built solver. An unrecognized solver
 *          type is reported through VPR_FATAL_ERROR.
 */
std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_analytical_solver solver_type,
                                                         const APNetlist& netlist) {
    // Stays null unless one of the known solver types below is matched.
    std::unique_ptr<AnalyticalSolver> solver;
    switch (solver_type) {
        case e_analytical_solver::QP_HYBRID:
            solver = std::make_unique<QPHybridSolver>(netlist);
            break;
        default:
            VPR_FATAL_ERROR(VPR_ERROR_AP,
                            "Unrecognized analytical solver type");
            break;
    }
    return solver;
}
39+
40+
/**
 * @brief Builds the two-way mapping between moveable APBlocks and row IDs.
 *
 * Both lookup tables are sized to the total number of blocks in the netlist
 * and start out filled with invalid IDs. Every moveable block is then given
 * the next unused row ID, counting up from zero in netlist block order, so
 * the moveable blocks occupy rows [0, num_moveable_blocks_).
 *
 *  @param netlist  The APNetlist this solver will operate on.
 */
AnalyticalSolver::AnalyticalSolver(const APNetlist& netlist)
    : netlist_(netlist),
      blk_id_to_row_id_(netlist.blocks().size(), APRowId::INVALID()),
      row_id_to_blk_id_(netlist.blocks().size(), APBlockId::INVALID()) {
    // Index of the next row to hand out; once the loop finishes this is also
    // the number of moveable blocks seen.
    size_t next_row_idx = 0;
    for (APBlockId blk_id : netlist.blocks()) {
        const bool is_moveable = (netlist.block_mobility(blk_id) == APBlockMobility::MOVEABLE);
        if (is_moveable) {
            const APRowId row_id(next_row_idx);
            blk_id_to_row_id_[blk_id] = row_id;
            row_id_to_blk_id_[row_id] = blk_id;
            ++next_row_idx;
        }
    }
    num_moveable_blocks_ = next_row_idx;
}
59+
60+
void QPHybridSolver::init_linear_system() {
61+
// Count the number of star nodes that the netlist will have.
62+
size_t num_star_nodes = 0;
63+
for (APNetId net_id : netlist_.nets()) {
64+
if (netlist_.net_pins(net_id).size() > star_num_pins_threshold)
65+
num_star_nodes++;
66+
}
67+
68+
// Initialize the linear system with zeros.
69+
size_t num_variables = num_moveable_blocks_ + num_star_nodes;
70+
A_sparse = Eigen::SparseMatrix<double>(num_variables, num_variables);
71+
b_x = Eigen::VectorXd::Zero(num_variables);
72+
b_y = Eigen::VectorXd::Zero(num_variables);
73+
74+
// Create a list of triplets that will be used to create the sparse
75+
// coefficient matrix. This is the method recommended by Eigen to initialize
76+
// this matrix.
77+
std::vector<Eigen::Triplet<double>> tripletList;
78+
// Reserve enough space for the triplets. This is just to help with
79+
// performance.
80+
size_t num_nets = netlist_.nets().size();
81+
tripletList.reserve(num_moveable_blocks_ * num_nets);
82+
83+
// Lambda expression to add a connection to the linear system from the src
84+
// to the target with the given weight. The src_row_id may represent a star
85+
// node (so it does not represent an APBlock) or a moveable APBlock. The
86+
// target_blk_id may be a fixed or moveable block.
87+
auto add_connection_to_system = [&](size_t src_row_id,
88+
APBlockId target_blk_id,
89+
double weight) {
90+
// Verify that this is a valid row.
91+
VTR_ASSERT_DEBUG(src_row_id < A_sparse.rows());
92+
// Verify that this is a valid block id.
93+
VTR_ASSERT_DEBUG(target_blk_id.is_valid());
94+
// The src_row_id is always a moveable block (rows in the matrix always
95+
// coorespond to a moveable APBlock or a star node.
96+
if (netlist_.block_mobility(target_blk_id) == APBlockMobility::MOVEABLE) {
97+
// If the target is also moveable, update the coefficient matrix.
98+
size_t target_row_id = (size_t)blk_id_to_row_id_[target_blk_id];
99+
VTR_ASSERT_DEBUG(target_row_id < A_sparse.rows());
100+
tripletList.emplace_back(src_row_id, src_row_id, weight);
101+
tripletList.emplace_back(target_row_id, target_row_id, weight);
102+
tripletList.emplace_back(src_row_id, target_row_id, -weight);
103+
tripletList.emplace_back(target_row_id, src_row_id, -weight);
104+
} else {
105+
// If the target is fixed, update the coefficient matrix and the
106+
// constant vectors.
107+
tripletList.emplace_back(src_row_id, src_row_id, weight);
108+
VTR_ASSERT_DEBUG(netlist_.block_loc(target_blk_id).x >= 0);
109+
VTR_ASSERT_DEBUG(netlist_.block_loc(target_blk_id).y >= 0);
110+
// FIXME: These fixed block locations are aligned to the anchor of
111+
// the tiles they are in. This is not correct. A method
112+
// should be added to the netlist class or to a util file
113+
// which can get a more accurate position.
114+
double blk_loc_x = netlist_.block_loc(target_blk_id).x;
115+
double blk_loc_y = netlist_.block_loc(target_blk_id).y;
116+
b_x(src_row_id) += weight * blk_loc_x;
117+
b_y(src_row_id) += weight * blk_loc_y;
118+
}
119+
};
120+
121+
// Create the connections using a hybrid connection model of the star and
122+
// clique connnection models.
123+
size_t star_node_offset = 0;
124+
for (APNetId net_id : netlist_.nets()) {
125+
size_t num_pins = netlist_.net_pins(net_id).size();
126+
VTR_ASSERT_DEBUG(num_pins > 1);
127+
if (num_pins > star_num_pins_threshold) {
128+
// Create a star node and connect each block in the net to the star
129+
// node.
130+
// Using the weight from FastPlace
131+
double w = static_cast<double>(num_pins) / static_cast<double>(num_pins - 1);
132+
size_t star_node_id = num_moveable_blocks_ + star_node_offset;
133+
for (APPinId pin_id : netlist_.net_pins(net_id)) {
134+
APBlockId blk_id = netlist_.pin_block(pin_id);
135+
add_connection_to_system(star_node_id, blk_id, w);
136+
}
137+
star_node_offset++;
138+
} else {
139+
// Create a clique connection where every block in a net connects
140+
// exactly once to every other block in the net.
141+
// Using the weight from FastPlace
142+
double w = 1.0 / static_cast<double>(num_pins - 1);
143+
for (size_t ipin_idx = 0; ipin_idx < num_pins; ipin_idx++) {
144+
APPinId first_pin_id = netlist_.net_pin(net_id, ipin_idx);
145+
APBlockId first_blk_id = netlist_.pin_block(first_pin_id);
146+
for (size_t jpin_idx = ipin_idx + 1; jpin_idx < num_pins; jpin_idx++) {
147+
APPinId second_pin_id = netlist_.net_pin(net_id, jpin_idx);
148+
APBlockId second_blk_id = netlist_.pin_block(second_pin_id);
149+
// Make sure that the first node is moveable. This makes
150+
// creating the connection easier.
151+
if (netlist_.block_mobility(first_blk_id) == APBlockMobility::FIXED) {
152+
// If both blocks are fixed, no connection needs to be
153+
// made; just continue.
154+
if (netlist_.block_mobility(second_blk_id) == APBlockMobility::FIXED) {
155+
continue;
156+
}
157+
// If the second block is moveable, swap the first and
158+
// second block so the first block is the moveable one.
159+
std::swap(first_blk_id, second_blk_id);
160+
}
161+
size_t first_row_id = (size_t)blk_id_to_row_id_[first_blk_id];
162+
add_connection_to_system(first_row_id, second_blk_id, w);
163+
}
164+
}
165+
}
166+
}
167+
168+
// Make sure that the number of star nodes created matches the number of
169+
// star nodes we pre-calculated we would have.
170+
VTR_ASSERT_SAFE(num_star_nodes == star_node_offset);
171+
172+
// Populate the A_sparse matrix using the triplets.
173+
A_sparse.setFromTriplets(tripletList.begin(), tripletList.end());
174+
}
175+
176+
/**
177+
* @brief Helper method to update the linear system with anchors to the current
178+
* partial placement.
179+
*
180+
* For each moveable block (with row = i) in the netlist:
181+
* A[i][i] = A[i][i] + coeff_pseudo_anchor;
182+
* b[i] = b[i] + pos[block(i)] * coeff_pseudo_anchor;
183+
* Where coeff_pseudo_anchor grows with each iteration.
184+
*
185+
* This is basically a fast way of adding a connection between a moveable block
186+
* and a fixed block.
187+
*/
188+
static inline void update_linear_system_with_anchors(
189+
Eigen::SparseMatrix<double> &A_sparse_diff,
190+
Eigen::VectorXd &b_x_diff,
191+
Eigen::VectorXd &b_y_diff,
192+
PartialPlacement& p_placement,
193+
size_t num_moveable_blocks,
194+
vtr::vector<APRowId, APBlockId> row_id_to_blk_id,
195+
unsigned iteration) {
196+
// Anchor weights grow exponentially with iteration.
197+
double coeff_pseudo_anchor = 0.01 * std::exp((double)iteration/5);
198+
for (size_t row_id_idx = 0; row_id_idx < num_moveable_blocks; row_id_idx++) {
199+
APRowId row_id = APRowId(row_id_idx);
200+
APBlockId blk_id = row_id_to_blk_id[row_id];
201+
double pseudo_w = coeff_pseudo_anchor;
202+
A_sparse_diff.coeffRef(row_id_idx, row_id_idx) += pseudo_w;
203+
b_x_diff(row_id_idx) += pseudo_w * p_placement.block_x_locs[blk_id];
204+
b_y_diff(row_id_idx) += pseudo_w * p_placement.block_y_locs[blk_id];
205+
}
206+
}
207+
208+
void QPHybridSolver::solve(unsigned iteration, PartialPlacement &p_placement) {
209+
// Create a temporary linear system which will contain the original linear
210+
// system which may be updated to include the anchor points.
211+
Eigen::SparseMatrix<double> A_sparse_diff = Eigen::SparseMatrix<double>(A_sparse);
212+
Eigen::VectorXd b_x_diff = Eigen::VectorXd(b_x);
213+
Eigen::VectorXd b_y_diff = Eigen::VectorXd(b_y);
214+
// In the first iteration, the orginal linear system is used.
215+
// In any other iteration, use the moveable APBlocks current placement as
216+
// anchor-points (fixed block positions).
217+
if (iteration != 0) {
218+
update_linear_system_with_anchors(A_sparse_diff, b_x_diff, b_y_diff,
219+
p_placement, num_moveable_blocks_,
220+
row_id_to_blk_id_, iteration);
221+
}
222+
// Verify that the constant vectors are valid.
223+
VTR_ASSERT_DEBUG(!b_x_diff.hasNaN() && "b_x has NaN!");
224+
VTR_ASSERT_DEBUG(!b_y_diff.hasNaN() && "b_y has NaN!");
225+
226+
// Set up the ConjugateGradient Solver using the coefficient matrix.
227+
// TODO: can change cg.tolerance to increase performance when needed
228+
// - This tolerance may need to be a function of the number of nets.
229+
// - Instead of normalizing the fixed blocks, the tolerance can be scaled
230+
// by the size of the device.
231+
Eigen::ConjugateGradient<Eigen::SparseMatrix<double>, Eigen::Lower|Eigen::Upper> cg;
232+
cg.compute(A_sparse_diff);
233+
VTR_ASSERT(cg.info() == Eigen::Success && "Conjugate Gradient failed at compute!");
234+
// Use the solver to solve for x and y using the constant vectors
235+
// TODO: Use solve with guess to make this faster. Use the previous placement
236+
// as a guess.
237+
Eigen::VectorXd x = cg.solve(b_x_diff);
238+
VTR_ASSERT(cg.info() == Eigen::Success && "Conjugate Gradient failed at solving b_x!");
239+
Eigen::VectorXd y = cg.solve(b_y_diff);
240+
VTR_ASSERT(cg.info() == Eigen::Success && "Conjugate Gradient failed at solving b_y!");
241+
242+
// Write the results back into the partial placement object.
243+
for (size_t row_id_idx = 0; row_id_idx < num_moveable_blocks_; row_id_idx++) {
244+
APRowId row_id = APRowId(row_id_idx);
245+
APBlockId blk_id = row_id_to_blk_id_[row_id];
246+
p_placement.block_x_locs[blk_id] = x[row_id_idx];
247+
p_placement.block_y_locs[blk_id] = y[row_id_idx];
248+
}
249+
}
250+

0 commit comments

Comments
 (0)