Skip to content

Commit 41fa4dd

Browse files
[AP][Solver] Ignored Disconnected Blocks in AP Solver
After investigating some of the slowest-running test cases, I realized that we were not handling disconnected blocks in the solver. Especially after we started thresholding out high-fanout nets, some circuits were taking far longer to solve than they should. They especially took a long time to set up the matrices. After investigating, I realized that there were many blocks which were completely disconnected from the rest of the circuit. There is no reason to optimize the location of these blocks since the AP objective is formulated based on net connectivity. As such, these disconnected blocks should be completely ignored during placement. Ignoring these blocks reduces the number of variables in the A matrix, which can greatly improve runtime. Early results on Titan show up to a 3.5x improvement in GP runtime, and a 20% improvement in GP runtime on average. Future work is to be more methodical about which nets to mark as ignored. The AP flow currently does not directly set signals like clocks as ignored; doing so may allow us to label more blocks as disconnected.
1 parent 7dfdcc7 commit 41fa4dd

File tree

2 files changed

+102
-17
lines changed

2 files changed

+102
-17
lines changed

vpr/src/analytical_place/analytical_solver.cpp

Lines changed: 74 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -111,15 +111,32 @@ std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_ap_analytical_solver
111111
AnalyticalSolver::AnalyticalSolver(const APNetlist& netlist,
112112
const AtomNetlist& atom_netlist,
113113
const PreClusterTimingManager& pre_cluster_timing_manager,
114+
const DeviceGrid& device_grid,
114115
float ap_timing_tradeoff,
115116
int log_verbosity)
116117
: netlist_(netlist)
117118
, atom_netlist_(atom_netlist)
118119
, blk_id_to_row_id_(netlist.blocks().size(), APRowId::INVALID())
119120
, row_id_to_blk_id_(netlist.blocks().size(), APBlockId::INVALID())
120121
, net_weights_(netlist.nets().size(), 1.0f)
122+
, device_grid_width_(device_grid.width())
123+
, device_grid_height_(device_grid.height())
121124
, ap_timing_tradeoff_(ap_timing_tradeoff)
122125
, log_verbosity_(log_verbosity) {
126+
127+
// Mark completely disconnected blocks. Since these blocks are not connected
128+
// to any nets that we care about for AP, we should not pass them into the
129+
// AP solver.
130+
vtr::vector<APBlockId, bool> block_is_used(netlist.blocks().size(), false);
131+
for (APNetId net_id : netlist.nets()) {
132+
if (netlist.net_is_ignored(net_id))
133+
continue;
134+
for (APPinId pin_id : netlist.net_pins(net_id)) {
135+
APBlockId blk_id = netlist.pin_block(pin_id);
136+
block_is_used[blk_id] = true;
137+
}
138+
}
139+
123140
// Get the number of moveable blocks in the netlist and create a unique
124141
// row ID from [0, num_moveable_blocks) for each moveable block in the
125142
// netlist.
@@ -131,6 +148,12 @@ AnalyticalSolver::AnalyticalSolver(const APNetlist& netlist,
131148
num_fixed_blocks_++;
132149
if (netlist.block_mobility(blk_id) != APBlockMobility::MOVEABLE)
133150
continue;
151+
// If this block is disconnected (unused), add it to the disconnected
152+
// blocks vector and skip creating a row ID for it.
153+
if (!block_is_used[blk_id]) {
154+
disconnected_blocks_.push_back(blk_id);
155+
continue;
156+
}
134157
APRowId new_row_id = APRowId(current_row_id);
135158
blk_id_to_row_id_[blk_id] = new_row_id;
136159
row_id_to_blk_id_[new_row_id] = blk_id;
@@ -404,6 +427,15 @@ void QPHybridSolver::solve(unsigned iteration, PartialPlacement& p_placement) {
404427
// solution to the zero vector if we do not set it to the guess directly.
405428
if (iteration == 0 && num_fixed_blocks_ == 0) {
406429
store_solution_into_placement(guess_x, guess_y, p_placement);
430+
431+
// Store disconnected blocks into solution at the center of the device
432+
for (APBlockId blk_id : disconnected_blocks_) {
433+
// No disconnected block should have a row ID or be a fixed block.
434+
VTR_ASSERT_SAFE(!blk_id_to_row_id_[blk_id].is_valid() && netlist_.block_mobility(blk_id) != APBlockMobility::FIXED);
435+
p_placement.block_x_locs[blk_id] = device_grid_width_ / 2.0f;
436+
p_placement.block_y_locs[blk_id] = device_grid_height_ / 2.0f;
437+
}
438+
407439
return;
408440
}
409441

@@ -442,6 +474,18 @@ void QPHybridSolver::solve(unsigned iteration, PartialPlacement& p_placement) {
442474
// Write the results back into the partial placement object.
443475
store_solution_into_placement(x, y, p_placement);
444476

477+
// In the very first iteration, the solver must provide a location for all
478+
// of the blocks. The disconnected blocks will not be given a placement by
479+
// the solver above. Just put them in the middle of the device and let the
480+
// legalizer find good places for them. In future iterations, the prior
481+
// position of these blocks will already be in the p_placement object.
482+
if (iteration == 0) {
483+
for (APBlockId blk_id : disconnected_blocks_) {
484+
p_placement.block_x_locs[blk_id] = device_grid_width_ / 2.0;
485+
p_placement.block_y_locs[blk_id] = device_grid_height_ / 2.0;
486+
}
487+
}
488+
445489
// Update the guess. The guess for the next iteration is the solution in
446490
// this iteration.
447491
guess_x = x;
@@ -497,9 +541,8 @@ void B2BSolver::solve(unsigned iteration, PartialPlacement& p_placement) {
497541
// tile location for each AP block. The center is just an
498542
// approximation.
499543
if (num_fixed_blocks_ == 0) {
500-
for (size_t row_id_idx = 0; row_id_idx < num_moveable_blocks_; row_id_idx++) {
501-
APRowId row_id = APRowId(row_id_idx);
502-
APBlockId blk_id = row_id_to_blk_id_[row_id];
544+
for (APBlockId blk_id : netlist_.blocks()) {
545+
VTR_ASSERT_SAFE(netlist_.block_mobility(blk_id) != APBlockMobility::FIXED);
503546
p_placement.block_x_locs[blk_id] = device_grid_width_ / 2.0;
504547
p_placement.block_y_locs[blk_id] = device_grid_height_ / 2.0;
505548
}
@@ -559,6 +602,13 @@ void B2BSolver::initialize_placement_least_dense(PartialPlacement& p_placement)
559602
p_placement.block_y_locs[blk_id] = r * gap;
560603
}
561604
}
605+
606+
// Any blocks which are disconnected can be put anywhere. Just put them at
607+
// the center of the device for now.
608+
for (APBlockId blk_id : disconnected_blocks_) {
609+
p_placement.block_x_locs[blk_id] = device_grid_width_ / 2.0;
610+
p_placement.block_y_locs[blk_id] = device_grid_height_ / 2.0;
611+
}
562612
}
563613

564614
void B2BSolver::b2b_solve_loop(unsigned iteration, PartialPlacement& p_placement) {
@@ -654,6 +704,24 @@ void B2BSolver::b2b_solve_loop(unsigned iteration, PartialPlacement& p_placement
654704
x_guess = x;
655705
y_guess = y;
656706
}
707+
708+
// Disconnected blocks are not optimized by the solver.
709+
if (iteration == 0) {
710+
// In the first iteration of GP, just place the disconnected blocks at the
711+
// center of the device. The legalizer will find a good place to put
712+
// them.
713+
for (APBlockId blk_id : disconnected_blocks_) {
714+
p_placement.block_x_locs[blk_id] = device_grid_width_ / 2.0;
715+
p_placement.block_y_locs[blk_id] = device_grid_height_ / 2.0;
716+
}
717+
} else {
718+
// If a legalized solution is available (after the first iteration of GP), then
719+
// set the disconnected blocks to their legalized position.
720+
for (APBlockId blk_id : disconnected_blocks_) {
721+
p_placement.block_x_locs[blk_id] = block_x_locs_legalized[blk_id];
722+
p_placement.block_y_locs[blk_id] = block_y_locs_legalized[blk_id];
723+
}
724+
}
657725
}
658726

659727
namespace {
@@ -791,11 +859,11 @@ void B2BSolver::init_linear_system(PartialPlacement& p_placement) {
791859

792860
// Create triplet lists to store the sparse positions to update and reserve
793861
// space for them.
794-
size_t num_nets = netlist_.nets().size();
862+
size_t total_num_pins_in_netlist = netlist_.pins().size();
795863
std::vector<Eigen::Triplet<double>> triplet_list_x;
796-
triplet_list_x.reserve(num_nets);
864+
triplet_list_x.reserve(total_num_pins_in_netlist);
797865
std::vector<Eigen::Triplet<double>> triplet_list_y;
798-
triplet_list_y.reserve(num_nets);
866+
triplet_list_y.reserve(total_num_pins_in_netlist);
799867

800868
for (APNetId net_id : netlist_.nets()) {
801869
if (netlist_.net_is_ignored(net_id))

vpr/src/analytical_place/analytical_solver.h

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ class AnalyticalSolver {
6363
AnalyticalSolver(const APNetlist& netlist,
6464
const AtomNetlist& atom_netlist,
6565
const PreClusterTimingManager& pre_cluster_timing_manager,
66+
const DeviceGrid& device_grid,
6667
float ap_timing_tradeoff,
6768
int log_verbosity);
6869

@@ -136,6 +137,21 @@ class AnalyticalSolver {
136137
/// between 0 and 1.
137138
vtr::vector<APNetId, float> net_weights_;
138139

140+
/// @brief A vector of blocks in the netlist which are not connected to any
141+
/// non-ignored nets. Since the blocks have no connection to any
142+
/// other blocks in the netlist, they are not considered movable or
143+
/// fixed. The solver will just leave these blocks wherever they were
144+
/// legalized.
145+
std::vector<APBlockId> disconnected_blocks_;
146+
147+
/// @brief The width of the device grid. Used for randomly generating points
148+
/// on the grid.
149+
size_t device_grid_width_;
150+
151+
/// @brief The height of the device grid. Used for randomly generating points
152+
/// on the grid.
153+
size_t device_grid_height_;
154+
139155
/// @brief The AP timing tradeoff term used during global placement. Decides
140156
/// how much the solver cares about timing vs wirelength.
141157
float ap_timing_tradeoff_;
@@ -313,7 +329,12 @@ class QPHybridSolver : public AnalyticalSolver {
313329
const PreClusterTimingManager& pre_cluster_timing_manager,
314330
float ap_timing_tradeoff,
315331
int log_verbosity)
316-
: AnalyticalSolver(netlist, atom_netlist, pre_cluster_timing_manager, ap_timing_tradeoff, log_verbosity) {
332+
: AnalyticalSolver(netlist,
333+
atom_netlist,
334+
pre_cluster_timing_manager,
335+
device_grid,
336+
ap_timing_tradeoff,
337+
log_verbosity) {
317338
// Initializing the linear system only depends on the netlist and fixed
318339
// block locations. Both are provided by the netlist, allowing this to
319340
// be initialized in the constructor.
@@ -449,9 +470,12 @@ class B2BSolver : public AnalyticalSolver {
449470
const PreClusterTimingManager& pre_cluster_timing_manager,
450471
float ap_timing_tradeoff,
451472
int log_verbosity)
452-
: AnalyticalSolver(ap_netlist, atom_netlist, pre_cluster_timing_manager, ap_timing_tradeoff, log_verbosity)
453-
, device_grid_width_(device_grid.width())
454-
, device_grid_height_(device_grid.height()) {}
473+
: AnalyticalSolver(ap_netlist,
474+
atom_netlist,
475+
pre_cluster_timing_manager,
476+
device_grid,
477+
ap_timing_tradeoff,
478+
log_verbosity) {}
455479

456480
/**
457481
* @brief Perform an iteration of the B2B solver, storing the result into
@@ -603,13 +627,6 @@ class B2BSolver : public AnalyticalSolver {
603627
vtr::vector<APBlockId, double> block_x_locs_legalized;
604628
vtr::vector<APBlockId, double> block_y_locs_legalized;
605629

606-
/// @brief The width of the device grid. Used for randomly generating points
607-
/// on the grid.
608-
size_t device_grid_width_;
609-
/// @brief The height of the device grid. Used for randomly generating points
610-
/// on the grid.
611-
size_t device_grid_height_;
612-
613630
/// @brief The total number of CG iterations that this solver has performed
614631
/// so far. This can be a useful metric for the amount of work the
615632
/// solver performs.

0 commit comments

Comments
 (0)