Skip to content

Commit 14b562e

Browse files
Merge branch 'master' into ingest_per_edge_delay
2 parents 4110acd + 4157a48 commit 14b562e

File tree

38 files changed

+351
-164
lines changed

38 files changed

+351
-164
lines changed

doc/src/vpr/command_line_usage.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1214,7 +1214,7 @@ Analytical Placement is generally split into three stages:
12141214
Uses the legalized solution as anchor-points to pull the solution to a
12151215
more legal solution (similar to the approach from SimPL :cite:`Kim2013_SimPL`).
12161216

1217-
**Default:** ``qp-hybrid``
1217+
**Default:** ``lp-b2b``
12181218

12191219
.. option:: --ap_partial_legalizer {bipartitioning | flow-based}
12201220

libs/EXTERNAL/libcatch2

vpr/src/analytical_place/analytical_solver.cpp

Lines changed: 50 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -335,8 +335,8 @@ void QPHybridSolver::solve(unsigned iteration, PartialPlacement& p_placement) {
335335
p_placement, iteration);
336336
}
337337
// Verify that the constant vectors are valid.
338-
VTR_ASSERT_DEBUG(!b_x_diff.hasNaN() && "b_x has NaN!");
339-
VTR_ASSERT_DEBUG(!b_y_diff.hasNaN() && "b_y has NaN!");
338+
VTR_ASSERT_SAFE_MSG(!b_x_diff.hasNaN(), "b_x has NaN!");
339+
VTR_ASSERT_SAFE_MSG(!b_y_diff.hasNaN(), "b_y has NaN!");
340340

341341
// Set up the ConjugateGradient Solver using the coefficient matrix.
342342
// TODO: can change cg.tolerance to increase performance when needed
@@ -479,8 +479,11 @@ void B2BSolver::b2b_solve_loop(unsigned iteration, PartialPlacement& p_placement
479479
// p_placement.
480480
// 3) Repeat. Note: We need to repeat step 1 and 2 iteratively since
481481
// the bounds are likely to have changed after step 2.
482-
// TODO: As well as having a maximum number of bound updates, should also
483-
// investigate stopping when the HPWL converges.
482+
// We stop when it looks like the placement is converging (the change in
483+
// HPWL is sufficiently small for a few iterations).
484+
double prev_hpwl = std::numeric_limits<double>::max();
485+
double curr_hpwl = prev_hpwl;
486+
unsigned num_convergence = 0;
484487
for (unsigned counter = 0; counter < max_num_bound_updates_; counter++) {
485488
VTR_LOGV(log_verbosity_ >= 10,
486489
"\tPlacement HPWL in b2b loop: %f\n",
@@ -490,7 +493,7 @@ void B2BSolver::b2b_solve_loop(unsigned iteration, PartialPlacement& p_placement
490493
float build_linear_system_start_time = runtime_timer.elapsed_sec();
491494
init_linear_system(p_placement);
492495
if (iteration != 0)
493-
update_linear_system_with_anchors(p_placement, iteration);
496+
update_linear_system_with_anchors(iteration);
494497
total_time_spent_building_linear_system_ += runtime_timer.elapsed_sec() - build_linear_system_start_time;
495498
VTR_ASSERT_SAFE_MSG(!b_x.hasNaN(), "b_x has NaN!");
496499
VTR_ASSERT_SAFE_MSG(!b_y.hasNaN(), "b_y has NaN!");
@@ -524,22 +527,24 @@ void B2BSolver::b2b_solve_loop(unsigned iteration, PartialPlacement& p_placement
524527
total_time_spent_solving_linear_system_ += runtime_timer.elapsed_sec() - solve_linear_system_start_time;
525528

526529
// Save the result into the partial placement object.
527-
for (size_t row_id_idx = 0; row_id_idx < num_moveable_blocks_; row_id_idx++) {
528-
// Since we are capping the number of iterations, the solver may not
529-
// have enough time to converge on a solution that is on the device.
530-
// We just clamp the solution to zero for now.
531-
// TODO: Should handle this better. If the solution is very negative
532-
// it may indicate a bug.
533-
if (x[row_id_idx] < 0.0)
534-
x[row_id_idx] = 0.0;
535-
if (y[row_id_idx] < 0.0)
536-
y[row_id_idx] = 0.0;
537-
538-
APRowId row_id = APRowId(row_id_idx);
539-
APBlockId blk_id = row_id_to_blk_id_[row_id];
540-
p_placement.block_x_locs[blk_id] = x[row_id_idx];
541-
p_placement.block_y_locs[blk_id] = y[row_id_idx];
542-
}
530+
store_solution_into_placement(x, y, p_placement);
531+
532+
// If the current HPWL is larger than the previous HPWL (i.e. the HPWL
533+
// got worse since last B2B iter) or the gap between the two solutions
534+
// is small, increment a counter.
535+
// TODO: Since, in theory, the HPWL could get worse due to numerical
536+
// reasons, should we save the best result? May not be worth it...
537+
curr_hpwl = p_placement.get_hpwl(netlist_);
538+
double target_gap = b2b_convergence_gap_fac_ * curr_hpwl;
539+
if (curr_hpwl > prev_hpwl || std::abs(curr_hpwl - prev_hpwl) < target_gap)
540+
num_convergence++;
541+
542+
// If the HPWL got close enough times, stop. This is to allow the HPWL
543+
// to "bounce", which can happen as it converges.
544+
// This trades off quality for run time.
545+
if (num_convergence >= target_num_b2b_convergences_)
546+
break;
547+
prev_hpwl = curr_hpwl;
543548

544549
// Update the guesses with the most recent answer
545550
x_guess = x;
@@ -723,8 +728,7 @@ void B2BSolver::init_linear_system(PartialPlacement& p_placement) {
723728

724729
// This function adds anchors for legalized solution. Anchors are treated as fixed node,
725730
// each connecting to a movable node. Number of nodes in an anchor net is always 2.
726-
void B2BSolver::update_linear_system_with_anchors(PartialPlacement& p_placement,
727-
unsigned iteration) {
731+
void B2BSolver::update_linear_system_with_anchors(unsigned iteration) {
728732
VTR_ASSERT_SAFE_MSG(iteration != 0,
729733
"no fixed solution to anchor to in the first iteration");
730734
// Get the anchor weight based on the iteration number. We want the anchor
@@ -733,23 +737,39 @@ void B2BSolver::update_linear_system_with_anchors(PartialPlacement& p_placement,
733737
double coeff_pseudo_anchor = anchor_weight_mult_ * std::exp((double)iteration / anchor_weight_exp_fac_);
734738

735739
// Add an anchor for each moveable block to its solved position.
736-
// Note: We treat anchors as being a 2-pin net between a moveable block
737-
// and a fixed block where both are the bounds of the net.
738740
for (size_t row_id_idx = 0; row_id_idx < num_moveable_blocks_; row_id_idx++) {
739741
APRowId row_id = APRowId(row_id_idx);
740742
APBlockId blk_id = row_id_to_blk_id_[row_id];
741-
double dx = std::abs(p_placement.block_x_locs[blk_id] - block_x_locs_legalized[blk_id]);
742-
double dy = std::abs(p_placement.block_y_locs[blk_id] - block_y_locs_legalized[blk_id]);
743-
// Anchor node are always 2 pins.
744-
double pseudo_w_x = coeff_pseudo_anchor * 2.0 / std::max(dx, distance_epsilon_);
745-
double pseudo_w_y = coeff_pseudo_anchor * 2.0 / std::max(dy, distance_epsilon_);
743+
double pseudo_w_x = coeff_pseudo_anchor * 2.0;
744+
double pseudo_w_y = coeff_pseudo_anchor * 2.0;
746745
A_sparse_x.coeffRef(row_id_idx, row_id_idx) += pseudo_w_x;
747746
A_sparse_y.coeffRef(row_id_idx, row_id_idx) += pseudo_w_y;
748747
b_x(row_id_idx) += pseudo_w_x * block_x_locs_legalized[blk_id];
749748
b_y(row_id_idx) += pseudo_w_y * block_y_locs_legalized[blk_id];
750749
}
751750
}
752751

752+
void B2BSolver::store_solution_into_placement(Eigen::VectorXd& x_soln,
753+
Eigen::VectorXd& y_soln,
754+
PartialPlacement& p_placement) {
755+
for (size_t row_id_idx = 0; row_id_idx < num_moveable_blocks_; row_id_idx++) {
756+
// Since we are capping the number of iterations, the solver may not
757+
// have enough time to converge on a solution that is on the device.
758+
// We just clamp the solution to zero for now.
759+
// TODO: Should handle this better. If the solution is very negative
760+
// it may indicate a bug.
761+
if (x_soln[row_id_idx] < 0.0)
762+
x_soln[row_id_idx] = 0.0;
763+
if (y_soln[row_id_idx] < 0.0)
764+
y_soln[row_id_idx] = 0.0;
765+
766+
APRowId row_id = APRowId(row_id_idx);
767+
APBlockId blk_id = row_id_to_blk_id_[row_id];
768+
p_placement.block_x_locs[blk_id] = x_soln[row_id_idx];
769+
p_placement.block_y_locs[blk_id] = y_soln[row_id_idx];
770+
}
771+
}
772+
753773
void B2BSolver::print_statistics() {
754774
VTR_LOG("B2B Solver Statistics:\n");
755775
VTR_LOG("\tTotal number of CG iterations: %u\n", total_num_cg_iters_);

vpr/src/analytical_place/analytical_solver.h

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -359,11 +359,26 @@ class B2BSolver : public AnalyticalSolver {
359359
/// than some epsilon.
360360
/// Decreasing this number may lead to more instability, but can yield
361361
/// a higher quality solution.
362-
static constexpr double distance_epsilon_ = 0.5;
362+
static constexpr double distance_epsilon_ = 0.01;
363+
364+
/// @brief The gap between the HPWL of the current solved solution in the
365+
/// B2B loop and the previous solved solution that is considered to
366+
/// be close-enough to be converged (as a fraction of the current
367+
/// solved solution HPWL).
368+
/// Decreasing this number toward zero would cause the B2B solver to run
369+
/// more iterations to try and reduce the HPWL further.
370+
static constexpr double b2b_convergence_gap_fac_ = 0.001;
371+
372+
/// @brief The number of times the B2B loop should "converge" before stopping
373+
/// the loop. Due to numerical inaccuracies, it is possible for the
374+
/// HPWL to bounce up and down as it converges. Increasing this number
375+
/// will allow more bounces which may get better quality; however
376+
/// more iterations will need to be run.
377+
static constexpr unsigned target_num_b2b_convergences_ = 2;
363378

364379
/// @brief Max number of bound update / solve iterations. Increasing this
365380
/// number will yield better quality at the expense of runtime.
366-
static constexpr unsigned max_num_bound_updates_ = 6;
381+
static constexpr unsigned max_num_bound_updates_ = 24;
367382

368383
/// @brief Max number of iterations the Conjugate Gradient solver can perform.
369384
/// Due to the weights getting very large in the early iterations of
@@ -376,7 +391,7 @@ class B2BSolver : public AnalyticalSolver {
376391
/// to prevent this behaviour and get good runtime.
377392
// TODO: Need to investigate this more to find a good number for this.
378393
// TODO: Should this be a proportion of the design size?
379-
static constexpr unsigned max_cg_iterations_ = 200;
394+
static constexpr unsigned max_cg_iterations_ = 150;
380395

381396
// The following constants are used to configure the anchor weighting.
382397
// The weights of anchors grow exponentially each iteration by the following
@@ -509,8 +524,18 @@ class B2BSolver : public AnalyticalSolver {
509524
* @brief Updates the linear system with anchor-blocks from the legalized
510525
* solution.
511526
*/
512-
void update_linear_system_with_anchors(PartialPlacement& p_placement,
513-
unsigned iteration);
527+
void update_linear_system_with_anchors(unsigned iteration);
528+
529+
/**
530+
* @brief Store the x and y solutions in Eigen's vectors into the partial
531+
* placement object.
532+
*
533+
* Note: The x_soln and y_soln may be modified if it is found that the
534+
* solution is impossible (i.e. has negative positions).
535+
*/
536+
void store_solution_into_placement(Eigen::VectorXd& x_soln,
537+
Eigen::VectorXd& y_soln,
538+
PartialPlacement& p_placement);
514539

515540
// The following are variables used to store the system of equations to be
516541
// solved in the x and y dimensions. The equations are of the form:

vpr/src/analytical_place/full_legalizer.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -518,6 +518,7 @@ void APPack::legalize(const PartialPlacement& p_placement) {
518518
arch_,
519519
vpr_setup_.RoutingArch,
520520
vpr_setup_.PackerRRGraph,
521+
prepacker_,
521522
flat_placement_info);
522523

523524
// The Packer stores the clusters into a .net file. Load the packing file.

vpr/src/analytical_place/global_placer.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include "global_placer.h"
1010
#include <cstdio>
11+
#include <limits>
1112
#include <memory>
1213
#include <vector>
1314
#include "analytical_solver.h"
@@ -207,6 +208,12 @@ PartialPlacement SimPLGlobalPlacer::place() {
207208
float total_time_spent_in_solver = 0.0f;
208209
float total_time_spent_in_legalizer = 0.0f;
209210

211+
// Create a partial placement object to store the best placement found during
212+
// global placement. It is possible for the global placement to hit a minimum
213+
// in the middle of its iterations, this lets us keep that solution.
214+
PartialPlacement best_p_placement(ap_netlist_);
215+
double best_ub_hpwl = std::numeric_limits<double>::max();
216+
210217
// Run the global placer.
211218
for (size_t i = 0; i < max_num_iterations_; i++) {
212219
float iter_start_time = runtime_timer.elapsed_sec();
@@ -235,6 +242,12 @@ PartialPlacement SimPLGlobalPlacer::place() {
235242
iter_end_time - iter_start_time);
236243
}
237244

245+
// If this placement is better than the best we have seen, save it.
246+
if (ub_hpwl < best_ub_hpwl) {
247+
best_ub_hpwl = ub_hpwl;
248+
best_p_placement = p_placement;
249+
}
250+
238251
// Exit condition: If the upper-bound and lower-bound HPWLs are
239252
// sufficiently close together then stop.
240253
double hpwl_relative_gap = (ub_hpwl - lb_hpwl) / ub_hpwl;
@@ -254,12 +267,12 @@ PartialPlacement SimPLGlobalPlacer::place() {
254267

255268
// Print some statistics on the final placement.
256269
VTR_LOG("Placement after Global Placement:\n");
257-
print_placement_stats(p_placement,
270+
print_placement_stats(best_p_placement,
258271
ap_netlist_,
259272
*density_manager_);
260273

261274
// Return the best placement found over all iterations (lowest upper-bound HPWL).
262275
// TODO: investigate saving the best solution found so far. It should be
263276
// cheap to save a copy of the PartialPlacement object.
264-
return p_placement;
277+
return best_p_placement;
265278
}

vpr/src/analytical_place/partial_legalizer.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1490,12 +1490,17 @@ void BiPartitioningPartialLegalizer::partition_blocks_in_window(
14901490
// windows. To do this we sort the unplaced blocks by largest mass to
14911491
// smallest mass. Then we place each block in the bin with the highest
14921492
// underfill.
1493+
// FIXME: Above was the intuition; however, after experimentation, found that
1494+
// sorting by smallest mass to largest mass worked better...
1495+
// FIXME: I think large blocks (like carry chains) need to be handled specially
1496+
// early on. If they are put into a partition too late, they may have
1497+
// to create overfill! Perhaps the partitions can hold two lists.
14931498
std::sort(unplaced_blocks.begin(),
14941499
unplaced_blocks.end(),
14951500
[&](APBlockId a, APBlockId b) {
14961501
const auto& blk_a_mass = density_manager_->mass_calculator().get_block_mass(a);
14971502
const auto& blk_b_mass = density_manager_->mass_calculator().get_block_mass(b);
1498-
return blk_a_mass.manhattan_norm() > blk_b_mass.manhattan_norm();
1503+
return blk_a_mass.manhattan_norm() < blk_b_mass.manhattan_norm();
14991504
});
15001505
for (APBlockId blk_id : unplaced_blocks) {
15011506
// Project the underfill from each window onto the mass. This gives us

vpr/src/analytical_place/partial_legalizer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ class BiPartitioningPartialLegalizer : public PartialLegalizer {
367367
/// create large windows; decreasing this number will put more pressure on
368368
/// the window generation code, which can increase window size and runtime.
369369
/// TODO: Should this be distance instead of number of bins?
370-
static constexpr int max_bin_cluster_gap_ = 1;
370+
static constexpr int max_bin_cluster_gap_ = 2;
371371

372372
public:
373373
/**

vpr/src/base/read_options.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1907,7 +1907,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
19071907
"Controls which Analytical Solver the Global Placer will use in the AP Flow.\n"
19081908
" * qp-hybrid: olves for a placement that minimizes the quadratic HPWL of the flat placement using a hybrid clique/star net model.\n"
19091909
" * lp-b2b: Solves for a placement that minimizes the linear HPWL of theflat placement using the Bound2Bound net model.")
1910-
.default_value("qp-hybrid")
1910+
.default_value("lp-b2b")
19111911
.show_in(argparse::ShowIn::HELP_ONLY);
19121912

19131913
ap_grp.add_argument<e_ap_partial_legalizer, ParseAPPartialLegalizer>(args.ap_partial_legalizer, "--ap_partial_legalizer")

vpr/src/base/vpr_api.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -614,9 +614,17 @@ bool vpr_pack(t_vpr_setup& vpr_setup, const t_arch& arch) {
614614
g_vpr_ctx.atom().netlist());
615615
}
616616

617+
// Run the prepacker, packing the atoms into molecules.
618+
// The Prepacker object performs prepacking and stores the pack molecules.
619+
// As long as the molecules are used, this object must persist.
620+
const Prepacker prepacker(g_vpr_ctx.atom().netlist(),
621+
g_vpr_ctx.device().logical_block_types);
622+
617623
return try_pack(&vpr_setup.PackerOpts, &vpr_setup.AnalysisOpts,
618624
arch, vpr_setup.RoutingArch,
619-
vpr_setup.PackerRRGraph, g_vpr_ctx.atom().flat_placement_info());
625+
vpr_setup.PackerRRGraph,
626+
prepacker,
627+
g_vpr_ctx.atom().flat_placement_info());
620628
}
621629

622630
void vpr_load_packing(const t_vpr_setup& vpr_setup, const t_arch& arch) {

0 commit comments

Comments
 (0)