Skip to content

[AP][Solver] Supporting Unfixed Blocks #2944

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 91 additions & 12 deletions vpr/src/analytical_place/analytical_solver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
#include <memory>
#include <utility>
#include <vector>
#include "device_grid.h"
#include "flat_placement_types.h"
#include "partial_placement.h"
#include "ap_netlist.h"
#include "vpr_error.h"
Expand All @@ -36,14 +38,16 @@
#endif // EIGEN_INSTALLED

std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_analytical_solver solver_type,
const APNetlist& netlist) {
const APNetlist& netlist,
const DeviceGrid& device_grid) {
// Based on the solver type passed in, build the solver.
switch (solver_type) {
case e_analytical_solver::QP_HYBRID:
#ifdef EIGEN_INSTALLED
return std::make_unique<QPHybridSolver>(netlist);
return std::make_unique<QPHybridSolver>(netlist, device_grid);
#else
(void)netlist;
(void)device_grid;
VPR_FATAL_ERROR(VPR_ERROR_AP,
"QP Hybrid Solver requires the Eigen library");
break;
Expand All @@ -64,8 +68,11 @@ AnalyticalSolver::AnalyticalSolver(const APNetlist& netlist)
// row ID from [0, num_moveable_blocks) for each moveable block in the
// netlist.
num_moveable_blocks_ = 0;
num_fixed_blocks_ = 0;
size_t current_row_id = 0;
for (APBlockId blk_id : netlist.blocks()) {
if (netlist.block_mobility(blk_id) == APBlockMobility::FIXED)
num_fixed_blocks_++;
if (netlist.block_mobility(blk_id) != APBlockMobility::MOVEABLE)
continue;
APRowId new_row_id = APRowId(current_row_id);
Expand Down Expand Up @@ -155,10 +162,10 @@ void QPHybridSolver::init_linear_system() {
}

// Initialize the linear system with zeros.
size_t num_variables = num_moveable_blocks_ + num_star_nodes;
A_sparse = Eigen::SparseMatrix<double>(num_variables, num_variables);
b_x = Eigen::VectorXd::Zero(num_variables);
b_y = Eigen::VectorXd::Zero(num_variables);
num_variables_ = num_moveable_blocks_ + num_star_nodes;
A_sparse = Eigen::SparseMatrix<double>(num_variables_, num_variables_);
b_x = Eigen::VectorXd::Zero(num_variables_);
b_y = Eigen::VectorXd::Zero(num_variables_);

// Create a list of triplets that will be used to create the sparse
// coefficient matrix. This is the method recommended by Eigen to initialize
Expand Down Expand Up @@ -254,7 +261,54 @@ void QPHybridSolver::update_linear_system_with_anchors(
}
}

void QPHybridSolver::init_guesses(const DeviceGrid& device_grid) {
// If the number of fixed blocks is zero, initialized the guesses to the
// center of the device.
if (num_fixed_blocks_ == 0) {
guess_x = Eigen::VectorXd::Constant(num_variables_, device_grid.width() / 2.0);
guess_y = Eigen::VectorXd::Constant(num_variables_, device_grid.height() / 2.0);
return;
}

// Compute the centroid of all fixed blocks in the netlist.
t_flat_pl_loc centroid({0.0f, 0.0f, 0.0f});
unsigned num_blks_summed = 0;
for (APBlockId blk_id : netlist_.blocks()) {
// We only get the centroid of fixed blocks since these are the only
// blocks with positions that we know.
if (netlist_.block_mobility(blk_id) != APBlockMobility::FIXED)
continue;
// Get the flat location of the fixed block.
APFixedBlockLoc fixed_blk_loc = netlist_.block_loc(blk_id);
VTR_ASSERT_SAFE(fixed_blk_loc.x != APFixedBlockLoc::UNFIXED_DIM);
VTR_ASSERT_SAFE(fixed_blk_loc.y != APFixedBlockLoc::UNFIXED_DIM);
VTR_ASSERT_SAFE(fixed_blk_loc.layer_num != APFixedBlockLoc::UNFIXED_DIM);
t_flat_pl_loc flat_blk_loc;
flat_blk_loc.x = fixed_blk_loc.x;
flat_blk_loc.y = fixed_blk_loc.y;
flat_blk_loc.layer = fixed_blk_loc.layer_num;
// Accumulate into the centroid.
centroid += flat_blk_loc;
num_blks_summed++;
}
// Divide the sum by the number of fixed blocks.
VTR_ASSERT_SAFE(num_blks_summed == num_fixed_blocks_);
centroid /= static_cast<float>(num_blks_summed);

// Set the guesses to the centroid location.
guess_x = Eigen::VectorXd::Constant(num_variables_, centroid.x);
guess_y = Eigen::VectorXd::Constant(num_variables_, centroid.y);
}

void QPHybridSolver::solve(unsigned iteration, PartialPlacement& p_placement) {
// In the first iteration, if the number of fixed blocks is 0, set the
// placement to be equal to the guess. The solver below will just set the
// solution to the zero vector if we do not set it to the guess directly.
if (iteration == 0 && num_fixed_blocks_ == 0) {
store_solution_into_placement(guess_x, guess_y, p_placement);
return;
}

// Create a temporary linear system which will contain the original linear
// system which may be updated to include the anchor points.
Eigen::SparseMatrix<double> A_sparse_diff = Eigen::SparseMatrix<double>(A_sparse);
Expand All @@ -280,14 +334,24 @@ void QPHybridSolver::solve(unsigned iteration, PartialPlacement& p_placement) {
cg.compute(A_sparse_diff);
VTR_ASSERT(cg.info() == Eigen::Success && "Conjugate Gradient failed at compute!");
// Use the solver to solve for x and y using the constant vectors
// TODO: Use solve with guess to make this faster. Use the previous placement
// as a guess.
Eigen::VectorXd x = cg.solve(b_x_diff);
Eigen::VectorXd x = cg.solveWithGuess(b_x_diff, guess_x);
VTR_ASSERT(cg.info() == Eigen::Success && "Conjugate Gradient failed at solving b_x!");
Eigen::VectorXd y = cg.solve(b_y_diff);
Eigen::VectorXd y = cg.solveWithGuess(b_y_diff, guess_y);
VTR_ASSERT(cg.info() == Eigen::Success && "Conjugate Gradient failed at solving b_y!");

// Write the results back into the partial placement object.
store_solution_into_placement(x, y, p_placement);

// Update the guess. The guess for the next iteration is the solution in
// this iteration.
guess_x = x;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By the way, did this update affect the results? Given how fast the solver already is, I wouldn’t expect to see any significant improvement in runtime, though.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It improved the runtime. The QoR does change, but I would not expect it to change by much. I will run Titan to see!

guess_y = y;
}

void QPHybridSolver::store_solution_into_placement(const Eigen::VectorXd& x_soln,
const Eigen::VectorXd& y_soln,
PartialPlacement& p_placement) {

// NOTE: The first [0, num_moveable_blocks_) rows always represent the
// moveable APBlocks. The star nodes always come after and are ignored
// in the solution.
Expand All @@ -296,8 +360,23 @@ void QPHybridSolver::solve(unsigned iteration, PartialPlacement& p_placement) {
APBlockId blk_id = row_id_to_blk_id_[row_id];
VTR_ASSERT_DEBUG(blk_id.is_valid());
VTR_ASSERT_DEBUG(netlist_.block_mobility(blk_id) == APBlockMobility::MOVEABLE);
p_placement.block_x_locs[blk_id] = x[row_id_idx];
p_placement.block_y_locs[blk_id] = y[row_id_idx];
// Due to the iterative nature of CG, it is possible for the solver to
// overstep 0 and return a negative number by an incredibly small margin.
// Clamp the number to 0 in this case.
// TODO: Should investigate good bounds on this, the bounds below were
// chosen since any difference higher than 1e-9 would concern me.
double x_pos = x_soln[row_id_idx];
if (x_pos < 0.0) {
VTR_ASSERT_SAFE(std::abs(x_pos) < negative_soln_tolerance_);
x_pos = 0.0;
}
double y_pos = y_soln[row_id_idx];
if (y_pos < 0.0) {
VTR_ASSERT_SAFE(std::abs(y_pos) < negative_soln_tolerance_);
y_pos = 0.0;
}
p_placement.block_x_locs[blk_id] = x_pos;
p_placement.block_y_locs[blk_id] = y_pos;
}
}

Expand Down
47 changes: 44 additions & 3 deletions vpr/src/analytical_place/analytical_solver.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
#pragma once

#include <memory>
#include "ap_netlist_fwd.h"
#include "ap_netlist.h"
#include "device_grid.h"
#include "vtr_strong_id.h"
#include "vtr_vector.h"

Expand Down Expand Up @@ -98,6 +99,9 @@ class AnalyticalSolver {
/// when allocating matrices.
size_t num_moveable_blocks_ = 0;

/// @brief The number of fixed blocks in the netlist.
size_t num_fixed_blocks_ = 0;

/// @brief A lookup between a moveable APBlock and its linear ID from
/// [0, num_moveable_blocks). Fixed blocks will return an invalid row
/// ID. This is useful when knowing which row in the matrix
Expand All @@ -114,7 +118,8 @@ class AnalyticalSolver {
* @brief A factory method which creates an Analytical Solver of the given type.
*/
std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_analytical_solver solver_type,
const APNetlist& netlist);
const APNetlist& netlist,
const DeviceGrid& device_grid);

// The Eigen library is used to solve matrix equations in the following solvers.
// The solver cannot be built if Eigen is not installed.
Expand Down Expand Up @@ -170,6 +175,14 @@ class QPHybridSolver : public AnalyticalSolver {
/// weights to grow slower.
static constexpr double anchor_weight_exp_fac_ = 5.0;

/// @brief Due to the iterative nature of Conjugate Gradient method, the
/// solver may overstep 0 to give a slightly negative solution. This
/// is ok, and we can just clamp the position to 0. However, negative
/// values that are too large may be indicative of an issue in the
/// formulation. This value is how negative we tolerate the positions
/// to be.
static constexpr double negative_soln_tolerance_ = 1e-9;

/**
* @brief Initializes the linear system of Ax = b_x and Ay = b_y based on
* the APNetlist and the fixed APBlock locations.
Expand All @@ -180,6 +193,14 @@ class QPHybridSolver : public AnalyticalSolver {
*/
void init_linear_system();

/**
* @brief Initializes the guesses which will be used in the solver.
*
* The guesses will be used as starting points for the CG solver. The better
* these guesses are, the faster the solver will converge.
*
* If the netlist contains no fixed blocks, every guess is set to the center
* of the device. Otherwise, every guess is set to the centroid of the fixed
* blocks in the netlist.
*
* @param device_grid
*      The device grid; its width and height are used to find the center
*      of the device when there are no fixed blocks.
*/
void init_guesses(const DeviceGrid& device_grid);

/**
* @brief Helper method to update the linear system with anchors to the
* current partial placement.
Expand Down Expand Up @@ -209,6 +230,14 @@ class QPHybridSolver : public AnalyticalSolver {
PartialPlacement& p_placement,
unsigned iteration);

/**
* @brief Store the x and y solutions in Eigen's vectors into the partial
* placement object.
*
* Only the rows which correspond to moveable blocks are written into the
* placement; the rows for the star nodes come after these and are ignored.
* Negative positions are clamped to 0; in builds with safe assertions
* enabled, their magnitude is checked against negative_soln_tolerance_.
*
* @param x_soln
*      The x positions, indexed by row ID.
* @param y_soln
*      The y positions, indexed by row ID.
* @param p_placement
*      The partial placement whose block x and y locations are updated.
*/
void store_solution_into_placement(const Eigen::VectorXd& x_soln,
const Eigen::VectorXd& y_soln,
PartialPlacement& p_placement);

// The following variables represent the linear system without any anchor
// points. These are filled in the constructor and never modified.
// When the anchor-points are taken into consideration, the diagonal of the
Expand All @@ -224,19 +253,31 @@ class QPHybridSolver : public AnalyticalSolver {
Eigen::VectorXd b_x;
/// @brief The constant vector in the y dimension for the linear system.
Eigen::VectorXd b_y;
/// @brief The number of variables in the solver. This is the sum of the
/// number of moveable blocks in the netlist and the number of star
/// nodes that exist.
size_t num_variables_ = 0;

/// @brief The current guess for the x positions of the blocks.
Eigen::VectorXd guess_x;
/// @brief The current guess for the y positions of the blocks.
Eigen::VectorXd guess_y;

public:
/**
* @brief Constructor of the QPHybridSolver
*
* Initializes internal data and constructs the initial linear system.
*/
QPHybridSolver(const APNetlist& netlist)
QPHybridSolver(const APNetlist& netlist, const DeviceGrid& device_grid)
: AnalyticalSolver(netlist) {
// Initializing the linear system only depends on the netlist and fixed
// block locations. Both are provided by the netlist, allowing this to
// be initialized in the constructor.
init_linear_system();

// Initialize the guesses for the first iteration.
init_guesses(device_grid);
}

/**
Expand Down
3 changes: 2 additions & 1 deletion vpr/src/analytical_place/global_placer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ SimPLGlobalPlacer::SimPLGlobalPlacer(e_partial_legalizer partial_legalizer_type,
// Build the solver.
VTR_LOGV(log_verbosity_ >= 10, "\tBuilding the solver...\n");
solver_ = make_analytical_solver(e_analytical_solver::QP_HYBRID,
ap_netlist_);
ap_netlist_,
device_grid);

// Build the density manager used by the partial legalizer.
VTR_LOGV(log_verbosity_ >= 10, "\tBuilding the density manager...\n");
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
###############################################################################
# Configuration file for running Verilog benchmark circuits through the AP flow.
#
# The AP flow requires that each circuit contains fixed blocks and is fixed
# to a specific device size. The device sizes here were chosen to match the
# device sizes of the default VTR flow.
###############################################################################

# Path to directory of circuits to use
circuits_dir=benchmarks/verilog

# Path to directory of architectures to use
archs_dir=arch/timing

# Add architectures to list to sweep
arch_list_add=k6_frac_N10_frac_chain_mem32K_40nm.xml

# Add circuits to list to sweep
circuit_list_add=boundtop.v
circuit_list_add=ch_intrinsics.v
circuit_list_add=or1200.v
circuit_list_add=spree.v
circuit_list_add=stereovision3.v

# Constrain the circuits to their devices
circuit_constraint_list_add=(stereovision3.v, device=vtr_extra_small)
circuit_constraint_list_add=(ch_intrinsics.v, device=vtr_extra_small)
circuit_constraint_list_add=(spree.v, device=vtr_extra_small)
circuit_constraint_list_add=(boundtop.v, device=vtr_extra_small)
circuit_constraint_list_add=(or1200.v, device=vtr_small)

# Constrain the circuits to their channel widths
# 1.3 * minW
circuit_constraint_list_add=(stereovision3.v, route_chan_width=44)
circuit_constraint_list_add=(ch_intrinsics.v, route_chan_width=52)
circuit_constraint_list_add=(spree.v, route_chan_width=78)
circuit_constraint_list_add=(boundtop.v, route_chan_width=50)
circuit_constraint_list_add=(or1200.v, route_chan_width=118)

# Parse info and how to parse
parse_file=vpr_fixed_chan_width.txt

# How to parse QoR info
qor_parse_file=qor_ap_fixed_chan_width.txt

# Pass requirements
pass_requirements_file=pass_requirements_ap_fixed_chan_width.txt

# Pass the script params while writing the vpr constraints.
script_params=-track_memory_usage -crit_path_router_iterations 100 --analytical_place --route

Loading