Skip to content

[AP][Timing] Added Basic Net Weighting #2969

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions doc/src/vpr/command_line_usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1253,6 +1253,15 @@ Analytical Placement is generally split into three stages:

**Default:** ``annealer``

.. option:: --ap_timing_tradeoff <float>

Controls the trade-off between wirelength (HPWL) and delay minimization in the AP flow.

A value of 0.0 makes the AP flow focus completely on wirelength minimization,
while a value of 1.0 makes the AP flow focus completely on timing optimization.

**Default:** ``0.5``

.. option:: --ap_verbosity <int>

Controls the verbosity of the AP flow output.
Expand Down
19 changes: 19 additions & 0 deletions vpr/src/analytical_place/analytical_placement_flow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@

#include "analytical_placement_flow.h"
#include <memory>
#include "PreClusterTimingManager.h"
#include "analytical_solver.h"
#include "ap_netlist.h"
#include "atom_netlist.h"
#include "cluster_util.h"
#include "detailed_placer.h"
#include "full_legalizer.h"
#include "gen_ap_netlist_from_atoms.h"
Expand Down Expand Up @@ -120,6 +122,7 @@ static PartialPlacement run_global_placer(const t_ap_opts& ap_opts,
const AtomNetlist& atom_nlist,
const APNetlist& ap_netlist,
const Prepacker& prepacker,
const PreClusterTimingManager& pre_cluster_timing_manager,
const DeviceContext& device_ctx) {
if (g_vpr_ctx.atom().flat_placement_info().valid) {
VTR_LOG("Flat Placement is provided in the AP flow, skipping the Global Placement.\n");
Expand All @@ -139,6 +142,8 @@ static PartialPlacement run_global_placer(const t_ap_opts& ap_opts,
device_ctx.grid,
device_ctx.logical_block_types,
device_ctx.physical_tile_types,
pre_cluster_timing_manager,
ap_opts.ap_timing_tradeoff,
ap_opts.log_verbosity);
return global_placer->place();
}
Expand All @@ -163,12 +168,25 @@ void run_analytical_placement_flow(t_vpr_setup& vpr_setup) {
constraints);
print_ap_netlist_stats(ap_netlist);

// Pre-compute the pre-clustering timing delays. This object will be passed
// into the global placer and the full legalizer to make them timing driven.
PreClusterTimingManager pre_cluster_timing_manager(vpr_setup.PackerOpts.timing_driven,
atom_nlist,
g_vpr_ctx.atom().lookup(),
prepacker,
vpr_setup.PackerOpts.timing_update_type,
*device_ctx.arch,
vpr_setup.RoutingArch,
vpr_setup.PackerOpts.device_layout,
vpr_setup.AnalysisOpts);

// Run the Global Placer.
const t_ap_opts& ap_opts = vpr_setup.APOpts;
PartialPlacement p_placement = run_global_placer(ap_opts,
atom_nlist,
ap_netlist,
prepacker,
pre_cluster_timing_manager,
device_ctx);

// Verify that the partial placement is valid before running the full
Expand All @@ -185,6 +203,7 @@ void run_analytical_placement_flow(t_vpr_setup& vpr_setup) {
ap_netlist,
atom_nlist,
prepacker,
pre_cluster_timing_manager,
vpr_setup,
*device_ctx.arch,
device_ctx.grid);
Expand Down
69 changes: 57 additions & 12 deletions vpr/src/analytical_place/analytical_solver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
#include <memory>
#include <utility>
#include <vector>
#include "PreClusterTimingManager.h"
#include "atom_netlist.h"
#include "atom_netlist_fwd.h"
#include "device_grid.h"
#include "flat_placement_types.h"
#include "partial_placement.h"
Expand Down Expand Up @@ -42,23 +45,39 @@
std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_ap_analytical_solver solver_type,
const APNetlist& netlist,
const DeviceGrid& device_grid,
const AtomNetlist& atom_netlist,
const PreClusterTimingManager& pre_cluster_timing_manager,
float ap_timing_tradeoff,
int log_verbosity) {
// Based on the solver type passed in, build the solver.
switch (solver_type) {
case e_ap_analytical_solver::QP_Hybrid:
#ifdef EIGEN_INSTALLED
return std::make_unique<QPHybridSolver>(netlist, device_grid, log_verbosity);
return std::make_unique<QPHybridSolver>(netlist,
device_grid,
atom_netlist,
pre_cluster_timing_manager,
ap_timing_tradeoff,
log_verbosity);
#else
(void)netlist;
(void)device_grid;
(void)atom_netlist;
(void)pre_cluster_timing_manager;
(void)ap_timing_tradeoff;
(void)log_verbosity;
VPR_FATAL_ERROR(VPR_ERROR_AP,
"QP Hybrid Solver requires the Eigen library");
break;
#endif // EIGEN_INSTALLED
case e_ap_analytical_solver::LP_B2B:
#ifdef EIGEN_INSTALLED
return std::make_unique<B2BSolver>(netlist, device_grid, log_verbosity);
return std::make_unique<B2BSolver>(netlist,
device_grid,
atom_netlist,
pre_cluster_timing_manager,
ap_timing_tradeoff,
log_verbosity);
#else
VPR_FATAL_ERROR(VPR_ERROR_AP,
"LP B2B Solver requires the Eigen library");
Expand All @@ -72,10 +91,15 @@ std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_ap_analytical_solver
return nullptr;
}

AnalyticalSolver::AnalyticalSolver(const APNetlist& netlist, int log_verbosity)
AnalyticalSolver::AnalyticalSolver(const APNetlist& netlist,
const AtomNetlist& atom_netlist,
const PreClusterTimingManager& pre_cluster_timing_manager,
float ap_timing_tradeoff,
int log_verbosity)
: netlist_(netlist)
, blk_id_to_row_id_(netlist.blocks().size(), APRowId::INVALID())
, row_id_to_blk_id_(netlist.blocks().size(), APBlockId::INVALID())
, net_weights_(netlist.nets().size(), 1.0f)
, log_verbosity_(log_verbosity) {
// Get the number of moveable blocks in the netlist and create a unique
// row ID from [0, num_moveable_blocks) for each moveable block in the
Expand All @@ -94,6 +118,21 @@ AnalyticalSolver::AnalyticalSolver(const APNetlist& netlist, int log_verbosity)
current_row_id++;
num_moveable_blocks_++;
}

// Assign timing-aware weights only when the pre-cluster timing manager
// reports that it is valid; otherwise every net keeps the default weight of
// 1.0 that net_weights_ was given in the member initializer list.
if (pre_cluster_timing_manager.is_valid()) {
for (APNetId net_id : netlist.nets()) {
// Get the atom net associated with the given AP net. When
// constructing the AP netlist, we happen to set the name of each
// AP net to the same name as the atom net that generated it, so a
// lookup by name recovers the originating atom net.
// TODO: Create a proper lookup structure to go from the AP Netlist
// back to the Atom Netlist.
AtomNetId atom_net_id = atom_netlist.find_net(netlist.net_name(net_id));
// The name-based lookup above is expected to succeed for every AP net.
VTR_ASSERT(atom_net_id.is_valid());
float crit = pre_cluster_timing_manager.calc_net_setup_criticality(atom_net_id, atom_netlist);

// Linear blend between a uniform weight and the net's criticality:
//   ap_timing_tradeoff == 0  ->  weight is 1 for every net
//   ap_timing_tradeoff == 1  ->  weight is exactly the criticality
// NOTE(review): if the criticality can be 0 (presumably it lies in
// [0, 1] -- confirm), a trade-off of 1.0 yields a weight of 0 for that
// net; verify that the solvers tolerate zero-weight nets.
net_weights_[net_id] = ap_timing_tradeoff * crit + (1.0f - ap_timing_tradeoff);
}
}
}

#ifdef EIGEN_INSTALLED
Expand Down Expand Up @@ -201,12 +240,15 @@ void QPHybridSolver::init_linear_system() {
for (APNetId net_id : netlist_.nets()) {
size_t num_pins = netlist_.net_pins(net_id).size();
VTR_ASSERT_DEBUG(num_pins > 1);

double net_weight = net_weights_[net_id];

if (num_pins > star_num_pins_threshold) {
// Create a star node and connect each block in the net to the star
// node.
// Using the weight from FastPlace
// TODO: Investigate other weight terms.
double w = static_cast<double>(num_pins) / static_cast<double>(num_pins - 1);
double w = net_weight * static_cast<double>(num_pins) / static_cast<double>(num_pins - 1);
size_t star_node_id = num_moveable_blocks_ + star_node_offset;
for (APPinId pin_id : netlist_.net_pins(net_id)) {
APBlockId blk_id = netlist_.pin_block(pin_id);
Expand All @@ -220,7 +262,7 @@ void QPHybridSolver::init_linear_system() {
// exactly once to every other block in the net.
// Using the weight from FastPlace
// TODO: Investigate other weight terms.
double w = 1.0 / static_cast<double>(num_pins - 1);
double w = net_weight * 1.0 / static_cast<double>(num_pins - 1);
for (size_t ipin_idx = 0; ipin_idx < num_pins; ipin_idx++) {
APPinId first_pin_id = netlist_.net_pin(net_id, ipin_idx);
APBlockId first_blk_id = netlist_.pin_block(first_pin_id);
Expand Down Expand Up @@ -638,6 +680,7 @@ static inline APNetBounds get_unique_net_bounds(APNetId net_id,
void B2BSolver::add_connection_to_system(APBlockId first_blk_id,
APBlockId second_blk_id,
size_t num_pins,
double net_w,
const vtr::vector<APBlockId, double>& blk_locs,
std::vector<Eigen::Triplet<double>>& triplet_list,
Eigen::VectorXd& b) {
Expand All @@ -660,7 +703,7 @@ void B2BSolver::add_connection_to_system(APBlockId first_blk_id,
// The denominator of the weight is zero, which causes an infinite term in the matrix. Another way of
// interpreting epsilon is as the minimum distance by which two nodes are considered to be separated in the placement.
double dist = std::max(std::abs(blk_locs[first_blk_id] - blk_locs[second_blk_id]), distance_epsilon_);
double w = (2.0 / static_cast<double>(num_pins - 1)) * (1.0 / dist);
double w = net_w * (2.0 / static_cast<double>(num_pins - 1)) * (1.0 / dist);

// Update the connectivity matrix and the constant vector.
// This is similar to how connections are added for the quadratic formulation.
Expand Down Expand Up @@ -696,6 +739,8 @@ void B2BSolver::init_linear_system(PartialPlacement& p_placement) {
size_t num_pins = netlist_.net_pins(net_id).size();
VTR_ASSERT_SAFE_MSG(num_pins > 1, "net must have at least 2 pins");

double net_w = net_weights_[net_id];

// Find the bounding blocks
APNetBounds net_bounds = get_unique_net_bounds(net_id, p_placement, netlist_);

Expand All @@ -706,19 +751,19 @@ void B2BSolver::init_linear_system(PartialPlacement& p_placement) {
for (APPinId pin_id : netlist_.net_pins(net_id)) {
APBlockId blk_id = netlist_.pin_block(pin_id);
if (blk_id != net_bounds.max_x_blk && blk_id != net_bounds.min_x_blk) {
add_connection_to_system(blk_id, net_bounds.max_x_blk, num_pins, p_placement.block_x_locs, triplet_list_x, b_x);
add_connection_to_system(blk_id, net_bounds.min_x_blk, num_pins, p_placement.block_x_locs, triplet_list_x, b_x);
add_connection_to_system(blk_id, net_bounds.max_x_blk, num_pins, net_w, p_placement.block_x_locs, triplet_list_x, b_x);
add_connection_to_system(blk_id, net_bounds.min_x_blk, num_pins, net_w, p_placement.block_x_locs, triplet_list_x, b_x);
}
if (blk_id != net_bounds.max_y_blk && blk_id != net_bounds.min_y_blk) {
add_connection_to_system(blk_id, net_bounds.max_y_blk, num_pins, p_placement.block_y_locs, triplet_list_y, b_y);
add_connection_to_system(blk_id, net_bounds.min_y_blk, num_pins, p_placement.block_y_locs, triplet_list_y, b_y);
add_connection_to_system(blk_id, net_bounds.max_y_blk, num_pins, net_w, p_placement.block_y_locs, triplet_list_y, b_y);
add_connection_to_system(blk_id, net_bounds.min_y_blk, num_pins, net_w, p_placement.block_y_locs, triplet_list_y, b_y);
}
}

// Connect the bounds to each other. It's just easier to put these here
// instead of in the for loop above.
add_connection_to_system(net_bounds.max_x_blk, net_bounds.min_x_blk, num_pins, p_placement.block_x_locs, triplet_list_x, b_x);
add_connection_to_system(net_bounds.max_y_blk, net_bounds.min_y_blk, num_pins, p_placement.block_y_locs, triplet_list_y, b_y);
add_connection_to_system(net_bounds.max_x_blk, net_bounds.min_x_blk, num_pins, net_w, p_placement.block_x_locs, triplet_list_x, b_x);
add_connection_to_system(net_bounds.max_y_blk, net_bounds.min_y_blk, num_pins, net_w, p_placement.block_y_locs, triplet_list_y, b_y);
}

// Build the sparse connectivity matrices from the triplets.
Expand Down
28 changes: 25 additions & 3 deletions vpr/src/analytical_place/analytical_solver.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
// Forward declarations
class PartialPlacement;
class APNetlist;
class AtomNetlist;
class PreClusterTimingManager;

/**
* @brief A strong ID for the rows in a matrix used during solving.
Expand Down Expand Up @@ -60,7 +62,11 @@ class AnalyticalSolver {
* Initializes the internal data members of the base class which are useful
* for all solvers.
*/
AnalyticalSolver(const APNetlist& netlist, int log_verbosity);
AnalyticalSolver(const APNetlist& netlist,
const AtomNetlist& atom_netlist,
const PreClusterTimingManager& pre_cluster_timing_manager,
float ap_timing_tradeoff,
int log_verbosity);

/**
* @brief Run an iteration of the solver using the given partial placement
Expand Down Expand Up @@ -113,6 +119,12 @@ class AnalyticalSolver {
/// solver.
vtr::vector<APRowId, APBlockId> row_id_to_blk_id_;

/// @brief The base weight of each net in the AP netlist, indexed by APNetId.
///        This weight can be used to make the solver more interested in
///        some nets over others. These weights can be any positive value,
///        but are often between 0 and 1.
///
///        The AnalyticalSolver constructor initializes every entry to 1.0.
///        When the pre-cluster timing manager is valid, it then overwrites
///        each entry with a linear blend of 1.0 and the net's setup
///        criticality, controlled by the ap_timing_tradeoff argument.
vtr::vector<APNetId, float> net_weights_;

/// @brief The verbosity of log messages in the Analytical Solver.
int log_verbosity_;
};
Expand All @@ -123,6 +135,9 @@ class AnalyticalSolver {
std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_ap_analytical_solver solver_type,
const APNetlist& netlist,
const DeviceGrid& device_grid,
const AtomNetlist& atom_netlist,
const PreClusterTimingManager& pre_cluster_timing_manager,
float ap_timing_tradeoff,
int log_verbosity);

// The Eigen library is used to solve matrix equations in the following solvers.
Expand Down Expand Up @@ -278,8 +293,11 @@ class QPHybridSolver : public AnalyticalSolver {
*/
QPHybridSolver(const APNetlist& netlist,
const DeviceGrid& device_grid,
const AtomNetlist& atom_netlist,
const PreClusterTimingManager& pre_cluster_timing_manager,
float ap_timing_tradeoff,
int log_verbosity)
: AnalyticalSolver(netlist, log_verbosity) {
: AnalyticalSolver(netlist, atom_netlist, pre_cluster_timing_manager, ap_timing_tradeoff, log_verbosity) {
// Initializing the linear system only depends on the netlist and fixed
// block locations. Both are provided by the netlist, allowing this to
// be initialized in the constructor.
Expand Down Expand Up @@ -411,8 +429,11 @@ class B2BSolver : public AnalyticalSolver {
public:
B2BSolver(const APNetlist& ap_netlist,
const DeviceGrid& device_grid,
const AtomNetlist& atom_netlist,
const PreClusterTimingManager& pre_cluster_timing_manager,
float ap_timing_tradeoff,
int log_verbosity)
: AnalyticalSolver(ap_netlist, log_verbosity)
: AnalyticalSolver(ap_netlist, atom_netlist, pre_cluster_timing_manager, ap_timing_tradeoff, log_verbosity)
, device_grid_width_(device_grid.width())
, device_grid_height_(device_grid.height()) {}

Expand Down Expand Up @@ -503,6 +524,7 @@ class B2BSolver : public AnalyticalSolver {
void add_connection_to_system(APBlockId first_blk_id,
APBlockId second_blk_id,
size_t num_pins,
double net_w,
const vtr::vector<APBlockId, double>& blk_locs,
std::vector<Eigen::Triplet<double>>& triplet_list,
Eigen::VectorXd& b);
Expand Down
Loading