Skip to content

Commit b9a610e

Browse files
[AP][Timing] Used Flat Placement Info to Compute Setup Criticalities
When timing analysis was turned on for AP, we originally only used the pre-cluster timing analyzer, which was very high-level and inaccurate. It practically just counted the number of hops between launch and capture registers to approximate criticality. Improved this by using flat placement information provided by AP. During global placement, the criticality of all edges is recomputed using the upper-bound solution from the prior iteration of GP. The place delay model from the placement flow was used to get a mostly accurate delay estimation for distances between tiles. The slacks computed each GP iteration are used to update the net weights between iterations to better optimize CPD and sTNS. This improved estimation of setup slacks is then passed into the full legalizer, where it is used by the packer to better pack critical atoms together. This change required some changes to the APNetlist. Notably, we need all atom nets to be located somewhere in the AP netlist such that their delays can be calculated properly. Instead of removing nets we do not care about for AP, we marked them as ignored.
1 parent d504b0a commit b9a610e

12 files changed

+380
-72
lines changed

vpr/src/analytical_place/analytical_placement_flow.cpp

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
#include "analytical_placement_flow.h"
99
#include <memory>
10+
#include "PlacementDelayModelCreator.h"
1011
#include "PreClusterTimingManager.h"
1112
#include "analytical_solver.h"
1213
#include "ap_netlist.h"
@@ -17,8 +18,11 @@
1718
#include "gen_ap_netlist_from_atoms.h"
1819
#include "global_placer.h"
1920
#include "globals.h"
21+
#include "netlist_fwd.h"
2022
#include "partial_legalizer.h"
2123
#include "partial_placement.h"
24+
#include "physical_types.h"
25+
#include "place_delay_model.h"
2226
#include "prepack.h"
2327
#include "user_place_constraints.h"
2428
#include "vpr_context.h"
@@ -43,19 +47,23 @@ static void print_ap_netlist_stats(const APNetlist& netlist) {
4347
// Get the fanout information of nets
4448
size_t highest_fanout = 0;
4549
float average_fanout = 0.f;
50+
unsigned net_count = 0;
4651
for (APNetId net_id : netlist.nets()) {
52+
if (netlist.net_is_global(net_id) || netlist.net_is_ignored(net_id))
53+
continue;
4754
size_t net_fanout = netlist.net_pins(net_id).size();
4855
if (net_fanout > highest_fanout)
4956
highest_fanout = net_fanout;
5057
average_fanout += static_cast<float>(net_fanout);
58+
net_count++;
5159
}
52-
average_fanout /= static_cast<float>(netlist.nets().size());
60+
average_fanout /= static_cast<float>(net_count);
5361
// Print the statistics
5462
VTR_LOG("Analytical Placement Netlist Statistics:\n");
5563
VTR_LOG("\tBlocks: %zu\n", netlist.blocks().size());
5664
VTR_LOG("\t\tMoveable Blocks: %zu\n", num_moveable_blocks);
5765
VTR_LOG("\t\tFixed Blocks: %zu\n", num_fixed_blocks);
58-
VTR_LOG("\tNets: %zu\n", netlist.nets().size());
66+
VTR_LOG("\tNets: %zu\n", net_count);
5967
VTR_LOG("\t\tAverage Fanout: %.2f\n", average_fanout);
6068
VTR_LOG("\t\tHighest Fanout: %zu\n", highest_fanout);
6169
VTR_LOG("\tPins: %zu\n", netlist.pins().size());
@@ -122,7 +130,8 @@ static PartialPlacement run_global_placer(const t_ap_opts& ap_opts,
122130
const AtomNetlist& atom_nlist,
123131
const APNetlist& ap_netlist,
124132
const Prepacker& prepacker,
125-
const PreClusterTimingManager& pre_cluster_timing_manager,
133+
PreClusterTimingManager& pre_cluster_timing_manager,
134+
std::shared_ptr<PlaceDelayModel> place_delay_model,
126135
const DeviceContext& device_ctx) {
127136
if (g_vpr_ctx.atom().flat_placement_info().valid) {
128137
VTR_LOG("Flat Placement is provided in the AP flow, skipping the Global Placement.\n");
@@ -143,6 +152,7 @@ static PartialPlacement run_global_placer(const t_ap_opts& ap_opts,
143152
device_ctx.logical_block_types,
144153
device_ctx.physical_tile_types,
145154
pre_cluster_timing_manager,
155+
place_delay_model,
146156
ap_opts.ap_timing_tradeoff,
147157
ap_opts.num_threads,
148158
ap_opts.log_verbosity);
@@ -181,13 +191,28 @@ void run_analytical_placement_flow(t_vpr_setup& vpr_setup) {
181191
vpr_setup.PackerOpts.device_layout,
182192
vpr_setup.AnalysisOpts);
183193

194+
// Pre-compute the place delay model. This will be passed into the global
195+
// placer to create a more accurate timing model.
196+
std::shared_ptr<PlaceDelayModel> place_delay_model;
197+
if (pre_cluster_timing_manager.is_valid()) {
198+
place_delay_model = PlacementDelayModelCreator::create_delay_model(vpr_setup.PlacerOpts,
199+
vpr_setup.RouterOpts,
200+
(const Netlist<>&)atom_nlist,
201+
vpr_setup.RoutingArch,
202+
vpr_setup.Segments,
203+
device_ctx.arch->Chans,
204+
device_ctx.arch->directs,
205+
false /*is_flat*/);
206+
}
207+
184208
// Run the Global Placer.
185209
const t_ap_opts& ap_opts = vpr_setup.APOpts;
186210
PartialPlacement p_placement = run_global_placer(ap_opts,
187211
atom_nlist,
188212
ap_netlist,
189213
prepacker,
190214
pre_cluster_timing_manager,
215+
place_delay_model,
191216
device_ctx);
192217

193218
// Verify that the partial placement is valid before running the full

vpr/src/analytical_place/analytical_solver.cpp

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -114,9 +114,11 @@ AnalyticalSolver::AnalyticalSolver(const APNetlist& netlist,
114114
float ap_timing_tradeoff,
115115
int log_verbosity)
116116
: netlist_(netlist)
117+
, atom_netlist_(atom_netlist)
117118
, blk_id_to_row_id_(netlist.blocks().size(), APRowId::INVALID())
118119
, row_id_to_blk_id_(netlist.blocks().size(), APBlockId::INVALID())
119120
, net_weights_(netlist.nets().size(), 1.0f)
121+
, ap_timing_tradeoff_(ap_timing_tradeoff)
120122
, log_verbosity_(log_verbosity) {
121123
// Get the number of moveable blocks in the netlist and create a unique
122124
// row ID from [0, num_moveable_blocks) for each moveable block in the
@@ -136,19 +138,29 @@ AnalyticalSolver::AnalyticalSolver(const APNetlist& netlist,
136138
num_moveable_blocks_++;
137139
}
138140

139-
if (pre_cluster_timing_manager.is_valid()) {
140-
for (APNetId net_id : netlist.nets()) {
141-
// Get the atom net associated with the given AP net. When
142-
// constructing the AP netlist, we happen to set the name of each
143-
// AP net to the same name as the atom net that generated them!
144-
// TODO: Create a proper lookup structure to go from the AP Netlist
145-
// back to the Atom Netlist.
146-
AtomNetId atom_net_id = atom_netlist.find_net(netlist.net_name(net_id));
147-
VTR_ASSERT(atom_net_id.is_valid());
148-
float crit = pre_cluster_timing_manager.calc_net_setup_criticality(atom_net_id, atom_netlist);
149-
150-
net_weights_[net_id] = ap_timing_tradeoff * crit + (1.0f - ap_timing_tradeoff);
151-
}
141+
update_net_weights(pre_cluster_timing_manager);
142+
}
143+
144+
void AnalyticalSolver::update_net_weights(const PreClusterTimingManager& pre_cluster_timing_manager) {
145+
// If the pre-cluster timing manager has not been initialized (i.e. timing
146+
// analysis is off), no need to update.
147+
if (!pre_cluster_timing_manager.is_valid())
148+
return;
149+
150+
// For each of the nets, update the net weights.
151+
for (APNetId net_id : netlist_.nets()) {
152+
// Note: To save time, we do not compute the weights of nets that we
153+
// do not care about for AP. This leaves their weights at 1.0 just
154+
// in case they are accidentally used.
155+
if (netlist_.net_is_global(net_id) || netlist_.net_is_ignored(net_id))
156+
continue;
157+
158+
AtomNetId atom_net_id = netlist_.net_atom_net(net_id);
159+
VTR_ASSERT_SAFE(atom_net_id.is_valid());
160+
161+
float crit = pre_cluster_timing_manager.calc_net_setup_criticality(atom_net_id, atom_netlist_);
162+
163+
net_weights_[net_id] = ap_timing_tradeoff_ * crit + (1.0f - ap_timing_tradeoff_);
152164
}
153165
}
154166

@@ -225,7 +237,11 @@ static inline void add_connection_to_system(size_t src_row_id,
225237
void QPHybridSolver::init_linear_system() {
226238
// Count the number of star nodes that the netlist will have.
227239
size_t num_star_nodes = 0;
240+
unsigned num_nets = 0;
228241
for (APNetId net_id : netlist_.nets()) {
242+
if (netlist_.net_is_global(net_id) || netlist_.net_is_ignored(net_id))
243+
continue;
244+
num_nets++;
229245
if (netlist_.net_pins(net_id).size() > star_num_pins_threshold)
230246
num_star_nodes++;
231247
}
@@ -248,13 +264,14 @@ void QPHybridSolver::init_linear_system() {
248264
// TODO: This can be made more space-efficient by getting the average fanout
249265
// of all nets in the APNetlist. Ideally this should be not enough
250266
// space, but be within a constant factor.
251-
size_t num_nets = netlist_.nets().size();
252267
tripletList.reserve(num_nets);
253268

254269
// Create the connections using a hybrid connection model of the star and
255270
// clique connection models.
256271
size_t star_node_offset = 0;
257272
for (APNetId net_id : netlist_.nets()) {
273+
if (netlist_.net_is_global(net_id) || netlist_.net_is_ignored(net_id))
274+
continue;
258275
size_t num_pins = netlist_.net_pins(net_id).size();
259276
VTR_ASSERT_DEBUG(num_pins > 1);
260277

@@ -772,6 +789,8 @@ void B2BSolver::init_linear_system(PartialPlacement& p_placement) {
772789
triplet_list_y.reserve(num_nets);
773790

774791
for (APNetId net_id : netlist_.nets()) {
792+
if (netlist_.net_is_global(net_id) || netlist_.net_is_ignored(net_id))
793+
continue;
775794
size_t num_pins = netlist_.net_pins(net_id).size();
776795
VTR_ASSERT_SAFE_MSG(num_pins > 1, "net must have at least 2 pins");
777796

vpr/src/analytical_place/analytical_solver.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,11 +95,24 @@ class AnalyticalSolver {
9595
*/
9696
virtual void print_statistics() = 0;
9797

98+
/**
99+
* @brief Update the net weights according to the criticality of the nets.
100+
*
101+
* @param pre_cluster_timing_manager
102+
* The timing manager which manages the criticalities of the nets.
103+
*/
104+
void update_net_weights(const PreClusterTimingManager& pre_cluster_timing_manager);
105+
98106
protected:
99107
/// @brief The APNetlist the solver is optimizing over. It is implied that
100108
/// the netlist is not being modified during global placement.
101109
const APNetlist& netlist_;
102110

111+
/// @brief The Atom netlist the solver is optimizing over. It is implied
112+
/// that the atom netlist is not being modified during global
113+
/// placement.
114+
const AtomNetlist& atom_netlist_;
115+
103116
/// @brief The number of moveable blocks in the netlist. This is helpful
104117
/// when allocating matrices.
105118
size_t num_moveable_blocks_ = 0;
@@ -124,6 +137,10 @@ class AnalyticalSolver {
124137
/// between 0 and 1.
125138
vtr::vector<APNetId, float> net_weights_;
126139

140+
/// @brief The AP timing tradeoff term used during global placement. Decides
141+
/// how much the solver cares about timing vs wirelength.
142+
float ap_timing_tradeoff_;
143+
127144
/// @brief The verbosity of log messages in the Analytical Solver.
128145
int log_verbosity_;
129146
};

vpr/src/analytical_place/ap_netlist.cpp

Lines changed: 43 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
#include "ap_netlist.h"
99
#include <string>
10+
#include "atom_netlist_fwd.h"
1011
#include "netlist_fwd.h"
1112
#include "netlist_utils.h"
1213
#include "prepack.h"
@@ -34,6 +35,24 @@ const APFixedBlockLoc& APNetlist::block_loc(const APBlockId id) const {
3435
return block_locs_[id];
3536
}
3637

38+
/*
39+
* Pins
40+
*/
41+
AtomPinId APNetlist::pin_atom_pin(const APPinId id) const {
42+
VTR_ASSERT_SAFE(valid_pin_id(id));
43+
44+
return pin_atom_pin_[id];
45+
}
46+
47+
/*
48+
* Nets
49+
*/
50+
AtomNetId APNetlist::net_atom_net(const APNetId id) const {
51+
VTR_ASSERT_SAFE(valid_net_id(id));
52+
53+
return net_atom_net_[id];
54+
}
55+
3756
/*
3857
* Mutators
3958
*/
@@ -93,9 +112,12 @@ APPortId APNetlist::create_port(const APBlockId blk_id, const std::string& name,
93112
return port_id;
94113
}
95114

96-
APPinId APNetlist::create_pin(const APPortId port_id, BitIndex port_bit, const APNetId net_id, const PinType pin_type_, bool is_const) {
115+
APPinId APNetlist::create_pin(const APPortId port_id, BitIndex port_bit, const APNetId net_id, const PinType pin_type_, const AtomPinId atom_pin_id, bool is_const) {
97116
APPinId pin_id = Netlist::create_pin(port_id, port_bit, net_id, pin_type_, is_const);
98117

118+
// Initialize the pin data.
119+
pin_atom_pin_.insert(pin_id, atom_pin_id);
120+
99121
// Check post-conditions: size
100122
VTR_ASSERT(validate_pin_sizes());
101123

@@ -107,9 +129,12 @@ APPinId APNetlist::create_pin(const APPortId port_id, BitIndex port_bit, const A
107129
return pin_id;
108130
}
109131

110-
APNetId APNetlist::create_net(const std::string& name) {
132+
APNetId APNetlist::create_net(const std::string& name, const AtomNetId atom_net_id) {
111133
APNetId net_id = Netlist::create_net(name);
112134

135+
// Initialize the net data.
136+
net_atom_net_.insert(net_id, atom_net_id);
137+
113138
// Check post-conditions: size
114139
VTR_ASSERT(validate_net_sizes());
115140

@@ -132,12 +157,12 @@ void APNetlist::clean_ports_impl(const vtr::vector_map<APPortId, APPortId>& /*po
132157
// Unused
133158
}
134159

135-
void APNetlist::clean_pins_impl(const vtr::vector_map<APPinId, APPinId>& /*pin_id_map*/) {
136-
// Unused
160+
void APNetlist::clean_pins_impl(const vtr::vector_map<APPinId, APPinId>& pin_id_map) {
161+
pin_atom_pin_ = clean_and_reorder_values(pin_atom_pin_, pin_id_map);
137162
}
138163

139-
void APNetlist::clean_nets_impl(const vtr::vector_map<APNetId, APNetId>& /*net_id_map*/) {
140-
// Unused
164+
void APNetlist::clean_nets_impl(const vtr::vector_map<APNetId, APNetId>& net_id_map) {
165+
net_atom_net_ = clean_and_reorder_values(net_atom_net_, net_id_map);
141166
}
142167

143168
void APNetlist::rebuild_block_refs_impl(const vtr::vector_map<APPinId, APPinId>& /*pin_id_map*/,
@@ -162,6 +187,12 @@ void APNetlist::shrink_to_fit_impl() {
162187
block_molecules_.shrink_to_fit();
163188
block_mobilities_.shrink_to_fit();
164189
block_locs_.shrink_to_fit();
190+
191+
// Pin data
192+
pin_atom_pin_.shrink_to_fit();
193+
194+
// Net data
195+
net_atom_net_.shrink_to_fit();
165196
}
166197

167198
void APNetlist::remove_block_impl(const APBlockId /*blk_id*/) {
@@ -198,12 +229,14 @@ bool APNetlist::validate_port_sizes_impl(size_t /*num_ports*/) const {
198229
return true;
199230
}
200231

201-
bool APNetlist::validate_pin_sizes_impl(size_t /*num_pins*/) const {
202-
// No AP-specific pin data to check
232+
bool APNetlist::validate_pin_sizes_impl(size_t num_pins) const {
233+
if (pin_atom_pin_.size() != num_pins)
234+
return false;
203235
return true;
204236
}
205237

206-
bool APNetlist::validate_net_sizes_impl(size_t /*num_nets*/) const {
207-
// No AP-specific net data to check
238+
bool APNetlist::validate_net_sizes_impl(size_t num_nets) const {
239+
if (net_atom_net_.size() != num_nets)
240+
return false;
208241
return true;
209242
}

vpr/src/analytical_place/ap_netlist.h

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
*/
2121

2222
#include <string>
23+
#include "atom_netlist_fwd.h"
2324
#include "netlist.h"
2425
#include "ap_netlist_fwd.h"
2526
#include "prepack.h"
@@ -90,6 +91,20 @@ class APNetlist : public Netlist<APBlockId, APPortId, APPinId, APNetId> {
9091
/// This method should not be used if the block is moveable.
9192
const APFixedBlockLoc& block_loc(const APBlockId id) const;
9293

94+
/*
95+
* Pins
96+
*/
97+
98+
/// @brief Returns the atom pin that corresponds with the given AP pin.
99+
AtomPinId pin_atom_pin(const APPinId id) const;
100+
101+
/*
102+
* Nets
103+
*/
104+
105+
/// @brief Returns the atom net that corresponds with the given AP net.
106+
AtomNetId net_atom_net(const APNetId id) const;
107+
93108
public: // Public Mutators
94109
/*
95110
* Note: all create_*() functions will silently return the appropriate ID
@@ -129,17 +144,19 @@ class APNetlist : public Netlist<APBlockId, APPortId, APPinId, APNetId> {
129144
* @param port_bit The bit index of the pin in the port
130145
* @param net_id The net the pin drives/sinks
131146
* @param pin_type The type of the pin (driver/sink)
147+
* @param atom_pin_id The atom pin that this pin is modeling.
132148
* @param is_const Indicates whether the pin holds a constant value (e.g.
133149
* vcc/gnd)
134150
*/
135-
APPinId create_pin(const APPortId port_id, BitIndex port_bit, const APNetId net_id, const PinType pin_type, bool is_const = false);
151+
APPinId create_pin(const APPortId port_id, BitIndex port_bit, const APNetId net_id, const PinType pin_type, const AtomPinId atom_pin_id, bool is_const = false);
136152

137153
/**
138154
* @brief Create an empty, or return an existing net in the netlist
139155
*
140156
* @param name The unique name of the net
157+
* @param atom_net_id The atom net that this net is modeling.
141158
*/
142-
APNetId create_net(const std::string& name);
159+
APNetId create_net(const std::string& name, const AtomNetId atom_net_id);
143160

144161
private: // Private Members
145162
/*
@@ -186,4 +203,8 @@ class APNetlist : public Netlist<APBlockId, APPortId, APPinId, APNetId> {
186203
/// @brief Location of each block (if fixed).
187204
/// NOTE: This vector will likely be quite sparse.
188205
vtr::vector_map<APBlockId, APFixedBlockLoc> block_locs_;
206+
/// @brief Atom pin associated with each AP pin.
207+
vtr::vector_map<APPinId, AtomPinId> pin_atom_pin_;
208+
/// @brief Atom net associated with each AP net.
209+
vtr::vector_map<APNetId, AtomNetId> net_atom_net_;
189210
};

0 commit comments

Comments
 (0)