Skip to content

Commit 3defccf

Browse files
[Packer] Updated Candidate Selector Class Based on VB Comments
1 parent 0607e79 commit 3defccf

File tree

4 files changed

+79
-63
lines changed

4 files changed

+79
-63
lines changed

vpr/src/pack/greedy_candidate_selector.cpp

Lines changed: 40 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
* total_block_gain
2626
* + molecule_base_gain*some_factor
2727
* - introduced_input_nets_of_unrelated_blocks_pulled_in_by_molecule*some_other_factor
28+
*
29+
* TODO: Confirm that this comment is correct.
2830
*/
2931
static float get_molecule_gain(t_pack_molecule* molecule,
3032
ClusterGainStats& cluster_gain_stats,
@@ -277,11 +279,11 @@ void GreedyCandidateSelector::mark_and_update_partial_gain(
277279
for (AtomPinId pin_id : pins) {
278280
AtomBlockId blk_id = atom_netlist_.pin_block(pin_id);
279281
if (!cluster_legalizer.is_atom_clustered(blk_id)) {
280-
if (cluster_gain_stats.sharinggain.count(blk_id) == 0) {
282+
if (cluster_gain_stats.sharing_gain.count(blk_id) == 0) {
281283
cluster_gain_stats.marked_blocks.push_back(blk_id);
282-
cluster_gain_stats.sharinggain[blk_id] = 1;
284+
cluster_gain_stats.sharing_gain[blk_id] = 1;
283285
} else {
284-
cluster_gain_stats.sharinggain[blk_id]++;
286+
cluster_gain_stats.sharing_gain[blk_id]++;
285287
}
286288
}
287289
}
@@ -333,7 +335,7 @@ void GreedyCandidateSelector::update_connection_gain_values(
333335
const ClusterLegalizer& cluster_legalizer,
334336
e_net_relation_to_clustered_block net_relation_to_clustered_block) {
335337

336-
/*This function is called when the connectiongain values on the net net_id*
338+
/*This function is called when the connection_gain values on the net net_id
337339
*require updating. */
338340

339341
// Atom Context used to lookup the atom pb.
@@ -368,33 +370,33 @@ void GreedyCandidateSelector::update_connection_gain_values(
368370
/* TODO: Gain function accurate only if net has one connection to block,
369371
* TODO: Should we handle case where net has multi-connection to block?
370372
* Gain computation is only off by a bit in this case */
371-
if (cluster_gain_stats.connectiongain.count(blk_id) == 0) {
372-
cluster_gain_stats.connectiongain[blk_id] = 0;
373+
if (cluster_gain_stats.connection_gain.count(blk_id) == 0) {
374+
cluster_gain_stats.connection_gain[blk_id] = 0;
373375
}
374376

375377
if (num_internal_connections > 1) {
376-
cluster_gain_stats.connectiongain[blk_id] -= 1 / (float)(num_open_connections + 1.5 * num_stuck_connections + 1 + 0.1);
378+
cluster_gain_stats.connection_gain[blk_id] -= 1 / (float)(num_open_connections + 1.5 * num_stuck_connections + 1 + 0.1);
377379
}
378-
cluster_gain_stats.connectiongain[blk_id] += 1 / (float)(num_open_connections + 1.5 * num_stuck_connections + 0.1);
380+
cluster_gain_stats.connection_gain[blk_id] += 1 / (float)(num_open_connections + 1.5 * num_stuck_connections + 0.1);
379381
}
380382
}
381383
}
382384

383385
if (net_relation_to_clustered_block == e_net_relation_to_clustered_block::INPUT) {
384-
/*Calculate the connectiongain for the atom block which is driving *
386+
/*Calculate the connection_gain for the atom block which is driving *
385387
*the atom net that is an input to an atom block in the cluster */
386388

387389
AtomPinId driver_pin_id = atom_netlist_.net_driver(net_id);
388390
AtomBlockId blk_id = atom_netlist_.pin_block(driver_pin_id);
389391

390392
if (!cluster_legalizer.is_atom_clustered(blk_id)) {
391-
if (cluster_gain_stats.connectiongain.count(blk_id) == 0) {
392-
cluster_gain_stats.connectiongain[blk_id] = 0;
393+
if (cluster_gain_stats.connection_gain.count(blk_id) == 0) {
394+
cluster_gain_stats.connection_gain[blk_id] = 0;
393395
}
394396
if (num_internal_connections > 1) {
395-
cluster_gain_stats.connectiongain[blk_id] -= 1 / (float)(num_open_connections + 1.5 * num_stuck_connections + 0.1 + 1);
397+
cluster_gain_stats.connection_gain[blk_id] -= 1 / (float)(num_open_connections + 1.5 * num_stuck_connections + 0.1 + 1);
396398
}
397-
cluster_gain_stats.connectiongain[blk_id] += 1 / (float)(num_open_connections + 1.5 * num_stuck_connections + 0.1);
399+
cluster_gain_stats.connection_gain[blk_id] += 1 / (float)(num_open_connections + 1.5 * num_stuck_connections + 0.1);
398400
}
399401
}
400402
}
@@ -405,7 +407,7 @@ void GreedyCandidateSelector::update_timing_gain_values(
405407
const ClusterLegalizer& cluster_legalizer,
406408
e_net_relation_to_clustered_block net_relation_to_clustered_block) {
407409

408-
/*This function is called when the timing_gain values on the atom net*
410+
/*This function is called when the timing_gain values on the atom net
409411
*net_id require updating. */
410412

411413
/* Check if this atom net lists its driving atom block twice. If so, avoid *
@@ -419,13 +421,13 @@ void GreedyCandidateSelector::update_timing_gain_values(
419421
for (AtomPinId pin_id : pins) {
420422
AtomBlockId blk_id = atom_netlist_.pin_block(pin_id);
421423
if (!cluster_legalizer.is_atom_clustered(blk_id)) {
422-
double timinggain = timing_info_.setup_pin_criticality(pin_id);
424+
double timing_gain = timing_info_.setup_pin_criticality(pin_id);
423425

424-
if (cluster_gain_stats.timinggain.count(blk_id) == 0) {
425-
cluster_gain_stats.timinggain[blk_id] = 0;
426+
if (cluster_gain_stats.timing_gain.count(blk_id) == 0) {
427+
cluster_gain_stats.timing_gain[blk_id] = 0;
426428
}
427-
if (timinggain > cluster_gain_stats.timinggain[blk_id])
428-
cluster_gain_stats.timinggain[blk_id] = timinggain;
429+
if (timing_gain > cluster_gain_stats.timing_gain[blk_id])
430+
cluster_gain_stats.timing_gain[blk_id] = timing_gain;
429431
}
430432
}
431433
}
@@ -439,13 +441,13 @@ void GreedyCandidateSelector::update_timing_gain_values(
439441

440442
if (!cluster_legalizer.is_atom_clustered(new_blk_id)) {
441443
for (AtomPinId pin_id : atom_netlist_.net_sinks(net_id)) {
442-
double timinggain = timing_info_.setup_pin_criticality(pin_id);
444+
double timing_gain = timing_info_.setup_pin_criticality(pin_id);
443445

444-
if (cluster_gain_stats.timinggain.count(new_blk_id) == 0) {
445-
cluster_gain_stats.timinggain[new_blk_id] = 0;
446+
if (cluster_gain_stats.timing_gain.count(new_blk_id) == 0) {
447+
cluster_gain_stats.timing_gain[new_blk_id] = 0;
446448
}
447-
if (timinggain > cluster_gain_stats.timinggain[new_blk_id])
448-
cluster_gain_stats.timinggain[new_blk_id] = timinggain;
449+
if (timing_gain > cluster_gain_stats.timing_gain[new_blk_id])
450+
cluster_gain_stats.timing_gain[new_blk_id] = timing_gain;
449451
}
450452
}
451453
}
@@ -456,13 +458,13 @@ void GreedyCandidateSelector::update_total_gain(ClusterGainStats& cluster_gain_s
456458
AttractGroupId cluster_att_grp_id = cluster_gain_stats.attraction_grp_id;
457459

458460
for (AtomBlockId blk_id : cluster_gain_stats.marked_blocks) {
459-
//Initialize connectiongain and sharinggain if
461+
//Initialize connection_gain and sharing_gain if
460462
//they have not previously been updated for the block
461-
if (cluster_gain_stats.connectiongain.count(blk_id) == 0) {
462-
cluster_gain_stats.connectiongain[blk_id] = 0;
463+
if (cluster_gain_stats.connection_gain.count(blk_id) == 0) {
464+
cluster_gain_stats.connection_gain[blk_id] = 0;
463465
}
464-
if (cluster_gain_stats.sharinggain.count(blk_id) == 0) {
465-
cluster_gain_stats.sharinggain[blk_id] = 0;
466+
if (cluster_gain_stats.sharing_gain.count(blk_id) == 0) {
467+
cluster_gain_stats.sharing_gain[blk_id] = 0;
466468
}
467469

468470
AttractGroupId atom_grp_id = attraction_groups.get_atom_attraction_group(blk_id);
@@ -484,18 +486,18 @@ void GreedyCandidateSelector::update_total_gain(ClusterGainStats& cluster_gain_s
484486
if (packer_opts_.connection_driven) {
485487
/*try to absorb as many connections as possible*/
486488
cluster_gain_stats.gain[blk_id] = ((1 - packer_opts_.beta)
487-
* (float)cluster_gain_stats.sharinggain[blk_id]
488-
+ packer_opts_.beta * (float)cluster_gain_stats.connectiongain[blk_id])
489+
* (float)cluster_gain_stats.sharing_gain[blk_id]
490+
+ packer_opts_.beta * (float)cluster_gain_stats.connection_gain[blk_id])
489491
/ (num_used_pins);
490492
} else {
491-
cluster_gain_stats.gain[blk_id] = ((float)cluster_gain_stats.sharinggain[blk_id])
493+
cluster_gain_stats.gain[blk_id] = ((float)cluster_gain_stats.sharing_gain[blk_id])
492494
/ (num_used_pins);
493495
}
494496

495497
/* Add in timing driven cost into cost function */
496498
if (packer_opts_.timing_driven) {
497499
cluster_gain_stats.gain[blk_id] = packer_opts_.alpha
498-
* cluster_gain_stats.timinggain[blk_id]
500+
* cluster_gain_stats.timing_gain[blk_id]
499501
+ (1.0 - packer_opts_.alpha) * (float)cluster_gain_stats.gain[blk_id];
500502
}
501503
}
@@ -761,7 +763,7 @@ void GreedyCandidateSelector::add_cluster_molecule_candidates_by_attraction_grou
761763
return;
762764
}
763765

764-
if (num_available_atoms < 500) {
766+
if (num_available_atoms < attraction_group_num_atoms_threshold_) {
765767
for (AtomBlockId atom_id : available_atoms) {
766768
//Only consider molecules that are unpacked and of the correct type
767769
t_pack_molecule* molecule = prepacker.get_atom_molecule(atom_id);
@@ -780,7 +782,10 @@ void GreedyCandidateSelector::add_cluster_molecule_candidates_by_attraction_grou
780782
int min = 0;
781783
int max = num_available_atoms - 1;
782784

783-
for (int j = 0; j < 500; j++) {
785+
for (int j = 0; j < attraction_group_num_atoms_threshold_; j++) {
786+
// FIXME: This is a non-deterministic random number generator and it is
787+
// overkill for what is needed here. Should use vtr::irand which
788+
// would be faster.
784789
std::random_device rd;
785790
std::mt19937 gen(rd());
786791
std::uniform_int_distribution<> distr(min, max);

vpr/src/pack/greedy_candidate_selector.h

Lines changed: 36 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -39,16 +39,15 @@ struct ClusterGainStats {
3939
/// @brief Attraction (inverse of cost) function.
4040
std::unordered_map<AtomBlockId, float> gain;
4141

42-
/// @brief The timing criticality score of this atom cluster_ctx.blocks.
42+
/// @brief The timing criticality score of this atom.
4343
/// Determined by the most critical atom net between this atom
44-
/// cluster_ctx.blocks and any atom cluster_ctx.blocks in the current
45-
/// pb.
46-
std::unordered_map<AtomBlockId, float> timinggain;
44+
/// and any atom in the current pb.
45+
std::unordered_map<AtomBlockId, float> timing_gain;
4746
/// @brief Weighted sum of connections to attraction function.
48-
std::unordered_map<AtomBlockId, float> connectiongain;
49-
/// @brief How many nets on an atom cluster_ctx.blocks are already in the
50-
/// pb under consideration.
51-
std::unordered_map<AtomBlockId, float> sharinggain;
47+
std::unordered_map<AtomBlockId, float> connection_gain;
48+
/// @brief How many nets on an atom are already in the pb under
49+
/// consideration.
50+
std::unordered_map<AtomBlockId, float> sharing_gain;
5251

5352
/// @brief Stores the number of times atoms have failed to be packed into
5453
/// the cluster.
@@ -57,7 +56,9 @@ struct ClusterGainStats {
5756
/// has failed to be packed into the cluster.
5857
std::unordered_map<AtomBlockId, int> atom_failures;
5958

60-
/// @brief List of nets with the num_pins_of_net_in_pb and gain entries altered.
59+
/// @brief List of nets with the num_pins_of_net_in_pb and gain entries
60+
/// altered (i.e. have some gain-related connection to the current
61+
/// cluster).
6162
std::vector<AtomNetId> marked_nets;
6263
/// @brief List of blocks with the num_pins_of_net_in_pb and gain entries altered.
6364
std::vector<AtomBlockId> marked_blocks;
@@ -69,7 +70,7 @@ struct ClusterGainStats {
6970
/// determine next candidate molecule then explore molecules on
7071
/// transitive fanout.
7172
bool explore_transitive_fanout;
72-
/// @brief Holding trasitive fanout candidates key: root block id of the
73+
/// @brief Holding transitive fanout candidates key: root block id of the
7374
/// molecule, value: pointer to the molecule.
7475
// TODO: This should be an unordered map, unless stability is desired.
7576
std::map<AtomBlockId, t_pack_molecule*> transitive_fanout_candidates;
@@ -87,7 +88,7 @@ struct ClusterGainStats {
8788
/// Sorted in ascending gain order so that the last block is
8889
/// the most desirable (this makes it easy to pop blocks off the list).
8990
std::vector<t_pack_molecule*> feasible_blocks;
90-
int num_feasible_blocks; /* [0..num_marked_models-1] */
91+
int num_feasible_blocks;
9192
};
9293

9394
/**
@@ -139,10 +140,16 @@ class GreedyCandidateSelector {
139140
static constexpr int AAPACK_MAX_HIGH_FANOUT_EXPLORE = 10;
140141

141142
/// @brief When investigating transitive fanout connections in packing,
142-
/// consider a maximum of this many molecule s, must be less than
143+
/// consider a maximum of this many molecules, must be less than
143144
/// packer_opts.feasible_block_array_size.
144145
static constexpr int AAPACK_MAX_TRANSITIVE_EXPLORE = 40;
145146

147+
/// @brief When adding cluster molecule candidates by attraction groups,
148+
/// only investigate this many candidates. Some attraction groups can
149+
/// get very large; so this threshold decides when to explore all
150+
/// atoms in the group, or a randomly selected number of them.
151+
static constexpr int attraction_group_num_atoms_threshold_ = 500;
152+
146153
public:
147154
~GreedyCandidateSelector();
148155

@@ -181,9 +188,9 @@ class GreedyCandidateSelector {
181188
* @param net_output_feeds_driving_block_input
182189
* The set of nets whose output feeds the block that drives
183190
* itself. This may cause double-counting in the gain
184-
* calculations and need to be handled special.
191+
* calculations and needs special handling.
185192
* @param timing_info
186-
* Setup timing info for this Atom Netlist. Used to incorperate
193+
* Setup timing info for this Atom Netlist. Used to incorporate
187194
* timing / criticality into the gain calculation.
188195
* @param log_verbosity
189196
* The verbosity of log messages in the candidate selector.
@@ -265,8 +272,6 @@ class GreedyCandidateSelector {
265272
* @param failed_mol
266273
* The molecule that failed to pack into the cluster.
267274
*/
268-
// Update the cluster gain stats after a candidate was unsuccessfully
269-
// clustered into the current cluster.
270275
void update_cluster_gain_stats_candidate_failed(
271276
ClusterGainStats& cluster_gain_stats,
272277
t_pack_molecule* failed_mol);
@@ -304,7 +309,9 @@ class GreedyCandidateSelector {
304309
* This should be called after all molecules have been packed into a cluster.
305310
*
306311
* This updates internal lookup tables in the candidate selector. For
307-
* example, inter-clb nets.
312+
* example, what inter-clb nets exist on a cluster are stored by this
313+
* routine to make later transitive gain function calculations more
314+
* efficient.
308315
*
309316
* @param cluster_gain_stats
310317
* The cluster gain stats for the cluster to finalize.
@@ -351,7 +358,7 @@ class GreedyCandidateSelector {
351358
e_net_relation_to_clustered_block net_relation_to_clustered_block);
352359

353360
/**
354-
* @brief Updates the connectiongain in the cluster_gain_stats.
361+
* @brief Updates the connection_gain in the cluster_gain_stats.
355362
*/
356363
void update_connection_gain_values(ClusterGainStats& cluster_gain_stats,
357364
AtomNetId net_id,
@@ -360,7 +367,7 @@ class GreedyCandidateSelector {
360367
e_net_relation_to_clustered_block net_relation_to_clustered_block);
361368

362369
/**
363-
* Updates the timinggain in the cluster_gain_stats.
370+
* Updates the timing_gain in the cluster_gain_stats.
364371
*/
365372
void update_timing_gain_values(ClusterGainStats& cluster_gain_stats,
366373
AtomNetId net_id,
@@ -369,8 +376,8 @@ class GreedyCandidateSelector {
369376

370377
/**
371378
* @brief Updates the total gain array to reflect the desired tradeoff
372-
* between input sharing (sharinggain) and path_length minimization
373-
* (timinggain) input each time a new molecule is added to the
379+
* between input sharing (sharing_gain) and path_length minimization
380+
* (timing_gain) input each time a new molecule is added to the
374381
* cluster.
375382
*/
376383
void update_total_gain(ClusterGainStats& cluster_gain_stats,
@@ -439,8 +446,8 @@ class GreedyCandidateSelector {
439446
*
440447
* Attraction groups can be very large, so we only add some randomly
441448
* selected molecules for efficiency if the number of atoms in the group is
442-
* greater than 500. Therefore, the molecules added to the candidates will
443-
* vary each time you call this function.
449+
* greater than some threshold. Therefore, the molecules added to the
450+
* candidates will vary each time you call this function.
444451
*/
445452
void add_cluster_molecule_candidates_by_attraction_group(
446453
ClusterGainStats& cluster_gain_stats,
@@ -450,7 +457,7 @@ class GreedyCandidateSelector {
450457
AttractionInfo& attraction_groups);
451458

452459
/**
453-
* @brief Finds a molecule to propose which is unrelated by may be good to
460+
* @brief Finds a molecule to propose which is unrelated but may be good to
454461
* cluster.
455462
*/
456463
t_pack_molecule* get_unrelated_candidate_for_cluster(
@@ -473,7 +480,8 @@ class GreedyCandidateSelector {
473480
/// @brief The verbosity of log messages in the candidate selector.
474481
const int log_verbosity_;
475482

476-
/// @brief Pre-computed logical block types for each model in the architecture.
483+
/// @brief Pre-computed vector of logical block types that could implement
484+
/// the given model in the architecture.
477485
const std::map<const t_model*, std::vector<t_logical_block_type_ptr>>& primitive_candidate_block_types_;
478486

479487
/// @brief The high-fanout thresholds per logical block type. Used to ignore
@@ -500,7 +508,9 @@ class GreedyCandidateSelector {
500508
/// transitive candidates.
501509
vtr::vector<LegalizationClusterId, std::vector<AtomNetId>> clb_inter_blk_nets_;
502510

503-
/// @brief Data pre-computed to help select unrelated molecules.
511+
/// @brief Data pre-computed to help select unrelated molecules. This is a
512+
/// list of lists of molecules sorted by their gain, where the first
513+
/// dimension is the number of external outputs of the molecule.
504514
std::vector<std::vector<t_pack_molecule *>> unrelated_clustering_data_;
505515

506516
/// @brief A count on the number of unrelated clustering attempts which

vpr/src/pack/prepack.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1729,7 +1729,7 @@ void Prepacker::init(const AtomNetlist& atom_nlist, const std::vector<t_logical_
17291729
}
17301730
}
17311731

1732-
// TODO: Since this is constant per moleucle, it may make sense to precompute
1732+
// TODO: Since this is constant per molecule, it may make sense to precompute
17331733
// this information and store it in the prepacker class. This may be
17341734
// expensive to calculate for large molecules.
17351735
t_molecule_stats Prepacker::calc_molecule_stats(const t_pack_molecule* molecule,

0 commit comments

Comments
 (0)