Skip to content

Commit dc9a32e

Browse files
Rongbo ZhangRonZ13
Rongbo Zhang
authored and committed
[packer] Changing the vector of candidate molecules into LazyPopUniquePriorityQueue.
The class LazyPopUniquePriorityQueue is a priority queue that allows for lazy deletion of elements. It is implemented using a vector and two sets: one set keeps track of the elements in the queue, and the other set keeps track of the elements that are pending deletion. The queue is sorted by the sort value (SV) of the elements, and the elements are stored in the vector. The pending-deletion set is used so that elements marked for deletion can be removed from the queue when they are popped. The class definition can be found in vpr/src/util/lazy_pop_unique_priority_queue.h. Currently, the class supports the following functions: LazyPopUniquePriorityQueue::push(): Pushes a key-sort-value (K-SV) pair into the priority queue and adds the key to the tracking set. LazyPopUniquePriorityQueue::pop(): Returns the K-SV pair with the highest SV whose key is not pending deletion. LazyPopUniquePriorityQueue::remove(): Removes an element from the priority queue immediately. LazyPopUniquePriorityQueue::remove_at_pop_time(): Removes an element from the priority queue when it is popped. LazyPopUniquePriorityQueue::empty(): Returns whether the queue is empty. LazyPopUniquePriorityQueue::clear(): Clears the priority queue vector and the tracking sets. LazyPopUniquePriorityQueue::size(): Returns the number of elements in the queue. LazyPopUniquePriorityQueue::contains(): Returns true if the key is in the queue, false otherwise.
1 parent 735448c commit dc9a32e

File tree

3 files changed

+240
-88
lines changed

3 files changed

+240
-88
lines changed

vpr/src/pack/greedy_candidate_selector.cpp

Lines changed: 45 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -216,13 +216,11 @@ ClusterGainStats GreedyCandidateSelector::create_cluster_gain_stats(
216216
// Initialize the cluster gain stats.
217217
ClusterGainStats cluster_gain_stats;
218218
cluster_gain_stats.seed_molecule_id = cluster_seed_mol_id;
219-
cluster_gain_stats.num_feasible_blocks = NOT_VALID;
220219
cluster_gain_stats.has_done_connectivity_and_timing = false;
221-
// TODO: The reason this is being resized and not reserved is due to legacy
222-
// code which should be updated.
223-
cluster_gain_stats.feasible_blocks.resize(packer_opts_.feasible_block_array_size);
224-
for (int i = 0; i < packer_opts_.feasible_block_array_size; i++)
225-
cluster_gain_stats.feasible_blocks[i] = PackMoleculeId::INVALID();
220+
cluster_gain_stats.initial_search_for_feasible_blocks = true;
221+
cluster_gain_stats.num_search_for_feasible_blocks_occured = 0;
222+
cluster_gain_stats.num_search_for_feasible_blocks_occurred_limit = packer_opts_.feasible_block_array_size;
223+
cluster_gain_stats.feasible_blocks.clear();
226224
cluster_gain_stats.tie_break_high_fanout_net = AtomNetId::INVALID();
227225
cluster_gain_stats.explore_transitive_fanout = true;
228226

@@ -285,8 +283,10 @@ void GreedyCandidateSelector::update_cluster_gain_stats_candidate_success(
285283
AttractGroupId atom_grp_id = attraction_groups.get_atom_attraction_group(blk_id);
286284

287285
/* reset list of feasible blocks */
288-
cluster_gain_stats.num_feasible_blocks = NOT_VALID;
289286
cluster_gain_stats.has_done_connectivity_and_timing = false;
287+
cluster_gain_stats.initial_search_for_feasible_blocks = true;
288+
cluster_gain_stats.num_search_for_feasible_blocks_occured = 0;
289+
cluster_gain_stats.feasible_blocks.clear();
290290
/* TODO: Allow clusters to have more than one attraction group. */
291291
if (atom_grp_id.is_valid())
292292
cluster_gain_stats.attraction_grp_id = atom_grp_id;
@@ -681,8 +681,7 @@ PackMoleculeId GreedyCandidateSelector::get_next_candidate_for_cluster(
681681
*/
682682

683683
// 1. Find unpacked molecules based on criticality and strong connectedness (connected by low fanout nets) with current cluster
684-
if (cluster_gain_stats.num_feasible_blocks == NOT_VALID) {
685-
cluster_gain_stats.num_feasible_blocks = 0;
684+
if (cluster_gain_stats.initial_search_for_feasible_blocks) {
686685
add_cluster_molecule_candidates_by_connectivity_and_timing(cluster_gain_stats,
687686
cluster_id,
688687
cluster_legalizer,
@@ -692,31 +691,31 @@ PackMoleculeId GreedyCandidateSelector::get_next_candidate_for_cluster(
692691

693692
if (packer_opts_.prioritize_transitive_connectivity) {
694693
// 2. Find unpacked molecules based on transitive connections (eg. 2 hops away) with current cluster
695-
if (cluster_gain_stats.num_feasible_blocks == 0 && cluster_gain_stats.explore_transitive_fanout) {
694+
if (!cluster_gain_stats.initial_search_for_feasible_blocks && cluster_gain_stats.feasible_blocks.size() == 0 && cluster_gain_stats.explore_transitive_fanout) {
696695
add_cluster_molecule_candidates_by_transitive_connectivity(cluster_gain_stats,
697696
cluster_id,
698697
cluster_legalizer,
699698
attraction_groups);
700699
}
701700

702701
// 3. Find unpacked molecules based on weak connectedness (connected by high fanout nets) with current cluster
703-
if (cluster_gain_stats.num_feasible_blocks == 0 && cluster_gain_stats.tie_break_high_fanout_net) {
702+
if (!cluster_gain_stats.initial_search_for_feasible_blocks && cluster_gain_stats.feasible_blocks.size() == 0 && cluster_gain_stats.tie_break_high_fanout_net) {
704703
add_cluster_molecule_candidates_by_highfanout_connectivity(cluster_gain_stats,
705704
cluster_id,
706705
cluster_legalizer,
707706
attraction_groups);
708707
}
709708
} else { //Reverse order
710709
// 3. Find unpacked molecules based on weak connectedness (connected by high fanout nets) with current cluster
711-
if (cluster_gain_stats.num_feasible_blocks == 0 && cluster_gain_stats.tie_break_high_fanout_net) {
710+
if (!cluster_gain_stats.initial_search_for_feasible_blocks && cluster_gain_stats.feasible_blocks.size() == 0 && cluster_gain_stats.tie_break_high_fanout_net) {
712711
add_cluster_molecule_candidates_by_highfanout_connectivity(cluster_gain_stats,
713712
cluster_id,
714713
cluster_legalizer,
715714
attraction_groups);
716715
}
717716

718717
// 2. Find unpacked molecules based on transitive connections (eg. 2 hops away) with current cluster
719-
if (cluster_gain_stats.num_feasible_blocks == 0 && cluster_gain_stats.explore_transitive_fanout) {
718+
if (!cluster_gain_stats.initial_search_for_feasible_blocks && cluster_gain_stats.feasible_blocks.size() == 0 && cluster_gain_stats.explore_transitive_fanout) {
720719
add_cluster_molecule_candidates_by_transitive_connectivity(cluster_gain_stats,
721720
cluster_id,
722721
cluster_legalizer,
@@ -725,21 +724,35 @@ PackMoleculeId GreedyCandidateSelector::get_next_candidate_for_cluster(
725724
}
726725

727726
// 4. Find unpacked molecules based on attraction group of the current cluster (if the cluster has an attraction group)
728-
if (cluster_gain_stats.num_feasible_blocks == 0) {
727+
if (!cluster_gain_stats.initial_search_for_feasible_blocks && cluster_gain_stats.feasible_blocks.size() == 0) {
729728
add_cluster_molecule_candidates_by_attraction_group(cluster_gain_stats,
730729
cluster_id,
731730
cluster_legalizer,
732731
attraction_groups);
733732
}
734733

735734
/* Grab highest gain molecule */
736-
// If this was a vector, this would just be a pop_back.
737735
PackMoleculeId best_molecule = PackMoleculeId::INVALID();
738-
if (cluster_gain_stats.num_feasible_blocks > 0) {
739-
cluster_gain_stats.num_feasible_blocks--;
740-
int index = cluster_gain_stats.num_feasible_blocks;
741-
best_molecule = cluster_gain_stats.feasible_blocks[index];
742-
VTR_ASSERT(!cluster_legalizer.is_mol_clustered(best_molecule));
736+
// checking if there are feasible blocks being proposed
737+
// and that the number of suggestions has not yet reached the limit
738+
if (cluster_gain_stats.feasible_blocks.size() > 0 && cluster_gain_stats.num_search_for_feasible_blocks_occured < cluster_gain_stats.num_search_for_feasible_blocks_occurred_limit) {
739+
best_molecule = cluster_gain_stats.feasible_blocks.pop().first;
740+
if (best_molecule != PackMoleculeId::INVALID()) {
741+
cluster_gain_stats.num_search_for_feasible_blocks_occured++;
742+
VTR_ASSERT(!cluster_legalizer.is_mol_clustered(best_molecule));
743+
}
744+
}
745+
746+
// If we have no feasible blocks, or we have reached the limit of number of pops,
747+
// then we need to clear the feasible blocks list and reset the number of pops.
748+
// This ensures that we can continue searching for feasible blocks for the remaining
749+
// steps (2.transitive, 3.high fanout, 4.attraction group).
750+
if (cluster_gain_stats.feasible_blocks.size() == 0 ||
751+
cluster_gain_stats.num_search_for_feasible_blocks_occured >= cluster_gain_stats.num_search_for_feasible_blocks_occurred_limit ||
752+
cluster_gain_stats.feasible_blocks.delete_pending_set.size() == cluster_gain_stats.feasible_blocks.content_set.size()
753+
){
754+
cluster_gain_stats.feasible_blocks.clear();
755+
cluster_gain_stats.num_search_for_feasible_blocks_occured = 0;
743756
}
744757

745758
// If we are allowing unrelated clustering and no molecule has been found,
@@ -775,6 +788,9 @@ void GreedyCandidateSelector::add_cluster_molecule_candidates_by_connectivity_an
775788
LegalizationClusterId legalization_cluster_id,
776789
const ClusterLegalizer& cluster_legalizer,
777790
AttractionInfo& attraction_groups) {
791+
792+
VTR_ASSERT(cluster_gain_stats.initial_search_for_feasible_blocks);
793+
cluster_gain_stats.initial_search_for_feasible_blocks = false;
778794
cluster_gain_stats.explore_transitive_fanout = true; /* If no legal molecules found, enable exploration of molecules two hops away */
779795

780796
for (AtomBlockId blk_id : cluster_gain_stats.marked_blocks) {
@@ -1001,45 +1017,17 @@ static void add_molecule_to_pb_stats_candidates(PackMoleculeId molecule_id,
10011017
}
10021018
}
10031019

1004-
for (int i = 0; i < cluster_gain_stats.num_feasible_blocks; i++) {
1005-
if (cluster_gain_stats.feasible_blocks[i] == molecule_id) {
1006-
return; // already in queue, do nothing
1007-
}
1020+
// if already in queue, do nothing
1021+
if (cluster_gain_stats.feasible_blocks.contains(molecule_id)) {
1022+
return;
10081023
}
10091024

1010-
if (cluster_gain_stats.num_feasible_blocks >= max_queue_size - 1) {
1011-
/* maximum size for array, remove smallest gain element and sort */
1012-
if (get_molecule_gain(molecule_id, cluster_gain_stats, cluster_att_grp, attraction_groups, num_molecule_failures, prepacker, atom_netlist, appack_ctx) > get_molecule_gain(cluster_gain_stats.feasible_blocks[0], cluster_gain_stats, cluster_att_grp, attraction_groups, num_molecule_failures, prepacker, atom_netlist, appack_ctx)) {
1013-
/* single loop insertion sort */
1014-
int j;
1015-
for (j = 0; j < cluster_gain_stats.num_feasible_blocks - 1; j++) {
1016-
if (get_molecule_gain(molecule_id, cluster_gain_stats, cluster_att_grp, attraction_groups, num_molecule_failures, prepacker, atom_netlist, appack_ctx) <= get_molecule_gain(cluster_gain_stats.feasible_blocks[j + 1], cluster_gain_stats, cluster_att_grp, attraction_groups, num_molecule_failures, prepacker, atom_netlist, appack_ctx)) {
1017-
cluster_gain_stats.feasible_blocks[j] = molecule_id;
1018-
break;
1019-
} else {
1020-
cluster_gain_stats.feasible_blocks[j] = cluster_gain_stats.feasible_blocks[j + 1];
1021-
}
1022-
}
1023-
if (j == cluster_gain_stats.num_feasible_blocks - 1) {
1024-
cluster_gain_stats.feasible_blocks[j] = molecule_id;
1025-
}
1026-
}
1027-
} else {
1028-
/* Expand array and single loop insertion sort */
1029-
int j;
1030-
for (j = cluster_gain_stats.num_feasible_blocks - 1; j >= 0; j--) {
1031-
if (get_molecule_gain(cluster_gain_stats.feasible_blocks[j], cluster_gain_stats, cluster_att_grp, attraction_groups, num_molecule_failures, prepacker, atom_netlist, appack_ctx) > get_molecule_gain(molecule_id, cluster_gain_stats, cluster_att_grp, attraction_groups, num_molecule_failures, prepacker, atom_netlist, appack_ctx)) {
1032-
cluster_gain_stats.feasible_blocks[j + 1] = cluster_gain_stats.feasible_blocks[j];
1033-
} else {
1034-
cluster_gain_stats.feasible_blocks[j + 1] = molecule_id;
1035-
break;
1036-
}
1037-
}
1038-
if (j < 0) {
1039-
cluster_gain_stats.feasible_blocks[0] = molecule_id;
1040-
}
1041-
cluster_gain_stats.num_feasible_blocks++;
1025+
for (std::pair<PackMoleculeId, float>& feasible_block : cluster_gain_stats.feasible_blocks.heap) {
1026+
VTR_ASSERT_DEBUG(get_molecule_gain(feasible_block.first, cluster_gain_stats, cluster_att_grp, attraction_groups, num_molecule_failures, prepacker, atom_netlist, appack_ctx) == feasible_block.second);
10421027
}
1028+
1029+
// Insert the molecule into the queue sorted by gain, and maintain the heap property
1030+
cluster_gain_stats.feasible_blocks.push(molecule_id, get_molecule_gain(molecule_id, cluster_gain_stats, cluster_att_grp, attraction_groups, num_molecule_failures, prepacker, atom_netlist, appack_ctx));
10431031
}
10441032

10451033
/*
@@ -1050,27 +1038,7 @@ static void add_molecule_to_pb_stats_candidates(PackMoleculeId molecule_id,
10501038
*/
10511039
static void remove_molecule_from_pb_stats_candidates(PackMoleculeId molecule_id,
10521040
ClusterGainStats& cluster_gain_stats) {
1053-
int molecule_index;
1054-
bool found_molecule = false;
1055-
1056-
//find the molecule index
1057-
for (int i = 0; i < cluster_gain_stats.num_feasible_blocks; i++) {
1058-
if (cluster_gain_stats.feasible_blocks[i] == molecule_id) {
1059-
found_molecule = true;
1060-
molecule_index = i;
1061-
}
1062-
}
1063-
1064-
//if it is not in the array, return
1065-
if (found_molecule == false) {
1066-
return;
1067-
}
1068-
1069-
//Otherwise, shift the molecules while removing the specified molecule
1070-
for (int j = molecule_index; j < cluster_gain_stats.num_feasible_blocks - 1; j++) {
1071-
cluster_gain_stats.feasible_blocks[j] = cluster_gain_stats.feasible_blocks[j + 1];
1072-
}
1073-
cluster_gain_stats.num_feasible_blocks--;
1041+
cluster_gain_stats.feasible_blocks.remove_at_pop_time(molecule_id);
10741042
}
10751043

10761044
/*

vpr/src/pack/greedy_candidate_selector.h

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "vtr_ndmatrix.h"
2222
#include "vtr_vector.h"
2323
#include "vtr_random.h"
24+
#include "lazy_pop_unique_priority_queue.h"
2425

2526
// Forward declarations
2627
class AtomNetlist;
@@ -33,6 +34,8 @@ struct t_model;
3334
struct t_molecule_stats;
3435
struct t_packer_opts;
3536

37+
38+
3639
/**
3740
* @brief Stats on the gain of a cluster.
3841
*
@@ -96,13 +99,6 @@ struct ClusterGainStats {
9699
/// with the cluster.
97100
AttractGroupId attraction_grp_id;
98101

99-
/// @brief Array of feasible blocks to select from [0..max_array_size-1]
100-
///
101-
/// Sorted in ascending gain order so that the last cluster_ctx.blocks is
102-
/// the most desirable (this makes it easy to pop blocks off the list.
103-
std::vector<PackMoleculeId> feasible_blocks;
104-
int num_feasible_blocks;
105-
106102
/// @brief The flat placement location of this cluster.
107103
///
108104
/// This is some function of the positions of the molecules which have been
@@ -125,6 +121,20 @@ struct ClusterGainStats {
125121
/// set when the stats are created based on the primitive pb type
126122
/// of the seed.
127123
bool is_memory = false;
124+
125+
/// @brief Queue of (feasible block, gain) pairs.
126+
/// The list is maintained in heap structure with the highest gain block
127+
/// at the front.
128+
LazyPopUniquePriorityQueue<PackMoleculeId, float> feasible_blocks;
129+
130+
/// @brief Indicator for the initial search for feasible blocks.
131+
bool initial_search_for_feasible_blocks;
132+
133+
/// @brief Limit on the number of pops.
134+
unsigned num_search_for_feasible_blocks_occurred_limit;
135+
136+
/// @brief Counter for the number of pop.
137+
unsigned num_search_for_feasible_blocks_occured;
128138
};
129139

130140
/**
@@ -441,7 +451,7 @@ class GreedyCandidateSelector {
441451
// Cluster Candidate Selection
442452
// ===================================================================== //
443453

444-
/*
454+
/**
445455
* @brief Add molecules with strong connectedness to the current cluster to
446456
* the list of feasible blocks.
447457
*/
@@ -468,7 +478,7 @@ class GreedyCandidateSelector {
468478
LegalizationClusterId legalization_cluster_id,
469479
const ClusterLegalizer& cluster_legalizer);
470480

471-
/*
481+
/**
472482
* @brief Add molecules based on transitive connections (eg. 2 hops away)
473483
* with current cluster.
474484
*/
@@ -478,7 +488,7 @@ class GreedyCandidateSelector {
478488
const ClusterLegalizer& cluster_legalizer,
479489
AttractionInfo& attraction_groups);
480490

481-
/*
491+
/**
482492
* @brief Add molecules based on weak connectedness (connected by high
483493
* fanout nets) with current cluster.
484494
*/
@@ -488,7 +498,7 @@ class GreedyCandidateSelector {
488498
const ClusterLegalizer& cluster_legalizer,
489499
AttractionInfo& attraction_groups);
490500

491-
/*
501+
/**
492502
* @brief If the current cluster being packed has an attraction group
493503
* associated with it (i.e. there are atoms in it that belong to an
494504
* attraction group), this routine adds molecules from the associated

0 commit comments

Comments
 (0)