diff --git a/vpr/src/pack/greedy_candidate_selector.cpp b/vpr/src/pack/greedy_candidate_selector.cpp index b202035ec59..e0212e1d457 100644 --- a/vpr/src/pack/greedy_candidate_selector.cpp +++ b/vpr/src/pack/greedy_candidate_selector.cpp @@ -216,13 +216,11 @@ ClusterGainStats GreedyCandidateSelector::create_cluster_gain_stats( // Initialize the cluster gain stats. ClusterGainStats cluster_gain_stats; cluster_gain_stats.seed_molecule_id = cluster_seed_mol_id; - cluster_gain_stats.num_feasible_blocks = NOT_VALID; cluster_gain_stats.has_done_connectivity_and_timing = false; - // TODO: The reason this is being resized and not reserved is due to legacy - // code which should be updated. - cluster_gain_stats.feasible_blocks.resize(packer_opts_.feasible_block_array_size); - for (int i = 0; i < packer_opts_.feasible_block_array_size; i++) - cluster_gain_stats.feasible_blocks[i] = PackMoleculeId::INVALID(); + cluster_gain_stats.initial_search_for_feasible_blocks = true; + cluster_gain_stats.num_search_for_feasible_blocks_occured = 0; + cluster_gain_stats.num_search_for_feasible_blocks_occurred_limit = packer_opts_.feasible_block_array_size; + cluster_gain_stats.feasible_blocks.clear(); cluster_gain_stats.tie_break_high_fanout_net = AtomNetId::INVALID(); cluster_gain_stats.explore_transitive_fanout = true; @@ -285,8 +283,10 @@ void GreedyCandidateSelector::update_cluster_gain_stats_candidate_success( AttractGroupId atom_grp_id = attraction_groups.get_atom_attraction_group(blk_id); /* reset list of feasible blocks */ - cluster_gain_stats.num_feasible_blocks = NOT_VALID; cluster_gain_stats.has_done_connectivity_and_timing = false; + cluster_gain_stats.initial_search_for_feasible_blocks = true; + cluster_gain_stats.num_search_for_feasible_blocks_occured = 0; + cluster_gain_stats.feasible_blocks.clear(); /* TODO: Allow clusters to have more than one attraction group. 
*/ if (atom_grp_id.is_valid()) cluster_gain_stats.attraction_grp_id = atom_grp_id; @@ -681,8 +681,7 @@ PackMoleculeId GreedyCandidateSelector::get_next_candidate_for_cluster( */ // 1. Find unpacked molecules based on criticality and strong connectedness (connected by low fanout nets) with current cluster - if (cluster_gain_stats.num_feasible_blocks == NOT_VALID) { - cluster_gain_stats.num_feasible_blocks = 0; + if (cluster_gain_stats.initial_search_for_feasible_blocks) { add_cluster_molecule_candidates_by_connectivity_and_timing(cluster_gain_stats, cluster_id, cluster_legalizer, @@ -692,7 +691,7 @@ PackMoleculeId GreedyCandidateSelector::get_next_candidate_for_cluster( if (packer_opts_.prioritize_transitive_connectivity) { // 2. Find unpacked molecules based on transitive connections (eg. 2 hops away) with current cluster - if (cluster_gain_stats.num_feasible_blocks == 0 && cluster_gain_stats.explore_transitive_fanout) { + if (!cluster_gain_stats.initial_search_for_feasible_blocks && cluster_gain_stats.feasible_blocks.size() == 0 && cluster_gain_stats.explore_transitive_fanout) { add_cluster_molecule_candidates_by_transitive_connectivity(cluster_gain_stats, cluster_id, cluster_legalizer, @@ -700,7 +699,7 @@ PackMoleculeId GreedyCandidateSelector::get_next_candidate_for_cluster( } // 3. Find unpacked molecules based on weak connectedness (connected by high fanout nets) with current cluster - if (cluster_gain_stats.num_feasible_blocks == 0 && cluster_gain_stats.tie_break_high_fanout_net) { + if (!cluster_gain_stats.initial_search_for_feasible_blocks && cluster_gain_stats.feasible_blocks.size() == 0 && cluster_gain_stats.tie_break_high_fanout_net) { add_cluster_molecule_candidates_by_highfanout_connectivity(cluster_gain_stats, cluster_id, cluster_legalizer, @@ -708,7 +707,7 @@ PackMoleculeId GreedyCandidateSelector::get_next_candidate_for_cluster( } } else { //Reverse order // 3. 
Find unpacked molecules based on weak connectedness (connected by high fanout nets) with current cluster - if (cluster_gain_stats.num_feasible_blocks == 0 && cluster_gain_stats.tie_break_high_fanout_net) { + if (!cluster_gain_stats.initial_search_for_feasible_blocks && cluster_gain_stats.feasible_blocks.size() == 0 && cluster_gain_stats.tie_break_high_fanout_net) { add_cluster_molecule_candidates_by_highfanout_connectivity(cluster_gain_stats, cluster_id, cluster_legalizer, @@ -716,7 +715,7 @@ PackMoleculeId GreedyCandidateSelector::get_next_candidate_for_cluster( } // 2. Find unpacked molecules based on transitive connections (eg. 2 hops away) with current cluster - if (cluster_gain_stats.num_feasible_blocks == 0 && cluster_gain_stats.explore_transitive_fanout) { + if (!cluster_gain_stats.initial_search_for_feasible_blocks && cluster_gain_stats.feasible_blocks.size() == 0 && cluster_gain_stats.explore_transitive_fanout) { add_cluster_molecule_candidates_by_transitive_connectivity(cluster_gain_stats, cluster_id, cluster_legalizer, @@ -725,7 +724,7 @@ PackMoleculeId GreedyCandidateSelector::get_next_candidate_for_cluster( } // 4. Find unpacked molecules based on attraction group of the current cluster (if the cluster has an attraction group) - if (cluster_gain_stats.num_feasible_blocks == 0) { + if (!cluster_gain_stats.initial_search_for_feasible_blocks && cluster_gain_stats.feasible_blocks.size() == 0) { add_cluster_molecule_candidates_by_attraction_group(cluster_gain_stats, cluster_id, cluster_legalizer, @@ -733,13 +732,27 @@ PackMoleculeId GreedyCandidateSelector::get_next_candidate_for_cluster( } /* Grab highest gain molecule */ - // If this was a vector, this would just be a pop_back. 
PackMoleculeId best_molecule = PackMoleculeId::INVALID(); - if (cluster_gain_stats.num_feasible_blocks > 0) { - cluster_gain_stats.num_feasible_blocks--; - int index = cluster_gain_stats.num_feasible_blocks; - best_molecule = cluster_gain_stats.feasible_blocks[index]; - VTR_ASSERT(!cluster_legalizer.is_mol_clustered(best_molecule)); + // Only pop a candidate when the queue is non-empty and the number of + // candidates popped so far is still below the limit + if (cluster_gain_stats.feasible_blocks.size() > 0 && cluster_gain_stats.num_search_for_feasible_blocks_occured < cluster_gain_stats.num_search_for_feasible_blocks_occurred_limit) { + best_molecule = cluster_gain_stats.feasible_blocks.pop().first; + if (best_molecule != PackMoleculeId::INVALID()) { + cluster_gain_stats.num_search_for_feasible_blocks_occured++; + VTR_ASSERT(!cluster_legalizer.is_mol_clustered(best_molecule)); + } + } + + // If we have no feasible blocks, we have reached the limit of number of pops, or + // every remaining block is pending deletion, then we need to clear the feasible + // blocks list and reset the number of pops. This ensures that we can continue + // searching for feasible blocks for the remaining steps (2.transitive, 3.high fanout, 4.attraction group). 
+ if (cluster_gain_stats.feasible_blocks.size() == 0 || + cluster_gain_stats.num_search_for_feasible_blocks_occured >= cluster_gain_stats.num_search_for_feasible_blocks_occurred_limit || + cluster_gain_stats.feasible_blocks.delete_pending_set.size() == cluster_gain_stats.feasible_blocks.content_set.size() + ){ + cluster_gain_stats.feasible_blocks.clear(); + cluster_gain_stats.num_search_for_feasible_blocks_occured = 0; } // If we are allowing unrelated clustering and no molecule has been found, @@ -775,6 +788,9 @@ void GreedyCandidateSelector::add_cluster_molecule_candidates_by_connectivity_an LegalizationClusterId legalization_cluster_id, const ClusterLegalizer& cluster_legalizer, AttractionInfo& attraction_groups) { + + VTR_ASSERT(cluster_gain_stats.initial_search_for_feasible_blocks); + cluster_gain_stats.initial_search_for_feasible_blocks = false; cluster_gain_stats.explore_transitive_fanout = true; /* If no legal molecules found, enable exploration of molecules two hops away */ for (AtomBlockId blk_id : cluster_gain_stats.marked_blocks) { @@ -1001,45 +1017,17 @@ static void add_molecule_to_pb_stats_candidates(PackMoleculeId molecule_id, } } - for (int i = 0; i < cluster_gain_stats.num_feasible_blocks; i++) { - if (cluster_gain_stats.feasible_blocks[i] == molecule_id) { - return; // already in queue, do nothing - } + // if already in queue, do nothing + if (cluster_gain_stats.feasible_blocks.contains(molecule_id)) { + return; } - if (cluster_gain_stats.num_feasible_blocks >= max_queue_size - 1) { - /* maximum size for array, remove smallest gain element and sort */ - if (get_molecule_gain(molecule_id, cluster_gain_stats, cluster_att_grp, attraction_groups, num_molecule_failures, prepacker, atom_netlist, appack_ctx) > get_molecule_gain(cluster_gain_stats.feasible_blocks[0], cluster_gain_stats, cluster_att_grp, attraction_groups, num_molecule_failures, prepacker, atom_netlist, appack_ctx)) { - /* single loop insertion sort */ - int j; - for (j = 0; j < 
cluster_gain_stats.num_feasible_blocks - 1; j++) { - if (get_molecule_gain(molecule_id, cluster_gain_stats, cluster_att_grp, attraction_groups, num_molecule_failures, prepacker, atom_netlist, appack_ctx) <= get_molecule_gain(cluster_gain_stats.feasible_blocks[j + 1], cluster_gain_stats, cluster_att_grp, attraction_groups, num_molecule_failures, prepacker, atom_netlist, appack_ctx)) { - cluster_gain_stats.feasible_blocks[j] = molecule_id; - break; - } else { - cluster_gain_stats.feasible_blocks[j] = cluster_gain_stats.feasible_blocks[j + 1]; - } - } - if (j == cluster_gain_stats.num_feasible_blocks - 1) { - cluster_gain_stats.feasible_blocks[j] = molecule_id; - } - } - } else { - /* Expand array and single loop insertion sort */ - int j; - for (j = cluster_gain_stats.num_feasible_blocks - 1; j >= 0; j--) { - if (get_molecule_gain(cluster_gain_stats.feasible_blocks[j], cluster_gain_stats, cluster_att_grp, attraction_groups, num_molecule_failures, prepacker, atom_netlist, appack_ctx) > get_molecule_gain(molecule_id, cluster_gain_stats, cluster_att_grp, attraction_groups, num_molecule_failures, prepacker, atom_netlist, appack_ctx)) { - cluster_gain_stats.feasible_blocks[j + 1] = cluster_gain_stats.feasible_blocks[j]; - } else { - cluster_gain_stats.feasible_blocks[j + 1] = molecule_id; - break; - } - } - if (j < 0) { - cluster_gain_stats.feasible_blocks[0] = molecule_id; - } - cluster_gain_stats.num_feasible_blocks++; + for (std::pair& feasible_block : cluster_gain_stats.feasible_blocks.heap) { + VTR_ASSERT_DEBUG(get_molecule_gain(feasible_block.first, cluster_gain_stats, cluster_att_grp, attraction_groups, num_molecule_failures, prepacker, atom_netlist, appack_ctx) == feasible_block.second); } + + // Insert the molecule into the queue sorted by gain, and maintain the heap property + cluster_gain_stats.feasible_blocks.push(molecule_id, get_molecule_gain(molecule_id, cluster_gain_stats, cluster_att_grp, attraction_groups, num_molecule_failures, prepacker, atom_netlist, 
appack_ctx)); } /* @@ -1050,27 +1038,7 @@ static void add_molecule_to_pb_stats_candidates(PackMoleculeId molecule_id, */ static void remove_molecule_from_pb_stats_candidates(PackMoleculeId molecule_id, ClusterGainStats& cluster_gain_stats) { - int molecule_index; - bool found_molecule = false; - - //find the molecule index - for (int i = 0; i < cluster_gain_stats.num_feasible_blocks; i++) { - if (cluster_gain_stats.feasible_blocks[i] == molecule_id) { - found_molecule = true; - molecule_index = i; - } - } - - //if it is not in the array, return - if (found_molecule == false) { - return; - } - - //Otherwise, shift the molecules while removing the specified molecule - for (int j = molecule_index; j < cluster_gain_stats.num_feasible_blocks - 1; j++) { - cluster_gain_stats.feasible_blocks[j] = cluster_gain_stats.feasible_blocks[j + 1]; - } - cluster_gain_stats.num_feasible_blocks--; + cluster_gain_stats.feasible_blocks.remove_at_pop_time(molecule_id); } /* diff --git a/vpr/src/pack/greedy_candidate_selector.h b/vpr/src/pack/greedy_candidate_selector.h index 2b3eb23a1f5..a45f32de0dc 100644 --- a/vpr/src/pack/greedy_candidate_selector.h +++ b/vpr/src/pack/greedy_candidate_selector.h @@ -21,6 +21,7 @@ #include "vtr_ndmatrix.h" #include "vtr_vector.h" #include "vtr_random.h" +#include "lazy_pop_unique_priority_queue.h" // Forward declarations class AtomNetlist; @@ -33,6 +34,8 @@ struct t_model; struct t_molecule_stats; struct t_packer_opts; + + /** * @brief Stats on the gain of a cluster. * @@ -96,13 +99,6 @@ struct ClusterGainStats { /// with the cluster. AttractGroupId attraction_grp_id; - /// @brief Array of feasible blocks to select from [0..max_array_size-1] - /// - /// Sorted in ascending gain order so that the last cluster_ctx.blocks is - /// the most desirable (this makes it easy to pop blocks off the list. - std::vector feasible_blocks; - int num_feasible_blocks; - /// @brief The flat placement location of this cluster. 
/// /// This is some function of the positions of the molecules which have been @@ -125,6 +121,20 @@ struct ClusterGainStats { /// set when the stats are created based on the primitive pb type /// of the seed. bool is_memory = false; + + /// @brief Queue of feasible molecule and gain pairs. + /// The queue is maintained as a heap with the highest gain molecule + /// at the front. + LazyPopUniquePriorityQueue feasible_blocks; + + /// @brief True while the connectivity and timing search for feasible blocks has not yet run since the last reset. + bool initial_search_for_feasible_blocks; + + /// @brief Maximum number of molecules popped from feasible_blocks before the queue is cleared. + unsigned num_search_for_feasible_blocks_occurred_limit; + + /// @brief Number of molecules popped from feasible_blocks since the queue was last cleared. + unsigned num_search_for_feasible_blocks_occured; }; /** @@ -441,7 +451,7 @@ class GreedyCandidateSelector { // Cluster Candidate Selection // ===================================================================== // - /* + /** * @brief Add molecules with strong connectedness to the current cluster to * the list of feasible blocks. */ @@ -468,7 +478,7 @@ class GreedyCandidateSelector { LegalizationClusterId legalization_cluster_id, const ClusterLegalizer& cluster_legalizer); - /* + /** * @brief Add molecules based on transitive connections (eg. 2 hops away) * with current cluster. */ @@ -478,7 +488,7 @@ class GreedyCandidateSelector { const ClusterLegalizer& cluster_legalizer, AttractionInfo& attraction_groups); - /* + /** * @brief Add molecules based on weak connectedness (connected by high * fanout nets) with current cluster. */ @@ -488,7 +498,7 @@ class GreedyCandidateSelector { const ClusterLegalizer& cluster_legalizer, AttractionInfo& attraction_groups); - /* + /** * @brief If the current cluster being packed has an attraction group * associated with it (i.e. 
there are atoms in it that belong to an * attraction group), this routine adds molecules from the associated
diff --git a/vpr/src/util/lazy_pop_unique_priority_queue.h b/vpr/src/util/lazy_pop_unique_priority_queue.h
new file mode 100644
index 00000000000..9be98000744
--- /dev/null
+++ b/vpr/src/util/lazy_pop_unique_priority_queue.h
@@ -0,0 +1,189 @@
+#ifndef VPR_LAZY_POP_UNIQUE_PRIORITY_QUEUE_H
+#define VPR_LAZY_POP_UNIQUE_PRIORITY_QUEUE_H
+
+#include <algorithm>
+#include <cstddef>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+/**
+ * @brief Lazy Pop Unique Priority Queue
+ *
+ * This is a priority queue that is used to sort items which are identified by the key
+ * and sorted by the sort value.
+ *
+ * It uses a vector to store the key and sort value pair.
+ * It uses a set to store the keys that are in the vector for uniqueness checking
+ * and a set to store the delete pending keys which will be removed at pop time.
+ *
+ * Keys marked with remove_at_pop_time() stay in the heap and in the content set
+ * until pop() reaches them, so size(), empty() and contains() still count them.
+ *
+ * T_key must be hashable, equality comparable and default constructible;
+ * T_sort must be default constructible and comparable with operator<.
+ */
+template<typename T_key, typename T_sort>
+class LazyPopUniquePriorityQueue {
+  public:
+    /** @brief The custom comparison struct for sorting the items in the priority queue.
+     *         A less than comparison will put the item with the highest sort value to the front of the queue.
+     *         A greater than comparison will put the item with the lowest sort value to the front of the queue.
+     */
+    struct LazyPopUniquePriorityQueueCompare {
+        bool operator()(const std::pair<T_key, T_sort>& a,
+                        const std::pair<T_key, T_sort>& b) const {
+            return a.second < b.second;
+        }
+    };
+
+    /// @brief The vector maintained as heap to store the key and sort value pair.
+    std::vector<std::pair<T_key, T_sort>> heap;
+
+    /// @brief The set to store the keys that are in the queue. This is used to ensure uniqueness.
+    std::unordered_set<T_key> content_set;
+
+    /// @brief The set to store the keys of the items that are pending deletion from the queue.
+    std::unordered_set<T_key> delete_pending_set;
+
+    /**
+     * @brief Push the key and the sort value as a pair into the priority queue.
+     *
+     * If the key is already in the queue the call does nothing. That includes
+     * a key that is only pending deletion: it is not re-inserted and is still
+     * dropped when pop() reaches it.
+     *
+     * @param key
+     *          The unique key for the item that will be pushed onto the queue.
+     * @param value
+     *          The sort value used for sorting the item.
+     */
+    void push(T_key key, T_sort value) {
+        // Insert the key and sort value pair into the queue if it is not already present
+        if (content_set.find(key) != content_set.end()) {
+            // If the key is already in the queue, do nothing
+            return;
+        }
+        heap.emplace_back(key, value);
+        std::push_heap(heap.begin(),
+                       heap.end(),
+                       LazyPopUniquePriorityQueueCompare());
+        content_set.insert(key);
+    }
+
+    /**
+     * @brief Pop the top item from the priority queue.
+     *
+     * Items whose key is pending deletion are discarded on the way.
+     *
+     * @return The key and sort value pair with the highest sort value, or a
+     *         value-initialized pair if no item is left to return.
+     */
+    std::pair<T_key, T_sort> pop() {
+        std::pair<T_key, T_sort> top_pair;
+        while (!heap.empty()) {
+            top_pair = heap.front();
+            // remove the key from the heap and the tracking set
+            std::pop_heap(heap.begin(),
+                          heap.end(),
+                          LazyPopUniquePriorityQueueCompare());
+            heap.pop_back();
+            content_set.erase(top_pair.first);
+
+            // checking if the highest value's key is in the delete pending set
+            // if it is, remove it from the delete pending set and find the next best gain's key
+            if (delete_pending_set.find(top_pair.first) != delete_pending_set.end()) {
+                delete_pending_set.erase(top_pair.first);
+                top_pair = std::pair<T_key, T_sort>();
+            } else {
+                break;
+            }
+        }
+        return top_pair;
+    }
+
+    /**
+     * @brief Remove the item with matching key value from the priority queue.
+     *        This will immediately remove the item and re-heapify the queue.
+     *
+     * @param key
+     *          The key of the item to be deleted from the queue.
+     */
+    void remove(T_key key) {
+        // If the key is not in the priority queue, do nothing.
+        if (content_set.find(key) == content_set.end()) {
+            return;
+        }
+        content_set.erase(key);
+        delete_pending_set.erase(key);
+
+        // Drop the matching entry from the heap storage, then restore the heap property.
+        auto entry = std::find_if(heap.begin(), heap.end(),
+                                  [&key](const std::pair<T_key, T_sort>& item) {
+                                      return item.first == key;
+                                  });
+        if (entry != heap.end()) {
+            heap.erase(entry);
+        }
+        std::make_heap(heap.begin(), heap.end(), LazyPopUniquePriorityQueueCompare());
+    }
+
+    /**
+     * @brief Remove the item with matching key value from the priority queue at pop time.
+     *        Add the key to the delete pending set for tracking,
+     *        and it will be deleted when it is popped.
+     *
+     * This function will not immediately delete the key from the
+     * priority queue. It will be deleted when it is popped. Thus do not
+     * expect a size reduction in the priority queue immediately.
+     * @param key
+     *          The key of the item to be deleted from the queue at pop time.
+     */
+    void remove_at_pop_time(T_key key) {
+        // If the key is in the list, start tracking it in the delete pending list.
+        // Otherwise, do nothing.
+        if (content_set.find(key) != content_set.end()) {
+            delete_pending_set.insert(key);
+        }
+    }
+
+    /**
+     * @brief Check if the priority queue is empty.
+     *
+     * @return True if the priority queue is empty, false otherwise.
+     */
+    bool empty() const {
+        return heap.empty();
+    }
+
+    /**
+     * @brief Clears the priority queue and the tracking sets.
+     */
+    void clear() {
+        heap.clear();
+        content_set.clear();
+        delete_pending_set.clear();
+    }
+
+    /**
+     * @brief Get the size of the priority queue.
+     *
+     * @return The size of the priority queue.
+     */
+    size_t size() const {
+        return heap.size();
+    }
+
+    /**
+     * @brief Check if the item referred to by the key is in the priority queue.
+     *
+     * @param key
+     *          The key of the item.
+     * @return True if the key is in the priority queue, false otherwise.
+     */
+    bool contains(T_key key) const {
+        return content_set.find(key) != content_set.end();
+    }
+};
+
+#endif