diff --git a/vpr/src/pack/cluster_legalizer.cpp b/vpr/src/pack/cluster_legalizer.cpp index f85471e1636..399b0e271cc 100644 --- a/vpr/src/pack/cluster_legalizer.cpp +++ b/vpr/src/pack/cluster_legalizer.cpp @@ -19,7 +19,6 @@ #include "atom_netlist.h" #include "cluster_placement.h" #include "cluster_router.h" -#include "cluster_util.h" #include "globals.h" #include "logic_types.h" #include "netlist_utils.h" @@ -80,7 +79,7 @@ static size_t calc_max_cluster_size(const std::vector& log * * Used to store information used during clustering. */ -static void alloc_and_load_pb_stats(t_pb* pb, const int feasible_block_array_size) { +static void alloc_and_load_pb_stats(t_pb* pb) { /* Call this routine when starting to fill up a new cluster. It resets * * the gain vector, etc. */ @@ -90,29 +89,8 @@ static void alloc_and_load_pb_stats(t_pb* pb, const int feasible_block_array_siz pb->pb_stats->output_pins_used = std::vector>(pb->pb_graph_node->num_output_pin_class); pb->pb_stats->lookahead_input_pins_used = std::vector>(pb->pb_graph_node->num_input_pin_class); pb->pb_stats->lookahead_output_pins_used = std::vector>(pb->pb_graph_node->num_output_pin_class); - pb->pb_stats->num_feasible_blocks = NOT_VALID; - pb->pb_stats->feasible_blocks = new t_pack_molecule*[feasible_block_array_size]; - - for (int i = 0; i < feasible_block_array_size; i++) - pb->pb_stats->feasible_blocks[i] = nullptr; - - pb->pb_stats->tie_break_high_fanout_net = AtomNetId::INVALID(); - - pb->pb_stats->pulled_from_atom_groups = 0; - pb->pb_stats->num_att_group_atoms_used = 0; - - pb->pb_stats->gain.clear(); - pb->pb_stats->timinggain.clear(); - pb->pb_stats->connectiongain.clear(); - pb->pb_stats->sharinggain.clear(); - pb->pb_stats->hillgain.clear(); - pb->pb_stats->transitive_fanout_candidates.clear(); - - pb->pb_stats->num_pins_of_net_in_pb.clear(); pb->pb_stats->num_child_blocks_in_pb = 0; - - pb->pb_stats->explore_transitive_fanout = true; } /* @@ -176,22 +154,6 @@ static void free_pb_stats_recursive(t_pb* 
pb) { } } -/* Record the failure of the molecule in this cluster in the current pb stats. - * If a molecule fails repeatedly, it's gain will be penalized if packing with - * attraction groups on. */ -static void record_molecule_failure(t_pack_molecule* molecule, t_pb* pb) { - //Only have to record the failure for the first atom in the molecule. - //The convention when checking if a molecule has failed to pack in the cluster - //is to check whether the first atoms has been recorded as having failed - - auto got = pb->pb_stats->atom_failures.find(molecule->atom_block_ids[0]); - if (got == pb->pb_stats->atom_failures.end()) { - pb->pb_stats->atom_failures.insert({molecule->atom_block_ids[0], 1}); - } else { - got->second++; - } -} - /** * @brief Checks whether an atom block can be added to a clustered block * without violating floorplanning constraints. It also updates the @@ -572,7 +534,7 @@ try_place_atom_block_rec(const t_pb_graph_node* pb_graph_node, *parent = pb; /* this pb is parent of it's child that called this function */ VTR_ASSERT(pb->pb_graph_node == pb_graph_node); if (pb->pb_stats == nullptr) { - alloc_and_load_pb_stats(pb, feasible_block_array_size); + alloc_and_load_pb_stats(pb); } const t_pb_type* pb_type = pb_graph_node->pb_type; @@ -1216,9 +1178,6 @@ e_block_pack_status ClusterLegalizer::try_pack_molecule(t_pack_molecule* molecul // macros that limit placement flexibility. if (cluster.placement_stats->has_long_chain && molecule->is_chain() && molecule->chain_info->is_long_chain) { VTR_LOGV(log_verbosity_ > 4, "\t\t\tFAILED Placement Feasibility Filter: Only one long chain per cluster is allowed\n"); - //Record the failure of this molecule in the current pb stats - record_molecule_failure(molecule, cluster.pb); - // Free the allocated data. 
return e_block_pack_status::BLK_FAILED_FEASIBLE; } @@ -1240,8 +1199,6 @@ e_block_pack_status ClusterLegalizer::try_pack_molecule(t_pack_molecule* molecul log_verbosity_, cluster_pr_needs_update); if (!block_pack_floorplan_status) { - // Record the failure of this molecule in the current pb stats - record_molecule_failure(molecule, cluster.pb); return e_block_pack_status::BLK_FAILED_FLOORPLANNING; } @@ -1262,8 +1219,6 @@ e_block_pack_status ClusterLegalizer::try_pack_molecule(t_pack_molecule* molecul atom_noc_grp_id_, log_verbosity_); if (!block_pack_noc_grp_status) { - // Record the failure of this molecule in the current pb stats - record_molecule_failure(molecule, cluster.pb); return e_block_pack_status::BLK_FAILED_NOC_GROUP; } } @@ -1443,9 +1398,6 @@ e_block_pack_status ClusterLegalizer::try_pack_molecule(t_pack_molecule* molecul } reset_molecule_info(molecule); - // Record the failure of this molecule in the current pb stats - record_molecule_failure(molecule, cluster.pb); - /* Packing failed, but a part of the pb tree is still allocated and pbs have their modes set. * Before trying to pack next molecule the unused pbs need to be freed and, the most important, * their modes reset. This task is performed by the cleanup_pb() function below. */ @@ -1481,7 +1433,7 @@ ClusterLegalizer::start_new_cluster(t_pack_molecule* molecule, // Create the physical block for this cluster based on the type. 
t_pb* cluster_pb = new t_pb; cluster_pb->pb_graph_node = cluster_type->pb_graph_head; - alloc_and_load_pb_stats(cluster_pb, feasible_block_array_size_); + alloc_and_load_pb_stats(cluster_pb); cluster_pb->parent_pb = nullptr; cluster_pb->mode = cluster_mode; @@ -1821,6 +1773,20 @@ bool ClusterLegalizer::is_molecule_compatible(t_pack_molecule* molecule, return true; } +size_t ClusterLegalizer::get_num_cluster_inputs_available( + LegalizationClusterId cluster_id) const { + VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size()); + const LegalizationCluster& cluster = legalization_clusters_[cluster_id]; + + // Count the number of inputs available per pin class. + size_t inputs_avail = 0; + for (int i = 0; i < cluster.pb->pb_graph_node->num_input_pin_class; i++) { + inputs_avail += cluster.pb->pb_stats->input_pins_used[i].size(); + } + + return inputs_avail; +} + void ClusterLegalizer::finalize() { for (LegalizationClusterId cluster_id : legalization_cluster_ids_) { if (!cluster_id.is_valid()) diff --git a/vpr/src/pack/cluster_legalizer.h b/vpr/src/pack/cluster_legalizer.h index 61de0587a78..ef3ec64f7b2 100644 --- a/vpr/src/pack/cluster_legalizer.h +++ b/vpr/src/pack/cluster_legalizer.h @@ -428,6 +428,9 @@ class ClusterLegalizer { return cluster.molecules.size(); } + /// @brief Gets the total number of cluster inputs available. + size_t get_num_cluster_inputs_available(LegalizationClusterId cluster_id) const; + /// @brief Gets the ID of the cluster that contains the given atom block. 
inline LegalizationClusterId get_atom_cluster(AtomBlockId blk_id) const { VTR_ASSERT_SAFE(blk_id.is_valid() && (size_t)blk_id < atom_cluster_.size()); diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index 0978817a0ce..736bde0ef59 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -5,8 +5,8 @@ #include "PreClusterTimingGraphResolver.h" #include "PreClusterDelayCalculator.h" #include "atom_netlist.h" +#include "attraction_groups.h" #include "cluster_legalizer.h" -#include "cluster_placement.h" #include "clustered_netlist.h" #include "concrete_timing_info.h" #include "output_clustering.h" @@ -15,10 +15,6 @@ #include "tatum/echo_writer.hpp" #include "vpr_context.h" -/**********************************/ -/* Global variables in clustering */ -/**********************************/ - /*Print the contents of each cluster to an echo file*/ static void echo_clusters(char* filename, const ClusterLegalizer& cluster_legalizer) { FILE* fp; @@ -123,11 +119,6 @@ void calc_init_packing_timing(const t_packer_opts& packer_opts, } } -void free_clustering_data(t_clustering_data& clustering_data) { - delete[] clustering_data.unclustered_list_head; - delete[] clustering_data.memory_pool; -} - void check_and_output_clustering(ClusterLegalizer& cluster_legalizer, const t_packer_opts& packer_opts, const std::unordered_set& is_clock, @@ -215,1132 +206,8 @@ void rebuild_attraction_groups(AttractionInfo& attraction_groups, } } -bool is_atom_blk_in_pb(const AtomBlockId blk_id, const t_pb* pb) { - const AtomContext& atom_ctx = g_vpr_ctx.atom(); - - const t_pb* cur_pb = atom_ctx.lookup.atom_pb(blk_id); - while (cur_pb) { - if (cur_pb == pb) { - return true; - } - cur_pb = cur_pb->parent_pb; - } - return false; -} - -void remove_molecule_from_pb_stats_candidates(t_pack_molecule* molecule, - t_pb* pb) { - int molecule_index; - bool found_molecule = false; - - //find the molecule index - for (int i = 0; i < pb->pb_stats->num_feasible_blocks; 
i++) { - if (pb->pb_stats->feasible_blocks[i] == molecule) { - found_molecule = true; - molecule_index = i; - } - } - - //if it is not in the array, return - if (found_molecule == false) { - return; - } - - //Otherwise, shift the molecules while removing the specified molecule - for (int j = molecule_index; j < pb->pb_stats->num_feasible_blocks - 1; j++) { - pb->pb_stats->feasible_blocks[j] = pb->pb_stats->feasible_blocks[j + 1]; - } - pb->pb_stats->num_feasible_blocks--; -} - -void add_molecule_to_pb_stats_candidates(t_pack_molecule* molecule, - std::map& gain, - t_pb* pb, - int max_queue_size, - AttractionInfo& attraction_groups) { - int i, j; - int num_molecule_failures = 0; - - AttractGroupId cluster_att_grp = pb->pb_stats->attraction_grp_id; - - /* When the clusterer packs with attraction groups the goal is to - * pack more densely. Removing failed molecules to make room for the exploration of - * more molecules helps to achieve this purpose. - */ - if (attraction_groups.num_attraction_groups() > 0) { - auto got = pb->pb_stats->atom_failures.find(molecule->atom_block_ids[0]); - if (got == pb->pb_stats->atom_failures.end()) { - num_molecule_failures = 0; - } else { - num_molecule_failures = got->second; - } - - if (num_molecule_failures > 0) { - remove_molecule_from_pb_stats_candidates(molecule, pb); - return; - } - } - - for (i = 0; i < pb->pb_stats->num_feasible_blocks; i++) { - if (pb->pb_stats->feasible_blocks[i] == molecule) { - return; // already in queue, do nothing - } - } - - if (pb->pb_stats->num_feasible_blocks >= max_queue_size - 1) { - /* maximum size for array, remove smallest gain element and sort */ - if (get_molecule_gain(molecule, gain, cluster_att_grp, attraction_groups, num_molecule_failures) > get_molecule_gain(pb->pb_stats->feasible_blocks[0], gain, cluster_att_grp, attraction_groups, num_molecule_failures)) { - /* single loop insertion sort */ - for (j = 0; j < pb->pb_stats->num_feasible_blocks - 1; j++) { - if 
(get_molecule_gain(molecule, gain, cluster_att_grp, attraction_groups, num_molecule_failures) <= get_molecule_gain(pb->pb_stats->feasible_blocks[j + 1], gain, cluster_att_grp, attraction_groups, num_molecule_failures)) { - pb->pb_stats->feasible_blocks[j] = molecule; - break; - } else { - pb->pb_stats->feasible_blocks[j] = pb->pb_stats->feasible_blocks[j + 1]; - } - } - if (j == pb->pb_stats->num_feasible_blocks - 1) { - pb->pb_stats->feasible_blocks[j] = molecule; - } - } - } else { - /* Expand array and single loop insertion sort */ - for (j = pb->pb_stats->num_feasible_blocks - 1; j >= 0; j--) { - if (get_molecule_gain(pb->pb_stats->feasible_blocks[j], gain, cluster_att_grp, attraction_groups, num_molecule_failures) > get_molecule_gain(molecule, gain, cluster_att_grp, attraction_groups, num_molecule_failures)) { - pb->pb_stats->feasible_blocks[j + 1] = pb->pb_stats->feasible_blocks[j]; - } else { - pb->pb_stats->feasible_blocks[j + 1] = molecule; - break; - } - } - if (j < 0) { - pb->pb_stats->feasible_blocks[0] = molecule; - } - pb->pb_stats->num_feasible_blocks++; - } -} - -/*****************************************/ -void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, - const Prepacker& prepacker, - t_clustering_data& clustering_data, - int num_molecules) { - /* Allocates the main data structures used for clustering and properly * - * initializes them. */ - const AtomContext& atom_ctx = g_vpr_ctx.atom(); - - /* alloc and load list of molecules to pack */ - clustering_data.unclustered_list_head = new t_molecule_link[max_molecule_stats.num_used_ext_inputs + 1]; - clustering_data.unclustered_list_head_size = max_molecule_stats.num_used_ext_inputs + 1; - - for (int i = 0; i <= max_molecule_stats.num_used_ext_inputs; i++) { - clustering_data.unclustered_list_head[i] = t_molecule_link(); - clustering_data.unclustered_list_head[i].next = nullptr; - } - - // Create a sorted list of molecules, sorted on increasing molecule base gain. 
- std::vector molecules_vector = prepacker.get_molecules_vector(); - VTR_ASSERT(molecules_vector.size() == (size_t)num_molecules); - std::stable_sort(molecules_vector.begin(), - molecules_vector.end(), - [](t_pack_molecule* a, t_pack_molecule* b) { - return a->base_gain < b->base_gain; - }); - - clustering_data.memory_pool = new t_molecule_link[num_molecules]; - t_molecule_link* next_ptr = clustering_data.memory_pool; - - for (t_pack_molecule* mol : molecules_vector) { - //Figure out how many external inputs are used by this molecule - t_molecule_stats molecule_stats = calc_molecule_stats(mol, atom_ctx.nlist); - int ext_inps = molecule_stats.num_used_ext_inputs; - - //Insert the molecule into the unclustered lists by number of external inputs - next_ptr->moleculeptr = mol; - next_ptr->next = clustering_data.unclustered_list_head[ext_inps].next; - clustering_data.unclustered_list_head[ext_inps].next = next_ptr; - - next_ptr++; - } -} - -/*****************************************/ - -t_pack_molecule* get_molecule_by_num_ext_inputs(const int ext_inps, - const enum e_removal_policy remove_flag, - t_molecule_link* unclustered_list_head, - LegalizationClusterId legalization_cluster_id, - const ClusterLegalizer& cluster_legalizer) { - - t_molecule_link* prev_ptr = &unclustered_list_head[ext_inps]; - t_molecule_link* ptr = unclustered_list_head[ext_inps].next; - while (ptr != nullptr) { - /* TODO: Get better candidate atom block in future, eg. 
return most timing critical or some other smarter metric */ - if (!cluster_legalizer.is_mol_clustered(ptr->moleculeptr)) { - /* TODO: I should be using a better filtering check especially when I'm - * dealing with multiple clock/multiple global reset signals where the clock/reset - * packed in matters, need to do later when I have the circuits to check my work */ - if (cluster_legalizer.is_molecule_compatible(ptr->moleculeptr, legalization_cluster_id)) { - return ptr->moleculeptr; - } - prev_ptr = ptr; - } - - else if (remove_flag == REMOVE_CLUSTERED) { - VTR_ASSERT(0); /* this doesn't work right now with 2 the pass packing for each complex block */ - prev_ptr->next = ptr->next; - } - - ptr = ptr->next; - } - - return nullptr; -} - -/*****************************************/ -t_pack_molecule* get_free_molecule_with_most_ext_inputs_for_cluster(t_pb* cur_pb, - t_molecule_link* unclustered_list_head, - const int& unclustered_list_head_size, - LegalizationClusterId legalization_cluster_id, - const ClusterLegalizer& cluster_legalizer) { - /* - * TODO: Analyze if this function is useful in more detail, also, should probably not include clock in input count - */ - - int inputs_avail = 0; - - for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { - inputs_avail += cur_pb->pb_stats->input_pins_used[i].size(); - } - - t_pack_molecule* molecule = nullptr; - - if (inputs_avail >= unclustered_list_head_size) { - inputs_avail = unclustered_list_head_size - 1; - } - - for (int ext_inps = inputs_avail; ext_inps >= 0; ext_inps--) { - molecule = get_molecule_by_num_ext_inputs(ext_inps, - LEAVE_CLUSTERED, - unclustered_list_head, - legalization_cluster_id, - cluster_legalizer); - if (molecule != nullptr) { - break; - } - } - return molecule; -} - /*****************************************/ -void update_connection_gain_values(const AtomNetId net_id, - const AtomBlockId clustered_blk_id, - t_pb* cur_pb, - const ClusterLegalizer& cluster_legalizer, - enum 
e_net_relation_to_clustered_block net_relation_to_clustered_block) { - /*This function is called when the connectiongain values on the net net_id* - *require updating. */ - const AtomContext& atom_ctx = g_vpr_ctx.atom(); - - int num_internal_connections, num_open_connections, num_stuck_connections; - - num_internal_connections = num_open_connections = num_stuck_connections = 0; - - LegalizationClusterId legalization_cluster_id = cluster_legalizer.get_atom_cluster(clustered_blk_id); - - /* may wish to speed things up by ignoring clock nets since they are high fanout */ - - for (auto pin_id : atom_ctx.nlist.net_pins(net_id)) { - auto blk_id = atom_ctx.nlist.pin_block(pin_id); - if (cluster_legalizer.get_atom_cluster(blk_id) == legalization_cluster_id - && is_atom_blk_in_pb(blk_id, atom_ctx.lookup.atom_pb(clustered_blk_id))) { - num_internal_connections++; - } else if (!cluster_legalizer.is_atom_clustered(blk_id)) { - num_open_connections++; - } else { - num_stuck_connections++; - } - } - - if (net_relation_to_clustered_block == OUTPUT) { - for (auto pin_id : atom_ctx.nlist.net_sinks(net_id)) { - auto blk_id = atom_ctx.nlist.pin_block(pin_id); - VTR_ASSERT(blk_id); - - if (!cluster_legalizer.is_atom_clustered(blk_id)) { - /* TODO: Gain function accurate only if net has one connection to block, - * TODO: Should we handle case where net has multi-connection to block? 
- * Gain computation is only off by a bit in this case */ - if (cur_pb->pb_stats->connectiongain.count(blk_id) == 0) { - cur_pb->pb_stats->connectiongain[blk_id] = 0; - } - - if (num_internal_connections > 1) { - cur_pb->pb_stats->connectiongain[blk_id] -= 1 / (float)(num_open_connections + 1.5 * num_stuck_connections + 1 + 0.1); - } - cur_pb->pb_stats->connectiongain[blk_id] += 1 / (float)(num_open_connections + 1.5 * num_stuck_connections + 0.1); - } - } - } - - if (net_relation_to_clustered_block == INPUT) { - /*Calculate the connectiongain for the atom block which is driving * - *the atom net that is an input to an atom block in the cluster */ - - auto driver_pin_id = atom_ctx.nlist.net_driver(net_id); - auto blk_id = atom_ctx.nlist.pin_block(driver_pin_id); - - if (!cluster_legalizer.is_atom_clustered(blk_id)) { - if (cur_pb->pb_stats->connectiongain.count(blk_id) == 0) { - cur_pb->pb_stats->connectiongain[blk_id] = 0; - } - if (num_internal_connections > 1) { - cur_pb->pb_stats->connectiongain[blk_id] -= 1 / (float)(num_open_connections + 1.5 * num_stuck_connections + 0.1 + 1); - } - cur_pb->pb_stats->connectiongain[blk_id] += 1 / (float)(num_open_connections + 1.5 * num_stuck_connections + 0.1); - } - } -} - -/*****************************************/ -void update_timing_gain_values(const AtomNetId net_id, - t_pb* cur_pb, - const ClusterLegalizer& cluster_legalizer, - enum e_net_relation_to_clustered_block net_relation_to_clustered_block, - const SetupTimingInfo& timing_info, - const std::unordered_set& is_global, - const std::unordered_set& net_output_feeds_driving_block_input) { - /*This function is called when the timing_gain values on the atom net* - *net_id requires updating. */ - float timinggain; - - const AtomContext& atom_ctx = g_vpr_ctx.atom(); - - /* Check if this atom net lists its driving atom block twice. If so, avoid * - * double counting this atom block by skipping the first (driving) pin. 
*/ - auto pins = atom_ctx.nlist.net_pins(net_id); - if (net_output_feeds_driving_block_input.count(net_id) != 0) - pins = atom_ctx.nlist.net_sinks(net_id); - - if (net_relation_to_clustered_block == OUTPUT - && !is_global.count(net_id)) { - for (auto pin_id : pins) { - auto blk_id = atom_ctx.nlist.pin_block(pin_id); - if (!cluster_legalizer.is_atom_clustered(blk_id)) { - timinggain = timing_info.setup_pin_criticality(pin_id); - - if (cur_pb->pb_stats->timinggain.count(blk_id) == 0) { - cur_pb->pb_stats->timinggain[blk_id] = 0; - } - if (timinggain > cur_pb->pb_stats->timinggain[blk_id]) - cur_pb->pb_stats->timinggain[blk_id] = timinggain; - } - } - } - - if (net_relation_to_clustered_block == INPUT - && !is_global.count(net_id)) { - /*Calculate the timing gain for the atom block which is driving * - *the atom net that is an input to a atom block in the cluster */ - auto driver_pin = atom_ctx.nlist.net_driver(net_id); - auto new_blk_id = atom_ctx.nlist.pin_block(driver_pin); - - if (!cluster_legalizer.is_atom_clustered(new_blk_id)) { - for (auto pin_id : atom_ctx.nlist.net_sinks(net_id)) { - timinggain = timing_info.setup_pin_criticality(pin_id); - - if (cur_pb->pb_stats->timinggain.count(new_blk_id) == 0) { - cur_pb->pb_stats->timinggain[new_blk_id] = 0; - } - if (timinggain > cur_pb->pb_stats->timinggain[new_blk_id]) - cur_pb->pb_stats->timinggain[new_blk_id] = timinggain; - } - } - } -} - -/*****************************************/ -void mark_and_update_partial_gain(const AtomNetId net_id, - enum e_gain_update gain_flag, - const AtomBlockId clustered_blk_id, - const ClusterLegalizer& cluster_legalizer, - bool timing_driven, - bool connection_driven, - enum e_net_relation_to_clustered_block net_relation_to_clustered_block, - const SetupTimingInfo& timing_info, - const std::unordered_set& is_global, - const int high_fanout_net_threshold, - const std::unordered_set& net_output_feeds_driving_block_input) { - - const AtomContext& atom_ctx = g_vpr_ctx.atom(); - t_pb* 
cur_pb = atom_ctx.lookup.atom_pb(clustered_blk_id)->parent_pb; - cur_pb = get_top_level_pb(cur_pb); - - if (int(atom_ctx.nlist.net_sinks(net_id).size()) > high_fanout_net_threshold) { - /* Optimization: It can be too runtime costly for marking all sinks for - * a high fanout-net that probably has no hope of ever getting packed, - * thus ignore those high fanout nets */ - if (!is_global.count(net_id)) { - /* If no low/medium fanout nets, we may need to consider - * high fan-out nets for packing, so select one and store it */ - AtomNetId stored_net = cur_pb->pb_stats->tie_break_high_fanout_net; - if (!stored_net || atom_ctx.nlist.net_sinks(net_id).size() < atom_ctx.nlist.net_sinks(stored_net).size()) { - cur_pb->pb_stats->tie_break_high_fanout_net = net_id; - } - } - return; - } - - /* Mark atom net as being visited, if necessary. */ - - if (cur_pb->pb_stats->num_pins_of_net_in_pb.count(net_id) == 0) { - cur_pb->pb_stats->marked_nets.push_back(net_id); - } - - /* Update gains of affected blocks. */ - - if (gain_flag == GAIN) { - /* Check if this net is connected to it's driver block multiple times (i.e. as both an output and input) - * If so, avoid double counting by skipping the first (driving) pin. */ - - auto pins = atom_ctx.nlist.net_pins(net_id); - if (net_output_feeds_driving_block_input.count(net_id) != 0) - //We implicitly assume here that net_output_feeds_driver_block_input[net_id] is 2 - //(i.e. 
the net loops back to the block only once) - pins = atom_ctx.nlist.net_sinks(net_id); - - if (cur_pb->pb_stats->num_pins_of_net_in_pb.count(net_id) == 0) { - for (auto pin_id : pins) { - auto blk_id = atom_ctx.nlist.pin_block(pin_id); - if (!cluster_legalizer.is_atom_clustered(blk_id)) { - if (cur_pb->pb_stats->sharinggain.count(blk_id) == 0) { - cur_pb->pb_stats->marked_blocks.push_back(blk_id); - cur_pb->pb_stats->sharinggain[blk_id] = 1; - cur_pb->pb_stats->hillgain[blk_id] = 1 - num_ext_inputs_atom_block(blk_id); - } else { - cur_pb->pb_stats->sharinggain[blk_id]++; - cur_pb->pb_stats->hillgain[blk_id]++; - } - } - } - } - - if (connection_driven) { - update_connection_gain_values(net_id, clustered_blk_id, cur_pb, - cluster_legalizer, - net_relation_to_clustered_block); - } - - if (timing_driven) { - update_timing_gain_values(net_id, cur_pb, cluster_legalizer, - net_relation_to_clustered_block, - timing_info, - is_global, - net_output_feeds_driving_block_input); - } - } - if (cur_pb->pb_stats->num_pins_of_net_in_pb.count(net_id) == 0) { - cur_pb->pb_stats->num_pins_of_net_in_pb[net_id] = 0; - } - cur_pb->pb_stats->num_pins_of_net_in_pb[net_id]++; -} - -/*****************************************/ -void update_total_gain(float alpha, float beta, bool timing_driven, bool connection_driven, t_pb* pb, AttractionInfo& attraction_groups) { - const AtomContext& atom_ctx = g_vpr_ctx.atom(); - t_pb* cur_pb = pb; - - cur_pb = get_top_level_pb(cur_pb); - AttractGroupId cluster_att_grp_id; - - cluster_att_grp_id = cur_pb->pb_stats->attraction_grp_id; - - for (AtomBlockId blk_id : cur_pb->pb_stats->marked_blocks) { - //Initialize connectiongain and sharinggain if - //they have not previously been updated for the block - if (cur_pb->pb_stats->connectiongain.count(blk_id) == 0) { - cur_pb->pb_stats->connectiongain[blk_id] = 0; - } - if (cur_pb->pb_stats->sharinggain.count(blk_id) == 0) { - cur_pb->pb_stats->sharinggain[blk_id] = 0; - } - - AttractGroupId atom_grp_id = 
attraction_groups.get_atom_attraction_group(blk_id); - if (atom_grp_id != AttractGroupId::INVALID() && atom_grp_id == cluster_att_grp_id) { - //increase gain of atom based on attraction group gain - float att_grp_gain = attraction_groups.get_attraction_group_gain(atom_grp_id); - cur_pb->pb_stats->gain[blk_id] += att_grp_gain; - } - - /* Todo: This was used to explore different normalization options, can - * be made more efficient once we decide on which one to use */ - int num_used_input_pins = atom_ctx.nlist.block_input_pins(blk_id).size(); - int num_used_output_pins = atom_ctx.nlist.block_output_pins(blk_id).size(); - /* end todo */ - - /* Calculate area-only cost function */ - int num_used_pins = num_used_input_pins + num_used_output_pins; - VTR_ASSERT(num_used_pins > 0); - if (connection_driven) { - /*try to absorb as many connections as possible*/ - cur_pb->pb_stats->gain[blk_id] = ((1 - beta) - * (float)cur_pb->pb_stats->sharinggain[blk_id] - + beta * (float)cur_pb->pb_stats->connectiongain[blk_id]) - / (num_used_pins); - } else { - cur_pb->pb_stats->gain[blk_id] = ((float)cur_pb->pb_stats->sharinggain[blk_id]) - / (num_used_pins); - } - - /* Add in timing driven cost into cost function */ - if (timing_driven) { - cur_pb->pb_stats->gain[blk_id] = alpha - * cur_pb->pb_stats->timinggain[blk_id] - + (1.0 - alpha) * (float)cur_pb->pb_stats->gain[blk_id]; - } - } -} - -/*****************************************/ -void update_cluster_stats(const t_pack_molecule* molecule, - const ClusterLegalizer& cluster_legalizer, - const std::unordered_set& is_clock, - const std::unordered_set& is_global, - const bool global_clocks, - const float alpha, - const float beta, - const bool timing_driven, - const bool connection_driven, - const int high_fanout_net_threshold, - const SetupTimingInfo& timing_info, - AttractionInfo& attraction_groups, - const std::unordered_set& net_output_feeds_driving_block_input) { - - int molecule_size; - int iblock; - t_pb *cur_pb, *cb; - - const 
AtomContext& atom_ctx = g_vpr_ctx.atom(); - molecule_size = get_array_size_of_molecule(molecule); - cb = nullptr; - - for (iblock = 0; iblock < molecule_size; iblock++) { - auto blk_id = molecule->atom_block_ids[iblock]; - if (!blk_id) { - continue; - } - - const t_pb* atom_pb = atom_ctx.lookup.atom_pb(blk_id); - VTR_ASSERT(atom_pb); - - cur_pb = atom_pb->parent_pb; - - //Update attraction group - AttractGroupId atom_grp_id = attraction_groups.get_atom_attraction_group(blk_id); - - while (cur_pb) { - /* reset list of feasible blocks */ - if (cur_pb->is_root()) { - cb = cur_pb; - } - cur_pb->pb_stats->num_feasible_blocks = NOT_VALID; - - if (atom_grp_id != AttractGroupId::INVALID()) { - /* TODO: Allow clusters to have more than one attraction group. */ - cur_pb->pb_stats->attraction_grp_id = atom_grp_id; - } - - cur_pb = cur_pb->parent_pb; - } - - /* Outputs first */ - for (auto pin_id : atom_ctx.nlist.block_output_pins(blk_id)) { - auto net_id = atom_ctx.nlist.pin_net(pin_id); - if (!is_clock.count(net_id) || !global_clocks) { - mark_and_update_partial_gain(net_id, GAIN, blk_id, cluster_legalizer, - timing_driven, - connection_driven, OUTPUT, - timing_info, - is_global, - high_fanout_net_threshold, - net_output_feeds_driving_block_input); - } else { - mark_and_update_partial_gain(net_id, NO_GAIN, blk_id, cluster_legalizer, - timing_driven, - connection_driven, OUTPUT, - timing_info, - is_global, - high_fanout_net_threshold, - net_output_feeds_driving_block_input); - } - } - - /* Next Inputs */ - for (auto pin_id : atom_ctx.nlist.block_input_pins(blk_id)) { - auto net_id = atom_ctx.nlist.pin_net(pin_id); - mark_and_update_partial_gain(net_id, GAIN, blk_id, cluster_legalizer, - timing_driven, connection_driven, - INPUT, - timing_info, - is_global, - high_fanout_net_threshold, - net_output_feeds_driving_block_input); - } - - /* Finally Clocks */ - for (auto pin_id : atom_ctx.nlist.block_clock_pins(blk_id)) { - auto net_id = atom_ctx.nlist.pin_net(pin_id); - if 
(global_clocks) { - mark_and_update_partial_gain(net_id, NO_GAIN, blk_id, cluster_legalizer, - timing_driven, connection_driven, INPUT, - timing_info, - is_global, - high_fanout_net_threshold, - net_output_feeds_driving_block_input); - } else { - mark_and_update_partial_gain(net_id, GAIN, blk_id, cluster_legalizer, - timing_driven, connection_driven, INPUT, - timing_info, - is_global, - high_fanout_net_threshold, - net_output_feeds_driving_block_input); - } - } - - update_total_gain(alpha, beta, timing_driven, connection_driven, - atom_pb->parent_pb, attraction_groups); - } - - // if this molecule came from the transitive fanout candidates remove it - if (cb) { - cb->pb_stats->transitive_fanout_candidates.erase(molecule->atom_block_ids[molecule->root]); - cb->pb_stats->explore_transitive_fanout = true; - } -} - -t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, - AttractionInfo& attraction_groups, - const enum e_gain_type gain_mode, - const Prepacker& prepacker, - const ClusterLegalizer& cluster_legalizer, - vtr::vector>& clb_inter_blk_nets, - const LegalizationClusterId legalization_cluster_id, - bool prioritize_transitive_connectivity, - int transitive_fanout_threshold, - const int feasible_block_array_size, - const std::map>& primitive_candidate_block_types) { - /* - * This routine populates a list of feasible blocks outside the cluster, then returns the best candidate for the cluster. - * If there are no feasible blocks it returns a nullptr. - */ - - if (gain_mode == HILL_CLIMBING) { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "Hill climbing not supported yet, error out.\n"); - } - - // 1. 
Find unpacked molecules based on criticality and strong connectedness (connected by low fanout nets) with current cluster - if (cur_pb->pb_stats->num_feasible_blocks == NOT_VALID) { - add_cluster_molecule_candidates_by_connectivity_and_timing(cur_pb, - legalization_cluster_id, - prepacker, - cluster_legalizer, - feasible_block_array_size, - attraction_groups); - } - - if (prioritize_transitive_connectivity) { - // 2. Find unpacked molecules based on transitive connections (eg. 2 hops away) with current cluster - if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->explore_transitive_fanout) { - add_cluster_molecule_candidates_by_transitive_connectivity(cur_pb, - prepacker, - cluster_legalizer, - clb_inter_blk_nets, - legalization_cluster_id, - transitive_fanout_threshold, - feasible_block_array_size, - attraction_groups); - } - - // 3. Find unpacked molecules based on weak connectedness (connected by high fanout nets) with current cluster - if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->tie_break_high_fanout_net) { - add_cluster_molecule_candidates_by_highfanout_connectivity(cur_pb, - legalization_cluster_id, - prepacker, - cluster_legalizer, - feasible_block_array_size, - attraction_groups); - } - } else { //Reverse order - // 3. Find unpacked molecules based on weak connectedness (connected by high fanout nets) with current cluster - if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->tie_break_high_fanout_net) { - add_cluster_molecule_candidates_by_highfanout_connectivity(cur_pb, - legalization_cluster_id, - prepacker, - cluster_legalizer, - feasible_block_array_size, - attraction_groups); - } - - // 2. Find unpacked molecules based on transitive connections (eg. 
2 hops away) with current cluster - if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->explore_transitive_fanout) { - add_cluster_molecule_candidates_by_transitive_connectivity(cur_pb, - prepacker, - cluster_legalizer, - clb_inter_blk_nets, - legalization_cluster_id, - transitive_fanout_threshold, - feasible_block_array_size, - attraction_groups); - } - } - - // 4. Find unpacked molecules based on attraction group of the current cluster (if the cluster has an attraction group) - if (cur_pb->pb_stats->num_feasible_blocks == 0) { - add_cluster_molecule_candidates_by_attraction_group(cur_pb, - prepacker, - cluster_legalizer, - attraction_groups, - feasible_block_array_size, - legalization_cluster_id, - primitive_candidate_block_types); - } - /* Grab highest gain molecule */ - t_pack_molecule* molecule = nullptr; - if (cur_pb->pb_stats->num_feasible_blocks > 0) { - cur_pb->pb_stats->num_feasible_blocks--; - int index = cur_pb->pb_stats->num_feasible_blocks; - molecule = cur_pb->pb_stats->feasible_blocks[index]; - VTR_ASSERT(!cluster_legalizer.is_mol_clustered(molecule)); - return molecule; - } - - return molecule; -} - -void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur_pb, - LegalizationClusterId legalization_cluster_id, - const Prepacker& prepacker, - const ClusterLegalizer& cluster_legalizer, - const int feasible_block_array_size, - AttractionInfo& attraction_groups) { - VTR_ASSERT(cur_pb->pb_stats->num_feasible_blocks == NOT_VALID); - - cur_pb->pb_stats->num_feasible_blocks = 0; - cur_pb->pb_stats->explore_transitive_fanout = true; /* If no legal molecules found, enable exploration of molecules two hops away */ - - for (AtomBlockId blk_id : cur_pb->pb_stats->marked_blocks) { - if (!cluster_legalizer.is_atom_clustered(blk_id)) { - t_pack_molecule* molecule = prepacker.get_atom_molecule(blk_id); - if (!cluster_legalizer.is_mol_clustered(molecule)) { - if (cluster_legalizer.is_molecule_compatible(molecule, 
legalization_cluster_id)) { - add_molecule_to_pb_stats_candidates(molecule, - cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); - } - } - } - } -} - -void add_cluster_molecule_candidates_by_highfanout_connectivity(t_pb* cur_pb, - LegalizationClusterId legalization_cluster_id, - const Prepacker& prepacker, - const ClusterLegalizer& cluster_legalizer, - const int feasible_block_array_size, - AttractionInfo& attraction_groups) { - /* Because the packer ignores high fanout nets when marking what blocks - * to consider, use one of the ignored high fanout net to fill up lightly - * related blocks */ - const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; - - AtomNetId net_id = cur_pb->pb_stats->tie_break_high_fanout_net; - - int count = 0; - for (auto pin_id : atom_nlist.net_pins(net_id)) { - if (count >= AAPACK_MAX_HIGH_FANOUT_EXPLORE) { - break; - } - - AtomBlockId blk_id = atom_nlist.pin_block(pin_id); - - if (!cluster_legalizer.is_atom_clustered(blk_id)) { - t_pack_molecule* molecule = prepacker.get_atom_molecule(blk_id); - if (!cluster_legalizer.is_mol_clustered(molecule)) { - if (cluster_legalizer.is_molecule_compatible(molecule, legalization_cluster_id)) { - add_molecule_to_pb_stats_candidates(molecule, - cur_pb->pb_stats->gain, cur_pb, std::min(feasible_block_array_size, AAPACK_MAX_HIGH_FANOUT_EXPLORE), attraction_groups); - count++; - } - } - } - } - cur_pb->pb_stats->tie_break_high_fanout_net = AtomNetId::INVALID(); /* Mark off that this high fanout net has been considered */ -} - -void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, - const Prepacker& prepacker, - const ClusterLegalizer& cluster_legalizer, - AttractionInfo& attraction_groups, - const int feasible_block_array_size, - LegalizationClusterId legalization_cluster_id, - const std::map>& primitive_candidate_block_types) { - const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; - - auto cluster_type = 
cluster_legalizer.get_cluster_type(legalization_cluster_id); - - /* - * For each cluster, we want to explore the attraction group molecules as potential - * candidates for the cluster a limited number of times. This limit is imposed because - * if the cluster belongs to a very large attraction group, we could potentially search - * through its attraction group molecules for a very long time. - * Defining a number of times to search through the attraction groups (i.e. number of - * attraction group pulls) determines how many times we search through the cluster's attraction - * group molecules for candidate molecules. - */ - int num_pulls = attraction_groups.get_att_group_pulls(); - if (cur_pb->pb_stats->pulled_from_atom_groups < num_pulls) { - cur_pb->pb_stats->pulled_from_atom_groups++; - } else { - return; - } - - AttractGroupId grp_id = cur_pb->pb_stats->attraction_grp_id; - if (grp_id == AttractGroupId::INVALID()) { - return; - } - - AttractionGroup& group = attraction_groups.get_attraction_group_info(grp_id); - std::vector available_atoms; - for (AtomBlockId atom_id : group.group_atoms) { - const auto& atom_model = atom_nlist.block_model(atom_id); - auto itr = primitive_candidate_block_types.find(atom_model); - VTR_ASSERT(itr != primitive_candidate_block_types.end()); - const std::vector& candidate_types = itr->second; - - //Only consider molecules that are unpacked and of the correct type - if (!cluster_legalizer.is_atom_clustered(atom_id) - && std::find(candidate_types.begin(), candidate_types.end(), cluster_type) != candidate_types.end()) { - available_atoms.push_back(atom_id); - } - } - - //int num_available_atoms = group.group_atoms.size(); - int num_available_atoms = available_atoms.size(); - if (num_available_atoms == 0) { - return; - } - - if (num_available_atoms < 500) { - //for (AtomBlockId atom_id : group.group_atoms) { - for (AtomBlockId atom_id : available_atoms) { - const auto& atom_model = atom_nlist.block_model(atom_id); - auto itr = 
primitive_candidate_block_types.find(atom_model); - VTR_ASSERT(itr != primitive_candidate_block_types.end()); - const std::vector& candidate_types = itr->second; - - //Only consider molecules that are unpacked and of the correct type - if (!cluster_legalizer.is_atom_clustered(atom_id) - && std::find(candidate_types.begin(), candidate_types.end(), cluster_type) != candidate_types.end()) { - t_pack_molecule* molecule = prepacker.get_atom_molecule(atom_id); - if (!cluster_legalizer.is_mol_clustered(molecule)) { - if (cluster_legalizer.is_molecule_compatible(molecule, legalization_cluster_id)) { - add_molecule_to_pb_stats_candidates(molecule, - cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); - } - } - } - } - return; - } - - int min = 0; - int max = num_available_atoms - 1; - - for (int j = 0; j < 500; j++) { - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<> distr(min, max); - int selected_atom = distr(gen); - - //AtomBlockId blk_id = group.group_atoms[selected_atom]; - AtomBlockId blk_id = available_atoms[selected_atom]; - const auto& atom_model = atom_nlist.block_model(blk_id); - auto itr = primitive_candidate_block_types.find(atom_model); - VTR_ASSERT(itr != primitive_candidate_block_types.end()); - const std::vector& candidate_types = itr->second; - - //Only consider molecules that are unpacked and of the correct type - if (!cluster_legalizer.is_atom_clustered(blk_id) - && std::find(candidate_types.begin(), candidate_types.end(), cluster_type) != candidate_types.end()) { - t_pack_molecule* molecule = prepacker.get_atom_molecule(blk_id); - if (!cluster_legalizer.is_mol_clustered(molecule)) { - if (cluster_legalizer.is_molecule_compatible(molecule, legalization_cluster_id)) { - add_molecule_to_pb_stats_candidates(molecule, - cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); - } - } - } - } -} - -void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur_pb, - 
const Prepacker& prepacker, - const ClusterLegalizer& cluster_legalizer, - vtr::vector>& clb_inter_blk_nets, - const LegalizationClusterId legalization_cluster_id, - int transitive_fanout_threshold, - const int feasible_block_array_size, - AttractionInfo& attraction_groups) { - //TODO: For now, only done by fan-out; should also consider fan-in - cur_pb->pb_stats->explore_transitive_fanout = false; - - /* First time finding transitive fanout candidates therefore alloc and load them */ - load_transitive_fanout_candidates(legalization_cluster_id, - cur_pb->pb_stats, - prepacker, - cluster_legalizer, - clb_inter_blk_nets, - transitive_fanout_threshold); - /* Only consider candidates that pass a very simple legality check */ - for (const auto& transitive_candidate : cur_pb->pb_stats->transitive_fanout_candidates) { - t_pack_molecule* molecule = transitive_candidate.second; - if (!cluster_legalizer.is_mol_clustered(molecule)) { - if (cluster_legalizer.is_molecule_compatible(molecule, legalization_cluster_id)) { - add_molecule_to_pb_stats_candidates(molecule, - cur_pb->pb_stats->gain, cur_pb, std::min(feasible_block_array_size, AAPACK_MAX_TRANSITIVE_EXPLORE), attraction_groups); - } - } - } -} - -/*****************************************/ -t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, - AttractionInfo& attraction_groups, - const bool allow_unrelated_clustering, - const bool prioritize_transitive_connectivity, - const int transitive_fanout_threshold, - const int feasible_block_array_size, - int* num_unrelated_clustering_attempts, - const Prepacker& prepacker, - const ClusterLegalizer& cluster_legalizer, - vtr::vector>& clb_inter_blk_nets, - LegalizationClusterId legalization_cluster_id, - int verbosity, - t_molecule_link* unclustered_list_head, - const int& unclustered_list_head_size, - const std::map>& primitive_candidate_block_types) { - /* Finds the block with the greatest gain that satisfies the - * input, clock and capacity constraints of a cluster that are 
- * passed in. If no suitable block is found it returns nullptr. - */ - - VTR_ASSERT(cur_pb->is_root()); - - /* If cannot pack into primitive, try packing into cluster */ - - auto best_molecule = get_highest_gain_molecule(cur_pb, attraction_groups, - NOT_HILL_CLIMBING, - prepacker, cluster_legalizer, clb_inter_blk_nets, - legalization_cluster_id, prioritize_transitive_connectivity, - transitive_fanout_threshold, feasible_block_array_size, primitive_candidate_block_types); - - /* If no blocks have any gain to the current cluster, the code above * - * will not find anything. However, another atom block with no inputs in * - * common with the cluster may still be inserted into the cluster. */ - - if (allow_unrelated_clustering) { - if (best_molecule == nullptr) { - if (*num_unrelated_clustering_attempts == 0) { - best_molecule = get_free_molecule_with_most_ext_inputs_for_cluster(cur_pb, - unclustered_list_head, - unclustered_list_head_size, - legalization_cluster_id, - cluster_legalizer); - (*num_unrelated_clustering_attempts)++; - VTR_LOGV(best_molecule && verbosity > 2, "\tFound unrelated molecule to cluster\n"); - } - } else { - *num_unrelated_clustering_attempts = 0; - } - } else { - VTR_LOGV(!best_molecule && verbosity > 2, "\tNo related molecule found and unrelated clustering disabled\n"); - } - - return best_molecule; -} - -t_molecule_stats calc_molecule_stats(const t_pack_molecule* molecule, const AtomNetlist& atom_nlist) { - t_molecule_stats molecule_stats; - - //Calculate the number of available pins on primitives within the molecule - for (auto blk : molecule->atom_block_ids) { - if (!blk) continue; - - ++molecule_stats.num_blocks; //Record number of valid blocks in molecule - - const t_model* model = atom_nlist.block_model(blk); - - for (const t_model_ports* input_port = model->inputs; input_port != nullptr; input_port = input_port->next) { - molecule_stats.num_input_pins += input_port->size; - } - - for (const t_model_ports* output_port = model->outputs; 
output_port != nullptr; output_port = output_port->next) { - molecule_stats.num_output_pins += output_port->size; - } - } - molecule_stats.num_pins = molecule_stats.num_input_pins + molecule_stats.num_output_pins; - - //Calculate the number of externally used pins - std::set molecule_atoms(molecule->atom_block_ids.begin(), molecule->atom_block_ids.end()); - for (auto blk : molecule->atom_block_ids) { - if (!blk) continue; - - for (auto pin : atom_nlist.block_pins(blk)) { - auto net = atom_nlist.pin_net(pin); - - auto pin_type = atom_nlist.pin_type(pin); - if (pin_type == PinType::SINK) { - auto driver_blk = atom_nlist.net_driver_block(net); - - if (molecule_atoms.count(driver_blk)) { - //Pin driven by a block within the molecule - //Does not count as an external connection - } else { - //Pin driven by a block outside the molecule - ++molecule_stats.num_used_ext_inputs; - } - - } else { - VTR_ASSERT(pin_type == PinType::DRIVER); - - bool net_leaves_molecule = false; - for (auto sink_pin : atom_nlist.net_sinks(net)) { - auto sink_blk = atom_nlist.pin_block(sink_pin); - - if (!molecule_atoms.count(sink_blk)) { - //There is at least one sink outside of the current molecule - net_leaves_molecule = true; - break; - } - } - - //We assume that any fanout occurs outside of the molecule, hence we only - //count one used output (even if there are multiple sinks outside the molecule) - if (net_leaves_molecule) { - ++molecule_stats.num_used_ext_outputs; - } - } - } - } - molecule_stats.num_used_ext_pins = molecule_stats.num_used_ext_inputs + molecule_stats.num_used_ext_outputs; - - return molecule_stats; -} - -float get_molecule_gain(t_pack_molecule* molecule, std::map& blk_gain, AttractGroupId cluster_attraction_group_id, AttractionInfo& attraction_groups, int num_molecule_failures) { - float gain; - int i; - int num_introduced_inputs_of_indirectly_related_block; - const AtomContext& atom_ctx = g_vpr_ctx.atom(); - - gain = 0; - float attraction_group_penalty = 0.1; - - 
num_introduced_inputs_of_indirectly_related_block = 0; - for (i = 0; i < get_array_size_of_molecule(molecule); i++) { - auto blk_id = molecule->atom_block_ids[i]; - if (blk_id) { - if (blk_gain.count(blk_id) > 0) { - gain += blk_gain[blk_id]; - } else { - /* This block has no connection with current cluster, penalize molecule for having this block - */ - for (auto pin_id : atom_ctx.nlist.block_input_pins(blk_id)) { - auto net_id = atom_ctx.nlist.pin_net(pin_id); - VTR_ASSERT(net_id); - - auto driver_pin_id = atom_ctx.nlist.net_driver(net_id); - VTR_ASSERT(driver_pin_id); - - auto driver_blk_id = atom_ctx.nlist.pin_block(driver_pin_id); - - num_introduced_inputs_of_indirectly_related_block++; - for (int iblk = 0; iblk < get_array_size_of_molecule(molecule); iblk++) { - if (molecule->atom_block_ids[iblk] && driver_blk_id == molecule->atom_block_ids[iblk]) { - //valid block which is driver (and hence not an input) - num_introduced_inputs_of_indirectly_related_block--; - break; - } - } - } - } - AttractGroupId atom_grp_id = attraction_groups.get_atom_attraction_group(blk_id); - if (atom_grp_id == cluster_attraction_group_id && cluster_attraction_group_id != AttractGroupId::INVALID()) { - float att_grp_gain = attraction_groups.get_attraction_group_gain(atom_grp_id); - gain += att_grp_gain; - } else if (cluster_attraction_group_id != AttractGroupId::INVALID() && atom_grp_id != cluster_attraction_group_id) { - gain -= attraction_group_penalty; - } - } - } - - gain += molecule->base_gain * 0.0001; /* Use base gain as tie breaker TODO: need to sweep this value and perhaps normalize */ - gain -= num_introduced_inputs_of_indirectly_related_block * (0.001); - - if (num_molecule_failures > 0 && attraction_groups.num_attraction_groups() > 0) { - gain -= 0.1 * num_molecule_failures; - } - - return gain; -} - -void load_transitive_fanout_candidates(LegalizationClusterId legalization_cluster_id, - t_pb_stats* pb_stats, - const Prepacker& prepacker, - const ClusterLegalizer& 
cluster_legalizer, - vtr::vector>& clb_inter_blk_nets, - int transitive_fanout_threshold) { - const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; - - // iterate over all the nets that have pins in this cluster - for (const auto net_id : pb_stats->marked_nets) { - // only consider small nets to constrain runtime - if (int(atom_nlist.net_pins(net_id).size()) < transitive_fanout_threshold + 1) { - // iterate over all the pins of the net - for (const auto pin_id : atom_nlist.net_pins(net_id)) { - AtomBlockId atom_blk_id = atom_nlist.pin_block(pin_id); - // get the transitive cluster - LegalizationClusterId tclb = cluster_legalizer.get_atom_cluster(atom_blk_id); - // if the block connected to this pin is packed in another cluster - if (tclb != legalization_cluster_id && tclb != LegalizationClusterId::INVALID()) { - // explore transitive nets from already packed cluster - for (AtomNetId tnet : clb_inter_blk_nets[tclb]) { - // iterate over all the pins of the net - for (AtomPinId tpin : atom_nlist.net_pins(tnet)) { - auto blk_id = atom_nlist.pin_block(tpin); - // This transitive atom is not packed, score and add - if (!cluster_legalizer.is_atom_clustered(blk_id)) { - auto& transitive_fanout_candidates = pb_stats->transitive_fanout_candidates; - - if (pb_stats->gain.count(blk_id) == 0) { - pb_stats->gain[blk_id] = 0.001; - } else { - pb_stats->gain[blk_id] += 0.001; - } - t_pack_molecule* molecule = prepacker.get_atom_molecule(blk_id); - if (!cluster_legalizer.is_mol_clustered(molecule)) { - transitive_fanout_candidates.insert({molecule->atom_block_ids[molecule->root], molecule}); - } - } - } - } - } - } - } - } -} - std::map> identify_primitive_candidate_block_types() { std::map> model_candidates; const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; @@ -1580,19 +447,6 @@ void print_le_count(int num_logic_le, VTR_LOG(" LEs used for registers only : %d\n\n", num_reg_le); } -t_pb* get_top_level_pb(t_pb* pb) { - t_pb* top_level_pb = pb; - - while (pb) { - top_level_pb 
= pb; - pb = pb->parent_pb; - } - - VTR_ASSERT(top_level_pb != nullptr); - - return top_level_pb; -} - void init_clb_atoms_lookup(vtr::vector>& atoms_lookup, const AtomContext& atom_ctx, const ClusteredNetlist& clb_nlist) { @@ -1607,3 +461,4 @@ void init_clb_atoms_lookup(vtr::vector #include #include "cluster_legalizer.h" -#include "pack_types.h" #include "vtr_vector.h" class AtomNetId; +class AttractionInfo; class ClusterBlockId; +class ClusterLegalizer; class ClusteredNetlist; class PreClusterDelayCalculator; class Prepacker; @@ -21,62 +22,6 @@ struct AtomContext; * @brief This file includes useful structs and functions for building and modifying clustering */ -constexpr int AAPACK_MAX_HIGH_FANOUT_EXPLORE = 10; /* For high-fanout nets that are ignored, consider a maximum of this many sinks, must be less than packer_opts.feasible_block_array_size */ -constexpr int AAPACK_MAX_TRANSITIVE_EXPLORE = 40; /* When investigating transitive fanout connections in packing, consider a maximum of this many molecules, must be less than packer_opts.feasible_block_array_size */ - -enum e_gain_update { - GAIN, - NO_GAIN -}; -enum e_feasibility { - FEASIBLE, - INFEASIBLE -}; -enum e_gain_type { - HILL_CLIMBING, - NOT_HILL_CLIMBING -}; -enum e_removal_policy { - REMOVE_CLUSTERED, - LEAVE_CLUSTERED -}; -/* TODO: REMOVE_CLUSTERED no longer used, remove */ -enum e_net_relation_to_clustered_block { - INPUT, - OUTPUT -}; - -/* Linked list structure. Stores one integer (iblk). 
*/ -struct t_molecule_link { - t_pack_molecule* moleculeptr; - t_molecule_link* next; -}; - -struct t_molecule_stats { - int num_blocks = 0; //Number of blocks across all primitives in molecule - - int num_pins = 0; //Number of pins across all primitives in molecule - int num_input_pins = 0; //Number of input pins across all primitives in molecule - int num_output_pins = 0; //Number of output pins across all primitives in molecule - - int num_used_ext_pins = 0; //Number of *used external* pins across all primitives in molecule - int num_used_ext_inputs = 0; //Number of *used external* input pins across all primitives in molecule - int num_used_ext_outputs = 0; //Number of *used external* output pins across all primitives in molecule -}; - -/* Useful data structures for creating or modifying clusters */ -struct t_clustering_data { - int unclustered_list_head_size = 0; - /* Keeps a linked list of the unclustered blocks to speed up looking for * - * unclustered blocks with a certain number of *external* inputs. * - * [0..lut_size]. Unclustered_list_head[i] points to the head of the * - * list of blocks with i inputs to be hooked up via external interconnect. */ - t_molecule_link* unclustered_list_head = nullptr; - - //Maintaining a linked list of free molecule data for speed - t_molecule_link* memory_pool = nullptr; -}; - /***********************************/ /* Clustering helper functions */ /***********************************/ @@ -91,11 +36,6 @@ void calc_init_packing_timing(const t_packer_opts& packer_opts, std::shared_ptr& timing_info, vtr::vector& atom_criticality); -/* - * @brief Free the clustering data structures. - */ -void free_clustering_data(t_clustering_data& clustering_data); - /* * @brief Check clustering legality and output it. */ @@ -104,68 +44,6 @@ void check_and_output_clustering(ClusterLegalizer& cluster_legalizer, const std::unordered_set& is_clock, const t_arch* arch); -/* - * @brief Determine if atom block is in pb. 
- */ -bool is_atom_blk_in_pb(const AtomBlockId blk_id, const t_pb* pb); - -/* - * @brief Add blk to list of feasible blocks sorted according to gain. - */ -void add_molecule_to_pb_stats_candidates(t_pack_molecule* molecule, - std::map& gain, - t_pb* pb, - int max_queue_size, - AttractionInfo& attraction_groups); - -/* - * @brief Remove blk from list of feasible blocks sorted according to gain. - * - * Useful for removing blocks that are repeatedly failing. If a block - * has been found to be illegal, we don't repeatedly consider it. - */ -void remove_molecule_from_pb_stats_candidates(t_pack_molecule* molecule, - t_pb* pb); -/* - * @brief Allocates and inits the data structures used for clustering. - * - * This method initializes the list of molecules to pack, the clustering data, - * and the net info. - */ -void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, - const Prepacker& prepacker, - t_clustering_data& clustering_data, - int num_molecules); - -/* - * @brief This routine returns an atom block which has not been clustered, has - * no connection to the current cluster, satisfies the cluster clock - * constraints, is a valid subblock inside the cluster, does not exceed - * the cluster subblock units available, and has ext_inps external inputs. - * Remove_flag controls whether or not blocks that have already been - * clustered are removed from the unclustered_list data structures. - * NB: to get a atom block regardless of clock constraints just set - * clocks_avail > 0. - */ -t_pack_molecule* get_molecule_by_num_ext_inputs(const int ext_inps, - const enum e_removal_policy remove_flag, - t_molecule_link* unclustered_list_head, - LegalizationClusterId legalization_cluster_id, - const ClusterLegalizer& cluster_legalizer); - -/* @brief This routine is used to find new blocks for clustering when there are - * no feasible blocks with any attraction to the current cluster (i.e. - * it finds blocks which are unconnected from the current cluster). 
It - * returns the atom block with the largest number of used inputs that - * satisfies the clocking and number of inputs constraints. If no - * suitable atom block is found, the routine returns nullptr. - */ -t_pack_molecule* get_free_molecule_with_most_ext_inputs_for_cluster(t_pb* cur_pb, - t_molecule_link* unclustered_list_head, - const int& unclustered_list_head_size, - LegalizationClusterId legalization_cluster_id, - const ClusterLegalizer& cluster_legalizer); - /* * @brief Print the header for the clustering progress table. */ @@ -190,194 +68,6 @@ void print_pack_status(int tot_num_molecules, void rebuild_attraction_groups(AttractionInfo& attraction_groups, const ClusterLegalizer& cluster_legalizer); -void update_connection_gain_values(const AtomNetId net_id, - const AtomBlockId clustered_blk_id, - t_pb* cur_pb, - const ClusterLegalizer& cluster_legalizer, - enum e_net_relation_to_clustered_block net_relation_to_clustered_block); - -void update_timing_gain_values(const AtomNetId net_id, - t_pb* cur_pb, - const ClusterLegalizer& cluster_legalizer, - enum e_net_relation_to_clustered_block net_relation_to_clustered_block, - const SetupTimingInfo& timing_info, - const std::unordered_set& is_global, - const std::unordered_set& net_output_feeds_driving_block_input); - -/* - * @brief Updates the marked data structures, and if gain_flag is GAIN, the gain - * when an atom block is added to a cluster. The sharinggain is the - * number of inputs that a atom block shares with blocks that are already - * in the cluster. Hillgain is the reduction in number of pins-required - * by adding a atom block to the cluster. The timinggain is the - * criticality of the most critical atom net between this atom block and - * an atom block in the cluster. 
- */ -void mark_and_update_partial_gain(const AtomNetId net_id, - enum e_gain_update gain_flag, - const AtomBlockId clustered_blk_id, - const ClusterLegalizer& cluster_legalizer, - bool timing_driven, - bool connection_driven, - enum e_net_relation_to_clustered_block net_relation_to_clustered_block, - const SetupTimingInfo& timing_info, - const std::unordered_set& is_global, - const int high_fanout_net_threshold, - const std::unordered_set& net_output_feeds_driving_block_input); - -/* - * @brief Updates the total gain array to reflect the desired tradeoff between - * input sharing (sharinggain) and path_length minimization (timinggain) - * input each time a new molecule is added to the cluster. - */ -void update_total_gain(float alpha, float beta, bool timing_driven, bool connection_driven, t_pb* pb, AttractionInfo& attraction_groups); - -/* - * @brief Routine that is called each time a new molecule is added to the cluster. - * - * Makes calls to update cluster stats such as the gain map for atoms, used pins, - * and clock structures, in order to reflect the new content of the cluster. - * Also keeps track of which attraction group the cluster belongs to. - */ -void update_cluster_stats(const t_pack_molecule* molecule, - const ClusterLegalizer& cluster_legalizer, - const std::unordered_set& is_clock, - const std::unordered_set& is_global, - const bool global_clocks, - const float alpha, - const float beta, - const bool timing_driven, - const bool connection_driven, - const int high_fanout_net_threshold, - const SetupTimingInfo& timing_info, - AttractionInfo& attraction_groups, - const std::unordered_set& net_output_feeds_driving_block_input); - -/* - * @brief Get candidate molecule to pack into currently open cluster - * - * Molecule selection priority: - * 1. Find unpacked molecules based on criticality and strong connectedness - * (connected by low fanout nets) with current cluster. - * 2. Find unpacked molecules based on transitive connections (eg. 
2 hops away) - * with current cluster. - * 3. Find unpacked molecules based on weak connectedness (connected by high - * fanout nets) with current cluster. - * 4. Find unpacked molecules based on attraction group of the current cluster - * (if the cluster has an attraction group). - */ -t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, - AttractionInfo& attraction_groups, - const enum e_gain_type gain_mode, - const Prepacker& prepacker, - const ClusterLegalizer& cluster_legalizer, - vtr::vector>& clb_inter_blk_nets, - const LegalizationClusterId cluster_index, - bool prioritize_transitive_connectivity, - int transitive_fanout_threshold, - const int feasible_block_array_size, - const std::map>& primitive_candidate_block_types); - -/* - * @brief Add molecules with strong connectedness to the current cluster to the - * list of feasible blocks. - */ -void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur_pb, - LegalizationClusterId legalization_cluster_id, - const Prepacker& prepacker, - const ClusterLegalizer& cluster_legalizer, - const int feasible_block_array_size, - AttractionInfo& attraction_groups); - -/* - * @brief Add molecules based on weak connectedness (connected by high fanout - * nets) with current cluster. - */ -void add_cluster_molecule_candidates_by_highfanout_connectivity(t_pb* cur_pb, - LegalizationClusterId legalization_cluster_id, - const Prepacker& prepacker, - const ClusterLegalizer& cluster_legalizer, - const int feasible_block_array_size, - AttractionInfo& attraction_groups); - -/* - * @brief If the current cluster being packed has an attraction group associated - * with it (i.e. there are atoms in it that belong to an attraction group), - * this routine adds molecules from the associated attraction group to - * the list of feasible blocks for the cluster. 
- * - * Attraction groups can be very large, so we only add some randomly selected - * molecules for efficiency if the number of atoms in the group is greater than - * 500. Therefore, the molecules added to the candidates will vary each time you - * call this function. - */ -void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, - const Prepacker& prepacker, - const ClusterLegalizer& cluster_legalizer, - AttractionInfo& attraction_groups, - const int feasible_block_array_size, - LegalizationClusterId clb_index, - const std::map>& primitive_candidate_block_types); - -/* - * @brief Add molecules based on transitive connections (eg. 2 hops away) with - * current cluster. - */ -void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur_pb, - const Prepacker& prepacker, - const ClusterLegalizer& cluster_legalizer, - vtr::vector>& clb_inter_blk_nets, - const LegalizationClusterId cluster_index, - int transitive_fanout_threshold, - const int feasible_block_array_size, - AttractionInfo& attraction_groups); - -t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, - AttractionInfo& attraction_groups, - const bool allow_unrelated_clustering, - const bool prioritize_transitive_connectivity, - const int transitive_fanout_threshold, - const int feasible_block_array_size, - int* num_unrelated_clustering_attempts, - const Prepacker& prepacker, - const ClusterLegalizer& cluster_legalizer, - vtr::vector>& clb_inter_blk_nets, - LegalizationClusterId cluster_index, - int verbosity, - t_molecule_link* unclustered_list_head, - const int& unclustered_list_head_size, - const std::map>& primitive_candidate_block_types); - -/* - * @brief Calculates molecule statistics for a single molecule. - */ -t_molecule_stats calc_molecule_stats(const t_pack_molecule* molecule, const AtomNetlist& atom_nlist); - -/* - * @brief Get gain of packing molecule into current cluster. 
- * - * gain is equal to: - * total_block_gain - * + molecule_base_gain*some_factor - * - introduced_input_nets_of_unrelated_blocks_pulled_in_by_molecule*some_other_factor - */ -float get_molecule_gain(t_pack_molecule* molecule, std::map& blk_gain, AttractGroupId cluster_attraction_group_id, AttractionInfo& attraction_groups, int num_molecule_failures); - -/** - * @brief Score unclustered atoms that are two hops away from current cluster - * - * For example, consider a cluster that has a FF feeding an adder in another - * cluster. Since this FF is feeding an adder that is packed in another cluster - * this function should find other FFs that are feeding other inputs of this adder - * since they are two hops away from the FF packed in this cluster - */ -void load_transitive_fanout_candidates(LegalizationClusterId cluster_index, - t_pb_stats* pb_stats, - const Prepacker& prepacker, - const ClusterLegalizer& cluster_legalizer, - vtr::vector>& clb_inter_blk_nets, - int transitive_fanout_threshold); - std::map> identify_primitive_candidate_block_types(); /** @@ -440,14 +130,6 @@ void print_le_count(int num_logic_le, int num_logic_and_reg_le, const t_pb_type* le_pb_type); -/* - * @brief Given a pointer to a pb in a cluster, this routine returns a pointer - * to the top-level pb of the given pb. - * - * This is needed when updating the gain for a cluster. - */ -t_pb* get_top_level_pb(t_pb* pb); - /* * @brief Load the mapping between clusters and their atoms. */ diff --git a/vpr/src/pack/greedy_candidate_selector.cpp b/vpr/src/pack/greedy_candidate_selector.cpp new file mode 100644 index 00000000000..1b74a44aa66 --- /dev/null +++ b/vpr/src/pack/greedy_candidate_selector.cpp @@ -0,0 +1,1081 @@ +/** + * @file + * @author Alex Singer + * @date January 2024 + * @brief The definitions of the Greedy Candidate Selector class. 
+ */ + +#include "greedy_candidate_selector.h" +#include +#include "atom_netlist.h" +#include "attraction_groups.h" +#include "cluster_legalizer.h" +#include "cluster_placement.h" +#include "globals.h" +#include "prepack.h" +#include "timing_info.h" +#include "vpr_context.h" +#include "vpr_types.h" +#include "vtr_assert.h" + +/* + * @brief Get gain of packing molecule into current cluster. + * + * gain is equal to: + * total_block_gain + * + molecule_base_gain*some_factor + * - introduced_input_nets_of_unrelated_blocks_pulled_in_by_molecule*some_other_factor + * + * TODO: Confirm that this comment is correct. + */ +static float get_molecule_gain(t_pack_molecule* molecule, + ClusterGainStats& cluster_gain_stats, + AttractGroupId cluster_attraction_group_id, + AttractionInfo& attraction_groups, + int num_molecule_failures, + const AtomNetlist& atom_netlist); + +/* + * @brief Remove blk from list of feasible blocks sorted according to gain. + * + * Useful for removing blocks that are repeatedly failing. If a block + * has been found to be illegal, we don't repeatedly consider it. + */ +static void remove_molecule_from_pb_stats_candidates( + t_pack_molecule* molecule, + ClusterGainStats& cluster_gain_stats); + +/* + * @brief Add blk to list of feasible blocks sorted according to gain. 
+ */ +static void add_molecule_to_pb_stats_candidates( + t_pack_molecule* molecule, + ClusterGainStats& cluster_gain_stats, + int max_queue_size, + AttractionInfo& attraction_groups, + const AtomNetlist& atom_netlist); + +GreedyCandidateSelector::GreedyCandidateSelector( + const AtomNetlist& atom_netlist, + const Prepacker& prepacker, + const t_packer_opts& packer_opts, + bool allow_unrelated_clustering, + const t_molecule_stats& max_molecule_stats, + const std::map>& primitive_candidate_block_types, + const t_pack_high_fanout_thresholds& high_fanout_thresholds, + const std::unordered_set& is_clock, + const std::unordered_set& is_global, + const std::unordered_set& net_output_feeds_driving_block_input, + const SetupTimingInfo& timing_info, + int log_verbosity) + : atom_netlist_(atom_netlist), + packer_opts_(packer_opts), + allow_unrelated_clustering_(allow_unrelated_clustering), + log_verbosity_(log_verbosity), + primitive_candidate_block_types_(primitive_candidate_block_types), + high_fanout_thresholds_(high_fanout_thresholds), + is_clock_(is_clock), + is_global_(is_global), + net_output_feeds_driving_block_input_(net_output_feeds_driving_block_input), + timing_info_(timing_info) { + // Initialize the list of molecules to pack, the clustering data, and the + // net info. + + // Initialize unrelated clustering data. + if (allow_unrelated_clustering_) { + /* alloc and load list of molecules to pack */ + unrelated_clustering_data_.resize(max_molecule_stats.num_used_ext_inputs + 1); + + // Create a sorted list of molecules, sorted on decreasing molecule base + // gain. (Highest gain). + std::vector molecules_vector = prepacker.get_molecules_vector(); + std::stable_sort(molecules_vector.begin(), + molecules_vector.end(), + [](t_pack_molecule* a, t_pack_molecule* b) { + return a->base_gain > b->base_gain; + }); + + // Push back the each molecule into the unrelated clustering data vector + // for their external inputs. 
This creates individual sorted lists of + // molecules for each number of used external inputs. + for (t_pack_molecule* mol : molecules_vector) { + //Figure out how many external inputs are used by this molecule + t_molecule_stats molecule_stats = prepacker.calc_molecule_stats(mol, atom_netlist); + int ext_inps = molecule_stats.num_used_ext_inputs; + + //Insert the molecule into the unclustered lists by number of external inputs + unrelated_clustering_data_[ext_inps].push_back(mol); + } + } + + /* TODO: This is memory inefficient, fix if causes problems */ + /* Store stats on nets used by packed block, useful for determining transitively connected blocks + * (eg. [A1, A2, ..]->[B1, B2, ..]->C implies cluster [A1, A2, ...] and C have a weak link) */ + clb_inter_blk_nets_.resize(atom_netlist.blocks().size()); +} + +GreedyCandidateSelector::~GreedyCandidateSelector() { +} + +ClusterGainStats GreedyCandidateSelector::create_cluster_gain_stats( + t_pack_molecule* cluster_seed_mol, + LegalizationClusterId cluster_id, + const ClusterLegalizer& cluster_legalizer, + AttractionInfo& attraction_groups) { + // Initialize the cluster gain stats. + ClusterGainStats cluster_gain_stats; + cluster_gain_stats.num_feasible_blocks = NOT_VALID; + // TODO: The reason this is being resized and not reserved is due to legacy + // code which should be updated. + cluster_gain_stats.feasible_blocks.resize(packer_opts_.feasible_block_array_size); + for (int i = 0; i < packer_opts_.feasible_block_array_size; i++) + cluster_gain_stats.feasible_blocks[i] = nullptr; + cluster_gain_stats.tie_break_high_fanout_net = AtomNetId::INVALID(); + cluster_gain_stats.explore_transitive_fanout = true; + + // Update the cluster gain stats based on the addition of the seed mol to + // the cluster. + // TODO: We may want to update the cluster gain stats different, knowing + // that this candidate was the seed molecule. 
+ update_cluster_gain_stats_candidate_success(cluster_gain_stats, + cluster_seed_mol, + cluster_id, + cluster_legalizer, + attraction_groups); + + // Return the cluster gain stats. + return cluster_gain_stats; +} + +void GreedyCandidateSelector::update_cluster_gain_stats_candidate_success( + ClusterGainStats& cluster_gain_stats, + t_pack_molecule* successful_mol, + LegalizationClusterId cluster_id, + const ClusterLegalizer& cluster_legalizer, + AttractionInfo& attraction_groups) { + // TODO: If this threshold lookup gets expensive, move outside. + int high_fanout_net_threshold = high_fanout_thresholds_.get_threshold(cluster_legalizer.get_cluster_type(cluster_id)->name); + + // Mark and update the gain stats for each block in the succesfully + // clustered molecule. + // Makes calls to update cluster stats such as the gain map for atoms, used + // pins, and clock structures, in order to reflect the new content of the + // cluster. Also keeps track of which attraction group the cluster belongs + // to. + int molecule_size = get_array_size_of_molecule(successful_mol); + for (int iblock = 0; iblock < molecule_size; iblock++) { + AtomBlockId blk_id = successful_mol->atom_block_ids[iblock]; + if (!blk_id) { + continue; + } + + //Update attraction group + AttractGroupId atom_grp_id = attraction_groups.get_atom_attraction_group(blk_id); + + /* reset list of feasible blocks */ + cluster_gain_stats.num_feasible_blocks = NOT_VALID; + /* TODO: Allow clusters to have more than one attraction group. 
*/ + if (atom_grp_id.is_valid()) + cluster_gain_stats.attraction_grp_id = atom_grp_id; + + /* Outputs first */ + for (AtomPinId pin_id : atom_netlist_.block_output_pins(blk_id)) { + AtomNetId net_id = atom_netlist_.pin_net(pin_id); + + e_gain_update gain_flag = e_gain_update::NO_GAIN; + if (!is_clock_.count(net_id) || !packer_opts_.global_clocks) + gain_flag = e_gain_update::GAIN; + + mark_and_update_partial_gain(cluster_gain_stats, + net_id, + gain_flag, + blk_id, + cluster_legalizer, + high_fanout_net_threshold, + e_net_relation_to_clustered_block::OUTPUT); + } + + /* Next Inputs */ + for (AtomPinId pin_id : atom_netlist_.block_input_pins(blk_id)) { + AtomNetId net_id = atom_netlist_.pin_net(pin_id); + mark_and_update_partial_gain(cluster_gain_stats, + net_id, + e_gain_update::GAIN, + blk_id, + cluster_legalizer, + high_fanout_net_threshold, + e_net_relation_to_clustered_block::INPUT); + } + + /* Finally Clocks */ + for (AtomPinId pin_id : atom_netlist_.block_clock_pins(blk_id)) { + AtomNetId net_id = atom_netlist_.pin_net(pin_id); + + e_gain_update gain_flag = e_gain_update::GAIN; + if (packer_opts_.global_clocks) + gain_flag = e_gain_update::NO_GAIN; + + mark_and_update_partial_gain(cluster_gain_stats, + net_id, + gain_flag, + blk_id, + cluster_legalizer, + high_fanout_net_threshold, + e_net_relation_to_clustered_block::INPUT); + } + + update_total_gain(cluster_gain_stats, attraction_groups); + } + + // if this molecule came from the transitive fanout candidates remove it + cluster_gain_stats.transitive_fanout_candidates.erase(successful_mol->atom_block_ids[successful_mol->root]); + cluster_gain_stats.explore_transitive_fanout = true; + + // Whenever a new molecule has been clustered, reset the number of + // unrelated clustering attempts. 
+ num_unrelated_clustering_attempts_ = 0; +} + +void GreedyCandidateSelector::mark_and_update_partial_gain( + ClusterGainStats& cluster_gain_stats, + AtomNetId net_id, + e_gain_update gain_flag, + AtomBlockId clustered_blk_id, + const ClusterLegalizer& cluster_legalizer, + int high_fanout_net_threshold, + e_net_relation_to_clustered_block net_relation_to_clustered_block) { + + if (int(atom_netlist_.net_sinks(net_id).size()) > high_fanout_net_threshold) { + /* Optimization: It can be too runtime costly for marking all sinks for + * a high fanout-net that probably has no hope of ever getting packed, + * thus ignore those high fanout nets */ + if (!is_global_.count(net_id)) { + /* If no low/medium fanout nets, we may need to consider + * high fan-out nets for packing, so select one and store it */ + AtomNetId stored_net = cluster_gain_stats.tie_break_high_fanout_net; + if (!stored_net || atom_netlist_.net_sinks(net_id).size() < atom_netlist_.net_sinks(stored_net).size()) { + cluster_gain_stats.tie_break_high_fanout_net = net_id; + } + } + return; + } + + /* Mark atom net as being visited, if necessary. */ + if (cluster_gain_stats.num_pins_of_net_in_pb.count(net_id) == 0) { + cluster_gain_stats.marked_nets.push_back(net_id); + } + + /* Update gains of affected blocks. */ + if (gain_flag == e_gain_update::GAIN) { + /* Check if this net is connected to it's driver block multiple times (i.e. as both an output and input) + * If so, avoid double counting by skipping the first (driving) pin. */ + auto pins = atom_netlist_.net_pins(net_id); + if (net_output_feeds_driving_block_input_.count(net_id) != 0) + //We implicitly assume here that net_output_feeds_driver_block_input[net_id] is 2 + //(i.e. 
the net loops back to the block only once) + pins = atom_netlist_.net_sinks(net_id); + + if (cluster_gain_stats.num_pins_of_net_in_pb.count(net_id) == 0) { + for (AtomPinId pin_id : pins) { + AtomBlockId blk_id = atom_netlist_.pin_block(pin_id); + if (!cluster_legalizer.is_atom_clustered(blk_id)) { + if (cluster_gain_stats.sharing_gain.count(blk_id) == 0) { + cluster_gain_stats.marked_blocks.push_back(blk_id); + cluster_gain_stats.sharing_gain[blk_id] = 1; + } else { + cluster_gain_stats.sharing_gain[blk_id]++; + } + } + } + } + + if (packer_opts_.connection_driven) { + update_connection_gain_values(cluster_gain_stats, + net_id, + clustered_blk_id, + cluster_legalizer, + net_relation_to_clustered_block); + } + + if (packer_opts_.timing_driven) { + update_timing_gain_values(cluster_gain_stats, + net_id, + cluster_legalizer, + net_relation_to_clustered_block); + } + } + if (cluster_gain_stats.num_pins_of_net_in_pb.count(net_id) == 0) { + cluster_gain_stats.num_pins_of_net_in_pb[net_id] = 0; + } + cluster_gain_stats.num_pins_of_net_in_pb[net_id]++; +} + +/* + * @brief Determine if atom block is in pb. + * + * TODO: This would make more sense in the cluster legalizer class. + */ +static bool is_atom_blk_in_pb(const AtomBlockId blk_id, const t_pb* pb) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + const t_pb* cur_pb = atom_ctx.lookup.atom_pb(blk_id); + while (cur_pb) { + if (cur_pb == pb) { + return true; + } + cur_pb = cur_pb->parent_pb; + } + return false; +} + +void GreedyCandidateSelector::update_connection_gain_values( + ClusterGainStats& cluster_gain_stats, + AtomNetId net_id, + AtomBlockId clustered_blk_id, + const ClusterLegalizer& cluster_legalizer, + e_net_relation_to_clustered_block net_relation_to_clustered_block) { + + /*This function is called when the connection_gain values on the net net_id + *require updating. */ + + // Atom Context used to lookup the atom pb. + // TODO: Should investigate this. 
Using the atom pb in this class is very + // strange. + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + + int num_internal_connections, num_open_connections, num_stuck_connections; + num_internal_connections = num_open_connections = num_stuck_connections = 0; + + LegalizationClusterId legalization_cluster_id = cluster_legalizer.get_atom_cluster(clustered_blk_id); + + /* may wish to speed things up by ignoring clock nets since they are high fanout */ + for (AtomPinId pin_id : atom_netlist_.net_pins(net_id)) { + AtomBlockId blk_id = atom_netlist_.pin_block(pin_id); + if (cluster_legalizer.get_atom_cluster(blk_id) == legalization_cluster_id + && is_atom_blk_in_pb(blk_id, atom_ctx.lookup.atom_pb(clustered_blk_id))) { + num_internal_connections++; + } else if (!cluster_legalizer.is_atom_clustered(blk_id)) { + num_open_connections++; + } else { + num_stuck_connections++; + } + } + + if (net_relation_to_clustered_block == e_net_relation_to_clustered_block::OUTPUT) { + for (AtomPinId pin_id : atom_netlist_.net_sinks(net_id)) { + AtomBlockId blk_id = atom_netlist_.pin_block(pin_id); + VTR_ASSERT(blk_id); + + if (!cluster_legalizer.is_atom_clustered(blk_id)) { + /* TODO: Gain function accurate only if net has one connection to block, + * TODO: Should we handle case where net has multi-connection to block? 
+ * Gain computation is only off by a bit in this case */ + if (cluster_gain_stats.connection_gain.count(blk_id) == 0) { + cluster_gain_stats.connection_gain[blk_id] = 0; + } + + if (num_internal_connections > 1) { + cluster_gain_stats.connection_gain[blk_id] -= 1 / (float)(num_open_connections + 1.5 * num_stuck_connections + 1 + 0.1); + } + cluster_gain_stats.connection_gain[blk_id] += 1 / (float)(num_open_connections + 1.5 * num_stuck_connections + 0.1); + } + } + } + + if (net_relation_to_clustered_block == e_net_relation_to_clustered_block::INPUT) { + /*Calculate the connection_gain for the atom block which is driving * + *the atom net that is an input to an atom block in the cluster */ + + AtomPinId driver_pin_id = atom_netlist_.net_driver(net_id); + AtomBlockId blk_id = atom_netlist_.pin_block(driver_pin_id); + + if (!cluster_legalizer.is_atom_clustered(blk_id)) { + if (cluster_gain_stats.connection_gain.count(blk_id) == 0) { + cluster_gain_stats.connection_gain[blk_id] = 0; + } + if (num_internal_connections > 1) { + cluster_gain_stats.connection_gain[blk_id] -= 1 / (float)(num_open_connections + 1.5 * num_stuck_connections + 0.1 + 1); + } + cluster_gain_stats.connection_gain[blk_id] += 1 / (float)(num_open_connections + 1.5 * num_stuck_connections + 0.1); + } + } +} + +void GreedyCandidateSelector::update_timing_gain_values( + ClusterGainStats& cluster_gain_stats, + AtomNetId net_id, + const ClusterLegalizer& cluster_legalizer, + e_net_relation_to_clustered_block net_relation_to_clustered_block) { + + /*This function is called when the timing_gain values on the atom net + *net_id requires updating. */ + + /* Check if this atom net lists its driving atom block twice. If so, avoid * + * double counting this atom block by skipping the first (driving) pin. 
*/ + auto pins = atom_netlist_.net_pins(net_id); + if (net_output_feeds_driving_block_input_.count(net_id) != 0) + pins = atom_netlist_.net_sinks(net_id); + + if (net_relation_to_clustered_block == e_net_relation_to_clustered_block::OUTPUT + && !is_global_.count(net_id)) { + for (AtomPinId pin_id : pins) { + AtomBlockId blk_id = atom_netlist_.pin_block(pin_id); + if (!cluster_legalizer.is_atom_clustered(blk_id)) { + double timing_gain = timing_info_.setup_pin_criticality(pin_id); + + if (cluster_gain_stats.timing_gain.count(blk_id) == 0) { + cluster_gain_stats.timing_gain[blk_id] = 0; + } + if (timing_gain > cluster_gain_stats.timing_gain[blk_id]) + cluster_gain_stats.timing_gain[blk_id] = timing_gain; + } + } + } + + if (net_relation_to_clustered_block == e_net_relation_to_clustered_block::INPUT + && !is_global_.count(net_id)) { + /*Calculate the timing gain for the atom block which is driving * + *the atom net that is an input to a atom block in the cluster */ + AtomPinId driver_pin = atom_netlist_.net_driver(net_id); + AtomBlockId new_blk_id = atom_netlist_.pin_block(driver_pin); + + if (!cluster_legalizer.is_atom_clustered(new_blk_id)) { + for (AtomPinId pin_id : atom_netlist_.net_sinks(net_id)) { + double timing_gain = timing_info_.setup_pin_criticality(pin_id); + + if (cluster_gain_stats.timing_gain.count(new_blk_id) == 0) { + cluster_gain_stats.timing_gain[new_blk_id] = 0; + } + if (timing_gain > cluster_gain_stats.timing_gain[new_blk_id]) + cluster_gain_stats.timing_gain[new_blk_id] = timing_gain; + } + } + } +} + +void GreedyCandidateSelector::update_total_gain(ClusterGainStats& cluster_gain_stats, + AttractionInfo& attraction_groups) { + AttractGroupId cluster_att_grp_id = cluster_gain_stats.attraction_grp_id; + + for (AtomBlockId blk_id : cluster_gain_stats.marked_blocks) { + //Initialize connection_gain and sharing_gain if + //they have not previously been updated for the block + if (cluster_gain_stats.connection_gain.count(blk_id) == 0) { + 
cluster_gain_stats.connection_gain[blk_id] = 0; + } + if (cluster_gain_stats.sharing_gain.count(blk_id) == 0) { + cluster_gain_stats.sharing_gain[blk_id] = 0; + } + + AttractGroupId atom_grp_id = attraction_groups.get_atom_attraction_group(blk_id); + if (atom_grp_id != AttractGroupId::INVALID() && atom_grp_id == cluster_att_grp_id) { + //increase gain of atom based on attraction group gain + float att_grp_gain = attraction_groups.get_attraction_group_gain(atom_grp_id); + cluster_gain_stats.gain[blk_id] += att_grp_gain; + } + + /* Todo: This was used to explore different normalization options, can + * be made more efficient once we decide on which one to use */ + int num_used_input_pins = atom_netlist_.block_input_pins(blk_id).size(); + int num_used_output_pins = atom_netlist_.block_output_pins(blk_id).size(); + /* end todo */ + + /* Calculate area-only cost function */ + int num_used_pins = num_used_input_pins + num_used_output_pins; + VTR_ASSERT(num_used_pins > 0); + if (packer_opts_.connection_driven) { + /*try to absorb as many connections as possible*/ + cluster_gain_stats.gain[blk_id] = ((1 - packer_opts_.beta) + * (float)cluster_gain_stats.sharing_gain[blk_id] + + packer_opts_.beta * (float)cluster_gain_stats.connection_gain[blk_id]) + / (num_used_pins); + } else { + cluster_gain_stats.gain[blk_id] = ((float)cluster_gain_stats.sharing_gain[blk_id]) + / (num_used_pins); + } + + /* Add in timing driven cost into cost function */ + if (packer_opts_.timing_driven) { + cluster_gain_stats.gain[blk_id] = packer_opts_.alpha + * cluster_gain_stats.timing_gain[blk_id] + + (1.0 - packer_opts_.alpha) * (float)cluster_gain_stats.gain[blk_id]; + } + } +} + +void GreedyCandidateSelector::update_cluster_gain_stats_candidate_failed(ClusterGainStats& cluster_gain_stats, + t_pack_molecule* failed_mol) { + auto got = cluster_gain_stats.atom_failures.find(failed_mol->atom_block_ids[0]); + if (got == cluster_gain_stats.atom_failures.end()) { + 
cluster_gain_stats.atom_failures.insert({failed_mol->atom_block_ids[0], 1}); + } else { + got->second++; + } +} + +t_pack_molecule* GreedyCandidateSelector::get_next_candidate_for_cluster( + ClusterGainStats& cluster_gain_stats, + LegalizationClusterId cluster_id, + const ClusterLegalizer& cluster_legalizer, + const Prepacker& prepacker, + AttractionInfo& attraction_groups) { + /* Finds the block with the greatest gain that satisfies the + * input, clock and capacity constraints of a cluster that are + * passed in. If no suitable block is found it returns nullptr. + */ + + /* + * This routine populates a list of feasible blocks outside the cluster, + * then returns the best candidate for the cluster. + * If there are no feasible blocks it returns a nullptr. + */ + + /* + * @brief Get candidate molecule to pack into currently open cluster + * + * Molecule selection priority: + * 1. Find unpacked molecules based on criticality and strong connectedness + * (connected by low fanout nets) with current cluster. + * 2. Find unpacked molecules based on transitive connections (eg. 2 hops away) + * with current cluster. + * 3. Find unpacked molecules based on weak connectedness (connected by high + * fanout nets) with current cluster. + * 4. Find unpacked molecules based on attraction group of the current cluster + * (if the cluster has an attraction group). + */ + + // 1. Find unpacked molecules based on criticality and strong connectedness (connected by low fanout nets) with current cluster + if (cluster_gain_stats.num_feasible_blocks == NOT_VALID) { + add_cluster_molecule_candidates_by_connectivity_and_timing(cluster_gain_stats, + cluster_id, + prepacker, + cluster_legalizer, + attraction_groups); + } + + if (packer_opts_.prioritize_transitive_connectivity) { + // 2. Find unpacked molecules based on transitive connections (eg. 
2 hops away) with current cluster + if (cluster_gain_stats.num_feasible_blocks == 0 && cluster_gain_stats.explore_transitive_fanout) { + add_cluster_molecule_candidates_by_transitive_connectivity(cluster_gain_stats, + cluster_id, + prepacker, + cluster_legalizer, + attraction_groups); + } + + // 3. Find unpacked molecules based on weak connectedness (connected by high fanout nets) with current cluster + if (cluster_gain_stats.num_feasible_blocks == 0 && cluster_gain_stats.tie_break_high_fanout_net) { + add_cluster_molecule_candidates_by_highfanout_connectivity(cluster_gain_stats, + cluster_id, + prepacker, + cluster_legalizer, + attraction_groups); + } + } else { //Reverse order + // 3. Find unpacked molecules based on weak connectedness (connected by high fanout nets) with current cluster + if (cluster_gain_stats.num_feasible_blocks == 0 && cluster_gain_stats.tie_break_high_fanout_net) { + add_cluster_molecule_candidates_by_highfanout_connectivity(cluster_gain_stats, + cluster_id, + prepacker, + cluster_legalizer, + attraction_groups); + } + + // 2. Find unpacked molecules based on transitive connections (eg. 2 hops away) with current cluster + if (cluster_gain_stats.num_feasible_blocks == 0 && cluster_gain_stats.explore_transitive_fanout) { + add_cluster_molecule_candidates_by_transitive_connectivity(cluster_gain_stats, + cluster_id, + prepacker, + cluster_legalizer, + attraction_groups); + } + } + + // 4. Find unpacked molecules based on attraction group of the current cluster (if the cluster has an attraction group) + if (cluster_gain_stats.num_feasible_blocks == 0) { + add_cluster_molecule_candidates_by_attraction_group(cluster_gain_stats, + cluster_id, + prepacker, + cluster_legalizer, + attraction_groups); + } + /* Grab highest gain molecule */ + // If this was a vector, this would just be a pop_back. 
+ t_pack_molecule* best_molecule = nullptr; + if (cluster_gain_stats.num_feasible_blocks > 0) { + cluster_gain_stats.num_feasible_blocks--; + int index = cluster_gain_stats.num_feasible_blocks; + best_molecule = cluster_gain_stats.feasible_blocks[index]; + VTR_ASSERT(!cluster_legalizer.is_mol_clustered(best_molecule)); + } + + // If we are allowing unrelated clustering and no molecule has been found, + // get unrelated candidate for cluster. + if (allow_unrelated_clustering_ && best_molecule == nullptr) { + if (num_unrelated_clustering_attempts_ < max_unrelated_clustering_attempts_) { + best_molecule = get_unrelated_candidate_for_cluster(cluster_id, + cluster_legalizer); + num_unrelated_clustering_attempts_++; + VTR_LOGV(best_molecule && log_verbosity_ > 2, + "\tFound unrelated molecule to cluster\n"); + } else { + num_unrelated_clustering_attempts_ = 0; + } + } else { + VTR_LOGV(!best_molecule && log_verbosity_ > 2, + "\tNo related molecule found and unrelated clustering disabled\n"); + } + + return best_molecule; +} + +void GreedyCandidateSelector::add_cluster_molecule_candidates_by_connectivity_and_timing( + ClusterGainStats& cluster_gain_stats, + LegalizationClusterId legalization_cluster_id, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + AttractionInfo& attraction_groups) { + VTR_ASSERT(cluster_gain_stats.num_feasible_blocks == NOT_VALID); + + cluster_gain_stats.num_feasible_blocks = 0; + cluster_gain_stats.explore_transitive_fanout = true; /* If no legal molecules found, enable exploration of molecules two hops away */ + + for (AtomBlockId blk_id : cluster_gain_stats.marked_blocks) { + // Get the molecule that contains this block. + t_pack_molecule* molecule = prepacker.get_atom_molecule(blk_id); + // Add the molecule as a candidate if the molecule is not clustered and + // is compatible with this cluster (using simple checks). 
+ if (!cluster_legalizer.is_mol_clustered(molecule) && + cluster_legalizer.is_molecule_compatible(molecule, legalization_cluster_id)) { + add_molecule_to_pb_stats_candidates(molecule, + cluster_gain_stats, + packer_opts_.feasible_block_array_size, + attraction_groups, + atom_netlist_); + } + } +} + +void GreedyCandidateSelector::add_cluster_molecule_candidates_by_transitive_connectivity( + ClusterGainStats& cluster_gain_stats, + LegalizationClusterId legalization_cluster_id, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + AttractionInfo& attraction_groups) { + //TODO: For now, only done by fan-out; should also consider fan-in + cluster_gain_stats.explore_transitive_fanout = false; + + /* First time finding transitive fanout candidates therefore alloc and load them */ + load_transitive_fanout_candidates(cluster_gain_stats, + legalization_cluster_id, + prepacker, + cluster_legalizer); + + /* Only consider candidates that pass a very simple legality check */ + for (const auto& transitive_candidate : cluster_gain_stats.transitive_fanout_candidates) { + t_pack_molecule* molecule = transitive_candidate.second; + if (!cluster_legalizer.is_mol_clustered(molecule) && + cluster_legalizer.is_molecule_compatible(molecule, legalization_cluster_id)) { + add_molecule_to_pb_stats_candidates(molecule, + cluster_gain_stats, + std::min(packer_opts_.feasible_block_array_size, + AAPACK_MAX_TRANSITIVE_EXPLORE), + attraction_groups, + atom_netlist_); + } + } +} + +void GreedyCandidateSelector::add_cluster_molecule_candidates_by_highfanout_connectivity( + ClusterGainStats& cluster_gain_stats, + LegalizationClusterId legalization_cluster_id, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + AttractionInfo& attraction_groups) { + /* Because the packer ignores high fanout nets when marking what blocks + * to consider, use one of the ignored high fanout net to fill up lightly + * related blocks */ + + AtomNetId net_id = 
cluster_gain_stats.tie_break_high_fanout_net; + + int count = 0; + for (AtomPinId pin_id : atom_netlist_.net_pins(net_id)) { + if (count >= AAPACK_MAX_HIGH_FANOUT_EXPLORE) { + break; + } + + AtomBlockId blk_id = atom_netlist_.pin_block(pin_id); + + t_pack_molecule* molecule = prepacker.get_atom_molecule(blk_id); + if (!cluster_legalizer.is_mol_clustered(molecule) && + cluster_legalizer.is_molecule_compatible(molecule, legalization_cluster_id)) { + add_molecule_to_pb_stats_candidates(molecule, + cluster_gain_stats, + std::min(packer_opts_.feasible_block_array_size, + AAPACK_MAX_HIGH_FANOUT_EXPLORE), + attraction_groups, + atom_netlist_); + count++; + } + } + cluster_gain_stats.tie_break_high_fanout_net = AtomNetId::INVALID(); /* Mark off that this high fanout net has been considered */ +} + +void GreedyCandidateSelector::add_cluster_molecule_candidates_by_attraction_group( + ClusterGainStats& cluster_gain_stats, + LegalizationClusterId legalization_cluster_id, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + AttractionInfo& attraction_groups) { + auto cluster_type = cluster_legalizer.get_cluster_type(legalization_cluster_id); + + /* + * For each cluster, we want to explore the attraction group molecules as potential + * candidates for the cluster a limited number of times. This limit is imposed because + * if the cluster belongs to a very large attraction group, we could potentially search + * through its attraction group molecules for a very long time. + * Defining a number of times to search through the attraction groups (i.e. number of + * attraction group pulls) determines how many times we search through the cluster's attraction + * group molecules for candidate molecules. 
+ */ + AttractGroupId grp_id = cluster_gain_stats.attraction_grp_id; + if (grp_id == AttractGroupId::INVALID()) { + return; + } + + AttractionGroup& group = attraction_groups.get_attraction_group_info(grp_id); + std::vector available_atoms; + for (AtomBlockId atom_id : group.group_atoms) { + const auto& atom_model = atom_netlist_.block_model(atom_id); + auto itr = primitive_candidate_block_types_.find(atom_model); + VTR_ASSERT(itr != primitive_candidate_block_types_.end()); + const std::vector& candidate_types = itr->second; + + //Only consider molecules that are unpacked and of the correct type + if (!cluster_legalizer.is_atom_clustered(atom_id) + && std::find(candidate_types.begin(), candidate_types.end(), cluster_type) != candidate_types.end()) { + available_atoms.push_back(atom_id); + } + } + + int num_available_atoms = available_atoms.size(); + if (num_available_atoms == 0) { + return; + } + + if (num_available_atoms < attraction_group_num_atoms_threshold_) { + for (AtomBlockId atom_id : available_atoms) { + //Only consider molecules that are unpacked and of the correct type + t_pack_molecule* molecule = prepacker.get_atom_molecule(atom_id); + if (!cluster_legalizer.is_mol_clustered(molecule) && + cluster_legalizer.is_molecule_compatible(molecule, legalization_cluster_id)) { + add_molecule_to_pb_stats_candidates(molecule, + cluster_gain_stats, + packer_opts_.feasible_block_array_size, + attraction_groups, + atom_netlist_); + } + } + return; + } + + int min = 0; + int max = num_available_atoms - 1; + + for (int j = 0; j < attraction_group_num_atoms_threshold_; j++) { + // FIXME: This is a non-deterministic random number generator and it is + // overkill to what this needs to be. Should use vtr::irand which + // would be faster. 
+ std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> distr(min, max); + int selected_atom = distr(gen); + + AtomBlockId blk_id = available_atoms[selected_atom]; + + //Only consider molecules that are unpacked and of the correct type + t_pack_molecule* molecule = prepacker.get_atom_molecule(blk_id); + if (!cluster_legalizer.is_mol_clustered(molecule) && + cluster_legalizer.is_molecule_compatible(molecule, legalization_cluster_id)) { + add_molecule_to_pb_stats_candidates(molecule, + cluster_gain_stats, + packer_opts_.feasible_block_array_size, + attraction_groups, + atom_netlist_); + } + } +} + +/* + * @brief Add blk to list of feasible blocks sorted according to gain. + */ +static void add_molecule_to_pb_stats_candidates(t_pack_molecule* molecule, + ClusterGainStats& cluster_gain_stats, + int max_queue_size, + AttractionInfo& attraction_groups, + const AtomNetlist& atom_netlist) { + int num_molecule_failures = 0; + + AttractGroupId cluster_att_grp = cluster_gain_stats.attraction_grp_id; + + /* When the clusterer packs with attraction groups the goal is to + * pack more densely. Removing failed molecules to make room for the exploration of + * more molecules helps to achieve this purpose. 
+ */ + if (attraction_groups.num_attraction_groups() > 0) { + auto got = cluster_gain_stats.atom_failures.find(molecule->atom_block_ids[0]); + if (got == cluster_gain_stats.atom_failures.end()) { + num_molecule_failures = 0; + } else { + num_molecule_failures = got->second; + } + + if (num_molecule_failures > 0) { + remove_molecule_from_pb_stats_candidates(molecule, cluster_gain_stats); + return; + } + } + + for (int i = 0; i < cluster_gain_stats.num_feasible_blocks; i++) { + if (cluster_gain_stats.feasible_blocks[i] == molecule) { + return; // already in queue, do nothing + } + } + + if (cluster_gain_stats.num_feasible_blocks >= max_queue_size - 1) { + /* maximum size for array, remove smallest gain element and sort */ + if (get_molecule_gain(molecule, cluster_gain_stats, cluster_att_grp, attraction_groups, num_molecule_failures, atom_netlist) > get_molecule_gain(cluster_gain_stats.feasible_blocks[0], cluster_gain_stats, cluster_att_grp, attraction_groups, num_molecule_failures, atom_netlist)) { + /* single loop insertion sort */ + int j; + for (j = 0; j < cluster_gain_stats.num_feasible_blocks - 1; j++) { + if (get_molecule_gain(molecule, cluster_gain_stats, cluster_att_grp, attraction_groups, num_molecule_failures, atom_netlist) <= get_molecule_gain(cluster_gain_stats.feasible_blocks[j + 1], cluster_gain_stats, cluster_att_grp, attraction_groups, num_molecule_failures, atom_netlist)) { + cluster_gain_stats.feasible_blocks[j] = molecule; + break; + } else { + cluster_gain_stats.feasible_blocks[j] = cluster_gain_stats.feasible_blocks[j + 1]; + } + } + if (j == cluster_gain_stats.num_feasible_blocks - 1) { + cluster_gain_stats.feasible_blocks[j] = molecule; + } + } + } else { + /* Expand array and single loop insertion sort */ + int j; + for (j = cluster_gain_stats.num_feasible_blocks - 1; j >= 0; j--) { + if (get_molecule_gain(cluster_gain_stats.feasible_blocks[j], cluster_gain_stats, cluster_att_grp, attraction_groups, num_molecule_failures, atom_netlist) > 
get_molecule_gain(molecule, cluster_gain_stats, cluster_att_grp, attraction_groups, num_molecule_failures, atom_netlist)) { + cluster_gain_stats.feasible_blocks[j + 1] = cluster_gain_stats.feasible_blocks[j]; + } else { + cluster_gain_stats.feasible_blocks[j + 1] = molecule; + break; + } + } + if (j < 0) { + cluster_gain_stats.feasible_blocks[0] = molecule; + } + cluster_gain_stats.num_feasible_blocks++; + } +} + +/* + * @brief Remove blk from list of feasible blocks sorted according to gain. + * + * Useful for removing blocks that are repeatedly failing. If a block + * has been found to be illegal, we don't repeatedly consider it. + */ +static void remove_molecule_from_pb_stats_candidates(t_pack_molecule* molecule, + ClusterGainStats& cluster_gain_stats) { + int molecule_index; + bool found_molecule = false; + + //find the molecule index + for (int i = 0; i < cluster_gain_stats.num_feasible_blocks; i++) { + if (cluster_gain_stats.feasible_blocks[i] == molecule) { + found_molecule = true; + molecule_index = i; + } + } + + //if it is not in the array, return + if (found_molecule == false) { + return; + } + + //Otherwise, shift the molecules while removing the specified molecule + for (int j = molecule_index; j < cluster_gain_stats.num_feasible_blocks - 1; j++) { + cluster_gain_stats.feasible_blocks[j] = cluster_gain_stats.feasible_blocks[j + 1]; + } + cluster_gain_stats.num_feasible_blocks--; +} + +/* + * @brief Get gain of packing molecule into current cluster. 
+ * + * gain is equal to: + * total_block_gain + * + molecule_base_gain*some_factor + * - introduced_input_nets_of_unrelated_blocks_pulled_in_by_molecule*some_other_factor + */ +static float get_molecule_gain(t_pack_molecule* molecule, + ClusterGainStats& cluster_gain_stats, + AttractGroupId cluster_attraction_group_id, + AttractionInfo& attraction_groups, + int num_molecule_failures, + const AtomNetlist& atom_netlist) { + float gain = 0; + constexpr float attraction_group_penalty = 0.1; + + int num_introduced_inputs_of_indirectly_related_block = 0; + for (int i = 0; i < get_array_size_of_molecule(molecule); i++) { + AtomBlockId blk_id = molecule->atom_block_ids[i]; + if (blk_id) { + if (cluster_gain_stats.gain.count(blk_id) > 0) { + gain += cluster_gain_stats.gain[blk_id]; + } else { + /* This block has no connection with current cluster, penalize molecule for having this block + */ + for (auto pin_id : atom_netlist.block_input_pins(blk_id)) { + auto net_id = atom_netlist.pin_net(pin_id); + VTR_ASSERT(net_id); + + auto driver_pin_id = atom_netlist.net_driver(net_id); + VTR_ASSERT(driver_pin_id); + + auto driver_blk_id = atom_netlist.pin_block(driver_pin_id); + + num_introduced_inputs_of_indirectly_related_block++; + for (int iblk = 0; iblk < get_array_size_of_molecule(molecule); iblk++) { + if (molecule->atom_block_ids[iblk] && driver_blk_id == molecule->atom_block_ids[iblk]) { + //valid block which is driver (and hence not an input) + num_introduced_inputs_of_indirectly_related_block--; + break; + } + } + } + } + AttractGroupId atom_grp_id = attraction_groups.get_atom_attraction_group(blk_id); + if (atom_grp_id == cluster_attraction_group_id && cluster_attraction_group_id != AttractGroupId::INVALID()) { + float att_grp_gain = attraction_groups.get_attraction_group_gain(atom_grp_id); + gain += att_grp_gain; + } else if (cluster_attraction_group_id != AttractGroupId::INVALID() && atom_grp_id != cluster_attraction_group_id) { + gain -= attraction_group_penalty; + } 
+ } + } + + gain += molecule->base_gain * 0.0001; /* Use base gain as tie breaker TODO: need to sweep this value and perhaps normalize */ + gain -= num_introduced_inputs_of_indirectly_related_block * (0.001); + + if (num_molecule_failures > 0 && attraction_groups.num_attraction_groups() > 0) { + gain -= 0.1 * num_molecule_failures; + } + + return gain; +} + +void GreedyCandidateSelector::load_transitive_fanout_candidates( + ClusterGainStats& cluster_gain_stats, + LegalizationClusterId legalization_cluster_id, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer) { + // iterate over all the nets that have pins in this cluster + for (AtomNetId net_id : cluster_gain_stats.marked_nets) { + // only consider small nets to constrain runtime + if (int(atom_netlist_.net_pins(net_id).size()) > packer_opts_.transitive_fanout_threshold) + continue; + + // iterate over all the pins of the net + for (AtomPinId pin_id : atom_netlist_.net_pins(net_id)) { + AtomBlockId atom_blk_id = atom_netlist_.pin_block(pin_id); + // get the transitive cluster + LegalizationClusterId tclb = cluster_legalizer.get_atom_cluster(atom_blk_id); + // Only consider blocks connected to this pin that are packed in + // another cluster. + if (tclb == legalization_cluster_id || tclb == LegalizationClusterId::INVALID()) + continue; + + // explore transitive nets from already packed cluster + for (AtomNetId tnet : clb_inter_blk_nets_[tclb]) { + // iterate over all the pins of the net + for (AtomPinId tpin : atom_netlist_.net_pins(tnet)) { + AtomBlockId blk_id = atom_netlist_.pin_block(tpin); + // Ignore blocks which have already been packed. 
+ if (cluster_legalizer.is_atom_clustered(blk_id)) + continue; + + // This transitive atom is not packed, score and add + auto& transitive_fanout_candidates = cluster_gain_stats.transitive_fanout_candidates; + + if (cluster_gain_stats.gain.count(blk_id) == 0) { + cluster_gain_stats.gain[blk_id] = 0.001; + } else { + cluster_gain_stats.gain[blk_id] += 0.001; + } + t_pack_molecule* molecule = prepacker.get_atom_molecule(blk_id); + VTR_ASSERT(!cluster_legalizer.is_mol_clustered(molecule)); + transitive_fanout_candidates.insert({molecule->atom_block_ids[molecule->root], molecule}); + } + } + } + } +} + +t_pack_molecule* GreedyCandidateSelector::get_unrelated_candidate_for_cluster( + LegalizationClusterId cluster_id, + const ClusterLegalizer& cluster_legalizer) { + // Necessary data structures are only allocated if unrelated clustering is + // on. + VTR_ASSERT(allow_unrelated_clustering_); + + /* + * TODO: Analyze if this function is useful in more detail, also, should + * probably not include clock in input count + */ + + // With no buckets there is nothing to propose. Returning here also keeps + // the clamp below from computing size() - 1 on an empty vector, which + // would wrap around for an unsigned size. + if (unrelated_clustering_data_.empty()) + return nullptr; + + // Clamp the number of available inputs to the largest bucket index. + size_t inputs_avail = cluster_legalizer.get_num_cluster_inputs_available(cluster_id); + if (inputs_avail >= unrelated_clustering_data_.size()) { + inputs_avail = unrelated_clustering_data_.size() - 1; + } + + // Walk the buckets from index inputs_avail down to 0. + for (int ext_inps = inputs_avail; ext_inps >= 0; ext_inps--) { + // Get the molecule by the number of external inputs. + t_pack_molecule* molecule = nullptr; + for (t_pack_molecule* mol : unrelated_clustering_data_[ext_inps]) { + /* TODO: Get better candidate atom block in future, eg. 
return most timing critical or some other smarter metric */ + if (!cluster_legalizer.is_mol_clustered(mol)) { + /* TODO: I should be using a better filtering check especially when I'm + * dealing with multiple clock/multiple global reset signals where the clock/reset + * packed in matters, need to do later when I have the circuits to check my work */ + if (cluster_legalizer.is_molecule_compatible(mol, cluster_id)) { + molecule = mol; + break; + } + } + } + // If a molecule could be found, return it. + if (molecule != nullptr) + return molecule; + } + + // If no molecule could be found, return nullptr. + return nullptr; +} + +void GreedyCandidateSelector::update_candidate_selector_finalize_cluster( + ClusterGainStats& cluster_gain_stats, + LegalizationClusterId cluster_id) { + // store info that will be used later in packing. + for (const AtomNetId mnet_id : cluster_gain_stats.marked_nets) { + int external_terminals = atom_netlist_.net_pins(mnet_id).size() - cluster_gain_stats.num_pins_of_net_in_pb[mnet_id]; + // Check if external terminals of net is within the fanout limit and + // that there exists external terminals. + if (external_terminals < packer_opts_.transitive_fanout_threshold && external_terminals > 0) { + // TODO: This should really not use the cluster_id, it can be a bit + // dangerous since the legalizer may change the IDs of any + // cluster if it wants. Maybe store this information in the + // legalizer instead. + clb_inter_blk_nets_[cluster_id].push_back(mnet_id); + } + } +} + diff --git a/vpr/src/pack/greedy_candidate_selector.h b/vpr/src/pack/greedy_candidate_selector.h new file mode 100644 index 00000000000..881e41a4224 --- /dev/null +++ b/vpr/src/pack/greedy_candidate_selector.h @@ -0,0 +1,520 @@ +/** + * @file + * @author Alex Singer + * @date January 2025 + * @brief The declaration of the greedy candidate selector class which selects + * candidate molecules to pack into the given cluster. 
This class also + * maintains the gains of packing molecules into clusters. + */ + +#pragma once + +#include +#include +#include +#include "attraction_groups.h" +#include "cluster_legalizer.h" +#include "physical_types.h" +#include "vtr_vector.h" + +// Forward declarations +class AtomNetlist; +class AttractionInfo; +class Prepacker; +class SetupTimingInfo; +class t_pack_high_fanout_thresholds; +class t_pack_molecule; +struct t_model; +struct t_molecule_stats; +struct t_packer_opts; + +/** + * @brief Stats on the gain of a cluster. + * + * This contains information that is updated whenever a molecule is packed into + * a cluster. This information is used to select candidate molecules to pack + * into the given cluster. + */ +struct ClusterGainStats { + /// @brief Attraction (inverse of cost) function. + std::unordered_map gain; + + /// @brief The timing criticality score of this atom. + /// Determined by the most critical atom net between this atom + /// and any atom in the current pb. + std::unordered_map timing_gain; + /// @brief Weighted sum of connections to attraction function. + std::unordered_map connection_gain; + /// @brief How many nets on an atom are already in the pb under + /// consideration. + std::unordered_map sharing_gain; + + /// @brief Stores the number of times atoms have failed to be packed into + /// the cluster. + /// + /// key: root block id of the molecule, value: number of times the molecule + /// has failed to be packed into the cluster. + std::unordered_map atom_failures; + + /// @brief List of nets with the num_pins_of_net_in_pb and gain entries + /// altered (i.e. have some gain-related connection to the current + /// cluster). + std::vector marked_nets; + /// @brief List of blocks with the num_pins_of_net_in_pb and gain entries altered. + std::vector marked_blocks; + + /// @brief If no marked candidate molecules, use this high fanout net to + /// determine the next candidate atom. 
+ AtomNetId tie_break_high_fanout_net; + /// @brief If no marked candidate molecules and no high fanout nets to + /// determine next candidate molecule then explore molecules on + /// transitive fanout. + bool explore_transitive_fanout; + /// @brief Holds the transitive fanout candidates. key: root block id of the + /// molecule, value: pointer to the molecule. + // TODO: This should be an unordered map, unless stability is desired. + std::map transitive_fanout_candidates; + + /// @brief How many pins of each atom net are contained in the currently open pb? + std::unordered_map num_pins_of_net_in_pb; + + /// @brief The attraction group associated with the cluster. Will be + /// AttractGroupId::INVALID() if no attraction group is associated + /// with the cluster. + AttractGroupId attraction_grp_id; + + /// @brief Array of feasible blocks to select from [0..max_array_size-1] + /// + /// Sorted in ascending gain order so that the last block is the most + /// desirable (this makes it easy to pop blocks off the list). + std::vector feasible_blocks; + /// @brief The number of entries of feasible_blocks currently in use. + int num_feasible_blocks; +}; + +/** + * @brief A selector class which will propose good candidate molecules to pack + * into the given cluster. This is used to grow clusters in a greedy + * clusterer. + * + * In greedy clustering algorithms, clusters are grown by selecting the + * candidate molecule with the highest gain to pack into it. This class + * calculates and maintains the gains on clusters and their candidates to allow + * it to select which candidate to try next. + * + * Usage: + * + * GreedyCandidateSelector candidate_selector(...); + * + * // ... (Start a new cluster using the cluster legalizer) + * + * // Create an object to hold the gain statistics for the new cluster. + * ClusterGainStats cluster_gain_stats = candidate_selector.create_cluster_gain_stats(...); + * + * // Select a candidate to pack into the cluster using the gain stats. 
+ * t_pack_molecule* candidate_mol = candidate_selector.get_next_candidate_for_cluster(cluster_gain_stats, ...); + * + * // ... (Try to pack the candidate into the cluster) + * + * // Update the cluster gain stats based on whether the pack was successful or not. + * if (pack succeeded): + * candidate_selector.update_cluster_gain_stats_candidate_success(cluster_gain_stats, candidate_mol, ...); + * else: + * candidate_selector.update_cluster_gain_stats_candidate_failed(cluster_gain_stats, candidate_mol); + * + * // Pick a new candidate and continue + * candidate_mol = candidate_selector.get_next_candidate_for_cluster(cluster_gain_stats, ...); + * ... + * + * // Once the cluster is fully packed, finalize the cluster. + * candidate_selector.update_candidate_selector_finalize_cluster(cluster_gain_stats, ...); + */ +class GreedyCandidateSelector { +private: + /// @brief How many unrelated candidates can be proposed and not clustered + /// in a row. So if an unrelated candidate is successfully clustered, + /// the counter is reset. + static constexpr int max_unrelated_clustering_attempts_ = 1; + + /// @brief For high-fanout nets that are ignored, consider a maximum of this + /// many sinks, must be less than packer_opts.feasible_block_array_size. + static constexpr int AAPACK_MAX_HIGH_FANOUT_EXPLORE = 10; + + /// @brief When investigating transitive fanout connections in packing, + /// consider a maximum of this many molecules, must be less than + /// packer_opts.feasible_block_array_size. + static constexpr int AAPACK_MAX_TRANSITIVE_EXPLORE = 40; + + /// @brief When adding cluster molecule candidates by attraction groups, + /// only investigate this many candidates. Some attraction groups can + /// get very large; so this threshold decides when to explore all + /// atoms in the group, or a randomly selected number of them. 
+ static constexpr int attraction_group_num_atoms_threshold_ = 500; + +public: + ~GreedyCandidateSelector(); + + /** + * @brief Constructor of the Greedy Candidate Selector class. Pre-computes + * data used by the candidate selector. + * + * @param atom_netlist + * The netlist of atoms to cluster. + * @param prepacker + * The prepacker used to generate pack-pattern molecules of the + * atoms in the netlist. + * @param packer_opts + * Options passed by the user to configure the packing + * algorithm. Changes how the candidates are selected. + * @param allow_unrelated_clustering + * Enables an algorithm in the selector to look for good + * candidates which are not necessarily connected to the + * cluster. + * @param max_molecule_stats + * The maximum stats over all molecules. Used for normalizing + * terms in the gain. + * @param primitive_candidate_block_types + * Candidate logical block types which are compatible with the + * given primitive model. + * @param high_fanout_thresholds + * The thresholds for what to consider as a high-fanout net + * for each logical block type. The clusterer will not consider + * nets with fanout higher than this to be important. + * @param is_clock + * The set of clock nets in the Atom Netlist. + * @param is_global + * The set of global nets in the Atom Netlist. These will be + * routed on special dedicated networks, and hence are less + * relevant to locality / attraction. + * @param net_output_feeds_driving_block_input + * The set of nets whose output feeds the block that drives + * them. This may cause double-counting in the gain + * calculations and needs special handling. + * @param timing_info + * Setup timing info for this Atom Netlist. Used to incorporate + * timing / criticality into the gain calculation. + * @param log_verbosity + * The verbosity of log messages in the candidate selector. 
+ */ + GreedyCandidateSelector(const AtomNetlist& atom_netlist, + const Prepacker& prepacker, + const t_packer_opts& packer_opts, + bool allow_unrelated_clustering, + const t_molecule_stats& max_molecule_stats, + const std::map>& primitive_candidate_block_types, + const t_pack_high_fanout_thresholds& high_fanout_thresholds, + const std::unordered_set& is_clock, + const std::unordered_set& is_global, + const std::unordered_set& net_output_feeds_driving_block_input, + const SetupTimingInfo& timing_info, + int log_verbosity); + + /** + * @brief Create and initialize the gain stats for the cluster with the + * given cluster_id which was created with the given cluster_seed_mol. + * + * Used when a seed is used to create a new cluster in a greedy clusterer. + * + * The returned ClusterGainStats object is used to store statistics on the + * cluster for selecting the candidate with the highest gain and keep track + * of which molecules have been tried. + * + * @param cluster_seed_mol + * The seed molecule which was used to create the cluster. + * @param cluster_id + * The legalization cluster ID of the cluster. + * @param cluster_legalizer + * The legalizer used to create the cluster. + * @param attraction_groups + * Groups of primitives that have extra attraction to each + * other. + */ + ClusterGainStats create_cluster_gain_stats( + t_pack_molecule* cluster_seed_mol, + LegalizationClusterId cluster_id, + const ClusterLegalizer& cluster_legalizer, + AttractionInfo& attraction_groups); + + /** + * @brief Update the cluster gain stats given that the successful_mol was + * packed successfully into the cluster. + * + * This marks and updates the gain stats for each block in the successfully + * clustered molecule. + * + * @param cluster_gain_stats + * The cluster gain stats to update. + * @param successful_mol + * The molecule which was successfully packed into the cluster. + * @param cluster_id + * The legalization cluster ID of the cluster. 
+ * @param cluster_legalizer + * The legalizer used to create the cluster. + * @param attraction_groups + * Groups of primitives that have extra attraction to each + * other. + */ + void update_cluster_gain_stats_candidate_success( + ClusterGainStats& cluster_gain_stats, + t_pack_molecule* successful_mol, + LegalizationClusterId cluster_id, + const ClusterLegalizer& cluster_legalizer, + AttractionInfo& attraction_groups); + + /** + * @brief Update the cluster gain stats given that the failed_mol was not + * packed successfully into the cluster. + * + * This tracks the failed molecule to help decide future molecules to + * select. + * + * @param cluster_gain_stats + * The cluster gain stats to update. + * @param failed_mol + * The molecule that failed to pack into the cluster. + */ + void update_cluster_gain_stats_candidate_failed( + ClusterGainStats& cluster_gain_stats, + t_pack_molecule* failed_mol); + + /** + * @brief Given the cluster_gain_stats, select the next candidate molecule + * to pack into the given cluster. + * + * This uses the gain stats to find the unclustered molecule which will + * likely have the highest gain. + * + * @param cluster_gain_stats + * The cluster gain stats maintained for this cluster. + * @param cluster_id + * The legalization cluster id for the cluster. + * @param cluster_legalizer + * The legalizer used to create the cluster. + * @param prepacker + * The prepacker used to generate pack-pattern molecules of the + * atoms in the netlist. + * @param attraction_groups + * Groups of primitives that have extra attraction to each + * other. + */ + t_pack_molecule* get_next_candidate_for_cluster( + ClusterGainStats& cluster_gain_stats, + LegalizationClusterId cluster_id, + const ClusterLegalizer& cluster_legalizer, + const Prepacker& prepacker, + AttractionInfo& attraction_groups); + + /** + * @brief Finalize the creation of a cluster. + * + * This should be called after all molecules have been packed into a cluster. 
+ * + * This updates internal lookup tables in the candidate selector. For + * example, which inter-clb nets exist on a cluster is stored by this + * routine to make later transitive gain function calculations more + * efficient. + * + * @param cluster_gain_stats + * The cluster gain stats for the cluster to finalize. + * @param cluster_id + * The legalization cluster id of the cluster to finalize. + */ + void update_candidate_selector_finalize_cluster( + ClusterGainStats& cluster_gain_stats, + LegalizationClusterId cluster_id); + +private: + // ===================================================================== // + // Cluster Gain Stats Updating + // ===================================================================== // + + /** + * @brief Flag used to decide if the gains of affected blocks should be + * updated when a block is marked. + */ + enum class e_gain_update : bool { + GAIN, ///< Update the gains of affected blocks. + NO_GAIN ///< Do not update the gains of affected blocks. + }; + + /** + * @brief Flag used to indicate if the net is an input or output when + * updating the connection gain values. + */ + enum class e_net_relation_to_clustered_block : bool { + INPUT, ///< This is an input net. + OUTPUT ///< This is an output net. + }; + + /** + * @brief Updates the marked data structures, and, if gain_flag is GAIN, the + * gain when an atom block is added to a cluster. + */ + void mark_and_update_partial_gain(ClusterGainStats& cluster_gain_stats, + AtomNetId net_id, + e_gain_update gain_flag, + AtomBlockId clustered_blk_id, + const ClusterLegalizer& cluster_legalizer, + int high_fanout_net_threshold, + e_net_relation_to_clustered_block net_relation_to_clustered_block); + + /** + * @brief Updates the connection_gain in the cluster_gain_stats. 
+ */ + void update_connection_gain_values(ClusterGainStats& cluster_gain_stats, + AtomNetId net_id, + AtomBlockId clustered_blk_id, + const ClusterLegalizer& cluster_legalizer, + e_net_relation_to_clustered_block net_relation_to_clustered_block); + + /** + * @brief Updates the timing_gain in the cluster_gain_stats. + */ + void update_timing_gain_values(ClusterGainStats& cluster_gain_stats, + AtomNetId net_id, + const ClusterLegalizer& cluster_legalizer, + e_net_relation_to_clustered_block net_relation_to_clustered_block); + + /** + * @brief Updates the total gain array to reflect the desired tradeoff + * between input sharing (sharing_gain) and path_length minimization + * (timing_gain) each time a new molecule is added to the + * cluster. + */ + void update_total_gain(ClusterGainStats& cluster_gain_stats, + AttractionInfo& attraction_groups); + + // ===================================================================== // + // Cluster Candidate Selection + // ===================================================================== // + + /** + * @brief Add molecules with strong connectedness to the current cluster to + * the list of feasible blocks. + */ + void add_cluster_molecule_candidates_by_connectivity_and_timing( + ClusterGainStats& cluster_gain_stats, + LegalizationClusterId legalization_cluster_id, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + AttractionInfo& attraction_groups); + + /** + * @brief Score unclustered atoms that are two hops away from current + * cluster + * + * For example, consider a cluster that has a FF feeding an adder in another + * cluster. Since this FF is feeding an adder that is packed in another + * cluster this function should find other FFs that are feeding other inputs + * of this adder since they are two hops away from the FF packed in this + * cluster + * + * This is used when adding molecule candidates by transitive connectivity. 
+ */ + void load_transitive_fanout_candidates( + ClusterGainStats& cluster_gain_stats, + LegalizationClusterId legalization_cluster_id, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer); + + /** + * @brief Add molecules based on transitive connections (eg. 2 hops away) + * with current cluster. + */ + void add_cluster_molecule_candidates_by_transitive_connectivity( + ClusterGainStats& cluster_gain_stats, + LegalizationClusterId legalization_cluster_id, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + AttractionInfo& attraction_groups); + + /** + * @brief Add molecules based on weak connectedness (connected by high + * fanout nets) with current cluster. + */ + void add_cluster_molecule_candidates_by_highfanout_connectivity( + ClusterGainStats& cluster_gain_stats, + LegalizationClusterId legalization_cluster_id, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + AttractionInfo& attraction_groups); + + /** + * @brief If the current cluster being packed has an attraction group + * associated with it (i.e. there are atoms in it that belong to an + * attraction group), this routine adds molecules from the associated + * attraction group to the list of feasible blocks for the cluster. + * + * Attraction groups can be very large, so we only add some randomly + * selected molecules for efficiency if the number of atoms in the group is + * greater than some threshold. Therefore, the molecules added to the + * candidates will vary each time you call this function. + */ + void add_cluster_molecule_candidates_by_attraction_group( + ClusterGainStats& cluster_gain_stats, + LegalizationClusterId legalization_cluster_id, + const Prepacker& prepacker, + const ClusterLegalizer& cluster_legalizer, + AttractionInfo& attraction_groups); + + /** + * @brief Finds a molecule to propose which is unrelated but may be good to + * cluster. 
+ */ + t_pack_molecule* get_unrelated_candidate_for_cluster( + LegalizationClusterId cluster_id, + const ClusterLegalizer& cluster_legalizer); + + // ===================================================================== // + // Internal Variables + // ===================================================================== // + + /// @brief The atom netlist to cluster over. + const AtomNetlist& atom_netlist_; + + /// @brief The packer options used to configure the clusterer. + const t_packer_opts& packer_opts_; + + /// @brief Whether unrelated clustering should be performed or not. + const bool allow_unrelated_clustering_; + + /// @brief The verbosity of log messages in the candidate selector. + const int log_verbosity_; + + /// @brief Pre-computed vector of logical block types that could implement + /// the given model in the architecture. + const std::map>& primitive_candidate_block_types_; + + /// @brief The high-fanout thresholds per logical block type. Used to ignore + /// certain nets when calculating the gain for the next candidate + /// molecule to cluster. + // TODO: This should really be a map from the logical block type to the + // threshold. + const t_pack_high_fanout_thresholds& high_fanout_thresholds_; + + /// @brief A set of atom nets which are considered as clocks. + const std::unordered_set& is_clock_; + + /// @brief A set of atom nets which are considered as global nets. + const std::unordered_set& is_global_; + + /// @brief A set of atom nets which have outputs that feed the block that + /// drive them. + const std::unordered_set& net_output_feeds_driving_block_input_; + + /// @brief Setup timing info used to help select critical candidates to pack. + const SetupTimingInfo& timing_info_; + + /// @brief Inter-block nets within a finalized cluster. Used for finding + /// transitive candidates. + vtr::vector> clb_inter_blk_nets_; + + /// @brief Data pre-computed to help select unrelated molecules. 
This is a + /// list of lists of molecules sorted by their gain, where the first + /// dimension is the number of external inputs of the molecule. + std::vector> unrelated_clustering_data_; + + /// @brief A count on the number of unrelated clustering attempts which + /// have been performed. + int num_unrelated_clustering_attempts_ = 0; +}; + diff --git a/vpr/src/pack/greedy_clusterer.cpp b/vpr/src/pack/greedy_clusterer.cpp index d8612897069..dd2c9fb4551 100644 --- a/vpr/src/pack/greedy_clusterer.cpp +++ b/vpr/src/pack/greedy_clusterer.cpp @@ -47,6 +47,7 @@ #include "cluster_legalizer.h" #include "cluster_util.h" #include "constraints_report.h" +#include "greedy_candidate_selector.h" #include "greedy_seed_selector.h" #include "pack_types.h" #include "physical_types.h" @@ -127,19 +128,20 @@ GreedyClusterer::do_clustering(ClusterLegalizer& cluster_legalizer, // Calculate the max molecule stats, which is used for gain calculation. const t_molecule_stats max_molecule_stats = prepacker.calc_max_molecule_stats(atom_netlist_); - // Initialize the information for the greedy candidate selector. - // TODO: Abstract into a candidate selector class. - /* TODO: This is memory inefficient, fix if causes problems */ - /* Store stats on nets used by packed block, useful for determining transitively connected blocks - * (eg. [A1, A2, ..]->[B1, B2, ..]->C implies cluster [A1, A2, ...] and C have a weak link) */ - vtr::vector> clb_inter_blk_nets(atom_netlist_.blocks().size()); - // FIXME: This should be abstracted into a selector class. This is only used - // for gain calculation and selecting candidate molecules. - t_clustering_data clustering_data; - alloc_and_init_clustering(max_molecule_stats, - prepacker, - clustering_data, - clustering_stats.num_molecules); + // Create the greedy candidate selector. This will be used to select + // candidate molecules to add to the clusters. 
+ GreedyCandidateSelector candidate_selector(atom_netlist_, + prepacker, + packer_opts_, + allow_unrelated_clustering, + max_molecule_stats, + primitive_candidate_block_types_, + high_fanout_thresholds_, + is_clock_, + is_global_, + net_output_feeds_driving_block_input_, + *timing_info, + log_verbosity_); // Create the greedy seed selector. GreedySeedSelector seed_selector(atom_netlist_, @@ -174,14 +176,11 @@ GreedyClusterer::do_clustering(ClusterLegalizer& cluster_legalizer, // route for each molecule (i.e. just use faster but not fully // conservative legality checks). LegalizationClusterId new_cluster_id = try_grow_cluster(seed_mol, + candidate_selector, ClusterLegalizationStrategy::SKIP_INTRA_LB_ROUTE, cluster_legalizer, prepacker, - allow_unrelated_clustering, balance_block_type_utilization, - *timing_info, - clb_inter_blk_nets, - clustering_data, attraction_groups, num_used_type_instances, mutable_device_ctx); @@ -191,20 +190,17 @@ GreedyClusterer::do_clustering(ClusterLegalizer& cluster_legalizer, // but this time perform full legalization for each molecule added // to the cluster. new_cluster_id = try_grow_cluster(seed_mol, + candidate_selector, ClusterLegalizationStrategy::FULL, cluster_legalizer, prepacker, - allow_unrelated_clustering, balance_block_type_utilization, - *timing_info, - clb_inter_blk_nets, - clustering_data, attraction_groups, num_used_type_instances, mutable_device_ctx); } - // Ensure that at the seed was packed successfully. + // Ensure that the seed was packed successfully. VTR_ASSERT(new_cluster_id.is_valid()); VTR_ASSERT(cluster_legalizer.is_mol_clustered(seed_mol)); @@ -231,25 +227,16 @@ GreedyClusterer::do_clustering(ClusterLegalizer& cluster_legalizer, // If this architecture has LE physical block, report its usage. report_le_physical_block_usage(cluster_legalizer); - // Free the clustering data. - // FIXME: This struct should use standard data structures so it does not - // have to be freed like this. 
This is also specific to the candidate - // gain calculation. - free_clustering_data(clustering_data); - return num_used_type_instances; } LegalizationClusterId GreedyClusterer::try_grow_cluster( t_pack_molecule* seed_mol, + GreedyCandidateSelector& candidate_selector, ClusterLegalizationStrategy strategy, ClusterLegalizer& cluster_legalizer, Prepacker& prepacker, - bool allow_unrelated_clustering, bool balance_block_type_utilization, - SetupTimingInfo& timing_info, - vtr::vector>& clb_inter_blk_nets, - t_clustering_data& clustering_data, AttractionInfo& attraction_groups, std::map& num_used_type_instances, DeviceContext& mutable_device_ctx) { @@ -267,36 +254,20 @@ LegalizationClusterId GreedyClusterer::try_grow_cluster( num_used_type_instances, mutable_device_ctx); - int high_fanout_threshold = high_fanout_thresholds_.get_threshold(cluster_legalizer.get_cluster_type(legalization_cluster_id)->name); - update_cluster_stats(seed_mol, - cluster_legalizer, - is_clock_, //Set of clock nets - is_global_, //Set of global nets (currently all clocks) - packer_opts_.global_clocks, - packer_opts_.alpha, packer_opts_.beta, - packer_opts_.timing_driven, packer_opts_.connection_driven, - high_fanout_threshold, - timing_info, - attraction_groups, - net_output_feeds_driving_block_input_); - - int num_unrelated_clustering_attempts = 0; - t_pack_molecule *candidate_mol; - candidate_mol = get_molecule_for_cluster(cluster_legalizer.get_cluster_pb(legalization_cluster_id), - attraction_groups, - allow_unrelated_clustering, - packer_opts_.prioritize_transitive_connectivity, - packer_opts_.transitive_fanout_threshold, - packer_opts_.feasible_block_array_size, - &num_unrelated_clustering_attempts, - prepacker, - cluster_legalizer, - clb_inter_blk_nets, - legalization_cluster_id, - log_verbosity_, - clustering_data.unclustered_list_head, - clustering_data.unclustered_list_head_size, - primitive_candidate_block_types_); + // Create the cluster gain stats. 
This updates the gains in the candidate + // selector due to a new molecule being clustered. + ClusterGainStats cluster_gain_stats = candidate_selector.create_cluster_gain_stats(seed_mol, + legalization_cluster_id, + cluster_legalizer, + attraction_groups); + + // Select the first candidate molecule to try to add to this cluster. + t_pack_molecule* candidate_mol = candidate_selector.get_next_candidate_for_cluster( + cluster_gain_stats, + legalization_cluster_id, + cluster_legalizer, + prepacker, + attraction_groups); /* * When attraction groups are created, the purpose is to pack more densely by adding more molecules @@ -324,42 +295,31 @@ LegalizationClusterId GreedyClusterer::try_grow_cluster( // If the candidate molecule was clustered successfully, update // the cluster stats. if (success) { - update_cluster_stats(candidate_mol, - cluster_legalizer, - is_clock_, //Set of all clocks - is_global_, //Set of all global signals (currently clocks) - packer_opts_.global_clocks, - packer_opts_.alpha, - packer_opts_.beta, - packer_opts_.timing_driven, - packer_opts_.connection_driven, - high_fanout_threshold, - timing_info, - attraction_groups, - net_output_feeds_driving_block_input_); - num_unrelated_clustering_attempts = 0; + // If the last candidate was clustered successfully, update the + // gains in the candidate selector. + candidate_selector.update_cluster_gain_stats_candidate_success(cluster_gain_stats, + candidate_mol, + legalization_cluster_id, + cluster_legalizer, + attraction_groups); + } else { + // If the last candidate was not clustered successfully, update the + // gains in the candidate selector accordingly. + candidate_selector.update_cluster_gain_stats_candidate_failed(cluster_gain_stats, + candidate_mol); } // Get the next candidate molecule. 
t_pack_molecule* prev_candidate_mol = candidate_mol; - candidate_mol = get_molecule_for_cluster(cluster_legalizer.get_cluster_pb(legalization_cluster_id), - attraction_groups, - allow_unrelated_clustering, - packer_opts_.prioritize_transitive_connectivity, - packer_opts_.transitive_fanout_threshold, - packer_opts_.feasible_block_array_size, - &num_unrelated_clustering_attempts, - prepacker, - cluster_legalizer, - clb_inter_blk_nets, - legalization_cluster_id, - log_verbosity_, - clustering_data.unclustered_list_head, - clustering_data.unclustered_list_head_size, - primitive_candidate_block_types_); + candidate_mol = candidate_selector.get_next_candidate_for_cluster( + cluster_gain_stats, + legalization_cluster_id, + cluster_legalizer, + prepacker, + attraction_groups); // If the next candidate molecule is the same as the previous - // candidate molecule, increment the number of repreated + // candidate molecule, increment the number of repeated // molecules counter. if (candidate_mol == prev_candidate_mol) num_repeated_molecules++; @@ -385,23 +345,13 @@ LegalizationClusterId GreedyClusterer::try_grow_cluster( } } + // A legal cluster must have been created by this point. VTR_ASSERT(legalization_cluster_id.is_valid()); - // Legal cluster was created. Store cluster info and clean cluster. - - // store info that will be used later in packing from pb_stats. - // FIXME: If this is used for gain, it should be moved into the selector - // class. Perhaps a finalize_cluster_gain method. - t_pb* cur_pb = cluster_legalizer.get_cluster_pb(legalization_cluster_id); - t_pb_stats* pb_stats = cur_pb->pb_stats; - for (const AtomNetId mnet_id : pb_stats->marked_nets) { - int external_terminals = atom_netlist_.net_pins(mnet_id).size() - pb_stats->num_pins_of_net_in_pb[mnet_id]; - // Check if external terminals of net is within the fanout limit and - // that there exists external terminals. 
- if (external_terminals < packer_opts_.transitive_fanout_threshold && external_terminals > 0) { - clb_inter_blk_nets[legalization_cluster_id].push_back(mnet_id); - } - } + // After the cluster has been fully created, update internal structures + // to improve the gain calculation. + candidate_selector.update_candidate_selector_finalize_cluster(cluster_gain_stats, + legalization_cluster_id); // Since the cluster will no longer be added to beyond this point, // clean the cluster of any data not strictly necessary for diff --git a/vpr/src/pack/greedy_clusterer.h b/vpr/src/pack/greedy_clusterer.h index 6df695b3336..eb1dff3afdf 100644 --- a/vpr/src/pack/greedy_clusterer.h +++ b/vpr/src/pack/greedy_clusterer.h @@ -13,13 +13,13 @@ #include #include "cluster_legalizer.h" #include "physical_types.h" -#include "vtr_vector.h" // Forward declarations class AtomNetId; class AtomNetlist; class AttractionInfo; class DeviceContext; +class GreedyCandidateSelector; class Prepacker; class SetupTimingInfo; class t_pack_high_fanout_thresholds; @@ -107,12 +107,12 @@ class GreedyClusterer { * have multiple logical block types to which they can cluster, * e.g. multiple sizes of physical RAMs exist on the chip. * @param attraction_groups - * clustering process. These are groups of primitives that have - * extra attraction to each other; currently they are used to - * guide the clusterer when it must cluster some parts of a - * design densely due to user placement/floorplanning - * constraints. They are created if some floorplan regions are - * overfilled after a clustering attempt. + * These are groups of primitives that have extra attraction to + * each other; currently they are used to guide the clusterer + * when it must cluster some parts of a design densely due to + * user placement/floorplanning constraints. They are created + * if some floorplan regions are overfilled after a clustering + * attempt. * @param mutable_device_ctx * The mutable device context. 
The clusterer will modify the * device context by potentially increasing the size of the @@ -149,14 +149,11 @@ class GreedyClusterer { * can exist in a cluster), so it will always return a valid cluster ID. */ LegalizationClusterId try_grow_cluster(t_pack_molecule* seed_mol, + GreedyCandidateSelector& candidate_selector, ClusterLegalizationStrategy strategy, ClusterLegalizer& cluster_legalizer, Prepacker& prepacker, - bool allow_unrelated_clustering, bool balance_block_type_utilization, - SetupTimingInfo& timing_info, - vtr::vector>& clb_inter_blk_nets, - t_clustering_data& clustering_data, AttractionInfo& attraction_groups, std::map& num_used_type_instances, DeviceContext& mutable_device_ctx); diff --git a/vpr/src/pack/greedy_seed_selector.cpp b/vpr/src/pack/greedy_seed_selector.cpp index 6421cef5f19..24a57930f71 100644 --- a/vpr/src/pack/greedy_seed_selector.cpp +++ b/vpr/src/pack/greedy_seed_selector.cpp @@ -10,7 +10,6 @@ #include #include "atom_netlist.h" #include "cluster_legalizer.h" -#include "cluster_util.h" #include "echo_files.h" #include "prepack.h" #include "vpr_error.h" @@ -44,7 +43,7 @@ static inline float get_seed_gain(AtomBlockId blk_id, case e_cluster_seed::MAX_INPUTS: { const t_pack_molecule* blk_mol = prepacker.get_atom_molecule(blk_id); - const t_molecule_stats molecule_stats = calc_molecule_stats(blk_mol, atom_netlist); + const t_molecule_stats molecule_stats = prepacker.calc_molecule_stats(blk_mol, atom_netlist); return molecule_stats.num_used_ext_inputs; } // By blended gain (criticality and inputs used). 
@@ -56,7 +55,7 @@ static inline float get_seed_gain(AtomBlockId blk_id, float seed_blend_fac = 0.5f; const t_pack_molecule* blk_mol = prepacker.get_atom_molecule(blk_id); - const t_molecule_stats molecule_stats = calc_molecule_stats(blk_mol, atom_netlist); + const t_molecule_stats molecule_stats = prepacker.calc_molecule_stats(blk_mol, atom_netlist); VTR_ASSERT(max_molecule_stats.num_used_ext_inputs > 0); float used_ext_input_pin_ratio = vtr::safe_ratio(molecule_stats.num_used_ext_inputs, max_molecule_stats.num_used_ext_inputs); @@ -71,7 +70,7 @@ static inline float get_seed_gain(AtomBlockId blk_id, case e_cluster_seed::MAX_PINS: { const t_pack_molecule* blk_mol = prepacker.get_atom_molecule(blk_id); - const t_molecule_stats molecule_stats = calc_molecule_stats(blk_mol, atom_netlist); + const t_molecule_stats molecule_stats = prepacker.calc_molecule_stats(blk_mol, atom_netlist); return molecule_stats.num_pins; } // By input pins per molecule (i.e. available pins on primitives, not pins in use). 
@@ -80,13 +79,13 @@ static inline float get_seed_gain(AtomBlockId blk_id, case e_cluster_seed::MAX_INPUT_PINS: { const t_pack_molecule* blk_mol = prepacker.get_atom_molecule(blk_id); - const t_molecule_stats molecule_stats = calc_molecule_stats(blk_mol, atom_netlist); + const t_molecule_stats molecule_stats = prepacker.calc_molecule_stats(blk_mol, atom_netlist); return molecule_stats.num_input_pins; } case e_cluster_seed::BLEND2: { const t_pack_molecule* mol = prepacker.get_atom_molecule(blk_id); - const t_molecule_stats molecule_stats = calc_molecule_stats(mol, atom_netlist); + const t_molecule_stats molecule_stats = prepacker.calc_molecule_stats(mol, atom_netlist); float pin_ratio = vtr::safe_ratio(molecule_stats.num_pins, max_molecule_stats.num_pins); float input_pin_ratio = vtr::safe_ratio(molecule_stats.num_input_pins, max_molecule_stats.num_input_pins); diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index cb27a23e831..e9ba3f7f7e3 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -1,6 +1,7 @@ #include #include "SetupGrid.h" +#include "attraction_groups.h" #include "cluster_legalizer.h" #include "cluster_util.h" #include "constraints_report.h" diff --git a/vpr/src/pack/pack_types.h b/vpr/src/pack/pack_types.h index 95a460751b1..44fa9107966 100644 --- a/vpr/src/pack/pack_types.h +++ b/vpr/src/pack/pack_types.h @@ -36,50 +36,8 @@ const std::vector lb_rr_type_str{ /* Stores statistical information for a physical cluster_ctx.blocks such as costs and usages */ struct t_pb_stats { - /* Packing statistics */ - std::map gain; /* Attraction (inverse of cost) function */ - - std::map timinggain; /* The timing criticality score of this atom cluster_ctx.blocks. 
- * Determined by the most critical atom net - * between this atom cluster_ctx.blocks and any atom cluster_ctx.blocks in - * the current pb */ - std::map connectiongain; /* Weighted sum of connections to attraction function */ - std::map sharinggain; /* How many nets on an atom cluster_ctx.blocks are already in the pb under consideration */ - - /* This is the gain used for hill-climbing. It stores* - * the reduction in the number of pins that adding this atom cluster_ctx.blocks to the the* - * current pb will have. This reflects the fact that sometimes the * - * addition of an atom cluster_ctx.blocks to a pb may reduce the number of inputs * - * required if it shares inputs with all other BLEs and it's output is * - * used by all other child pbs in this parent pb. */ - std::map hillgain; - - /* - * stores the number of times atoms have failed to be packed into the cluster - * key: root block id of the molecule, value: number of times the molecule has failed to be packed into the cluster - */ - std::map atom_failures; - - int pulled_from_atom_groups; - int num_att_group_atoms_used; - - std::vector available_att_group_atoms; - - std::vector marked_nets; //List of nets with the num_pins_of_net_in_pb and gain entries altered - std::vector marked_blocks; //List of blocks with the num_pins_of_net_in_pb and gain entries altered - int num_child_blocks_in_pb; - AtomNetId tie_break_high_fanout_net; /* If no marked candidate molecules, use - * this high fanout net to determine the - * next candidate atom */ - bool explore_transitive_fanout; /* If no marked candidate molecules and no high fanout nets to determine next candidate molecule then explore molecules on transitive fanout */ - std::map transitive_fanout_candidates; // Holding trasitive fanout candidates key: root block id of the molecule, value: pointer to the molecule - - /* How many pins of each atom net are contained in the * - * currently open pb? 
*/ - std::map num_pins_of_net_in_pb; - /* Record of pins of class used */ std::vector> input_pins_used; /* [0..pb_graph_node->num_pin_classes-1] nets using this input pin class */ std::vector> output_pins_used; /* [0..pb_graph_node->num_pin_classes-1] nets using this output pin class */ @@ -87,16 +45,6 @@ struct t_pb_stats { /* Use vector because array size is expected to be small so runtime should be faster using vector than map despite the O(N) vs O(log(n)) behaviour.*/ std::vector> lookahead_input_pins_used; /* [0..pb_graph_node->num_pin_classes-1] vector of input pins of this class that are speculatively used */ std::vector> lookahead_output_pins_used; /* [0..pb_graph_node->num_pin_classes-1] vector of input pins of this class that are speculatively used */ - - //The attraction group associated with the cluster. - //Will be AttractGroupId::INVALID() if no attraction group is associated with the cluster. - AttractGroupId attraction_grp_id; - - /* Array of feasible blocks to select from [0..max_array_size-1] - * Sorted in ascending gain order so that the last cluster_ctx.blocks is the most desirable (this makes it easy to pop blocks off the list - */ - t_pack_molecule** feasible_blocks; - int num_feasible_blocks; /* [0..num_marked_models-1] */ }; /************************************************************************** diff --git a/vpr/src/pack/prepack.cpp b/vpr/src/pack/prepack.cpp index f7ade02d767..9cda9de0360 100644 --- a/vpr/src/pack/prepack.cpp +++ b/vpr/src/pack/prepack.cpp @@ -20,7 +20,6 @@ #include #include "atom_netlist.h" -#include "cluster_util.h" #include "echo_files.h" #include "physical_types.h" #include "vpr_error.h" @@ -1730,6 +1729,78 @@ void Prepacker::init(const AtomNetlist& atom_nlist, const std::vectoratom_block_ids) { + if (!blk) continue; + + ++molecule_stats.num_blocks; //Record number of valid blocks in molecule + + const t_model* model = atom_nlist.block_model(blk); + + for (const t_model_ports* input_port = model->inputs; input_port 
!= nullptr; input_port = input_port->next) { + molecule_stats.num_input_pins += input_port->size; + } + + for (const t_model_ports* output_port = model->outputs; output_port != nullptr; output_port = output_port->next) { + molecule_stats.num_output_pins += output_port->size; + } + } + molecule_stats.num_pins = molecule_stats.num_input_pins + molecule_stats.num_output_pins; + + //Calculate the number of externally used pins + std::set molecule_atoms(molecule->atom_block_ids.begin(), molecule->atom_block_ids.end()); + for (auto blk : molecule->atom_block_ids) { + if (!blk) continue; + + for (auto pin : atom_nlist.block_pins(blk)) { + auto net = atom_nlist.pin_net(pin); + + auto pin_type = atom_nlist.pin_type(pin); + if (pin_type == PinType::SINK) { + auto driver_blk = atom_nlist.net_driver_block(net); + + if (molecule_atoms.count(driver_blk)) { + //Pin driven by a block within the molecule + //Does not count as an external connection + } else { + //Pin driven by a block outside the molecule + ++molecule_stats.num_used_ext_inputs; + } + + } else { + VTR_ASSERT(pin_type == PinType::DRIVER); + + bool net_leaves_molecule = false; + for (auto sink_pin : atom_nlist.net_sinks(net)) { + auto sink_blk = atom_nlist.pin_block(sink_pin); + + if (!molecule_atoms.count(sink_blk)) { + //There is at least one sink outside of the current molecule + net_leaves_molecule = true; + break; + } + } + + //We assume that any fanout occurs outside of the molecule, hence we only + //count one used output (even if there are multiple sinks outside the molecule) + if (net_leaves_molecule) { + ++molecule_stats.num_used_ext_outputs; + } + } + } + } + molecule_stats.num_used_ext_pins = molecule_stats.num_used_ext_inputs + molecule_stats.num_used_ext_outputs; + + return molecule_stats; +} + t_molecule_stats Prepacker::calc_max_molecule_stats(const AtomNetlist& atom_nlist) const { t_molecule_stats max_molecules_stats; t_pack_molecule* molecule_head = list_of_pack_molecules; diff --git 
a/vpr/src/pack/prepack.h b/vpr/src/pack/prepack.h index 74c1071a907..810c79bd19c 100644 --- a/vpr/src/pack/prepack.h +++ b/vpr/src/pack/prepack.h @@ -20,6 +20,36 @@ class AtomBlockId; struct t_molecule_stats; struct t_logical_block_type; +/** + * @brief Statistics on a molecule. + * + * This is used during packing to quickly look up information on a molecule to + * compute gain information. + * + * TODO: The prepacker should precompute this information per molecule. + */ +struct t_molecule_stats { + /// @brief Number of blocks across all primitives in molecule. + int num_blocks = 0; + + /// @brief Number of pins across all primitives in molecule. + int num_pins = 0; + /// @brief Number of input pins across all primitives in molecule. + int num_input_pins = 0; + /// @brief Number of output pins across all primitives in molecule. + int num_output_pins = 0; + + /// @brief Number of *used external* (i.e. come from outside of the + /// molecule) pins across all primitives in molecule. + int num_used_ext_pins = 0; + /// @brief Number of *used external* input pins across all primitives in + /// molecule. + int num_used_ext_inputs = 0; + /// @brief Number of *used external* output pins across all primitives in + /// molecule. + int num_used_ext_outputs = 0; +}; + /** * @brief Class that performs prepacking. * @@ -119,6 +149,12 @@ class Prepacker { return molecules; } + /** + * @brief Calculates molecule statistics for a single molecule.
+ */ + t_molecule_stats calc_molecule_stats(const t_pack_molecule* molecule, + const AtomNetlist& atom_netlist) const; + /** * @brief Calculates maximum molecule statistics accross all molecules, */ diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp index c2aa98286c0..7d820c86e5d 100644 --- a/vpr/src/util/vpr_utils.cpp +++ b/vpr/src/util/vpr_utils.cpp @@ -1419,24 +1419,7 @@ void free_pb(t_pb* pb) { } void free_pb_stats(t_pb* pb) { - if (pb) { - if (pb->pb_stats == nullptr) { - return; - } - - pb->pb_stats->gain.clear(); - pb->pb_stats->timinggain.clear(); - pb->pb_stats->sharinggain.clear(); - pb->pb_stats->hillgain.clear(); - pb->pb_stats->connectiongain.clear(); - pb->pb_stats->num_pins_of_net_in_pb.clear(); - - if (pb->pb_stats->feasible_blocks) { - delete[] pb->pb_stats->feasible_blocks; - } - if (!pb->parent_pb) { - pb->pb_stats->transitive_fanout_candidates.clear(); - } + if (pb && pb->pb_stats != nullptr) { delete pb->pb_stats; pb->pb_stats = nullptr; }