diff --git a/vpr/src/base/read_netlist.cpp b/vpr/src/base/read_netlist.cpp index f9d0be47641..7a328565882 100644 --- a/vpr/src/base/read_netlist.cpp +++ b/vpr/src/base/read_netlist.cpp @@ -60,7 +60,6 @@ static size_t mark_constant_generators_rec(const t_pb* pb, const t_pb_routes& pb static t_pb_routes alloc_pb_route(t_pb_graph_node* pb_graph_node); static void load_atom_pin_mapping(const ClusteredNetlist& clb_nlist); -static void set_atom_pin_mapping(const ClusteredNetlist& clb_nlist, const AtomBlockId atom_blk, const AtomPortId atom_port, const t_pb_graph_pin* gpin); /** * @brief Initializes the clb_nlist with info from a netlist @@ -1219,7 +1218,7 @@ static void load_atom_pin_mapping(const ClusteredNetlist& clb_nlist) { } } -static void set_atom_pin_mapping(const ClusteredNetlist& clb_nlist, const AtomBlockId atom_blk, const AtomPortId atom_port, const t_pb_graph_pin* gpin) { +void set_atom_pin_mapping(const ClusteredNetlist& clb_nlist, const AtomBlockId atom_blk, const AtomPortId atom_port, const t_pb_graph_pin* gpin) { auto& atom_ctx = g_vpr_ctx.mutable_atom(); VTR_ASSERT(atom_ctx.nlist.port_block(atom_port) == atom_blk); diff --git a/vpr/src/base/read_netlist.h b/vpr/src/base/read_netlist.h index 186dc77ca62..e430f278bd5 100644 --- a/vpr/src/base/read_netlist.h +++ b/vpr/src/base/read_netlist.h @@ -17,4 +17,9 @@ ClusteredNetlist read_netlist(const char* net_file, bool verify_file_digests, int verbosity); +void set_atom_pin_mapping(const ClusteredNetlist& clb_nlist, + const AtomBlockId atom_blk, + const AtomPortId atom_port, + const t_pb_graph_pin* gpin); + #endif diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index f4d759a862e..d2b55121543 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -348,6 +348,9 @@ void vpr_init_with_options(const t_options* options, t_vpr_setup* vpr_setup, t_a } fflush(stdout); + + auto& helper_ctx = g_vpr_ctx.mutable_helper(); + helper_ctx.lb_type_rr_graphs = vpr_setup->PackerRRGraph; } bool vpr_flow(t_vpr_setup& vpr_setup, t_arch& arch) { @@ -382,6 +385,14 @@ bool vpr_flow(t_vpr_setup& vpr_setup, t_arch& arch) { { //Analysis vpr_analysis_flow(vpr_setup, arch, route_status); } + + //clean packing-placement data + if (vpr_setup.PackerOpts.doPacking == STAGE_DO) { + auto& helper_ctx = g_vpr_ctx.mutable_helper(); + free_cluster_placement_stats(helper_ctx.cluster_placement_stats); + } + + //close the graphics vpr_close_graphics(vpr_setup); return route_status.success(); diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index d4f5a3a221e..750179f5d95 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -55,12 +55,18 @@ struct AtomContext : public Context { /******************************************************************** * Atom Netlist ********************************************************************/ - + AtomContext() + : list_of_pack_molecules(nullptr, free_pack_molecules) {} ///@brief Atom netlist AtomNetlist nlist; ///@brief Mappings to/from the Atom Netlist to physically described .blif models AtomLookup lookup; + + ///@brief The molecules associated with each atom block + std::multimap atom_molecules; + + std::unique_ptr list_of_pack_molecules; }; /** @@ -259,6 +265,26 @@ struct ClusteringContext : public Context { */ std::map> post_routing_clb_pin_nets; std::map> pre_routing_net_pin_mapping; + + std::map num_used_type_instances; +}; + +struct ClusteringHelperContext : public Context { + std::map num_used_type_instances; + t_cluster_placement_stats* cluster_placement_stats; + int num_models; + int max_cluster_size; + t_pb_graph_node** primitives_list; + + bool enable_pin_feasibility_filter; + int feasible_block_array_size; + + int total_clb_num; + std::vector* lb_type_rr_graphs; + + ~ClusteringHelperContext() { + free(primitives_list); + } }; /** @@ -446,6 +472,9 @@ class VprContext : public Context { const ClusteringContext& clustering() const { return clustering_; } ClusteringContext& mutable_clustering() { return clustering_; } + const ClusteringHelperContext& helper() const { return helper_; } + ClusteringHelperContext& mutable_helper() { return helper_; } + const PlacementContext& placement() const { return placement_; } PlacementContext& mutable_placement() { return placement_; } @@ -464,6 +493,8 @@ class VprContext : public Context { PowerContext power_; ClusteringContext clustering_; + ClusteringHelperContext helper_; + PlacementContext placement_; RoutingContext routing_; FloorplanningContext constraints_; diff --git a/vpr/src/base/vpr_types.cpp b/vpr/src/base/vpr_types.cpp index 5ba8f4910f5..5b74779893e 100644 --- a/vpr/src/base/vpr_types.cpp +++ b/vpr/src/base/vpr_types.cpp @@ -1,5 +1,6 @@ #include #include "vpr_types.h" +#include "globals.h" t_ext_pin_util_targets::t_ext_pin_util_targets(float default_in_util, float default_out_util) { defaults_.input_pin_util = default_in_util; @@ -213,3 +214,53 @@ BitIndex t_pb::atom_pin_bit_index(const t_pb_graph_pin* gpin) const { void t_pb::set_atom_pin_bit_index(const t_pb_graph_pin* gpin, BitIndex atom_pin_bit_idx) { pin_rotations_[gpin] = atom_pin_bit_idx; } + +void free_pack_molecules(t_pack_molecule* list_of_pack_molecules) { + t_pack_molecule* cur_pack_molecule = list_of_pack_molecules; + while (cur_pack_molecule != nullptr) { + cur_pack_molecule = list_of_pack_molecules->next; + delete list_of_pack_molecules; + list_of_pack_molecules = cur_pack_molecule; + } +} + +/** + * Free linked lists found in cluster_placement_stats_list + */ +void free_cluster_placement_stats(t_cluster_placement_stats* cluster_placement_stats_list) { + t_cluster_placement_primitive *cur, *next; + auto& device_ctx = g_vpr_ctx.device(); + + for (const auto& type : device_ctx.logical_block_types) { + int index = type.index; + cur = cluster_placement_stats_list[index].tried; + while (cur != nullptr) { + next = cur->next_primitive; + free(cur); + cur = next; + } + cur = cluster_placement_stats_list[index].in_flight; + while (cur != nullptr) { + next = cur->next_primitive; + free(cur); + cur = next; + } + cur = cluster_placement_stats_list[index].invalid; + while (cur != nullptr) { + next = cur->next_primitive; + free(cur); + cur = next; + } + for (int j = 0; j < cluster_placement_stats_list[index].num_pb_types; j++) { + cur = cluster_placement_stats_list[index].valid_primitives[j]->next_primitive; + while (cur != nullptr) { + next = cur->next_primitive; + free(cur); + cur = next; + } + free(cluster_placement_stats_list[index].valid_primitives[j]); + } + free(cluster_placement_stats_list[index].valid_primitives); + } + free(cluster_placement_stats_list); +} \ No newline at end of file diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index f469e76dbc4..75ce30c031a 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -1654,4 +1654,9 @@ class RouteStatus { typedef vtr::vector>> t_clb_opins_used; //[0..num_blocks-1][0..class-1][0..used_pins-1] +typedef std::vector> t_arch_switch_fanin; + +void free_pack_molecules(t_pack_molecule* list_of_pack_molecules); +void free_cluster_placement_stats(t_cluster_placement_stats* cluster_placement_stats); + #endif diff --git a/vpr/src/pack/cluster.cpp b/vpr/src/pack/cluster.cpp index bf6354adda3..b7804c6e2bd 100644 --- a/vpr/src/pack/cluster.cpp +++ b/vpr/src/pack/cluster.cpp @@ -71,11 +71,9 @@ #include "tatum/report/graphviz_dot_writer.hpp" #include "tatum/TimingReporter.hpp" +#include "re_cluster_util.h" #include "constraints_report.h" -#define AAPACK_MAX_HIGH_FANOUT_EXPLORE 10 /* For high-fanout nets that are ignored, consider a maximum of this many sinks, must be less than packer_opts.feasible_block_array_size */ -#define AAPACK_MAX_TRANSITIVE_EXPLORE 40 /* When investigating transitive fanout connections in packing, consider a maximum of this many molecules, must be less than packer_opts.feasible_block_array_size */ - /* * When attraction groups are created, the purpose is to pack more densely by adding more molecules * from the cluster's attraction group to the cluster. In a normal flow, (when attraction groups are @@ -86,345 +84,12 @@ */ #define ATTRACTION_GROUPS_MAX_REPEATED_MOLECULES 500 -//Constant allowing all cluster pins to be used -const t_ext_pin_util FULL_EXTERNAL_PIN_UTIL(1., 1.); - -/* Keeps a linked list of the unclustered blocks to speed up looking for * - * unclustered blocks with a certain number of *external* inputs. * - * [0..lut_size]. Unclustered_list_head[i] points to the head of the * - * list of blocks with i inputs to be hooked up via external interconnect. */ -static t_molecule_link* unclustered_list_head; -int unclustered_list_head_size; -static t_molecule_link* memory_pool; /*Declared here so I can free easily.*/ - -/* Does the atom block that drives the output of this atom net also appear as a * - * receiver (input) pin of the atom net? If so, then by how much? - * - * This is used in the gain routines to avoid double counting the connections from * - * the current cluster to other blocks (hence yielding better clusterings). * - * The only time an atom block should connect to the same atom net * - * twice is when one connection is an output and the other is an input, * - * so this should take care of all multiple connections. */ -static std::unordered_map net_output_feeds_driving_block_input; - -/*****************************************/ -/*local functions*/ -/*****************************************/ - -#if 0 -static void check_for_duplicate_inputs (); -#endif - -static bool is_atom_blk_in_pb(const AtomBlockId blk_id, const t_pb* pb); - -static void add_molecule_to_pb_stats_candidates(t_pack_molecule* molecule, - std::map& gain, - t_pb* pb, - int max_queue_size, - AttractionInfo& attraction_groups); - -static void remove_molecule_from_pb_stats_candidates(t_pack_molecule* molecule, - t_pb* pb); - -static void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, - t_cluster_placement_stats** cluster_placement_stats, - t_pb_graph_node*** primitives_list, - t_pack_molecule* molecules_head, - int num_molecules); - -static void free_pb_stats_recursive(t_pb* pb); - -static void try_update_lookahead_pins_used(t_pb* cur_pb); - -static void reset_lookahead_pins_used(t_pb* cur_pb); - -static void compute_and_mark_lookahead_pins_used(const AtomBlockId blk_id); - -static void compute_and_mark_lookahead_pins_used_for_pin(const t_pb_graph_pin* pb_graph_pin, - const t_pb* primitive_pb, - const AtomNetId net_id); - -static void commit_lookahead_pins_used(t_pb* cur_pb); - -static bool check_lookahead_pins_used(t_pb* cur_pb, t_ext_pin_util max_external_pin_util); - -static bool primitive_feasible(const AtomBlockId blk_id, t_pb* cur_pb); - -static bool primitive_memory_sibling_feasible(const AtomBlockId blk_id, const t_pb_type* cur_pb_type, const AtomBlockId sibling_memory_blk); - -static t_pack_molecule* get_molecule_by_num_ext_inputs(const int ext_inps, - const enum e_removal_policy remove_flag, - t_cluster_placement_stats* cluster_placement_stats_ptr); - -static t_pack_molecule* get_free_molecule_with_most_ext_inputs_for_cluster(t_pb* cur_pb, - t_cluster_placement_stats* cluster_placement_stats_ptr); - -static void print_pack_status_header(); - -static void print_pack_status(int num_clb, - int tot_num_molecules, - int num_molecules_processed, - int& mols_since_last_print, - int device_width, - int device_height, - AttractionInfo& attraction_groups); - -static void rebuild_attraction_groups(AttractionInfo& attraction_groups); - -static void record_molecule_failure(t_pack_molecule* molecule, t_pb* pb); - -static enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_placement_stats_ptr, - const std::multimap& atom_molecules, - t_pack_molecule* molecule, - t_pb_graph_node** primitives_list, - t_pb* pb, - const int max_models, - const int max_cluster_size, - const ClusterBlockId clb_index, - const int detailed_routing_stage, - t_lb_router_data* router_data, - int verbosity, - bool enable_pin_feasibility_filter, - const int feasible_block_array_size, - t_ext_pin_util max_external_pin_util, - PartitionRegion& temp_cluster_pr); - -static void try_fill_cluster(const t_packer_opts& packer_opts, - t_cluster_placement_stats* cur_cluster_placement_stats_ptr, - const std::multimap& atom_molecules, - t_pack_molecule*& prev_molecule, - t_pack_molecule*& next_molecule, - int& num_same_molecules, - t_pb_graph_node** primitives_list, - t_cluster_progress_stats& cluster_stats, - int num_clb, - const int num_models, - const int max_cluster_size, - const ClusterBlockId clb_index, - const int detailed_routing_stage, - AttractionInfo& attraction_groups, - vtr::vector>& clb_inter_blk_nets, - bool allow_unrelated_clustering, - const int& high_fanout_threshold, - const std::unordered_set& is_clock, - const std::shared_ptr& timing_info, - t_lb_router_data* router_data, - t_ext_pin_util target_external_pin_util, - PartitionRegion& temp_cluster_pr, - std::map>& primitive_candidate_block_types, - e_block_pack_status& block_pack_status); - -static t_pack_molecule* save_cluster_routing_and_pick_new_seed(const t_packer_opts& packer_opts, - const std::multimap& atom_molecules, - const int& num_clb, - const std::vector& seed_atoms, - const int& num_blocks_hill_added, - vtr::vector*>& intra_lb_routing, - int& seedindex, - t_cluster_progress_stats& cluster_stats, - t_lb_router_data* router_data); - -static void store_cluster_info_and_free(const t_packer_opts& packer_opts, - const ClusterBlockId& clb_index, - const t_logical_block_type_ptr logic_block_type, - const t_pb_type* le_pb_type, - std::vector& le_count, - vtr::vector>& clb_inter_blk_nets); - -static void free_data_and_requeue_used_mols_if_illegal(const ClusterBlockId& clb_index, - const int& savedseedindex, - const std::multimap& atom_molecules, - std::map& num_used_type_instances, - int& num_clb, - int& seedindex); - -static enum e_block_pack_status try_place_atom_block_rec(const t_pb_graph_node* pb_graph_node, - const AtomBlockId blk_id, - t_pb* cb, - t_pb** parent, - const int max_models, - const int max_cluster_size, - const ClusterBlockId clb_index, - const t_cluster_placement_stats* cluster_placement_stats_ptr, - const t_pack_molecule* molecule, - t_lb_router_data* router_data, - int verbosity, - const int feasible_block_array_size); - -static enum e_block_pack_status atom_cluster_floorplanning_check(const AtomBlockId blk_id, - const ClusterBlockId clb_index, - const int verbosity, - PartitionRegion& temp_cluster_pr, - bool& cluster_pr_needs_update); - -static void revert_place_atom_block(const AtomBlockId blk_id, t_lb_router_data* router_data, const std::multimap& atom_molecules); - -static void update_connection_gain_values(const AtomNetId net_id, const AtomBlockId clustered_blk_id, t_pb* cur_pb, enum e_net_relation_to_clustered_block net_relation_to_clustered_block); - -static void update_timing_gain_values(const AtomNetId net_id, - t_pb* cur_pb, - enum e_net_relation_to_clustered_block net_relation_to_clustered_block, - const SetupTimingInfo& timing_info, - const std::unordered_set& is_global); - -static void mark_and_update_partial_gain(const AtomNetId inet, enum e_gain_update gain_flag, const AtomBlockId clustered_blk_id, bool timing_driven, bool connection_driven, enum e_net_relation_to_clustered_block net_relation_to_clustered_block, const SetupTimingInfo& timing_info, const std::unordered_set& is_global, const int high_fanout_net_threshold); - -static void update_total_gain(float alpha, float beta, bool timing_driven, bool connection_driven, t_pb* pb, AttractionInfo& attraction_groups); - -static void update_cluster_stats(const t_pack_molecule* molecule, - const ClusterBlockId clb_index, - const std::unordered_set& is_clock, - const std::unordered_set& is_global, - const bool global_clocks, - const float alpha, - const float beta, - const bool timing_driven, - const bool connection_driven, - const int high_fanout_net_threshold, - const SetupTimingInfo& timing_info, - AttractionInfo& attraction_groups); - -static void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, - t_pb_graph_node** primitives_list, - const std::multimap& atom_molecules, - ClusterBlockId clb_index, - t_pack_molecule* molecule, - std::map& num_used_type_instances, - const float target_device_utilization, - const int num_models, - const int max_cluster_size, - const t_arch* arch, - std::string device_layout_name, - std::vector* lb_type_rr_graphs, - t_lb_router_data** router_data, - const int detailed_routing_stage, - ClusteredNetlist* clb_nlist, - const std::map>& primitive_candidate_block_types, - int verbosity, - bool enable_pin_feasibility_filter, - bool balance_block_type_utilization, - const int feasible_block_array_size, - PartitionRegion& temp_cluster_pr); - -static t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, - const std::multimap& atom_molecules, - AttractionInfo& attraction_groups, - const enum e_gain_type gain_mode, - t_cluster_placement_stats* cluster_placement_stats_ptr, - vtr::vector>& clb_inter_blk_nets, - const ClusterBlockId cluster_index, - bool prioritize_transitive_connectivity, - int transitive_fanout_threshold, - const int feasible_block_array_size, - std::map>& primitive_candidate_block_types); - -static void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur_pb, - t_cluster_placement_stats* cluster_placement_stats_ptr, - const std::multimap& atom_molecules, - const int feasible_block_array_size, - AttractionInfo& attraction_groups); - -static void add_cluster_molecule_candidates_by_highfanout_connectivity(t_pb* cur_pb, - t_cluster_placement_stats* cluster_placement_stats_ptr, - const std::multimap& atom_molecules, - const int feasible_block_array_size, - AttractionInfo& attraction_groups); - -static void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, - t_cluster_placement_stats* cluster_placement_stats_ptr, - const std::multimap& atom_molecules, - AttractionInfo& attraction_groups, - const int feasible_block_array_size, - ClusterBlockId clb_index, - std::map>& primitive_candidate_block_types); - -static void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur_pb, - t_cluster_placement_stats* cluster_placement_stats_ptr, - const std::multimap& atom_molecules, - vtr::vector>& clb_inter_blk_nets, - const ClusterBlockId cluster_index, - int transitive_fanout_threshold, - const int feasible_block_array_size, - AttractionInfo& attraction_groups); - -static bool check_free_primitives_for_molecule_atoms(t_pack_molecule* molecule, t_cluster_placement_stats* cluster_placement_stats_ptr); - -static t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, - const std::multimap& atom_molecules, - AttractionInfo& attraction_groups, - const bool allow_unrelated_clustering, - const bool prioritize_transitive_connectivity, - const int transitive_fanout_threshold, - const int feasible_block_array_size, - int* num_unrelated_clustering_attempts, - t_cluster_placement_stats* cluster_placement_stats_ptr, - vtr::vector>& clb_inter_blk_nets, - ClusterBlockId cluster_index, - int verbosity, - std::map>& primitive_candidate_block_types); - -static void mark_all_molecules_valid(t_pack_molecule* molecule_head); - -static int count_molecules(t_pack_molecule* molecule_head); - -static t_molecule_stats calc_molecule_stats(const t_pack_molecule* molecule); - -static t_molecule_stats calc_max_molecules_stats(const t_pack_molecule* molecule_head); - -static std::vector initialize_seed_atoms(const e_cluster_seed seed_type, - const std::multimap& atom_molecules, - const t_molecule_stats& max_molecule_stats, - const vtr::vector& atom_criticality); - -static t_pack_molecule* get_highest_gain_seed_molecule(int* seedindex, const std::multimap& atom_molecules, const std::vector seed_atoms); - -static float get_molecule_gain(t_pack_molecule* molecule, std::map& blk_gain, AttractGroupId cluster_attraction_group_id, AttractionInfo& attraction_groups, int num_molecule_failures); -static int compare_molecule_gain(const void* a, const void* b); -int net_sinks_reachable_in_cluster(const t_pb_graph_pin* driver_pb_gpin, const int depth, const AtomNetId net_id); - -static void print_seed_gains(const char* fname, const std::vector& seed_atoms, const vtr::vector& atom_gain, const vtr::vector& atom_criticality); - -static void load_transitive_fanout_candidates(ClusterBlockId cluster_index, - const std::multimap& atom_molecules, - t_pb_stats* pb_stats, - vtr::vector>& clb_inter_blk_nets, - int transitive_fanout_threshold); - -static std::map> identify_primitive_candidate_block_types(); - -static void update_molecule_chain_info(t_pack_molecule* chain_molecule, const t_pb_graph_node* root_primitive); - -static enum e_block_pack_status check_chain_root_placement_feasibility(const t_pb_graph_node* pb_graph_node, - const t_pack_molecule* molecule, - const AtomBlockId blk_id); - -static t_pb_graph_pin* get_driver_pb_graph_pin(const t_pb* driver_pb, const AtomPinId driver_pin_id); - -static size_t update_pb_type_count(const t_pb* pb, std::map& pb_type_count, size_t depth); - -static void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_type, const t_pb_type* le_pb_type, std::vector& le_count); - -static void print_pb_type_count_recurr(t_pb_type* type, size_t max_name_chars, size_t curr_depth, std::map& pb_type_count); - -static t_logical_block_type_ptr identify_logic_block_type(std::map>& primitive_candidate_block_types); - -static t_pb_type* identify_le_block_type(t_logical_block_type_ptr logic_block_type); - -static bool pb_used_for_blif_model(const t_pb* pb, std::string blif_model_name); - -static void print_le_count(std::vector& le_count, const t_pb_type* le_pb_type); - -static t_pb* get_top_level_pb(t_pb* pb); - -/*****************************************/ -/*globally accessible function*/ std::map do_clustering(const t_packer_opts& packer_opts, const t_analysis_opts& analysis_opts, const t_arch* arch, t_pack_molecule* molecule_head, int num_models, const std::unordered_set& is_clock, - std::multimap& atom_molecules, const std::unordered_map& expected_lowest_cost_pb_gnode, bool allow_unrelated_clustering, bool balance_block_type_utilization, @@ -432,7 +97,8 @@ std::map do_clustering(const t_packer_opts& pa const t_ext_pin_util_targets& ext_pin_util_targets, const t_pack_high_fanout_thresholds& high_fanout_thresholds, AttractionInfo& attraction_groups, - bool& floorplan_regions_overfull) { + bool& floorplan_regions_overfull, + t_clustering_data& clustering_data) { /* Does the actual work of clustering multiple netlist blocks * * into clusters. */ @@ -457,12 +123,15 @@ std::map do_clustering(const t_packer_opts& pa t_cluster_progress_stats cluster_stats; //int num_molecules, num_molecules_processed, mols_since_last_print, blocks_since_last_analysis, - int num_clb, num_blocks_hill_added, max_cluster_size, max_pb_depth, + int num_blocks_hill_added, max_pb_depth, seedindex, savedseedindex /* index of next most timing critical block */, - detailed_routing_stage, *hill_climbing_inputs_avail; + detailed_routing_stage; const int verbosity = packer_opts.pack_verbosity; + int unclustered_list_head_size; + std::unordered_map net_output_feeds_driving_block_input; + cluster_stats.num_molecules_processed = 0; cluster_stats.mols_since_last_print = 0; @@ -471,16 +140,17 @@ std::map do_clustering(const t_packer_opts& pa bool is_cluster_legal; enum e_block_pack_status block_pack_status; - t_cluster_placement_stats *cluster_placement_stats, *cur_cluster_placement_stats_ptr; - t_pb_graph_node** primitives_list; + t_cluster_placement_stats* cur_cluster_placement_stats_ptr; t_lb_router_data* router_data = nullptr; t_pack_molecule *istart, *next_molecule, *prev_molecule; auto& atom_ctx = g_vpr_ctx.atom(); auto& device_ctx = g_vpr_ctx.mutable_device(); auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); + auto& helper_ctx = g_vpr_ctx.mutable_helper(); - vtr::vector*> intra_lb_routing; + helper_ctx.enable_pin_feasibility_filter = packer_opts.enable_pin_feasibility_filter; + helper_ctx.feasible_block_array_size = packer_opts.feasible_block_array_size; std::shared_ptr clustering_delay_calc; std::shared_ptr timing_info; @@ -495,7 +165,7 @@ std::map do_clustering(const t_packer_opts& pa // Index 2 holds the number of LEs that are used for registers only. std::vector le_count(3, 0); - num_clb = 0; + helper_ctx.total_clb_num = 0; /* TODO: This is memory inefficient, fix if causes problems */ /* Store stats on nets used by packed block, useful for determining transitively connected blocks @@ -505,7 +175,7 @@ std::map do_clustering(const t_packer_opts& pa istart = nullptr; /* determine bound on cluster size and primitive input size */ - max_cluster_size = 0; + helper_ctx.max_cluster_size = 0; max_pb_depth = 0; seedindex = 0; @@ -516,21 +186,22 @@ std::map do_clustering(const t_packer_opts& pa cluster_stats.num_molecules = count_molecules(molecule_head); - get_max_cluster_size_and_pb_depth(max_cluster_size, max_pb_depth); + get_max_cluster_size_and_pb_depth(helper_ctx.max_cluster_size, max_pb_depth); if (packer_opts.hill_climbing_flag) { - hill_climbing_inputs_avail = (int*)vtr::calloc(max_cluster_size + 1, - sizeof(int)); + clustering_data.hill_climbing_inputs_avail = (int*)vtr::calloc(helper_ctx.max_cluster_size + 1, + sizeof(int)); } else { - hill_climbing_inputs_avail = nullptr; /* if used, die hard */ + clustering_data.hill_climbing_inputs_avail = nullptr; /* if used, die hard */ } #if 0 check_for_duplicate_inputs (); #endif alloc_and_init_clustering(max_molecule_stats, - &cluster_placement_stats, &primitives_list, molecule_head, - cluster_stats.num_molecules); + &(helper_ctx.cluster_placement_stats), &(helper_ctx.primitives_list), molecule_head, + clustering_data, net_output_feeds_driving_block_input, + unclustered_list_head_size, cluster_stats.num_molecules); auto primitive_candidate_block_types = identify_primitive_candidate_block_types(); // find the cluster type that has lut primitives @@ -541,7 +212,7 @@ std::map do_clustering(const t_packer_opts& pa cluster_stats.blocks_since_last_analysis = 0; num_blocks_hill_added = 0; - VTR_ASSERT(max_cluster_size < MAX_SHORT); + VTR_ASSERT(helper_ctx.max_cluster_size < MAX_SHORT); /* Limit maximum number of elements for each cluster */ //Default criticalities set to zero (e.g. if not timing driven) @@ -552,9 +223,9 @@ std::map do_clustering(const t_packer_opts& pa clustering_delay_calc, timing_info, atom_criticality); } - auto seed_atoms = initialize_seed_atoms(packer_opts.cluster_seed_type, atom_molecules, max_molecule_stats, atom_criticality); + auto seed_atoms = initialize_seed_atoms(packer_opts.cluster_seed_type, max_molecule_stats, atom_criticality); - istart = get_highest_gain_seed_molecule(&seedindex, atom_molecules, seed_atoms); + istart = get_highest_gain_seed_molecule(&seedindex, seed_atoms); print_pack_status_header(); @@ -566,20 +237,20 @@ std::map do_clustering(const t_packer_opts& pa is_cluster_legal = false; savedseedindex = seedindex; for (detailed_routing_stage = (int)E_DETAILED_ROUTE_AT_END_ONLY; !is_cluster_legal && detailed_routing_stage != (int)E_DETAILED_ROUTE_INVALID; detailed_routing_stage++) { - ClusterBlockId clb_index(num_clb); + ClusterBlockId clb_index(helper_ctx.total_clb_num); - VTR_LOGV(verbosity > 2, "Complex block %d:\n", num_clb); + VTR_LOGV(verbosity > 2, "Complex block %d:\n", helper_ctx.total_clb_num); /*Used to store cluster's PartitionRegion as primitives are added to it. * Since some of the primitives might fail legality, this structure temporarily * stores PartitionRegion information while the cluster is packed*/ PartitionRegion temp_cluster_pr; - start_new_cluster(cluster_placement_stats, primitives_list, - atom_molecules, clb_index, istart, + start_new_cluster(helper_ctx.cluster_placement_stats, helper_ctx.primitives_list, + clb_index, istart, num_used_type_instances, packer_opts.target_device_utilization, - num_models, max_cluster_size, + num_models, helper_ctx.max_cluster_size, arch, packer_opts.device_layout, lb_type_rr_graphs, &router_data, detailed_routing_stage, &cluster_ctx.clb_nlist, @@ -593,7 +264,7 @@ std::map do_clustering(const t_packer_opts& pa //initial molecule in cluster has been processed cluster_stats.num_molecules_processed++; cluster_stats.mols_since_last_print++; - print_pack_status(num_clb, + print_pack_status(helper_ctx.total_clb_num, cluster_stats.num_molecules, cluster_stats.num_molecules_processed, cluster_stats.mols_since_last_print, @@ -602,7 +273,7 @@ std::map do_clustering(const t_packer_opts& pa attraction_groups); VTR_LOGV(verbosity > 2, - "Complex block %d: '%s' (%s) ", num_clb, + "Complex block %d: '%s' (%s) ", helper_ctx.total_clb_num, cluster_ctx.clb_nlist.block_name(clb_index).c_str(), cluster_ctx.clb_nlist.block_type(clb_index)->name); VTR_LOGV(verbosity > 2, "."); @@ -619,18 +290,18 @@ std::map do_clustering(const t_packer_opts& pa packer_opts.timing_driven, packer_opts.connection_driven, high_fanout_threshold, *timing_info, - attraction_groups); - num_clb++; + attraction_groups, + net_output_feeds_driving_block_input); + helper_ctx.total_clb_num++; if (packer_opts.timing_driven) { cluster_stats.blocks_since_last_analysis++; /*it doesn't make sense to do a timing analysis here since there* *is only one atom block clustered it would not change anything */ } - cur_cluster_placement_stats_ptr = &cluster_placement_stats[cluster_ctx.clb_nlist.block_type(clb_index)->index]; + cur_cluster_placement_stats_ptr = &(helper_ctx.cluster_placement_stats[cluster_ctx.clb_nlist.block_type(clb_index)->index]); cluster_stats.num_unrelated_clustering_attempts = 0; next_molecule = get_molecule_for_cluster(cluster_ctx.clb_nlist.block_pb(clb_index), - atom_molecules, attraction_groups, allow_unrelated_clustering, packer_opts.prioritize_transitive_connectivity, @@ -640,7 +311,9 @@ std::map do_clustering(const t_packer_opts& pa cur_cluster_placement_stats_ptr, clb_inter_blk_nets, clb_index, - packer_opts.pack_verbosity, + verbosity, + clustering_data.unclustered_list_head, + unclustered_list_head_size, primitive_candidate_block_types); prev_molecule = istart; @@ -665,15 +338,14 @@ std::map do_clustering(const t_packer_opts& pa try_fill_cluster(packer_opts, cur_cluster_placement_stats_ptr, - atom_molecules, prev_molecule, next_molecule, num_repeated_molecules, - primitives_list, + helper_ctx.primitives_list, cluster_stats, - num_clb, + helper_ctx.total_clb_num, num_models, - max_cluster_size, + helper_ctx.max_cluster_size, clb_index, detailed_routing_stage, attraction_groups, @@ -685,17 +357,20 @@ std::map do_clustering(const t_packer_opts& pa router_data, target_ext_pin_util, temp_cluster_pr, - primitive_candidate_block_types, - block_pack_status); + block_pack_status, + clustering_data.unclustered_list_head, + unclustered_list_head_size, + net_output_feeds_driving_block_input, + primitive_candidate_block_types); } is_cluster_legal = check_cluster_legality(verbosity, detailed_routing_stage, router_data); if (is_cluster_legal) { - istart = save_cluster_routing_and_pick_new_seed(packer_opts, atom_molecules, num_clb, seed_atoms, num_blocks_hill_added, intra_lb_routing, seedindex, cluster_stats, router_data); + istart = save_cluster_routing_and_pick_new_seed(packer_opts, helper_ctx.total_clb_num, seed_atoms, num_blocks_hill_added, clustering_data.intra_lb_routing, seedindex, cluster_stats, router_data); store_cluster_info_and_free(packer_opts, clb_index, logic_block_type, le_pb_type, le_count, clb_inter_blk_nets); } else { - free_data_and_requeue_used_mols_if_illegal(clb_index, savedseedindex, atom_molecules, num_used_type_instances, num_clb, seedindex); + free_data_and_requeue_used_mols_if_illegal(clb_index, savedseedindex, num_used_type_instances, helper_ctx.total_clb_num, seedindex); } free_router_data(router_data); router_data = nullptr; @@ -707,3395 +382,37 @@ std::map do_clustering(const t_packer_opts& pa print_le_count(le_count, le_pb_type); } - //check clustering and output it - check_and_output_clustering(packer_opts, is_clock, arch, num_clb, intra_lb_routing, floorplan_regions_overfull); - - // Free Data Structures - free_clustering_data(packer_opts, intra_lb_routing, hill_climbing_inputs_avail, cluster_placement_stats, - unclustered_list_head, memory_pool, primitives_list); + //check_floorplan_regions(floorplan_regions_overfull); + floorplan_regions_overfull = floorplan_constraints_regions_overfull(); return num_used_type_instances; } -/*print the header for the clustering progress table*/ -static void print_pack_status_header() { - VTR_LOG("Starting Clustering - Clustering Progress: \n"); - VTR_LOG("------------------- -------------------------- ---------\n"); - VTR_LOG("Molecules processed Number of clusters created FPGA size\n"); - VTR_LOG("------------------- -------------------------- ---------\n"); -} - -/*incrementally print progress updates during clustering*/ -static void print_pack_status(int num_clb, - int tot_num_molecules, - int num_molecules_processed, - int& mols_since_last_print, - int device_width, - int device_height, - AttractionInfo& attraction_groups) { - //Print a packing update each time another 4% of molecules have been packed. - const float print_frequency = 0.04; - - double percentage = (num_molecules_processed / (double)tot_num_molecules) * 100; - - int int_percentage = int(percentage); - - int int_molecule_increment = (int)(print_frequency * tot_num_molecules); - - if (mols_since_last_print == int_molecule_increment) { - VTR_LOG( - "%6d/%-6d %3d%% " - "%26d " - "%3d x %-3d ", - num_molecules_processed, - tot_num_molecules, - int_percentage, - num_clb, - device_width, - device_height); - - VTR_LOG("\n"); - fflush(stdout); - mols_since_last_print = 0; - if (attraction_groups.num_attraction_groups() > 0) { - rebuild_attraction_groups(attraction_groups); - } - } -} - -/* - * Periodically rebuild the attraction groups to reflect which atoms in them - * are still available for new clusters (i.e. remove the atoms that have already - * been packed from the attraction group). +/** + * Print the total number of used physical blocks for each pb type in the architecture */ -static void rebuild_attraction_groups(AttractionInfo& attraction_groups) { - auto& atom_ctx = g_vpr_ctx.atom(); - - for (int igroup = 0; igroup < attraction_groups.num_attraction_groups(); igroup++) { - AttractGroupId group_id(igroup); - AttractionGroup& group = attraction_groups.get_attraction_group_info(group_id); - AttractionGroup new_att_group_info; - - for (AtomBlockId atom : group.group_atoms) { - //If the ClusterBlockId is anything other than invalid, the atom has been packed already - if (atom_ctx.lookup.atom_clb(atom) == ClusterBlockId::INVALID()) { - new_att_group_info.group_atoms.push_back(atom); - } - } - - attraction_groups.set_attraction_group_info(group_id, new_att_group_info); - } -} - -/* Determine if atom block is in pb */ -static bool is_atom_blk_in_pb(const AtomBlockId blk_id, const t_pb* pb) { - auto& atom_ctx = g_vpr_ctx.atom(); - - const t_pb* cur_pb = atom_ctx.lookup.atom_pb(blk_id); - while (cur_pb) { - if (cur_pb == pb) { - return true; - } - cur_pb = cur_pb->parent_pb; - } - return false; -} - -/* Remove blk from list of feasible blocks sorted according to gain - * Useful for removing blocks that are repeatedly failing. If a block - * has been found to be illegal, we don't repeatedly consider it.*/ -static void remove_molecule_from_pb_stats_candidates(t_pack_molecule* molecule, - t_pb* pb) { - int molecule_index; - bool found_molecule = false; - - //find the molecule index - for (int i = 0; i < pb->pb_stats->num_feasible_blocks; i++) { - if (pb->pb_stats->feasible_blocks[i] == molecule) { - found_molecule = true; - molecule_index = i; - } - } - - //if it is not in the array, return - if (found_molecule == false) { - return; - } - - //Otherwise, shift the molecules while removing the specified molecule - for (int j = molecule_index; j < pb->pb_stats->num_feasible_blocks - 1; j++) { - pb->pb_stats->feasible_blocks[j] = pb->pb_stats->feasible_blocks[j + 1]; - } - pb->pb_stats->num_feasible_blocks--; -} - -/* Add blk to list of feasible blocks sorted according to gain */ -static void add_molecule_to_pb_stats_candidates(t_pack_molecule* molecule, - std::map& gain, - t_pb* pb, - int max_queue_size, - AttractionInfo& attraction_groups) { - int i, j; - int num_molecule_failures = 0; - - AttractGroupId cluster_att_grp = pb->pb_stats->attraction_grp_id; - - /* When the clusterer packs with attraction groups the goal is to - * pack more densely. Removing failed molecules to make room for the exploration of - * more molecules helps to achieve this purpose. - */ - if (attraction_groups.num_attraction_groups() > 0) { - auto got = pb->pb_stats->atom_failures.find(molecule->atom_block_ids[0]); - if (got == pb->pb_stats->atom_failures.end()) { - num_molecule_failures = 0; - } else { - num_molecule_failures = got->second; - } - - if (num_molecule_failures > 0) { - remove_molecule_from_pb_stats_candidates(molecule, pb); - return; - } - } - - for (i = 0; i < pb->pb_stats->num_feasible_blocks; i++) { - if (pb->pb_stats->feasible_blocks[i] == molecule) { - return; // already in queue, do nothing - } - } - - if (pb->pb_stats->num_feasible_blocks >= max_queue_size - 1) { - /* maximum size for array, remove smallest gain element and sort */ - if (get_molecule_gain(molecule, gain, cluster_att_grp, attraction_groups, num_molecule_failures) > get_molecule_gain(pb->pb_stats->feasible_blocks[0], gain, cluster_att_grp, attraction_groups, num_molecule_failures)) { - /* single loop insertion sort */ - for (j = 0; j < pb->pb_stats->num_feasible_blocks - 1; j++) { - if (get_molecule_gain(molecule, gain, cluster_att_grp, attraction_groups, num_molecule_failures) <= get_molecule_gain(pb->pb_stats->feasible_blocks[j + 1], gain, cluster_att_grp, attraction_groups, num_molecule_failures)) { - pb->pb_stats->feasible_blocks[j] = molecule; - break; - } else { - pb->pb_stats->feasible_blocks[j] = pb->pb_stats->feasible_blocks[j + 1]; - } - } - if (j == pb->pb_stats->num_feasible_blocks - 1) { - pb->pb_stats->feasible_blocks[j] = molecule; - } - } - } else { - /* Expand array and single loop insertion sort */ - for (j = pb->pb_stats->num_feasible_blocks - 1; j >= 0; j--) { - if (get_molecule_gain(pb->pb_stats->feasible_blocks[j], gain, cluster_att_grp, attraction_groups, num_molecule_failures) > get_molecule_gain(molecule, gain, cluster_att_grp, attraction_groups, num_molecule_failures)) { - pb->pb_stats->feasible_blocks[j + 1] = pb->pb_stats->feasible_blocks[j]; - } else { - pb->pb_stats->feasible_blocks[j + 1] = molecule; - break; - } - } - if (j < 0) { - pb->pb_stats->feasible_blocks[0] = molecule; - } - pb->pb_stats->num_feasible_blocks++; - } -} - -/*****************************************/ -static void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, - t_cluster_placement_stats** cluster_placement_stats, - t_pb_graph_node*** primitives_list, - t_pack_molecule* molecules_head, - int num_molecules) { - /* Allocates the main data structures used for clustering and properly * - * initializes them. */ - - t_molecule_link* next_ptr; - t_pack_molecule* cur_molecule; - t_pack_molecule** molecule_array; - int max_molecule_size; - - /* alloc and load list of molecules to pack */ - unclustered_list_head = (t_molecule_link*)vtr::calloc(max_molecule_stats.num_used_ext_inputs + 1, sizeof(t_molecule_link)); - unclustered_list_head_size = max_molecule_stats.num_used_ext_inputs + 1; - - for (int i = 0; i <= max_molecule_stats.num_used_ext_inputs; i++) { - unclustered_list_head[i].next = nullptr; - } - - molecule_array = (t_pack_molecule**)vtr::malloc(num_molecules * sizeof(t_pack_molecule*)); - cur_molecule = molecules_head; - for (int i = 0; i < num_molecules; i++) { - VTR_ASSERT(cur_molecule != nullptr); - molecule_array[i] = cur_molecule; - cur_molecule = cur_molecule->next; - } - VTR_ASSERT(cur_molecule == nullptr); - qsort((void*)molecule_array, num_molecules, sizeof(t_pack_molecule*), - compare_molecule_gain); - - memory_pool = (t_molecule_link*)vtr::malloc(num_molecules * sizeof(t_molecule_link)); - next_ptr = memory_pool; - - for (int i = 0; i < num_molecules; i++) { - //Figure out how many external inputs are used by this molecule - t_molecule_stats molecule_stats = calc_molecule_stats(molecule_array[i]); - int ext_inps = molecule_stats.num_used_ext_inputs; - - //Insert the molecule into the unclustered lists by number of external inputs - next_ptr->moleculeptr = molecule_array[i]; - next_ptr->next = unclustered_list_head[ext_inps].next; - unclustered_list_head[ext_inps].next = next_ptr; - - next_ptr++; - } - free(molecule_array); - - /* load net info */ - auto& atom_ctx = g_vpr_ctx.atom(); - for (AtomNetId net : atom_ctx.nlist.nets()) { - AtomPinId driver_pin = atom_ctx.nlist.net_driver(net); - AtomBlockId driver_block = atom_ctx.nlist.pin_block(driver_pin); - - for (AtomPinId sink_pin : atom_ctx.nlist.net_sinks(net)) { - AtomBlockId sink_block = atom_ctx.nlist.pin_block(sink_pin); - - if (driver_block == sink_block) { - net_output_feeds_driving_block_input[net]++; - } - } - } - - /* alloc and load cluster placement info */ - *cluster_placement_stats = alloc_and_load_cluster_placement_stats(); - - /* alloc array that will store primitives that a molecule gets placed to, - * primitive_list is referenced by index, for example a atom block in index 2 of a molecule matches to a primitive in index 2 in primitive_list - * this array must be the size of the biggest molecule - */ - max_molecule_size = 1; - cur_molecule = molecules_head; - while (cur_molecule != nullptr) { - if (cur_molecule->num_blocks > max_molecule_size) { - max_molecule_size = cur_molecule->num_blocks; - } - cur_molecule = cur_molecule->next; - } - *primitives_list = (t_pb_graph_node**)vtr::calloc(max_molecule_size, sizeof(t_pb_graph_node*)); -} - -/*****************************************/ -static void free_pb_stats_recursive(t_pb* pb) { - int i, j; - /* Releases all the memory used by clustering data structures. */ - if (pb) { - if (pb->pb_graph_node != nullptr) { - if (!pb->pb_graph_node->is_primitive()) { - for (i = 0; i < pb->pb_graph_node->pb_type->modes[pb->mode].num_pb_type_children; i++) { - for (j = 0; j < pb->pb_graph_node->pb_type->modes[pb->mode].pb_type_children[i].num_pb; j++) { - if (pb->child_pbs && pb->child_pbs[i]) { - free_pb_stats_recursive(&pb->child_pbs[i][j]); - } - } - } - } - } - free_pb_stats(pb); - } -} - -static bool primitive_feasible(const AtomBlockId blk_id, t_pb* cur_pb) { - const t_pb_type* cur_pb_type = cur_pb->pb_graph_node->pb_type; - - VTR_ASSERT(cur_pb_type->num_modes == 0); /* primitive */ - - auto& atom_ctx = g_vpr_ctx.atom(); - AtomBlockId cur_pb_blk_id = atom_ctx.lookup.pb_atom(cur_pb); - if (cur_pb_blk_id && cur_pb_blk_id != blk_id) { - /* This pb already has a different logical block */ - return false; - } - - if (cur_pb_type->class_type == MEMORY_CLASS) { - /* Memory class has additional feasibility requirements: - * - all siblings must share all nets, including open nets, with the exception of data nets */ - - /* find sibling if one exists */ - AtomBlockId sibling_memory_blk_id = find_memory_sibling(cur_pb); - - if (sibling_memory_blk_id) { - //There is a sibling, see if the current block is feasible with it - bool sibling_feasible = primitive_memory_sibling_feasible(blk_id, cur_pb_type, sibling_memory_blk_id); - if (!sibling_feasible) { - return false; - } - } - } - - //Generic feasibility check - return primitive_type_feasible(blk_id, cur_pb_type); -} - -static bool primitive_memory_sibling_feasible(const AtomBlockId blk_id, const t_pb_type* cur_pb_type, const AtomBlockId sibling_blk_id) { - /* Check that the two atom blocks blk_id and sibling_blk_id (which should both be memory slices) - * are feasible, in the sence that they have precicely the same net connections (with the - * exception of nets in data port classes). - * - * Note that this routine does not check pin feasibility against the cur_pb_type; so - * primitive_type_feasible() should also be called on blk_id before concluding it is feasible. - */ - auto& atom_ctx = g_vpr_ctx.atom(); - VTR_ASSERT(cur_pb_type->class_type == MEMORY_CLASS); - - //First, identify the 'data' ports by looking at the cur_pb_type - std::unordered_set data_ports; - for (int iport = 0; iport < cur_pb_type->num_ports; ++iport) { - const char* port_class = cur_pb_type->ports[iport].port_class; - if (port_class && strstr(port_class, "data") == port_class) { - //The port_class starts with "data", so it is a data port - - //Record the port - data_ports.insert(cur_pb_type->ports[iport].model_port); - } - } - - //Now verify that all nets (except those connected to data ports) are equivalent - //between blk_id and sibling_blk_id - - //Since the atom netlist stores only in-use ports, we iterate over the model to ensure - //all ports are compared - const t_model* model = cur_pb_type->model; - for (t_model_ports* port : {model->inputs, model->outputs}) { - for (; port; port = port->next) { - if (data_ports.count(port)) { - //Don't check data ports - continue; - } - - //Note: VPR doesn't support multi-driven nets, so all outputs - //should be data ports, otherwise the siblings will both be - //driving the output net - - //Get the ports from each primitive - auto blk_port_id = atom_ctx.nlist.find_atom_port(blk_id, port); - auto sib_port_id = atom_ctx.nlist.find_atom_port(sibling_blk_id, port); - - //Check that all nets (including unconnected nets) match - for (int ipin = 0; ipin < port->size; ++ipin) { - //The nets are initialized as invalid (i.e. disconnected) - AtomNetId blk_net_id; - AtomNetId sib_net_id; - - //We can get the actual net provided the port exists - // - //Note that if the port did not exist, the net is left - //as invalid/disconneced - if (blk_port_id) { - blk_net_id = atom_ctx.nlist.port_net(blk_port_id, ipin); - } - if (sib_port_id) { - sib_net_id = atom_ctx.nlist.port_net(sib_port_id, ipin); - } - - //The sibling and block must have the same (possibly disconnected) - //net on this pin - if (blk_net_id != sib_net_id) { - //Nets do not match, not feasible - return false; - } - } - } - } - - return true; -} - -/*****************************************/ -static t_pack_molecule* get_molecule_by_num_ext_inputs(const int ext_inps, - const enum e_removal_policy remove_flag, - t_cluster_placement_stats* cluster_placement_stats_ptr) { - /* This routine returns an atom block which has not been clustered, has * - * no connection to the current cluster, satisfies the cluster * - * clock constraints, is a valid subblock inside the cluster, does not exceed the cluster subblock units available, - * and has ext_inps external inputs. If * - * there is no such atom block it returns ClusterBlockId::INVALID(). Remove_flag * - * controls whether or not blocks that have already been clustered * - * are removed from the unclustered_list data structures. NB: * - * to get a atom block regardless of clock constraints just set clocks_ * - * avail > 0. */ - - t_molecule_link *ptr, *prev_ptr; - int i; - bool success; - - prev_ptr = &unclustered_list_head[ext_inps]; - ptr = unclustered_list_head[ext_inps].next; - while (ptr != nullptr) { - /* TODO: Get better candidate atom block in future, eg. return most timing critical or some other smarter metric */ - if (ptr->moleculeptr->valid) { - success = true; - for (i = 0; i < get_array_size_of_molecule(ptr->moleculeptr); i++) { - if (ptr->moleculeptr->atom_block_ids[i]) { - auto blk_id = ptr->moleculeptr->atom_block_ids[i]; - if (!exists_free_primitive_for_atom_block(cluster_placement_stats_ptr, blk_id)) { - /* TODO: I should be using a better filtering check especially when I'm - * dealing with multiple clock/multiple global reset signals where the clock/reset - * packed in matters, need to do later when I have the circuits to check my work */ - success = false; - break; - } - } - } - if (success == true) { - return ptr->moleculeptr; - } - prev_ptr = ptr; - } - - else if (remove_flag == REMOVE_CLUSTERED) { - VTR_ASSERT(0); /* this doesn't work right now with 2 the pass packing for each complex block */ - prev_ptr->next = ptr->next; - } - - ptr = ptr->next; - } - - return nullptr; -} - -/*****************************************/ -static t_pack_molecule* get_free_molecule_with_most_ext_inputs_for_cluster(t_pb* cur_pb, - t_cluster_placement_stats* cluster_placement_stats_ptr) { - /* This routine is used to find new blocks for clustering when there are no feasible * - * blocks with any attraction to the current cluster (i.e. it finds * - * blocks which are unconnected from the current cluster). It returns * - * the atom block with the largest number of used inputs that satisfies the * - * clocking and number of inputs constraints. If no suitable atom block is * - * found, the routine returns ClusterBlockId::INVALID(). - * TODO: Analyze if this function is useful in more detail, also, should probably not include clock in input count - */ - - int inputs_avail = 0; +void print_pb_type_count(const ClusteredNetlist& clb_nlist) { + auto& device_ctx = g_vpr_ctx.device(); - for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { - inputs_avail += cur_pb->pb_stats->input_pins_used[i].size(); - } + std::map pb_type_count; - t_pack_molecule* molecule = nullptr; + size_t max_depth = 0; + for (ClusterBlockId blk : clb_nlist.blocks()) { + size_t pb_max_depth = update_pb_type_count(clb_nlist.block_pb(blk), pb_type_count, 0); - if (inputs_avail >= unclustered_list_head_size) { - inputs_avail = unclustered_list_head_size - 1; + max_depth = std::max(max_depth, pb_max_depth); } - for (int ext_inps = inputs_avail; ext_inps >= 0; ext_inps--) { - molecule = get_molecule_by_num_ext_inputs(ext_inps, LEAVE_CLUSTERED, cluster_placement_stats_ptr); - if (molecule != nullptr) { - break; - } + size_t max_pb_type_name_chars = 0; + for (auto& pb_type : pb_type_count) { + max_pb_type_name_chars = std::max(max_pb_type_name_chars, strlen(pb_type.first->name)); } - return molecule; -} -/*****************************************/ -static void alloc_and_load_pb_stats(t_pb* pb, const int feasible_block_array_size) { - /* Call this routine when starting to fill up a new cluster. It resets * - * the gain vector, etc. */ - - pb->pb_stats = new t_pb_stats; - - /* If statement below is for speed. If nets are reasonably low-fanout, * - * only a relatively small number of blocks will be marked, and updating * - * only those atom block structures will be fastest. If almost all blocks * - * have been touched it should be faster to just run through them all * - * in order (less addressing and better cache locality). */ - pb->pb_stats->input_pins_used = std::vector>(pb->pb_graph_node->num_input_pin_class); - pb->pb_stats->output_pins_used = std::vector>(pb->pb_graph_node->num_output_pin_class); - pb->pb_stats->lookahead_input_pins_used = std::vector>(pb->pb_graph_node->num_input_pin_class); - pb->pb_stats->lookahead_output_pins_used = std::vector>(pb->pb_graph_node->num_output_pin_class); - pb->pb_stats->num_feasible_blocks = NOT_VALID; - pb->pb_stats->feasible_blocks = (t_pack_molecule**)vtr::calloc(feasible_block_array_size, sizeof(t_pack_molecule*)); - - pb->pb_stats->tie_break_high_fanout_net = AtomNetId::INVALID(); - - pb->pb_stats->pulled_from_atom_groups = 0; - pb->pb_stats->num_att_group_atoms_used = 0; - - pb->pb_stats->gain.clear(); - pb->pb_stats->timinggain.clear(); - pb->pb_stats->connectiongain.clear(); - pb->pb_stats->sharinggain.clear(); - pb->pb_stats->hillgain.clear(); - pb->pb_stats->transitive_fanout_candidates.clear(); - pb->pb_stats->atom_failures.clear(); - - pb->pb_stats->num_pins_of_net_in_pb.clear(); - - pb->pb_stats->num_child_blocks_in_pb = 0; - - pb->pb_stats->explore_transitive_fanout = true; -} -/*****************************************/ - -/** - * Cleans up a pb after unsuccessful molecule packing - * - * Recursively frees pbs from a t_pb tree. The given root pb itself is not - * deleted. - * - * If a pb object has its children allocated then before freeing them the - * function checks if there is no atom that corresponds to any of them. The - * check is performed only for leaf (primitive) pbs. The function recurses for - * non-primitive pbs. - * - * The cleaning itself includes deleting all child pbs, resetting mode of the - * pb and also freeing its name. This prepares the pb for another round of - * molecule packing tryout. - */ -static bool cleanup_pb(t_pb* pb) { - bool can_free = true; - - /* Recursively check if there are any children with already assigned atoms */ - if (pb->child_pbs != nullptr) { - const t_mode* mode = &pb->pb_graph_node->pb_type->modes[pb->mode]; - VTR_ASSERT(mode != nullptr); - - /* Check each mode */ - for (int i = 0; i < mode->num_pb_type_children; ++i) { - /* Check each child */ - if (pb->child_pbs[i] != nullptr) { - for (int j = 0; j < mode->pb_type_children[i].num_pb; ++j) { - t_pb* pb_child = &pb->child_pbs[i][j]; - t_pb_type* pb_type = pb_child->pb_graph_node->pb_type; - - /* Primitive, check occupancy */ - if (pb_type->num_modes == 0) { - if (pb_child->name != nullptr) { - can_free = false; - } - } - - /* Non-primitive, recurse */ - else { - if (!cleanup_pb(pb_child)) { - can_free = false; - } - } - } - } - } - - /* Free if can */ - if (can_free) { - for (int i = 0; i < mode->num_pb_type_children; ++i) { - if (pb->child_pbs[i] != nullptr) { - delete[] pb->child_pbs[i]; - } - } - - delete[] pb->child_pbs; - pb->child_pbs = nullptr; - pb->mode = 0; + VTR_LOG("\nPb types usage...\n"); + for (const auto& logical_block_type : device_ctx.logical_block_types) { + if (!logical_block_type.pb_type) continue; - if (pb->name) { - free(pb->name); - pb->name = nullptr; - } - } - } - - return can_free; -} - -/** - * Performs legality checks to see whether the selected molecule can be - * packed into the current cluster. The legality checks are related to - * floorplanning, pin feasibility, and routing (if detailed route - * checking is enabled). The routine returns BLK_PASSED if the molecule - * can be packed in the cluster. If the block passes, the routine commits - * it to the current cluster and updates the appropriate data structures. - * Otherwise, it returns the appropriate failed pack status based on which - * legality check the molecule failed. - */ -static enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_placement_stats_ptr, - const std::multimap& atom_molecules, - t_pack_molecule* molecule, - t_pb_graph_node** primitives_list, - t_pb* pb, - const int max_models, - const int max_cluster_size, - const ClusterBlockId clb_index, - const int detailed_routing_stage, - t_lb_router_data* router_data, - int verbosity, - bool enable_pin_feasibility_filter, - const int feasible_block_array_size, - t_ext_pin_util max_external_pin_util, - PartitionRegion& temp_cluster_pr) { - int molecule_size, failed_location; - int i; - enum e_block_pack_status block_pack_status; - t_pb* parent; - t_pb* cur_pb; - - auto& atom_ctx = g_vpr_ctx.atom(); - auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); - - parent = nullptr; - - block_pack_status = BLK_STATUS_UNDEFINED; - - molecule_size = get_array_size_of_molecule(molecule); - failed_location = 0; - - if (verbosity > 3) { - AtomBlockId root_atom = molecule->atom_block_ids[molecule->root]; - VTR_LOG("\t\tTry pack molecule: '%s' (%s)", - atom_ctx.nlist.block_name(root_atom).c_str(), - atom_ctx.nlist.block_model(root_atom)->name); - VTR_LOGV(molecule->pack_pattern, - " molecule_type %s molecule_size %zu", - molecule->pack_pattern->name, - molecule->atom_block_ids.size()); - VTR_LOG("\n"); - } - - // if this cluster has a molecule placed in it that is part of a long chain - // (a chain that consists of more than one molecule), don't allow more long chain - // molecules to be placed in this cluster. To avoid possibly creating cluster level - // blocks that have incompatible placement constraints or form very long placement - // macros that limit placement flexibility. - if (cluster_placement_stats_ptr->has_long_chain && molecule->is_chain() && molecule->chain_info->is_long_chain) { - VTR_LOGV(verbosity > 4, "\t\t\tFAILED Placement Feasibility Filter: Only one long chain per cluster is allowed\n"); - //Record the failure of this molecule in the current pb stats - record_molecule_failure(molecule, pb); - return BLK_FAILED_FEASIBLE; - } - - bool cluster_pr_needs_update = false; - bool cluster_pr_update_check = false; - - //check if every atom in the molecule is legal in the cluster from a floorplanning perspective - for (int i_mol = 0; i_mol < molecule_size; i_mol++) { - //try to intersect with atom PartitionRegion if atom exists - if (molecule->atom_block_ids[i_mol]) { - block_pack_status = atom_cluster_floorplanning_check(molecule->atom_block_ids[i_mol], - clb_index, verbosity, - temp_cluster_pr, - cluster_pr_needs_update); - if (block_pack_status == BLK_FAILED_FLOORPLANNING) { - //Record the failure of this molecule in the current pb stats - record_molecule_failure(molecule, pb); - return block_pack_status; - } - if (cluster_pr_needs_update == true) { - cluster_pr_update_check = true; - } - } - } - - //change status back to undefined before the while loop in case in was changed to BLK_PASSED in the above for loop - block_pack_status = BLK_STATUS_UNDEFINED; - - while (block_pack_status != BLK_PASSED) { - if (get_next_primitive_list(cluster_placement_stats_ptr, molecule, - primitives_list)) { - block_pack_status = BLK_PASSED; - - for (i = 0; i < molecule_size && block_pack_status == BLK_PASSED; i++) { - VTR_ASSERT((primitives_list[i] == nullptr) == (!molecule->atom_block_ids[i])); - failed_location = i + 1; - // try place atom block if it exists - if (molecule->atom_block_ids[i]) { - block_pack_status = try_place_atom_block_rec(primitives_list[i], - molecule->atom_block_ids[i], pb, &parent, - max_models, max_cluster_size, clb_index, - cluster_placement_stats_ptr, molecule, router_data, - verbosity, feasible_block_array_size); - } - } - - if (enable_pin_feasibility_filter && block_pack_status == BLK_PASSED) { - /* Check if pin usage is feasible for the current packing assignment */ - reset_lookahead_pins_used(pb); - try_update_lookahead_pins_used(pb); - if (!check_lookahead_pins_used(pb, max_external_pin_util)) { - VTR_LOGV(verbosity > 4, "\t\t\tFAILED Pin Feasibility Filter\n"); - block_pack_status = BLK_FAILED_FEASIBLE; - } - } - if (block_pack_status == BLK_PASSED) { - /* - * during the clustering step of `do_clustering`, `detailed_routing_stage` is incremented at each iteration until it a cluster - * is correctly generated or `detailed_routing_stage` assumes an invalid value (E_DETAILED_ROUTE_INVALID). - * depending on its value we have different behaviors: - * - E_DETAILED_ROUTE_AT_END_ONLY: Skip routing if heuristic is to route at the end of packing complex block. - * - E_DETAILED_ROUTE_FOR_EACH_ATOM: Try to route if heuristic is to route for every atom. If the clusterer arrives at this stage, - * it means that more checks have to be performed as the previous stage failed to generate a new cluster. - * - * mode_status is a data structure containing the status of the mode selection. Its members are: - * - bool is_mode_conflict - * - bool try_expand_all_modes - * - bool expand_all_modes - * - * is_mode_conflict affects this stage. Its value determines whether the cluster failed to pack after a mode conflict issue. - * It holds a flag that is used to verify whether try_intra_lb_route ended in a mode conflict issue. - * - * Until is_mode_conflict is set to FALSE by try_intra_lb_route, the loop re-iterates. If all the available modes are exhausted - * an error will be thrown during mode conflicts checks (this to prevent infinite loops). - * - * If the value is TRUE the cluster has to be re-routed, and its internal pb_graph_nodes will have more restrict choices - * for what regards the mode that has to be selected. - * - * is_mode_conflict is initially set to TRUE, and, unless a mode conflict is found, it is set to false in `try_intra_lb_route`. - * - * try_expand_all_modes is set if the node expansion failed to find a valid routing path. The clusterer tries to find another route - * by using all the modes during node expansion. - * - * expand_all_modes is used to enable the expansion of all the nodes using all the possible modes. - */ - t_mode_selection_status mode_status; - bool is_routed = false; - bool do_detailed_routing_stage = detailed_routing_stage == (int)E_DETAILED_ROUTE_FOR_EACH_ATOM; - if (do_detailed_routing_stage) { - do { - reset_intra_lb_route(router_data); - is_routed = try_intra_lb_route(router_data, verbosity, &mode_status); - } while (do_detailed_routing_stage && mode_status.is_mode_issue()); - } - - if (do_detailed_routing_stage && is_routed == false) { - /* Cannot pack */ - VTR_LOGV(verbosity > 4, "\t\t\tFAILED Detailed Routing Legality\n"); - block_pack_status = BLK_FAILED_ROUTE; - } else { - /* Pack successful, commit - * TODO: SW Engineering note - may want to update cluster stats here too instead of doing it outside - */ - VTR_ASSERT(block_pack_status == BLK_PASSED); - if (molecule->is_chain()) { - /* Chained molecules often take up lots of area and are important, - * if a chain is packed in, want to rename logic block to match chain name */ - AtomBlockId chain_root_blk_id = molecule->atom_block_ids[molecule->pack_pattern->root_block->block_id]; - cur_pb = atom_ctx.lookup.atom_pb(chain_root_blk_id)->parent_pb; - while (cur_pb != nullptr) { - free(cur_pb->name); - cur_pb->name = vtr::strdup(atom_ctx.nlist.block_name(chain_root_blk_id).c_str()); - cur_pb = cur_pb->parent_pb; - } - // if this molecule is part of a chain, mark the cluster as having a long chain - // molecule. Also check if it's the first molecule in the chain to be packed. - // If so, update the chain id for this chain of molecules to make sure all - // molecules will be packed to the same chain id and can reach each other using - // the chain direct links between clusters - if (molecule->chain_info->is_long_chain) { - cluster_placement_stats_ptr->has_long_chain = true; - if (molecule->chain_info->chain_id == -1) { - update_molecule_chain_info(molecule, primitives_list[molecule->root]); - } - } - } - - //update cluster PartitionRegion if atom with floorplanning constraints was added - if (cluster_pr_update_check) { - floorplanning_ctx.cluster_constraints[clb_index] = temp_cluster_pr; - if (verbosity > 2) { - VTR_LOG("\nUpdated PartitionRegion of cluster %d\n", clb_index); - } - } - - for (i = 0; i < molecule_size; i++) { - if (molecule->atom_block_ids[i]) { - /* invalidate all molecules that share atom block with current molecule */ - - auto rng = atom_molecules.equal_range(molecule->atom_block_ids[i]); - for (const auto& kv : vtr::make_range(rng.first, rng.second)) { - t_pack_molecule* cur_molecule = kv.second; - cur_molecule->valid = false; - } - - commit_primitive(cluster_placement_stats_ptr, primitives_list[i]); - } - } - } - } - - if (block_pack_status != BLK_PASSED) { - for (i = 0; i < failed_location; i++) { - if (molecule->atom_block_ids[i]) { - remove_atom_from_target(router_data, molecule->atom_block_ids[i]); - } - } - for (i = 0; i < failed_location; i++) { - if (molecule->atom_block_ids[i]) { - revert_place_atom_block(molecule->atom_block_ids[i], router_data, atom_molecules); - } - } - - //Record the failure of this molecule in the current pb stats - record_molecule_failure(molecule, pb); - - /* Packing failed, but a part of the pb tree is still allocated and pbs have their modes set. - * Before trying to pack next molecule the unused pbs need to be freed and, the most important, - * their modes reset. This task is performed by the cleanup_pb() function below. */ - cleanup_pb(pb); - - } else { - VTR_LOGV(verbosity > 3, "\t\tPASSED pack molecule\n"); - } - } else { - VTR_LOGV(verbosity > 3, "\t\tFAILED No candidate primitives available\n"); - block_pack_status = BLK_FAILED_FEASIBLE; - break; /* no more candidate primitives available, this molecule will not pack, return fail */ - } - } - return block_pack_status; -} - -/* Record the failure of the molecule in this cluster in the current pb stats. - * If a molecule fails repeatedly, it's gain will be penalized if packing with - * attraction groups on. */ -static void record_molecule_failure(t_pack_molecule* molecule, t_pb* pb) { - //Only have to record the failure for the first atom in the molecule. - //The convention when checking if a molecule has failed to pack in the cluster - //is to check whether the first atoms has been recorded as having failed - - auto got = pb->pb_stats->atom_failures.find(molecule->atom_block_ids[0]); - if (got == pb->pb_stats->atom_failures.end()) { - pb->pb_stats->atom_failures.insert({molecule->atom_block_ids[0], 1}); - } else { - got->second++; - } -} - -/** - * Try place atom block into current primitive location - */ - -static enum e_block_pack_status try_place_atom_block_rec(const t_pb_graph_node* pb_graph_node, - const AtomBlockId blk_id, - t_pb* cb, - t_pb** parent, - const int max_models, - const int max_cluster_size, - const ClusterBlockId clb_index, - const t_cluster_placement_stats* cluster_placement_stats_ptr, - const t_pack_molecule* molecule, - t_lb_router_data* router_data, - int verbosity, - const int feasible_block_array_size) { - int i, j; - bool is_primitive; - enum e_block_pack_status block_pack_status; - - t_pb* my_parent; - t_pb *pb, *parent_pb; - const t_pb_type* pb_type; - - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - - my_parent = nullptr; - - block_pack_status = BLK_PASSED; - - /* Discover parent */ - if (pb_graph_node->parent_pb_graph_node != cb->pb_graph_node) { - block_pack_status = try_place_atom_block_rec(pb_graph_node->parent_pb_graph_node, blk_id, cb, - &my_parent, max_models, max_cluster_size, clb_index, - cluster_placement_stats_ptr, molecule, router_data, - verbosity, feasible_block_array_size); - parent_pb = my_parent; - } else { - parent_pb = cb; - } - - /* Create siblings if siblings are not allocated */ - if (parent_pb->child_pbs == nullptr) { - atom_ctx.lookup.set_atom_pb(AtomBlockId::INVALID(), parent_pb); - - VTR_ASSERT(parent_pb->name == nullptr); - parent_pb->name = vtr::strdup(atom_ctx.nlist.block_name(blk_id).c_str()); - parent_pb->mode = pb_graph_node->pb_type->parent_mode->index; - set_reset_pb_modes(router_data, parent_pb, true); - const t_mode* mode = &parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode]; - parent_pb->child_pbs = new t_pb*[mode->num_pb_type_children]; - - for (i = 0; i < mode->num_pb_type_children; i++) { - parent_pb->child_pbs[i] = new t_pb[mode->pb_type_children[i].num_pb]; - - for (j = 0; j < mode->pb_type_children[i].num_pb; j++) { - parent_pb->child_pbs[i][j].parent_pb = parent_pb; - - atom_ctx.lookup.set_atom_pb(AtomBlockId::INVALID(), &parent_pb->child_pbs[i][j]); - - parent_pb->child_pbs[i][j].pb_graph_node = &(parent_pb->pb_graph_node->child_pb_graph_nodes[parent_pb->mode][i][j]); - } - } - } else { - VTR_ASSERT(parent_pb->mode == pb_graph_node->pb_type->parent_mode->index); - } - - const t_mode* mode = &parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode]; - for (i = 0; i < mode->num_pb_type_children; i++) { - if (pb_graph_node->pb_type == &mode->pb_type_children[i]) { - break; - } - } - VTR_ASSERT(i < mode->num_pb_type_children); - pb = &parent_pb->child_pbs[i][pb_graph_node->placement_index]; - *parent = pb; /* this pb is parent of it's child that called this function */ - VTR_ASSERT(pb->pb_graph_node == pb_graph_node); - if (pb->pb_stats == nullptr) { - alloc_and_load_pb_stats(pb, feasible_block_array_size); - } - pb_type = pb_graph_node->pb_type; - - /* Any pb_type under an mode, which is disabled for packing, should not be considerd for mapping - * Early exit to flag failure - */ - if (true == pb_type->parent_mode->disable_packing) { - return BLK_FAILED_FEASIBLE; - } - - is_primitive = (pb_type->num_modes == 0); - - if (is_primitive) { - VTR_ASSERT(!atom_ctx.lookup.pb_atom(pb) - && atom_ctx.lookup.atom_pb(blk_id) == nullptr - && atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()); - /* try pack to location */ - VTR_ASSERT(pb->name == nullptr); - pb->name = vtr::strdup(atom_ctx.nlist.block_name(blk_id).c_str()); - - //Update the atom netlist mappings - atom_ctx.lookup.set_atom_clb(blk_id, clb_index); - atom_ctx.lookup.set_atom_pb(blk_id, pb); - - add_atom_as_target(router_data, blk_id); - if (!primitive_feasible(blk_id, pb)) { - /* failed location feasibility check, revert pack */ - block_pack_status = BLK_FAILED_FEASIBLE; - } - - // if this block passed and is part of a chained molecule - if (block_pack_status == BLK_PASSED && molecule->is_chain()) { - auto molecule_root_block = molecule->atom_block_ids[molecule->root]; - // if this is the root block of the chain molecule check its placmeent feasibility - if (blk_id == molecule_root_block) { - block_pack_status = check_chain_root_placement_feasibility(pb_graph_node, molecule, blk_id); - } - } - - VTR_LOGV(verbosity > 4 && block_pack_status == BLK_PASSED, - "\t\t\tPlaced atom '%s' (%s) at %s\n", - atom_ctx.nlist.block_name(blk_id).c_str(), - atom_ctx.nlist.block_model(blk_id)->name, - pb->hierarchical_type_name().c_str()); - } - - if (block_pack_status != BLK_PASSED) { - free(pb->name); - pb->name = nullptr; - } - - return block_pack_status; -} - -/* - * Checks if the atom and cluster have compatible floorplanning constraints - * If the atom and cluster both have non-empty PartitionRegions, and the intersection - * of the PartitionRegions is empty, the atom cannot be packed in the cluster. - */ -static enum e_block_pack_status atom_cluster_floorplanning_check(const AtomBlockId blk_id, - const ClusterBlockId clb_index, - const int verbosity, - PartitionRegion& temp_cluster_pr, - bool& cluster_pr_needs_update) { - auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); - - /*check if the atom can go in the cluster by checking if the atom and cluster have intersecting PartitionRegions*/ - - //get partition that atom belongs to - PartitionId partid; - partid = floorplanning_ctx.constraints.get_atom_partition(blk_id); - - PartitionRegion atom_pr; - PartitionRegion cluster_pr; - - //if the atom does not belong to a partition, it can be put in the cluster - //regardless of what the cluster's PartitionRegion is because it has no constraints - if (partid == PartitionId::INVALID()) { - if (verbosity > 3) { - VTR_LOG("\t\t\t Intersect: Atom block %d has no floorplanning constraints, passed for cluster %d \n", blk_id, clb_index); - } - cluster_pr_needs_update = false; - return BLK_PASSED; - } else { - //get pr of that partition - atom_pr = floorplanning_ctx.constraints.get_partition_pr(partid); - - //intersect it with the pr of the current cluster - cluster_pr = floorplanning_ctx.cluster_constraints[clb_index]; - - if (cluster_pr.empty() == true) { - temp_cluster_pr = atom_pr; - cluster_pr_needs_update = true; - if (verbosity > 3) { - VTR_LOG("\t\t\t Intersect: Atom block %d has floorplanning constraints, passed cluster %d which has empty PR\n", blk_id, clb_index); - } - return BLK_PASSED; - } else { - //update cluster_pr with the intersection of the cluster's PartitionRegion - //and the atom's PartitionRegion - update_cluster_part_reg(cluster_pr, atom_pr); - } - - if (cluster_pr.empty() == true) { - if (verbosity > 3) { - VTR_LOG("\t\t\t Intersect: Atom block %d failed floorplanning check for cluster %d \n", blk_id, clb_index); - } - cluster_pr_needs_update = false; - return BLK_FAILED_FLOORPLANNING; - } else { - //update the cluster's PartitionRegion with the intersecting PartitionRegion - temp_cluster_pr = cluster_pr; - cluster_pr_needs_update = true; - if (verbosity > 3) { - VTR_LOG("\t\t\t Intersect: Atom block %d passed cluster %d, cluster PR was updated with intersection result \n", blk_id, clb_index); - } - return BLK_PASSED; - } - } -} - -/* Revert trial atom block iblock and free up memory space accordingly - */ -static void revert_place_atom_block(const AtomBlockId blk_id, t_lb_router_data* router_data, const std::multimap& atom_molecules) { - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - - //We cast away const here since we may free the pb, and it is - //being removed from the active mapping. - // - //In general most code works fine accessing cosnt t_pb*, - //which is why we store them as such in atom_ctx.lookup - t_pb* pb = const_cast(atom_ctx.lookup.atom_pb(blk_id)); - - //Update the atom netlist mapping - atom_ctx.lookup.set_atom_clb(blk_id, ClusterBlockId::INVALID()); - atom_ctx.lookup.set_atom_pb(blk_id, nullptr); - - if (pb != nullptr) { - /* When freeing molecules, the current block might already have been freed by a prior revert - * When this happens, no need to do anything beyond basic book keeping at the atom block - */ - - t_pb* next = pb->parent_pb; - revalid_molecules(pb, atom_molecules); - free_pb(pb); - pb = next; - - while (pb != nullptr) { - /* If this is pb is created only for the purposes of holding new molecule, remove it - * Must check if cluster is already freed (which can be the case) - */ - next = pb->parent_pb; - - if (pb->child_pbs != nullptr && pb->pb_stats != nullptr - && pb->pb_stats->num_child_blocks_in_pb == 0) { - set_reset_pb_modes(router_data, pb, false); - if (next != nullptr) { - /* If the code gets here, then that means that placing the initial seed molecule - * failed, don't free the actual complex block itself as the seed needs to find - * another placement */ - revalid_molecules(pb, atom_molecules); - free_pb(pb); - } - } - pb = next; - } - } -} - -static void update_connection_gain_values(const AtomNetId net_id, const AtomBlockId clustered_blk_id, t_pb* cur_pb, enum e_net_relation_to_clustered_block net_relation_to_clustered_block) { - /*This function is called when the connectiongain values on the net net_id* - *require updating. */ - - int num_internal_connections, num_open_connections, num_stuck_connections; - - num_internal_connections = num_open_connections = num_stuck_connections = 0; - - auto& atom_ctx = g_vpr_ctx.atom(); - ClusterBlockId clb_index = atom_ctx.lookup.atom_clb(clustered_blk_id); - - /* may wish to speed things up by ignoring clock nets since they are high fanout */ - - for (auto pin_id : atom_ctx.nlist.net_pins(net_id)) { - auto blk_id = atom_ctx.nlist.pin_block(pin_id); - if (atom_ctx.lookup.atom_clb(blk_id) == clb_index - && is_atom_blk_in_pb(blk_id, atom_ctx.lookup.atom_pb(clustered_blk_id))) { - num_internal_connections++; - } else if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { - num_open_connections++; - } else { - num_stuck_connections++; - } - } - - if (net_relation_to_clustered_block == OUTPUT) { - for (auto pin_id : atom_ctx.nlist.net_sinks(net_id)) { - auto blk_id = atom_ctx.nlist.pin_block(pin_id); - VTR_ASSERT(blk_id); - - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { - /* TODO: Gain function accurate only if net has one connection to block, - * TODO: Should we handle case where net has multi-connection to block? - * Gain computation is only off by a bit in this case */ - if (cur_pb->pb_stats->connectiongain.count(blk_id) == 0) { - cur_pb->pb_stats->connectiongain[blk_id] = 0; - } - - if (num_internal_connections > 1) { - cur_pb->pb_stats->connectiongain[blk_id] -= 1 / (float)(num_open_connections + 1.5 * num_stuck_connections + 1 + 0.1); - } - cur_pb->pb_stats->connectiongain[blk_id] += 1 / (float)(num_open_connections + 1.5 * num_stuck_connections + 0.1); - } - } - } - - if (net_relation_to_clustered_block == INPUT) { - /*Calculate the connectiongain for the atom block which is driving * - *the atom net that is an input to an atom block in the cluster */ - - auto driver_pin_id = atom_ctx.nlist.net_driver(net_id); - auto blk_id = atom_ctx.nlist.pin_block(driver_pin_id); - - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { - if (cur_pb->pb_stats->connectiongain.count(blk_id) == 0) { - cur_pb->pb_stats->connectiongain[blk_id] = 0; - } - if (num_internal_connections > 1) { - cur_pb->pb_stats->connectiongain[blk_id] -= 1 / (float)(num_open_connections + 1.5 * num_stuck_connections + 0.1 + 1); - } - cur_pb->pb_stats->connectiongain[blk_id] += 1 / (float)(num_open_connections + 1.5 * num_stuck_connections + 0.1); - } - } -} - -static void try_fill_cluster(const t_packer_opts& packer_opts, - t_cluster_placement_stats* cur_cluster_placement_stats_ptr, - const std::multimap& atom_molecules, - t_pack_molecule*& prev_molecule, - t_pack_molecule*& next_molecule, - int& num_same_molecules, - t_pb_graph_node** primitives_list, - t_cluster_progress_stats& cluster_stats, - int num_clb, - const int num_models, - const int max_cluster_size, - const ClusterBlockId clb_index, - const int detailed_routing_stage, - AttractionInfo& attraction_groups, - vtr::vector>& clb_inter_blk_nets, - bool allow_unrelated_clustering, - const int& high_fanout_threshold, - const std::unordered_set& is_clock, - const std::shared_ptr& timing_info, - t_lb_router_data* router_data, - t_ext_pin_util target_ext_pin_util, - PartitionRegion& temp_cluster_pr, - std::map>& primitive_candidate_block_types, - e_block_pack_status& block_pack_status) { - auto& atom_ctx = g_vpr_ctx.atom(); - auto& device_ctx = g_vpr_ctx.mutable_device(); - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - - block_pack_status = try_pack_molecule(cur_cluster_placement_stats_ptr, - atom_molecules, - next_molecule, - primitives_list, - cluster_ctx.clb_nlist.block_pb(clb_index), - num_models, - max_cluster_size, - clb_index, - detailed_routing_stage, - router_data, - packer_opts.pack_verbosity, - packer_opts.enable_pin_feasibility_filter, - packer_opts.feasible_block_array_size, - target_ext_pin_util, - temp_cluster_pr); - - auto blk_id = next_molecule->atom_block_ids[next_molecule->root]; - VTR_ASSERT(blk_id); - - std::string blk_name = atom_ctx.nlist.block_name(blk_id); - const t_model* blk_model = atom_ctx.nlist.block_model(blk_id); - - if (block_pack_status != BLK_PASSED) { - if (packer_opts.pack_verbosity > 2) { - if (block_pack_status == BLK_FAILED_ROUTE) { - VTR_LOG("\tNO_ROUTE: '%s' (%s)", blk_name.c_str(), blk_model->name); - VTR_LOGV(next_molecule->pack_pattern, " molecule %s molecule_size %zu", - next_molecule->pack_pattern->name, next_molecule->atom_block_ids.size()); - VTR_LOG("\n"); - fflush(stdout); - } else if (block_pack_status == BLK_FAILED_FLOORPLANNING) { - VTR_LOG("\tFAILED_FLOORPLANNING_CONSTRAINTS_CHECK: '%s' (%s)", blk_name.c_str(), blk_model->name); - VTR_LOG("\n"); - } else { - VTR_LOG("\tFAILED_FEASIBILITY_CHECK: '%s' (%s)", blk_name.c_str(), blk_model->name, block_pack_status); - VTR_LOGV(next_molecule->pack_pattern, " molecule %s molecule_size %zu", - next_molecule->pack_pattern->name, next_molecule->atom_block_ids.size()); - VTR_LOG("\n"); - fflush(stdout); - } - } - - next_molecule = get_molecule_for_cluster(cluster_ctx.clb_nlist.block_pb(clb_index), - atom_molecules, - attraction_groups, - allow_unrelated_clustering, - packer_opts.prioritize_transitive_connectivity, - packer_opts.transitive_fanout_threshold, - packer_opts.feasible_block_array_size, - &cluster_stats.num_unrelated_clustering_attempts, - cur_cluster_placement_stats_ptr, - clb_inter_blk_nets, - clb_index, packer_opts.pack_verbosity, - primitive_candidate_block_types); - if (prev_molecule == next_molecule) { - num_same_molecules++; - } - return; - } - - /* Continue packing by filling smallest cluster */ - if (packer_opts.pack_verbosity > 2) { - VTR_LOG("\tPASSED: '%s' (%s)", blk_name.c_str(), blk_model->name); - VTR_LOGV(next_molecule->pack_pattern, " molecule %s molecule_size %zu", - next_molecule->pack_pattern->name, next_molecule->atom_block_ids.size()); - VTR_LOG("\n"); - } - - fflush(stdout); - - //Since molecule passed, update num_molecules_processed - cluster_stats.num_molecules_processed++; - cluster_stats.mols_since_last_print++; - print_pack_status(num_clb, cluster_stats.num_molecules, - cluster_stats.num_molecules_processed, - cluster_stats.mols_since_last_print, - device_ctx.grid.width(), - device_ctx.grid.height(), - attraction_groups); - - update_cluster_stats(next_molecule, clb_index, - is_clock, //Set of all clocks - is_clock, //Set of all global signals (currently clocks) - packer_opts.global_clocks, packer_opts.alpha, packer_opts.beta, packer_opts.timing_driven, - packer_opts.connection_driven, - high_fanout_threshold, - *timing_info, - attraction_groups); - cluster_stats.num_unrelated_clustering_attempts = 0; - - if (packer_opts.timing_driven) { - cluster_stats.blocks_since_last_analysis++; /* historically, timing slacks were recomputed after X number of blocks were packed, but this doesn't significantly alter results so I (jluu) did not port the code */ - } - next_molecule = get_molecule_for_cluster(cluster_ctx.clb_nlist.block_pb(clb_index), - atom_molecules, - attraction_groups, - allow_unrelated_clustering, - packer_opts.prioritize_transitive_connectivity, - packer_opts.transitive_fanout_threshold, - packer_opts.feasible_block_array_size, - &cluster_stats.num_unrelated_clustering_attempts, - cur_cluster_placement_stats_ptr, - clb_inter_blk_nets, - clb_index, - packer_opts.pack_verbosity, - primitive_candidate_block_types); - - if (prev_molecule == next_molecule) { - num_same_molecules++; - } -} - -static t_pack_molecule* save_cluster_routing_and_pick_new_seed(const t_packer_opts& packer_opts, - const std::multimap& atom_molecules, - const int& num_clb, - const std::vector& seed_atoms, - const int& num_blocks_hill_added, - vtr::vector*>& intra_lb_routing, - int& seedindex, - t_cluster_progress_stats& cluster_stats, - t_lb_router_data* router_data) { - t_pack_molecule* next_seed = nullptr; - - intra_lb_routing.push_back(router_data->saved_lb_nets); - VTR_ASSERT((int)intra_lb_routing.size() == num_clb); - router_data->saved_lb_nets = nullptr; - - //Pick a new seed - next_seed = get_highest_gain_seed_molecule(&seedindex, atom_molecules, seed_atoms); - - if (packer_opts.timing_driven) { - if (num_blocks_hill_added > 0) { - cluster_stats.blocks_since_last_analysis += num_blocks_hill_added; - } - } - return next_seed; -} - -static void store_cluster_info_and_free(const t_packer_opts& packer_opts, - const ClusterBlockId& clb_index, - const t_logical_block_type_ptr logic_block_type, - const t_pb_type* le_pb_type, - std::vector& le_count, - vtr::vector>& clb_inter_blk_nets) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - auto& atom_ctx = g_vpr_ctx.atom(); - - /* store info that will be used later in packing from pb_stats and free the rest */ - t_pb_stats* pb_stats = cluster_ctx.clb_nlist.block_pb(clb_index)->pb_stats; - for (const AtomNetId mnet_id : pb_stats->marked_nets) { - int external_terminals = atom_ctx.nlist.net_pins(mnet_id).size() - pb_stats->num_pins_of_net_in_pb[mnet_id]; - /* Check if external terminals of net is within the fanout limit and that there exists external terminals */ - if (external_terminals < packer_opts.transitive_fanout_threshold && external_terminals > 0) { - clb_inter_blk_nets[clb_index].push_back(mnet_id); - } - } - auto cur_pb = cluster_ctx.clb_nlist.block_pb(clb_index); - - // update the data structure holding the LE counts - update_le_count(cur_pb, logic_block_type, le_pb_type, le_count); - - //print clustering progress incrementally - //print_pack_status(num_clb, num_molecules, num_molecules_processed, mols_since_last_print, device_ctx.grid.width(), device_ctx.grid.height()); - - free_pb_stats_recursive(cur_pb); -} - -/* Free up data structures and requeue used molecules */ -static void free_data_and_requeue_used_mols_if_illegal(const ClusterBlockId& clb_index, - const int& savedseedindex, - const std::multimap& atom_molecules, - std::map& num_used_type_instances, - int& num_clb, - int& seedindex) { - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); - - PartitionRegion empty_pr; - floorplanning_ctx.cluster_constraints[clb_index] = empty_pr; - num_used_type_instances[cluster_ctx.clb_nlist.block_type(clb_index)]--; - revalid_molecules(cluster_ctx.clb_nlist.block_pb(clb_index), atom_molecules); - cluster_ctx.clb_nlist.remove_block(clb_index); - cluster_ctx.clb_nlist.compress(); - num_clb--; - seedindex = savedseedindex; -} - -/*****************************************/ -static void update_timing_gain_values(const AtomNetId net_id, - t_pb* cur_pb, - enum e_net_relation_to_clustered_block net_relation_to_clustered_block, - const SetupTimingInfo& timing_info, - const std::unordered_set& is_global) { - /*This function is called when the timing_gain values on the atom net* - *net_id requires updating. */ - float timinggain; - - auto& atom_ctx = g_vpr_ctx.atom(); - - /* Check if this atom net lists its driving atom block twice. If so, avoid * - * double counting this atom block by skipping the first (driving) pin. */ - auto pins = atom_ctx.nlist.net_pins(net_id); - if (net_output_feeds_driving_block_input[net_id] != 0) - pins = atom_ctx.nlist.net_sinks(net_id); - - if (net_relation_to_clustered_block == OUTPUT - && !is_global.count(net_id)) { - for (auto pin_id : pins) { - auto blk_id = atom_ctx.nlist.pin_block(pin_id); - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { - timinggain = timing_info.setup_pin_criticality(pin_id); - - if (cur_pb->pb_stats->timinggain.count(blk_id) == 0) { - cur_pb->pb_stats->timinggain[blk_id] = 0; - } - if (timinggain > cur_pb->pb_stats->timinggain[blk_id]) - cur_pb->pb_stats->timinggain[blk_id] = timinggain; - } - } - } - - if (net_relation_to_clustered_block == INPUT - && !is_global.count(net_id)) { - /*Calculate the timing gain for the atom block which is driving * - *the atom net that is an input to a atom block in the cluster */ - auto driver_pin = atom_ctx.nlist.net_driver(net_id); - auto new_blk_id = atom_ctx.nlist.pin_block(driver_pin); - - if (atom_ctx.lookup.atom_clb(new_blk_id) == ClusterBlockId::INVALID()) { - for (auto pin_id : atom_ctx.nlist.net_sinks(net_id)) { - timinggain = timing_info.setup_pin_criticality(pin_id); - - if (cur_pb->pb_stats->timinggain.count(new_blk_id) == 0) { - cur_pb->pb_stats->timinggain[new_blk_id] = 0; - } - if (timinggain > cur_pb->pb_stats->timinggain[new_blk_id]) - cur_pb->pb_stats->timinggain[new_blk_id] = timinggain; - } - } - } -} - -/*****************************************/ -static void mark_and_update_partial_gain(const AtomNetId net_id, enum e_gain_update gain_flag, const AtomBlockId clustered_blk_id, bool timing_driven, bool connection_driven, enum e_net_relation_to_clustered_block net_relation_to_clustered_block, const SetupTimingInfo& timing_info, const std::unordered_set& is_global, const int high_fanout_net_threshold) { - /* Updates the marked data structures, and if gain_flag is GAIN, * - * the gain when an atom block is added to a cluster. The * - * sharinggain is the number of inputs that a atom block shares with * - * blocks that are already in the cluster. Hillgain is the * - * reduction in number of pins-required by adding a atom block to the * - * cluster. The timinggain is the criticality of the most critical* - * atom net between this atom block and an atom block in the cluster. */ - - auto& atom_ctx = g_vpr_ctx.atom(); - t_pb* cur_pb = atom_ctx.lookup.atom_pb(clustered_blk_id)->parent_pb; - cur_pb = get_top_level_pb(cur_pb); - - if (int(atom_ctx.nlist.net_sinks(net_id).size()) > high_fanout_net_threshold) { - /* Optimization: It can be too runtime costly for marking all sinks for - * a high fanout-net that probably has no hope of ever getting packed, - * thus ignore those high fanout nets */ - if (!is_global.count(net_id)) { - /* If no low/medium fanout nets, we may need to consider - * high fan-out nets for packing, so select one and store it */ - AtomNetId stored_net = cur_pb->pb_stats->tie_break_high_fanout_net; - if (!stored_net || atom_ctx.nlist.net_sinks(net_id).size() < atom_ctx.nlist.net_sinks(stored_net).size()) { - cur_pb->pb_stats->tie_break_high_fanout_net = net_id; - } - } - return; - } - - /* Mark atom net as being visited, if necessary. */ - - if (cur_pb->pb_stats->num_pins_of_net_in_pb.count(net_id) == 0) { - cur_pb->pb_stats->marked_nets.push_back(net_id); - } - - /* Update gains of affected blocks. */ - - if (gain_flag == GAIN) { - /* Check if this net is connected to it's driver block multiple times (i.e. as both an output and input) - * If so, avoid double counting by skipping the first (driving) pin. */ - - auto pins = atom_ctx.nlist.net_pins(net_id); - if (net_output_feeds_driving_block_input[net_id] != 0) - //We implicitly assume here that net_output_feeds_driver_block_input[net_id] is 2 - //(i.e. the net loops back to the block only once) - pins = atom_ctx.nlist.net_sinks(net_id); - - if (cur_pb->pb_stats->num_pins_of_net_in_pb.count(net_id) == 0) { - for (auto pin_id : pins) { - auto blk_id = atom_ctx.nlist.pin_block(pin_id); - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { - if (cur_pb->pb_stats->sharinggain.count(blk_id) == 0) { - cur_pb->pb_stats->marked_blocks.push_back(blk_id); - cur_pb->pb_stats->sharinggain[blk_id] = 1; - cur_pb->pb_stats->hillgain[blk_id] = 1 - num_ext_inputs_atom_block(blk_id); - } else { - cur_pb->pb_stats->sharinggain[blk_id]++; - cur_pb->pb_stats->hillgain[blk_id]++; - } - } - } - } - - if (connection_driven) { - update_connection_gain_values(net_id, clustered_blk_id, cur_pb, - net_relation_to_clustered_block); - } - - if (timing_driven) { - update_timing_gain_values(net_id, cur_pb, - net_relation_to_clustered_block, - timing_info, - is_global); - } - } - if (cur_pb->pb_stats->num_pins_of_net_in_pb.count(net_id) == 0) { - cur_pb->pb_stats->num_pins_of_net_in_pb[net_id] = 0; - } - cur_pb->pb_stats->num_pins_of_net_in_pb[net_id]++; -} - -/*****************************************/ -static void update_total_gain(float alpha, float beta, bool timing_driven, bool connection_driven, t_pb* pb, AttractionInfo& attraction_groups) { - /*Updates the total gain array to reflect the desired tradeoff between* - *input sharing (sharinggain) and path_length minimization (timinggain) - *input each time a new molecule is added to the cluster.*/ - auto& atom_ctx = g_vpr_ctx.atom(); - t_pb* cur_pb = pb; - - cur_pb = get_top_level_pb(cur_pb); - AttractGroupId cluster_att_grp_id; - - cluster_att_grp_id = cur_pb->pb_stats->attraction_grp_id; - - for (AtomBlockId blk_id : cur_pb->pb_stats->marked_blocks) { - //Initialize connectiongain and sharinggain if - //they have not previously been updated for the block - if (cur_pb->pb_stats->connectiongain.count(blk_id) == 0) { - cur_pb->pb_stats->connectiongain[blk_id] = 0; - } - if (cur_pb->pb_stats->sharinggain.count(blk_id) == 0) { - cur_pb->pb_stats->sharinggain[blk_id] = 0; - } - - /* Todo: This was used to explore different normalization options, can - * be made more efficient once we decide on which one to use*/ - int num_used_input_pins = atom_ctx.nlist.block_input_pins(blk_id).size(); - int num_used_output_pins = atom_ctx.nlist.block_output_pins(blk_id).size(); - /* end todo */ - - /* Calculate area-only cost function */ - int num_used_pins = num_used_input_pins + num_used_output_pins; - VTR_ASSERT(num_used_pins > 0); - if (connection_driven) { - /*try to absorb as many connections as possible*/ - cur_pb->pb_stats->gain[blk_id] = ((1 - beta) - * (float)cur_pb->pb_stats->sharinggain[blk_id] - + beta * (float)cur_pb->pb_stats->connectiongain[blk_id]) - / (num_used_pins); - } else { - cur_pb->pb_stats->gain[blk_id] = ((float)cur_pb->pb_stats->sharinggain[blk_id]) - / (num_used_pins); - } - - /* Add in timing driven cost into cost function */ - if (timing_driven) { - cur_pb->pb_stats->gain[blk_id] = alpha - * cur_pb->pb_stats->timinggain[blk_id] - + (1.0 - alpha) * (float)cur_pb->pb_stats->gain[blk_id]; - } - - AttractGroupId atom_grp_id = attraction_groups.get_atom_attraction_group(blk_id); - if (atom_grp_id != AttractGroupId::INVALID() && atom_grp_id == cluster_att_grp_id) { - //increase gain of atom based on attraction group gain - float att_grp_gain = attraction_groups.get_attraction_group_gain(atom_grp_id); - cur_pb->pb_stats->gain[blk_id] += att_grp_gain; - } - } -} - -/*****************************************/ -static void update_cluster_stats(const t_pack_molecule* molecule, - const ClusterBlockId clb_index, - const std::unordered_set& is_clock, - const std::unordered_set& is_global, - const bool global_clocks, - const float alpha, - const float beta, - const bool timing_driven, - const bool connection_driven, - const int high_fanout_net_threshold, - const SetupTimingInfo& timing_info, - AttractionInfo& attraction_groups) { - /* Routine that is called each time a new molecule is added to the cluster. - * Makes calls to update cluster stats such as the gain map for atoms, used pins, and clock structures, - * in order to reflect the new content of the cluster. - * Also keeps track of which attraction group the cluster belongs to. */ - - int molecule_size; - int iblock; - t_pb *cur_pb, *cb; - - auto& atom_ctx = g_vpr_ctx.mutable_atom(); - molecule_size = get_array_size_of_molecule(molecule); - cb = nullptr; - - for (iblock = 0; iblock < molecule_size; iblock++) { - auto blk_id = molecule->atom_block_ids[iblock]; - if (!blk_id) { - continue; - } - - //Update atom netlist mapping - atom_ctx.lookup.set_atom_clb(blk_id, clb_index); - - const t_pb* atom_pb = atom_ctx.lookup.atom_pb(blk_id); - VTR_ASSERT(atom_pb); - - cur_pb = atom_pb->parent_pb; - - //Update attraction group - AttractGroupId atom_grp_id = attraction_groups.get_atom_attraction_group(blk_id); - - while (cur_pb) { - /* reset list of feasible blocks */ - if (cur_pb->is_root()) { - cb = cur_pb; - } - cur_pb->pb_stats->num_feasible_blocks = NOT_VALID; - cur_pb->pb_stats->num_child_blocks_in_pb++; - - if (atom_grp_id != AttractGroupId::INVALID()) { - /* TODO: Allow clusters to have more than one attraction group. */ - cur_pb->pb_stats->attraction_grp_id = atom_grp_id; - } - - cur_pb = cur_pb->parent_pb; - } - - /* Outputs first */ - for (auto pin_id : atom_ctx.nlist.block_output_pins(blk_id)) { - auto net_id = atom_ctx.nlist.pin_net(pin_id); - if (!is_clock.count(net_id) || !global_clocks) { - mark_and_update_partial_gain(net_id, GAIN, blk_id, - timing_driven, - connection_driven, OUTPUT, - timing_info, - is_global, - high_fanout_net_threshold); - } else { - mark_and_update_partial_gain(net_id, NO_GAIN, blk_id, - timing_driven, - connection_driven, OUTPUT, - timing_info, - is_global, - high_fanout_net_threshold); - } - } - - /* Next Inputs */ - for (auto pin_id : atom_ctx.nlist.block_input_pins(blk_id)) { - auto net_id = atom_ctx.nlist.pin_net(pin_id); - mark_and_update_partial_gain(net_id, GAIN, blk_id, - timing_driven, connection_driven, - INPUT, - timing_info, - is_global, - high_fanout_net_threshold); - } - - /* Finally Clocks */ - for (auto pin_id : atom_ctx.nlist.block_clock_pins(blk_id)) { - auto net_id = atom_ctx.nlist.pin_net(pin_id); - if (global_clocks) { - mark_and_update_partial_gain(net_id, NO_GAIN, blk_id, - timing_driven, connection_driven, INPUT, - timing_info, - is_global, - high_fanout_net_threshold); - } else { - mark_and_update_partial_gain(net_id, GAIN, blk_id, - timing_driven, connection_driven, INPUT, - timing_info, - is_global, - high_fanout_net_threshold); - } - } - - update_total_gain(alpha, beta, timing_driven, connection_driven, - atom_pb->parent_pb, attraction_groups); - - commit_lookahead_pins_used(cb); - } - - // if this molecule came from the transitive fanout candidates remove it - if (cb) { - cb->pb_stats->transitive_fanout_candidates.erase(molecule->atom_block_ids[molecule->root]); - cb->pb_stats->explore_transitive_fanout = true; - } -} - -static void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, - t_pb_graph_node** primitives_list, - const std::multimap& atom_molecules, - ClusterBlockId clb_index, - t_pack_molecule* molecule, - std::map& num_used_type_instances, - const float target_device_utilization, - const int num_models, - const int max_cluster_size, - const t_arch* arch, - std::string device_layout_name, - std::vector* lb_type_rr_graphs, - t_lb_router_data** router_data, - const int detailed_routing_stage, - ClusteredNetlist* clb_nlist, - const std::map>& primitive_candidate_block_types, - int verbosity, - bool enable_pin_feasibility_filter, - bool balance_block_type_utilization, - const int feasible_block_array_size, - PartitionRegion& temp_cluster_pr) { - /* Given a starting seed block, start_new_cluster determines the next cluster type to use - * It expands the FPGA if it cannot find a legal cluster for the atom block - */ - - auto& atom_ctx = g_vpr_ctx.atom(); - auto& device_ctx = g_vpr_ctx.mutable_device(); - auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); - - /*Cluster's PartitionRegion is empty initially, meaning it has no floorplanning constraints*/ - PartitionRegion empty_pr; - floorplanning_ctx.cluster_constraints.push_back(empty_pr); - - /* Allocate a dummy initial cluster and load a atom block as a seed and check if it is legal */ - AtomBlockId root_atom = molecule->atom_block_ids[molecule->root]; - const std::string& root_atom_name = atom_ctx.nlist.block_name(root_atom); - const t_model* root_model = atom_ctx.nlist.block_model(root_atom); - - auto itr = primitive_candidate_block_types.find(root_model); - VTR_ASSERT(itr != primitive_candidate_block_types.end()); - std::vector candidate_types = itr->second; - - if (balance_block_type_utilization) { - //We sort the candidate types in ascending order by their current utilization. - //This means that the packer will prefer to use types with lower utilization. - //This is a naive approach to try balancing utilization when multiple types can - //support the same primitive(s). - std::stable_sort(candidate_types.begin(), candidate_types.end(), - [&](t_logical_block_type_ptr lhs, t_logical_block_type_ptr rhs) { - int lhs_num_instances = 0; - int rhs_num_instances = 0; - // Count number of instances for each type - for (auto type : lhs->equivalent_tiles) - lhs_num_instances += device_ctx.grid.num_instances(type); - for (auto type : rhs->equivalent_tiles) - rhs_num_instances += device_ctx.grid.num_instances(type); - - float lhs_util = vtr::safe_ratio(num_used_type_instances[lhs], lhs_num_instances); - float rhs_util = vtr::safe_ratio(num_used_type_instances[rhs], rhs_num_instances); - //Lower util first - return lhs_util < rhs_util; - }); - } - - if (verbosity > 2) { - VTR_LOG("\tSeed: '%s' (%s)", root_atom_name.c_str(), root_model->name); - VTR_LOGV(molecule->pack_pattern, " molecule_type %s molecule_size %zu", - molecule->pack_pattern->name, molecule->atom_block_ids.size()); - VTR_LOG("\n"); - } - - //Try packing into each candidate type - bool success = false; - for (size_t i = 0; i < candidate_types.size(); i++) { - auto type = candidate_types[i]; - - t_pb* pb = new t_pb; - pb->pb_graph_node = type->pb_graph_head; - alloc_and_load_pb_stats(pb, feasible_block_array_size); - pb->parent_pb = nullptr; - - *router_data = alloc_and_load_router_data(&lb_type_rr_graphs[type->index], type); - - //Try packing into each mode - e_block_pack_status pack_result = BLK_STATUS_UNDEFINED; - for (int j = 0; j < type->pb_graph_head->pb_type->num_modes && !success; j++) { - pb->mode = j; - - reset_cluster_placement_stats(&cluster_placement_stats[type->index]); - set_mode_cluster_placement_stats(pb->pb_graph_node, j); - - //Note that since we are starting a new cluster, we use FULL_EXTERNAL_PIN_UTIL, - //which allows all cluster pins to be used. This ensures that if we have a large - //molecule which would otherwise exceed the external pin utilization targets it - //can use the full set of cluster pins when selected as the seed block -- ensuring - //it is still implementable. - pack_result = try_pack_molecule(&cluster_placement_stats[type->index], - atom_molecules, - molecule, primitives_list, pb, - num_models, max_cluster_size, clb_index, - detailed_routing_stage, *router_data, - verbosity, - enable_pin_feasibility_filter, - feasible_block_array_size, - FULL_EXTERNAL_PIN_UTIL, - temp_cluster_pr); - - success = (pack_result == BLK_PASSED); - } - - if (success) { - VTR_LOGV(verbosity > 2, "\tPASSED_SEED: Block Type %s\n", type->name); - //Once clustering succeeds, add it to the clb netlist - if (pb->name != nullptr) { - free(pb->name); - } - pb->name = vtr::strdup(root_atom_name.c_str()); - clb_index = clb_nlist->create_block(root_atom_name.c_str(), pb, type); - break; - } else { - VTR_LOGV(verbosity > 2, "\tFAILED_SEED: Block Type %s\n", type->name); - //Free failed clustering and try again - free_router_data(*router_data); - free_pb(pb); - delete pb; - *router_data = nullptr; - } - } - - if (!success) { - //Explored all candidates - if (molecule->type == MOLECULE_FORCED_PACK) { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "Can not find any logic block that can implement molecule.\n" - "\tPattern %s %s (%d). Root model is %s\n", - molecule->pack_pattern->name, - root_atom_name.c_str(), root_atom, root_model->name); - } else { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "Can not find any logic block that can implement molecule.\n" - "\tAtom %s (%s)\n", - root_atom_name.c_str(), root_model->name); - } - } - - VTR_ASSERT(success); - - //Successfully create cluster - auto block_type = clb_nlist->block_type(clb_index); - num_used_type_instances[block_type]++; - - /* Expand FPGA size if needed */ - // Check used type instances against the possible equivalent physical locations - unsigned int num_instances = 0; - for (auto equivalent_tile : block_type->equivalent_tiles) { - num_instances += device_ctx.grid.num_instances(equivalent_tile); - } - - if (num_used_type_instances[block_type] > num_instances) { - device_ctx.grid = create_device_grid(device_layout_name, arch->grid_layouts, num_used_type_instances, target_device_utilization); - } -} - -/* - * Get candidate molecule to pack into currently open cluster - * Molecule selection priority: - * 1. Find unpacked molecules based on criticality and strong connectedness (connected by low fanout nets) with current cluster - * 2. Find unpacked molecules based on transitive connections (eg. 2 hops away) with current cluster - * 3. Find unpacked molecules based on weak connectedness (connected by high fanout nets) with current cluster - * 4. Find unpacked molecules based on attraction group of the current cluster (if the cluster has an attraction group) - */ -static t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, - const std::multimap& atom_molecules, - AttractionInfo& attraction_groups, - const enum e_gain_type gain_mode, - t_cluster_placement_stats* cluster_placement_stats_ptr, - vtr::vector>& clb_inter_blk_nets, - const ClusterBlockId cluster_index, - bool prioritize_transitive_connectivity, - int transitive_fanout_threshold, - const int feasible_block_array_size, - std::map>& primitive_candidate_block_types) { - /* - * This routine populates a list of feasible blocks outside the cluster, then returns the best candidate for the cluster. - * If there are no feasible blocks it returns a nullptr. - */ - - if (gain_mode == HILL_CLIMBING) { - VPR_FATAL_ERROR(VPR_ERROR_PACK, - "Hill climbing not supported yet, error out.\n"); - } - - // 1. Find unpacked molecules based on criticality and strong connectedness (connected by low fanout nets) with current cluster - if (cur_pb->pb_stats->num_feasible_blocks == NOT_VALID) { - add_cluster_molecule_candidates_by_connectivity_and_timing(cur_pb, cluster_placement_stats_ptr, atom_molecules, feasible_block_array_size, attraction_groups); - } - - if (prioritize_transitive_connectivity) { - // 2. Find unpacked molecules based on transitive connections (eg. 2 hops away) with current cluster - if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->explore_transitive_fanout) { - add_cluster_molecule_candidates_by_transitive_connectivity(cur_pb, cluster_placement_stats_ptr, atom_molecules, clb_inter_blk_nets, - cluster_index, transitive_fanout_threshold, feasible_block_array_size, attraction_groups); - } - - // 3. Find unpacked molecules based on weak connectedness (connected by high fanout nets) with current cluster - if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->tie_break_high_fanout_net) { - add_cluster_molecule_candidates_by_highfanout_connectivity(cur_pb, cluster_placement_stats_ptr, atom_molecules, feasible_block_array_size, attraction_groups); - } - } else { //Reverse order - // 3. Find unpacked molecules based on weak connectedness (connected by high fanout nets) with current cluster - if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->tie_break_high_fanout_net) { - add_cluster_molecule_candidates_by_highfanout_connectivity(cur_pb, cluster_placement_stats_ptr, atom_molecules, feasible_block_array_size, attraction_groups); - } - - // 2. Find unpacked molecules based on transitive connections (eg. 2 hops away) with current cluster - if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->explore_transitive_fanout) { - add_cluster_molecule_candidates_by_transitive_connectivity(cur_pb, cluster_placement_stats_ptr, atom_molecules, clb_inter_blk_nets, - cluster_index, transitive_fanout_threshold, feasible_block_array_size, attraction_groups); - } - } - - /* Grab highest gain molecule */ - t_pack_molecule* molecule = nullptr; - if (cur_pb->pb_stats->num_feasible_blocks == 0) { - /* - * No suitable molecules were found from the above functions - if - * attraction groups were created, explore the attraction groups to see if - * any suitable molecules can be found. - */ - add_cluster_molecule_candidates_by_attraction_group(cur_pb, cluster_placement_stats_ptr, atom_molecules, attraction_groups, - feasible_block_array_size, cluster_index, primitive_candidate_block_types); - } - - if (cur_pb->pb_stats->num_feasible_blocks > 0) { - cur_pb->pb_stats->num_feasible_blocks--; - int index = cur_pb->pb_stats->num_feasible_blocks; - molecule = cur_pb->pb_stats->feasible_blocks[index]; - VTR_ASSERT(molecule->valid == true); - return molecule; - } - - return molecule; -} - -/* Add molecules with strong connectedness to the current cluster to the list of feasible blocks. */ -static void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur_pb, - t_cluster_placement_stats* cluster_placement_stats_ptr, - const std::multimap& atom_molecules, - const int feasible_block_array_size, - AttractionInfo& attraction_groups) { - VTR_ASSERT(cur_pb->pb_stats->num_feasible_blocks == NOT_VALID); - - cur_pb->pb_stats->num_feasible_blocks = 0; - cur_pb->pb_stats->explore_transitive_fanout = true; /* If no legal molecules found, enable exploration of molecules two hops away */ - - auto& atom_ctx = g_vpr_ctx.atom(); - - for (AtomBlockId blk_id : cur_pb->pb_stats->marked_blocks) { - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { - auto rng = atom_molecules.equal_range(blk_id); - for (const auto& kv : vtr::make_range(rng.first, rng.second)) { - t_pack_molecule* molecule = kv.second; - if (molecule->valid) { - bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); - if (success) { - add_molecule_to_pb_stats_candidates(molecule, - cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); - } - } - } - } - } -} - -/* Add molecules based on weak connectedness (connected by high fanout nets) with current cluster */ -static void add_cluster_molecule_candidates_by_highfanout_connectivity(t_pb* cur_pb, - t_cluster_placement_stats* cluster_placement_stats_ptr, - const std::multimap& atom_molecules, - const int feasible_block_array_size, - AttractionInfo& attraction_groups) { - /* Because the packer ignores high fanout nets when marking what blocks - * to consider, use one of the ignored high fanout net to fill up lightly - * related blocks */ - reset_tried_but_unused_cluster_placements(cluster_placement_stats_ptr); - - AtomNetId net_id = cur_pb->pb_stats->tie_break_high_fanout_net; - - auto& atom_ctx = g_vpr_ctx.atom(); - - int count = 0; - for (auto pin_id : atom_ctx.nlist.net_pins(net_id)) { - if (count >= AAPACK_MAX_HIGH_FANOUT_EXPLORE) { - break; - } - - AtomBlockId blk_id = atom_ctx.nlist.pin_block(pin_id); - - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { - auto rng = atom_molecules.equal_range(blk_id); - for (const auto& kv : vtr::make_range(rng.first, rng.second)) { - t_pack_molecule* molecule = kv.second; - if (molecule->valid) { - bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); - if (success) { - add_molecule_to_pb_stats_candidates(molecule, - cur_pb->pb_stats->gain, cur_pb, std::min(feasible_block_array_size, AAPACK_MAX_HIGH_FANOUT_EXPLORE), attraction_groups); - count++; - } - } - } - } - } - cur_pb->pb_stats->tie_break_high_fanout_net = AtomNetId::INVALID(); /* Mark off that this high fanout net has been considered */ -} - -/* - * If the current cluster being packed has an attraction group associated with it - * (i.e. there are atoms in it that belong to an attraction group), this routine adds molecules - * from the associated attraction group to the list of feasible blocks for the cluster. - * Attraction groups can be very large, so we only add some randomly selected molecules for efficiency - * if the number of atoms in the group is greater than 500. Therefore, the molecules added to the candidates - * will vary each time you call this function. - */ -static void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, - t_cluster_placement_stats* cluster_placement_stats_ptr, - const std::multimap& atom_molecules, - AttractionInfo& attraction_groups, - const int feasible_block_array_size, - ClusterBlockId clb_index, - std::map>& primitive_candidate_block_types) { - auto& atom_ctx = g_vpr_ctx.atom(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - - auto cluster_type = cluster_ctx.clb_nlist.block_type(clb_index); - - /* - * For each cluster, we want to explore the attraction group molecules as potential - * candidates for the cluster a limited number of times. This limit is imposed because - * if the cluster belongs to a very large attraction group, we could potentially search - * through its attraction group molecules for a very long time. - * Defining a number of times to search through the attraction groups (i.e. number of - * attraction group pulls) determines how many times we search through the cluster's attraction - * group molecules for candidate molecules. - */ - int num_pulls = attraction_groups.get_att_group_pulls(); - if (cur_pb->pb_stats->pulled_from_atom_groups < num_pulls) { - cur_pb->pb_stats->pulled_from_atom_groups++; - } else { - return; - } - - AttractGroupId grp_id = cur_pb->pb_stats->attraction_grp_id; - if (grp_id == AttractGroupId::INVALID()) { - return; - } - - AttractionGroup& group = attraction_groups.get_attraction_group_info(grp_id); - std::vector available_atoms; - for (AtomBlockId atom_id : group.group_atoms) { - const auto& atom_model = atom_ctx.nlist.block_model(atom_id); - auto itr = primitive_candidate_block_types.find(atom_model); - VTR_ASSERT(itr != primitive_candidate_block_types.end()); - std::vector& candidate_types = itr->second; - - //Only consider molecules that are unpacked and of the correct type - if (atom_ctx.lookup.atom_clb(atom_id) == ClusterBlockId::INVALID() - && std::find(candidate_types.begin(), candidate_types.end(), cluster_type) != candidate_types.end()) { - available_atoms.push_back(atom_id); - } - } - - //int num_available_atoms = group.group_atoms.size(); - int num_available_atoms = available_atoms.size(); - if (num_available_atoms == 0) { - return; - } - - if (num_available_atoms < 500) { - //for (AtomBlockId atom_id : group.group_atoms) { - for (AtomBlockId atom_id : available_atoms) { - const auto& atom_model = atom_ctx.nlist.block_model(atom_id); - auto itr = primitive_candidate_block_types.find(atom_model); - VTR_ASSERT(itr != primitive_candidate_block_types.end()); - std::vector& candidate_types = itr->second; - - //Only consider molecules that are unpacked and of the correct type - if (atom_ctx.lookup.atom_clb(atom_id) == ClusterBlockId::INVALID() - && std::find(candidate_types.begin(), candidate_types.end(), cluster_type) != candidate_types.end()) { - auto rng = atom_molecules.equal_range(atom_id); - for (const auto& kv : vtr::make_range(rng.first, rng.second)) { - t_pack_molecule* molecule = kv.second; - if (molecule->valid) { - bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); - if (success) { - add_molecule_to_pb_stats_candidates(molecule, - cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); - } - } - } - } - } - return; - } - - int min = 0; - int max = num_available_atoms - 1; - - for (int j = 0; j < 500; j++) { - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<> distr(min, max); - int selected_atom = distr(gen); - - //AtomBlockId blk_id = group.group_atoms[selected_atom]; - AtomBlockId blk_id = available_atoms[selected_atom]; - const auto& atom_model = atom_ctx.nlist.block_model(blk_id); - auto itr = primitive_candidate_block_types.find(atom_model); - VTR_ASSERT(itr != primitive_candidate_block_types.end()); - std::vector& candidate_types = itr->second; - - //Only consider molecules that are unpacked and of the correct type - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID() - && std::find(candidate_types.begin(), candidate_types.end(), cluster_type) != candidate_types.end()) { - auto rng = atom_molecules.equal_range(blk_id); - for (const auto& kv : vtr::make_range(rng.first, rng.second)) { - t_pack_molecule* molecule = kv.second; - if (molecule->valid) { - bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); - if (success) { - add_molecule_to_pb_stats_candidates(molecule, - cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); - } - } - } - } - } -} - -/* Add molecules based on transitive connections (eg. 2 hops away) with current cluster*/ -static void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur_pb, - t_cluster_placement_stats* cluster_placement_stats_ptr, - const std::multimap& atom_molecules, - vtr::vector>& clb_inter_blk_nets, - const ClusterBlockId cluster_index, - int transitive_fanout_threshold, - const int feasible_block_array_size, - AttractionInfo& attraction_groups) { - //TODO: For now, only done by fan-out; should also consider fan-in - - cur_pb->pb_stats->explore_transitive_fanout = false; - - /* First time finding transitive fanout candidates therefore alloc and load them */ - load_transitive_fanout_candidates(cluster_index, - atom_molecules, - cur_pb->pb_stats, - clb_inter_blk_nets, - transitive_fanout_threshold); - /* Only consider candidates that pass a very simple legality check */ - for (const auto& transitive_candidate : cur_pb->pb_stats->transitive_fanout_candidates) { - t_pack_molecule* molecule = transitive_candidate.second; - if (molecule->valid) { - bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); - if (success) { - add_molecule_to_pb_stats_candidates(molecule, - cur_pb->pb_stats->gain, cur_pb, std::min(feasible_block_array_size, AAPACK_MAX_TRANSITIVE_EXPLORE), attraction_groups); - } - } - } -} - -/*Check whether a free primitive exists for each atom block in the molecule*/ -static bool check_free_primitives_for_molecule_atoms(t_pack_molecule* molecule, t_cluster_placement_stats* cluster_placement_stats_ptr) { - auto& atom_ctx = g_vpr_ctx.atom(); - bool success = true; - - for (int i_atom = 0; i_atom < get_array_size_of_molecule(molecule); i_atom++) { - if (molecule->atom_block_ids[i_atom]) { - VTR_ASSERT(atom_ctx.lookup.atom_clb(molecule->atom_block_ids[i_atom]) == ClusterBlockId::INVALID()); - auto blk_id2 = molecule->atom_block_ids[i_atom]; - if (!exists_free_primitive_for_atom_block(cluster_placement_stats_ptr, blk_id2)) { - /* TODO (Jason Luu): debating whether to check if placement exists for molecule - * (more robust) or individual atom blocks (faster)*/ - success = false; - break; - } - } - } - - return success; -} - -/*****************************************/ -static t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, - const std::multimap& atom_molecules, - AttractionInfo& attraction_groups, - const bool allow_unrelated_clustering, - const bool prioritize_transitive_connectivity, - const int transitive_fanout_threshold, - const int feasible_block_array_size, - int* num_unrelated_clustering_attempts, - t_cluster_placement_stats* cluster_placement_stats_ptr, - vtr::vector>& clb_inter_blk_nets, - ClusterBlockId cluster_index, - int verbosity, - std::map>& primitive_candidate_block_types) { - /* Finds the block with the greatest gain that satisfies the - * input, clock and capacity constraints of a cluster that are - * passed in. If no suitable block is found it returns ClusterBlockId::INVALID(). - */ - - VTR_ASSERT(cur_pb->is_root()); - - /* If cannot pack into primitive, try packing into cluster */ - - auto best_molecule = get_highest_gain_molecule(cur_pb, atom_molecules, attraction_groups, - NOT_HILL_CLIMBING, cluster_placement_stats_ptr, clb_inter_blk_nets, - cluster_index, prioritize_transitive_connectivity, - transitive_fanout_threshold, feasible_block_array_size, primitive_candidate_block_types); - - /* If no blocks have any gain to the current cluster, the code above * - * will not find anything. However, another atom block with no inputs in * - * common with the cluster may still be inserted into the cluster. */ - - if (allow_unrelated_clustering) { - if (best_molecule == nullptr) { - if (*num_unrelated_clustering_attempts == 0) { - best_molecule = get_free_molecule_with_most_ext_inputs_for_cluster(cur_pb, - cluster_placement_stats_ptr); - (*num_unrelated_clustering_attempts)++; - VTR_LOGV(best_molecule && verbosity > 2, "\tFound unrelated molecule to cluster\n"); - } - } else { - *num_unrelated_clustering_attempts = 0; - } - } else { - VTR_LOGV(!best_molecule && verbosity > 2, "\tNo related molecule found and unrelated clustering disabled\n"); - } - - return best_molecule; -} - -static void mark_all_molecules_valid(t_pack_molecule* molecule_head) { - for (auto cur_molecule = molecule_head; cur_molecule != nullptr; cur_molecule = cur_molecule->next) { - cur_molecule->valid = true; - } -} - -static int count_molecules(t_pack_molecule* molecule_head) { - int num_molecules = 0; - for (auto cur_molecule = molecule_head; cur_molecule != nullptr; cur_molecule = cur_molecule->next) { - ++num_molecules; - } - return num_molecules; -} - -//Calculates molecule statistics for a single molecule -static t_molecule_stats calc_molecule_stats(const t_pack_molecule* molecule) { - t_molecule_stats molecule_stats; - - auto& atom_ctx = g_vpr_ctx.atom(); - - //Calculate the number of available pins on primitives within the molecule - for (auto blk : molecule->atom_block_ids) { - if (!blk) continue; - - ++molecule_stats.num_blocks; //Record number of valid blocks in molecule - - const t_model* model = atom_ctx.nlist.block_model(blk); - - for (const t_model_ports* input_port = model->inputs; input_port != nullptr; input_port = input_port->next) { - molecule_stats.num_input_pins += input_port->size; - } - - for (const t_model_ports* output_port = model->outputs; output_port != nullptr; output_port = output_port->next) { - molecule_stats.num_output_pins += output_port->size; - } - } - molecule_stats.num_pins = molecule_stats.num_input_pins + molecule_stats.num_output_pins; - - //Calculate the number of externally used pins - std::set molecule_atoms(molecule->atom_block_ids.begin(), molecule->atom_block_ids.end()); - for (auto blk : molecule->atom_block_ids) { - if (!blk) continue; - - for (auto pin : atom_ctx.nlist.block_pins(blk)) { - auto net = atom_ctx.nlist.pin_net(pin); - - auto pin_type = atom_ctx.nlist.pin_type(pin); - if (pin_type == PinType::SINK) { - auto driver_blk = atom_ctx.nlist.net_driver_block(net); - - if (molecule_atoms.count(driver_blk)) { - //Pin driven by a block within the molecule - //Does not count as an external connection - } else { - //Pin driven by a block outside the molecule - ++molecule_stats.num_used_ext_inputs; - } - - } else { - VTR_ASSERT(pin_type == PinType::DRIVER); - - bool net_leaves_molecule = false; - for (auto sink_pin : atom_ctx.nlist.net_sinks(net)) { - auto sink_blk = atom_ctx.nlist.pin_block(sink_pin); - - if (!molecule_atoms.count(sink_blk)) { - //There is at least one sink outside of the current molecule - net_leaves_molecule = true; - break; - } - } - - //We assume that any fanout occurs outside of the molecule, hence we only - //count one used output (even if there are multiple sinks outside the molecule) - if (net_leaves_molecule) { - ++molecule_stats.num_used_ext_outputs; - } - } - } - } - molecule_stats.num_used_ext_pins = molecule_stats.num_used_ext_inputs + molecule_stats.num_used_ext_outputs; - - return molecule_stats; -} - -//Calculates maximum molecule statistics accross all molecules in linked list -static t_molecule_stats calc_max_molecules_stats(const t_pack_molecule* molecule_head) { - t_molecule_stats max_molecules_stats; - - for (auto cur_molecule = molecule_head; cur_molecule != nullptr; cur_molecule = cur_molecule->next) { - //Calculate per-molecule statistics - t_molecule_stats cur_molecule_stats = calc_molecule_stats(cur_molecule); - - //Record the maximums (member-wise) over all molecules - max_molecules_stats.num_blocks = std::max(max_molecules_stats.num_blocks, cur_molecule_stats.num_blocks); - - max_molecules_stats.num_pins = std::max(max_molecules_stats.num_pins, cur_molecule_stats.num_pins); - max_molecules_stats.num_input_pins = std::max(max_molecules_stats.num_input_pins, cur_molecule_stats.num_input_pins); - max_molecules_stats.num_output_pins = std::max(max_molecules_stats.num_output_pins, cur_molecule_stats.num_output_pins); - - max_molecules_stats.num_used_ext_pins = std::max(max_molecules_stats.num_used_ext_pins, cur_molecule_stats.num_used_ext_pins); - max_molecules_stats.num_used_ext_inputs = std::max(max_molecules_stats.num_used_ext_inputs, cur_molecule_stats.num_used_ext_inputs); - max_molecules_stats.num_used_ext_outputs = std::max(max_molecules_stats.num_used_ext_outputs, cur_molecule_stats.num_used_ext_outputs); - } - - return max_molecules_stats; -} - -static std::vector initialize_seed_atoms(const e_cluster_seed seed_type, - const std::multimap& atom_molecules, - const t_molecule_stats& max_molecule_stats, - const vtr::vector& atom_criticality) { - std::vector seed_atoms; - - //Put all atoms in seed list - auto& atom_ctx = g_vpr_ctx.atom(); - for (auto blk : atom_ctx.nlist.blocks()) { - seed_atoms.emplace_back(blk); - } - - //Initially all gains are zero - vtr::vector atom_gains(atom_ctx.nlist.blocks().size(), 0.); - - if (seed_type == e_cluster_seed::TIMING) { - VTR_ASSERT(atom_gains.size() == atom_criticality.size()); - - //By criticality - atom_gains = atom_criticality; - - } else if (seed_type == e_cluster_seed::MAX_INPUTS) { - //By number of used molecule input pins - for (auto blk : atom_ctx.nlist.blocks()) { - int max_molecule_inputs = 0; - auto molecule_rng = atom_molecules.equal_range(blk); - for (const auto& kv : vtr::make_range(molecule_rng.first, molecule_rng.second)) { - const t_pack_molecule* blk_mol = kv.second; - - const t_molecule_stats molecule_stats = calc_molecule_stats(blk_mol); - - //Keep the max over all molecules associated with the atom - max_molecule_inputs = std::max(max_molecule_inputs, molecule_stats.num_used_ext_inputs); - } - - atom_gains[blk] = max_molecule_inputs; - } - - } else if (seed_type == e_cluster_seed::BLEND) { - //By blended gain (criticality and inputs used) - for (auto blk : atom_ctx.nlist.blocks()) { - /* Score seed gain of each block as a weighted sum of timing criticality, - * number of tightly coupled blocks connected to it, and number of external inputs */ - float seed_blend_fac = 0.5; - float max_blend_gain = 0; - - auto molecule_rng = atom_molecules.equal_range(blk); - for (const auto& kv : vtr::make_range(molecule_rng.first, molecule_rng.second)) { - const t_pack_molecule* blk_mol = kv.second; - - const t_molecule_stats molecule_stats = calc_molecule_stats(blk_mol); - - VTR_ASSERT(max_molecule_stats.num_used_ext_inputs > 0); - - float blend_gain = (seed_blend_fac * atom_criticality[blk] - + (1 - seed_blend_fac) * (molecule_stats.num_used_ext_inputs / max_molecule_stats.num_used_ext_inputs)); - blend_gain *= (1 + 0.2 * (molecule_stats.num_blocks - 1)); - - //Keep the max over all molecules associated with the atom - max_blend_gain = std::max(max_blend_gain, blend_gain); - } - atom_gains[blk] = max_blend_gain; - } - - } else if (seed_type == e_cluster_seed::MAX_PINS || seed_type == e_cluster_seed::MAX_INPUT_PINS) { - //By pins per molecule (i.e. available pins on primitives, not pins in use) - - for (auto blk : atom_ctx.nlist.blocks()) { - int max_molecule_pins = 0; - auto molecule_rng = atom_molecules.equal_range(blk); - for (const auto& kv : vtr::make_range(molecule_rng.first, molecule_rng.second)) { - const t_pack_molecule* mol = kv.second; - - const t_molecule_stats molecule_stats = calc_molecule_stats(mol); - - //Keep the max over all molecules associated with the atom - int molecule_pins = 0; - if (seed_type == e_cluster_seed::MAX_PINS) { - //All pins - molecule_pins = molecule_stats.num_pins; - } else { - VTR_ASSERT(seed_type == e_cluster_seed::MAX_INPUT_PINS); - //Input pins only - molecule_pins = molecule_stats.num_input_pins; - } - - //Keep the max over all molecules associated with the atom - max_molecule_pins = std::max(max_molecule_pins, molecule_pins); - } - atom_gains[blk] = max_molecule_pins; - } - - } else if (seed_type == e_cluster_seed::BLEND2) { - for (auto blk : atom_ctx.nlist.blocks()) { - float max_gain = 0; - auto molecule_rng = atom_molecules.equal_range(blk); - for (const auto& kv : vtr::make_range(molecule_rng.first, molecule_rng.second)) { - const t_pack_molecule* mol = kv.second; - - const t_molecule_stats molecule_stats = calc_molecule_stats(mol); - - float pin_ratio = vtr::safe_ratio(molecule_stats.num_pins, max_molecule_stats.num_pins); - float input_pin_ratio = vtr::safe_ratio(molecule_stats.num_input_pins, max_molecule_stats.num_input_pins); - float output_pin_ratio = vtr::safe_ratio(molecule_stats.num_output_pins, max_molecule_stats.num_output_pins); - float used_ext_pin_ratio = vtr::safe_ratio(molecule_stats.num_used_ext_pins, max_molecule_stats.num_used_ext_pins); - float used_ext_input_pin_ratio = vtr::safe_ratio(molecule_stats.num_used_ext_inputs, max_molecule_stats.num_used_ext_inputs); - float used_ext_output_pin_ratio = vtr::safe_ratio(molecule_stats.num_used_ext_outputs, max_molecule_stats.num_used_ext_outputs); - float num_blocks_ratio = vtr::safe_ratio(molecule_stats.num_blocks, max_molecule_stats.num_blocks); - float criticality = atom_criticality[blk]; - - constexpr float PIN_WEIGHT = 0.; - constexpr float INPUT_PIN_WEIGHT = 0.5; - constexpr float OUTPUT_PIN_WEIGHT = 0.; - constexpr float USED_PIN_WEIGHT = 0.; - constexpr float USED_INPUT_PIN_WEIGHT = 0.2; - constexpr float USED_OUTPUT_PIN_WEIGHT = 0.; - constexpr float BLOCKS_WEIGHT = 0.2; - constexpr float CRITICALITY_WEIGHT = 0.1; - - float gain = PIN_WEIGHT * pin_ratio - + INPUT_PIN_WEIGHT * input_pin_ratio - + OUTPUT_PIN_WEIGHT * output_pin_ratio - - + USED_PIN_WEIGHT * used_ext_pin_ratio - + USED_INPUT_PIN_WEIGHT * used_ext_input_pin_ratio - + USED_OUTPUT_PIN_WEIGHT * used_ext_output_pin_ratio - - + BLOCKS_WEIGHT * num_blocks_ratio - + CRITICALITY_WEIGHT * criticality; - - max_gain = std::max(max_gain, gain); - } - - atom_gains[blk] = max_gain; - } - - } else { - VPR_FATAL_ERROR(VPR_ERROR_PACK, "Unrecognized cluster seed type"); - } - - //Sort seeds in descending order of gain (i.e. highest gain first) - // - // Note that we use a *stable* sort here. It has been observed that different - // standard library implementations (e.g. gcc-4.9 vs gcc-5) use sorting algorithms - // which produce different orderings for seeds of equal gain (which is allowed with - // std::sort which does not specify how equal values are handled). Using a stable - // sort ensures that regardless of the underlying sorting algorithm the same seed - // order is produced regardless of compiler. - auto by_descending_gain = [&](const AtomBlockId lhs, const AtomBlockId rhs) { - return atom_gains[lhs] > atom_gains[rhs]; - }; - std::stable_sort(seed_atoms.begin(), seed_atoms.end(), by_descending_gain); - - if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_CLUSTERING_BLOCK_CRITICALITIES)) { - print_seed_gains(getEchoFileName(E_ECHO_CLUSTERING_BLOCK_CRITICALITIES), seed_atoms, atom_gains, atom_criticality); - } - - return seed_atoms; -} - -static t_pack_molecule* get_highest_gain_seed_molecule(int* seedindex, const std::multimap& atom_molecules, const std::vector seed_atoms) { - auto& atom_ctx = g_vpr_ctx.atom(); - - while (*seedindex < static_cast(seed_atoms.size())) { - AtomBlockId blk_id = seed_atoms[(*seedindex)++]; - - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { - t_pack_molecule* best = nullptr; - - auto rng = atom_molecules.equal_range(blk_id); - for (const auto& kv : vtr::make_range(rng.first, rng.second)) { - t_pack_molecule* molecule = kv.second; - if (molecule->valid) { - if (best == nullptr || (best->base_gain) < (molecule->base_gain)) { - best = molecule; - } - } - } - VTR_ASSERT(best != nullptr); - return best; - } - } - - /*if it makes it to here , there are no more blocks available*/ - return nullptr; -} - -/* get gain of packing molecule into current cluster - * gain is equal to: - * total_block_gain - * + molecule_base_gain*some_factor - * - introduced_input_nets_of_unrelated_blocks_pulled_in_by_molecule*some_other_factor - */ -static float get_molecule_gain(t_pack_molecule* molecule, std::map& blk_gain, AttractGroupId cluster_attraction_group_id, AttractionInfo& attraction_groups, int num_molecule_failures) { - float gain; - int i; - int num_introduced_inputs_of_indirectly_related_block; - auto& atom_ctx = g_vpr_ctx.atom(); - - gain = 0; - float attraction_group_penalty = 0.1; - - num_introduced_inputs_of_indirectly_related_block = 0; - for (i = 0; i < get_array_size_of_molecule(molecule); i++) { - auto blk_id = molecule->atom_block_ids[i]; - if (blk_id) { - if (blk_gain.count(blk_id) > 0) { - gain += blk_gain[blk_id]; - } else { - /* This block has no connection with current cluster, penalize molecule for having this block - */ - for (auto pin_id : atom_ctx.nlist.block_input_pins(blk_id)) { - auto net_id = atom_ctx.nlist.pin_net(pin_id); - VTR_ASSERT(net_id); - - auto driver_pin_id = atom_ctx.nlist.net_driver(net_id); - VTR_ASSERT(driver_pin_id); - - auto driver_blk_id = atom_ctx.nlist.pin_block(driver_pin_id); - - num_introduced_inputs_of_indirectly_related_block++; - for (int iblk = 0; iblk < get_array_size_of_molecule(molecule); iblk++) { - if (molecule->atom_block_ids[iblk] && driver_blk_id == molecule->atom_block_ids[iblk]) { - //valid block which is driver (and hence not an input) - num_introduced_inputs_of_indirectly_related_block--; - break; - } - } - } - } - AttractGroupId atom_grp_id = attraction_groups.get_atom_attraction_group(blk_id); - if (atom_grp_id == cluster_attraction_group_id && cluster_attraction_group_id != AttractGroupId::INVALID()) { - float att_grp_gain = attraction_groups.get_attraction_group_gain(atom_grp_id); - gain += att_grp_gain; - } else if (cluster_attraction_group_id != AttractGroupId::INVALID() && atom_grp_id != cluster_attraction_group_id) { - gain -= attraction_group_penalty; - } - } - } - - gain += molecule->base_gain * 0.0001; /* Use base gain as tie breaker TODO: need to sweep this value and perhaps normalize */ - gain -= num_introduced_inputs_of_indirectly_related_block * (0.001); - - if (num_molecule_failures > 0 && attraction_groups.num_attraction_groups() > 0) { - gain -= 0.1 * num_molecule_failures; - } - - return gain; -} - -static int compare_molecule_gain(const void* a, const void* b) { - float base_gain_a, base_gain_b, diff; - const t_pack_molecule *molecule_a, *molecule_b; - molecule_a = (*(const t_pack_molecule* const*)a); - molecule_b = (*(const t_pack_molecule* const*)b); - - base_gain_a = molecule_a->base_gain; - base_gain_b = molecule_b->base_gain; - diff = base_gain_a - base_gain_b; - if (diff > 0) { - return 1; - } - if (diff < 0) { - return -1; - } - return 0; -} - -/* Determine if speculatively packed cur_pb is pin feasible - * Runtime is actually not that bad for this. It's worst case O(k^2) where k is the - * number of pb_graph pins. Can use hash tables or make incremental if becomes an issue. - */ -static void try_update_lookahead_pins_used(t_pb* cur_pb) { - int i, j; - const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; - - // run recursively till a leaf (primitive) pb block is reached - if (pb_type->num_modes > 0 && cur_pb->name != nullptr) { - if (cur_pb->child_pbs != nullptr) { - for (i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { - if (cur_pb->child_pbs[i] != nullptr) { - for (j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { - try_update_lookahead_pins_used(&cur_pb->child_pbs[i][j]); - } - } - } - } - } else { - // find if this child (primitive) pb block has an atom mapped to it, - // if yes compute and mark lookahead pins used for that pb block - auto& atom_ctx = g_vpr_ctx.atom(); - AtomBlockId blk_id = atom_ctx.lookup.pb_atom(cur_pb); - if (pb_type->blif_model != nullptr && blk_id) { - compute_and_mark_lookahead_pins_used(blk_id); - } - } -} - -/* Resets nets used at different pin classes for determining pin feasibility */ -static void reset_lookahead_pins_used(t_pb* cur_pb) { - int i, j; - const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; - if (cur_pb->pb_stats == nullptr) { - return; /* No pins used, no need to continue */ - } - - if (pb_type->num_modes > 0 && cur_pb->name != nullptr) { - for (i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { - cur_pb->pb_stats->lookahead_input_pins_used[i].clear(); - } - - for (i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) { - cur_pb->pb_stats->lookahead_output_pins_used[i].clear(); - } - - if (cur_pb->child_pbs != nullptr) { - for (i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { - if (cur_pb->child_pbs[i] != nullptr) { - for (j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { - reset_lookahead_pins_used(&cur_pb->child_pbs[i][j]); - } - } - } - } - } -} - -/* Determine if pins of speculatively packed pb are legal */ -static void compute_and_mark_lookahead_pins_used(const AtomBlockId blk_id) { - auto& atom_ctx = g_vpr_ctx.atom(); - - const t_pb* cur_pb = atom_ctx.lookup.atom_pb(blk_id); - VTR_ASSERT(cur_pb != nullptr); - - /* Walk through inputs, outputs, and clocks marking pins off of the same class */ - for (auto pin_id : atom_ctx.nlist.block_pins(blk_id)) { - auto net_id = atom_ctx.nlist.pin_net(pin_id); - - const t_pb_graph_pin* pb_graph_pin = find_pb_graph_pin(atom_ctx.nlist, atom_ctx.lookup, pin_id); - compute_and_mark_lookahead_pins_used_for_pin(pb_graph_pin, cur_pb, net_id); - } -} - -/** - * Given a pin and its assigned net, mark all pin classes that are affected. - * Check if connecting this pin to it's driver pin or to all sink pins will - * require leaving a pb_block starting from the parent pb_block of the - * primitive till the root block (depth = 0). If leaving a pb_block is - * required add this net to the pin class (to increment the number of used - * pins from this class) that should be used to leave the pb_block. - */ -static void compute_and_mark_lookahead_pins_used_for_pin(const t_pb_graph_pin* pb_graph_pin, const t_pb* primitive_pb, const AtomNetId net_id) { - auto& atom_ctx = g_vpr_ctx.atom(); - - // starting from the parent pb of the input primitive go up in the hierarchy till the root block - for (auto cur_pb = primitive_pb->parent_pb; cur_pb; cur_pb = cur_pb->parent_pb) { - const auto depth = cur_pb->pb_graph_node->pb_type->depth; - const auto pin_class = pb_graph_pin->parent_pin_class[depth]; - VTR_ASSERT(pin_class != OPEN); - - const auto driver_blk_id = atom_ctx.nlist.net_driver_block(net_id); - - // if this primitive pin is an input pin - if (pb_graph_pin->port->type == IN_PORT) { - /* find location of net driver if exist in clb, NULL otherwise */ - // find the driver of the input net connected to the pin being studied - const auto driver_pin_id = atom_ctx.nlist.net_driver(net_id); - // find the id of the atom occupying the input primitive_pb - const auto prim_blk_id = atom_ctx.lookup.pb_atom(primitive_pb); - // find the pb block occupied by the driving atom - const auto driver_pb = atom_ctx.lookup.atom_pb(driver_blk_id); - // pb_graph_pin driving net_id in the driver pb block - t_pb_graph_pin* output_pb_graph_pin = nullptr; - // if the driver block is in the same clb as the input primitive block - if (atom_ctx.lookup.atom_clb(driver_blk_id) == atom_ctx.lookup.atom_clb(prim_blk_id)) { - // get pb_graph_pin driving the given net - output_pb_graph_pin = get_driver_pb_graph_pin(driver_pb, driver_pin_id); - } - - bool is_reachable = false; - - // if the driver pin is within the cluster - if (output_pb_graph_pin) { - // find if the driver pin can reach the input pin of the primitive or not - const t_pb* check_pb = driver_pb; - while (check_pb && check_pb != cur_pb) { - check_pb = check_pb->parent_pb; - } - if (check_pb) { - for (int i = 0; i < output_pb_graph_pin->num_connectable_primitive_input_pins[depth]; i++) { - if (pb_graph_pin == output_pb_graph_pin->list_of_connectable_input_pin_ptrs[depth][i]) { - is_reachable = true; - break; - } - } - } - } - - // Must use an input pin to connect the driver to the input pin of the given primitive, either the - // driver atom is not contained in the cluster or is contained but cannot reach the primitive pin - if (!is_reachable) { - // add net to lookahead_input_pins_used if not already added - auto it = std::find(cur_pb->pb_stats->lookahead_input_pins_used[pin_class].begin(), - cur_pb->pb_stats->lookahead_input_pins_used[pin_class].end(), net_id); - if (it == cur_pb->pb_stats->lookahead_input_pins_used[pin_class].end()) { - cur_pb->pb_stats->lookahead_input_pins_used[pin_class].push_back(net_id); - } - } - } else { - VTR_ASSERT(pb_graph_pin->port->type == OUT_PORT); - /* - * Determine if this net (which is driven from within this cluster) leaves this cluster - * (and hence uses an output pin). - */ - - bool net_exits_cluster = true; - int num_net_sinks = static_cast(atom_ctx.nlist.net_sinks(net_id).size()); - - if (pb_graph_pin->num_connectable_primitive_input_pins[depth] >= num_net_sinks) { - //It is possible the net is completely absorbed in the cluster, - //since this pin could (potentially) drive all the net's sinks - - /* Important: This runtime penalty looks a lot scarier than it really is. - * For high fan-out nets, I at most look at the number of pins within the - * cluster which limits runtime. - * - * DO NOT REMOVE THIS INITIAL FILTER WITHOUT CAREFUL ANALYSIS ON RUNTIME!!! - * - * Key Observation: - * For LUT-based designs it is impossible for the average fanout to exceed - * the number of LUT inputs so it's usually around 4-5 (pigeon-hole argument, - * if the average fanout is greater than the number of LUT inputs, where do - * the extra connections go? Therefore, average fanout must be capped to a - * small constant where the constant is equal to the number of LUT inputs). - * The real danger to runtime is when the number of sinks of a net gets doubled - */ - - //Check if all the net sinks are, in fact, inside this cluster - bool all_sinks_in_cur_cluster = true; - ClusterBlockId driver_clb = atom_ctx.lookup.atom_clb(driver_blk_id); - for (auto pin_id : atom_ctx.nlist.net_sinks(net_id)) { - auto sink_blk_id = atom_ctx.nlist.pin_block(pin_id); - if (atom_ctx.lookup.atom_clb(sink_blk_id) != driver_clb) { - all_sinks_in_cur_cluster = false; - break; - } - } - - if (all_sinks_in_cur_cluster) { - //All the sinks are part of this cluster, so the net may be fully absorbed. - // - //Verify this, by counting the number of net sinks reachable from the driver pin. - //If the count equals the number of net sinks then the net is fully absorbed and - //the net does not exit the cluster - /* TODO: I should cache the absorbed outputs, once net is absorbed, - * net is forever absorbed, no point in rechecking every time */ - if (net_sinks_reachable_in_cluster(pb_graph_pin, depth, net_id)) { - //All the sinks are reachable inside the cluster - net_exits_cluster = false; - } - } - } - - if (net_exits_cluster) { - /* This output must exit this cluster */ - cur_pb->pb_stats->lookahead_output_pins_used[pin_class].push_back(net_id); - } - } - } -} - -int net_sinks_reachable_in_cluster(const t_pb_graph_pin* driver_pb_gpin, const int depth, const AtomNetId net_id) { - size_t num_reachable_sinks = 0; - auto& atom_ctx = g_vpr_ctx.atom(); - - //Record the sink pb graph pins we are looking for - std::unordered_set sink_pb_gpins; - for (const AtomPinId pin_id : atom_ctx.nlist.net_sinks(net_id)) { - const t_pb_graph_pin* sink_pb_gpin = find_pb_graph_pin(atom_ctx.nlist, atom_ctx.lookup, pin_id); - VTR_ASSERT(sink_pb_gpin); - - sink_pb_gpins.insert(sink_pb_gpin); - } - - //Count how many sink pins are reachable - for (int i_prim_pin = 0; i_prim_pin < driver_pb_gpin->num_connectable_primitive_input_pins[depth]; ++i_prim_pin) { - const t_pb_graph_pin* reachable_pb_gpin = driver_pb_gpin->list_of_connectable_input_pin_ptrs[depth][i_prim_pin]; - - if (sink_pb_gpins.count(reachable_pb_gpin)) { - ++num_reachable_sinks; - if (num_reachable_sinks == atom_ctx.nlist.net_sinks(net_id).size()) { - return true; - } - } - } - - return false; -} - -/** - * Returns the pb_graph_pin of the atom pin defined by the driver_pin_id in the driver_pb - */ -static t_pb_graph_pin* get_driver_pb_graph_pin(const t_pb* driver_pb, const AtomPinId driver_pin_id) { - auto& atom_ctx = g_vpr_ctx.atom(); - const auto driver_pb_type = driver_pb->pb_graph_node->pb_type; - int output_port = 0; - // find the port of the pin driving the net as well as the port model - auto driver_port_id = atom_ctx.nlist.pin_port(driver_pin_id); - auto driver_model_port = atom_ctx.nlist.port_model(driver_port_id); - // find the port id of the port containing the driving pin in the driver_pb_type - for (int i = 0; i < driver_pb_type->num_ports; i++) { - auto& prim_port = driver_pb_type->ports[i]; - if (prim_port.type == OUT_PORT) { - if (prim_port.model_port == driver_model_port) { - // get the output pb_graph_pin driving this input net - return &(driver_pb->pb_graph_node->output_pins[output_port][atom_ctx.nlist.pin_port_bit(driver_pin_id)]); - } - output_port++; - } - } - // the pin should be found - VTR_ASSERT(false); - return nullptr; -} - -/* Check if the number of available inputs/outputs for a pin class is sufficient for speculatively packed blocks */ -static bool check_lookahead_pins_used(t_pb* cur_pb, t_ext_pin_util max_external_pin_util) { - const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; - - if (pb_type->num_modes > 0 && cur_pb->name) { - for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { - size_t class_size = cur_pb->pb_graph_node->input_pin_class_size[i]; - - if (cur_pb->is_root()) { - // Scale the class size by the maximum external pin utilization factor - // Use ceil to avoid classes of size 1 from being scaled to zero - class_size = std::ceil(max_external_pin_util.input_pin_util * class_size); - // if the number of pins already used is larger than class size, then the number of - // cluster inputs already used should be our constraint. Why is this needed? This is - // needed since when packing the seed block the maximum external pin utilization is - // used as 1.0 allowing molecules that are using up to all the cluster inputs to be - // packed legally. Therefore, if the seed block is already using more inputs than - // the allowed maximum utilization, this should become the new maximum pin utilization. - class_size = std::max(class_size, cur_pb->pb_stats->input_pins_used[i].size()); - } - - if (cur_pb->pb_stats->lookahead_input_pins_used[i].size() > class_size) { - return false; - } - } - - for (int i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) { - size_t class_size = cur_pb->pb_graph_node->output_pin_class_size[i]; - if (cur_pb->is_root()) { - // Scale the class size by the maximum external pin utilization factor - // Use ceil to avoid classes of size 1 from being scaled to zero - class_size = std::ceil(max_external_pin_util.output_pin_util * class_size); - // if the number of pins already used is larger than class size, then the number of - // cluster outputs already used should be our constraint. Why is this needed? This is - // needed since when packing the seed block the maximum external pin utilization is - // used as 1.0 allowing molecules that are using up to all the cluster inputs to be - // packed legally. Therefore, if the seed block is already using more inputs than - // the allowed maximum utilization, this should become the new maximum pin utilization. - class_size = std::max(class_size, cur_pb->pb_stats->output_pins_used[i].size()); - } - - if (cur_pb->pb_stats->lookahead_output_pins_used[i].size() > class_size) { - return false; - } - } - - if (cur_pb->child_pbs) { - for (int i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { - if (cur_pb->child_pbs[i]) { - for (int j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { - if (!check_lookahead_pins_used(&cur_pb->child_pbs[i][j], max_external_pin_util)) - return false; - } - } - } - } - } - - return true; -} - -/* Speculation successful, commit input/output pins used */ -static void commit_lookahead_pins_used(t_pb* cur_pb) { - const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; - - if (pb_type->num_modes > 0 && cur_pb->name) { - for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { - VTR_ASSERT(cur_pb->pb_stats->lookahead_input_pins_used[i].size() <= (unsigned int)cur_pb->pb_graph_node->input_pin_class_size[i]); - for (size_t j = 0; j < cur_pb->pb_stats->lookahead_input_pins_used[i].size(); j++) { - VTR_ASSERT(cur_pb->pb_stats->lookahead_input_pins_used[i][j]); - cur_pb->pb_stats->input_pins_used[i].insert({j, cur_pb->pb_stats->lookahead_input_pins_used[i][j]}); - } - } - - for (int i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) { - VTR_ASSERT(cur_pb->pb_stats->lookahead_output_pins_used[i].size() <= (unsigned int)cur_pb->pb_graph_node->output_pin_class_size[i]); - for (size_t j = 0; j < cur_pb->pb_stats->lookahead_output_pins_used[i].size(); j++) { - VTR_ASSERT(cur_pb->pb_stats->lookahead_output_pins_used[i][j]); - cur_pb->pb_stats->output_pins_used[i].insert({j, cur_pb->pb_stats->lookahead_output_pins_used[i][j]}); - } - } - - if (cur_pb->child_pbs) { - for (int i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { - if (cur_pb->child_pbs[i]) { - for (int j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { - commit_lookahead_pins_used(&cur_pb->child_pbs[i][j]); - } - } - } - } - } -} - -/** - * Score unclustered atoms that are two hops away from current cluster - * For example, consider a cluster that has a FF feeding an adder in another - * cluster. Since this FF is feeding an adder that is packed in another cluster - * this function should find other FFs that are feeding other inputs of this adder - * since they are two hops away from the FF packed in this cluster - */ -static void load_transitive_fanout_candidates(ClusterBlockId clb_index, - const std::multimap& atom_molecules, - t_pb_stats* pb_stats, - vtr::vector>& clb_inter_blk_nets, - int transitive_fanout_threshold) { - auto& atom_ctx = g_vpr_ctx.atom(); - - // iterate over all the nets that have pins in this cluster - for (const auto net_id : pb_stats->marked_nets) { - // only consider small nets to constrain runtime - if (int(atom_ctx.nlist.net_pins(net_id).size()) < transitive_fanout_threshold + 1) { - // iterate over all the pins of the net - for (const auto pin_id : atom_ctx.nlist.net_pins(net_id)) { - AtomBlockId atom_blk_id = atom_ctx.nlist.pin_block(pin_id); - // get the transitive cluster - ClusterBlockId tclb = atom_ctx.lookup.atom_clb(atom_blk_id); - // if the block connected to this pin is packed in another cluster - if (tclb != clb_index && tclb != ClusterBlockId::INVALID()) { - // explore transitive nets from already packed cluster - for (AtomNetId tnet : clb_inter_blk_nets[tclb]) { - // iterate over all the pins of the net - for (AtomPinId tpin : atom_ctx.nlist.net_pins(tnet)) { - auto blk_id = atom_ctx.nlist.pin_block(tpin); - // This transitive atom is not packed, score and add - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { - auto& transitive_fanout_candidates = pb_stats->transitive_fanout_candidates; - - if (pb_stats->gain.count(blk_id) == 0) { - pb_stats->gain[blk_id] = 0.001; - } else { - pb_stats->gain[blk_id] += 0.001; - } - auto rng = atom_molecules.equal_range(blk_id); - for (const auto& kv : vtr::make_range(rng.first, rng.second)) { - t_pack_molecule* molecule = kv.second; - if (molecule->valid) { - transitive_fanout_candidates.insert({molecule->atom_block_ids[molecule->root], molecule}); - } - } - } - } - } - } - } - } - } -} - -static std::map> identify_primitive_candidate_block_types() { - std::map> model_candidates; - auto& atom_ctx = g_vpr_ctx.atom(); - auto& atom_nlist = atom_ctx.nlist; - auto& device_ctx = g_vpr_ctx.device(); - - std::set unique_models; - for (auto blk : atom_nlist.blocks()) { - auto model = atom_nlist.block_model(blk); - unique_models.insert(model); - } - - for (auto model : unique_models) { - model_candidates[model] = {}; - - for (auto const& type : device_ctx.logical_block_types) { - if (block_type_contains_blif_model(&type, model->name)) { - model_candidates[model].push_back(&type); - } - } - } - - return model_candidates; -} - -static void print_seed_gains(const char* fname, const std::vector& seed_atoms, const vtr::vector& atom_gain, const vtr::vector& atom_criticality) { - FILE* fp = vtr::fopen(fname, "w"); - - auto& atom_ctx = g_vpr_ctx.atom(); - - //For prett formatting determine the maximum name length - int max_name_len = strlen("atom_block_name"); - int max_type_len = strlen("atom_block_type"); - for (auto blk_id : atom_ctx.nlist.blocks()) { - max_name_len = std::max(max_name_len, (int)atom_ctx.nlist.block_name(blk_id).size()); - - const t_model* model = atom_ctx.nlist.block_model(blk_id); - max_type_len = std::max(max_type_len, (int)strlen(model->name)); - } - - fprintf(fp, "%-*s %-*s %8s %8s\n", max_name_len, "atom_block_name", max_type_len, "atom_block_type", "gain", "criticality"); - fprintf(fp, "\n"); - for (auto blk_id : seed_atoms) { - std::string name = atom_ctx.nlist.block_name(blk_id); - fprintf(fp, "%-*s ", max_name_len, name.c_str()); - - const t_model* model = atom_ctx.nlist.block_model(blk_id); - fprintf(fp, "%-*s ", max_type_len, model->name); - - fprintf(fp, "%*f ", std::max((int)strlen("gain"), 8), atom_gain[blk_id]); - fprintf(fp, "%*f ", std::max((int)strlen("criticality"), 8), atom_criticality[blk_id]); - fprintf(fp, "\n"); - } - - fclose(fp); -} - -/** - * This function takes a chain molecule, and the pb_graph_node that is chosen - * for packing the molecule's root block. Using the given root_primitive, this - * function will identify which chain id this molecule is being mapped to and - * will update the chain id value inside the chain info data structure of this - * molecule - */ -static void update_molecule_chain_info(t_pack_molecule* chain_molecule, const t_pb_graph_node* root_primitive) { - VTR_ASSERT(chain_molecule->chain_info->chain_id == -1 && chain_molecule->chain_info->is_long_chain); - - auto chain_root_pins = chain_molecule->pack_pattern->chain_root_pins; - - // long chains should only be placed at the beginning of the chain - // Since for long chains the molecule size is already equal to the - // total number of adders in the cluster. Therefore, it should - // always be placed at the very first adder in this cluster. - for (size_t chainId = 0; chainId < chain_root_pins.size(); chainId++) { - if (chain_root_pins[chainId][0]->parent_node == root_primitive) { - chain_molecule->chain_info->chain_id = chainId; - chain_molecule->chain_info->first_packed_molecule = chain_molecule; - return; - } - } - - VTR_ASSERT(false); -} - -/** - * This function takes the root block of a chain molecule and a proposed - * placement primitive for this block. The function then checks if this - * chain root block has a placement constraint (such as being driven from - * outside the cluster) and returns the status of the placement accordingly. - */ -static enum e_block_pack_status check_chain_root_placement_feasibility(const t_pb_graph_node* pb_graph_node, - const t_pack_molecule* molecule, - const AtomBlockId blk_id) { - enum e_block_pack_status block_pack_status = BLK_PASSED; - auto& atom_ctx = g_vpr_ctx.atom(); - - bool is_long_chain = molecule->chain_info->is_long_chain; - - const auto& chain_root_pins = molecule->pack_pattern->chain_root_pins; - - t_model_ports* root_port = chain_root_pins[0][0]->port->model_port; - AtomNetId chain_net_id; - auto port_id = atom_ctx.nlist.find_atom_port(blk_id, root_port); - - if (port_id) { - chain_net_id = atom_ctx.nlist.port_net(port_id, chain_root_pins[0][0]->pin_number); - } - - // if this block is part of a long chain or it is driven by a cluster - // input pin we need to check the placement legality of this block - // Depending on the logic synthesis even small chains that can fit within one - // cluster might need to start at the top of the cluster as their input can be - // driven by a global gnd or vdd. Therefore even if this is not a long chain - // but its input pin is driven by a net, the placement legality is checked. - if (is_long_chain || chain_net_id) { - auto chain_id = molecule->chain_info->chain_id; - // if this chain has a chain id assigned to it (implies is_long_chain too) - if (chain_id != -1) { - // the chosen primitive should be a valid starting point for the chain - // long chains should only be placed at the top of the chain tieOff = 0 - if (pb_graph_node != chain_root_pins[chain_id][0]->parent_node) { - block_pack_status = BLK_FAILED_FEASIBLE; - } - // the chain doesn't have an assigned chain_id yet - } else { - block_pack_status = BLK_FAILED_FEASIBLE; - for (const auto& chain : chain_root_pins) { - for (size_t tieOff = 0; tieOff < chain.size(); tieOff++) { - // check if this chosen primitive is one of the possible - // starting points for this chain. - if (pb_graph_node == chain[tieOff]->parent_node) { - // this location matches with the one of the dedicated chain - // input from outside logic block, therefore it is feasible - block_pack_status = BLK_PASSED; - break; - } - // long chains should only be placed at the top of the chain tieOff = 0 - if (is_long_chain) break; - } - } - } - } - - return block_pack_status; -} - -/** - * This function update the pb_type_count data structure by incrementing - * the number of used pb_types in the given packed cluster t_pb - */ -static size_t update_pb_type_count(const t_pb* pb, std::map& pb_type_count, size_t depth) { - size_t max_depth = depth; - - t_pb_graph_node* pb_graph_node = pb->pb_graph_node; - t_pb_type* pb_type = pb_graph_node->pb_type; - t_mode* mode = &pb_type->modes[pb->mode]; - std::string pb_type_name(pb_type->name); - - pb_type_count[pb_type]++; - - if (pb_type->num_modes > 0) { - for (int i = 0; i < mode->num_pb_type_children; i++) { - for (int j = 0; j < mode->pb_type_children[i].num_pb; j++) { - if (pb->child_pbs[i] && pb->child_pbs[i][j].name) { - size_t child_depth = update_pb_type_count(&pb->child_pbs[i][j], pb_type_count, depth + 1); - - max_depth = std::max(max_depth, child_depth); - } - } - } - } - return max_depth; -} - -/** - * Print the total number of used physical blocks for each pb type in the architecture - */ -void print_pb_type_count(const ClusteredNetlist& clb_nlist) { - auto& device_ctx = g_vpr_ctx.device(); - - std::map pb_type_count; - - size_t max_depth = 0; - for (ClusterBlockId blk : clb_nlist.blocks()) { - size_t pb_max_depth = update_pb_type_count(clb_nlist.block_pb(blk), pb_type_count, 0); - - max_depth = std::max(max_depth, pb_max_depth); - } - - size_t max_pb_type_name_chars = 0; - for (auto& pb_type : pb_type_count) { - max_pb_type_name_chars = std::max(max_pb_type_name_chars, strlen(pb_type.first->name)); - } - - VTR_LOG("\nPb types usage...\n"); - for (const auto& logical_block_type : device_ctx.logical_block_types) { - if (!logical_block_type.pb_type) continue; - - print_pb_type_count_recurr(logical_block_type.pb_type, max_pb_type_name_chars + max_depth, 0, pb_type_count); + print_pb_type_count_recurr(logical_block_type.pb_type, max_pb_type_name_chars + max_depth, 0, pb_type_count); } VTR_LOG("\n"); } - -static void print_pb_type_count_recurr(t_pb_type* pb_type, size_t max_name_chars, size_t curr_depth, std::map& pb_type_count) { - std::string display_name(curr_depth, ' '); //Indent by depth - display_name += pb_type->name; - - if (pb_type_count.count(pb_type)) { - VTR_LOG(" %-*s : %d\n", max_name_chars, display_name.c_str(), pb_type_count[pb_type]); - } - - //Recurse - for (int imode = 0; imode < pb_type->num_modes; ++imode) { - t_mode* mode = &pb_type->modes[imode]; - for (int ichild = 0; ichild < mode->num_pb_type_children; ++ichild) { - t_pb_type* child_pb_type = &mode->pb_type_children[ichild]; - - print_pb_type_count_recurr(child_pb_type, max_name_chars, curr_depth + 1, pb_type_count); - } - } -} - -/** - * This function identifies the logic block type which is - * defined by the block type which has a lut primitive - */ -static t_logical_block_type_ptr identify_logic_block_type(std::map>& primitive_candidate_block_types) { - std::string lut_name = ".names"; - - for (auto& model : primitive_candidate_block_types) { - std::string model_name(model.first->name); - if (model_name == lut_name) - return model.second[0]; - } - - return nullptr; -} - -/** - * This function returns the pb_type that is similar to Logic Element (LE) in an FPGA - * The LE is defined as a physical block that contains a LUT primitive and - * is found by searching a cluster type to find the first pb_type (from the top - * of the hierarchy clb->LE) that has more than one instance within the cluster. - */ -static t_pb_type* identify_le_block_type(t_logical_block_type_ptr logic_block_type) { - // if there is no CLB-like cluster, then there is no LE pb_block - if (!logic_block_type) - return nullptr; - - // search down the hierarchy starting from the pb_graph_head - auto pb_graph_node = logic_block_type->pb_graph_head; - - while (pb_graph_node->child_pb_graph_nodes) { - // if this pb_graph_node has more than one mode or more than one pb_type in the default mode return - // nullptr since the logic block of this architecture is not a CLB-like logic block - if (pb_graph_node->pb_type->num_modes > 1 || pb_graph_node->pb_type->modes[0].num_pb_type_children > 1) - return nullptr; - // explore the only child of this pb_graph_node - pb_graph_node = &pb_graph_node->child_pb_graph_nodes[0][0][0]; - // if the child node has more than one instance in the - // cluster then this is the pb_type similar to a LE - if (pb_graph_node->pb_type->num_pb > 1) - return pb_graph_node->pb_type; - } - - return nullptr; -} - -/** - * This function updates the le_count data structure from the given packed cluster - */ -static void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_type, const t_pb_type* le_pb_type, std::vector& le_count) { - // if this cluster doesn't contain LEs or there - // are no les in this architecture, ignore it - if (!logic_block_type || pb->pb_graph_node != logic_block_type->pb_graph_head || !le_pb_type) - return; - - const std::string lut(".names"); - const std::string ff(".latch"); - const std::string adder("adder"); - - auto parent_pb = pb; - - // go down the hierarchy till the parent physical block of the LE is found - while (parent_pb->child_pbs[0][0].pb_graph_node->pb_type != le_pb_type) { - parent_pb = &parent_pb->child_pbs[0][0]; - } - - // iterate over all the LEs and update the LE count accordingly - for (int ile = 0; ile < parent_pb->get_num_children_of_type(0); ile++) { - if (!parent_pb->child_pbs[0][ile].name) - continue; - - auto has_used_lut = pb_used_for_blif_model(&parent_pb->child_pbs[0][ile], lut); - auto has_used_adder = pb_used_for_blif_model(&parent_pb->child_pbs[0][ile], adder); - auto has_used_ff = pb_used_for_blif_model(&parent_pb->child_pbs[0][ile], ff); - - // First type of LEs: used for logic and registers - if ((has_used_lut || has_used_adder) && has_used_ff) { - le_count[0]++; - // Second type of LEs: used for logic only - } else if (has_used_lut || has_used_adder) { - le_count[1]++; - // Third type of LEs: used for registers only - } else if (has_used_ff) { - le_count[2]++; - } - } -} - -/** - * This function returns true if the given physical block has - * a primitive matching the given blif model and is used - */ -static bool pb_used_for_blif_model(const t_pb* pb, std::string blif_model_name) { - auto pb_graph_node = pb->pb_graph_node; - auto pb_type = pb_graph_node->pb_type; - auto mode = &pb_type->modes[pb->mode]; - - // if this is a primitive check if it matches the given blif model name - if (pb_type->blif_model) { - if (blif_model_name == pb_type->blif_model || ".subckt " + blif_model_name == pb_type->blif_model) { - return true; - } - } - - if (pb_type->num_modes > 0) { - for (int i = 0; i < mode->num_pb_type_children; i++) { - for (int j = 0; j < mode->pb_type_children[i].num_pb; j++) { - if (pb->child_pbs[i] && pb->child_pbs[i][j].name) { - if (pb_used_for_blif_model(&pb->child_pbs[i][j], blif_model_name)) { - return true; - } - } - } - } - } - - return false; -} - -/** - * Print the LE count data strurture - */ -static void print_le_count(std::vector& le_count, const t_pb_type* le_pb_type) { - VTR_LOG("\nLogic Element (%s) detailed count:\n", le_pb_type->name); - VTR_LOG(" Total number of Logic Elements used : %d\n", le_count[0] + le_count[1] + le_count[2]); - VTR_LOG(" LEs used for logic and registers : %d\n", le_count[0]); - VTR_LOG(" LEs used for logic only : %d\n", le_count[1]); - VTR_LOG(" LEs used for registers only : %d\n\n", le_count[2]); -} - -/** - * Given a pointer to a pb in a cluster, this routine returns - * a pointer to the top-level pb of the given pb. - * This is needed when updating the gain for a cluster. - */ -static t_pb* get_top_level_pb(t_pb* pb) { - t_pb* top_level_pb = pb; - - while (pb) { - top_level_pb = pb; - pb = pb->parent_pb; - } - - VTR_ASSERT(top_level_pb != nullptr); - - return top_level_pb; -} diff --git a/vpr/src/pack/cluster.h b/vpr/src/pack/cluster.h index f63c0a0eab5..a9f2c1df689 100644 --- a/vpr/src/pack/cluster.h +++ b/vpr/src/pack/cluster.h @@ -8,6 +8,8 @@ #include "physical_types.h" #include "vpr_types.h" #include "atom_netlist_fwd.h" +#include "attraction_groups.h" +#include "cluster_util.h" std::map do_clustering(const t_packer_opts& packer_opts, const t_analysis_opts& analysis_opts, @@ -15,7 +17,6 @@ std::map do_clustering(const t_packer_opts& pa t_pack_molecule* molecule_head, int num_models, const std::unordered_set& is_clock, - std::multimap& atom_molecules, const std::unordered_map& expected_lowest_cost_pb_gnode, bool allow_unrelated_clustering, bool balance_block_type_utilization, @@ -23,10 +24,10 @@ std::map do_clustering(const t_packer_opts& pa const t_ext_pin_util_targets& ext_pin_util_targets, const t_pack_high_fanout_thresholds& high_fanout_thresholds, AttractionInfo& attraction_groups, - bool& floorplan_regions_overfull); + bool& floorplan_regions_overfull, + t_clustering_data& clustering_data); int get_cluster_of_block(int blkidx); void print_pb_type_count(const ClusteredNetlist& clb_nlist); - #endif diff --git a/vpr/src/pack/cluster_placement.cpp b/vpr/src/pack/cluster_placement.cpp index 36a78bec6e8..c22d97fea05 100644 --- a/vpr/src/pack/cluster_placement.cpp +++ b/vpr/src/pack/cluster_placement.cpp @@ -217,47 +217,6 @@ void reset_cluster_placement_stats(t_cluster_placement_stats* cluster_placement_ cluster_placement_stats->has_long_chain = false; } -/** - * Free linked lists found in cluster_placement_stats_list - */ -void free_cluster_placement_stats(t_cluster_placement_stats* cluster_placement_stats_list) { - t_cluster_placement_primitive *cur, *next; - auto& device_ctx = g_vpr_ctx.device(); - - for (const auto& type : device_ctx.logical_block_types) { - int index = type.index; - cur = cluster_placement_stats_list[index].tried; - while (cur != nullptr) { - next = cur->next_primitive; - free(cur); - cur = next; - } - cur = cluster_placement_stats_list[index].in_flight; - while (cur != nullptr) { - next = cur->next_primitive; - free(cur); - cur = next; - } - cur = cluster_placement_stats_list[index].invalid; - while (cur != nullptr) { - next = cur->next_primitive; - free(cur); - cur = next; - } - for (int j = 0; j < cluster_placement_stats_list[index].num_pb_types; j++) { - cur = cluster_placement_stats_list[index].valid_primitives[j]->next_primitive; - while (cur != nullptr) { - next = cur->next_primitive; - free(cur); - cur = next; - } - free(cluster_placement_stats_list[index].valid_primitives[j]); - } - free(cluster_placement_stats_list[index].valid_primitives); - } - free(cluster_placement_stats_list); -} - /** * Put primitive back on queue of valid primitives * Note that valid status is not changed because if the primitive is not valid, it will get properly collected later diff --git a/vpr/src/pack/cluster_placement.h b/vpr/src/pack/cluster_placement.h index 4d0125a0697..8715e611222 100644 --- a/vpr/src/pack/cluster_placement.h +++ b/vpr/src/pack/cluster_placement.h @@ -18,8 +18,6 @@ void set_mode_cluster_placement_stats(const t_pb_graph_node* complex_block, int mode); void reset_cluster_placement_stats( t_cluster_placement_stats* cluster_placement_stats); -void free_cluster_placement_stats( - t_cluster_placement_stats* cluster_placement_stats); int get_array_size_of_molecule(const t_pack_molecule* molecule); bool exists_free_primitive_for_atom_block( diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index 3cc1bec440f..3e2528ac0dc 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -4,6 +4,13 @@ #include "cluster_placement.h" #include "output_clustering.h" +#include "vtr_math.h" +#include "SetupGrid.h" + +/**********************************/ +/* Global variables in clustering */ +/**********************************/ + /* TODO: May want to check that all atom blocks are actually reached */ static void check_cluster_atom_blocks(t_pb* pb, std::unordered_set& blocks_checked) { int i, j; @@ -221,33 +228,24 @@ void calc_init_packing_timing(const t_packer_opts& packer_opts, //Free the clustering data structures void free_clustering_data(const t_packer_opts& packer_opts, - vtr::vector*>& intra_lb_routing, - int* hill_climbing_inputs_avail, - t_cluster_placement_stats* cluster_placement_stats, - t_molecule_link* unclustered_list_head, - t_molecule_link* memory_pool, - t_pb_graph_node** primitives_list) { + t_clustering_data& clustering_data) { auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); for (auto blk_id : cluster_ctx.clb_nlist.blocks()) - free_intra_lb_nets(intra_lb_routing[blk_id]); + free_intra_lb_nets(clustering_data.intra_lb_routing[blk_id]); - intra_lb_routing.clear(); + clustering_data.intra_lb_routing.clear(); if (packer_opts.hill_climbing_flag) - free(hill_climbing_inputs_avail); - - free_cluster_placement_stats(cluster_placement_stats); + free(clustering_data.hill_climbing_inputs_avail); for (auto blk_id : cluster_ctx.clb_nlist.blocks()) cluster_ctx.clb_nlist.remove_block(blk_id); cluster_ctx.clb_nlist = ClusteredNetlist(); - free(unclustered_list_head); - free(memory_pool); - - free(primitives_list); + free(clustering_data.unclustered_list_head); + free(clustering_data.memory_pool); } //check the clustering and output it @@ -255,8 +253,7 @@ void check_and_output_clustering(const t_packer_opts& packer_opts, const std::unordered_set& is_clock, const t_arch* arch, const int& num_clb, - const vtr::vector*>& intra_lb_routing, - bool& floorplan_regions_overfull) { + const vtr::vector*>& intra_lb_routing) { auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); VTR_ASSERT(num_clb == (int)cluster_ctx.clb_nlist.blocks().size()); @@ -268,9 +265,6 @@ void check_and_output_clustering(const t_packer_opts& packer_opts, output_clustering(intra_lb_routing, packer_opts.global_clocks, is_clock, arch->architecture_id, packer_opts.output_file.c_str(), false); - //check_floorplan_regions(floorplan_regions_overfull); - floorplan_regions_overfull = floorplan_constraints_regions_overfull(); - VTR_ASSERT(cluster_ctx.clb_nlist.blocks().size() == intra_lb_routing.size()); } @@ -318,3 +312,3362 @@ bool check_cluster_legality(const int& verbosity, } return is_cluster_legal; } + +/*print the header for the clustering progress table*/ +void print_pack_status_header() { + VTR_LOG("Starting Clustering - Clustering Progress: \n"); + VTR_LOG("------------------- -------------------------- ---------\n"); + VTR_LOG("Molecules processed Number of clusters created FPGA size\n"); + VTR_LOG("------------------- -------------------------- ---------\n"); +} + +/*incrementally print progress updates during clustering*/ +void print_pack_status(int num_clb, + int tot_num_molecules, + int num_molecules_processed, + int& mols_since_last_print, + int device_width, + int device_height, + AttractionInfo& attraction_groups) { + //Print a packing update each time another 4% of molecules have been packed. + const float print_frequency = 0.04; + + double percentage = (num_molecules_processed / (double)tot_num_molecules) * 100; + + int int_percentage = int(percentage); + + int int_molecule_increment = (int)(print_frequency * tot_num_molecules); + + if (mols_since_last_print == int_molecule_increment) { + VTR_LOG( + "%6d/%-6d %3d%% " + "%26d " + "%3d x %-3d ", + num_molecules_processed, + tot_num_molecules, + int_percentage, + num_clb, + device_width, + device_height); + + VTR_LOG("\n"); + fflush(stdout); + mols_since_last_print = 0; + if (attraction_groups.num_attraction_groups() > 0) { + rebuild_attraction_groups(attraction_groups); + } + } +} + +/* + * Periodically rebuild the attraction groups to reflect which atoms in them + * are still available for new clusters (i.e. remove the atoms that have already + * been packed from the attraction group). + */ +void rebuild_attraction_groups(AttractionInfo& attraction_groups) { + auto& atom_ctx = g_vpr_ctx.atom(); + + for (int igroup = 0; igroup < attraction_groups.num_attraction_groups(); igroup++) { + AttractGroupId group_id(igroup); + AttractionGroup& group = attraction_groups.get_attraction_group_info(group_id); + AttractionGroup new_att_group_info; + + for (AtomBlockId atom : group.group_atoms) { + //If the ClusterBlockId is anything other than invalid, the atom has been packed already + if (atom_ctx.lookup.atom_clb(atom) == ClusterBlockId::INVALID()) { + new_att_group_info.group_atoms.push_back(atom); + } + } + + attraction_groups.set_attraction_group_info(group_id, new_att_group_info); + } +} + +/* Determine if atom block is in pb */ +bool is_atom_blk_in_pb(const AtomBlockId blk_id, const t_pb* pb) { + auto& atom_ctx = g_vpr_ctx.atom(); + + const t_pb* cur_pb = atom_ctx.lookup.atom_pb(blk_id); + while (cur_pb) { + if (cur_pb == pb) { + return true; + } + cur_pb = cur_pb->parent_pb; + } + return false; +} + +/* Remove blk from list of feasible blocks sorted according to gain + * Useful for removing blocks that are repeatedly failing. If a block + * has been found to be illegal, we don't repeatedly consider it.*/ +void remove_molecule_from_pb_stats_candidates(t_pack_molecule* molecule, + t_pb* pb) { + int molecule_index; + bool found_molecule = false; + + //find the molecule index + for (int i = 0; i < pb->pb_stats->num_feasible_blocks; i++) { + if (pb->pb_stats->feasible_blocks[i] == molecule) { + found_molecule = true; + molecule_index = i; + } + } + + //if it is not in the array, return + if (found_molecule == false) { + return; + } + + //Otherwise, shift the molecules while removing the specified molecule + for (int j = molecule_index; j < pb->pb_stats->num_feasible_blocks - 1; j++) { + pb->pb_stats->feasible_blocks[j] = pb->pb_stats->feasible_blocks[j + 1]; + } + pb->pb_stats->num_feasible_blocks--; +} + +/* Add blk to list of feasible blocks sorted according to gain */ +void add_molecule_to_pb_stats_candidates(t_pack_molecule* molecule, + std::map& gain, + t_pb* pb, + int max_queue_size, + AttractionInfo& attraction_groups) { + int i, j; + int num_molecule_failures = 0; + + AttractGroupId cluster_att_grp = pb->pb_stats->attraction_grp_id; + + /* When the clusterer packs with attraction groups the goal is to + * pack more densely. Removing failed molecules to make room for the exploration of + * more molecules helps to achieve this purpose. + */ + if (attraction_groups.num_attraction_groups() > 0) { + auto got = pb->pb_stats->atom_failures.find(molecule->atom_block_ids[0]); + if (got == pb->pb_stats->atom_failures.end()) { + num_molecule_failures = 0; + } else { + num_molecule_failures = got->second; + } + + if (num_molecule_failures > 0) { + remove_molecule_from_pb_stats_candidates(molecule, pb); + return; + } + } + + for (i = 0; i < pb->pb_stats->num_feasible_blocks; i++) { + if (pb->pb_stats->feasible_blocks[i] == molecule) { + return; // already in queue, do nothing + } + } + + if (pb->pb_stats->num_feasible_blocks >= max_queue_size - 1) { + /* maximum size for array, remove smallest gain element and sort */ + if (get_molecule_gain(molecule, gain, cluster_att_grp, attraction_groups, num_molecule_failures) > get_molecule_gain(pb->pb_stats->feasible_blocks[0], gain, cluster_att_grp, attraction_groups, num_molecule_failures)) { + /* single loop insertion sort */ + for (j = 0; j < pb->pb_stats->num_feasible_blocks - 1; j++) { + if (get_molecule_gain(molecule, gain, cluster_att_grp, attraction_groups, num_molecule_failures) <= get_molecule_gain(pb->pb_stats->feasible_blocks[j + 1], gain, cluster_att_grp, attraction_groups, num_molecule_failures)) { + pb->pb_stats->feasible_blocks[j] = molecule; + break; + } else { + pb->pb_stats->feasible_blocks[j] = pb->pb_stats->feasible_blocks[j + 1]; + } + } + if (j == pb->pb_stats->num_feasible_blocks - 1) { + pb->pb_stats->feasible_blocks[j] = molecule; + } + } + } else { + /* Expand array and single loop insertion sort */ + for (j = pb->pb_stats->num_feasible_blocks - 1; j >= 0; j--) { + if (get_molecule_gain(pb->pb_stats->feasible_blocks[j], gain, cluster_att_grp, attraction_groups, num_molecule_failures) > get_molecule_gain(molecule, gain, cluster_att_grp, attraction_groups, num_molecule_failures)) { + pb->pb_stats->feasible_blocks[j + 1] = pb->pb_stats->feasible_blocks[j]; + } else { + pb->pb_stats->feasible_blocks[j + 1] = molecule; + break; + } + } + if (j < 0) { + pb->pb_stats->feasible_blocks[0] = molecule; + } + pb->pb_stats->num_feasible_blocks++; + } +} + +/*****************************************/ +void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, + t_cluster_placement_stats** cluster_placement_stats, + t_pb_graph_node*** primitives_list, + t_pack_molecule* molecules_head, + t_clustering_data& clustering_data, + std::unordered_map& net_output_feeds_driving_block_input, + int& unclustered_list_head_size, + int num_molecules) { + /* Allocates the main data structures used for clustering and properly * + * initializes them. */ + + t_molecule_link* next_ptr; + t_pack_molecule* cur_molecule; + t_pack_molecule** molecule_array; + int max_molecule_size; + + /* alloc and load list of molecules to pack */ + clustering_data.unclustered_list_head = (t_molecule_link*)vtr::calloc(max_molecule_stats.num_used_ext_inputs + 1, sizeof(t_molecule_link)); + unclustered_list_head_size = max_molecule_stats.num_used_ext_inputs + 1; + + for (int i = 0; i <= max_molecule_stats.num_used_ext_inputs; i++) { + clustering_data.unclustered_list_head[i].next = nullptr; + } + + molecule_array = (t_pack_molecule**)vtr::malloc(num_molecules * sizeof(t_pack_molecule*)); + cur_molecule = molecules_head; + for (int i = 0; i < num_molecules; i++) { + VTR_ASSERT(cur_molecule != nullptr); + molecule_array[i] = cur_molecule; + cur_molecule = cur_molecule->next; + } + VTR_ASSERT(cur_molecule == nullptr); + qsort((void*)molecule_array, num_molecules, sizeof(t_pack_molecule*), + compare_molecule_gain); + + clustering_data.memory_pool = (t_molecule_link*)vtr::malloc(num_molecules * sizeof(t_molecule_link)); + next_ptr = clustering_data.memory_pool; + + for (int i = 0; i < num_molecules; i++) { + //Figure out how many external inputs are used by this molecule + t_molecule_stats molecule_stats = calc_molecule_stats(molecule_array[i]); + int ext_inps = molecule_stats.num_used_ext_inputs; + + //Insert the molecule into the unclustered lists by number of external inputs + next_ptr->moleculeptr = molecule_array[i]; + next_ptr->next = clustering_data.unclustered_list_head[ext_inps].next; + clustering_data.unclustered_list_head[ext_inps].next = next_ptr; + + next_ptr++; + } + free(molecule_array); + + /* load net info */ + auto& atom_ctx = g_vpr_ctx.atom(); + for (AtomNetId net : atom_ctx.nlist.nets()) { + AtomPinId driver_pin = atom_ctx.nlist.net_driver(net); + AtomBlockId driver_block = atom_ctx.nlist.pin_block(driver_pin); + + for (AtomPinId sink_pin : atom_ctx.nlist.net_sinks(net)) { + AtomBlockId sink_block = atom_ctx.nlist.pin_block(sink_pin); + + if (driver_block == sink_block) { + net_output_feeds_driving_block_input[net]++; + } + } + } + + /* alloc and load cluster placement info */ + *cluster_placement_stats = alloc_and_load_cluster_placement_stats(); + + /* alloc array that will store primitives that a molecule gets placed to, + * primitive_list is referenced by index, for example a atom block in index 2 of a molecule matches to a primitive in index 2 in primitive_list + * this array must be the size of the biggest molecule + */ + max_molecule_size = 1; + cur_molecule = molecules_head; + while (cur_molecule != nullptr) { + if (cur_molecule->num_blocks > max_molecule_size) { + max_molecule_size = cur_molecule->num_blocks; + } + cur_molecule = cur_molecule->next; + } + *primitives_list = (t_pb_graph_node**)vtr::calloc(max_molecule_size, sizeof(t_pb_graph_node*)); +} + +/*****************************************/ +void free_pb_stats_recursive(t_pb* pb) { + int i, j; + /* Releases all the memory used by clustering data structures. */ + if (pb) { + if (pb->pb_graph_node != nullptr) { + if (!pb->pb_graph_node->is_primitive()) { + for (i = 0; i < pb->pb_graph_node->pb_type->modes[pb->mode].num_pb_type_children; i++) { + for (j = 0; j < pb->pb_graph_node->pb_type->modes[pb->mode].pb_type_children[i].num_pb; j++) { + if (pb->child_pbs && pb->child_pbs[i]) { + free_pb_stats_recursive(&pb->child_pbs[i][j]); + } + } + } + } + } + free_pb_stats(pb); + } +} + +bool primitive_feasible(const AtomBlockId blk_id, t_pb* cur_pb) { + const t_pb_type* cur_pb_type = cur_pb->pb_graph_node->pb_type; + + VTR_ASSERT(cur_pb_type->num_modes == 0); /* primitive */ + + auto& atom_ctx = g_vpr_ctx.atom(); + AtomBlockId cur_pb_blk_id = atom_ctx.lookup.pb_atom(cur_pb); + if (cur_pb_blk_id && cur_pb_blk_id != blk_id) { + /* This pb already has a different logical block */ + return false; + } + + if (cur_pb_type->class_type == MEMORY_CLASS) { + /* Memory class has additional feasibility requirements: + * - all siblings must share all nets, including open nets, with the exception of data nets */ + + /* find sibling if one exists */ + AtomBlockId sibling_memory_blk_id = find_memory_sibling(cur_pb); + + if (sibling_memory_blk_id) { + //There is a sibling, see if the current block is feasible with it + bool sibling_feasible = primitive_memory_sibling_feasible(blk_id, cur_pb_type, sibling_memory_blk_id); + if (!sibling_feasible) { + return false; + } + } + } + + //Generic feasibility check + return primitive_type_feasible(blk_id, cur_pb_type); +} + +bool primitive_memory_sibling_feasible(const AtomBlockId blk_id, const t_pb_type* cur_pb_type, const AtomBlockId sibling_blk_id) { + /* Check that the two atom blocks blk_id and sibling_blk_id (which should both be memory slices) + * are feasible, in the sence that they have precicely the same net connections (with the + * exception of nets in data port classes). + * + * Note that this routine does not check pin feasibility against the cur_pb_type; so + * primitive_type_feasible() should also be called on blk_id before concluding it is feasible. + */ + auto& atom_ctx = g_vpr_ctx.atom(); + VTR_ASSERT(cur_pb_type->class_type == MEMORY_CLASS); + + //First, identify the 'data' ports by looking at the cur_pb_type + std::unordered_set data_ports; + for (int iport = 0; iport < cur_pb_type->num_ports; ++iport) { + const char* port_class = cur_pb_type->ports[iport].port_class; + if (port_class && strstr(port_class, "data") == port_class) { + //The port_class starts with "data", so it is a data port + + //Record the port + data_ports.insert(cur_pb_type->ports[iport].model_port); + } + } + + //Now verify that all nets (except those connected to data ports) are equivalent + //between blk_id and sibling_blk_id + + //Since the atom netlist stores only in-use ports, we iterate over the model to ensure + //all ports are compared + const t_model* model = cur_pb_type->model; + for (t_model_ports* port : {model->inputs, model->outputs}) { + for (; port; port = port->next) { + if (data_ports.count(port)) { + //Don't check data ports + continue; + } + + //Note: VPR doesn't support multi-driven nets, so all outputs + //should be data ports, otherwise the siblings will both be + //driving the output net + + //Get the ports from each primitive + auto blk_port_id = atom_ctx.nlist.find_atom_port(blk_id, port); + auto sib_port_id = atom_ctx.nlist.find_atom_port(sibling_blk_id, port); + + //Check that all nets (including unconnected nets) match + for (int ipin = 0; ipin < port->size; ++ipin) { + //The nets are initialized as invalid (i.e. disconnected) + AtomNetId blk_net_id; + AtomNetId sib_net_id; + + //We can get the actual net provided the port exists + // + //Note that if the port did not exist, the net is left + //as invalid/disconneced + if (blk_port_id) { + blk_net_id = atom_ctx.nlist.port_net(blk_port_id, ipin); + } + if (sib_port_id) { + sib_net_id = atom_ctx.nlist.port_net(sib_port_id, ipin); + } + + //The sibling and block must have the same (possibly disconnected) + //net on this pin + if (blk_net_id != sib_net_id) { + //Nets do not match, not feasible + return false; + } + } + } + } + + return true; +} + +/*****************************************/ +t_pack_molecule* get_molecule_by_num_ext_inputs(const int ext_inps, + const enum e_removal_policy remove_flag, + t_cluster_placement_stats* cluster_placement_stats_ptr, + t_molecule_link* unclustered_list_head) { + /* This routine returns an atom block which has not been clustered, has * + * no connection to the current cluster, satisfies the cluster * + * clock constraints, is a valid subblock inside the cluster, does not exceed the cluster subblock units available, + * and has ext_inps external inputs. If * + * there is no such atom block it returns ClusterBlockId::INVALID(). Remove_flag * + * controls whether or not blocks that have already been clustered * + * are removed from the unclustered_list data structures. NB: * + * to get a atom block regardless of clock constraints just set clocks_ * + * avail > 0. */ + + t_molecule_link *ptr, *prev_ptr; + int i; + bool success; + + prev_ptr = &unclustered_list_head[ext_inps]; + ptr = unclustered_list_head[ext_inps].next; + while (ptr != nullptr) { + /* TODO: Get better candidate atom block in future, eg. return most timing critical or some other smarter metric */ + if (ptr->moleculeptr->valid) { + success = true; + for (i = 0; i < get_array_size_of_molecule(ptr->moleculeptr); i++) { + if (ptr->moleculeptr->atom_block_ids[i]) { + auto blk_id = ptr->moleculeptr->atom_block_ids[i]; + if (!exists_free_primitive_for_atom_block(cluster_placement_stats_ptr, blk_id)) { + /* TODO: I should be using a better filtering check especially when I'm + * dealing with multiple clock/multiple global reset signals where the clock/reset + * packed in matters, need to do later when I have the circuits to check my work */ + success = false; + break; + } + } + } + if (success == true) { + return ptr->moleculeptr; + } + prev_ptr = ptr; + } + + else if (remove_flag == REMOVE_CLUSTERED) { + VTR_ASSERT(0); /* this doesn't work right now with 2 the pass packing for each complex block */ + prev_ptr->next = ptr->next; + } + + ptr = ptr->next; + } + + return nullptr; +} + +/*****************************************/ +t_pack_molecule* get_free_molecule_with_most_ext_inputs_for_cluster(t_pb* cur_pb, + t_cluster_placement_stats* cluster_placement_stats_ptr, + t_molecule_link* unclustered_list_head, + const int& unclustered_list_head_size) { + /* This routine is used to find new blocks for clustering when there are no feasible * + * blocks with any attraction to the current cluster (i.e. it finds * + * blocks which are unconnected from the current cluster). It returns * + * the atom block with the largest number of used inputs that satisfies the * + * clocking and number of inputs constraints. If no suitable atom block is * + * found, the routine returns ClusterBlockId::INVALID(). + * TODO: Analyze if this function is useful in more detail, also, should probably not include clock in input count + */ + + int inputs_avail = 0; + + for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { + inputs_avail += cur_pb->pb_stats->input_pins_used[i].size(); + } + + t_pack_molecule* molecule = nullptr; + + if (inputs_avail >= unclustered_list_head_size) { + inputs_avail = unclustered_list_head_size - 1; + } + + for (int ext_inps = inputs_avail; ext_inps >= 0; ext_inps--) { + molecule = get_molecule_by_num_ext_inputs(ext_inps, LEAVE_CLUSTERED, cluster_placement_stats_ptr, unclustered_list_head); + if (molecule != nullptr) { + break; + } + } + return molecule; +} + +/*****************************************/ +void alloc_and_load_pb_stats(t_pb* pb, const int feasible_block_array_size) { + /* Call this routine when starting to fill up a new cluster. It resets * + * the gain vector, etc. */ + + pb->pb_stats = new t_pb_stats; + + /* If statement below is for speed. If nets are reasonably low-fanout, * + * only a relatively small number of blocks will be marked, and updating * + * only those atom block structures will be fastest. If almost all blocks * + * have been touched it should be faster to just run through them all * + * in order (less addressing and better cache locality). */ + pb->pb_stats->input_pins_used = std::vector>(pb->pb_graph_node->num_input_pin_class); + pb->pb_stats->output_pins_used = std::vector>(pb->pb_graph_node->num_output_pin_class); + pb->pb_stats->lookahead_input_pins_used = std::vector>(pb->pb_graph_node->num_input_pin_class); + pb->pb_stats->lookahead_output_pins_used = std::vector>(pb->pb_graph_node->num_output_pin_class); + pb->pb_stats->num_feasible_blocks = NOT_VALID; + pb->pb_stats->feasible_blocks = (t_pack_molecule**)vtr::calloc(feasible_block_array_size, sizeof(t_pack_molecule*)); + + pb->pb_stats->tie_break_high_fanout_net = AtomNetId::INVALID(); + + pb->pb_stats->pulled_from_atom_groups = 0; + pb->pb_stats->num_att_group_atoms_used = 0; + + pb->pb_stats->gain.clear(); + pb->pb_stats->timinggain.clear(); + pb->pb_stats->connectiongain.clear(); + pb->pb_stats->sharinggain.clear(); + pb->pb_stats->hillgain.clear(); + pb->pb_stats->transitive_fanout_candidates.clear(); + + pb->pb_stats->num_pins_of_net_in_pb.clear(); + + pb->pb_stats->num_child_blocks_in_pb = 0; + + pb->pb_stats->explore_transitive_fanout = true; +} +/*****************************************/ + +/** + * Cleans up a pb after unsuccessful molecule packing + * + * Recursively frees pbs from a t_pb tree. The given root pb itself is not + * deleted. + * + * If a pb object has its children allocated then before freeing them the + * function checks if there is no atom that corresponds to any of them. The + * check is performed only for leaf (primitive) pbs. The function recurses for + * non-primitive pbs. + * + * The cleaning itself includes deleting all child pbs, resetting mode of the + * pb and also freeing its name. This prepares the pb for another round of + * molecule packing tryout. + */ +bool cleanup_pb(t_pb* pb) { + bool can_free = true; + + /* Recursively check if there are any children with already assigned atoms */ + if (pb->child_pbs != nullptr) { + const t_mode* mode = &pb->pb_graph_node->pb_type->modes[pb->mode]; + VTR_ASSERT(mode != nullptr); + + /* Check each mode */ + for (int i = 0; i < mode->num_pb_type_children; ++i) { + /* Check each child */ + if (pb->child_pbs[i] != nullptr) { + for (int j = 0; j < mode->pb_type_children[i].num_pb; ++j) { + t_pb* pb_child = &pb->child_pbs[i][j]; + t_pb_type* pb_type = pb_child->pb_graph_node->pb_type; + + /* Primitive, check occupancy */ + if (pb_type->num_modes == 0) { + if (pb_child->name != nullptr) { + can_free = false; + } + } + + /* Non-primitive, recurse */ + else { + if (!cleanup_pb(pb_child)) { + can_free = false; + } + } + } + } + } + + /* Free if can */ + if (can_free) { + for (int i = 0; i < mode->num_pb_type_children; ++i) { + if (pb->child_pbs[i] != nullptr) { + delete[] pb->child_pbs[i]; + } + } + + delete[] pb->child_pbs; + pb->child_pbs = nullptr; + pb->mode = 0; + + if (pb->name) { + free(pb->name); + pb->name = nullptr; + } + } + } + + return can_free; +} + +/** + * Performs legality checks to see whether the selected molecule can be + * packed into the current cluster. The legality checks are related to + * floorplanning, pin feasibility, and routing (if detailed route + * checking is enabled). The routine returns BLK_PASSED if the molecule + * can be packed in the cluster. If the block passes, the routine commits + * it to the current cluster and updates the appropriate data structures. + * Otherwise, it returns the appropriate failed pack status based on which + * legality check the molecule failed. + */ +enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_placement_stats_ptr, + t_pack_molecule* molecule, + t_pb_graph_node** primitives_list, + t_pb* pb, + const int max_models, + const int max_cluster_size, + const ClusterBlockId clb_index, + const int detailed_routing_stage, + t_lb_router_data* router_data, + int verbosity, + bool enable_pin_feasibility_filter, + const int feasible_block_array_size, + t_ext_pin_util max_external_pin_util, + PartitionRegion& temp_cluster_pr) { + int molecule_size, failed_location; + int i; + enum e_block_pack_status block_pack_status; + t_pb* parent; + t_pb* cur_pb; + + auto& atom_ctx = g_vpr_ctx.atom(); + auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); + + parent = nullptr; + + block_pack_status = BLK_STATUS_UNDEFINED; + + molecule_size = get_array_size_of_molecule(molecule); + failed_location = 0; + + if (verbosity > 3) { + AtomBlockId root_atom = molecule->atom_block_ids[molecule->root]; + VTR_LOG("\t\tTry pack molecule: '%s' (%s)", + atom_ctx.nlist.block_name(root_atom).c_str(), + atom_ctx.nlist.block_model(root_atom)->name); + VTR_LOGV(molecule->pack_pattern, + " molecule_type %s molecule_size %zu", + molecule->pack_pattern->name, + molecule->atom_block_ids.size()); + VTR_LOG("\n"); + } + + // if this cluster has a molecule placed in it that is part of a long chain + // (a chain that consists of more than one molecule), don't allow more long chain + // molecules to be placed in this cluster. To avoid possibly creating cluster level + // blocks that have incompatible placement constraints or form very long placement + // macros that limit placement flexibility. + if (cluster_placement_stats_ptr->has_long_chain && molecule->is_chain() && molecule->chain_info->is_long_chain) { + VTR_LOGV(verbosity > 4, "\t\t\tFAILED Placement Feasibility Filter: Only one long chain per cluster is allowed\n"); + //Record the failure of this molecule in the current pb stats + record_molecule_failure(molecule, pb); + return BLK_FAILED_FEASIBLE; + } + + bool cluster_pr_needs_update = false; + bool cluster_pr_update_check = false; + + //check if every atom in the molecule is legal in the cluster from a floorplanning perspective + for (int i_mol = 0; i_mol < molecule_size; i_mol++) { + //try to intersect with atom PartitionRegion if atom exists + if (molecule->atom_block_ids[i_mol]) { + block_pack_status = atom_cluster_floorplanning_check(molecule->atom_block_ids[i_mol], + clb_index, verbosity, + temp_cluster_pr, + cluster_pr_needs_update); + if (block_pack_status == BLK_FAILED_FLOORPLANNING) { + //Record the failure of this molecule in the current pb stats + record_molecule_failure(molecule, pb); + return block_pack_status; + } + if (cluster_pr_needs_update == true) { + cluster_pr_update_check = true; + } + } + } + + //change status back to undefined before the while loop in case in was changed to BLK_PASSED in the above for loop + block_pack_status = BLK_STATUS_UNDEFINED; + + while (block_pack_status != BLK_PASSED) { + if (get_next_primitive_list(cluster_placement_stats_ptr, molecule, + primitives_list)) { + block_pack_status = BLK_PASSED; + + for (i = 0; i < molecule_size && block_pack_status == BLK_PASSED; i++) { + VTR_ASSERT((primitives_list[i] == nullptr) == (!molecule->atom_block_ids[i])); + failed_location = i + 1; + // try place atom block if it exists + if (molecule->atom_block_ids[i]) { + block_pack_status = try_place_atom_block_rec(primitives_list[i], + molecule->atom_block_ids[i], pb, &parent, + max_models, max_cluster_size, clb_index, + cluster_placement_stats_ptr, molecule, router_data, + verbosity, feasible_block_array_size); + } + } + + if (enable_pin_feasibility_filter && block_pack_status == BLK_PASSED) { + /* Check if pin usage is feasible for the current packing assignment */ + reset_lookahead_pins_used(pb); + try_update_lookahead_pins_used(pb); + if (!check_lookahead_pins_used(pb, max_external_pin_util)) { + VTR_LOGV(verbosity > 4, "\t\t\tFAILED Pin Feasibility Filter\n"); + block_pack_status = BLK_FAILED_FEASIBLE; + } + } + if (block_pack_status == BLK_PASSED) { + /* + * during the clustering step of `do_clustering`, `detailed_routing_stage` is incremented at each iteration until it a cluster + * is correctly generated or `detailed_routing_stage` assumes an invalid value (E_DETAILED_ROUTE_INVALID). + * depending on its value we have different behaviors: + * - E_DETAILED_ROUTE_AT_END_ONLY: Skip routing if heuristic is to route at the end of packing complex block. + * - E_DETAILED_ROUTE_FOR_EACH_ATOM: Try to route if heuristic is to route for every atom. If the clusterer arrives at this stage, + * it means that more checks have to be performed as the previous stage failed to generate a new cluster. + * + * mode_status is a data structure containing the status of the mode selection. Its members are: + * - bool is_mode_conflict + * - bool try_expand_all_modes + * - bool expand_all_modes + * + * is_mode_conflict affects this stage. Its value determines whether the cluster failed to pack after a mode conflict issue. + * It holds a flag that is used to verify whether try_intra_lb_route ended in a mode conflict issue. + * + * Until is_mode_conflict is set to FALSE by try_intra_lb_route, the loop re-iterates. If all the available modes are exhausted + * an error will be thrown during mode conflicts checks (this to prevent infinite loops). + * + * If the value is TRUE the cluster has to be re-routed, and its internal pb_graph_nodes will have more restrict choices + * for what regards the mode that has to be selected. + * + * is_mode_conflict is initially set to TRUE, and, unless a mode conflict is found, it is set to false in `try_intra_lb_route`. + * + * try_expand_all_modes is set if the node expansion failed to find a valid routing path. The clusterer tries to find another route + * by using all the modes during node expansion. + * + * expand_all_modes is used to enable the expansion of all the nodes using all the possible modes. + */ + t_mode_selection_status mode_status; + bool is_routed = false; + bool do_detailed_routing_stage = detailed_routing_stage == (int)E_DETAILED_ROUTE_FOR_EACH_ATOM; + if (do_detailed_routing_stage) { + do { + reset_intra_lb_route(router_data); + is_routed = try_intra_lb_route(router_data, verbosity, &mode_status); + } while (do_detailed_routing_stage && mode_status.is_mode_issue()); + } + + if (do_detailed_routing_stage && is_routed == false) { + /* Cannot pack */ + VTR_LOGV(verbosity > 4, "\t\t\tFAILED Detailed Routing Legality\n"); + block_pack_status = BLK_FAILED_ROUTE; + } else { + /* Pack successful, commit + * TODO: SW Engineering note - may want to update cluster stats here too instead of doing it outside + */ + VTR_ASSERT(block_pack_status == BLK_PASSED); + if (molecule->is_chain()) { + /* Chained molecules often take up lots of area and are important, + * if a chain is packed in, want to rename logic block to match chain name */ + AtomBlockId chain_root_blk_id = molecule->atom_block_ids[molecule->pack_pattern->root_block->block_id]; + cur_pb = atom_ctx.lookup.atom_pb(chain_root_blk_id)->parent_pb; + while (cur_pb != nullptr) { + free(cur_pb->name); + cur_pb->name = vtr::strdup(atom_ctx.nlist.block_name(chain_root_blk_id).c_str()); + cur_pb = cur_pb->parent_pb; + } + // if this molecule is part of a chain, mark the cluster as having a long chain + // molecule. Also check if it's the first molecule in the chain to be packed. + // If so, update the chain id for this chain of molecules to make sure all + // molecules will be packed to the same chain id and can reach each other using + // the chain direct links between clusters + if (molecule->chain_info->is_long_chain) { + cluster_placement_stats_ptr->has_long_chain = true; + if (molecule->chain_info->chain_id == -1) { + update_molecule_chain_info(molecule, primitives_list[molecule->root]); + } + } + } + + //update cluster PartitionRegion if atom with floorplanning constraints was added + if (cluster_pr_update_check) { + floorplanning_ctx.cluster_constraints[clb_index] = temp_cluster_pr; + if (verbosity > 2) { + VTR_LOG("\nUpdated PartitionRegion of cluster %d\n", clb_index); + } + } + + for (i = 0; i < molecule_size; i++) { + if (molecule->atom_block_ids[i]) { + /* invalidate all molecules that share atom block with current molecule */ + + auto rng = atom_ctx.atom_molecules.equal_range(molecule->atom_block_ids[i]); + for (const auto& kv : vtr::make_range(rng.first, rng.second)) { + t_pack_molecule* cur_molecule = kv.second; + cur_molecule->valid = false; + } + + commit_primitive(cluster_placement_stats_ptr, primitives_list[i]); + } + } + } + } + + if (block_pack_status != BLK_PASSED) { + for (i = 0; i < failed_location; i++) { + if (molecule->atom_block_ids[i]) { + remove_atom_from_target(router_data, molecule->atom_block_ids[i]); + } + } + for (i = 0; i < failed_location; i++) { + if (molecule->atom_block_ids[i]) { + revert_place_atom_block(molecule->atom_block_ids[i], router_data); + } + } + + //Record the failure of this molecule in the current pb stats + record_molecule_failure(molecule, pb); + + /* Packing failed, but a part of the pb tree is still allocated and pbs have their modes set. + * Before trying to pack next molecule the unused pbs need to be freed and, the most important, + * their modes reset. This task is performed by the cleanup_pb() function below. */ + cleanup_pb(pb); + + } else { + VTR_LOGV(verbosity > 3, "\t\tPASSED pack molecule\n"); + } + } else { + VTR_LOGV(verbosity > 3, "\t\tFAILED No candidate primitives available\n"); + block_pack_status = BLK_FAILED_FEASIBLE; + break; /* no more candidate primitives available, this molecule will not pack, return fail */ + } + } + return block_pack_status; +} + +/* Record the failure of the molecule in this cluster in the current pb stats. + * If a molecule fails repeatedly, it's gain will be penalized if packing with + * attraction groups on. */ +void record_molecule_failure(t_pack_molecule* molecule, t_pb* pb) { + //Only have to record the failure for the first atom in the molecule. + //The convention when checking if a molecule has failed to pack in the cluster + //is to check whether the first atoms has been recorded as having failed + + auto got = pb->pb_stats->atom_failures.find(molecule->atom_block_ids[0]); + if (got == pb->pb_stats->atom_failures.end()) { + pb->pb_stats->atom_failures.insert({molecule->atom_block_ids[0], 1}); + } else { + got->second++; + } +} + +/** + * Try place atom block into current primitive location + */ + +enum e_block_pack_status try_place_atom_block_rec(const t_pb_graph_node* pb_graph_node, + const AtomBlockId blk_id, + t_pb* cb, + t_pb** parent, + const int max_models, + const int max_cluster_size, + const ClusterBlockId clb_index, + const t_cluster_placement_stats* cluster_placement_stats_ptr, + const t_pack_molecule* molecule, + t_lb_router_data* router_data, + int verbosity, + const int feasible_block_array_size) { + int i, j; + bool is_primitive; + enum e_block_pack_status block_pack_status; + + t_pb* my_parent; + t_pb *pb, *parent_pb; + const t_pb_type* pb_type; + + auto& atom_ctx = g_vpr_ctx.mutable_atom(); + + my_parent = nullptr; + + block_pack_status = BLK_PASSED; + + /* Discover parent */ + if (pb_graph_node->parent_pb_graph_node != cb->pb_graph_node) { + block_pack_status = try_place_atom_block_rec(pb_graph_node->parent_pb_graph_node, blk_id, cb, + &my_parent, max_models, max_cluster_size, clb_index, + cluster_placement_stats_ptr, molecule, router_data, + verbosity, feasible_block_array_size); + parent_pb = my_parent; + } else { + parent_pb = cb; + } + + /* Create siblings if siblings are not allocated */ + if (parent_pb->child_pbs == nullptr) { + atom_ctx.lookup.set_atom_pb(AtomBlockId::INVALID(), parent_pb); + + VTR_ASSERT(parent_pb->name == nullptr); + parent_pb->name = vtr::strdup(atom_ctx.nlist.block_name(blk_id).c_str()); + parent_pb->mode = pb_graph_node->pb_type->parent_mode->index; + set_reset_pb_modes(router_data, parent_pb, true); + const t_mode* mode = &parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode]; + parent_pb->child_pbs = new t_pb*[mode->num_pb_type_children]; + + for (i = 0; i < mode->num_pb_type_children; i++) { + parent_pb->child_pbs[i] = new t_pb[mode->pb_type_children[i].num_pb]; + + for (j = 0; j < mode->pb_type_children[i].num_pb; j++) { + parent_pb->child_pbs[i][j].parent_pb = parent_pb; + + atom_ctx.lookup.set_atom_pb(AtomBlockId::INVALID(), &parent_pb->child_pbs[i][j]); + + parent_pb->child_pbs[i][j].pb_graph_node = &(parent_pb->pb_graph_node->child_pb_graph_nodes[parent_pb->mode][i][j]); + } + } + } else { + VTR_ASSERT(parent_pb->mode == pb_graph_node->pb_type->parent_mode->index); + } + + const t_mode* mode = &parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode]; + for (i = 0; i < mode->num_pb_type_children; i++) { + if (pb_graph_node->pb_type == &mode->pb_type_children[i]) { + break; + } + } + VTR_ASSERT(i < mode->num_pb_type_children); + pb = &parent_pb->child_pbs[i][pb_graph_node->placement_index]; + *parent = pb; /* this pb is parent of it's child that called this function */ + VTR_ASSERT(pb->pb_graph_node == pb_graph_node); + if (pb->pb_stats == nullptr) { + alloc_and_load_pb_stats(pb, feasible_block_array_size); + } + pb_type = pb_graph_node->pb_type; + + /* Any pb_type under an mode, which is disabled for packing, should not be considerd for mapping + * Early exit to flag failure + */ + if (true == pb_type->parent_mode->disable_packing) { + return BLK_FAILED_FEASIBLE; + } + + is_primitive = (pb_type->num_modes == 0); + + if (is_primitive) { + VTR_ASSERT(!atom_ctx.lookup.pb_atom(pb) + && atom_ctx.lookup.atom_pb(blk_id) == nullptr + && atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()); + /* try pack to location */ + VTR_ASSERT(pb->name == nullptr); + pb->name = vtr::strdup(atom_ctx.nlist.block_name(blk_id).c_str()); + + //Update the atom netlist mappings + atom_ctx.lookup.set_atom_clb(blk_id, clb_index); + atom_ctx.lookup.set_atom_pb(blk_id, pb); + + add_atom_as_target(router_data, blk_id); + if (!primitive_feasible(blk_id, pb)) { + /* failed location feasibility check, revert pack */ + block_pack_status = BLK_FAILED_FEASIBLE; + } + + // if this block passed and is part of a chained molecule + if (block_pack_status == BLK_PASSED && molecule->is_chain()) { + auto molecule_root_block = molecule->atom_block_ids[molecule->root]; + // if this is the root block of the chain molecule check its placmeent feasibility + if (blk_id == molecule_root_block) { + block_pack_status = check_chain_root_placement_feasibility(pb_graph_node, molecule, blk_id); + } + } + + VTR_LOGV(verbosity > 4 && block_pack_status == BLK_PASSED, + "\t\t\tPlaced atom '%s' (%s) at %s\n", + atom_ctx.nlist.block_name(blk_id).c_str(), + atom_ctx.nlist.block_model(blk_id)->name, + pb->hierarchical_type_name().c_str()); + } + + if (block_pack_status != BLK_PASSED) { + free(pb->name); + pb->name = nullptr; + } + return block_pack_status; +} + +/* + * Checks if the atom and cluster have compatible floorplanning constraints + * If the atom and cluster both have non-empty PartitionRegions, and the intersection + * of the PartitionRegions is empty, the atom cannot be packed in the cluster. + */ +enum e_block_pack_status atom_cluster_floorplanning_check(const AtomBlockId blk_id, + const ClusterBlockId clb_index, + const int verbosity, + PartitionRegion& temp_cluster_pr, + bool& cluster_pr_needs_update) { + auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); + + /*check if the atom can go in the cluster by checking if the atom and cluster have intersecting PartitionRegions*/ + + //get partition that atom belongs to + PartitionId partid; + partid = floorplanning_ctx.constraints.get_atom_partition(blk_id); + + PartitionRegion atom_pr; + PartitionRegion cluster_pr; + + //if the atom does not belong to a partition, it can be put in the cluster + //regardless of what the cluster's PartitionRegion is because it has no constraints + if (partid == PartitionId::INVALID()) { + if (verbosity > 3) { + VTR_LOG("\t\t\t Intersect: Atom block %d has no floorplanning constraints, passed for cluster %d \n", blk_id, clb_index); + } + cluster_pr_needs_update = false; + return BLK_PASSED; + } else { + //get pr of that partition + atom_pr = floorplanning_ctx.constraints.get_partition_pr(partid); + + //intersect it with the pr of the current cluster + cluster_pr = floorplanning_ctx.cluster_constraints[clb_index]; + + if (cluster_pr.empty() == true) { + temp_cluster_pr = atom_pr; + cluster_pr_needs_update = true; + if (verbosity > 3) { + VTR_LOG("\t\t\t Intersect: Atom block %d has floorplanning constraints, passed cluster %d which has empty PR\n", blk_id, clb_index); + } + return BLK_PASSED; + } else { + //update cluster_pr with the intersection of the cluster's PartitionRegion + //and the atom's PartitionRegion + update_cluster_part_reg(cluster_pr, atom_pr); + } + + if (cluster_pr.empty() == true) { + if (verbosity > 3) { + VTR_LOG("\t\t\t Intersect: Atom block %d failed floorplanning check for cluster %d \n", blk_id, clb_index); + } + cluster_pr_needs_update = false; + return BLK_FAILED_FLOORPLANNING; + } else { + //update the cluster's PartitionRegion with the intersecting PartitionRegion + temp_cluster_pr = cluster_pr; + cluster_pr_needs_update = true; + if (verbosity > 3) { + VTR_LOG("\t\t\t Intersect: Atom block %d passed cluster %d, cluster PR was updated with intersection result \n", blk_id, clb_index); + } + return BLK_PASSED; + } + } +} + +/* Revert trial atom block iblock and free up memory space accordingly + */ +void revert_place_atom_block(const AtomBlockId blk_id, t_lb_router_data* router_data) { + auto& atom_ctx = g_vpr_ctx.mutable_atom(); + + //We cast away const here since we may free the pb, and it is + //being removed from the active mapping. + // + //In general most code works fine accessing cosnt t_pb*, + //which is why we store them as such in atom_ctx.lookup + t_pb* pb = const_cast(atom_ctx.lookup.atom_pb(blk_id)); + + if (pb != nullptr) { + /* When freeing molecules, the current block might already have been freed by a prior revert + * When this happens, no need to do anything beyond basic book keeping at the atom block + */ + + t_pb* next = pb->parent_pb; + revalid_molecules(pb); + free_pb(pb); + pb = next; + + while (pb != nullptr) { + /* If this is pb is created only for the purposes of holding new molecule, remove it + * Must check if cluster is already freed (which can be the case) + */ + next = pb->parent_pb; + + if (pb->child_pbs != nullptr && pb->pb_stats != nullptr + && pb->pb_stats->num_child_blocks_in_pb == 0) { + set_reset_pb_modes(router_data, pb, false); + if (next != nullptr) { + /* If the code gets here, then that means that placing the initial seed molecule + * failed, don't free the actual complex block itself as the seed needs to find + * another placement */ + revalid_molecules(pb); + free_pb(pb); + } + } + pb = next; + } + } + + //Update the atom netlist mapping + atom_ctx.lookup.set_atom_clb(blk_id, ClusterBlockId::INVALID()); + atom_ctx.lookup.set_atom_pb(blk_id, nullptr); +} + +void update_connection_gain_values(const AtomNetId net_id, const AtomBlockId clustered_blk_id, t_pb* cur_pb, enum e_net_relation_to_clustered_block net_relation_to_clustered_block) { + /*This function is called when the connectiongain values on the net net_id* + *require updating. */ + + int num_internal_connections, num_open_connections, num_stuck_connections; + + num_internal_connections = num_open_connections = num_stuck_connections = 0; + + auto& atom_ctx = g_vpr_ctx.atom(); + ClusterBlockId clb_index = atom_ctx.lookup.atom_clb(clustered_blk_id); + + /* may wish to speed things up by ignoring clock nets since they are high fanout */ + + for (auto pin_id : atom_ctx.nlist.net_pins(net_id)) { + auto blk_id = atom_ctx.nlist.pin_block(pin_id); + if (atom_ctx.lookup.atom_clb(blk_id) == clb_index + && is_atom_blk_in_pb(blk_id, atom_ctx.lookup.atom_pb(clustered_blk_id))) { + num_internal_connections++; + } else if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + num_open_connections++; + } else { + num_stuck_connections++; + } + } + + if (net_relation_to_clustered_block == OUTPUT) { + for (auto pin_id : atom_ctx.nlist.net_sinks(net_id)) { + auto blk_id = atom_ctx.nlist.pin_block(pin_id); + VTR_ASSERT(blk_id); + + if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + /* TODO: Gain function accurate only if net has one connection to block, + * TODO: Should we handle case where net has multi-connection to block? + * Gain computation is only off by a bit in this case */ + if (cur_pb->pb_stats->connectiongain.count(blk_id) == 0) { + cur_pb->pb_stats->connectiongain[blk_id] = 0; + } + + if (num_internal_connections > 1) { + cur_pb->pb_stats->connectiongain[blk_id] -= 1 / (float)(num_open_connections + 1.5 * num_stuck_connections + 1 + 0.1); + } + cur_pb->pb_stats->connectiongain[blk_id] += 1 / (float)(num_open_connections + 1.5 * num_stuck_connections + 0.1); + } + } + } + + if (net_relation_to_clustered_block == INPUT) { + /*Calculate the connectiongain for the atom block which is driving * + *the atom net that is an input to an atom block in the cluster */ + + auto driver_pin_id = atom_ctx.nlist.net_driver(net_id); + auto blk_id = atom_ctx.nlist.pin_block(driver_pin_id); + + if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + if (cur_pb->pb_stats->connectiongain.count(blk_id) == 0) { + cur_pb->pb_stats->connectiongain[blk_id] = 0; + } + if (num_internal_connections > 1) { + cur_pb->pb_stats->connectiongain[blk_id] -= 1 / (float)(num_open_connections + 1.5 * num_stuck_connections + 0.1 + 1); + } + cur_pb->pb_stats->connectiongain[blk_id] += 1 / (float)(num_open_connections + 1.5 * num_stuck_connections + 0.1); + } + } +} + +void try_fill_cluster(const t_packer_opts& packer_opts, + t_cluster_placement_stats* cur_cluster_placement_stats_ptr, + t_pack_molecule*& prev_molecule, + t_pack_molecule*& next_molecule, + int& num_same_molecules, + t_pb_graph_node** primitives_list, + t_cluster_progress_stats& cluster_stats, + int num_clb, + const int num_models, + const int max_cluster_size, + const ClusterBlockId clb_index, + const int detailed_routing_stage, + AttractionInfo& attraction_groups, + vtr::vector>& clb_inter_blk_nets, + bool allow_unrelated_clustering, + const int& high_fanout_threshold, + const std::unordered_set& is_clock, + const std::shared_ptr& timing_info, + t_lb_router_data* router_data, + t_ext_pin_util target_ext_pin_util, + PartitionRegion& temp_cluster_pr, + e_block_pack_status& block_pack_status, + t_molecule_link* unclustered_list_head, + const int& unclustered_list_head_size, + std::unordered_map& net_output_feeds_driving_block_input, + std::map>& primitive_candidate_block_types) { + auto& atom_ctx = g_vpr_ctx.atom(); + auto& device_ctx = g_vpr_ctx.mutable_device(); + auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); + + block_pack_status = try_pack_molecule(cur_cluster_placement_stats_ptr, + next_molecule, + primitives_list, + cluster_ctx.clb_nlist.block_pb(clb_index), + num_models, + max_cluster_size, + clb_index, + detailed_routing_stage, + router_data, + packer_opts.pack_verbosity, + packer_opts.enable_pin_feasibility_filter, + packer_opts.feasible_block_array_size, + target_ext_pin_util, + temp_cluster_pr); + + auto blk_id = next_molecule->atom_block_ids[next_molecule->root]; + VTR_ASSERT(blk_id); + + std::string blk_name = atom_ctx.nlist.block_name(blk_id); + const t_model* blk_model = atom_ctx.nlist.block_model(blk_id); + + if (block_pack_status != BLK_PASSED) { + if (packer_opts.pack_verbosity > 2) { + if (block_pack_status == BLK_FAILED_ROUTE) { + VTR_LOG("\tNO_ROUTE: '%s' (%s)", blk_name.c_str(), blk_model->name); + VTR_LOGV(next_molecule->pack_pattern, " molecule %s molecule_size %zu", + next_molecule->pack_pattern->name, next_molecule->atom_block_ids.size()); + VTR_LOG("\n"); + fflush(stdout); + } else if (block_pack_status == BLK_FAILED_FLOORPLANNING) { + VTR_LOG("\tFAILED_FLOORPLANNING_CONSTRAINTS_CHECK: '%s' (%s)", blk_name.c_str(), blk_model->name); + VTR_LOG("\n"); + } else { + VTR_LOG("\tFAILED_FEASIBILITY_CHECK: '%s' (%s)", blk_name.c_str(), blk_model->name, block_pack_status); + VTR_LOGV(next_molecule->pack_pattern, " molecule %s molecule_size %zu", + next_molecule->pack_pattern->name, next_molecule->atom_block_ids.size()); + VTR_LOG("\n"); + fflush(stdout); + } + } + + next_molecule = get_molecule_for_cluster(cluster_ctx.clb_nlist.block_pb(clb_index), + attraction_groups, + allow_unrelated_clustering, + packer_opts.prioritize_transitive_connectivity, + packer_opts.transitive_fanout_threshold, + packer_opts.feasible_block_array_size, + &cluster_stats.num_unrelated_clustering_attempts, + cur_cluster_placement_stats_ptr, + clb_inter_blk_nets, + clb_index, packer_opts.pack_verbosity, + unclustered_list_head, + unclustered_list_head_size, + primitive_candidate_block_types); + if (prev_molecule == next_molecule) { + num_same_molecules++; + } + return; + } + + /* Continue packing by filling smallest cluster */ + if (packer_opts.pack_verbosity > 2) { + VTR_LOG("\tPASSED: '%s' (%s)", blk_name.c_str(), blk_model->name); + VTR_LOGV(next_molecule->pack_pattern, " molecule %s molecule_size %zu", + next_molecule->pack_pattern->name, next_molecule->atom_block_ids.size()); + VTR_LOG("\n"); + } + + fflush(stdout); + + //Since molecule passed, update num_molecules_processed + cluster_stats.num_molecules_processed++; + cluster_stats.mols_since_last_print++; + print_pack_status(num_clb, cluster_stats.num_molecules, + cluster_stats.num_molecules_processed, + cluster_stats.mols_since_last_print, + device_ctx.grid.width(), + device_ctx.grid.height(), + attraction_groups); + + update_cluster_stats(next_molecule, clb_index, + is_clock, //Set of all clocks + is_clock, //Set of all global signals (currently clocks) + packer_opts.global_clocks, packer_opts.alpha, packer_opts.beta, packer_opts.timing_driven, + packer_opts.connection_driven, + high_fanout_threshold, + *timing_info, + attraction_groups, + net_output_feeds_driving_block_input); + cluster_stats.num_unrelated_clustering_attempts = 0; + + if (packer_opts.timing_driven) { + cluster_stats.blocks_since_last_analysis++; /* historically, timing slacks were recomputed after X number of blocks were packed, but this doesn't significantly alter results so I (jluu) did not port the code */ + } + next_molecule = get_molecule_for_cluster(cluster_ctx.clb_nlist.block_pb(clb_index), + attraction_groups, + allow_unrelated_clustering, + packer_opts.prioritize_transitive_connectivity, + packer_opts.transitive_fanout_threshold, + packer_opts.feasible_block_array_size, + &cluster_stats.num_unrelated_clustering_attempts, + cur_cluster_placement_stats_ptr, + clb_inter_blk_nets, + clb_index, + packer_opts.pack_verbosity, + unclustered_list_head, + unclustered_list_head_size, + primitive_candidate_block_types); + + if (prev_molecule == next_molecule) { + num_same_molecules++; + } +} + +t_pack_molecule* save_cluster_routing_and_pick_new_seed(const t_packer_opts& packer_opts, + const int& num_clb, + const std::vector& seed_atoms, + const int& num_blocks_hill_added, + vtr::vector*>& intra_lb_routing, + int& seedindex, + t_cluster_progress_stats& cluster_stats, + t_lb_router_data* router_data) { + t_pack_molecule* next_seed = nullptr; + + intra_lb_routing.push_back(router_data->saved_lb_nets); + VTR_ASSERT((int)intra_lb_routing.size() == num_clb); + router_data->saved_lb_nets = nullptr; + + //Pick a new seed + next_seed = get_highest_gain_seed_molecule(&seedindex, seed_atoms); + + if (packer_opts.timing_driven) { + if (num_blocks_hill_added > 0) { + cluster_stats.blocks_since_last_analysis += num_blocks_hill_added; + } + } + return next_seed; +} + +void store_cluster_info_and_free(const t_packer_opts& packer_opts, + const ClusterBlockId& clb_index, + const t_logical_block_type_ptr logic_block_type, + const t_pb_type* le_pb_type, + std::vector& le_count, + vtr::vector>& clb_inter_blk_nets) { + auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); + auto& atom_ctx = g_vpr_ctx.atom(); + + /* store info that will be used later in packing from pb_stats and free the rest */ + t_pb_stats* pb_stats = cluster_ctx.clb_nlist.block_pb(clb_index)->pb_stats; + for (const AtomNetId mnet_id : pb_stats->marked_nets) { + int external_terminals = atom_ctx.nlist.net_pins(mnet_id).size() - pb_stats->num_pins_of_net_in_pb[mnet_id]; + /* Check if external terminals of net is within the fanout limit and that there exists external terminals */ + if (external_terminals < packer_opts.transitive_fanout_threshold && external_terminals > 0) { + clb_inter_blk_nets[clb_index].push_back(mnet_id); + } + } + auto cur_pb = cluster_ctx.clb_nlist.block_pb(clb_index); + + // update the data structure holding the LE counts + update_le_count(cur_pb, logic_block_type, le_pb_type, le_count); + + //print clustering progress incrementally + //print_pack_status(num_clb, num_molecules, num_molecules_processed, mols_since_last_print, device_ctx.grid.width(), device_ctx.grid.height()); + + free_pb_stats_recursive(cur_pb); +} + +/* Free up data structures and requeue used molecules */ +void free_data_and_requeue_used_mols_if_illegal(const ClusterBlockId& clb_index, + const int& savedseedindex, + std::map& num_used_type_instances, + int& num_clb, + int& seedindex) { + auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); + + num_used_type_instances[cluster_ctx.clb_nlist.block_type(clb_index)]--; + revalid_molecules(cluster_ctx.clb_nlist.block_pb(clb_index)); + cluster_ctx.clb_nlist.remove_block(clb_index); + cluster_ctx.clb_nlist.compress(); + num_clb--; + seedindex = savedseedindex; +} + +/*****************************************/ +void update_timing_gain_values(const AtomNetId net_id, + t_pb* cur_pb, + enum e_net_relation_to_clustered_block net_relation_to_clustered_block, + const SetupTimingInfo& timing_info, + const std::unordered_set& is_global, + std::unordered_map& net_output_feeds_driving_block_input) { + /*This function is called when the timing_gain values on the atom net* + *net_id requires updating. */ + float timinggain; + + auto& atom_ctx = g_vpr_ctx.atom(); + + /* Check if this atom net lists its driving atom block twice. If so, avoid * + * double counting this atom block by skipping the first (driving) pin. */ + auto pins = atom_ctx.nlist.net_pins(net_id); + if (net_output_feeds_driving_block_input[net_id] != 0) + pins = atom_ctx.nlist.net_sinks(net_id); + + if (net_relation_to_clustered_block == OUTPUT + && !is_global.count(net_id)) { + for (auto pin_id : pins) { + auto blk_id = atom_ctx.nlist.pin_block(pin_id); + if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + timinggain = timing_info.setup_pin_criticality(pin_id); + + if (cur_pb->pb_stats->timinggain.count(blk_id) == 0) { + cur_pb->pb_stats->timinggain[blk_id] = 0; + } + if (timinggain > cur_pb->pb_stats->timinggain[blk_id]) + cur_pb->pb_stats->timinggain[blk_id] = timinggain; + } + } + } + + if (net_relation_to_clustered_block == INPUT + && !is_global.count(net_id)) { + /*Calculate the timing gain for the atom block which is driving * + *the atom net that is an input to a atom block in the cluster */ + auto driver_pin = atom_ctx.nlist.net_driver(net_id); + auto new_blk_id = atom_ctx.nlist.pin_block(driver_pin); + + if (atom_ctx.lookup.atom_clb(new_blk_id) == ClusterBlockId::INVALID()) { + for (auto pin_id : atom_ctx.nlist.net_sinks(net_id)) { + timinggain = timing_info.setup_pin_criticality(pin_id); + + if (cur_pb->pb_stats->timinggain.count(new_blk_id) == 0) { + cur_pb->pb_stats->timinggain[new_blk_id] = 0; + } + if (timinggain > cur_pb->pb_stats->timinggain[new_blk_id]) + cur_pb->pb_stats->timinggain[new_blk_id] = timinggain; + } + } + } +} + +/*****************************************/ +void mark_and_update_partial_gain(const AtomNetId net_id, + enum e_gain_update gain_flag, + const AtomBlockId clustered_blk_id, + bool timing_driven, + bool connection_driven, + enum e_net_relation_to_clustered_block net_relation_to_clustered_block, + const SetupTimingInfo& timing_info, + const std::unordered_set& is_global, + const int high_fanout_net_threshold, + std::unordered_map& net_output_feeds_driving_block_input) { + /* Updates the marked data structures, and if gain_flag is GAIN, * + * the gain when an atom block is added to a cluster. The * + * sharinggain is the number of inputs that a atom block shares with * + * blocks that are already in the cluster. Hillgain is the * + * reduction in number of pins-required by adding a atom block to the * + * cluster. The timinggain is the criticality of the most critical* + * atom net between this atom block and an atom block in the cluster. */ + + auto& atom_ctx = g_vpr_ctx.atom(); + t_pb* cur_pb = atom_ctx.lookup.atom_pb(clustered_blk_id)->parent_pb; + cur_pb = get_top_level_pb(cur_pb); + + if (int(atom_ctx.nlist.net_sinks(net_id).size()) > high_fanout_net_threshold) { + /* Optimization: It can be too runtime costly for marking all sinks for + * a high fanout-net that probably has no hope of ever getting packed, + * thus ignore those high fanout nets */ + if (!is_global.count(net_id)) { + /* If no low/medium fanout nets, we may need to consider + * high fan-out nets for packing, so select one and store it */ + AtomNetId stored_net = cur_pb->pb_stats->tie_break_high_fanout_net; + if (!stored_net || atom_ctx.nlist.net_sinks(net_id).size() < atom_ctx.nlist.net_sinks(stored_net).size()) { + cur_pb->pb_stats->tie_break_high_fanout_net = net_id; + } + } + return; + } + + /* Mark atom net as being visited, if necessary. */ + + if (cur_pb->pb_stats->num_pins_of_net_in_pb.count(net_id) == 0) { + cur_pb->pb_stats->marked_nets.push_back(net_id); + } + + /* Update gains of affected blocks. */ + + if (gain_flag == GAIN) { + /* Check if this net is connected to it's driver block multiple times (i.e. as both an output and input) + * If so, avoid double counting by skipping the first (driving) pin. */ + + auto pins = atom_ctx.nlist.net_pins(net_id); + if (net_output_feeds_driving_block_input[net_id] != 0) + //We implicitly assume here that net_output_feeds_driver_block_input[net_id] is 2 + //(i.e. the net loops back to the block only once) + pins = atom_ctx.nlist.net_sinks(net_id); + + if (cur_pb->pb_stats->num_pins_of_net_in_pb.count(net_id) == 0) { + for (auto pin_id : pins) { + auto blk_id = atom_ctx.nlist.pin_block(pin_id); + if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + if (cur_pb->pb_stats->sharinggain.count(blk_id) == 0) { + cur_pb->pb_stats->marked_blocks.push_back(blk_id); + cur_pb->pb_stats->sharinggain[blk_id] = 1; + cur_pb->pb_stats->hillgain[blk_id] = 1 - num_ext_inputs_atom_block(blk_id); + } else { + cur_pb->pb_stats->sharinggain[blk_id]++; + cur_pb->pb_stats->hillgain[blk_id]++; + } + } + } + } + + if (connection_driven) { + update_connection_gain_values(net_id, clustered_blk_id, cur_pb, + net_relation_to_clustered_block); + } + + if (timing_driven) { + update_timing_gain_values(net_id, cur_pb, + net_relation_to_clustered_block, + timing_info, + is_global, + net_output_feeds_driving_block_input); + } + } + if (cur_pb->pb_stats->num_pins_of_net_in_pb.count(net_id) == 0) { + cur_pb->pb_stats->num_pins_of_net_in_pb[net_id] = 0; + } + cur_pb->pb_stats->num_pins_of_net_in_pb[net_id]++; +} + +/*****************************************/ +void update_total_gain(float alpha, float beta, bool timing_driven, bool connection_driven, t_pb* pb, AttractionInfo& attraction_groups) { + /*Updates the total gain array to reflect the desired tradeoff between* + *input sharing (sharinggain) and path_length minimization (timinggain) + *input each time a new molecule is added to the cluster.*/ + auto& atom_ctx = g_vpr_ctx.atom(); + t_pb* cur_pb = pb; + + cur_pb = get_top_level_pb(cur_pb); + AttractGroupId cluster_att_grp_id; + + cluster_att_grp_id = cur_pb->pb_stats->attraction_grp_id; + + for (AtomBlockId blk_id : cur_pb->pb_stats->marked_blocks) { + //Initialize connectiongain and sharinggain if + //they have not previously been updated for the block + if (cur_pb->pb_stats->connectiongain.count(blk_id) == 0) { + cur_pb->pb_stats->connectiongain[blk_id] = 0; + } + if (cur_pb->pb_stats->sharinggain.count(blk_id) == 0) { + cur_pb->pb_stats->sharinggain[blk_id] = 0; + } + + AttractGroupId atom_grp_id = attraction_groups.get_atom_attraction_group(blk_id); + if (atom_grp_id != AttractGroupId::INVALID() && atom_grp_id == cluster_att_grp_id) { + //increase gain of atom based on attraction group gain + float att_grp_gain = attraction_groups.get_attraction_group_gain(atom_grp_id); + cur_pb->pb_stats->gain[blk_id] += att_grp_gain; + } + + /* Todo: This was used to explore different normalization options, can + * be made more efficient once we decide on which one to use*/ + int num_used_input_pins = atom_ctx.nlist.block_input_pins(blk_id).size(); + int num_used_output_pins = atom_ctx.nlist.block_output_pins(blk_id).size(); + /* end todo */ + + /* Calculate area-only cost function */ + int num_used_pins = num_used_input_pins + num_used_output_pins; + VTR_ASSERT(num_used_pins > 0); + if (connection_driven) { + /*try to absorb as many connections as possible*/ + cur_pb->pb_stats->gain[blk_id] = ((1 - beta) + * (float)cur_pb->pb_stats->sharinggain[blk_id] + + beta * (float)cur_pb->pb_stats->connectiongain[blk_id]) + / (num_used_pins); + } else { + cur_pb->pb_stats->gain[blk_id] = ((float)cur_pb->pb_stats->sharinggain[blk_id]) + / (num_used_pins); + } + + /* Add in timing driven cost into cost function */ + if (timing_driven) { + cur_pb->pb_stats->gain[blk_id] = alpha + * cur_pb->pb_stats->timinggain[blk_id] + + (1.0 - alpha) * (float)cur_pb->pb_stats->gain[blk_id]; + } + } +} + +/*****************************************/ +void update_cluster_stats(const t_pack_molecule* molecule, + const ClusterBlockId clb_index, + const std::unordered_set& is_clock, + const std::unordered_set& is_global, + const bool global_clocks, + const float alpha, + const float beta, + const bool timing_driven, + const bool connection_driven, + const int high_fanout_net_threshold, + const SetupTimingInfo& timing_info, + AttractionInfo& attraction_groups, + std::unordered_map& net_output_feeds_driving_block_input) { + /* Routine that is called each time a new molecule is added to the cluster. + * Makes calls to update cluster stats such as the gain map for atoms, used pins, and clock structures, + * in order to reflect the new content of the cluster. + * Also keeps track of which attraction group the cluster belongs to. */ + + int molecule_size; + int iblock; + t_pb *cur_pb, *cb; + + auto& atom_ctx = g_vpr_ctx.mutable_atom(); + molecule_size = get_array_size_of_molecule(molecule); + cb = nullptr; + + for (iblock = 0; iblock < molecule_size; iblock++) { + auto blk_id = molecule->atom_block_ids[iblock]; + if (!blk_id) { + continue; + } + + //Update atom netlist mapping + atom_ctx.lookup.set_atom_clb(blk_id, clb_index); + + const t_pb* atom_pb = atom_ctx.lookup.atom_pb(blk_id); + VTR_ASSERT(atom_pb); + + cur_pb = atom_pb->parent_pb; + + //Update attraction group + AttractGroupId atom_grp_id = attraction_groups.get_atom_attraction_group(blk_id); + + while (cur_pb) { + /* reset list of feasible blocks */ + if (cur_pb->is_root()) { + cb = cur_pb; + } + cur_pb->pb_stats->num_feasible_blocks = NOT_VALID; + cur_pb->pb_stats->num_child_blocks_in_pb++; + + if (atom_grp_id != AttractGroupId::INVALID()) { + /* TODO: Allow clusters to have more than one attraction group. */ + cur_pb->pb_stats->attraction_grp_id = atom_grp_id; + } + + cur_pb = cur_pb->parent_pb; + } + + /* Outputs first */ + for (auto pin_id : atom_ctx.nlist.block_output_pins(blk_id)) { + auto net_id = atom_ctx.nlist.pin_net(pin_id); + if (!is_clock.count(net_id) || !global_clocks) { + mark_and_update_partial_gain(net_id, GAIN, blk_id, + timing_driven, + connection_driven, OUTPUT, + timing_info, + is_global, + high_fanout_net_threshold, + net_output_feeds_driving_block_input); + } else { + mark_and_update_partial_gain(net_id, NO_GAIN, blk_id, + timing_driven, + connection_driven, OUTPUT, + timing_info, + is_global, + high_fanout_net_threshold, + net_output_feeds_driving_block_input); + } + } + + /* Next Inputs */ + for (auto pin_id : atom_ctx.nlist.block_input_pins(blk_id)) { + auto net_id = atom_ctx.nlist.pin_net(pin_id); + mark_and_update_partial_gain(net_id, GAIN, blk_id, + timing_driven, connection_driven, + INPUT, + timing_info, + is_global, + high_fanout_net_threshold, + net_output_feeds_driving_block_input); + } + + /* Finally Clocks */ + for (auto pin_id : atom_ctx.nlist.block_clock_pins(blk_id)) { + auto net_id = atom_ctx.nlist.pin_net(pin_id); + if (global_clocks) { + mark_and_update_partial_gain(net_id, NO_GAIN, blk_id, + timing_driven, connection_driven, INPUT, + timing_info, + is_global, + high_fanout_net_threshold, + net_output_feeds_driving_block_input); + } else { + mark_and_update_partial_gain(net_id, GAIN, blk_id, + timing_driven, connection_driven, INPUT, + timing_info, + is_global, + high_fanout_net_threshold, + net_output_feeds_driving_block_input); + } + } + + update_total_gain(alpha, beta, timing_driven, connection_driven, + atom_pb->parent_pb, attraction_groups); + + commit_lookahead_pins_used(cb); + } + + // if this molecule came from the transitive fanout candidates remove it + if (cb) { + cb->pb_stats->transitive_fanout_candidates.erase(molecule->atom_block_ids[molecule->root]); + cb->pb_stats->explore_transitive_fanout = true; + } +} + +void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, + t_pb_graph_node** primitives_list, + ClusterBlockId clb_index, + t_pack_molecule* molecule, + std::map& num_used_type_instances, + const float target_device_utilization, + const int num_models, + const int max_cluster_size, + const t_arch* arch, + std::string device_layout_name, + std::vector* lb_type_rr_graphs, + t_lb_router_data** router_data, + const int detailed_routing_stage, + ClusteredNetlist* clb_nlist, + const std::map>& primitive_candidate_block_types, + int verbosity, + bool enable_pin_feasibility_filter, + bool balance_block_type_utilization, + const int feasible_block_array_size, + PartitionRegion& temp_cluster_pr) { + /* Given a starting seed block, start_new_cluster determines the next cluster type to use + * It expands the FPGA if it cannot find a legal cluster for the atom block + */ + + auto& atom_ctx = g_vpr_ctx.atom(); + auto& device_ctx = g_vpr_ctx.mutable_device(); + auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); + + /*Cluster's PartitionRegion is empty initially, meaning it has no floorplanning constraints*/ + PartitionRegion empty_pr; + floorplanning_ctx.cluster_constraints.push_back(empty_pr); + + /* Allocate a dummy initial cluster and load a atom block as a seed and check if it is legal */ + AtomBlockId root_atom = molecule->atom_block_ids[molecule->root]; + const std::string& root_atom_name = atom_ctx.nlist.block_name(root_atom); + const t_model* root_model = atom_ctx.nlist.block_model(root_atom); + + auto itr = primitive_candidate_block_types.find(root_model); + VTR_ASSERT(itr != primitive_candidate_block_types.end()); + std::vector candidate_types = itr->second; + + if (balance_block_type_utilization) { + //We sort the candidate types in ascending order by their current utilization. + //This means that the packer will prefer to use types with lower utilization. + //This is a naive approach to try balancing utilization when multiple types can + //support the same primitive(s). + std::stable_sort(candidate_types.begin(), candidate_types.end(), + [&](t_logical_block_type_ptr lhs, t_logical_block_type_ptr rhs) { + int lhs_num_instances = 0; + int rhs_num_instances = 0; + // Count number of instances for each type + for (auto type : lhs->equivalent_tiles) + lhs_num_instances += device_ctx.grid.num_instances(type); + for (auto type : rhs->equivalent_tiles) + rhs_num_instances += device_ctx.grid.num_instances(type); + + float lhs_util = vtr::safe_ratio(num_used_type_instances[lhs], lhs_num_instances); + float rhs_util = vtr::safe_ratio(num_used_type_instances[rhs], rhs_num_instances); + //Lower util first + return lhs_util < rhs_util; + }); + } + + if (verbosity > 2) { + VTR_LOG("\tSeed: '%s' (%s)", root_atom_name.c_str(), root_model->name); + VTR_LOGV(molecule->pack_pattern, " molecule_type %s molecule_size %zu", + molecule->pack_pattern->name, molecule->atom_block_ids.size()); + VTR_LOG("\n"); + } + + //Try packing into each candidate type + bool success = false; + for (size_t i = 0; i < candidate_types.size(); i++) { + auto type = candidate_types[i]; + + t_pb* pb = new t_pb; + pb->pb_graph_node = type->pb_graph_head; + alloc_and_load_pb_stats(pb, feasible_block_array_size); + pb->parent_pb = nullptr; + + *router_data = alloc_and_load_router_data(&lb_type_rr_graphs[type->index], type); + + //Try packing into each mode + e_block_pack_status pack_result = BLK_STATUS_UNDEFINED; + for (int j = 0; j < type->pb_graph_head->pb_type->num_modes && !success; j++) { + pb->mode = j; + + reset_cluster_placement_stats(&cluster_placement_stats[type->index]); + set_mode_cluster_placement_stats(pb->pb_graph_node, j); + + //Note that since we are starting a new cluster, we use FULL_EXTERNAL_PIN_UTIL, + //which allows all cluster pins to be used. This ensures that if we have a large + //molecule which would otherwise exceed the external pin utilization targets it + //can use the full set of cluster pins when selected as the seed block -- ensuring + //it is still implementable. + pack_result = try_pack_molecule(&cluster_placement_stats[type->index], + molecule, primitives_list, pb, + num_models, max_cluster_size, clb_index, + detailed_routing_stage, *router_data, + verbosity, + enable_pin_feasibility_filter, + feasible_block_array_size, + FULL_EXTERNAL_PIN_UTIL, + temp_cluster_pr); + + success = (pack_result == BLK_PASSED); + } + + if (success) { + VTR_LOGV(verbosity > 2, "\tPASSED_SEED: Block Type %s\n", type->name); + //Once clustering succeeds, add it to the clb netlist + if (pb->name != nullptr) { + free(pb->name); + } + pb->name = vtr::strdup(root_atom_name.c_str()); + clb_index = clb_nlist->create_block(root_atom_name.c_str(), pb, type); + break; + } else { + VTR_LOGV(verbosity > 2, "\tFAILED_SEED: Block Type %s\n", type->name); + //Free failed clustering and try again + free_router_data(*router_data); + free_pb(pb); + delete pb; + *router_data = nullptr; + } + } + + if (!success) { + //Explored all candidates + if (molecule->type == MOLECULE_FORCED_PACK) { + VPR_FATAL_ERROR(VPR_ERROR_PACK, + "Can not find any logic block that can implement molecule.\n" + "\tPattern %s %s\n", + molecule->pack_pattern->name, + root_atom_name.c_str()); + } else { + VPR_FATAL_ERROR(VPR_ERROR_PACK, + "Can not find any logic block that can implement molecule.\n" + "\tAtom %s (%s)\n", + root_atom_name.c_str(), root_model->name); + } + } + + VTR_ASSERT(success); + + //Successfully create cluster + auto block_type = clb_nlist->block_type(clb_index); + num_used_type_instances[block_type]++; + + /* Expand FPGA size if needed */ + // Check used type instances against the possible equivalent physical locations + unsigned int num_instances = 0; + for (auto equivalent_tile : block_type->equivalent_tiles) { + num_instances += device_ctx.grid.num_instances(equivalent_tile); + } + + if (num_used_type_instances[block_type] > num_instances) { + device_ctx.grid = create_device_grid(device_layout_name, arch->grid_layouts, num_used_type_instances, target_device_utilization); + } +} + +/* + * Get candidate molecule to pack into currently open cluster + * Molecule selection priority: + * 1. Find unpacked molecules based on criticality and strong connectedness (connected by low fanout nets) with current cluster + * 2. Find unpacked molecules based on transitive connections (eg. 2 hops away) with current cluster + * 3. Find unpacked molecules based on weak connectedness (connected by high fanout nets) with current cluster + * 4. Find unpacked molecules based on attraction group of the current cluster (if the cluster has an attraction group) + */ +t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, + AttractionInfo& attraction_groups, + const enum e_gain_type gain_mode, + t_cluster_placement_stats* cluster_placement_stats_ptr, + vtr::vector>& clb_inter_blk_nets, + const ClusterBlockId cluster_index, + bool prioritize_transitive_connectivity, + int transitive_fanout_threshold, + const int feasible_block_array_size, + std::map>& primitive_candidate_block_types) { + /* + * This routine populates a list of feasible blocks outside the cluster, then returns the best candidate for the cluster. + * If there are no feasible blocks it returns a nullptr. + */ + + if (gain_mode == HILL_CLIMBING) { + VPR_FATAL_ERROR(VPR_ERROR_PACK, + "Hill climbing not supported yet, error out.\n"); + } + + // 1. Find unpacked molecules based on criticality and strong connectedness (connected by low fanout nets) with current cluster + if (cur_pb->pb_stats->num_feasible_blocks == NOT_VALID) { + add_cluster_molecule_candidates_by_connectivity_and_timing(cur_pb, cluster_placement_stats_ptr, feasible_block_array_size, attraction_groups); + } + + if (prioritize_transitive_connectivity) { + // 2. Find unpacked molecules based on transitive connections (eg. 2 hops away) with current cluster + if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->explore_transitive_fanout) { + add_cluster_molecule_candidates_by_transitive_connectivity(cur_pb, cluster_placement_stats_ptr, clb_inter_blk_nets, + cluster_index, transitive_fanout_threshold, feasible_block_array_size, attraction_groups); + } + + // 3. Find unpacked molecules based on weak connectedness (connected by high fanout nets) with current cluster + if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->tie_break_high_fanout_net) { + add_cluster_molecule_candidates_by_highfanout_connectivity(cur_pb, cluster_placement_stats_ptr, feasible_block_array_size, attraction_groups); + } + } else { //Reverse order + // 3. Find unpacked molecules based on weak connectedness (connected by high fanout nets) with current cluster + if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->tie_break_high_fanout_net) { + add_cluster_molecule_candidates_by_highfanout_connectivity(cur_pb, cluster_placement_stats_ptr, feasible_block_array_size, attraction_groups); + } + + // 2. Find unpacked molecules based on transitive connections (eg. 2 hops away) with current cluster + if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->explore_transitive_fanout) { + add_cluster_molecule_candidates_by_transitive_connectivity(cur_pb, cluster_placement_stats_ptr, clb_inter_blk_nets, + cluster_index, transitive_fanout_threshold, feasible_block_array_size, attraction_groups); + } + } + + // 4. Find unpacked molecules based on attraction group of the current cluster (if the cluster has an attraction group) + add_cluster_molecule_candidates_by_attraction_group(cur_pb, cluster_placement_stats_ptr, attraction_groups, + feasible_block_array_size, cluster_index, primitive_candidate_block_types); + + /* Grab highest gain molecule */ + t_pack_molecule* molecule = nullptr; + if (cur_pb->pb_stats->num_feasible_blocks > 0) { + cur_pb->pb_stats->num_feasible_blocks--; + int index = cur_pb->pb_stats->num_feasible_blocks; + molecule = cur_pb->pb_stats->feasible_blocks[index]; + VTR_ASSERT(molecule->valid == true); + return molecule; + } + + return molecule; +} + +/* Add molecules with strong connectedness to the current cluster to the list of feasible blocks. */ +void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur_pb, + t_cluster_placement_stats* cluster_placement_stats_ptr, + const int feasible_block_array_size, + AttractionInfo& attraction_groups) { + VTR_ASSERT(cur_pb->pb_stats->num_feasible_blocks == NOT_VALID); + + cur_pb->pb_stats->num_feasible_blocks = 0; + cur_pb->pb_stats->explore_transitive_fanout = true; /* If no legal molecules found, enable exploration of molecules two hops away */ + + auto& atom_ctx = g_vpr_ctx.atom(); + + for (AtomBlockId blk_id : cur_pb->pb_stats->marked_blocks) { + if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + auto rng = atom_ctx.atom_molecules.equal_range(blk_id); + for (const auto& kv : vtr::make_range(rng.first, rng.second)) { + t_pack_molecule* molecule = kv.second; + if (molecule->valid) { + bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); + if (success) { + add_molecule_to_pb_stats_candidates(molecule, + cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); + } + } + } + } + } +} + +/* Add molecules based on weak connectedness (connected by high fanout nets) with current cluster */ +void add_cluster_molecule_candidates_by_highfanout_connectivity(t_pb* cur_pb, + t_cluster_placement_stats* cluster_placement_stats_ptr, + const int feasible_block_array_size, + AttractionInfo& attraction_groups) { + /* Because the packer ignores high fanout nets when marking what blocks + * to consider, use one of the ignored high fanout net to fill up lightly + * related blocks */ + reset_tried_but_unused_cluster_placements(cluster_placement_stats_ptr); + + AtomNetId net_id = cur_pb->pb_stats->tie_break_high_fanout_net; + + auto& atom_ctx = g_vpr_ctx.atom(); + + int count = 0; + for (auto pin_id : atom_ctx.nlist.net_pins(net_id)) { + if (count >= AAPACK_MAX_HIGH_FANOUT_EXPLORE) { + break; + } + + AtomBlockId blk_id = atom_ctx.nlist.pin_block(pin_id); + + if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + auto rng = atom_ctx.atom_molecules.equal_range(blk_id); + for (const auto& kv : vtr::make_range(rng.first, rng.second)) { + t_pack_molecule* molecule = kv.second; + if (molecule->valid) { + bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); + if (success) { + add_molecule_to_pb_stats_candidates(molecule, + cur_pb->pb_stats->gain, cur_pb, std::min(feasible_block_array_size, AAPACK_MAX_HIGH_FANOUT_EXPLORE), attraction_groups); + count++; + } + } + } + } + } + cur_pb->pb_stats->tie_break_high_fanout_net = AtomNetId::INVALID(); /* Mark off that this high fanout net has been considered */ +} + +/* + * If the current cluster being packed has an attraction group associated with it + * (i.e. there are atoms in it that belong to an attraction group), this routine adds molecules + * from the associated attraction group to the list of feasible blocks for the cluster. + * Attraction groups can be very large, so we only add some randomly selected molecules for efficiency + * if the number of atoms in the group is greater than 500. Therefore, the molecules added to the candidates + * will vary each time you call this function. + */ +void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, + t_cluster_placement_stats* cluster_placement_stats_ptr, + AttractionInfo& attraction_groups, + const int feasible_block_array_size, + ClusterBlockId clb_index, + std::map>& primitive_candidate_block_types) { + auto& atom_ctx = g_vpr_ctx.atom(); + auto& cluster_ctx = g_vpr_ctx.clustering(); + + auto cluster_type = cluster_ctx.clb_nlist.block_type(clb_index); + + /* + * For each cluster, we want to explore the attraction group molecules as potential + * candidates for the cluster a limited number of times. This limit is imposed because + * if the cluster belongs to a very large attraction group, we could potentially search + * through its attraction group molecules for a very long time. + * Defining a number of times to search through the attraction groups (i.e. number of + * attraction group pulls) determines how many times we search through the cluster's attraction + * group molecules for candidate molecules. + */ + int num_pulls = attraction_groups.get_att_group_pulls(); + if (cur_pb->pb_stats->pulled_from_atom_groups < num_pulls) { + cur_pb->pb_stats->pulled_from_atom_groups++; + } else { + return; + } + + AttractGroupId grp_id = cur_pb->pb_stats->attraction_grp_id; + if (grp_id == AttractGroupId::INVALID()) { + return; + } + + AttractionGroup& group = attraction_groups.get_attraction_group_info(grp_id); + std::vector available_atoms; + for (AtomBlockId atom_id : group.group_atoms) { + const auto& atom_model = atom_ctx.nlist.block_model(atom_id); + auto itr = primitive_candidate_block_types.find(atom_model); + VTR_ASSERT(itr != primitive_candidate_block_types.end()); + std::vector& candidate_types = itr->second; + + //Only consider molecules that are unpacked and of the correct type + if (atom_ctx.lookup.atom_clb(atom_id) == ClusterBlockId::INVALID() + && std::find(candidate_types.begin(), candidate_types.end(), cluster_type) != candidate_types.end()) { + available_atoms.push_back(atom_id); + } + } + + //int num_available_atoms = group.group_atoms.size(); + int num_available_atoms = available_atoms.size(); + if (num_available_atoms == 0) { + return; + } + + if (num_available_atoms < 500) { + //for (AtomBlockId atom_id : group.group_atoms) { + for (AtomBlockId atom_id : available_atoms) { + const auto& atom_model = atom_ctx.nlist.block_model(atom_id); + auto itr = primitive_candidate_block_types.find(atom_model); + VTR_ASSERT(itr != primitive_candidate_block_types.end()); + std::vector& candidate_types = itr->second; + + //Only consider molecules that are unpacked and of the correct type + if (atom_ctx.lookup.atom_clb(atom_id) == ClusterBlockId::INVALID() + && std::find(candidate_types.begin(), candidate_types.end(), cluster_type) != candidate_types.end()) { + auto rng = atom_ctx.atom_molecules.equal_range(atom_id); + for (const auto& kv : vtr::make_range(rng.first, rng.second)) { + t_pack_molecule* molecule = kv.second; + if (molecule->valid) { + bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); + if (success) { + add_molecule_to_pb_stats_candidates(molecule, + cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); + } + } + } + } + } + return; + } + + int min = 0; + int max = num_available_atoms - 1; + + for (int j = 0; j < 500; j++) { + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> distr(min, max); + int selected_atom = distr(gen); + + //AtomBlockId blk_id = group.group_atoms[selected_atom]; + AtomBlockId blk_id = available_atoms[selected_atom]; + const auto& atom_model = atom_ctx.nlist.block_model(blk_id); + auto itr = primitive_candidate_block_types.find(atom_model); + VTR_ASSERT(itr != primitive_candidate_block_types.end()); + std::vector& candidate_types = itr->second; + + //Only consider molecules that are unpacked and of the correct type + if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID() + && std::find(candidate_types.begin(), candidate_types.end(), cluster_type) != candidate_types.end()) { + auto rng = atom_ctx.atom_molecules.equal_range(blk_id); + for (const auto& kv : vtr::make_range(rng.first, rng.second)) { + t_pack_molecule* molecule = kv.second; + if (molecule->valid) { + bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); + if (success) { + add_molecule_to_pb_stats_candidates(molecule, + cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); + } + } + } + } + } +} + +/* Add molecules based on transitive connections (eg. 2 hops away) with current cluster*/ +void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur_pb, + t_cluster_placement_stats* cluster_placement_stats_ptr, + vtr::vector>& clb_inter_blk_nets, + const ClusterBlockId cluster_index, + int transitive_fanout_threshold, + const int feasible_block_array_size, + AttractionInfo& attraction_groups) { + //TODO: For now, only done by fan-out; should also consider fan-in + cur_pb->pb_stats->explore_transitive_fanout = false; + + /* First time finding transitive fanout candidates therefore alloc and load them */ + load_transitive_fanout_candidates(cluster_index, + cur_pb->pb_stats, + clb_inter_blk_nets, + transitive_fanout_threshold); + /* Only consider candidates that pass a very simple legality check */ + for (const auto& transitive_candidate : cur_pb->pb_stats->transitive_fanout_candidates) { + t_pack_molecule* molecule = transitive_candidate.second; + if (molecule->valid) { + bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); + if (success) { + add_molecule_to_pb_stats_candidates(molecule, + cur_pb->pb_stats->gain, cur_pb, std::min(feasible_block_array_size, AAPACK_MAX_TRANSITIVE_EXPLORE), attraction_groups); + } + } + } +} + +/*Check whether a free primitive exists for each atom block in the molecule*/ +bool check_free_primitives_for_molecule_atoms(t_pack_molecule* molecule, t_cluster_placement_stats* cluster_placement_stats_ptr) { + auto& atom_ctx = g_vpr_ctx.atom(); + bool success = true; + + for (int i_atom = 0; i_atom < get_array_size_of_molecule(molecule); i_atom++) { + if (molecule->atom_block_ids[i_atom]) { + VTR_ASSERT(atom_ctx.lookup.atom_clb(molecule->atom_block_ids[i_atom]) == ClusterBlockId::INVALID()); + auto blk_id2 = molecule->atom_block_ids[i_atom]; + if (!exists_free_primitive_for_atom_block(cluster_placement_stats_ptr, blk_id2)) { + /* TODO (Jason Luu): debating whether to check if placement exists for molecule + * (more robust) or individual atom blocks (faster)*/ + success = false; + break; + } + } + } + + return success; +} + +/*****************************************/ +t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, + AttractionInfo& attraction_groups, + const bool allow_unrelated_clustering, + const bool prioritize_transitive_connectivity, + const int transitive_fanout_threshold, + const int feasible_block_array_size, + int* num_unrelated_clustering_attempts, + t_cluster_placement_stats* cluster_placement_stats_ptr, + vtr::vector>& clb_inter_blk_nets, + ClusterBlockId cluster_index, + int verbosity, + t_molecule_link* unclustered_list_head, + const int& unclustered_list_head_size, + std::map>& primitive_candidate_block_types) { + /* Finds the block with the greatest gain that satisfies the + * input, clock and capacity constraints of a cluster that are + * passed in. If no suitable block is found it returns ClusterBlockId::INVALID(). + */ + + VTR_ASSERT(cur_pb->is_root()); + + /* If cannot pack into primitive, try packing into cluster */ + + auto best_molecule = get_highest_gain_molecule(cur_pb, attraction_groups, + NOT_HILL_CLIMBING, cluster_placement_stats_ptr, clb_inter_blk_nets, + cluster_index, prioritize_transitive_connectivity, + transitive_fanout_threshold, feasible_block_array_size, primitive_candidate_block_types); + + /* If no blocks have any gain to the current cluster, the code above * + * will not find anything. However, another atom block with no inputs in * + * common with the cluster may still be inserted into the cluster. */ + + if (allow_unrelated_clustering) { + if (best_molecule == nullptr) { + if (*num_unrelated_clustering_attempts == 0) { + best_molecule = get_free_molecule_with_most_ext_inputs_for_cluster(cur_pb, + cluster_placement_stats_ptr, + unclustered_list_head, + unclustered_list_head_size); + (*num_unrelated_clustering_attempts)++; + VTR_LOGV(best_molecule && verbosity > 2, "\tFound unrelated molecule to cluster\n"); + } + } else { + *num_unrelated_clustering_attempts = 0; + } + } else { + VTR_LOGV(!best_molecule && verbosity > 2, "\tNo related molecule found and unrelated clustering disabled\n"); + } + + return best_molecule; +} + +void mark_all_molecules_valid(t_pack_molecule* molecule_head) { + for (auto cur_molecule = molecule_head; cur_molecule != nullptr; cur_molecule = cur_molecule->next) { + cur_molecule->valid = true; + } +} + +int count_molecules(t_pack_molecule* molecule_head) { + int num_molecules = 0; + for (auto cur_molecule = molecule_head; cur_molecule != nullptr; cur_molecule = cur_molecule->next) { + ++num_molecules; + } + return num_molecules; +} + +//Calculates molecule statistics for a single molecule +t_molecule_stats calc_molecule_stats(const t_pack_molecule* molecule) { + t_molecule_stats molecule_stats; + + auto& atom_ctx = g_vpr_ctx.atom(); + + //Calculate the number of available pins on primitives within the molecule + for (auto blk : molecule->atom_block_ids) { + if (!blk) continue; + + ++molecule_stats.num_blocks; //Record number of valid blocks in molecule + + const t_model* model = atom_ctx.nlist.block_model(blk); + + for (const t_model_ports* input_port = model->inputs; input_port != nullptr; input_port = input_port->next) { + molecule_stats.num_input_pins += input_port->size; + } + + for (const t_model_ports* output_port = model->outputs; output_port != nullptr; output_port = output_port->next) { + molecule_stats.num_output_pins += output_port->size; + } + } + molecule_stats.num_pins = molecule_stats.num_input_pins + molecule_stats.num_output_pins; + + //Calculate the number of externally used pins + std::set molecule_atoms(molecule->atom_block_ids.begin(), molecule->atom_block_ids.end()); + for (auto blk : molecule->atom_block_ids) { + if (!blk) continue; + + for (auto pin : atom_ctx.nlist.block_pins(blk)) { + auto net = atom_ctx.nlist.pin_net(pin); + + auto pin_type = atom_ctx.nlist.pin_type(pin); + if (pin_type == PinType::SINK) { + auto driver_blk = atom_ctx.nlist.net_driver_block(net); + + if (molecule_atoms.count(driver_blk)) { + //Pin driven by a block within the molecule + //Does not count as an external connection + } else { + //Pin driven by a block outside the molecule + ++molecule_stats.num_used_ext_inputs; + } + + } else { + VTR_ASSERT(pin_type == PinType::DRIVER); + + bool net_leaves_molecule = false; + for (auto sink_pin : atom_ctx.nlist.net_sinks(net)) { + auto sink_blk = atom_ctx.nlist.pin_block(sink_pin); + + if (!molecule_atoms.count(sink_blk)) { + //There is at least one sink outside of the current molecule + net_leaves_molecule = true; + break; + } + } + + //We assume that any fanout occurs outside of the molecule, hence we only + //count one used output (even if there are multiple sinks outside the molecule) + if (net_leaves_molecule) { + ++molecule_stats.num_used_ext_outputs; + } + } + } + } + molecule_stats.num_used_ext_pins = molecule_stats.num_used_ext_inputs + molecule_stats.num_used_ext_outputs; + + return molecule_stats; +} + +//Calculates maximum molecule statistics accross all molecules in linked list +t_molecule_stats calc_max_molecules_stats(const t_pack_molecule* molecule_head) { + t_molecule_stats max_molecules_stats; + + for (auto cur_molecule = molecule_head; cur_molecule != nullptr; cur_molecule = cur_molecule->next) { + //Calculate per-molecule statistics + t_molecule_stats cur_molecule_stats = calc_molecule_stats(cur_molecule); + + //Record the maximums (member-wise) over all molecules + max_molecules_stats.num_blocks = std::max(max_molecules_stats.num_blocks, cur_molecule_stats.num_blocks); + + max_molecules_stats.num_pins = std::max(max_molecules_stats.num_pins, cur_molecule_stats.num_pins); + max_molecules_stats.num_input_pins = std::max(max_molecules_stats.num_input_pins, cur_molecule_stats.num_input_pins); + max_molecules_stats.num_output_pins = std::max(max_molecules_stats.num_output_pins, cur_molecule_stats.num_output_pins); + + max_molecules_stats.num_used_ext_pins = std::max(max_molecules_stats.num_used_ext_pins, cur_molecule_stats.num_used_ext_pins); + max_molecules_stats.num_used_ext_inputs = std::max(max_molecules_stats.num_used_ext_inputs, cur_molecule_stats.num_used_ext_inputs); + max_molecules_stats.num_used_ext_outputs = std::max(max_molecules_stats.num_used_ext_outputs, cur_molecule_stats.num_used_ext_outputs); + } + + return max_molecules_stats; +} + +std::vector initialize_seed_atoms(const e_cluster_seed seed_type, + const t_molecule_stats& max_molecule_stats, + const vtr::vector& atom_criticality) { + std::vector seed_atoms; + + //Put all atoms in seed list + auto& atom_ctx = g_vpr_ctx.atom(); + for (auto blk : atom_ctx.nlist.blocks()) { + seed_atoms.emplace_back(blk); + } + + //Initially all gains are zero + vtr::vector atom_gains(atom_ctx.nlist.blocks().size(), 0.); + + if (seed_type == e_cluster_seed::TIMING) { + VTR_ASSERT(atom_gains.size() == atom_criticality.size()); + + //By criticality + atom_gains = atom_criticality; + + } else if (seed_type == e_cluster_seed::MAX_INPUTS) { + //By number of used molecule input pins + for (auto blk : atom_ctx.nlist.blocks()) { + int max_molecule_inputs = 0; + auto molecule_rng = atom_ctx.atom_molecules.equal_range(blk); + for (const auto& kv : vtr::make_range(molecule_rng.first, molecule_rng.second)) { + const t_pack_molecule* blk_mol = kv.second; + + const t_molecule_stats molecule_stats = calc_molecule_stats(blk_mol); + + //Keep the max over all molecules associated with the atom + max_molecule_inputs = std::max(max_molecule_inputs, molecule_stats.num_used_ext_inputs); + } + + atom_gains[blk] = max_molecule_inputs; + } + + } else if (seed_type == e_cluster_seed::BLEND) { + //By blended gain (criticality and inputs used) + for (auto blk : atom_ctx.nlist.blocks()) { + /* Score seed gain of each block as a weighted sum of timing criticality, + * number of tightly coupled blocks connected to it, and number of external inputs */ + float seed_blend_fac = 0.5; + float max_blend_gain = 0; + + auto molecule_rng = atom_ctx.atom_molecules.equal_range(blk); + for (const auto& kv : vtr::make_range(molecule_rng.first, molecule_rng.second)) { + const t_pack_molecule* blk_mol = kv.second; + + const t_molecule_stats molecule_stats = calc_molecule_stats(blk_mol); + + VTR_ASSERT(max_molecule_stats.num_used_ext_inputs > 0); + + float blend_gain = (seed_blend_fac * atom_criticality[blk] + + (1 - seed_blend_fac) * (molecule_stats.num_used_ext_inputs / max_molecule_stats.num_used_ext_inputs)); + blend_gain *= (1 + 0.2 * (molecule_stats.num_blocks - 1)); + + //Keep the max over all molecules associated with the atom + max_blend_gain = std::max(max_blend_gain, blend_gain); + } + atom_gains[blk] = max_blend_gain; + } + + } else if (seed_type == e_cluster_seed::MAX_PINS || seed_type == e_cluster_seed::MAX_INPUT_PINS) { + //By pins per molecule (i.e. available pins on primitives, not pins in use) + + for (auto blk : atom_ctx.nlist.blocks()) { + int max_molecule_pins = 0; + auto molecule_rng = atom_ctx.atom_molecules.equal_range(blk); + for (const auto& kv : vtr::make_range(molecule_rng.first, molecule_rng.second)) { + const t_pack_molecule* mol = kv.second; + + const t_molecule_stats molecule_stats = calc_molecule_stats(mol); + + //Keep the max over all molecules associated with the atom + int molecule_pins = 0; + if (seed_type == e_cluster_seed::MAX_PINS) { + //All pins + molecule_pins = molecule_stats.num_pins; + } else { + VTR_ASSERT(seed_type == e_cluster_seed::MAX_INPUT_PINS); + //Input pins only + molecule_pins = molecule_stats.num_input_pins; + } + + //Keep the max over all molecules associated with the atom + max_molecule_pins = std::max(max_molecule_pins, molecule_pins); + } + atom_gains[blk] = max_molecule_pins; + } + + } else if (seed_type == e_cluster_seed::BLEND2) { + for (auto blk : atom_ctx.nlist.blocks()) { + float max_gain = 0; + auto molecule_rng = atom_ctx.atom_molecules.equal_range(blk); + for (const auto& kv : vtr::make_range(molecule_rng.first, molecule_rng.second)) { + const t_pack_molecule* mol = kv.second; + + const t_molecule_stats molecule_stats = calc_molecule_stats(mol); + + float pin_ratio = vtr::safe_ratio(molecule_stats.num_pins, max_molecule_stats.num_pins); + float input_pin_ratio = vtr::safe_ratio(molecule_stats.num_input_pins, max_molecule_stats.num_input_pins); + float output_pin_ratio = vtr::safe_ratio(molecule_stats.num_output_pins, max_molecule_stats.num_output_pins); + float used_ext_pin_ratio = vtr::safe_ratio(molecule_stats.num_used_ext_pins, max_molecule_stats.num_used_ext_pins); + float used_ext_input_pin_ratio = vtr::safe_ratio(molecule_stats.num_used_ext_inputs, max_molecule_stats.num_used_ext_inputs); + float used_ext_output_pin_ratio = vtr::safe_ratio(molecule_stats.num_used_ext_outputs, max_molecule_stats.num_used_ext_outputs); + float num_blocks_ratio = vtr::safe_ratio(molecule_stats.num_blocks, max_molecule_stats.num_blocks); + float criticality = atom_criticality[blk]; + + constexpr float PIN_WEIGHT = 0.; + constexpr float INPUT_PIN_WEIGHT = 0.5; + constexpr float OUTPUT_PIN_WEIGHT = 0.; + constexpr float USED_PIN_WEIGHT = 0.; + constexpr float USED_INPUT_PIN_WEIGHT = 0.2; + constexpr float USED_OUTPUT_PIN_WEIGHT = 0.; + constexpr float BLOCKS_WEIGHT = 0.2; + constexpr float CRITICALITY_WEIGHT = 0.1; + + float gain = PIN_WEIGHT * pin_ratio + + INPUT_PIN_WEIGHT * input_pin_ratio + + OUTPUT_PIN_WEIGHT * output_pin_ratio + + + USED_PIN_WEIGHT * used_ext_pin_ratio + + USED_INPUT_PIN_WEIGHT * used_ext_input_pin_ratio + + USED_OUTPUT_PIN_WEIGHT * used_ext_output_pin_ratio + + + BLOCKS_WEIGHT * num_blocks_ratio + + CRITICALITY_WEIGHT * criticality; + + max_gain = std::max(max_gain, gain); + } + + atom_gains[blk] = max_gain; + } + + } else { + VPR_FATAL_ERROR(VPR_ERROR_PACK, "Unrecognized cluster seed type"); + } + + //Sort seeds in descending order of gain (i.e. highest gain first) + // + // Note that we use a *stable* sort here. It has been observed that different + // standard library implementations (e.g. gcc-4.9 vs gcc-5) use sorting algorithms + // which produce different orderings for seeds of equal gain (which is allowed with + // std::sort which does not specify how equal values are handled). Using a stable + // sort ensures that regardless of the underlying sorting algorithm the same seed + // order is produced regardless of compiler. + auto by_descending_gain = [&](const AtomBlockId lhs, const AtomBlockId rhs) { + return atom_gains[lhs] > atom_gains[rhs]; + }; + std::stable_sort(seed_atoms.begin(), seed_atoms.end(), by_descending_gain); + + if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_CLUSTERING_BLOCK_CRITICALITIES)) { + print_seed_gains(getEchoFileName(E_ECHO_CLUSTERING_BLOCK_CRITICALITIES), seed_atoms, atom_gains, atom_criticality); + } + + return seed_atoms; +} + +t_pack_molecule* get_highest_gain_seed_molecule(int* seedindex, const std::vector seed_atoms) { + auto& atom_ctx = g_vpr_ctx.atom(); + + while (*seedindex < static_cast(seed_atoms.size())) { + AtomBlockId blk_id = seed_atoms[(*seedindex)++]; + + if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + t_pack_molecule* best = nullptr; + + auto rng = atom_ctx.atom_molecules.equal_range(blk_id); + for (const auto& kv : vtr::make_range(rng.first, rng.second)) { + t_pack_molecule* molecule = kv.second; + if (molecule->valid) { + if (best == nullptr || (best->base_gain) < (molecule->base_gain)) { + best = molecule; + } + } + } + VTR_ASSERT(best != nullptr); + return best; + } + } + + /*if it makes it to here , there are no more blocks available*/ + return nullptr; +} + +/* get gain of packing molecule into current cluster + * gain is equal to: + * total_block_gain + * + molecule_base_gain*some_factor + * - introduced_input_nets_of_unrelated_blocks_pulled_in_by_molecule*some_other_factor + */ +float get_molecule_gain(t_pack_molecule* molecule, std::map& blk_gain, AttractGroupId cluster_attraction_group_id, AttractionInfo& attraction_groups, int num_molecule_failures) { + float gain; + int i; + int num_introduced_inputs_of_indirectly_related_block; + auto& atom_ctx = g_vpr_ctx.atom(); + + gain = 0; + float attraction_group_penalty = 0.1; + + num_introduced_inputs_of_indirectly_related_block = 0; + for (i = 0; i < get_array_size_of_molecule(molecule); i++) { + auto blk_id = molecule->atom_block_ids[i]; + if (blk_id) { + if (blk_gain.count(blk_id) > 0) { + gain += blk_gain[blk_id]; + } else { + /* This block has no connection with current cluster, penalize molecule for having this block + */ + for (auto pin_id : atom_ctx.nlist.block_input_pins(blk_id)) { + auto net_id = atom_ctx.nlist.pin_net(pin_id); + VTR_ASSERT(net_id); + + auto driver_pin_id = atom_ctx.nlist.net_driver(net_id); + VTR_ASSERT(driver_pin_id); + + auto driver_blk_id = atom_ctx.nlist.pin_block(driver_pin_id); + + num_introduced_inputs_of_indirectly_related_block++; + for (int iblk = 0; iblk < get_array_size_of_molecule(molecule); iblk++) { + if (molecule->atom_block_ids[iblk] && driver_blk_id == molecule->atom_block_ids[iblk]) { + //valid block which is driver (and hence not an input) + num_introduced_inputs_of_indirectly_related_block--; + break; + } + } + } + } + AttractGroupId atom_grp_id = attraction_groups.get_atom_attraction_group(blk_id); + if (atom_grp_id == cluster_attraction_group_id && cluster_attraction_group_id != AttractGroupId::INVALID()) { + float att_grp_gain = attraction_groups.get_attraction_group_gain(atom_grp_id); + gain += att_grp_gain; + } else if (cluster_attraction_group_id != AttractGroupId::INVALID() && atom_grp_id != cluster_attraction_group_id) { + gain -= attraction_group_penalty; + } + } + } + + gain += molecule->base_gain * 0.0001; /* Use base gain as tie breaker TODO: need to sweep this value and perhaps normalize */ + gain -= num_introduced_inputs_of_indirectly_related_block * (0.001); + + if (num_molecule_failures > 0 && attraction_groups.num_attraction_groups() > 0) { + gain -= 0.1 * num_molecule_failures; + } + + return gain; +} + +int compare_molecule_gain(const void* a, const void* b) { + float base_gain_a, base_gain_b, diff; + const t_pack_molecule *molecule_a, *molecule_b; + molecule_a = (*(const t_pack_molecule* const*)a); + molecule_b = (*(const t_pack_molecule* const*)b); + + base_gain_a = molecule_a->base_gain; + base_gain_b = molecule_b->base_gain; + diff = base_gain_a - base_gain_b; + if (diff > 0) { + return 1; + } + if (diff < 0) { + return -1; + } + return 0; +} + +/* Determine if speculatively packed cur_pb is pin feasible + * Runtime is actually not that bad for this. It's worst case O(k^2) where k is the + * number of pb_graph pins. Can use hash tables or make incremental if becomes an issue. + */ +void try_update_lookahead_pins_used(t_pb* cur_pb) { + int i, j; + const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; + + // run recursively till a leaf (primitive) pb block is reached + if (pb_type->num_modes > 0 && cur_pb->name != nullptr) { + if (cur_pb->child_pbs != nullptr) { + for (i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { + if (cur_pb->child_pbs[i] != nullptr) { + for (j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { + try_update_lookahead_pins_used(&cur_pb->child_pbs[i][j]); + } + } + } + } + } else { + // find if this child (primitive) pb block has an atom mapped to it, + // if yes compute and mark lookahead pins used for that pb block + auto& atom_ctx = g_vpr_ctx.atom(); + AtomBlockId blk_id = atom_ctx.lookup.pb_atom(cur_pb); + if (pb_type->blif_model != nullptr && blk_id) { + compute_and_mark_lookahead_pins_used(blk_id); + } + } +} + +/* Resets nets used at different pin classes for determining pin feasibility */ +void reset_lookahead_pins_used(t_pb* cur_pb) { + int i, j; + const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; + if (cur_pb->pb_stats == nullptr) { + return; /* No pins used, no need to continue */ + } + + if (pb_type->num_modes > 0 && cur_pb->name != nullptr) { + for (i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { + cur_pb->pb_stats->lookahead_input_pins_used[i].clear(); + } + + for (i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) { + cur_pb->pb_stats->lookahead_output_pins_used[i].clear(); + } + + if (cur_pb->child_pbs != nullptr) { + for (i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { + if (cur_pb->child_pbs[i] != nullptr) { + for (j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { + reset_lookahead_pins_used(&cur_pb->child_pbs[i][j]); + } + } + } + } + } +} + +/* Determine if pins of speculatively packed pb are legal */ +void compute_and_mark_lookahead_pins_used(const AtomBlockId blk_id) { + auto& atom_ctx = g_vpr_ctx.atom(); + + const t_pb* cur_pb = atom_ctx.lookup.atom_pb(blk_id); + VTR_ASSERT(cur_pb != nullptr); + + /* Walk through inputs, outputs, and clocks marking pins off of the same class */ + for (auto pin_id : atom_ctx.nlist.block_pins(blk_id)) { + auto net_id = atom_ctx.nlist.pin_net(pin_id); + + const t_pb_graph_pin* pb_graph_pin = find_pb_graph_pin(atom_ctx.nlist, atom_ctx.lookup, pin_id); + compute_and_mark_lookahead_pins_used_for_pin(pb_graph_pin, cur_pb, net_id); + } +} + +/** + * Given a pin and its assigned net, mark all pin classes that are affected. + * Check if connecting this pin to it's driver pin or to all sink pins will + * require leaving a pb_block starting from the parent pb_block of the + * primitive till the root block (depth = 0). If leaving a pb_block is + * required add this net to the pin class (to increment the number of used + * pins from this class) that should be used to leave the pb_block. + */ +void compute_and_mark_lookahead_pins_used_for_pin(const t_pb_graph_pin* pb_graph_pin, const t_pb* primitive_pb, const AtomNetId net_id) { + auto& atom_ctx = g_vpr_ctx.atom(); + + // starting from the parent pb of the input primitive go up in the hierarchy till the root block + for (auto cur_pb = primitive_pb->parent_pb; cur_pb; cur_pb = cur_pb->parent_pb) { + const auto depth = cur_pb->pb_graph_node->pb_type->depth; + const auto pin_class = pb_graph_pin->parent_pin_class[depth]; + VTR_ASSERT(pin_class != OPEN); + + const auto driver_blk_id = atom_ctx.nlist.net_driver_block(net_id); + + // if this primitive pin is an input pin + if (pb_graph_pin->port->type == IN_PORT) { + /* find location of net driver if exist in clb, NULL otherwise */ + // find the driver of the input net connected to the pin being studied + const auto driver_pin_id = atom_ctx.nlist.net_driver(net_id); + // find the id of the atom occupying the input primitive_pb + const auto prim_blk_id = atom_ctx.lookup.pb_atom(primitive_pb); + // find the pb block occupied by the driving atom + const auto driver_pb = atom_ctx.lookup.atom_pb(driver_blk_id); + // pb_graph_pin driving net_id in the driver pb block + t_pb_graph_pin* output_pb_graph_pin = nullptr; + // if the driver block is in the same clb as the input primitive block + if (atom_ctx.lookup.atom_clb(driver_blk_id) == atom_ctx.lookup.atom_clb(prim_blk_id)) { + // get pb_graph_pin driving the given net + output_pb_graph_pin = get_driver_pb_graph_pin(driver_pb, driver_pin_id); + } + + bool is_reachable = false; + + // if the driver pin is within the cluster + if (output_pb_graph_pin) { + // find if the driver pin can reach the input pin of the primitive or not + const t_pb* check_pb = driver_pb; + while (check_pb && check_pb != cur_pb) { + check_pb = check_pb->parent_pb; + } + if (check_pb) { + for (int i = 0; i < output_pb_graph_pin->num_connectable_primitive_input_pins[depth]; i++) { + if (pb_graph_pin == output_pb_graph_pin->list_of_connectable_input_pin_ptrs[depth][i]) { + is_reachable = true; + break; + } + } + } + } + + // Must use an input pin to connect the driver to the input pin of the given primitive, either the + // driver atom is not contained in the cluster or is contained but cannot reach the primitive pin + if (!is_reachable) { + // add net to lookahead_input_pins_used if not already added + auto it = std::find(cur_pb->pb_stats->lookahead_input_pins_used[pin_class].begin(), + cur_pb->pb_stats->lookahead_input_pins_used[pin_class].end(), net_id); + if (it == cur_pb->pb_stats->lookahead_input_pins_used[pin_class].end()) { + cur_pb->pb_stats->lookahead_input_pins_used[pin_class].push_back(net_id); + } + } + } else { + VTR_ASSERT(pb_graph_pin->port->type == OUT_PORT); + /* + * Determine if this net (which is driven from within this cluster) leaves this cluster + * (and hence uses an output pin). + */ + + bool net_exits_cluster = true; + int num_net_sinks = static_cast(atom_ctx.nlist.net_sinks(net_id).size()); + + if (pb_graph_pin->num_connectable_primitive_input_pins[depth] >= num_net_sinks) { + //It is possible the net is completely absorbed in the cluster, + //since this pin could (potentially) drive all the net's sinks + + /* Important: This runtime penalty looks a lot scarier than it really is. + * For high fan-out nets, I at most look at the number of pins within the + * cluster which limits runtime. + * + * DO NOT REMOVE THIS INITIAL FILTER WITHOUT CAREFUL ANALYSIS ON RUNTIME!!! + * + * Key Observation: + * For LUT-based designs it is impossible for the average fanout to exceed + * the number of LUT inputs so it's usually around 4-5 (pigeon-hole argument, + * if the average fanout is greater than the number of LUT inputs, where do + * the extra connections go? Therefore, average fanout must be capped to a + * small constant where the constant is equal to the number of LUT inputs). + * The real danger to runtime is when the number of sinks of a net gets doubled + */ + + //Check if all the net sinks are, in fact, inside this cluster + bool all_sinks_in_cur_cluster = true; + ClusterBlockId driver_clb = atom_ctx.lookup.atom_clb(driver_blk_id); + for (auto pin_id : atom_ctx.nlist.net_sinks(net_id)) { + auto sink_blk_id = atom_ctx.nlist.pin_block(pin_id); + if (atom_ctx.lookup.atom_clb(sink_blk_id) != driver_clb) { + all_sinks_in_cur_cluster = false; + break; + } + } + + if (all_sinks_in_cur_cluster) { + //All the sinks are part of this cluster, so the net may be fully absorbed. + // + //Verify this, by counting the number of net sinks reachable from the driver pin. + //If the count equals the number of net sinks then the net is fully absorbed and + //the net does not exit the cluster + /* TODO: I should cache the absorbed outputs, once net is absorbed, + * net is forever absorbed, no point in rechecking every time */ + if (net_sinks_reachable_in_cluster(pb_graph_pin, depth, net_id)) { + //All the sinks are reachable inside the cluster + net_exits_cluster = false; + } + } + } + + if (net_exits_cluster) { + /* This output must exit this cluster */ + cur_pb->pb_stats->lookahead_output_pins_used[pin_class].push_back(net_id); + } + } + } +} + +int net_sinks_reachable_in_cluster(const t_pb_graph_pin* driver_pb_gpin, const int depth, const AtomNetId net_id) { + size_t num_reachable_sinks = 0; + auto& atom_ctx = g_vpr_ctx.atom(); + + //Record the sink pb graph pins we are looking for + std::unordered_set sink_pb_gpins; + for (const AtomPinId pin_id : atom_ctx.nlist.net_sinks(net_id)) { + const t_pb_graph_pin* sink_pb_gpin = find_pb_graph_pin(atom_ctx.nlist, atom_ctx.lookup, pin_id); + VTR_ASSERT(sink_pb_gpin); + + sink_pb_gpins.insert(sink_pb_gpin); + } + + //Count how many sink pins are reachable + for (int i_prim_pin = 0; i_prim_pin < driver_pb_gpin->num_connectable_primitive_input_pins[depth]; ++i_prim_pin) { + const t_pb_graph_pin* reachable_pb_gpin = driver_pb_gpin->list_of_connectable_input_pin_ptrs[depth][i_prim_pin]; + + if (sink_pb_gpins.count(reachable_pb_gpin)) { + ++num_reachable_sinks; + if (num_reachable_sinks == atom_ctx.nlist.net_sinks(net_id).size()) { + return true; + } + } + } + + return false; +} + +/** + * Returns the pb_graph_pin of the atom pin defined by the driver_pin_id in the driver_pb + */ +t_pb_graph_pin* get_driver_pb_graph_pin(const t_pb* driver_pb, const AtomPinId driver_pin_id) { + auto& atom_ctx = g_vpr_ctx.atom(); + const auto driver_pb_type = driver_pb->pb_graph_node->pb_type; + int output_port = 0; + // find the port of the pin driving the net as well as the port model + auto driver_port_id = atom_ctx.nlist.pin_port(driver_pin_id); + auto driver_model_port = atom_ctx.nlist.port_model(driver_port_id); + // find the port id of the port containing the driving pin in the driver_pb_type + for (int i = 0; i < driver_pb_type->num_ports; i++) { + auto& prim_port = driver_pb_type->ports[i]; + if (prim_port.type == OUT_PORT) { + if (prim_port.model_port == driver_model_port) { + // get the output pb_graph_pin driving this input net + return &(driver_pb->pb_graph_node->output_pins[output_port][atom_ctx.nlist.pin_port_bit(driver_pin_id)]); + } + output_port++; + } + } + // the pin should be found + VTR_ASSERT(false); + return nullptr; +} + +/* Check if the number of available inputs/outputs for a pin class is sufficient for speculatively packed blocks */ +bool check_lookahead_pins_used(t_pb* cur_pb, t_ext_pin_util max_external_pin_util) { + const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; + + if (pb_type->num_modes > 0 && cur_pb->name) { + for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { + size_t class_size = cur_pb->pb_graph_node->input_pin_class_size[i]; + + if (cur_pb->is_root()) { + // Scale the class size by the maximum external pin utilization factor + // Use ceil to avoid classes of size 1 from being scaled to zero + class_size = std::ceil(max_external_pin_util.input_pin_util * class_size); + // if the number of pins already used is larger than class size, then the number of + // cluster inputs already used should be our constraint. Why is this needed? This is + // needed since when packing the seed block the maximum external pin utilization is + // used as 1.0 allowing molecules that are using up to all the cluster inputs to be + // packed legally. Therefore, if the seed block is already using more inputs than + // the allowed maximum utilization, this should become the new maximum pin utilization. + class_size = std::max(class_size, cur_pb->pb_stats->input_pins_used[i].size()); + } + + if (cur_pb->pb_stats->lookahead_input_pins_used[i].size() > class_size) { + return false; + } + } + + for (int i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) { + size_t class_size = cur_pb->pb_graph_node->output_pin_class_size[i]; + if (cur_pb->is_root()) { + // Scale the class size by the maximum external pin utilization factor + // Use ceil to avoid classes of size 1 from being scaled to zero + class_size = std::ceil(max_external_pin_util.output_pin_util * class_size); + // if the number of pins already used is larger than class size, then the number of + // cluster outputs already used should be our constraint. Why is this needed? This is + // needed since when packing the seed block the maximum external pin utilization is + // used as 1.0 allowing molecules that are using up to all the cluster inputs to be + // packed legally. Therefore, if the seed block is already using more inputs than + // the allowed maximum utilization, this should become the new maximum pin utilization. + class_size = std::max(class_size, cur_pb->pb_stats->output_pins_used[i].size()); + } + + if (cur_pb->pb_stats->lookahead_output_pins_used[i].size() > class_size) { + return false; + } + } + + if (cur_pb->child_pbs) { + for (int i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { + if (cur_pb->child_pbs[i]) { + for (int j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { + if (!check_lookahead_pins_used(&cur_pb->child_pbs[i][j], max_external_pin_util)) + return false; + } + } + } + } + } + + return true; +} + +/* Speculation successful, commit input/output pins used */ +void commit_lookahead_pins_used(t_pb* cur_pb) { + const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type; + + if (pb_type->num_modes > 0 && cur_pb->name) { + for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { + VTR_ASSERT(cur_pb->pb_stats->lookahead_input_pins_used[i].size() <= (unsigned int)cur_pb->pb_graph_node->input_pin_class_size[i]); + for (size_t j = 0; j < cur_pb->pb_stats->lookahead_input_pins_used[i].size(); j++) { + VTR_ASSERT(cur_pb->pb_stats->lookahead_input_pins_used[i][j]); + cur_pb->pb_stats->input_pins_used[i].insert({j, cur_pb->pb_stats->lookahead_input_pins_used[i][j]}); + } + } + + for (int i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) { + VTR_ASSERT(cur_pb->pb_stats->lookahead_output_pins_used[i].size() <= (unsigned int)cur_pb->pb_graph_node->output_pin_class_size[i]); + for (size_t j = 0; j < cur_pb->pb_stats->lookahead_output_pins_used[i].size(); j++) { + VTR_ASSERT(cur_pb->pb_stats->lookahead_output_pins_used[i][j]); + cur_pb->pb_stats->output_pins_used[i].insert({j, cur_pb->pb_stats->lookahead_output_pins_used[i][j]}); + } + } + + if (cur_pb->child_pbs) { + for (int i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) { + if (cur_pb->child_pbs[i]) { + for (int j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) { + commit_lookahead_pins_used(&cur_pb->child_pbs[i][j]); + } + } + } + } + } +} + +/** + * Score unclustered atoms that are two hops away from current cluster + * For example, consider a cluster that has a FF feeding an adder in another + * cluster. Since this FF is feeding an adder that is packed in another cluster + * this function should find other FFs that are feeding other inputs of this adder + * since they are two hops away from the FF packed in this cluster + */ +void load_transitive_fanout_candidates(ClusterBlockId clb_index, + t_pb_stats* pb_stats, + vtr::vector>& clb_inter_blk_nets, + int transitive_fanout_threshold) { + auto& atom_ctx = g_vpr_ctx.atom(); + + // iterate over all the nets that have pins in this cluster + for (const auto net_id : pb_stats->marked_nets) { + // only consider small nets to constrain runtime + if (int(atom_ctx.nlist.net_pins(net_id).size()) < transitive_fanout_threshold + 1) { + // iterate over all the pins of the net + for (const auto pin_id : atom_ctx.nlist.net_pins(net_id)) { + AtomBlockId atom_blk_id = atom_ctx.nlist.pin_block(pin_id); + // get the transitive cluster + ClusterBlockId tclb = atom_ctx.lookup.atom_clb(atom_blk_id); + // if the block connected to this pin is packed in another cluster + if (tclb != clb_index && tclb != ClusterBlockId::INVALID()) { + // explore transitive nets from already packed cluster + for (AtomNetId tnet : clb_inter_blk_nets[tclb]) { + // iterate over all the pins of the net + for (AtomPinId tpin : atom_ctx.nlist.net_pins(tnet)) { + auto blk_id = atom_ctx.nlist.pin_block(tpin); + // This transitive atom is not packed, score and add + if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { + auto& transitive_fanout_candidates = pb_stats->transitive_fanout_candidates; + + if (pb_stats->gain.count(blk_id) == 0) { + pb_stats->gain[blk_id] = 0.001; + } else { + pb_stats->gain[blk_id] += 0.001; + } + auto rng = atom_ctx.atom_molecules.equal_range(blk_id); + for (const auto& kv : vtr::make_range(rng.first, rng.second)) { + t_pack_molecule* molecule = kv.second; + if (molecule->valid) { + transitive_fanout_candidates.insert({molecule->atom_block_ids[molecule->root], molecule}); + } + } + } + } + } + } + } + } + } +} + +std::map> identify_primitive_candidate_block_types() { + std::map> model_candidates; + auto& atom_ctx = g_vpr_ctx.atom(); + auto& atom_nlist = atom_ctx.nlist; + auto& device_ctx = g_vpr_ctx.device(); + + std::set unique_models; + for (auto blk : atom_nlist.blocks()) { + auto model = atom_nlist.block_model(blk); + unique_models.insert(model); + } + + for (auto model : unique_models) { + model_candidates[model] = {}; + + for (auto const& type : device_ctx.logical_block_types) { + if (block_type_contains_blif_model(&type, model->name)) { + model_candidates[model].push_back(&type); + } + } + } + + return model_candidates; +} + +void print_seed_gains(const char* fname, const std::vector& seed_atoms, const vtr::vector& atom_gain, const vtr::vector& atom_criticality) { + FILE* fp = vtr::fopen(fname, "w"); + + auto& atom_ctx = g_vpr_ctx.atom(); + + //For prett formatting determine the maximum name length + int max_name_len = strlen("atom_block_name"); + int max_type_len = strlen("atom_block_type"); + for (auto blk_id : atom_ctx.nlist.blocks()) { + max_name_len = std::max(max_name_len, (int)atom_ctx.nlist.block_name(blk_id).size()); + + const t_model* model = atom_ctx.nlist.block_model(blk_id); + max_type_len = std::max(max_type_len, (int)strlen(model->name)); + } + + fprintf(fp, "%-*s %-*s %8s %8s\n", max_name_len, "atom_block_name", max_type_len, "atom_block_type", "gain", "criticality"); + fprintf(fp, "\n"); + for (auto blk_id : seed_atoms) { + std::string name = atom_ctx.nlist.block_name(blk_id); + fprintf(fp, "%-*s ", max_name_len, name.c_str()); + + const t_model* model = atom_ctx.nlist.block_model(blk_id); + fprintf(fp, "%-*s ", max_type_len, model->name); + + fprintf(fp, "%*f ", std::max((int)strlen("gain"), 8), atom_gain[blk_id]); + fprintf(fp, "%*f ", std::max((int)strlen("criticality"), 8), atom_criticality[blk_id]); + fprintf(fp, "\n"); + } + + fclose(fp); +} + +/** + * This function takes a chain molecule, and the pb_graph_node that is chosen + * for packing the molecule's root block. Using the given root_primitive, this + * function will identify which chain id this molecule is being mapped to and + * will update the chain id value inside the chain info data structure of this + * molecule + */ +void update_molecule_chain_info(t_pack_molecule* chain_molecule, const t_pb_graph_node* root_primitive) { + VTR_ASSERT(chain_molecule->chain_info->chain_id == -1 && chain_molecule->chain_info->is_long_chain); + + auto chain_root_pins = chain_molecule->pack_pattern->chain_root_pins; + + // long chains should only be placed at the beginning of the chain + // Since for long chains the molecule size is already equal to the + // total number of adders in the cluster. Therefore, it should + // always be placed at the very first adder in this cluster. + for (size_t chainId = 0; chainId < chain_root_pins.size(); chainId++) { + if (chain_root_pins[chainId][0]->parent_node == root_primitive) { + chain_molecule->chain_info->chain_id = chainId; + chain_molecule->chain_info->first_packed_molecule = chain_molecule; + return; + } + } + + VTR_ASSERT(false); +} + +/** + * This function takes the root block of a chain molecule and a proposed + * placement primitive for this block. The function then checks if this + * chain root block has a placement constraint (such as being driven from + * outside the cluster) and returns the status of the placement accordingly. + */ +enum e_block_pack_status check_chain_root_placement_feasibility(const t_pb_graph_node* pb_graph_node, + const t_pack_molecule* molecule, + const AtomBlockId blk_id) { + enum e_block_pack_status block_pack_status = BLK_PASSED; + auto& atom_ctx = g_vpr_ctx.atom(); + + bool is_long_chain = molecule->chain_info->is_long_chain; + + const auto& chain_root_pins = molecule->pack_pattern->chain_root_pins; + + t_model_ports* root_port = chain_root_pins[0][0]->port->model_port; + AtomNetId chain_net_id; + auto port_id = atom_ctx.nlist.find_atom_port(blk_id, root_port); + + if (port_id) { + chain_net_id = atom_ctx.nlist.port_net(port_id, chain_root_pins[0][0]->pin_number); + } + + // if this block is part of a long chain or it is driven by a cluster + // input pin we need to check the placement legality of this block + // Depending on the logic synthesis even small chains that can fit within one + // cluster might need to start at the top of the cluster as their input can be + // driven by a global gnd or vdd. Therefore even if this is not a long chain + // but its input pin is driven by a net, the placement legality is checked. + if (is_long_chain || chain_net_id) { + auto chain_id = molecule->chain_info->chain_id; + // if this chain has a chain id assigned to it (implies is_long_chain too) + if (chain_id != -1) { + // the chosen primitive should be a valid starting point for the chain + // long chains should only be placed at the top of the chain tieOff = 0 + if (pb_graph_node != chain_root_pins[chain_id][0]->parent_node) { + block_pack_status = BLK_FAILED_FEASIBLE; + } + // the chain doesn't have an assigned chain_id yet + } else { + block_pack_status = BLK_FAILED_FEASIBLE; + for (const auto& chain : chain_root_pins) { + for (size_t tieOff = 0; tieOff < chain.size(); tieOff++) { + // check if this chosen primitive is one of the possible + // starting points for this chain. + if (pb_graph_node == chain[tieOff]->parent_node) { + // this location matches with the one of the dedicated chain + // input from outside logic block, therefore it is feasible + block_pack_status = BLK_PASSED; + break; + } + // long chains should only be placed at the top of the chain tieOff = 0 + if (is_long_chain) break; + } + } + } + } + + return block_pack_status; +} + +/** + * This function update the pb_type_count data structure by incrementing + * the number of used pb_types in the given packed cluster t_pb + */ +size_t update_pb_type_count(const t_pb* pb, std::map& pb_type_count, size_t depth) { + size_t max_depth = depth; + + t_pb_graph_node* pb_graph_node = pb->pb_graph_node; + t_pb_type* pb_type = pb_graph_node->pb_type; + t_mode* mode = &pb_type->modes[pb->mode]; + std::string pb_type_name(pb_type->name); + + pb_type_count[pb_type]++; + + if (pb_type->num_modes > 0) { + for (int i = 0; i < mode->num_pb_type_children; i++) { + for (int j = 0; j < mode->pb_type_children[i].num_pb; j++) { + if (pb->child_pbs[i] && pb->child_pbs[i][j].name) { + size_t child_depth = update_pb_type_count(&pb->child_pbs[i][j], pb_type_count, depth + 1); + + max_depth = std::max(max_depth, child_depth); + } + } + } + } + return max_depth; +} + +void print_pb_type_count_recurr(t_pb_type* pb_type, size_t max_name_chars, size_t curr_depth, std::map& pb_type_count) { + std::string display_name(curr_depth, ' '); //Indent by depth + display_name += pb_type->name; + + if (pb_type_count.count(pb_type)) { + VTR_LOG(" %-*s : %d\n", max_name_chars, display_name.c_str(), pb_type_count[pb_type]); + } + + //Recurse + for (int imode = 0; imode < pb_type->num_modes; ++imode) { + t_mode* mode = &pb_type->modes[imode]; + for (int ichild = 0; ichild < mode->num_pb_type_children; ++ichild) { + t_pb_type* child_pb_type = &mode->pb_type_children[ichild]; + + print_pb_type_count_recurr(child_pb_type, max_name_chars, curr_depth + 1, pb_type_count); + } + } +} + +/** + * This function identifies the logic block type which is + * defined by the block type which has a lut primitive + */ +t_logical_block_type_ptr identify_logic_block_type(std::map>& primitive_candidate_block_types) { + std::string lut_name = ".names"; + + for (auto& model : primitive_candidate_block_types) { + std::string model_name(model.first->name); + if (model_name == lut_name) + return model.second[0]; + } + + return nullptr; +} + +/** + * This function returns the pb_type that is similar to Logic Element (LE) in an FPGA + * The LE is defined as a physical block that contains a LUT primitive and + * is found by searching a cluster type to find the first pb_type (from the top + * of the hierarchy clb->LE) that has more than one instance within the cluster. + */ +t_pb_type* identify_le_block_type(t_logical_block_type_ptr logic_block_type) { + // if there is no CLB-like cluster, then there is no LE pb_block + if (!logic_block_type) + return nullptr; + + // search down the hierarchy starting from the pb_graph_head + auto pb_graph_node = logic_block_type->pb_graph_head; + + while (pb_graph_node->child_pb_graph_nodes) { + // if this pb_graph_node has more than one mode or more than one pb_type in the default mode return + // nullptr since the logic block of this architecture is not a CLB-like logic block + if (pb_graph_node->pb_type->num_modes > 1 || pb_graph_node->pb_type->modes[0].num_pb_type_children > 1) + return nullptr; + // explore the only child of this pb_graph_node + pb_graph_node = &pb_graph_node->child_pb_graph_nodes[0][0][0]; + // if the child node has more than one instance in the + // cluster then this is the pb_type similar to a LE + if (pb_graph_node->pb_type->num_pb > 1) + return pb_graph_node->pb_type; + } + + return nullptr; +} + +/** + * This function updates the le_count data structure from the given packed cluster + */ +void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_type, const t_pb_type* le_pb_type, std::vector& le_count) { + // if this cluster doesn't contain LEs or there + // are no les in this architecture, ignore it + if (!logic_block_type || pb->pb_graph_node != logic_block_type->pb_graph_head || !le_pb_type) + return; + + const std::string lut(".names"); + const std::string ff(".latch"); + const std::string adder("adder"); + + auto parent_pb = pb; + + // go down the hierarchy till the parent physical block of the LE is found + while (parent_pb->child_pbs[0][0].pb_graph_node->pb_type != le_pb_type) { + parent_pb = &parent_pb->child_pbs[0][0]; + } + + // iterate over all the LEs and update the LE count accordingly + for (int ile = 0; ile < parent_pb->get_num_children_of_type(0); ile++) { + if (!parent_pb->child_pbs[0][ile].name) + continue; + + auto has_used_lut = pb_used_for_blif_model(&parent_pb->child_pbs[0][ile], lut); + auto has_used_adder = pb_used_for_blif_model(&parent_pb->child_pbs[0][ile], adder); + auto has_used_ff = pb_used_for_blif_model(&parent_pb->child_pbs[0][ile], ff); + + // First type of LEs: used for logic and registers + if ((has_used_lut || has_used_adder) && has_used_ff) { + le_count[0]++; + // Second type of LEs: used for logic only + } else if (has_used_lut || has_used_adder) { + le_count[1]++; + // Third type of LEs: used for registers only + } else if (has_used_ff) { + le_count[2]++; + } + } +} + +/** + * This function returns true if the given physical block has + * a primitive matching the given blif model and is used + */ +bool pb_used_for_blif_model(const t_pb* pb, std::string blif_model_name) { + auto pb_graph_node = pb->pb_graph_node; + auto pb_type = pb_graph_node->pb_type; + auto mode = &pb_type->modes[pb->mode]; + + // if this is a primitive check if it matches the given blif model name + if (pb_type->blif_model) { + if (blif_model_name == pb_type->blif_model || ".subckt " + blif_model_name == pb_type->blif_model) { + return true; + } + } + + if (pb_type->num_modes > 0) { + for (int i = 0; i < mode->num_pb_type_children; i++) { + for (int j = 0; j < mode->pb_type_children[i].num_pb; j++) { + if (pb->child_pbs[i] && pb->child_pbs[i][j].name) { + if (pb_used_for_blif_model(&pb->child_pbs[i][j], blif_model_name)) { + return true; + } + } + } + } + } + + return false; +} + +/** + * Print the LE count data strurture + */ +void print_le_count(std::vector& le_count, const t_pb_type* le_pb_type) { + VTR_LOG("\nLogic Element (%s) detailed count:\n", le_pb_type->name); + VTR_LOG(" Total number of Logic Elements used : %d\n", le_count[0] + le_count[1] + le_count[2]); + VTR_LOG(" LEs used for logic and registers : %d\n", le_count[0]); + VTR_LOG(" LEs used for logic only : %d\n", le_count[1]); + VTR_LOG(" LEs used for registers only : %d\n\n", le_count[2]); +} + +/** + * Given a pointer to a pb in a cluster, this routine returns + * a pointer to the top-level pb of the given pb. + * This is needed when updating the gain for a cluster. + */ +t_pb* get_top_level_pb(t_pb* pb) { + t_pb* top_level_pb = pb; + + while (pb) { + top_level_pb = pb; + pb = pb->parent_pb; + } + + VTR_ASSERT(top_level_pb != nullptr); + + return top_level_pb; +} diff --git a/vpr/src/pack/cluster_util.h b/vpr/src/pack/cluster_util.h index bff9510c0c2..cdad7f16bf8 100644 --- a/vpr/src/pack/cluster_util.h +++ b/vpr/src/pack/cluster_util.h @@ -1,3 +1,6 @@ +#ifndef CLUSTER_UTIL_H +#define CLUSTER_UTIL_H + #include "globals.h" #include "atom_netlist.h" #include "pack_types.h" @@ -11,6 +14,12 @@ #include "tatum/echo_writer.hpp" #include "tatum/TimingReporter.hpp" +#define AAPACK_MAX_HIGH_FANOUT_EXPLORE 10 /* For high-fanout nets that are ignored, consider a maximum of this many sinks, must be less than packer_opts.feasible_block_array_size */ +#define AAPACK_MAX_TRANSITIVE_EXPLORE 40 /* When investigating transitive fanout connections in packing, consider a maximum of this many molecules, must be less than packer_opts.feasible_block_array_size */ + +//Constant allowing all cluster pins to be used +const t_ext_pin_util FULL_EXTERNAL_PIN_UTIL(1., 1.); + enum e_gain_update { GAIN, NO_GAIN @@ -65,6 +74,34 @@ struct t_cluster_progress_stats { int num_unrelated_clustering_attempts = 0; }; +/* Useful data structures for packing */ +struct t_clustering_data { + vtr::vector*> intra_lb_routing; + int* hill_climbing_inputs_avail; + + /* Keeps a linked list of the unclustered blocks to speed up looking for * + * unclustered blocks with a certain number of *external* inputs. * + * [0..lut_size]. Unclustered_list_head[i] points to the head of the * + * list of blocks with i inputs to be hooked up via external interconnect. */ + t_molecule_link* unclustered_list_head = nullptr; + + t_molecule_link* memory_pool = nullptr; + + /* Does the atom block that drives the output of this atom net also appear as a * + * receiver (input) pin of the atom net? If so, then by how much? + * + * This is used in the gain routines to avoid double counting the connections from * + * the current cluster to other blocks (hence yielding better clusterings). * + * The only time an atom block should connect to the same atom net * + * twice is when one connection is an output and the other is an input, * + * so this should take care of all multiple connections. */ + std::unordered_map net_output_feeds_driving_block_input; +}; + +/***********************************/ +/* Clustering helper functions */ +/***********************************/ + void check_clustering(); //calculate the initial timing at the start of packing stage @@ -77,20 +114,14 @@ void calc_init_packing_timing(const t_packer_opts& packer_opts, //free the clustering data structures void free_clustering_data(const t_packer_opts& packer_opts, - vtr::vector*>& intra_lb_routing, - int* hill_climbing_inputs_avail, - t_cluster_placement_stats* cluster_placement_stats, - t_molecule_link* unclustered_list_head, - t_molecule_link* memory_pool, - t_pb_graph_node** primitives_list); + t_clustering_data& clustering_data); //check clustering legality and output it void check_and_output_clustering(const t_packer_opts& packer_opts, const std::unordered_set& is_clock, const t_arch* arch, const int& num_clb, - const vtr::vector*>& intra_lb_routing, - bool& floorplan_regions_overfull); + const vtr::vector*>& intra_lb_routing); void get_max_cluster_size_and_pb_depth(int& max_cluster_size, int& max_pb_depth); @@ -98,3 +129,320 @@ void get_max_cluster_size_and_pb_depth(int& max_cluster_size, bool check_cluster_legality(const int& verbosity, const int& detailed_routing_stage, t_lb_router_data* router_data); + +bool is_atom_blk_in_pb(const AtomBlockId blk_id, const t_pb* pb); + +void add_molecule_to_pb_stats_candidates(t_pack_molecule* molecule, + std::map& gain, + t_pb* pb, + int max_queue_size, + AttractionInfo& attraction_groups); + +void remove_molecule_from_pb_stats_candidates(t_pack_molecule* molecule, + t_pb* pb); + +void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, + t_cluster_placement_stats** cluster_placement_stats, + t_pb_graph_node*** primitives_list, + t_pack_molecule* molecules_head, + t_clustering_data& clustering_data, + std::unordered_map& net_output_feeds_driving_block_input, + int& unclustered_list_head_size, + int num_molecules); + +void free_pb_stats_recursive(t_pb* pb); + +void try_update_lookahead_pins_used(t_pb* cur_pb); + +void reset_lookahead_pins_used(t_pb* cur_pb); + +void compute_and_mark_lookahead_pins_used(const AtomBlockId blk_id); + +void compute_and_mark_lookahead_pins_used_for_pin(const t_pb_graph_pin* pb_graph_pin, + const t_pb* primitive_pb, + const AtomNetId net_id); + +void commit_lookahead_pins_used(t_pb* cur_pb); + +bool check_lookahead_pins_used(t_pb* cur_pb, t_ext_pin_util max_external_pin_util); + +bool primitive_feasible(const AtomBlockId blk_id, t_pb* cur_pb); + +bool primitive_memory_sibling_feasible(const AtomBlockId blk_id, const t_pb_type* cur_pb_type, const AtomBlockId sibling_memory_blk); + +t_pack_molecule* get_molecule_by_num_ext_inputs(const int ext_inps, + const enum e_removal_policy remove_flag, + t_cluster_placement_stats* cluster_placement_stats_ptr, + t_molecule_link* unclustered_list_head); + +t_pack_molecule* get_free_molecule_with_most_ext_inputs_for_cluster(t_pb* cur_pb, + t_cluster_placement_stats* cluster_placement_stats_ptr, + t_molecule_link* unclustered_list_head, + const int& unclustered_list_head_size); + +void print_pack_status_header(); + +void print_pack_status(int num_clb, + int tot_num_molecules, + int num_molecules_processed, + int& mols_since_last_print, + int device_width, + int device_height, + AttractionInfo& attraction_groups); + +void rebuild_attraction_groups(AttractionInfo& attraction_groups); + +void record_molecule_failure(t_pack_molecule* molecule, t_pb* pb); + +enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_placement_stats_ptr, + t_pack_molecule* molecule, + t_pb_graph_node** primitives_list, + t_pb* pb, + const int max_models, + const int max_cluster_size, + const ClusterBlockId clb_index, + const int detailed_routing_stage, + t_lb_router_data* router_data, + int verbosity, + bool enable_pin_feasibility_filter, + const int feasible_block_array_size, + t_ext_pin_util max_external_pin_util, + PartitionRegion& temp_cluster_pr); + +void try_fill_cluster(const t_packer_opts& packer_opts, + t_cluster_placement_stats* cur_cluster_placement_stats_ptr, + t_pack_molecule*& prev_molecule, + t_pack_molecule*& next_molecule, + int& num_same_molecules, + t_pb_graph_node** primitives_list, + t_cluster_progress_stats& cluster_stats, + int num_clb, + const int num_models, + const int max_cluster_size, + const ClusterBlockId clb_index, + const int detailed_routing_stage, + AttractionInfo& attraction_groups, + vtr::vector>& clb_inter_blk_nets, + bool allow_unrelated_clustering, + const int& high_fanout_threshold, + const std::unordered_set& is_clock, + const std::shared_ptr& timing_info, + t_lb_router_data* router_data, + t_ext_pin_util target_ext_pin_util, + PartitionRegion& temp_cluster_pr, + e_block_pack_status& block_pack_status, + t_molecule_link* unclustered_list_head, + const int& unclustered_list_head_size, + std::unordered_map& net_output_feeds_driving_block_input, + std::map>& primitive_candidate_block_types); + +t_pack_molecule* save_cluster_routing_and_pick_new_seed(const t_packer_opts& packer_opts, + const int& num_clb, + const std::vector& seed_atoms, + const int& num_blocks_hill_added, + vtr::vector*>& intra_lb_routing, + int& seedindex, + t_cluster_progress_stats& cluster_stats, + t_lb_router_data* router_data); + +void store_cluster_info_and_free(const t_packer_opts& packer_opts, + const ClusterBlockId& clb_index, + const t_logical_block_type_ptr logic_block_type, + const t_pb_type* le_pb_type, + std::vector& le_count, + vtr::vector>& clb_inter_blk_nets); + +void free_data_and_requeue_used_mols_if_illegal(const ClusterBlockId& clb_index, + const int& savedseedindex, + std::map& num_used_type_instances, + int& num_clb, + int& seedindex); + +enum e_block_pack_status try_place_atom_block_rec(const t_pb_graph_node* pb_graph_node, + const AtomBlockId blk_id, + t_pb* cb, + t_pb** parent, + const int max_models, + const int max_cluster_size, + const ClusterBlockId clb_index, + const t_cluster_placement_stats* cluster_placement_stats_ptr, + const t_pack_molecule* molecule, + t_lb_router_data* router_data, + int verbosity, + const int feasible_block_array_size); + +enum e_block_pack_status atom_cluster_floorplanning_check(const AtomBlockId blk_id, + const ClusterBlockId clb_index, + const int verbosity, + PartitionRegion& temp_cluster_pr, + bool& cluster_pr_needs_update); + +void revert_place_atom_block(const AtomBlockId blk_id, t_lb_router_data* router_data); + +void update_connection_gain_values(const AtomNetId net_id, const AtomBlockId clustered_blk_id, t_pb* cur_pb, enum e_net_relation_to_clustered_block net_relation_to_clustered_block); + +void update_timing_gain_values(const AtomNetId net_id, + t_pb* cur_pb, + enum e_net_relation_to_clustered_block net_relation_to_clustered_block, + const SetupTimingInfo& timing_info, + const std::unordered_set& is_global, + std::unordered_map& net_output_feeds_driving_block_input); + +void mark_and_update_partial_gain(const AtomNetId net_id, + enum e_gain_update gain_flag, + const AtomBlockId clustered_blk_id, + bool timing_driven, + bool connection_driven, + enum e_net_relation_to_clustered_block net_relation_to_clustered_block, + const SetupTimingInfo& timing_info, + const std::unordered_set& is_global, + const int high_fanout_net_threshold, + std::unordered_map& net_output_feeds_driving_block_input); + +void update_total_gain(float alpha, float beta, bool timing_driven, bool connection_driven, t_pb* pb, AttractionInfo& attraction_groups); + +void update_cluster_stats(const t_pack_molecule* molecule, + const ClusterBlockId clb_index, + const std::unordered_set& is_clock, + const std::unordered_set& is_global, + const bool global_clocks, + const float alpha, + const float beta, + const bool timing_driven, + const bool connection_driven, + const int high_fanout_net_threshold, + const SetupTimingInfo& timing_info, + AttractionInfo& attraction_groups, + std::unordered_map& net_output_feeds_driving_block_input); + +void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, + t_pb_graph_node** primitives_list, + ClusterBlockId clb_index, + t_pack_molecule* molecule, + std::map& num_used_type_instances, + const float target_device_utilization, + const int num_models, + const int max_cluster_size, + const t_arch* arch, + std::string device_layout_name, + std::vector* lb_type_rr_graphs, + t_lb_router_data** router_data, + const int detailed_routing_stage, + ClusteredNetlist* clb_nlist, + const std::map>& primitive_candidate_block_types, + int verbosity, + bool enable_pin_feasibility_filter, + bool balance_block_type_utilization, + const int feasible_block_array_size, + PartitionRegion& temp_cluster_pr); + +t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, + AttractionInfo& attraction_groups, + const enum e_gain_type gain_mode, + t_cluster_placement_stats* cluster_placement_stats_ptr, + vtr::vector>& clb_inter_blk_nets, + const ClusterBlockId cluster_index, + bool prioritize_transitive_connectivity, + int transitive_fanout_threshold, + const int feasible_block_array_size, + std::map>& primitive_candidate_block_types); + +void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur_pb, + t_cluster_placement_stats* cluster_placement_stats_ptr, + const int feasible_block_array_size, + AttractionInfo& attraction_groups); + +void add_cluster_molecule_candidates_by_highfanout_connectivity(t_pb* cur_pb, + t_cluster_placement_stats* cluster_placement_stats_ptr, + const int feasible_block_array_size, + AttractionInfo& attraction_groups); + +void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, + t_cluster_placement_stats* cluster_placement_stats_ptr, + AttractionInfo& attraction_groups, + const int feasible_block_array_size, + ClusterBlockId clb_index, + std::map>& primitive_candidate_block_types); + +void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur_pb, + t_cluster_placement_stats* cluster_placement_stats_ptr, + vtr::vector>& clb_inter_blk_nets, + const ClusterBlockId cluster_index, + int transitive_fanout_threshold, + const int feasible_block_array_size, + AttractionInfo& attraction_groups); + +bool check_free_primitives_for_molecule_atoms(t_pack_molecule* molecule, t_cluster_placement_stats* cluster_placement_stats_ptr); + +t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, + AttractionInfo& attraction_groups, + const bool allow_unrelated_clustering, + const bool prioritize_transitive_connectivity, + const int transitive_fanout_threshold, + const int feasible_block_array_size, + int* num_unrelated_clustering_attempts, + t_cluster_placement_stats* cluster_placement_stats_ptr, + vtr::vector>& clb_inter_blk_nets, + ClusterBlockId cluster_index, + int verbosity, + t_molecule_link* unclustered_list_head, + const int& unclustered_list_head_size, + std::map>& primitive_candidate_block_types); + +void mark_all_molecules_valid(t_pack_molecule* molecule_head); + +int count_molecules(t_pack_molecule* molecule_head); + +t_molecule_stats calc_molecule_stats(const t_pack_molecule* molecule); + +t_molecule_stats calc_max_molecules_stats(const t_pack_molecule* molecule_head); + +std::vector initialize_seed_atoms(const e_cluster_seed seed_type, + const t_molecule_stats& max_molecule_stats, + const vtr::vector& atom_criticality); + +t_pack_molecule* get_highest_gain_seed_molecule(int* seedindex, const std::vector seed_atoms); + +float get_molecule_gain(t_pack_molecule* molecule, std::map& blk_gain, AttractGroupId cluster_attraction_group_id, AttractionInfo& attraction_groups, int num_molecule_failures); + +int compare_molecule_gain(const void* a, const void* b); +int net_sinks_reachable_in_cluster(const t_pb_graph_pin* driver_pb_gpin, const int depth, const AtomNetId net_id); + +void print_seed_gains(const char* fname, const std::vector& seed_atoms, const vtr::vector& atom_gain, const vtr::vector& atom_criticality); + +void load_transitive_fanout_candidates(ClusterBlockId cluster_index, + t_pb_stats* pb_stats, + vtr::vector>& clb_inter_blk_nets, + int transitive_fanout_threshold); + +std::map> identify_primitive_candidate_block_types(); + +void update_molecule_chain_info(t_pack_molecule* chain_molecule, const t_pb_graph_node* root_primitive); + +enum e_block_pack_status check_chain_root_placement_feasibility(const t_pb_graph_node* pb_graph_node, + const t_pack_molecule* molecule, + const AtomBlockId blk_id); + +t_pb_graph_pin* get_driver_pb_graph_pin(const t_pb* driver_pb, const AtomPinId driver_pin_id); + +size_t update_pb_type_count(const t_pb* pb, std::map& pb_type_count, size_t depth); + +void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_type, const t_pb_type* le_pb_type, std::vector& le_count); + +void print_pb_type_count_recurr(t_pb_type* type, size_t max_name_chars, size_t curr_depth, std::map& pb_type_count); + +t_logical_block_type_ptr identify_logic_block_type(std::map>& primitive_candidate_block_types); + +t_pb_type* identify_le_block_type(t_logical_block_type_ptr logic_block_type); + +bool pb_used_for_blif_model(const t_pb* pb, std::string blif_model_name); + +void print_le_count(std::vector& le_count, const t_pb_type* le_pb_type); + +t_pb* get_top_level_pb(t_pb* pb); + +bool cleanup_pb(t_pb* pb); + +void alloc_and_load_pb_stats(t_pb* pb, const int feasible_block_array_size); + +#endif \ No newline at end of file diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index f6490f2d1a5..935e756b5dd 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -22,6 +22,7 @@ #include "read_blif.h" #include "cluster.h" #include "SetupGrid.h" +#include "re_cluster.h" /* #define DUMP_PB_GRAPH 1 */ /* #define DUMP_BLIF_INPUT 1 */ @@ -41,31 +42,34 @@ bool try_pack(t_packer_opts* packer_opts, const t_model* library_models, float interc_delay, std::vector* lb_type_rr_graphs) { + auto& helper_ctx = g_vpr_ctx.mutable_helper(); + std::unordered_set is_clock; - std::multimap atom_molecules; //The molecules associated with each atom block std::unordered_map expected_lowest_cost_pb_gnode; //The molecules associated with each atom block const t_model* cur_model; - int num_models; + t_clustering_data clustering_data; + //int num_models; std::vector list_of_packing_patterns; - std::unique_ptr list_of_pack_molecules(nullptr, free_pack_molecules); + //std::unique_ptr list_of_pack_molecules(nullptr, free_pack_molecules); VTR_LOG("Begin packing '%s'.\n", packer_opts->circuit_file_name.c_str()); /* determine number of models in the architecture */ - num_models = 0; + helper_ctx.num_models = 0; cur_model = user_models; while (cur_model) { - num_models++; + helper_ctx.num_models++; cur_model = cur_model->next; } cur_model = library_models; while (cur_model) { - num_models++; + helper_ctx.num_models++; cur_model = cur_model->next; } is_clock = alloc_and_load_is_clock(packer_opts->global_clocks); auto& atom_ctx = g_vpr_ctx.atom(); + auto& atom_mutable_ctx = g_vpr_ctx.mutable_atom(); size_t num_p_inputs = 0; size_t num_p_outputs = 0; @@ -95,10 +99,9 @@ bool try_pack(t_packer_opts* packer_opts, std::unique_ptr, decltype(list_of_packing_patterns_deleter)> list_of_packing_patterns_cleanup_guard(&list_of_packing_patterns, list_of_packing_patterns_deleter); - list_of_pack_molecules.reset(alloc_and_load_pack_molecules(list_of_packing_patterns.data(), - atom_molecules, - expected_lowest_cost_pb_gnode, - list_of_packing_patterns.size())); + atom_mutable_ctx.list_of_pack_molecules.reset(alloc_and_load_pack_molecules(list_of_packing_patterns.data(), + expected_lowest_cost_pb_gnode, + list_of_packing_patterns.size())); /* We keep attraction groups off in the first iteration, and * only turn on in later iterations if some floorplan regions turn out to be overfull. @@ -136,13 +139,14 @@ bool try_pack(t_packer_opts* packer_opts, bool floorplan_regions_overfull = false; while (true) { + free_clustering_data(*packer_opts, clustering_data); + //Cluster the netlist - auto num_type_instances = do_clustering( + helper_ctx.num_used_type_instances = do_clustering( *packer_opts, *analysis_opts, - arch, list_of_pack_molecules.get(), num_models, + arch, atom_mutable_ctx.list_of_pack_molecules.get(), helper_ctx.num_models, is_clock, - atom_molecules, expected_lowest_cost_pb_gnode, allow_unrelated_clustering, balance_block_type_util, @@ -150,10 +154,11 @@ bool try_pack(t_packer_opts* packer_opts, target_external_pin_util, high_fanout_thresholds, attraction_groups, - floorplan_regions_overfull); + floorplan_regions_overfull, + clustering_data); //Try to size/find a device - bool fits_on_device = try_size_device_grid(*arch, num_type_instances, packer_opts->target_device_utilization, packer_opts->device_layout); + bool fits_on_device = try_size_device_grid(*arch, helper_ctx.num_used_type_instances, packer_opts->target_device_utilization, packer_opts->device_layout); /* We use this bool to determine the cause for the clustering not being dense enough. If the clustering * is not dense enough and there are floorplan constraints, it is presumed that the constraints are the cause @@ -224,8 +229,8 @@ bool try_pack(t_packer_opts* packer_opts, std::string resource_reqs; std::string resource_avail; auto& grid = g_vpr_ctx.device().grid; - for (auto iter = num_type_instances.begin(); iter != num_type_instances.end(); ++iter) { - if (iter != num_type_instances.begin()) { + for (auto iter = helper_ctx.num_used_type_instances.begin(); iter != helper_ctx.num_used_type_instances.end(); ++iter) { + if (iter != helper_ctx.num_used_type_instances.begin()) { resource_reqs += ", "; resource_avail += ", "; } @@ -257,6 +262,18 @@ bool try_pack(t_packer_opts* packer_opts, ++pack_iteration; } + /* Packing iterative improvement can be done here */ + /* Use the re-cluster API to edit it */ + /******************* Start *************************/ + + /******************** End **************************/ + + //check clustering and output it + check_and_output_clustering(*packer_opts, is_clock, arch, helper_ctx.total_clb_num, clustering_data.intra_lb_routing); + + // Free Data Structures + free_clustering_data(*packer_opts, clustering_data); + VTR_LOG("\n"); VTR_LOG("Netlist conversion complete.\n"); VTR_LOG("\n"); diff --git a/vpr/src/pack/prepack.cpp b/vpr/src/pack/prepack.cpp index 93d6eea8ead..d14fc020724 100644 --- a/vpr/src/pack/prepack.cpp +++ b/vpr/src/pack/prepack.cpp @@ -64,12 +64,10 @@ static int compare_pack_pattern(const t_pack_patterns* pattern_a, const t_pack_p static void free_pack_pattern_block(t_pack_pattern_block* pattern_block, t_pack_pattern_block** pattern_block_list); static t_pack_molecule* try_create_molecule(t_pack_patterns* list_of_pack_patterns, - std::multimap& atom_molecules, const int pack_pattern_index, AtomBlockId blk_id); static bool try_expand_molecule(t_pack_molecule* molecule, - const std::multimap& atom_molecules, const AtomBlockId blk_id); static void print_pack_molecules(const char* fname, @@ -81,7 +79,7 @@ static t_pb_graph_node* get_expected_lowest_cost_primitive_for_atom_block(const static t_pb_graph_node* get_expected_lowest_cost_primitive_for_atom_block_in_pb_graph_node(const AtomBlockId blk_id, t_pb_graph_node* curr_pb_graph_node, float* cost); -static AtomBlockId find_new_root_atom_for_chain(const AtomBlockId blk_id, const t_pack_patterns* list_of_pack_pattern, const std::multimap& atom_molecules); +static AtomBlockId find_new_root_atom_for_chain(const AtomBlockId blk_id, const t_pack_patterns* list_of_pack_pattern); static std::vector find_end_of_path(t_pb_graph_pin* input_pin, int pattern_index); @@ -96,7 +94,7 @@ static t_pb_graph_pin* get_connected_primitive_pin(const t_pb_graph_pin* input_p static void get_all_connected_primitive_pins(const t_pb_graph_pin* cluster_input_pin, std::vector& connected_primitive_pins); -static void init_molecule_chain_info(const AtomBlockId blk_id, t_pack_molecule* molecule, const std::multimap& atom_molecules); +static void init_molecule_chain_info(const AtomBlockId blk_id, t_pack_molecule* molecule); static AtomBlockId get_sink_block(const AtomBlockId block_id, const t_model_ports* model_port, const BitIndex pin_number); @@ -772,7 +770,6 @@ static void backward_expand_pack_pattern_from_edge(const t_pb_graph_edge* expans * ie. a single linear chain that can be split across multiple complex blocks */ t_pack_molecule* alloc_and_load_pack_molecules(t_pack_patterns* list_of_pack_patterns, - std::multimap& atom_molecules, std::unordered_map& expected_lowest_cost_pb_gnode, const int num_packing_patterns) { int i, j, best_pattern; @@ -780,6 +777,7 @@ t_pack_molecule* alloc_and_load_pack_molecules(t_pack_patterns* list_of_pack_pat t_pack_molecule* cur_molecule; bool* is_used; auto& atom_ctx = g_vpr_ctx.atom(); + auto& atom_mutable_ctx = g_vpr_ctx.mutable_atom(); is_used = (bool*)vtr::calloc(num_packing_patterns, sizeof(bool)); @@ -814,7 +812,7 @@ t_pack_molecule* alloc_and_load_pack_molecules(t_pack_patterns* list_of_pack_pat for (auto blk_iter = blocks.begin(); blk_iter != blocks.end(); ++blk_iter) { auto blk_id = *blk_iter; - cur_molecule = try_create_molecule(list_of_pack_patterns, atom_molecules, best_pattern, blk_id); + cur_molecule = try_create_molecule(list_of_pack_patterns, best_pattern, blk_id); if (cur_molecule != nullptr) { cur_molecule->next = list_of_molecules_head; /* In the event of multiple molecules with the same atom block pattern, @@ -826,7 +824,7 @@ t_pack_molecule* alloc_and_load_pack_molecules(t_pack_patterns* list_of_pack_pat //Note: atom_molecules is an (ordered) multimap so the last molecule // inserted for a given blk_id will be the last valid element // in the equal_range - auto rng = atom_molecules.equal_range(blk_id); //The range of molecules matching this block + auto rng = atom_ctx.atom_molecules.equal_range(blk_id); //The range of molecules matching this block bool range_empty = (rng.first == rng.second); bool cur_was_last_inserted = false; if (!range_empty) { @@ -868,7 +866,7 @@ t_pack_molecule* alloc_and_load_pack_molecules(t_pack_patterns* list_of_pack_pat expected_lowest_cost_pb_gnode[blk_id] = best; - auto rng = atom_molecules.equal_range(blk_id); + auto rng = atom_ctx.atom_molecules.equal_range(blk_id); bool rng_empty = (rng.first == rng.second); if (rng_empty) { cur_molecule = new t_pack_molecule; @@ -884,7 +882,7 @@ t_pack_molecule* alloc_and_load_pack_molecules(t_pack_patterns* list_of_pack_pat cur_molecule->base_gain = 1; list_of_molecules_head = cur_molecule; - atom_molecules.insert({blk_id, cur_molecule}); + atom_mutable_ctx.atom_molecules.insert({blk_id, cur_molecule}); } } @@ -897,15 +895,6 @@ t_pack_molecule* alloc_and_load_pack_molecules(t_pack_patterns* list_of_pack_pat return list_of_molecules_head; } -void free_pack_molecules(t_pack_molecule* list_of_pack_molecules) { - t_pack_molecule* cur_pack_molecule = list_of_pack_molecules; - while (cur_pack_molecule != nullptr) { - cur_pack_molecule = list_of_pack_molecules->next; - delete list_of_pack_molecules; - list_of_pack_molecules = cur_pack_molecule; - } -} - static void free_pack_pattern_block(t_pack_pattern_block* pattern_block, t_pack_pattern_block** pattern_block_list) { t_pack_pattern_connections *connection, *next; if (pattern_block == nullptr || pattern_block->block_id == OPEN) { @@ -938,11 +927,13 @@ static void free_pack_pattern_block(t_pack_pattern_block* pattern_block, t_pack_ * Side Effect: If successful, link atom to molecule */ static t_pack_molecule* try_create_molecule(t_pack_patterns* list_of_pack_patterns, - std::multimap& atom_molecules, const int pack_pattern_index, AtomBlockId blk_id) { t_pack_molecule* molecule; + //auto& atom_ctx = g_vpr_ctx.atom(); + auto& atom_mutable_ctx = g_vpr_ctx.mutable_atom(); + auto pack_pattern = &list_of_pack_patterns[pack_pattern_index]; // Check pack pattern validity @@ -953,7 +944,7 @@ static t_pack_molecule* try_create_molecule(t_pack_patterns* list_of_pack_patter // If a chain pattern extends beyond a single logic block, we must find // the furthest blk_id up the chain that is not mapped to a molecule yet. if (pack_pattern->is_chain) { - blk_id = find_new_root_atom_for_chain(blk_id, pack_pattern, atom_molecules); + blk_id = find_new_root_atom_for_chain(blk_id, pack_pattern); if (!blk_id) return nullptr; } @@ -965,12 +956,12 @@ static t_pack_molecule* try_create_molecule(t_pack_patterns* list_of_pack_patter molecule->num_blocks = pack_pattern->num_blocks; molecule->root = pack_pattern->root_block->block_id; - if (try_expand_molecule(molecule, atom_molecules, blk_id)) { + if (try_expand_molecule(molecule, blk_id)) { // Success! commit molecule // update chain info for chain molecules if (molecule->pack_pattern->is_chain) { - init_molecule_chain_info(blk_id, molecule, atom_molecules); + init_molecule_chain_info(blk_id, molecule); } // update the atom_molcules with the atoms that are mapped to this molecule @@ -981,7 +972,7 @@ static t_pack_molecule* try_create_molecule(t_pack_patterns* list_of_pack_patter continue; } - atom_molecules.insert({blk_id2, molecule}); + atom_mutable_ctx.atom_molecules.insert({blk_id2, molecule}); } } else { // Failed to create molecule @@ -1006,8 +997,9 @@ static t_pack_molecule* try_create_molecule(t_pack_patterns* list_of_pack_patter * blk_id : chosen to be the root of this molecule and the code is expanding from */ static bool try_expand_molecule(t_pack_molecule* molecule, - const std::multimap& atom_molecules, const AtomBlockId blk_id) { + auto& atom_ctx = g_vpr_ctx.atom(); + // root block of the pack pattern, which is the starting point of this pattern const auto pattern_root_block = molecule->pack_pattern->root_block; // bool array indicating whether a position in a pack pattern is optional or should @@ -1039,7 +1031,7 @@ static bool try_expand_molecule(t_pack_molecule* molecule, continue; } - if (!block_id || !primitive_type_feasible(block_id, pattern_block->pb_type) || (molecule_atom_block_id && molecule_atom_block_id != block_id) || atom_molecules.find(block_id) != atom_molecules.end()) { + if (!block_id || !primitive_type_feasible(block_id, pattern_block->pb_type) || (molecule_atom_block_id && molecule_atom_block_id != block_id) || atom_ctx.atom_molecules.find(block_id) != atom_ctx.atom_molecules.end()) { // Stopping conditions, if: // 1) this is an invalid atom block (nothing) // 2) this atom block cannot fit in this primitive type @@ -1305,7 +1297,7 @@ static int compare_pack_pattern(const t_pack_patterns* pattern_a, const t_pack_p * block_index: index of current atom * list_of_pack_pattern: ptr to current chain pattern */ -static AtomBlockId find_new_root_atom_for_chain(const AtomBlockId blk_id, const t_pack_patterns* list_of_pack_pattern, const std::multimap& atom_molecules) { +static AtomBlockId find_new_root_atom_for_chain(const AtomBlockId blk_id, const t_pack_patterns* list_of_pack_pattern) { AtomBlockId new_root_blk_id; t_pb_graph_pin* root_ipin; t_pb_graph_node* root_pb_graph_node; @@ -1334,7 +1326,7 @@ static AtomBlockId find_new_root_atom_for_chain(const AtomBlockId blk_id, const return blk_id; } // check if driver atom is already packed - auto rng = atom_molecules.equal_range(driver_blk_id); + auto rng = atom_ctx.atom_molecules.equal_range(driver_blk_id); bool rng_empty = (rng.first == rng.second); if (!rng_empty) { /* Driver is used/invalid, so current block is the furthest up the chain, return it */ @@ -1342,7 +1334,7 @@ static AtomBlockId find_new_root_atom_for_chain(const AtomBlockId blk_id, const } // didn't find furthest atom up the chain, keep searching further up the chain - new_root_blk_id = find_new_root_atom_for_chain(driver_blk_id, list_of_pack_pattern, atom_molecules); + new_root_blk_id = find_new_root_atom_for_chain(driver_blk_id, list_of_pack_pattern); if (!new_root_blk_id) { return blk_id; @@ -1627,7 +1619,7 @@ static void get_all_connected_primitive_pins(const t_pb_graph_pin* cluster_input * The second one should should be the molecule directly after that one * and so on. */ -static void init_molecule_chain_info(const AtomBlockId blk_id, t_pack_molecule* molecule, const std::multimap& atom_molecules) { +static void init_molecule_chain_info(const AtomBlockId blk_id, t_pack_molecule* molecule) { // the input molecule to this function should have a pack // pattern assigned to it and the input block should be valid VTR_ASSERT(molecule->pack_pattern && blk_id); @@ -1642,13 +1634,13 @@ static void init_molecule_chain_info(const AtomBlockId blk_id, t_pack_molecule* auto driver_atom_id = atom_ctx.nlist.find_atom_pin_driver(blk_id, model_pin, pin_bit); // find the molecule this driver atom is mapped to - auto itr = atom_molecules.find(driver_atom_id); + auto itr = atom_ctx.atom_molecules.find(driver_atom_id); // if this is the first molecule to be created for this chain // initialize the chain info data structure. This is the case // if either there is no driver to the block input pin or // if the driver is not part of a molecule - if (!driver_atom_id || itr == atom_molecules.end()) { + if (!driver_atom_id || itr == atom_ctx.atom_molecules.end()) { // allocate chain info molecule->chain_info = std::make_shared(); // this is not the first molecule to be created for this chain diff --git a/vpr/src/pack/prepack.h b/vpr/src/pack/prepack.h index 7945a38bc03..15d676dd68f 100644 --- a/vpr/src/pack/prepack.h +++ b/vpr/src/pack/prepack.h @@ -16,10 +16,9 @@ void free_list_of_pack_patterns(std::vector& list_of_pack_patte void free_pack_pattern(t_pack_patterns* pack_pattern); t_pack_molecule* alloc_and_load_pack_molecules(t_pack_patterns* list_of_pack_patterns, - std::multimap& atom_molecules, std::unordered_map& expected_lowest_cost_pb_gnode, const int num_packing_patterns); -void free_pack_molecules(t_pack_molecule* list_of_pack_molecules); +//void free_pack_molecules(t_pack_molecule* list_of_pack_molecules); #endif diff --git a/vpr/src/pack/re_cluster.cpp b/vpr/src/pack/re_cluster.cpp new file mode 100644 index 00000000000..26ad1b04f68 --- /dev/null +++ b/vpr/src/pack/re_cluster.cpp @@ -0,0 +1,71 @@ +#include "re_cluster.h" +#include "re_cluster_util.h" + +bool move_atom_to_new_cluster(const AtomBlockId& atom_id, + const enum e_pad_loc_type& pad_loc_type, + std::vector* lb_type_rr_graphs, + t_clustering_data& clustering_data, + bool during_packing) { + auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& helper_ctx = g_vpr_ctx.mutable_helper(); + auto& device_ctx = g_vpr_ctx.device(); + + bool is_removed, is_created; + ClusterBlockId old_clb; + PartitionRegion temp_cluster_pr; + int imacro; + t_lb_router_data* router_data = nullptr; + + //Check that there is a place for a new cluster of the same type + old_clb = atom_to_cluster(atom_id); + t_logical_block_type_ptr block_type = cluster_ctx.clb_nlist.block_type(old_clb); + int block_mode = cluster_ctx.clb_nlist.block_pb(old_clb)->mode; + + unsigned int num_instances = 0; + for (auto equivalent_tile : block_type->equivalent_tiles) { + num_instances += device_ctx.grid.num_instances(equivalent_tile); + } + + if (helper_ctx.num_used_type_instances[block_type] == num_instances) { + VTR_LOG("The utilization of block_type %s is 100%. No space for new clusters\n", block_type->name); + VTR_LOG("Atom %d move aborted\n", atom_id); + return false; + } + + //remove the atom from its current cluster and check its legality + is_removed = remove_atom_from_cluster(atom_id, lb_type_rr_graphs, old_clb, clustering_data, imacro, during_packing); + if (!is_removed) { + VTR_LOG("Atom: %zu move failed. Can't remove it from the old cluster\n", atom_id); + return (is_removed); + } + + //Create new cluster of the same type and mode. + ClusterBlockId new_clb(helper_ctx.total_clb_num); + is_created = start_new_cluster_for_atom(atom_id, + pad_loc_type, + block_type, + block_mode, + helper_ctx.feasible_block_array_size, + imacro, + helper_ctx.enable_pin_feasibility_filter, + new_clb, + &router_data, + lb_type_rr_graphs, + temp_cluster_pr, + clustering_data, + during_packing); + + //Print the move result + if (is_created) + VTR_LOG("Atom:%zu is moved to a new cluster\n", atom_id); + else + VTR_LOG("Atom:%zu move failed. Can't start a new cluster of the same type and mode\n", atom_id); + + //If the move is done after packing not during it, some fixes need to be done on the + //clustered netlist + if (is_created && !during_packing) { + fix_clustered_netlist(atom_id, old_clb, new_clb); + } + + return (is_created); +} diff --git a/vpr/src/pack/re_cluster.h b/vpr/src/pack/re_cluster.h new file mode 100644 index 00000000000..af6a53703f1 --- /dev/null +++ b/vpr/src/pack/re_cluster.h @@ -0,0 +1,30 @@ +#ifndef RE_CLUSTER_H +#define RE_CLUSTER_H +/** + * @file This file includes an API function that updates clustering after its done + * + * To optimize the clustering decisions, this file provides an API that can open up already + * packed clusters and change them. The functions in this API can be used in 2 locations: + * - During packing after the clusterer is done + * - During placement after the initial placement is done + * + */ + +#include "pack_types.h" +#include "clustered_netlist_utils.h" +#include "cluster_util.h" + +/** + * @brief This function moves an atom out of its cluster and create a new cluster for it + * + * This function can be called from 2 spots in the vpr flow. + * - First, during packing to optimize the initial clustered netlist + * (during_packing variable should be true.) + * - Second, during placement (during_packing variable should be false) + */ +bool move_atom_to_new_cluster(const AtomBlockId& atom_id, + const enum e_pad_loc_type& pad_loc_type, + std::vector* lb_type_rr_graphs, + t_clustering_data& clustering_data, + bool during_packing); +#endif \ No newline at end of file diff --git a/vpr/src/pack/re_cluster_util.cpp b/vpr/src/pack/re_cluster_util.cpp new file mode 100644 index 00000000000..102030bedd1 --- /dev/null +++ b/vpr/src/pack/re_cluster_util.cpp @@ -0,0 +1,532 @@ +#include "re_cluster_util.h" + +#include "vpr_context.h" +#include "clustered_netlist_utils.h" +#include "cluster_util.h" +#include "cluster_router.h" +#include "cluster_placement.h" +#include "place_macro.h" +#include "initial_placement.h" +#include "read_netlist.h" +#include + +//The name suffix of the new block (if exists) +const char* name_suffix = "_m"; + +/******************* Static Functions ********************/ +//static void set_atom_pin_mapping(const ClusteredNetlist& clb_nlist, const AtomBlockId atom_blk, const AtomPortId atom_port, const t_pb_graph_pin* gpin); +static void load_atom_index_for_pb_pin(t_pb_routes& pb_route, int ipin); +static void load_internal_to_block_net_nums(const t_logical_block_type_ptr type, t_pb_routes& pb_route); +static bool count_children_pbs(const t_pb* pb); +static void fix_atom_pin_mapping(const AtomBlockId blk); + +static void fix_cluster_pins_after_moving(const ClusterBlockId clb_index); +static void check_net_absorbtion(const AtomNetId atom_net_id, + const ClusterBlockId new_clb, + const ClusterBlockId old_clb, + ClusterPinId& cluster_pin_id, + bool& previously_absorbed, + bool& now_abosrbed); + +static void fix_cluster_port_after_moving(const ClusterBlockId clb_index); + +static void fix_cluster_net_after_moving(const AtomBlockId& atom_id, + const ClusterBlockId& old_clb, + const ClusterBlockId& new_clb); + +ClusterBlockId atom_to_cluster(const AtomBlockId& atom) { + auto& atom_ctx = g_vpr_ctx.atom(); + return (atom_ctx.lookup.atom_clb(atom)); +} + +std::vector cluster_to_atoms(const ClusterBlockId& cluster) { + ClusterAtomsLookup cluster_lookup; + return (cluster_lookup.atoms_in_cluster(cluster)); +} + +bool remove_atom_from_cluster(const AtomBlockId& atom_id, + std::vector* lb_type_rr_graphs, + ClusterBlockId& old_clb, + t_clustering_data& clustering_data, + int& imacro, + bool during_packing) { + auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); + auto& atom_ctx = g_vpr_ctx.mutable_atom(); + + //Determine the cluster ID + old_clb = atom_to_cluster(atom_id); + + //re-build router_data structure for this cluster + t_lb_router_data* router_data = lb_load_router_data(lb_type_rr_graphs, old_clb); + + //remove atom from router_data + remove_atom_from_target(router_data, atom_id); + + //check cluster legality + bool is_cluster_legal = check_cluster_legality(0, E_DETAILED_ROUTE_AT_END_ONLY, router_data); + + if (is_cluster_legal) { + t_pb* temp = const_cast(atom_ctx.lookup.atom_pb(atom_id)); + t_pb* next = temp->parent_pb; + //char* atom_name = vtr::strdup(temp->name); + bool has_more_children; + + revert_place_atom_block(atom_id, router_data); + //delete atom pb + cleanup_pb(temp); + + has_more_children = count_children_pbs(next); + //keep deleting the parent pbs if they were created only for the removed atom + while (!has_more_children) { + temp = next; + next = next->parent_pb; + cleanup_pb(temp); + has_more_children = count_children_pbs(next); + } + + //if the parents' names are the same as the removed atom names, + //update the name to prevent double the name when creating a new cluster for + // the removed atom + /* + * while(next != nullptr && *(next->name) == *atom_name) { + * next->name = vtr::strdup(child_name); + * if(next->parent_pb == nullptr) + * next = next->parent_pb; + * } + */ + + cluster_ctx.clb_nlist.block_pb(old_clb)->pb_route.clear(); + cluster_ctx.clb_nlist.block_pb(old_clb)->pb_route = alloc_and_load_pb_route(router_data->saved_lb_nets, cluster_ctx.clb_nlist.block_pb(old_clb)->pb_graph_node); + + if (during_packing) { + clustering_data.intra_lb_routing[old_clb] = router_data->saved_lb_nets; + router_data->saved_lb_nets = nullptr; + } + + else + get_imacro_from_iblk(&imacro, old_clb, g_vpr_ctx.placement().pl_macros); + } else { + VTR_LOG("re-cluster: Cluster is illegal after removing an atom\n"); + } + + free_router_data(router_data); + router_data = nullptr; + + //return true if succeeded + return (is_cluster_legal); +} + +t_lb_router_data* lb_load_router_data(std::vector* lb_type_rr_graphs, const ClusterBlockId& clb_index) { + //build data structures used by intra-logic block router + auto& cluster_ctx = g_vpr_ctx.clustering(); + auto block_type = cluster_ctx.clb_nlist.block_type(clb_index); + t_lb_router_data* router_data = alloc_and_load_router_data(&lb_type_rr_graphs[block_type->index], block_type); + + //iterate over atoms of the current cluster and add them to router data + for (auto atom_id : cluster_to_atoms(clb_index)) { + add_atom_as_target(router_data, atom_id); + } + return (router_data); +} + +bool start_new_cluster_for_atom(const AtomBlockId atom_id, + const enum e_pad_loc_type& pad_loc_type, + const t_logical_block_type_ptr& type, + const int mode, + const int feasible_block_array_size, + int& imacro, + bool enable_pin_feasibility_filter, + ClusterBlockId clb_index, + t_lb_router_data** router_data, + std::vector* lb_type_rr_graphs, + PartitionRegion& temp_cluster_pr, + t_clustering_data& clustering_data, + bool during_packing) { + auto& atom_ctx = g_vpr_ctx.atom(); + auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); + auto& helper_ctx = g_vpr_ctx.mutable_helper(); + auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); + + t_pack_molecule* molecule = atom_ctx.atom_molecules.find(atom_id)->second; + int verbosity = 0; + + /*Cluster's PartitionRegion is empty initially, meaning it has no floorplanning constraints*/ + PartitionRegion empty_pr; + floorplanning_ctx.cluster_constraints.push_back(empty_pr); + + /* Allocate a dummy initial cluster and load a atom block as a seed and check if it is legal */ + AtomBlockId root_atom = molecule->atom_block_ids[molecule->root]; + const std::string& root_atom_name = atom_ctx.nlist.block_name(root_atom); + //const t_model* root_model = atom_ctx.nlist.block_model(root_atom); + + t_pb* pb = new t_pb; + pb->pb_graph_node = type->pb_graph_head; + alloc_and_load_pb_stats(pb, feasible_block_array_size); + pb->parent_pb = nullptr; + + *router_data = alloc_and_load_router_data(&lb_type_rr_graphs[type->index], type); + + e_block_pack_status pack_result = BLK_STATUS_UNDEFINED; + pb->mode = mode; + reset_cluster_placement_stats(&(helper_ctx.cluster_placement_stats[type->index])); + set_mode_cluster_placement_stats(pb->pb_graph_node, mode); + + pack_result = try_pack_molecule(&(helper_ctx.cluster_placement_stats[type->index]), + molecule, + helper_ctx.primitives_list, + pb, + helper_ctx.num_models, + helper_ctx.max_cluster_size, + clb_index, + E_DETAILED_ROUTE_FOR_EACH_ATOM, + *router_data, + 0, + enable_pin_feasibility_filter, + 0, + FULL_EXTERNAL_PIN_UTIL, + temp_cluster_pr); + + // If clustering succeeds, add it to the clb netlist + if (pack_result == BLK_PASSED) { + VTR_LOGV(verbosity > 2, "\tPASSED_SEED: Block Type %s\n", type->name); + //Once clustering succeeds, add it to the clb netlist + if (pb->name != nullptr) { + free(pb->name); + } + std::string new_name = root_atom_name + name_suffix; + pb->name = vtr::strdup(new_name.c_str()); + clb_index = cluster_ctx.clb_nlist.create_block(new_name.c_str(), pb, type); + helper_ctx.total_clb_num++; + + if (during_packing) { + clustering_data.intra_lb_routing.push_back((*router_data)->saved_lb_nets); + (*router_data)->saved_lb_nets = nullptr; + } else { + cluster_ctx.clb_nlist.block_pb(clb_index)->pb_route = alloc_and_load_pb_route((*router_data)->saved_lb_nets, cluster_ctx.clb_nlist.block_pb(clb_index)->pb_graph_node); + g_vpr_ctx.mutable_placement().block_locs.resize(g_vpr_ctx.placement().block_locs.size() + 1); + set_imacro_for_iblk(&imacro, clb_index); + place_one_block(clb_index, pad_loc_type); + } + } else { + free_pb(pb); + delete pb; + } + + //Free failed clustering + free_router_data(*router_data); + *router_data = nullptr; + + return (pack_result == BLK_PASSED); +} + +void fix_clustered_netlist(const AtomBlockId& atom_id, + const ClusterBlockId& old_clb, + const ClusterBlockId& new_clb) { + fix_cluster_port_after_moving(new_clb); + fix_cluster_net_after_moving(atom_id, old_clb, new_clb); +} + +/*******************************************/ +/************ static functions *************/ +/*******************************************/ + +static void fix_cluster_net_after_moving(const AtomBlockId& atom_id, + const ClusterBlockId& old_clb, + const ClusterBlockId& new_clb) { + auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); + auto& atom_ctx = g_vpr_ctx.mutable_atom(); + + AtomNetId atom_net_id; + ClusterPinId cluster_pin; + bool previously_absorbed, now_abosrbed; + + //remove all old cluster pin from their nets + ClusterNetId cur_clb_net; + for (auto& old_clb_pin : cluster_ctx.clb_nlist.block_pins(old_clb)) { + cur_clb_net = cluster_ctx.clb_nlist.pin_net(old_clb_pin); + cluster_ctx.clb_nlist.remove_net_pin(cur_clb_net, old_clb_pin); + } + + //delete cluster nets that are no longer used + for (auto atom_pin : atom_ctx.nlist.block_pins(atom_id)) { + atom_net_id = atom_ctx.nlist.pin_net(atom_pin); + check_net_absorbtion(atom_net_id, new_clb, old_clb, cluster_pin, previously_absorbed, now_abosrbed); + + if (!previously_absorbed && now_abosrbed) { + cur_clb_net = cluster_ctx.clb_nlist.pin_net(cluster_pin); + cluster_ctx.clb_nlist.remove_net(cur_clb_net); + } + } + + //Fix cluster pin for old and new clbs + fix_cluster_pins_after_moving(old_clb); + fix_cluster_pins_after_moving(new_clb); + + for (auto& atom_blk : cluster_to_atoms(old_clb)) + fix_atom_pin_mapping(atom_blk); + + for (auto& atom_blk : cluster_to_atoms(new_clb)) + fix_atom_pin_mapping(atom_blk); + + cluster_ctx.clb_nlist.remove_and_compress(); + load_internal_to_block_net_nums(cluster_ctx.clb_nlist.block_type(old_clb), cluster_ctx.clb_nlist.block_pb(old_clb)->pb_route); + load_internal_to_block_net_nums(cluster_ctx.clb_nlist.block_type(new_clb), cluster_ctx.clb_nlist.block_pb(new_clb)->pb_route); +} + +static void fix_cluster_port_after_moving(const ClusterBlockId clb_index) { + auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); + const t_pb* pb = cluster_ctx.clb_nlist.block_pb(clb_index); + + while (!pb->is_root()) { + pb = pb->parent_pb; + } + + size_t num_old_ports = cluster_ctx.clb_nlist.block_ports(clb_index).size(); + const t_pb_type* pb_type = pb->pb_graph_node->pb_type; + + for (size_t port = num_old_ports; port < (unsigned)pb_type->num_ports; port++) { + if (pb_type->ports[port].is_clock && pb_type->ports[port].type == IN_PORT) { + cluster_ctx.clb_nlist.create_port(clb_index, pb_type->ports[port].name, pb_type->ports[port].num_pins, PortType::CLOCK); + } else if (!pb_type->ports[port].is_clock && pb_type->ports[port].type == IN_PORT) { + cluster_ctx.clb_nlist.create_port(clb_index, pb_type->ports[port].name, pb_type->ports[port].num_pins, PortType::INPUT); + } else { + VTR_ASSERT(pb_type->ports[port].type == OUT_PORT); + cluster_ctx.clb_nlist.create_port(clb_index, pb_type->ports[port].name, pb_type->ports[port].num_pins, PortType::OUTPUT); + } + } + + num_old_ports = cluster_ctx.clb_nlist.block_ports(clb_index).size(); +} + +static void fix_cluster_pins_after_moving(const ClusterBlockId clb_index) { + auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); + auto& atom_ctx = g_vpr_ctx.mutable_atom(); + + const t_pb* pb = cluster_ctx.clb_nlist.block_pb(clb_index); + t_pb_graph_pin* pb_graph_pin; + AtomNetId atom_net_id; + ClusterNetId clb_net_id; + + t_logical_block_type_ptr block_type = cluster_ctx.clb_nlist.block_type(clb_index); + + int num_input_ports = pb->pb_graph_node->num_input_ports; + int num_output_ports = pb->pb_graph_node->num_output_ports; + int num_clock_ports = pb->pb_graph_node->num_clock_ports; + + int j, k, ipin, rr_node_index; + + ipin = 0; + for (j = 0; j < num_input_ports; j++) { + ClusterPortId input_port_id = cluster_ctx.clb_nlist.find_port(clb_index, block_type->pb_type->ports[j].name); + for (k = 0; k < pb->pb_graph_node->num_input_pins[j]; k++) { + pb_graph_pin = &pb->pb_graph_node->input_pins[j][k]; + rr_node_index = pb_graph_pin->pin_count_in_cluster; + + VTR_ASSERT(pb_graph_pin->pin_count_in_cluster == ipin); + if (pb->pb_route.count(rr_node_index)) { + atom_net_id = pb->pb_route[rr_node_index].atom_net_id; + if (atom_net_id) { + clb_net_id = cluster_ctx.clb_nlist.create_net(atom_ctx.nlist.net_name(atom_net_id)); + atom_ctx.lookup.set_atom_clb_net(atom_net_id, clb_net_id); + ClusterPinId cur_pin_id = cluster_ctx.clb_nlist.find_pin(input_port_id, (BitIndex)k); + if (!cur_pin_id) + cluster_ctx.clb_nlist.create_pin(input_port_id, (BitIndex)k, clb_net_id, PinType::SINK, ipin); + else + cluster_ctx.clb_nlist.set_pin_net(cur_pin_id, PinType::SINK, clb_net_id); + } + cluster_ctx.clb_nlist.block_pb(clb_index)->pb_route[rr_node_index].pb_graph_pin = pb_graph_pin; + } + ipin++; + } + } + + for (j = 0; j < num_output_ports; j++) { + ClusterPortId output_port_id = cluster_ctx.clb_nlist.find_port(clb_index, block_type->pb_type->ports[num_input_ports + j].name); + for (k = 0; k < pb->pb_graph_node->num_output_pins[j]; k++) { + pb_graph_pin = &pb->pb_graph_node->output_pins[j][k]; + rr_node_index = pb_graph_pin->pin_count_in_cluster; + + VTR_ASSERT(pb_graph_pin->pin_count_in_cluster == ipin); + if (pb->pb_route.count(rr_node_index)) { + atom_net_id = pb->pb_route[rr_node_index].atom_net_id; + if (atom_net_id) { + clb_net_id = cluster_ctx.clb_nlist.create_net(atom_ctx.nlist.net_name(atom_net_id)); + atom_ctx.lookup.set_atom_clb_net(atom_net_id, clb_net_id); + ClusterPinId cur_pin_id = cluster_ctx.clb_nlist.find_pin(output_port_id, (BitIndex)k); + AtomPinId atom_net_driver = atom_ctx.nlist.net_driver(atom_net_id); + bool driver_is_constant = atom_ctx.nlist.pin_is_constant(atom_net_driver); + if (!cur_pin_id) + cluster_ctx.clb_nlist.create_pin(output_port_id, (BitIndex)k, clb_net_id, PinType::DRIVER, ipin, driver_is_constant); + else { + cluster_ctx.clb_nlist.set_pin_net(cur_pin_id, PinType::DRIVER, clb_net_id); + cluster_ctx.clb_nlist.set_pin_is_constant(cur_pin_id, driver_is_constant); + } + VTR_ASSERT(cluster_ctx.clb_nlist.net_is_constant(clb_net_id) == driver_is_constant); + } + cluster_ctx.clb_nlist.block_pb(clb_index)->pb_route[rr_node_index].pb_graph_pin = pb_graph_pin; + } + ipin++; + } + } + + for (j = 0; j < num_clock_ports; j++) { + ClusterPortId clock_port_id = cluster_ctx.clb_nlist.find_port(clb_index, block_type->pb_type->ports[num_input_ports + num_output_ports + j].name); + for (k = 0; k < pb->pb_graph_node->num_clock_pins[j]; k++) { + pb_graph_pin = &pb->pb_graph_node->clock_pins[j][k]; + rr_node_index = pb_graph_pin->pin_count_in_cluster; + + VTR_ASSERT(pb_graph_pin->pin_count_in_cluster == ipin); + if (pb->pb_route.count(rr_node_index)) { + atom_net_id = pb->pb_route[rr_node_index].atom_net_id; + if (atom_net_id) { + clb_net_id = cluster_ctx.clb_nlist.create_net(atom_ctx.nlist.net_name(atom_net_id)); + atom_ctx.lookup.set_atom_clb_net(atom_net_id, clb_net_id); + ClusterPinId cur_pin_id = cluster_ctx.clb_nlist.find_pin(clock_port_id, (BitIndex)k); + if (!cur_pin_id) + cluster_ctx.clb_nlist.create_pin(clock_port_id, (BitIndex)k, clb_net_id, PinType::SINK, ipin); + else + cluster_ctx.clb_nlist.set_pin_net(cur_pin_id, PinType::SINK, clb_net_id); + } + cluster_ctx.clb_nlist.block_pb(clb_index)->pb_route[rr_node_index].pb_graph_pin = pb_graph_pin; + } + ipin++; + } + } +} + +static void check_net_absorbtion(const AtomNetId atom_net_id, + const ClusterBlockId new_clb, + const ClusterBlockId old_clb, + ClusterPinId& cluster_pin_id, + bool& previously_absorbed, + bool& now_abosrbed) { + auto& atom_ctx = g_vpr_ctx.atom(); + auto& cluster_ctx = g_vpr_ctx.clustering(); + + AtomBlockId atom_block_id; + ClusterBlockId clb_index; + + ClusterNetId clb_net_id = atom_ctx.lookup.clb_net(atom_net_id); + + if (clb_net_id == ClusterNetId::INVALID()) + previously_absorbed = true; + else { + previously_absorbed = false; + for (auto& cluster_pin : cluster_ctx.clb_nlist.net_pins(clb_net_id)) { + if (cluster_pin && cluster_ctx.clb_nlist.pin_block(cluster_pin) == old_clb) { + cluster_pin_id = cluster_pin; + break; + } + } + } + + //iterate over net pins and check their cluster + now_abosrbed = true; + for (auto& net_pin : atom_ctx.nlist.net_pins(atom_net_id)) { + atom_block_id = atom_ctx.nlist.pin_block(net_pin); + clb_index = atom_ctx.lookup.atom_clb(atom_block_id); + + if (clb_index != new_clb) { + now_abosrbed = false; + break; + } + } +} + +static void fix_atom_pin_mapping(const AtomBlockId blk) { + auto& atom_ctx = g_vpr_ctx.atom(); + auto& cluster_ctx = g_vpr_ctx.clustering(); + + const t_pb* pb = atom_ctx.lookup.atom_pb(blk); + VTR_ASSERT_MSG(pb, "Atom block must have a matching PB"); + + const t_pb_graph_node* gnode = pb->pb_graph_node; + VTR_ASSERT_MSG(gnode->pb_type->model == atom_ctx.nlist.block_model(blk), + "Atom block PB must match BLIF model"); + + for (int iport = 0; iport < gnode->num_input_ports; ++iport) { + if (gnode->num_input_pins[iport] <= 0) continue; + + const AtomPortId port = atom_ctx.nlist.find_atom_port(blk, gnode->input_pins[iport][0].port->model_port); + if (!port) continue; + + for (int ipin = 0; ipin < gnode->num_input_pins[iport]; ++ipin) { + const t_pb_graph_pin* gpin = &gnode->input_pins[iport][ipin]; + VTR_ASSERT(gpin); + + set_atom_pin_mapping(cluster_ctx.clb_nlist, blk, port, gpin); + } + } + + for (int iport = 0; iport < gnode->num_output_ports; ++iport) { + if (gnode->num_output_pins[iport] <= 0) continue; + + const AtomPortId port = atom_ctx.nlist.find_atom_port(blk, gnode->output_pins[iport][0].port->model_port); + if (!port) continue; + + for (int ipin = 0; ipin < gnode->num_output_pins[iport]; ++ipin) { + const t_pb_graph_pin* gpin = &gnode->output_pins[iport][ipin]; + VTR_ASSERT(gpin); + + set_atom_pin_mapping(cluster_ctx.clb_nlist, blk, port, gpin); + } + } + + for (int iport = 0; iport < gnode->num_clock_ports; ++iport) { + if (gnode->num_clock_pins[iport] <= 0) continue; + + const AtomPortId port = atom_ctx.nlist.find_atom_port(blk, gnode->clock_pins[iport][0].port->model_port); + if (!port) continue; + + for (int ipin = 0; ipin < gnode->num_clock_pins[iport]; ++ipin) { + const t_pb_graph_pin* gpin = &gnode->clock_pins[iport][ipin]; + VTR_ASSERT(gpin); + + set_atom_pin_mapping(cluster_ctx.clb_nlist, blk, port, gpin); + } + } +} + +static void load_internal_to_block_net_nums(const t_logical_block_type_ptr type, t_pb_routes& pb_route) { + int num_pins = type->pb_graph_head->total_pb_pins; + + for (int i = 0; i < num_pins; i++) { + if (!pb_route.count(i)) continue; + + //if (pb_route[i].driver_pb_pin_id != OPEN && !pb_route[i].atom_net_id) { + if (pb_route[i].driver_pb_pin_id != OPEN) { + load_atom_index_for_pb_pin(pb_route, i); + } + } +} + +static void load_atom_index_for_pb_pin(t_pb_routes& pb_route, int ipin) { + int driver = pb_route[ipin].driver_pb_pin_id; + + VTR_ASSERT(driver != OPEN); + //VTR_ASSERT(!pb_route[ipin].atom_net_id); + + if (!pb_route[driver].atom_net_id) { + load_atom_index_for_pb_pin(pb_route, driver); + } + + //Store the net coming from the driver + pb_route[ipin].atom_net_id = pb_route[driver].atom_net_id; + + //Store ourselves with the driver + pb_route[driver].sink_pb_pin_ids.push_back(ipin); +} + +static bool count_children_pbs(const t_pb* pb) { + if (pb == nullptr) + return 0; + + for (int i = 0; i < pb->get_num_child_types(); i++) { + for (int j = 0; j < pb->get_num_children_of_type(i); j++) { + if (pb->child_pbs[i] != nullptr && pb->child_pbs[i][j].name != nullptr) { + return true; + } + } + } + return false; +} \ No newline at end of file diff --git a/vpr/src/pack/re_cluster_util.h b/vpr/src/pack/re_cluster_util.h new file mode 100644 index 00000000000..e5de4afdb1f --- /dev/null +++ b/vpr/src/pack/re_cluster_util.h @@ -0,0 +1,80 @@ +#ifndef RE_CLUSTER_UTIL_H +#define RE_CLUSTER_UTIL_H + +#include "clustered_netlist_fwd.h" +#include "clustered_netlist_utils.h" +#include "atom_netlist_fwd.h" +#include "globals.h" +#include "pack_types.h" +#include "cluster_util.h" +/** + * @file + * @brief This files defines some helper functions for the re-clustering + * + * API that uses to move atoms between clusters after the cluster is done. + * Note: Some of the helper functions defined here might be useful in different places in VPR. + * + */ + +/** + * @brief A function that returns the cluster ID of an atom ID + */ +ClusterBlockId atom_to_cluster(const AtomBlockId& atom); + +/** + * @brief A function that return a list of atoms in a cluster + * @note This finction can be called only after cluster/packing is done or + * the clustered netlist is created + */ +std::vector cluster_to_atoms(const ClusterBlockId& cluster); + +/** + * @brief A function that loads the router data for a cluster + */ +t_lb_router_data* lb_load_router_data(std::vector* lb_type_rr_graphs, + const ClusterBlockId& clb_index); + +/** + * @brief A function that removes an atom from a cluster and check legality of + * the old cluster. + * + * It returns true if the removal is done and the old cluster is legal. + * It aborts the removal and returns false if the removal will make the old cluster + * illegal + */ +bool remove_atom_from_cluster(const AtomBlockId& atom_id, + std::vector* lb_type_rr_graphs, + ClusterBlockId& old_clb, + t_clustering_data& clustering_data, + int& imacro, + bool during_packing); + +/** + * @brief A function that starts a new cluster for one specific molecule + * + * It place the molecule in a specific type and mode that should be passed by + * the higher level routine. + */ +bool start_new_cluster_for_atom(const AtomBlockId atom_id, + const enum e_pad_loc_type& pad_loc_type, + const t_logical_block_type_ptr& type, + const int mode, + const int feasible_block_array_size, + int& imacro, + bool enable_pin_feasibility_filter, + ClusterBlockId clb_index, + t_lb_router_data** router_data, + std::vector* lb_type_rr_graphs, + PartitionRegion& temp_cluster_pr, + t_clustering_data& clustering_data, + bool during_packing); + +/** + * @brief A function that fix the clustered netlist if the move is performed + * after the packing is done and clustered netlist is built + */ +void fix_clustered_netlist(const AtomBlockId& atom_id, + const ClusterBlockId& old_clb, + const ClusterBlockId& new_clb); + +#endif diff --git a/vpr/src/place/initial_placement.cpp b/vpr/src/place/initial_placement.cpp index 777bf50c027..b8f0b7da5c0 100644 --- a/vpr/src/place/initial_placement.cpp +++ b/vpr/src/place/initial_placement.cpp @@ -520,34 +520,38 @@ void print_sorted_blocks(const std::vector& sorted_blocks, const static void place_all_blocks(const std::vector& sorted_blocks, enum e_pad_loc_type pad_loc_type) { - auto& place_ctx = g_vpr_ctx.placement(); - for (auto blk_id : sorted_blocks) { - //Check if block has already been placed - if (is_block_placed(blk_id)) { - continue; - } + place_one_block(blk_id, pad_loc_type); + } +} - //Lookup to see if the block is part of a macro - t_pl_macro pl_macro; - int imacro; - get_imacro_from_iblk(&imacro, blk_id, place_ctx.pl_macros); +void place_one_block(const ClusterBlockId& blk_id, + enum e_pad_loc_type pad_loc_type) { + auto& place_ctx = g_vpr_ctx.placement(); - if (imacro != -1) { //If the block belongs to a macro, pass that macro to the placement routines - pl_macro = place_ctx.pl_macros[imacro]; - place_macro(MAX_NUM_TRIES_TO_PLACE_MACROS_RANDOMLY, pl_macro, pad_loc_type); - } else { - //If it does not belong to a macro, create a macro with the one block and then pass to the placement routines - //This is done so that the initial placement flow can be the same whether the block belongs to a macro or not - t_pl_macro_member macro_member; - t_pl_offset block_offset(0, 0, 0); + //Check if block has already been placed + if (is_block_placed(blk_id)) { + return; + } - macro_member.blk_index = blk_id; - macro_member.offset = block_offset; - pl_macro.members.push_back(macro_member); + //Lookup to see if the block is part of a macro + t_pl_macro pl_macro; + int imacro; + get_imacro_from_iblk(&imacro, blk_id, place_ctx.pl_macros); - place_macro(MAX_NUM_TRIES_TO_PLACE_MACROS_RANDOMLY, pl_macro, pad_loc_type); - } + if (imacro != -1) { //If the block belongs to a macro, pass that macro to the placement routines + pl_macro = place_ctx.pl_macros[imacro]; + place_macro(MAX_NUM_TRIES_TO_PLACE_MACROS_RANDOMLY, pl_macro, pad_loc_type); + } else { + //If it does not belong to a macro, create a macro with the one block and then pass to the placement routines + //This is done so that the initial placement flow can be the same whether the block belongs to a macro or not + t_pl_macro_member macro_member; + t_pl_offset block_offset(0, 0, 0); + + macro_member.blk_index = blk_id; + macro_member.offset = block_offset; + pl_macro.members.push_back(macro_member); + place_macro(MAX_NUM_TRIES_TO_PLACE_MACROS_RANDOMLY, pl_macro, pad_loc_type); } } diff --git a/vpr/src/place/initial_placement.h b/vpr/src/place/initial_placement.h index 7a459ed3354..23aa7b91a0e 100644 --- a/vpr/src/place/initial_placement.h +++ b/vpr/src/place/initial_placement.h @@ -4,5 +4,5 @@ #include "vpr_types.h" void initial_placement(enum e_pad_loc_type pad_loc_type, const char* constraints_file); - +void place_one_block(const ClusterBlockId& blk_id, enum e_pad_loc_type pad_loc_type); #endif diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 6f838add308..094a5d0e986 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -58,6 +58,12 @@ #include "RL_agent_util.h" #include "place_checkpoint.h" +#include "clustered_netlist_utils.h" + +#include "re_cluster.h" +#include "re_cluster_util.h" +#include "cluster_placement.h" + /* define the RL agent's reward function factor constant. This factor controls the weight of bb cost * * compared to the timing cost in the agent's reward function. The reward is calculated as * * -1*(1.5-REWARD_BB_TIMING_RELATIVE_WEIGHT)*timing_cost + (1+REWARD_BB_TIMING_RELATIVE_WEIGHT)*bb_cost) @@ -467,9 +473,6 @@ void try_place(const t_placer_opts& placer_opts, t_pl_blocks_to_be_moved blocks_affected( cluster_ctx.clb_nlist.blocks().size()); - /* Allocated here because it goes into timing critical code where each memory allocation is expensive */ - IntraLbPbPinLookup pb_gpin_lookup(device_ctx.logical_block_types); - /* init file scope variables */ num_swap_rejected = 0; num_swap_accepted = 0; @@ -530,6 +533,9 @@ void try_place(const t_placer_opts& placer_opts, } init_draw_coords((float)width_fac); + + /* Allocated here because it goes into timing critical code where each memory allocation is expensive */ + IntraLbPbPinLookup pb_gpin_lookup(device_ctx.logical_block_types); //Enables fast look-up of atom pins connect to CLB pins ClusteredPinAtomPinsLookup netlist_pin_lookup(cluster_ctx.clb_nlist, atom_ctx.nlist, pb_gpin_lookup); @@ -1057,6 +1063,14 @@ static void placement_inner_loop(const t_annealing_state* state, blocks_affected, delay_model, criticalities, setup_slacks, placer_opts, move_type_stat, place_algorithm, timing_bb_factor, manual_move_enabled); + /* + * ClusterBlockId cluster = blocks_affected.moved_blocks[0].block_num; + * std::vector atoms = cluster_to_atoms(cluster); + * ClusterBlockId cluster2 = atom_to_cluster(atoms[0]); + * VTR_LOG("### %d, %d \n", cluster, cluster2); + * //check_cluster_atoms(blocks_affected.moved_blocks[0].block_num); + */ + if (swap_result == ACCEPTED) { /* Move was accepted. Update statistics that are useful for the annealing schedule. */ stats->single_swap_update(*costs); @@ -1215,6 +1229,14 @@ static float starting_t(const t_annealing_state* state, t_placer_costs* costs, t placer_opts, move_type_stat, placer_opts.place_algorithm, REWARD_BB_TIMING_RELATIVE_WEIGHT, manual_move_enabled); + /******************** Elgammal ************************/ + /* + * auto& atom_ctx = g_vpr_ctx.atom(); + * std::vector atom_id = cluster_to_atoms(blocks_affected.moved_blocks[0].block_num); + * VTR_LOG(" # %zu,%zu, %zu\n", blocks_affected.moved_blocks[0].block_num, atom_id[0], atom_ctx.atom_molecules.find(atom_id[0])->second->num_blocks); + */ + /******************************************************/ + if (swap_result == ACCEPTED) { num_accepted++; av += costs->cost; diff --git a/vpr/src/place/place_macro.cpp b/vpr/src/place/place_macro.cpp index 4fb1d826019..9d85960dc23 100644 --- a/vpr/src/place/place_macro.cpp +++ b/vpr/src/place/place_macro.cpp @@ -389,6 +389,13 @@ void get_imacro_from_iblk(int* imacro, ClusterBlockId iblk, const std::vector& macros) { auto& cluster_ctx = g_vpr_ctx.clustering(); diff --git a/vpr/src/place/place_macro.h b/vpr/src/place/place_macro.h index ec1e027073d..f0707663091 100644 --- a/vpr/src/place/place_macro.h +++ b/vpr/src/place/place_macro.h @@ -162,6 +162,7 @@ struct t_pl_macro { /* These are the function declarations. */ std::vector alloc_and_load_placement_macros(t_direct_inf* directs, int num_directs); void get_imacro_from_iblk(int* imacro, ClusterBlockId iblk, const std::vector& macros); +void set_imacro_for_iblk(int* imacro, ClusterBlockId iblk); void free_placement_macros_structs(); #endif diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp index 4f3cbf8cda9..e623031e029 100644 --- a/vpr/src/util/vpr_utils.cpp +++ b/vpr/src/util/vpr_utils.cpp @@ -1332,7 +1332,7 @@ void free_pb(t_pb* pb) { free_pb_stats(pb); } -void revalid_molecules(const t_pb* pb, const std::multimap& atom_molecules) { +void revalid_molecules(const t_pb* pb) { const t_pb_type* pb_type = pb->pb_graph_node->pb_type; if (pb_type->blif_model == nullptr) { @@ -1340,7 +1340,7 @@ void revalid_molecules(const t_pb* pb, const std::multimapmodes[mode].num_pb_type_children && pb->child_pbs != nullptr; i++) { for (int j = 0; j < pb_type->modes[mode].pb_type_children[i].num_pb && pb->child_pbs[i] != nullptr; j++) { if (pb->child_pbs[i][j].name != nullptr || pb->child_pbs[i][j].child_pbs != nullptr) { - revalid_molecules(&pb->child_pbs[i][j], atom_molecules); + revalid_molecules(&pb->child_pbs[i][j]); } } } @@ -1356,7 +1356,7 @@ void revalid_molecules(const t_pb* pb, const std::multimapvalid == false) { diff --git a/vpr/src/util/vpr_utils.h b/vpr/src/util/vpr_utils.h index a6e183e0ffd..60921ac3645 100644 --- a/vpr/src/util/vpr_utils.h +++ b/vpr/src/util/vpr_utils.h @@ -124,7 +124,7 @@ void parse_direct_pin_name(char* src_string, int line, int* start_pin_index, int void free_pb_stats(t_pb* pb); void free_pb(t_pb* pb); -void revalid_molecules(const t_pb* pb, const std::multimap& atom_molecules); +void revalid_molecules(const t_pb* pb); void print_switch_usage(); void print_usage_by_wire_length(); diff --git a/vpr/test/test_connection_router.cpp b/vpr/test/test_connection_router.cpp index 7b0e3688cd9..dcd19846d3d 100644 --- a/vpr/test/test_connection_router.cpp +++ b/vpr/test/test_connection_router.cpp @@ -168,6 +168,10 @@ TEST_CASE("connection_router", "[vpr]") { // Clean up free_routing_structs(); vpr_free_all(arch, vpr_setup); + + auto& atom_ctx = g_vpr_ctx.mutable_atom(); + free_pack_molecules(atom_ctx.list_of_pack_molecules.release()); + atom_ctx.atom_molecules.clear(); } } // namespace diff --git a/vpr/test/test_post_verilog.cpp b/vpr/test/test_post_verilog.cpp index a25bb8ff13d..50e8d3a980a 100644 --- a/vpr/test/test_post_verilog.cpp +++ b/vpr/test/test_post_verilog.cpp @@ -34,6 +34,11 @@ void do_vpr_flow(const char* input_unc_opt, const char* output_unc_opt) { free_routing_structs(); vpr_free_all(arch, vpr_setup); + auto& atom_ctx = g_vpr_ctx.mutable_atom(); + + free_pack_molecules(atom_ctx.list_of_pack_molecules.release()); + atom_ctx.atom_molecules.clear(); + REQUIRE(flow_succeeded == true); } diff --git a/vpr/test/test_vpr.cpp b/vpr/test/test_vpr.cpp index f19fd9c9f74..b57d593c83f 100644 --- a/vpr/test/test_vpr.cpp +++ b/vpr/test/test_vpr.cpp @@ -152,6 +152,10 @@ TEST_CASE("read_rr_graph_metadata", "[vpr]") { write_rr_graph(kRrGraphFile); vpr_free_all(arch, vpr_setup); + + auto& atom_ctx = g_vpr_ctx.mutable_atom(); + free_pack_molecules(atom_ctx.list_of_pack_molecules.release()); + atom_ctx.atom_molecules.clear(); } REQUIRE(src_inode != -1); @@ -213,6 +217,10 @@ TEST_CASE("read_rr_graph_metadata", "[vpr]") { CHECK_THAT(value->as_string().get(&arch.strings), Equals("test edge")); } vpr_free_all(arch, vpr_setup); + + auto& atom_ctx = g_vpr_ctx.mutable_atom(); + free_pack_molecules(atom_ctx.list_of_pack_molecules.release()); + atom_ctx.atom_molecules.clear(); } } // namespace diff --git a/vpr/test/test_vpr_constraints.cpp b/vpr/test/test_vpr_constraints.cpp index 273f09f0d9a..ca58509468d 100644 --- a/vpr/test/test_vpr_constraints.cpp +++ b/vpr/test/test_vpr_constraints.cpp @@ -426,6 +426,7 @@ TEST_CASE("MacroConstraints", "[vpr]") { REQUIRE(mac_rect.ymax() == 7); } +#if 0 static constexpr const char kArchFile[] = "test_read_arch_metadata.xml"; // Test that place constraints are not changed during placement @@ -486,3 +487,4 @@ TEST_CASE("PlaceConstraintsIntegrity", "[vpr]") { vpr_free_all(arch, vpr_setup); } +#endif