diff --git a/vpr/src/pack/greedy_clusterer.cpp b/vpr/src/pack/greedy_clusterer.cpp index b7845c4f564..87b53467150 100644 --- a/vpr/src/pack/greedy_clusterer.cpp +++ b/vpr/src/pack/greedy_clusterer.cpp @@ -144,8 +144,7 @@ GreedyClusterer::do_clustering(ClusterLegalizer& cluster_legalizer, pre_cluster_timing_manager_); // Pick the first seed molecule. - PackMoleculeId seed_mol_id = seed_selector.get_next_seed(prepacker, - cluster_legalizer); + PackMoleculeId seed_mol_id = seed_selector.get_next_seed(cluster_legalizer); /**************************************************************** * Clustering @@ -213,8 +212,7 @@ GreedyClusterer::do_clustering(ClusterLegalizer& cluster_legalizer, cluster_legalizer); // Pick new seed. - seed_mol_id = seed_selector.get_next_seed(prepacker, - cluster_legalizer); + seed_mol_id = seed_selector.get_next_seed(cluster_legalizer); } // If this architecture has LE physical block, report its usage. diff --git a/vpr/src/pack/greedy_seed_selector.cpp b/vpr/src/pack/greedy_seed_selector.cpp index 9850500400a..9ce473cba1e 100644 --- a/vpr/src/pack/greedy_seed_selector.cpp +++ b/vpr/src/pack/greedy_seed_selector.cpp @@ -9,6 +9,8 @@ #include #include +#include +#include #include "PreClusterTimingManager.h" #include "atom_netlist.h" #include "cluster_legalizer.h" @@ -129,11 +131,12 @@ static inline float get_seed_gain(AtomBlockId blk_id, * criticalities. 
*/ static inline void print_seed_gains(const char* fname, - const std::vector& seed_atoms, - const vtr::vector& atom_gain, + const std::vector& seed_mols, + const vtr::vector& molecule_gain, const vtr::vector& atom_criticality, const AtomNetlist& atom_netlist, - const LogicalModels& models) { + const LogicalModels& models, + const Prepacker& prepacker) { FILE* fp = vtr::fopen(fname, "w"); // For pretty formatting determine the maximum name length @@ -148,16 +151,18 @@ static inline void print_seed_gains(const char* fname, fprintf(fp, "%-*s %-*s %8s %8s\n", max_name_len, "atom_block_name", max_type_len, "atom_block_type", "gain", "criticality"); fprintf(fp, "\n"); - for (auto blk_id : seed_atoms) { - std::string name = atom_netlist.block_name(blk_id); - fprintf(fp, "%-*s ", max_name_len, name.c_str()); + for (auto mol_id : seed_mols) { + for (AtomBlockId blk_id : prepacker.get_molecule(mol_id).atom_block_ids) { + std::string name = atom_netlist.block_name(blk_id); + fprintf(fp, "%-*s ", max_name_len, name.c_str()); - std::string model_name = models.model_name(atom_netlist.block_model(blk_id)); - fprintf(fp, "%-*s ", max_type_len, model_name.c_str()); + std::string model_name = models.model_name(atom_netlist.block_model(blk_id)); + fprintf(fp, "%-*s ", max_type_len, model_name.c_str()); - fprintf(fp, "%*f ", std::max((int)strlen("gain"), 8), atom_gain[blk_id]); - fprintf(fp, "%*f ", std::max((int)strlen("criticality"), 8), atom_criticality[blk_id]); - fprintf(fp, "\n"); + fprintf(fp, "%*f ", std::max((int)strlen("gain"), 8), molecule_gain[mol_id]); + fprintf(fp, "%*f ", std::max((int)strlen("criticality"), 8), atom_criticality[blk_id]); + fprintf(fp, "\n"); + } } fclose(fp); @@ -169,8 +174,8 @@ GreedySeedSelector::GreedySeedSelector(const AtomNetlist& atom_netlist, const t_molecule_stats& max_molecule_stats, const LogicalModels& models, const PreClusterTimingManager& pre_cluster_timing_manager) - : seed_atoms_(atom_netlist.blocks().begin(), 
atom_netlist.blocks().end()) { - // Seed atoms list is initialized with all atoms in the atom netlist. + : seed_mols_(prepacker.molecules().begin(), prepacker.molecules().end()) { + // Seed molecule list is initialized with all molecules in the netlist. // Pre-compute the criticality of each atom // Default criticalities set to zero (e.g. if not timing driven) @@ -183,20 +188,31 @@ GreedySeedSelector::GreedySeedSelector(const AtomNetlist& atom_netlist, } } - // Maintain a lookup table of the seed gain for each atom. This will be - // used to sort the seed atoms. + // Maintain a lookup table of the seed gain for each molecule. This will be + // used to sort the seed molecules. // Initially all gains are zero. - vtr::vector atom_gains(atom_netlist.blocks().size(), 0.f); - - // Get the seed gain of each atom. - for (AtomBlockId blk_id : atom_netlist.blocks()) { - atom_gains[blk_id] = get_seed_gain(blk_id, - atom_netlist, - prepacker, - models, - seed_type, - max_molecule_stats, - atom_criticality); + vtr::vector molecule_gains(seed_mols_.size(), 0.f); + + // Get the seed gain of each molecule. + for (PackMoleculeId mol_id : seed_mols_) { + // Gain of each molecule is the maximum gain of its atoms + float mol_gain = std::numeric_limits::lowest(); + const std::vector& molecule_atoms = prepacker.get_molecule(mol_id).atom_block_ids; + for (AtomBlockId blk_id : molecule_atoms) { + // If the molecule does not fit the entire pack pattern, it's possible to have invalid block ids in the molecule_atoms vector + if (blk_id == AtomBlockId::INVALID()) { + continue; + } + float atom_gain = get_seed_gain(blk_id, + atom_netlist, + prepacker, + models, + seed_type, + max_molecule_stats, + atom_criticality); + mol_gain = std::max(mol_gain, atom_gain); + } + molecule_gains[mol_id] = mol_gain; } // Sort seeds in descending order of seed gain (i.e. 
highest seed gain first) @@ -207,47 +223,42 @@ GreedySeedSelector::GreedySeedSelector(const AtomNetlist& atom_netlist, // std::sort which does not specify how equal values are handled). Using a stable // sort ensures that regardless of the underlying sorting algorithm the same seed // order is produced regardless of compiler. - auto by_descending_gain = [&](const AtomBlockId lhs, const AtomBlockId rhs) { - return atom_gains[lhs] > atom_gains[rhs]; + auto by_descending_gain = [&](const PackMoleculeId lhs, const PackMoleculeId rhs) { + return molecule_gains[lhs] > molecule_gains[rhs]; }; - std::stable_sort(seed_atoms_.begin(), seed_atoms_.end(), by_descending_gain); + std::stable_sort(seed_mols_.begin(), seed_mols_.end(), by_descending_gain); // Print the seed gains if requested. if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_CLUSTERING_BLOCK_CRITICALITIES)) { print_seed_gains(getEchoFileName(E_ECHO_CLUSTERING_BLOCK_CRITICALITIES), - seed_atoms_, atom_gains, atom_criticality, atom_netlist, models); + seed_mols_, molecule_gains, atom_criticality, atom_netlist, models, prepacker); } // Set the starting seed index (the index of the first molecule to propose). // The index of the first seed to propose is the first molecule in the - // seed atoms vector (i.e. the one with the highest seed gain). + // seed molecules vector (i.e. the one with the highest seed gain). seed_index_ = 0; } -PackMoleculeId GreedySeedSelector::get_next_seed(const Prepacker& prepacker, - const ClusterLegalizer& cluster_legalizer) { - while (seed_index_ < seed_atoms_.size()) { - // Get the current seed atom at the seed index and increment the +PackMoleculeId GreedySeedSelector::get_next_seed(const ClusterLegalizer& cluster_legalizer) { + while (seed_index_ < seed_mols_.size()) { + // Get the current seed molecule at the seed index and increment the // seed index. // All previous seed indices have been either proposed already or // are already clustered. 
This process assumes that once an atom // is clustered it will never become unclustered. - AtomBlockId seed_blk_id = seed_atoms_[seed_index_++]; + PackMoleculeId seed_molecule_id = seed_mols_[seed_index_++]; - // If this atom has been clustered, it cannot be proposed as a seed. + // If this molecule has been clustered, it cannot be proposed as a seed. // Skip to the next seed. - if (cluster_legalizer.is_atom_clustered(seed_blk_id)) + if (cluster_legalizer.is_mol_clustered(seed_molecule_id)) { continue; - - // Get the molecule that contains this atom and return it as the - // next seed. - PackMoleculeId seed_molecule_id = prepacker.get_atom_molecule(seed_blk_id); - VTR_ASSERT(!cluster_legalizer.is_mol_clustered(seed_molecule_id)); + } return seed_molecule_id; } // If the previous loop does not return a molecule, it implies that all - // atoms have been clustered or have already been proposed as a seed. + // molecules have been clustered or have already been proposed as a seed. // Return nullptr to signify that there are no further seeds. return PackMoleculeId::INVALID(); } diff --git a/vpr/src/pack/greedy_seed_selector.h b/vpr/src/pack/greedy_seed_selector.h index 0207949bef1..7ddcd519938 100644 --- a/vpr/src/pack/greedy_seed_selector.h +++ b/vpr/src/pack/greedy_seed_selector.h @@ -68,32 +68,24 @@ class GreedySeedSelector { * This method assumes that once a molecule is clustered, it will never be * unclustered. * - * @param prepacker - * The prepacker object that stores the molecules. * @param cluster_legalizer * The cluster legalizer object that is used to create the * clusters. This is used to check if a molecule has already * been clustered or not. 
*/ - PackMoleculeId get_next_seed(const Prepacker& prepacker, - const ClusterLegalizer& cluster_legalizer); + PackMoleculeId get_next_seed(const ClusterLegalizer& cluster_legalizer); - // TODO: Maybe create an update_seed_gains method to update the seed atoms + // TODO: Maybe create an update_seed_gains method to update the seed molecules // list using current clustering information. private: - /// @brief The index of the next seed to propose in the seed_atoms vector. + /// @brief The index of the next seed to propose in the seed_mols_ vector. /// This is set to 0 in the constructor and incremented as more seeds /// are proposed. size_t seed_index_; - /// @brief A list of seed atoms, sorted in decreasing order of gain. This + /// @brief A list of seed molecules, sorted in decreasing order of gain. This /// is computed in the constructor and is traversed when a new seed /// is being proposed. - // FIXME: This should really be seed molecules. It looks like the only - // reason it isn't is because of the atom criticality. May want to - // create the concept of molecule criticality. Currently, the max - // criticality of any block in the molecule is technically being - // used. 
- std::vector seed_atoms_; + std::vector seed_mols_; }; diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_tight_floorplan/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_tight_floorplan/config/golden_results.txt index e381a9de6fe..2733f9b2a28 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_tight_floorplan/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_tight_floorplan/config/golden_results.txt @@ -1,4 +1,4 @@ - arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength 
crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time - stratixiv_arch_neuron.timing.xml neuron_stratixiv_arch_timing.blif common_-sdc_file_sdc/samples/neuron_stratixiv_arch_timing.sdc_-read_vpr_constraints_tasks/regression_tests/vtr_reg_nightly_test5/vpr_tight_floorplan/sixteenth.xml 521.09 vpr 2.86 GiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 42 -1 -1 success v8.0.0-11925-ga544f5fea-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2025-01-14T21:35:49 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/release/vtr-verilog-to-routing 2999364 42 35 119888 86875 1 51488 3609 129 96 12384 -1 neuron 1754.5 MiB 201.53 588950 3296458 1251917 1696829 347712 2929.1 MiB 120.52 0.99 7.81281 -83697.4 -6.81281 5.17837 0.07 0.394336 0.331354 48.5767 41.2186 -1 -1 -1 -1 -1 774816 15 0 0 2.28639e+08 18462.4 38.71 65.6047 56.4158 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 - stratixiv_arch_neuron.timing.xml neuron_stratixiv_arch_timing.blif common_-sdc_file_sdc/samples/neuron_stratixiv_arch_timing.sdc_-read_vpr_constraints_tasks/regression_tests/vtr_reg_nightly_test5/vpr_tight_floorplan/half_blocks_half.xml 448.82 vpr 2.85 GiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 42 -1 -1 success v8.0.0-11925-ga544f5fea-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2025-01-14T21:35:49 betzgrp-wintermute.eecg.utoronto.ca 
/home/elgamma8/research/release/vtr-verilog-to-routing 2992528 42 35 119888 86875 1 50882 3437 129 96 12384 -1 neuron 1746.7 MiB 106.69 578743 3192458 1206112 1829273 157073 2922.4 MiB 136.43 1.09 8.30532 -83283.1 -7.30532 5.88288 0.07 0.398109 0.334174 50.9205 42.9915 -1 -1 -1 -1 -1 751914 17 0 0 2.28639e+08 18462.4 38.69 68.6074 58.8539 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 - stratixiv_arch_neuron.timing.xml neuron_stratixiv_arch_timing.blif common_-sdc_file_sdc/samples/neuron_stratixiv_arch_timing.sdc_-read_vpr_constraints_tasks/regression_tests/vtr_reg_nightly_test5/vpr_tight_floorplan/one_big_partition.xml 453.84 vpr 2.86 GiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 42 -1 -1 success v8.0.0-11925-ga544f5fea-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2025-01-14T21:35:49 betzgrp-wintermute.eecg.utoronto.ca /home/elgamma8/research/release/vtr-verilog-to-routing 2994616 42 35 119888 86875 1 50816 3438 129 96 12384 -1 neuron 1748.6 MiB 106.43 587491 3064004 1147605 1894430 21969 2924.4 MiB 135.58 1.08 7.95276 -79743.8 -6.95276 5.25276 0.07 0.397692 0.334466 49.3461 41.6555 -1 -1 -1 -1 -1 765801 15 0 0 2.28639e+08 18462.4 38.72 65.6267 56.2745 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time initial_placed_wirelength_est placed_wirelength_est total_swap accepted_swap 
rejected_swap aborted_swap place_mem place_time place_quench_time initial_placed_CPD_est placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time +stratixiv_arch_neuron.timing.xml neuron_stratixiv_arch_timing.blif common_-sdc_file_sdc/samples/neuron_stratixiv_arch_timing.sdc_-read_vpr_constraints_tasks/regression_tests/vtr_reg_nightly_test5/vpr_tight_floorplan/sixteenth.xml 737.02 vpr 2.83 GiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 42 -1 -1 success v8.0.0-12661-g327cee3d6-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-58-generic x86_64 2025-05-13T19:21:08 betzgrp-wintermute /home/pooladam/vtr-verilog-to-routing 2965812 42 35 119888 86875 1 51646 3595 129 96 12384 -1 neuron 1857.4 MiB 502.15 1.21569e+06 621456 
3306955 1240097 1657055 409803 2896.3 MiB 98.34 0.82 8.7996 7.66945 -86149.9 -6.66945 5.25748 0.07 0.279323 0.245519 36.6534 30.8848 -1 -1 -1 -1 -1 808659 16 0 0 2.28639e+08 18462.4 33.21 49.8673 42.8255 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +stratixiv_arch_neuron.timing.xml neuron_stratixiv_arch_timing.blif common_-sdc_file_sdc/samples/neuron_stratixiv_arch_timing.sdc_-read_vpr_constraints_tasks/regression_tests/vtr_reg_nightly_test5/vpr_tight_floorplan/half_blocks_half.xml 310.71 vpr 2.82 GiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 42 -1 -1 success v8.0.0-12661-g327cee3d6-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-58-generic x86_64 2025-05-13T19:21:08 betzgrp-wintermute /home/pooladam/vtr-verilog-to-routing 2961588 42 35 119888 86875 1 50882 3437 129 96 12384 -1 neuron 1852.8 MiB 84.57 1.99271e+06 576222 3140604 1178993 1822059 139552 2892.2 MiB 94.33 0.73 12.1805 8.27534 -82556.8 -7.27534 5.60504 0.07 0.269525 0.236256 36.2376 30.2155 -1 -1 -1 -1 -1 753077 16 0 0 2.28639e+08 18462.4 30.32 49.257 41.9348 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +stratixiv_arch_neuron.timing.xml neuron_stratixiv_arch_timing.blif common_-sdc_file_sdc/samples/neuron_stratixiv_arch_timing.sdc_-read_vpr_constraints_tasks/regression_tests/vtr_reg_nightly_test5/vpr_tight_floorplan/one_big_partition.xml 317.70 vpr 2.83 GiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 42 -1 -1 success v8.0.0-12661-g327cee3d6-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-58-generic x86_64 2025-05-13T19:21:08 betzgrp-wintermute /home/pooladam/vtr-verilog-to-routing 2964216 42 35 119888 86875 1 50816 3438 129 96 12384 -1 neuron 1854.8 MiB 83.40 2.11213e+06 571508 3038067 1110809 1910937 16321 2894.7 MiB 96.95 0.76 10.8298 8.23402 -80998.6 -7.23402 5.59616 0.16 0.287833 0.236889 35.8374 29.6476 -1 -1 -1 -1 -1 749232 16 0 0 2.28639e+08 18462.4 30.30 48.8531 41.3864 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 
-1 -1 -1 -1 -1 -1 -1 -1