From 797e96909e841db003614fa2b37ba298737ec34e Mon Sep 17 00:00:00 2001 From: yenjames Date: Wed, 9 Apr 2025 20:19:11 -0400 Subject: [PATCH 1/3] initial documentation --- vpr/src/base/vpr_types.h | 64 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index ddbcb59b08e..aa6370d8a69 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -706,7 +706,69 @@ enum e_stage_action { /** * @brief Options for packing * - * TODO: document each packing parameter + * @param circuit_file_name + * Path to technology mapped user circuit in BLIF format. + * @param output_file + * Path to packed user circuit in net format. + * @param global_clocks + * ALWAYS TRUE. (Default: True) + * @param timing_driven + * Whether or not to do timing driven clustering. (Default: on) + * @param cluster_seed_type + * Selection algorithm for selecting next seed. (Default: blend2 if + * timing_driven is on; max_inputs otherwise) + * @param inter_cluster_net_delay + * ALWAYS 1.0 (Default: 1.0) + * @param target_device_utilization + * Sets the target device utilization. (Default: 1.0) + * @param auto_compute_inter_cluster_net_delay + * ALWAYS TRUE + * @param allow_unrelated_clustering + * Allows primitives which have no attraction to the given cluster + * to be packed into it. (Default: auto) + * @param connection_driven + * Controls whether or not packing prioritizes the absorption of nets + * with fewer connections into a complex logic block over nets with + * more connections. (Default: on) + * @param pack_verbosity + * Controls how verbose clustering's output is. (Default: 2) + * @param enable_pin_feasibility_filter + * Counts the number of available pins in groups/classes of mutually + * connected pins within a cluster, then filters out candidate + * primitives/atoms/molecules for which the cluster has insufficient + * pins to route (without performing a full routing). (Default: on) + * @param balance_block_type_utilization + * If enabled, when a primitive can potentially be mapped to multiple + * block types the packer will pick the block type which (currently) + * has the lowest utilization. (Default: auto) + * @param target_external_pin_util + * Sets the external pin utilization target. (Default: auto) + * @param prioritize_transitive_connectivity + * Whether transitive connectivity is prioritized over high-fanout + * connectivity. (Default: on) + * @param feasible_block_array_size + * Max size of the priority queue for candidates that pass the early + * filter legality test, but not the more detailed routing test. + * (Default: 30) + * @param doPacking + * Run packing stage. + * @param device_layout + * Controls which device layout/floorplan is used from the + * architecture file. (Default: smallest device which satisfies the + * circuit's resource requirements) + * @param timing_update_type + * Controls how timing analysis updates are performed. (Default: auto) + * @param use_attraction_groups + * Whether attraction groups are used to pack primitives in the same + * floorplan region together. + * @param pack_num_moves + * The number of moves that can be tried in packing stage. + * (Default: 100000) + * @param pack_move_type + * The move type used in packing. (Default: semiDirectedSwap) + * @param load_flat_placement + * Whether to reconstruct a packing solution from a flat placement + * file. (Default: off; on if is on) */ struct t_packer_opts { std::string circuit_file_name; From f76236072c22eb3ebebd0afdd2e9eb79bf4aaa0a Mon Sep 17 00:00:00 2001 From: yenjames Date: Sun, 27 Apr 2025 13:49:24 -0400 Subject: [PATCH 2/3] Standardized and renamed packer alpha and beta variable. They are now referred to as timing_gain_weight and connection_gain_weight, used as a weight parameter during timing and connection driven clustering respectively. Removed global_clocks, use_attraction_groups, pack_num_moves, pack_move_type from packer. --- doc/src/vpr/command_line_usage.rst | 4 +-- vpr/src/base/SetupVPR.cpp | 10 ++----- vpr/src/base/ShowSetup.cpp | 5 ++-- vpr/src/base/read_options.cpp | 22 ++------------- vpr/src/base/read_options.h | 6 ++-- vpr/src/base/vpr_api.cpp | 3 +- vpr/src/base/vpr_types.h | 33 +++++++++------------- vpr/src/pack/cluster_util.cpp | 1 - vpr/src/pack/greedy_candidate_selector.cpp | 16 ++++------- vpr/src/pack/output_clustering.cpp | 20 ++++++------- vpr/src/pack/output_clustering.h | 4 +-- 11 files changed, 39 insertions(+), 85 deletions(-) diff --git a/doc/src/vpr/command_line_usage.rst b/doc/src/vpr/command_line_usage.rst index f21ee85f1eb..95ff20ca1f8 100644 --- a/doc/src/vpr/command_line_usage.rst +++ b/doc/src/vpr/command_line_usage.rst @@ -569,7 +569,7 @@ For people not working on CAD, you can probably leave all the options to their d **Default**: ``auto`` -.. option:: --alpha_clustering +.. option:: --timing_gain_weight A parameter that weights the optimization of timing vs area. @@ -577,7 +577,7 @@ For people not working on CAD, you can probably leave all the options to their d **Default**: ``0.75`` -.. option:: --beta_clustering +.. option:: --connection_gain_weight A tradeoff parameter that controls the optimization of smaller net absorption vs. the optimization of signal sharing. diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index e645b35e538..83089d56628 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -573,15 +573,12 @@ void SetupPackerOpts(const t_options& Options, PackerOpts->doPacking = STAGE_DO; } - //TODO: document? - PackerOpts->global_clocks = true; /* DEFAULT */ - PackerOpts->allow_unrelated_clustering = Options.allow_unrelated_clustering; PackerOpts->connection_driven = Options.connection_driven_clustering; PackerOpts->timing_driven = Options.timing_driven_clustering; PackerOpts->cluster_seed_type = Options.cluster_seed_type; - PackerOpts->alpha = Options.alpha_clustering; - PackerOpts->beta = Options.beta_clustering; + PackerOpts->timing_gain_weight = Options.timing_gain_weight; + PackerOpts->connection_gain_weight = Options.connection_gain_weight; PackerOpts->pack_verbosity = Options.pack_verbosity; PackerOpts->enable_pin_feasibility_filter = Options.enable_clustering_pin_feasibility_filter; PackerOpts->balance_block_type_utilization = Options.balance_block_type_utilization; @@ -591,13 +588,10 @@ void SetupPackerOpts(const t_options& Options, PackerOpts->high_fanout_threshold = Options.pack_high_fanout_threshold; PackerOpts->transitive_fanout_threshold = Options.pack_transitive_fanout_threshold; PackerOpts->feasible_block_array_size = Options.pack_feasible_block_array_size; - PackerOpts->use_attraction_groups = Options.use_attraction_groups; PackerOpts->device_layout = Options.device_layout; PackerOpts->timing_update_type = Options.timing_update_type; - PackerOpts->pack_num_moves = Options.pack_num_moves; - PackerOpts->pack_move_type = Options.pack_move_type; } static void SetupNetlistOpts(const t_options& Options, t_netlist_opts& NetlistOpts) { diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp index f21200e97ee..8b42655eda0 100644 --- a/vpr/src/base/ShowSetup.cpp +++ b/vpr/src/base/ShowSetup.cpp @@ -731,8 +731,8 @@ static void ShowPackerOpts(const t_packer_opts& PackerOpts) { } else { VPR_FATAL_ERROR(VPR_ERROR_UNKNOWN, "Unknown packer allow_unrelated_clustering\n"); } - VTR_LOG("PackerOpts.alpha_clustering: %f\n", PackerOpts.alpha); - VTR_LOG("PackerOpts.beta_clustering: %f\n", PackerOpts.beta); + VTR_LOG("PackerOpts.timing_gain_weight: %f\n", PackerOpts.timing_gain_weight); + VTR_LOG("PackerOpts.connection_gain_weight: %f\n", PackerOpts.connection_gain_weight); VTR_LOG("PackerOpts.cluster_seed_type: "); switch (PackerOpts.cluster_seed_type) { case e_cluster_seed::TIMING: @@ -757,7 +757,6 @@ static void ShowPackerOpts(const t_packer_opts& PackerOpts) { VPR_FATAL_ERROR(VPR_ERROR_UNKNOWN, "Unknown packer cluster_seed_type\n"); } VTR_LOG("PackerOpts.connection_driven: %s", (PackerOpts.connection_driven ? "true\n" : "false\n")); - VTR_LOG("PackerOpts.global_clocks: %s", (PackerOpts.global_clocks ? "true\n" : "false\n")); VTR_LOG("PackerOpts.timing_driven: %s", (PackerOpts.timing_driven ? "true\n" : "false\n")); VTR_LOG("PackerOpts.target_external_pin_util: %s", vtr::join(PackerOpts.target_external_pin_util, " ").c_str()); VTR_LOG("\n"); diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 4451dd720cd..14f17921ef6 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -1972,14 +1972,14 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .default_value("auto") .show_in(argparse::ShowIn::HELP_ONLY); - pack_grp.add_argument(args.alpha_clustering, "--alpha_clustering") + pack_grp.add_argument(args.timing_gain_weight, "--timing_gain_weight") .help( "Parameter that weights the optimization of timing vs area. 0.0 focuses solely on" " area, 1.0 solely on timing.") .default_value("0.75") .show_in(argparse::ShowIn::HELP_ONLY); - pack_grp.add_argument(args.beta_clustering, "--beta_clustering") + pack_grp.add_argument(args.connection_gain_weight, "--connection_gain_weight") .help( "Parameter that weights the absorption of small nets vs signal sharing." " 0.0 focuses solely on sharing, 1.0 solely on small net absoprtion." @@ -2101,24 +2101,6 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .default_value("2") .show_in(argparse::ShowIn::HELP_ONLY); - pack_grp.add_argument(args.use_attraction_groups, "--use_attraction_groups") - .help("Whether attraction groups are used to make it easier to pack primitives in the same floorplan region together.") - .default_value("on") - .show_in(argparse::ShowIn::HELP_ONLY); - - pack_grp.add_argument(args.pack_num_moves, "--pack_num_moves") - .help( - "The number of moves that can be tried in packing stage") - .default_value("100000") - .show_in(argparse::ShowIn::HELP_ONLY); - - pack_grp.add_argument(args.pack_move_type, "--pack_move_type") - .help( - "The move type used in packing." - "The available values are: randomSwap, semiDirectedSwap, semiDirectedSameTypeSwap") - .default_value("semiDirectedSwap") - .show_in(argparse::ShowIn::HELP_ONLY); - auto& place_grp = parser.add_argument_group("placement options"); place_grp.add_argument(args.Seed, "--seed") diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index a71ba63428a..73269e3a060 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -108,8 +108,8 @@ struct t_options { /* Clustering options */ argparse::ArgValue connection_driven_clustering; argparse::ArgValue allow_unrelated_clustering; - argparse::ArgValue alpha_clustering; - argparse::ArgValue beta_clustering; + argparse::ArgValue timing_gain_weight; + argparse::ArgValue connection_gain_weight; argparse::ArgValue timing_driven_clustering; argparse::ArgValue cluster_seed_type; argparse::ArgValue enable_clustering_pin_feasibility_filter; @@ -120,8 +120,6 @@ struct t_options { argparse::ArgValue pack_feasible_block_array_size; argparse::ArgValue> pack_high_fanout_threshold; argparse::ArgValue pack_verbosity; - argparse::ArgValue use_attraction_groups; - argparse::ArgValue pack_num_moves; argparse::ArgValue pack_move_type; /* Placement options */ argparse::ArgValue Seed; diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index 1e1ae176805..9ea9215748f 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -1379,8 +1379,7 @@ bool vpr_analysis_flow(const Netlist<>& net_list, } std::string post_routing_packing_output_file_name = vpr_setup.PackerOpts.output_file + ".post_routing"; - write_packing_results_to_xml(vpr_setup.PackerOpts.global_clocks, - Arch.architecture_id, + write_packing_results_to_xml(Arch.architecture_id, post_routing_packing_output_file_name.c_str()); } else { VTR_LOG_WARN("Synchronization between packing and routing results is not applied due to illegal circuit implementation\n"); diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index aa6370d8a69..23aa9468583 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -710,19 +710,24 @@ enum e_stage_action { * Path to technology mapped user circuit in BLIF format. * @param output_file * Path to packed user circuit in net format. - * @param global_clocks - * ALWAYS TRUE. (Default: True) * @param timing_driven * Whether or not to do timing driven clustering. (Default: on) + * @param timing_gain_weight + * Controls the optimization of timing vs area in timing driven + * clustering. + * A value of 0 focuses only on area; 1 focuses only on timing. + * (Default: 0.75) + * @param connection_gain_weight + * Controls the optimization of smaller net absorption vs. signal + * sharing in connection driven clustering. + * A value of 0 focuses solely on signal sharing; a value of 1 + * focuses solely on absorbing smaller nets into a cluster. + * (Default: 0.9) * @param cluster_seed_type * Selection algorithm for selecting next seed. (Default: blend2 if * timing_driven is on; max_inputs otherwise) - * @param inter_cluster_net_delay - * ALWAYS 1.0 (Default: 1.0) * @param target_device_utilization * Sets the target device utilization. (Default: 1.0) - * @param auto_compute_inter_cluster_net_delay - * ALWAYS TRUE * @param allow_unrelated_clustering * Allows primitives which have no attraction to the given cluster * to be packed into it. (Default: auto) @@ -758,14 +763,6 @@ enum e_stage_action { * circuit's resource requirements) * @param timing_update_type * Controls how timing analysis updates are performed. (Default: auto) - * @param use_attraction_groups - * Whether attraction groups are used to pack primitives in the same - * floorplan region together. - * @param pack_num_moves - * The number of moves that can be tried in packing stage. - * (Default: 100000) - * @param pack_move_type - * The move type used in packing. (Default: semiDirectedSwap) * @param load_flat_placement * Whether to reconstruct a packing solution from a flat placement * file. (Default: off; on if is on) @@ -774,11 +771,10 @@ struct t_packer_opts { std::string circuit_file_name; std::string sdc_file_name; std::string output_file; - bool global_clocks; bool timing_driven; enum e_cluster_seed cluster_seed_type; - float alpha; - float beta; + float timing_gain_weight; + float connection_gain_weight; float target_device_utilization; e_unrelated_clustering allow_unrelated_clustering; bool connection_driven; @@ -793,9 +789,6 @@ struct t_packer_opts { e_stage_action doPacking; std::string device_layout; e_timing_update_type timing_update_type; - bool use_attraction_groups; - int pack_num_moves; - std::string pack_move_type; bool load_flat_placement = false; }; diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index dd307168a36..e4579e8cf7c 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -74,7 +74,6 @@ void check_and_output_clustering(ClusterLegalizer& cluster_legalizer, } output_clustering(&cluster_legalizer, - packer_opts.global_clocks, is_clock, arch->architecture_id, packer_opts.output_file.c_str(), diff --git a/vpr/src/pack/greedy_candidate_selector.cpp b/vpr/src/pack/greedy_candidate_selector.cpp index b202035ec59..607230b8002 100644 --- a/vpr/src/pack/greedy_candidate_selector.cpp +++ b/vpr/src/pack/greedy_candidate_selector.cpp @@ -296,7 +296,7 @@ void GreedyCandidateSelector::update_cluster_gain_stats_candidate_success( AtomNetId net_id = atom_netlist_.pin_net(pin_id); e_gain_update gain_flag = e_gain_update::NO_GAIN; - if (!is_clock_.count(net_id) || !packer_opts_.global_clocks) + if (!is_clock_.count(net_id)) gain_flag = e_gain_update::GAIN; mark_and_update_partial_gain(cluster_gain_stats, @@ -324,13 +324,9 @@ void GreedyCandidateSelector::update_cluster_gain_stats_candidate_success( for (AtomPinId pin_id : atom_netlist_.block_clock_pins(blk_id)) { AtomNetId net_id = atom_netlist_.pin_net(pin_id); - e_gain_update gain_flag = e_gain_update::GAIN; - if (packer_opts_.global_clocks) - gain_flag = e_gain_update::NO_GAIN; - mark_and_update_partial_gain(cluster_gain_stats, net_id, - gain_flag, + e_gain_update::NO_GAIN, blk_id, cluster_legalizer, high_fanout_net_threshold, @@ -620,9 +616,9 @@ void GreedyCandidateSelector::update_total_gain(ClusterGainStats& cluster_gain_s VTR_ASSERT(num_used_pins > 0); if (packer_opts_.connection_driven) { /*try to absorb as many connections as possible*/ - cluster_gain_stats.gain[blk_id] = ((1 - packer_opts_.beta) + cluster_gain_stats.gain[blk_id] = ((1 - packer_opts_.connection_gain_weight) * (float)cluster_gain_stats.sharing_gain[blk_id] - + packer_opts_.beta * (float)cluster_gain_stats.connection_gain[blk_id]) + + packer_opts_.connection_gain_weight * (float)cluster_gain_stats.connection_gain[blk_id]) / (num_used_pins); } else { cluster_gain_stats.gain[blk_id] = ((float)cluster_gain_stats.sharing_gain[blk_id]) @@ -631,9 +627,9 @@ void GreedyCandidateSelector::update_total_gain(ClusterGainStats& cluster_gain_s /* Add in timing driven cost into cost function */ if (packer_opts_.timing_driven) { - cluster_gain_stats.gain[blk_id] = packer_opts_.alpha + cluster_gain_stats.gain[blk_id] = packer_opts_.timing_gain_weight * cluster_gain_stats.timing_gain[blk_id] - + (1.0 - packer_opts_.alpha) * (float)cluster_gain_stats.gain[blk_id]; + + (1.0 - packer_opts_.timing_gain_weight) * (float)cluster_gain_stats.gain[blk_id]; } } } diff --git a/vpr/src/pack/output_clustering.cpp b/vpr/src/pack/output_clustering.cpp index 60a371aee46..e48b43a4188 100644 --- a/vpr/src/pack/output_clustering.cpp +++ b/vpr/src/pack/output_clustering.cpp @@ -640,7 +640,7 @@ static void clustering_xml_blocks_from_netlist(pugi::xml_node& block_node, /* This routine dumps out the output netlist in a format suitable for * * input to vpr. This routine also dumps out the internal structure of * * the cluster, in essentially a graph based format. */ -void output_clustering(ClusterLegalizer* cluster_legalizer_ptr, bool global_clocks, const std::unordered_set& is_clock, const std::string& architecture_id, const char* out_fname, bool skip_clustering, bool from_legalizer) { +void output_clustering(ClusterLegalizer* cluster_legalizer_ptr, const std::unordered_set& is_clock, const std::string& architecture_id, const char* out_fname, bool skip_clustering, bool from_legalizer) { const DeviceContext& device_ctx = g_vpr_ctx.device(); const AtomNetlist& atom_nlist = g_vpr_ctx.atom().netlist(); @@ -689,17 +689,15 @@ void output_clustering(ClusterLegalizer* cluster_legalizer_ptr, bool global_cloc block_node.append_child("inputs").text().set(vtr::join(inputs.begin(), inputs.end(), " ").c_str()); block_node.append_child("outputs").text().set(vtr::join(outputs.begin(), outputs.end(), " ").c_str()); - if (global_clocks) { - std::vector clocks; - for (auto net_id : atom_nlist.nets()) { - if (is_clock.count(net_id)) { - clocks.push_back(atom_nlist.net_name(net_id)); - } + std::vector clocks; + for (auto net_id : atom_nlist.nets()) { + if (is_clock.count(net_id)) { + clocks.push_back(atom_nlist.net_name(net_id)); } - - block_node.append_child("clocks").text().set(vtr::join(clocks.begin(), clocks.end(), " ").c_str()); } + block_node.append_child("clocks").text().set(vtr::join(clocks.begin(), clocks.end(), " ").c_str()); + if (skip_clustering == false) { if (from_legalizer) { VTR_ASSERT(cluster_legalizer_ptr != nullptr); @@ -724,15 +722,13 @@ void output_clustering(ClusterLegalizer* cluster_legalizer_ptr, bool global_cloc * As such, this function is expected to be a standard API * which can be called anytime and anywhere after packing is finished. ********************************************************************/ -void write_packing_results_to_xml(const bool& global_clocks, - const std::string& architecture_id, +void write_packing_results_to_xml(const std::string& architecture_id, const char* out_fname) { std::unordered_set is_clock = alloc_and_load_is_clock(); // Since the cluster legalizer is not being used to output the clustering // (from_legalizer is false), passing in nullptr. output_clustering(nullptr, - global_clocks, is_clock, architecture_id, out_fname, diff --git a/vpr/src/pack/output_clustering.h b/vpr/src/pack/output_clustering.h index 92d734248d1..c7537ee8c39 100644 --- a/vpr/src/pack/output_clustering.h +++ b/vpr/src/pack/output_clustering.h @@ -17,15 +17,13 @@ class ClusterLegalizer; /// clustered netlist. If from_legalizer is false, the clustered netlist currently /// in the global scope will be used. void output_clustering(ClusterLegalizer* cluster_legalizer_ptr, - bool global_clocks, const std::unordered_set& is_clock, const std::string& architecture_id, const char* out_fname, bool skip_clustering, bool from_legalizer); -void write_packing_results_to_xml(const bool& global_clocks, - const std::string& architecture_id, +void write_packing_results_to_xml(const std::string& architecture_id, const char* out_fname); #endif From 028f8a56623b567e028ad5f47ee2d6330bd4ea71 Mon Sep 17 00:00:00 2001 From: yenjames Date: Sat, 3 May 2025 10:39:40 -0400 Subject: [PATCH 3/3] Missed a removal in read_options.h. --- vpr/src/base/read_options.h | 1 - 1 file changed, 1 deletion(-) diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index 73269e3a060..9dcbd446a6c 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -120,7 +120,6 @@ struct t_options { argparse::ArgValue pack_feasible_block_array_size; argparse::ArgValue> pack_high_fanout_threshold; argparse::ArgValue pack_verbosity; - argparse::ArgValue pack_move_type; /* Placement options */ argparse::ArgValue Seed; argparse::ArgValue ShowPlaceTiming;