Skip to content

Commit f3528a5

Browse files
[ClusterLegalizer] Updated Based on PR Comments
Added more documentation. Cleaned up one set which should have been a vector.
1 parent f33dde9 commit f3528a5

8 files changed

+369
-190
lines changed

vpr/src/base/vpr_api.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <cstring>
1616
#include <cmath>
1717

18+
#include "cluster_util.h"
1819
#include "vpr_context.h"
1920
#include "vtr_assert.h"
2021
#include "vtr_math.h"
@@ -616,7 +617,7 @@ bool vpr_pack_flow(t_vpr_setup& vpr_setup, const t_arch& arch) {
616617

617618
}
618619

619-
// Load cluster_constraints data structure here since loading pack file
620+
// Load cluster_constraints data structure.
620621
load_cluster_constraints();
621622

622623
/* Sanity check the resulting netlist */
@@ -708,11 +709,7 @@ void vpr_load_packing(t_vpr_setup& vpr_setup, const t_arch& arch) {
708709
vpr_setup.PackerOpts.pack_verbosity);
709710

710711
/* Load the mapping between clusters and their atoms */
711-
cluster_ctx.atoms_lookup.resize(cluster_ctx.clb_nlist.blocks().size());
712-
for (AtomBlockId atom_blk_id : atom_ctx.nlist.blocks()) {
713-
ClusterBlockId atom_cluster_blk_id = atom_ctx.lookup.atom_clb(atom_blk_id);
714-
cluster_ctx.atoms_lookup[atom_cluster_blk_id].insert(atom_blk_id);
715-
}
712+
init_clb_atoms_lookup(cluster_ctx.atoms_lookup, atom_ctx, cluster_ctx.clb_nlist);
716713

717714
process_constant_nets(g_vpr_ctx.mutable_atom().nlist,
718715
atom_ctx.lookup,

vpr/src/pack/cluster_legalizer.cpp

Lines changed: 50 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
* @date September 2024
55
* @brief The implementation of the Cluster Legalizer class.
66
*
7-
* Most of the code in this file was original part of cluster_util.cpp and was
7+
* Most of the code in this file was originally part of cluster_util.cpp and was
88
* highly integrated with the clusterer in VPR. All code that was used for
99
* legalizing the clusters was moved into this file and all the functionality
1010
* was moved into the ClusterLegalizer class.
@@ -40,6 +40,8 @@
4040

4141
/**
4242
* @brief Gets the max cluster size that any logical block can have.
43+
*
44+
* This is the maximum number of primitives any cluster can contain.
4345
*/
4446
static size_t calc_max_cluster_size(const std::vector<t_logical_block_type>& logical_block_types) {
4547
size_t max_cluster_size = 0;
@@ -63,11 +65,6 @@ static void alloc_and_load_pb_stats(t_pb* pb, const int feasible_block_array_siz
6365

6466
pb->pb_stats = new t_pb_stats;
6567

66-
/* If statement below is for speed. If nets are reasonably low-fanout, *
67-
* only a relatively small number of blocks will be marked, and updating *
68-
* only those atom block structures will be fastest. If almost all blocks *
69-
* have been touched it should be faster to just run through them all *
70-
* in order (less addressing and better cache locality). */
7168
pb->pb_stats->input_pins_used = std::vector<std::unordered_map<size_t, AtomNetId>>(pb->pb_graph_node->num_input_pin_class);
7269
pb->pb_stats->output_pins_used = std::vector<std::unordered_map<size_t, AtomNetId>>(pb->pb_graph_node->num_output_pin_class);
7370
pb->pb_stats->lookahead_input_pins_used = std::vector<std::vector<AtomNetId>>(pb->pb_graph_node->num_input_pin_class);
@@ -304,10 +301,10 @@ static bool check_cluster_noc_group(AtomBlockId atom_blk_id,
304301
}
305302

306303
/**
307-
* This function takes the root block of a chain molecule and a proposed
308-
* placement primitive for this block. The function then checks if this
309-
* chain root block has a placement constraint (such as being driven from
310-
* outside the cluster) and returns the status of the placement accordingly.
304+
* @brief This function takes the root block of a chain molecule and a proposed
305+
* placement primitive for this block. The function then checks if this
306+
* chain root block has a placement constraint (such as being driven from
307+
* outside the cluster) and returns the status of the placement accordingly.
311308
*/
312309
static enum e_block_pack_status check_chain_root_placement_feasibility(const t_pb_graph_node* pb_graph_node,
313310
const t_pack_molecule* molecule,
@@ -368,7 +365,7 @@ static enum e_block_pack_status check_chain_root_placement_feasibility(const t_p
368365

369366
/**
370367
* @brief Check that the two atom blocks blk_id and sibling_blk_id (which should
371-
* both be memory slices) are feasible, in the sence that they have
368+
* both be memory slices) are feasible, in the sense that they have
372369
* precisely the same net connections (with the exception of nets in data
373370
* port classes).
374371
*
@@ -480,7 +477,7 @@ static bool primitive_feasible(const AtomBlockId blk_id, t_pb* cur_pb) {
480477
}
481478

482479
/**
483-
* Try place atom block into current primitive location
480+
* @brief Try to place atom block into current primitive location
484481
*/
485482
static enum e_block_pack_status
486483
try_place_atom_block_rec(const t_pb_graph_node* pb_graph_node,
@@ -613,7 +610,10 @@ try_place_atom_block_rec(const t_pb_graph_node* pb_graph_node,
613610
return block_pack_status;
614611
}
615612

616-
/* Resets nets used at different pin classes for determining pin feasibility */
613+
/**
614+
* @brief Resets nets used at different pin classes for determining pin
615+
* feasibility.
616+
*/
617617
static void reset_lookahead_pins_used(t_pb* cur_pb) {
618618
const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type;
619619
if (cur_pb->pb_stats == nullptr) {
@@ -674,7 +674,7 @@ static int net_sinks_reachable_in_cluster(const t_pb_graph_pin* driver_pb_gpin,
674674
}
675675

676676
/**
677-
* Returns the pb_graph_pin of the atom pin defined by the driver_pin_id in the driver_pb
677+
* @brief Returns the pb_graph_pin of the atom pin defined by the driver_pin_id in the driver_pb
678678
*/
679679
static t_pb_graph_pin* get_driver_pb_graph_pin(const t_pb* driver_pb, const AtomPinId driver_pin_id) {
680680
const AtomContext& atom_ctx = g_vpr_ctx.atom();
@@ -701,12 +701,12 @@ static t_pb_graph_pin* get_driver_pb_graph_pin(const t_pb* driver_pb, const Atom
701701
}
702702

703703
/**
704-
* Given a pin and its assigned net, mark all pin classes that are affected.
705-
* Check if connecting this pin to it's driver pin or to all sink pins will
706-
* require leaving a pb_block starting from the parent pb_block of the
707-
* primitive till the root block (depth = 0). If leaving a pb_block is
708-
* required add this net to the pin class (to increment the number of used
709-
* pins from this class) that should be used to leave the pb_block.
704+
* @brief Given a pin and its assigned net, mark all pin classes that are affected.
705+
* Check if connecting this pin to its driver pin or to all sink pins will
706+
* require leaving a pb_block starting from the parent pb_block of the
707+
* primitive till the root block (depth = 0). If leaving a pb_block is
708+
* required add this net to the pin class (to increment the number of used
709+
* pins from this class) that should be used to leave the pb_block.
710710
*/
711711
static void compute_and_mark_lookahead_pins_used_for_pin(const t_pb_graph_pin* pb_graph_pin,
712712
const t_pb* primitive_pb,
@@ -834,7 +834,9 @@ static void compute_and_mark_lookahead_pins_used_for_pin(const t_pb_graph_pin* p
834834
}
835835

836836

837-
/* Determine if pins of speculatively packed pb are legal */
837+
/**
838+
* @brief Determine if pins of speculatively packed pb are legal
839+
*/
838840
static void compute_and_mark_lookahead_pins_used(const AtomBlockId blk_id,
839841
const vtr::vector_map<AtomBlockId, LegalizationClusterId>& atom_cluster) {
840842
const AtomContext& atom_ctx = g_vpr_ctx.atom();
@@ -851,7 +853,9 @@ static void compute_and_mark_lookahead_pins_used(const AtomBlockId blk_id,
851853
}
852854
}
853855

854-
/* Determine if speculatively packed cur_pb is pin feasible
856+
/**
857+
* @brief Determine if speculatively packed cur_pb is pin feasible
858+
*
855859
* Runtime is actually not that bad for this. It's worst case O(k^2) where k is the
856860
* number of pb_graph pins. Can use hash tables or make incremental if becomes an issue.
857861
*/
@@ -881,7 +885,10 @@ static void try_update_lookahead_pins_used(t_pb* cur_pb,
881885
}
882886
}
883887

884-
/* Check if the number of available inputs/outputs for a pin class is sufficient for speculatively packed blocks */
888+
/**
889+
* @brief Check if the number of available inputs/outputs for a pin class is
890+
* sufficient for speculatively packed blocks.
891+
*/
885892
static bool check_lookahead_pins_used(t_pb* cur_pb, t_ext_pin_util max_external_pin_util) {
886893
const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type;
887894

@@ -943,11 +950,11 @@ static bool check_lookahead_pins_used(t_pb* cur_pb, t_ext_pin_util max_external_
943950
}
944951

945952
/**
946-
* This function takes a chain molecule, and the pb_graph_node that is chosen
947-
* for packing the molecule's root block. Using the given root_primitive, this
948-
* function will identify which chain id this molecule is being mapped to and
949-
* will update the chain id value inside the chain info data structure of this
950-
* molecule
953+
* @brief This function takes a chain molecule, and the pb_graph_node that is
954+
* chosen for packing the molecule's root block. Using the given
955+
* root_primitive, this function will identify which chain id this
956+
* molecule is being mapped to and will update the chain id value inside
957+
* the chain info data structure of this molecule.
951958
*/
952959
static void update_molecule_chain_info(t_pack_molecule* chain_molecule, const t_pb_graph_node* root_primitive) {
953960
VTR_ASSERT(chain_molecule->chain_info->chain_id == -1 && chain_molecule->chain_info->is_long_chain);
@@ -969,7 +976,8 @@ static void update_molecule_chain_info(t_pack_molecule* chain_molecule, const t_
969976
VTR_ASSERT(false);
970977
}
971978

972-
/* Revert trial atom block iblock and free up memory space accordingly
979+
/**
980+
* @brief Revert trial atom block blk_id and free up memory space accordingly.
973981
*/
974982
static void revert_place_atom_block(const AtomBlockId blk_id,
975983
t_lb_router_data* router_data,
@@ -1021,7 +1029,9 @@ static void revert_place_atom_block(const AtomBlockId blk_id,
10211029
mutable_atom_ctx.lookup.set_atom_pb(blk_id, nullptr);
10221030
}
10231031

1024-
/* Speculation successful, commit input/output pins used */
1032+
/**
1033+
* @brief Speculation successful, commit input/output pins used.
1034+
*/
10251035
static void commit_lookahead_pins_used(t_pb* cur_pb) {
10261036
const t_pb_type* pb_type = cur_pb->pb_graph_node->pb_type;
10271037

@@ -1055,7 +1065,7 @@ static void commit_lookahead_pins_used(t_pb* cur_pb) {
10551065
}
10561066

10571067
/**
1058-
* Cleans up a pb after unsuccessful molecule packing
1068+
* @brief Cleans up a pb after unsuccessful molecule packing
10591069
*
10601070
* Recursively frees pbs from a t_pb tree. The given root pb itself is not
10611071
* deleted.
@@ -1135,7 +1145,7 @@ e_block_pack_status ClusterLegalizer::try_pack_molecule(t_pack_molecule* molecul
11351145
VTR_ASSERT_DEBUG(cluster.pb != nullptr);
11361146
VTR_ASSERT_DEBUG(cluster.type != nullptr);
11371147

1138-
// TODO: Remove these global accesses.
1148+
// TODO: Remove these global accesses to the contexts.
11391149
// AtomContext used for:
11401150
// - printing verbose statements
11411151
// - Looking up the primitive pb
@@ -1349,7 +1359,7 @@ e_block_pack_status ClusterLegalizer::try_pack_molecule(t_pack_molecule* molecul
13491359
cluster.noc_grp_id = new_cluster_noc_grp_id;
13501360

13511361
// Insert the molecule into the cluster for bookkeeping.
1352-
cluster.molecules.insert(molecule);
1362+
cluster.molecules.push_back(molecule);
13531363

13541364
for (int i = 0; i < molecule_size; i++) {
13551365
AtomBlockId atom_blk_id = molecule->atom_block_ids[i];
@@ -1653,10 +1663,13 @@ ClusterLegalizer::ClusterLegalizer(const AtomNetlist& atom_netlist,
16531663
feasible_block_array_size_ = feasible_block_array_size;
16541664
log_verbosity_ = log_verbosity;
16551665
// Get the target external pin utilization
1656-
// NOTE: This is really silly, but this can potentially fail. If it does
1657-
// it is important that everything is allocated. If not, when it fails
1658-
// it will call the reset method when only parts of the class are
1659-
// allocated which may cause havoc...
1666+
// NOTE: This has to be initialized last due to the fact that VPR_FATAL_ERROR
1667+
// may be called within the constructor of t_ext_pin_util_targets. If
1668+
// this occurs, an exception is thrown which will unwind the stack. If
1669+
// the cluster legalizer object is stored on the stack, this can call
1670+
// the destructor prematurely (before certain structures are allocated).
1671+
// Therefore, this is created at the end, when the class is in a state
1672+
// where it can be destroyed.
16601673
target_external_pin_util_ = t_ext_pin_util_targets(target_external_pin_util_str);
16611674
}
16621675

0 commit comments

Comments
 (0)