@@ -72,7 +72,7 @@ using namespace std;
72
72
#define AAPACK_MAX_NET_SINKS_IGNORE 64 /* The packer looks at all sinks of a net when deciding which candidate block to pack next. For high-fanout nets this costs too much runtime for only marginal benefit, so those high-fanout nets are ignored */
73
73
#define AAPACK_MAX_HIGH_FANOUT_EXPLORE 10 /* For high-fanout nets that are ignored, consider at most this many sinks; must be less than AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE */
74
74
#define AAPACK_MAX_TRANSITIVE_FANOUT_EXPLORE 4 /* When investigating transitive fanout connections in packing, this is the highest-fanout net that will be explored: load_transitive_fanout_candidates() skips any net with more pins than this */
75
- #define AAPACK_MAX_TRANSITIVE_EXPLORE 4 /* When investigating transitive fanout connections in packing, consider a maximum of this many molecules, must be less tahn AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE */
75
+ #define AAPACK_MAX_TRANSITIVE_EXPLORE 40 /* When investigating transitive fanout connections in packing, consider at most this many molecules; must be less than AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE (the candidate list is capped at the smaller of the two) */
76
76
77
77
// Constant allowing all cluster pins to be used
78
78
const t_ext_pin_util FULL_EXTERNAL_PIN_UTIL (1 ., 1 .);
@@ -573,17 +573,18 @@ std::map<t_type_ptr,size_t> do_clustering(const t_packer_opts& packer_opts, cons
573
573
clb_inter_blk_nets,
574
574
clb_index, packer_opts.pack_verbosity );
575
575
continue ;
576
- } else {
577
- /* Continue packing by filling smallest cluster */
578
- if (verbosity > 2 ) {
579
- VTR_LOG (" \t PASSED: '%s' (%s)" , blk_name.c_str (), blk_model->name );
580
- VTR_LOGV (next_molecule->pack_pattern , " molecule %s molecule_size %zu" ,
581
- next_molecule->pack_pattern ->name , next_molecule->atom_block_ids .size ());
582
- VTR_LOG (" \n " );
583
- }
584
- VTR_LOGV (verbosity == 2 , " ." );
585
- fflush (stdout);
586
576
}
577
+
578
+ /* Continue packing by filling smallest cluster */
579
+ if (verbosity > 2 ) {
580
+ VTR_LOG (" \t PASSED: '%s' (%s)" , blk_name.c_str (), blk_model->name );
581
+ VTR_LOGV (next_molecule->pack_pattern , " molecule %s molecule_size %zu" ,
582
+ next_molecule->pack_pattern ->name , next_molecule->atom_block_ids .size ());
583
+ VTR_LOG (" \n " );
584
+ }
585
+ VTR_LOGV (verbosity == 2 , " ." );
586
+ fflush (stdout);
587
+
587
588
update_cluster_stats (next_molecule, clb_index,
588
589
is_clock, // Set of all clocks
589
590
is_clock, // Set of all global signals (currently clocks)
@@ -617,6 +618,7 @@ std::map<t_type_ptr,size_t> do_clustering(const t_packer_opts& packer_opts, cons
617
618
} else {
618
619
is_cluster_legal = true ;
619
620
}
621
+
620
622
if (is_cluster_legal) {
621
623
intra_lb_routing.push_back (router_data->saved_lb_nets );
622
624
VTR_ASSERT ((int )intra_lb_routing.size () == num_clb);
@@ -1077,13 +1079,13 @@ static void alloc_and_load_pb_stats(t_pb *pb) {
1077
1079
pb->pb_stats ->connectiongain .clear ();
1078
1080
pb->pb_stats ->sharinggain .clear ();
1079
1081
pb->pb_stats ->hillgain .clear ();
1082
+ pb->pb_stats ->transitive_fanout_candidates .clear ();
1080
1083
1081
1084
pb->pb_stats ->num_pins_of_net_in_pb .clear ();
1082
1085
1083
1086
pb->pb_stats ->num_child_blocks_in_pb = 0 ;
1084
1087
1085
1088
pb->pb_stats ->explore_transitive_fanout = true ;
1086
- pb->pb_stats ->transitive_fanout_candidates = nullptr ;
1087
1089
}
1088
1090
/* ****************************************/
1089
1091
@@ -1732,11 +1734,11 @@ static void update_cluster_stats( const t_pack_molecule *molecule,
1732
1734
cur_pb = atom_ctx.lookup .atom_pb (blk_id)->parent_pb ;
1733
1735
while (cur_pb) {
1734
1736
/* reset list of feasible blocks */
1735
- cur_pb->pb_stats ->num_feasible_blocks = NOT_VALID;
1736
- cur_pb->pb_stats ->num_child_blocks_in_pb ++;
1737
1737
if (cur_pb->parent_pb == nullptr ) {
1738
1738
cb = cur_pb;
1739
1739
}
1740
+ cur_pb->pb_stats ->num_feasible_blocks = NOT_VALID;
1741
+ cur_pb->pb_stats ->num_child_blocks_in_pb ++;
1740
1742
cur_pb = cur_pb->parent_pb ;
1741
1743
}
1742
1744
@@ -1789,6 +1791,10 @@ static void update_cluster_stats( const t_pack_molecule *molecule,
1789
1791
1790
1792
commit_lookahead_pins_used (cb);
1791
1793
}
1794
+
1795
+ // if this molecule came from the transitive fanout candidates remove it
1796
+ cb->pb_stats ->transitive_fanout_candidates .erase (molecule->atom_block_ids [molecule->root ]);
1797
+ cb->pb_stats ->explore_transitive_fanout = true ;
1792
1798
}
1793
1799
1794
1800
static void start_new_cluster (
@@ -1966,8 +1972,7 @@ static t_pack_molecule *get_highest_gain_molecule(
1966
1972
}
1967
1973
1968
1974
// 2. Find unpacked molecule based on transitive connections (e.g. 2 hops away) with current cluster
1969
- if (cur_pb->pb_stats ->num_feasible_blocks == 0 &&
1970
- cur_pb->pb_stats ->explore_transitive_fanout == true ) {
1975
+ if (cur_pb->pb_stats ->num_feasible_blocks == 0 && cur_pb->pb_stats ->explore_transitive_fanout ) {
1971
1976
add_cluster_molecule_candidates_by_transitive_connectivity (cur_pb, cluster_placement_stats_ptr, atom_molecules, clb_inter_blk_nets, cluster_index);
1972
1977
}
1973
1978
@@ -1979,13 +1984,11 @@ static t_pack_molecule *get_highest_gain_molecule(
1979
1984
/* Grab highest gain molecule */
1980
1985
t_pack_molecule* molecule = nullptr ;
1981
1986
for (int j = 0 ; j < cur_pb->pb_stats ->num_feasible_blocks ; j++) {
1982
- if (cur_pb->pb_stats ->num_feasible_blocks != 0 ) {
1983
- cur_pb->pb_stats ->num_feasible_blocks --;
1984
- int index = cur_pb->pb_stats ->num_feasible_blocks ;
1985
- molecule = cur_pb->pb_stats ->feasible_blocks [index];
1986
- VTR_ASSERT (molecule->valid == true );
1987
- return molecule;
1988
- }
1987
+ cur_pb->pb_stats ->num_feasible_blocks --;
1988
+ int index = cur_pb->pb_stats ->num_feasible_blocks ;
1989
+ molecule = cur_pb->pb_stats ->feasible_blocks [index];
1990
+ VTR_ASSERT (molecule->valid == true );
1991
+ return molecule;
1989
1992
}
1990
1993
1991
1994
return molecule;
@@ -2094,42 +2097,35 @@ void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur_pb,
2094
2097
2095
2098
auto & atom_ctx = g_vpr_ctx.atom ();
2096
2099
2097
- if (cur_pb->pb_stats ->transitive_fanout_candidates == nullptr ) {
2098
- /* First time finding transitive fanout candidates therefore alloc and load them */
2099
- cur_pb->pb_stats ->transitive_fanout_candidates = new vector<t_pack_molecule *>;
2100
- load_transitive_fanout_candidates (cluster_index,
2101
- atom_molecules,
2102
- cur_pb->pb_stats ,
2103
- clb_inter_blk_nets);
2104
-
2105
- /* Only consider candidates that pass a very simple legality check */
2106
- for (int i = 0 ; i < (int ) cur_pb->pb_stats ->transitive_fanout_candidates ->size (); i++) {
2107
- t_pack_molecule* molecule = (*cur_pb->pb_stats ->transitive_fanout_candidates )[i];
2108
- if (molecule->valid ) {
2109
- bool success = true ;
2110
- for (int j = 0 ; j < get_array_size_of_molecule (molecule); j++) {
2111
- if (molecule->atom_block_ids [j]) {
2112
- VTR_ASSERT (atom_ctx.lookup .atom_clb (molecule->atom_block_ids [j]) == ClusterBlockId::INVALID ());
2113
- auto blk_id = molecule->atom_block_ids [j];
2114
- if (!exists_free_primitive_for_atom_block (cluster_placement_stats_ptr, blk_id)) {
2115
- /* TODO: debating whether to check if placement exists for molecule (more
2116
- * robust) or individual atom blocks (faster) */
2117
- success = false ;
2118
- break ;
2119
- }
2100
+ cur_pb->pb_stats ->explore_transitive_fanout = false ;
2101
+
2102
+ /* Load the transitive fanout candidates for this cluster into cur_pb->pb_stats (the candidate container is a pb_stats member, so nothing is allocated here) */
2103
+ load_transitive_fanout_candidates (cluster_index,
2104
+ atom_molecules,
2105
+ cur_pb->pb_stats ,
2106
+ clb_inter_blk_nets);
2107
+ /* Only consider candidates that pass a very simple legality check */
2108
+ for (const auto & transitive_candidate : cur_pb->pb_stats ->transitive_fanout_candidates ) {
2109
+ t_pack_molecule* molecule = transitive_candidate.second ;
2110
+ if (molecule->valid ) {
2111
+ bool success = true ;
2112
+ for (int j = 0 ; j < get_array_size_of_molecule (molecule); j++) {
2113
+ if (molecule->atom_block_ids [j]) {
2114
+ VTR_ASSERT (atom_ctx.lookup .atom_clb (molecule->atom_block_ids [j]) == ClusterBlockId::INVALID ());
2115
+ auto blk_id = molecule->atom_block_ids [j];
2116
+ if (!exists_free_primitive_for_atom_block (cluster_placement_stats_ptr, blk_id)) {
2117
+ /* TODO: debating whether to check if placement exists for molecule (more
2118
+ * robust) or individual atom blocks (faster) */
2119
+ success = false ;
2120
+ break ;
2120
2121
}
2121
2122
}
2122
- if (success) {
2123
- add_molecule_to_pb_stats_candidates (molecule,
2124
- cur_pb-> pb_stats -> gain , cur_pb, min (AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE,AAPACK_MAX_TRANSITIVE_EXPLORE));
2125
- }
2123
+ }
2124
+ if (success) {
2125
+ add_molecule_to_pb_stats_candidates (molecule,
2126
+ cur_pb-> pb_stats -> gain , cur_pb, min (AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE, AAPACK_MAX_TRANSITIVE_EXPLORE));
2126
2127
}
2127
2128
}
2128
- } else {
2129
- /* Clean up, no more candidates in transitive fanout to consider */
2130
- delete cur_pb->pb_stats ->transitive_fanout_candidates ;
2131
- cur_pb->pb_stats ->transitive_fanout_candidates = nullptr ;
2132
- cur_pb->pb_stats ->explore_transitive_fanout = false ;
2133
2129
}
2134
2130
}
2135
2131
@@ -2149,11 +2145,11 @@ static t_pack_molecule *get_molecule_for_cluster(
2149
2145
* passed in. If no suitable block is found it returns ClusterBlockId::INVALID().
2150
2146
*/
2151
2147
2152
- t_pack_molecule *best_molecule ;
2148
+ VTR_ASSERT (!cur_pb-> parent_pb ) ;
2153
2149
2154
2150
/* If cannot pack into primitive, try packing into cluster */
2155
2151
2156
- best_molecule = get_highest_gain_molecule (cur_pb, atom_molecules,
2152
+ auto best_molecule = get_highest_gain_molecule (cur_pb, atom_molecules,
2157
2153
NOT_HILL_CLIMBING, cluster_placement_stats_ptr, clb_inter_blk_nets, cluster_index);
2158
2154
2159
2155
/* If no blocks have any gain to the current cluster, the code above *
@@ -3009,26 +3005,38 @@ static void commit_lookahead_pins_used(t_pb *cur_pb) {
3009
3005
}
3010
3006
}
3011
3007
3012
- /* Score unclustered atoms that are two hops away from current cluster */
3008
+ /* *
3009
+ * Score unclustered atoms that are two hops away from current cluster
3010
+ * For example, consider a cluster that has a FF feeding an adder in another
3011
+ * cluster. Since this FF is feeding an adder that is packed in another cluster
3012
+ * this function should find other FFs that are feeding other inputs of this adder
3013
+ * since they are two hops away from the FF packed in this cluster
3014
+ */
3013
3015
static void load_transitive_fanout_candidates (ClusterBlockId clb_index,
3014
3016
const std::multimap<AtomBlockId,t_pack_molecule*>& atom_molecules,
3015
3017
t_pb_stats *pb_stats,
3016
3018
vtr::vector<ClusterBlockId,std::vector<AtomNetId>> &clb_inter_blk_nets) {
3017
3019
auto & atom_ctx = g_vpr_ctx.atom ();
3018
3020
3021
+ // iterate over all the nets that have pins in this cluster
3019
3022
for (const auto net_id : pb_stats->marked_nets ) {
3023
+ // only consider small nets to constrain runtime
3020
3024
if (atom_ctx.nlist .net_pins (net_id).size () < AAPACK_MAX_TRANSITIVE_FANOUT_EXPLORE + 1 ) {
3025
+ // iterate over all the pins of the net
3021
3026
for (const auto pin_id : atom_ctx.nlist .net_pins (net_id)) {
3022
3027
AtomBlockId atom_blk_id = atom_ctx.nlist .pin_block (pin_id);
3023
- ClusterBlockId tclb = atom_ctx.lookup .atom_clb (atom_blk_id); // The transitive CLB
3028
+ // get the transitive cluster
3029
+ ClusterBlockId tclb = atom_ctx.lookup .atom_clb (atom_blk_id);
3030
+ // if the block connected to this pin is packed in another cluster
3024
3031
if (tclb != clb_index && tclb != ClusterBlockId::INVALID ()) {
3025
- /* Explore transitive connections from already packed cluster */
3032
+ // explore transitive nets from already packed cluster
3026
3033
for (AtomNetId tnet : clb_inter_blk_nets[tclb]) {
3034
+ // iterate over all the pins of the net
3027
3035
for (AtomPinId tpin : atom_ctx.nlist .net_pins (tnet)) {
3028
3036
auto blk_id = atom_ctx.nlist .pin_block (tpin);
3037
+ // This transitive atom is not packed, score and add
3029
3038
if (atom_ctx.lookup .atom_clb (blk_id) == ClusterBlockId::INVALID ()) {
3030
- /* This transitive atom is not packed, score and add */
3031
- std::vector<t_pack_molecule *> &transitive_fanout_candidates = *(pb_stats->transitive_fanout_candidates );
3039
+ auto & transitive_fanout_candidates = pb_stats->transitive_fanout_candidates ;
3032
3040
3033
3041
if (pb_stats->gain .count (blk_id) == 0 ) {
3034
3042
pb_stats->gain [blk_id] = 0.001 ;
@@ -3039,19 +3047,7 @@ static void load_transitive_fanout_candidates(ClusterBlockId clb_index,
3039
3047
for (const auto & kv : vtr::make_range (rng.first , rng.second )) {
3040
3048
t_pack_molecule* molecule = kv.second ;
3041
3049
if (molecule->valid ) {
3042
- unsigned int imol = 0 ;
3043
-
3044
- /* The number of potential molecules is heavily bounded so
3045
- * this O(N) operation should be safe since N is small */
3046
- for (imol = 0 ; imol < transitive_fanout_candidates.size (); imol++) {
3047
- if (molecule == transitive_fanout_candidates[imol]) {
3048
- break ;
3049
- }
3050
- }
3051
- if (imol == transitive_fanout_candidates.size ()) {
3052
- /* not in candidate list, add to list */
3053
- transitive_fanout_candidates.push_back (molecule);
3054
- }
3050
+ transitive_fanout_candidates.insert ({molecule->atom_block_ids [molecule->root ], molecule});
3055
3051
}
3056
3052
}
3057
3053
}
0 commit comments