Skip to content

Commit bae983f

Browse files
authored
Merge pull request #1886 from verilog-to-routing/improve_clustering_gain_updates
Update cluster gains once per cluster
2 parents 2953937 + 5f8b499 commit bae983f

File tree

1 file changed

+104
-92
lines changed

1 file changed

+104
-92
lines changed

vpr/src/pack/cluster.cpp

Lines changed: 104 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,8 @@ static bool pb_used_for_blif_model(const t_pb* pb, std::string blif_model_name);
388388

389389
static void print_le_count(std::vector<int>& le_count, const t_pb_type* le_pb_type);
390390

391+
static t_pb* get_top_level_pb(t_pb* pb);
392+
391393
/*****************************************/
392394
/*globally accessible function*/
393395
std::map<t_logical_block_type_ptr, size_t> do_clustering(const t_packer_opts& packer_opts,
@@ -1978,6 +1980,7 @@ static void mark_and_update_partial_gain(const AtomNetId net_id, enum e_gain_upd
19781980

19791981
auto& atom_ctx = g_vpr_ctx.atom();
19801982
t_pb* cur_pb = atom_ctx.lookup.atom_pb(clustered_blk_id)->parent_pb;
1983+
cur_pb = get_top_level_pb(cur_pb);
19811984

19821985
if (int(atom_ctx.nlist.net_sinks(net_id).size()) > high_fanout_net_threshold) {
19831986
/* Optimization: It can be too runtime costly for marking all sinks for
@@ -1986,9 +1989,6 @@ static void mark_and_update_partial_gain(const AtomNetId net_id, enum e_gain_upd
19861989
if (!is_global.count(net_id)) {
19871990
/* If no low/medium fanout nets, we may need to consider
19881991
* high fan-out nets for packing, so select one and store it */
1989-
while (cur_pb->parent_pb != nullptr) {
1990-
cur_pb = cur_pb->parent_pb;
1991-
}
19921992
AtomNetId stored_net = cur_pb->pb_stats->tie_break_high_fanout_net;
19931993
if (!stored_net || atom_ctx.nlist.net_sinks(net_id).size() < atom_ctx.nlist.net_sinks(stored_net).size()) {
19941994
cur_pb->pb_stats->tie_break_high_fanout_net = net_id;
@@ -1997,59 +1997,56 @@ static void mark_and_update_partial_gain(const AtomNetId net_id, enum e_gain_upd
19971997
return;
19981998
}
19991999

2000-
while (cur_pb) {
2001-
/* Mark atom net as being visited, if necessary. */
2000+
/* Mark atom net as being visited, if necessary. */
20022001

2003-
if (cur_pb->pb_stats->num_pins_of_net_in_pb.count(net_id) == 0) {
2004-
cur_pb->pb_stats->marked_nets.push_back(net_id);
2005-
}
2002+
if (cur_pb->pb_stats->num_pins_of_net_in_pb.count(net_id) == 0) {
2003+
cur_pb->pb_stats->marked_nets.push_back(net_id);
2004+
}
20062005

2007-
/* Update gains of affected blocks. */
2006+
/* Update gains of affected blocks. */
20082007

2009-
if (gain_flag == GAIN) {
2010-
/* Check if this net is connected to its driver block multiple times (i.e. as both an output and input)
2011-
* If so, avoid double counting by skipping the first (driving) pin. */
2008+
if (gain_flag == GAIN) {
2009+
/* Check if this net is connected to its driver block multiple times (i.e. as both an output and input)
2010+
* If so, avoid double counting by skipping the first (driving) pin. */
20122011

2013-
auto pins = atom_ctx.nlist.net_pins(net_id);
2014-
if (net_output_feeds_driving_block_input[net_id] != 0)
2015-
//We implicitly assume here that net_output_feeds_driving_block_input[net_id] is 2
2016-
//(i.e. the net loops back to the block only once)
2017-
pins = atom_ctx.nlist.net_sinks(net_id);
2012+
auto pins = atom_ctx.nlist.net_pins(net_id);
2013+
if (net_output_feeds_driving_block_input[net_id] != 0)
2014+
//We implicitly assume here that net_output_feeds_driving_block_input[net_id] is 2
2015+
//(i.e. the net loops back to the block only once)
2016+
pins = atom_ctx.nlist.net_sinks(net_id);
20182017

2019-
if (cur_pb->pb_stats->num_pins_of_net_in_pb.count(net_id) == 0) {
2020-
for (auto pin_id : pins) {
2021-
auto blk_id = atom_ctx.nlist.pin_block(pin_id);
2022-
if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) {
2023-
if (cur_pb->pb_stats->sharinggain.count(blk_id) == 0) {
2024-
cur_pb->pb_stats->marked_blocks.push_back(blk_id);
2025-
cur_pb->pb_stats->sharinggain[blk_id] = 1;
2026-
cur_pb->pb_stats->hillgain[blk_id] = 1 - num_ext_inputs_atom_block(blk_id);
2027-
} else {
2028-
cur_pb->pb_stats->sharinggain[blk_id]++;
2029-
cur_pb->pb_stats->hillgain[blk_id]++;
2030-
}
2018+
if (cur_pb->pb_stats->num_pins_of_net_in_pb.count(net_id) == 0) {
2019+
for (auto pin_id : pins) {
2020+
auto blk_id = atom_ctx.nlist.pin_block(pin_id);
2021+
if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) {
2022+
if (cur_pb->pb_stats->sharinggain.count(blk_id) == 0) {
2023+
cur_pb->pb_stats->marked_blocks.push_back(blk_id);
2024+
cur_pb->pb_stats->sharinggain[blk_id] = 1;
2025+
cur_pb->pb_stats->hillgain[blk_id] = 1 - num_ext_inputs_atom_block(blk_id);
2026+
} else {
2027+
cur_pb->pb_stats->sharinggain[blk_id]++;
2028+
cur_pb->pb_stats->hillgain[blk_id]++;
20312029
}
20322030
}
20332031
}
2032+
}
20342033

2035-
if (connection_driven) {
2036-
update_connection_gain_values(net_id, clustered_blk_id, cur_pb,
2037-
net_relation_to_clustered_block);
2038-
}
2039-
2040-
if (timing_driven) {
2041-
update_timing_gain_values(net_id, cur_pb,
2042-
net_relation_to_clustered_block,
2043-
timing_info,
2044-
is_global);
2045-
}
2034+
if (connection_driven) {
2035+
update_connection_gain_values(net_id, clustered_blk_id, cur_pb,
2036+
net_relation_to_clustered_block);
20462037
}
2047-
if (cur_pb->pb_stats->num_pins_of_net_in_pb.count(net_id) == 0) {
2048-
cur_pb->pb_stats->num_pins_of_net_in_pb[net_id] = 0;
2038+
2039+
if (timing_driven) {
2040+
update_timing_gain_values(net_id, cur_pb,
2041+
net_relation_to_clustered_block,
2042+
timing_info,
2043+
is_global);
20492044
}
2050-
cur_pb->pb_stats->num_pins_of_net_in_pb[net_id]++;
2051-
cur_pb = cur_pb->parent_pb;
20522045
}
2046+
if (cur_pb->pb_stats->num_pins_of_net_in_pb.count(net_id) == 0) {
2047+
cur_pb->pb_stats->num_pins_of_net_in_pb[net_id] = 0;
2048+
}
2049+
cur_pb->pb_stats->num_pins_of_net_in_pb[net_id]++;
20532050
}
20542051

20552052
/*****************************************/
@@ -2060,57 +2057,54 @@ static void update_total_gain(float alpha, float beta, bool timing_driven, bool
20602057
auto& atom_ctx = g_vpr_ctx.atom();
20612058
t_pb* cur_pb = pb;
20622059

2063-
AttractGroupId cluster_att_grp_id = cur_pb->pb_stats->attraction_grp_id;
2060+
cur_pb = get_top_level_pb(cur_pb);
2061+
AttractGroupId cluster_att_grp_id;
20642062

2065-
while (cur_pb) {
2066-
for (AtomBlockId blk_id : cur_pb->pb_stats->marked_blocks) {
2067-
//Initialize connectiongain and sharinggain if
2068-
//they have not previously been updated for the block
2069-
if (cur_pb->pb_stats->connectiongain.count(blk_id) == 0) {
2070-
cur_pb->pb_stats->connectiongain[blk_id] = 0;
2071-
}
2072-
if (cur_pb->pb_stats->sharinggain.count(blk_id) == 0) {
2073-
cur_pb->pb_stats->sharinggain[blk_id] = 0;
2074-
}
2075-
2076-
/* Todo: Right now we update the gain multiple times for each block.
2077-
* Eventually want to move this out of the while loop and only update it
2078-
* for the top-level block in each cluster.*/
2079-
AttractGroupId atom_grp_id = attraction_groups.get_atom_attraction_group(blk_id);
2080-
if (atom_grp_id != AttractGroupId::INVALID() && atom_grp_id == cluster_att_grp_id) {
2081-
//increase gain of atom based on attraction group gain
2082-
float att_grp_gain = attraction_groups.get_attraction_group_gain(atom_grp_id);
2083-
cur_pb->pb_stats->gain[blk_id] += att_grp_gain;
2084-
}
2085-
2086-
/* Todo: This was used to explore different normalization options, can
2087-
* be made more efficient once we decide on which one to use*/
2088-
int num_used_input_pins = atom_ctx.nlist.block_input_pins(blk_id).size();
2089-
int num_used_output_pins = atom_ctx.nlist.block_output_pins(blk_id).size();
2090-
/* end todo */
2091-
2092-
/* Calculate area-only cost function */
2093-
int num_used_pins = num_used_input_pins + num_used_output_pins;
2094-
VTR_ASSERT(num_used_pins > 0);
2095-
if (connection_driven) {
2096-
/*try to absorb as many connections as possible*/
2097-
cur_pb->pb_stats->gain[blk_id] = ((1 - beta)
2098-
* (float)cur_pb->pb_stats->sharinggain[blk_id]
2099-
+ beta * (float)cur_pb->pb_stats->connectiongain[blk_id])
2100-
/ (num_used_pins);
2101-
} else {
2102-
cur_pb->pb_stats->gain[blk_id] = ((float)cur_pb->pb_stats->sharinggain[blk_id])
2103-
/ (num_used_pins);
2104-
}
2063+
cluster_att_grp_id = cur_pb->pb_stats->attraction_grp_id;
21052064

2106-
/* Add in timing driven cost into cost function */
2107-
if (timing_driven) {
2108-
cur_pb->pb_stats->gain[blk_id] = alpha
2109-
* cur_pb->pb_stats->timinggain[blk_id]
2110-
+ (1.0 - alpha) * (float)cur_pb->pb_stats->gain[blk_id];
2111-
}
2065+
for (AtomBlockId blk_id : cur_pb->pb_stats->marked_blocks) {
2066+
//Initialize connectiongain and sharinggain if
2067+
//they have not previously been updated for the block
2068+
if (cur_pb->pb_stats->connectiongain.count(blk_id) == 0) {
2069+
cur_pb->pb_stats->connectiongain[blk_id] = 0;
2070+
}
2071+
if (cur_pb->pb_stats->sharinggain.count(blk_id) == 0) {
2072+
cur_pb->pb_stats->sharinggain[blk_id] = 0;
2073+
}
2074+
2075+
AttractGroupId atom_grp_id = attraction_groups.get_atom_attraction_group(blk_id);
2076+
if (atom_grp_id != AttractGroupId::INVALID() && atom_grp_id == cluster_att_grp_id) {
2077+
//increase gain of atom based on attraction group gain
2078+
float att_grp_gain = attraction_groups.get_attraction_group_gain(atom_grp_id);
2079+
cur_pb->pb_stats->gain[blk_id] += att_grp_gain;
2080+
}
2081+
2082+
/* Todo: This was used to explore different normalization options, can
2083+
* be made more efficient once we decide on which one to use*/
2084+
int num_used_input_pins = atom_ctx.nlist.block_input_pins(blk_id).size();
2085+
int num_used_output_pins = atom_ctx.nlist.block_output_pins(blk_id).size();
2086+
/* end todo */
2087+
2088+
/* Calculate area-only cost function */
2089+
int num_used_pins = num_used_input_pins + num_used_output_pins;
2090+
VTR_ASSERT(num_used_pins > 0);
2091+
if (connection_driven) {
2092+
/*try to absorb as many connections as possible*/
2093+
cur_pb->pb_stats->gain[blk_id] = ((1 - beta)
2094+
* (float)cur_pb->pb_stats->sharinggain[blk_id]
2095+
+ beta * (float)cur_pb->pb_stats->connectiongain[blk_id])
2096+
/ (num_used_pins);
2097+
} else {
2098+
cur_pb->pb_stats->gain[blk_id] = ((float)cur_pb->pb_stats->sharinggain[blk_id])
2099+
/ (num_used_pins);
2100+
}
2101+
2102+
/* Add in timing driven cost into cost function */
2103+
if (timing_driven) {
2104+
cur_pb->pb_stats->gain[blk_id] = alpha
2105+
* cur_pb->pb_stats->timinggain[blk_id]
2106+
+ (1.0 - alpha) * (float)cur_pb->pb_stats->gain[blk_id];
21122107
}
2113-
cur_pb = cur_pb->parent_pb;
21142108
}
21152109
}
21162110

@@ -3957,3 +3951,21 @@ static void print_le_count(std::vector<int>& le_count, const t_pb_type* le_pb_ty
39573951
VTR_LOG(" LEs used for logic only : %d\n", le_count[1]);
39583952
VTR_LOG(" LEs used for registers only : %d\n\n", le_count[2]);
39593953
}
3954+
3955+
/**
3956+
* Given a pointer to a pb in a cluster, this routine returns
3957+
* a pointer to the top-level pb of the given pb.
3958+
* This is needed when updating the gain for a cluster.
3959+
*/
3960+
static t_pb* get_top_level_pb(t_pb* pb) {
3961+
t_pb* top_level_pb = pb;
3962+
3963+
while (pb) {
3964+
top_level_pb = pb;
3965+
pb = pb->parent_pb;
3966+
}
3967+
3968+
VTR_ASSERT(top_level_pb != nullptr);
3969+
3970+
return top_level_pb;
3971+
}

0 commit comments

Comments
 (0)