Skip to content

Commit 0eb1e95

Browse files
vpr: Updated class_size in cluster level pin feasibility check
Use the number of used pins in a class as the class_size if it is already larger than the class_size multiplied by the external pin utilization factor. This happens when the seed block chosen for a cluster uses more pins than the class_size multiplied by the external pin utilization factor. That situation is possible because, when packing the seed block, the external pin utilization factor is set to 1.0 to prevent large molecules from failing packing.
1 parent ab363fc commit 0eb1e95

File tree

2 files changed

+44
-29
lines changed

2 files changed

+44
-29
lines changed

vpr/src/pack/cluster.cpp

Lines changed: 41 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1063,8 +1063,8 @@ static void alloc_and_load_pb_stats(t_pb *pb) {
10631063
* only those atom block structures will be fastest. If almost all blocks *
10641064
* have been touched it should be faster to just run through them all *
10651065
* in order (less addressing and better cache locality). */
1066-
pb->pb_stats->input_pins_used = std::vector<std::unordered_set<AtomNetId>>(pb->pb_graph_node->num_input_pin_class);
1067-
pb->pb_stats->output_pins_used = std::vector<std::unordered_set<AtomNetId>>(pb->pb_graph_node->num_output_pin_class);
1066+
pb->pb_stats->input_pins_used = std::vector<std::unordered_map<size_t, AtomNetId>>(pb->pb_graph_node->num_input_pin_class);
1067+
pb->pb_stats->output_pins_used = std::vector<std::unordered_map<size_t, AtomNetId>>(pb->pb_graph_node->num_output_pin_class);
10681068
pb->pb_stats->lookahead_input_pins_used = std::vector<std::vector<AtomNetId>>(pb->pb_graph_node->num_input_pin_class);
10691069
pb->pb_stats->lookahead_output_pins_used = std::vector<std::vector<AtomNetId>>(pb->pb_graph_node->num_output_pin_class);
10701070
pb->pb_stats->num_feasible_blocks = NOT_VALID;
@@ -2918,73 +2918,88 @@ static bool check_lookahead_pins_used(t_pb *cur_pb, t_ext_pin_util max_external_
29182918

29192919
const t_pb_type *pb_type = cur_pb->pb_graph_node->pb_type;
29202920

2921-
bool success = true;
2922-
2923-
if (pb_type->num_modes > 0 && cur_pb->name != nullptr) {
2924-
for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class && success; i++) {
2921+
if (pb_type->num_modes > 0 && cur_pb->name) {
2922+
for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) {
29252923
size_t class_size = cur_pb->pb_graph_node->input_pin_class_size[i];
2924+
29262925
if (cur_pb->is_root()) {
2927-
//Scale the class size by the specified utilization.
2928-
//We clip to 1 to prevent class sizes of one from being scaled to zero.
2929-
class_size = std::max<size_t>(1, max_external_pin_util.input_pin_util * class_size);
2926+
// Scale the class size by the maximum external pin utilization factor
2927+
// Use ceil to avoid classes of size 1 from being scaled to zero
2928+
class_size = std::ceil(max_external_pin_util.input_pin_util * class_size);
2929+
// if the number of pins already used is larger than class size, then the number of
2930+
// cluster inputs already used should be our constraint. Why is this needed? This is
2931+
// needed since when packing the seed block the maximum external pin utilization is
2932+
// used as 1.0 allowing molecules that are using up to all the cluster inputs to be
2933+
// packed legally. Therefore, if the seed block is already using more inputs than
2934+
// the allowed maximum utilization, this should become the new maximum pin utilization.
2935+
class_size = std::max<size_t>(class_size, cur_pb->pb_stats->input_pins_used[i].size());
29302936
}
29312937

29322938
if (cur_pb->pb_stats->lookahead_input_pins_used[i].size() > class_size) {
2933-
success = false;
2939+
return false;
29342940
}
29352941
}
29362942

2937-
for (int i = 0; i < cur_pb->pb_graph_node->num_output_pin_class && success; i++) {
2943+
for (int i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) {
29382944
size_t class_size = cur_pb->pb_graph_node->output_pin_class_size[i];
29392945
if (cur_pb->is_root()) {
2940-
//Scale the class size by the specified utilization.
2941-
//We clip to 1 to prevent class sizes of one from being scaled to zero.
2942-
class_size = std::max<size_t>(1, max_external_pin_util.output_pin_util * class_size);
2946+
// Scale the class size by the maximum external pin utilization factor
2947+
// Use ceil to avoid classes of size 1 from being scaled to zero
2948+
class_size = std::ceil(max_external_pin_util.output_pin_util * class_size);
2949+
// if the number of pins already used is larger than class size, then the number of
2950+
// cluster outputs already used should be our constraint. Why is this needed? This is
2951+
// needed since when packing the seed block the maximum external pin utilization is
2952+
used as 1.0 allowing molecules that are using up to all the cluster outputs to be
2953+
packed legally. Therefore, if the seed block is already using more outputs than
2954+
// the allowed maximum utilization, this should become the new maximum pin utilization.
2955+
class_size = std::max<size_t>(class_size, cur_pb->pb_stats->output_pins_used[i].size());
29432956
}
29442957

29452958
if (cur_pb->pb_stats->lookahead_output_pins_used[i].size() > class_size) {
2946-
success = false;
2959+
return false;
29472960
}
29482961
}
29492962

2950-
if (success && cur_pb->child_pbs != nullptr) {
2951-
for (int i = 0; success && i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) {
2952-
if (cur_pb->child_pbs[i] != nullptr) {
2953-
for (int j = 0; success && j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) {
2954-
success = check_lookahead_pins_used(&cur_pb->child_pbs[i][j], max_external_pin_util);
2963+
if (cur_pb->child_pbs) {
2964+
for (int i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) {
2965+
if (cur_pb->child_pbs[i]) {
2966+
for (int j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) {
2967+
if (!check_lookahead_pins_used(&cur_pb->child_pbs[i][j], max_external_pin_util))
2968+
return false;
29552969
}
29562970
}
29572971
}
29582972
}
29592973
}
2960-
return success;
2974+
2975+
return true;
29612976
}
29622977

29632978
/* Speculation successful, commit input/output pins used */
29642979
static void commit_lookahead_pins_used(t_pb *cur_pb) {
29652980

29662981
const t_pb_type *pb_type = cur_pb->pb_graph_node->pb_type;
29672982

2968-
if (pb_type->num_modes > 0 && cur_pb->name != nullptr) {
2983+
if (pb_type->num_modes > 0 && cur_pb->name) {
29692984
for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) {
29702985
VTR_ASSERT(cur_pb->pb_stats->lookahead_input_pins_used[i].size() <= (unsigned int)cur_pb->pb_graph_node->input_pin_class_size[i]);
29712986
for (size_t j = 0; j < cur_pb->pb_stats->lookahead_input_pins_used[i].size(); j++) {
29722987
VTR_ASSERT(cur_pb->pb_stats->lookahead_input_pins_used[i][j]);
2973-
cur_pb->pb_stats->input_pins_used[i].insert(cur_pb->pb_stats->lookahead_input_pins_used[i][j]);
2988+
cur_pb->pb_stats->input_pins_used[i].insert({j, cur_pb->pb_stats->lookahead_input_pins_used[i][j]});
29742989
}
29752990
}
29762991

29772992
for (int i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) {
29782993
VTR_ASSERT(cur_pb->pb_stats->lookahead_output_pins_used[i].size() <= (unsigned int)cur_pb->pb_graph_node->output_pin_class_size[i]);
29792994
for (size_t j = 0; j < cur_pb->pb_stats->lookahead_output_pins_used[i].size(); j++) {
29802995
VTR_ASSERT(cur_pb->pb_stats->lookahead_output_pins_used[i][j]);
2981-
cur_pb->pb_stats->output_pins_used[i].insert(cur_pb->pb_stats->lookahead_output_pins_used[i][j]);
2996+
cur_pb->pb_stats->output_pins_used[i].insert({j, cur_pb->pb_stats->lookahead_output_pins_used[i][j]});
29822997
}
29832998
}
29842999

2985-
if (cur_pb->child_pbs != nullptr) {
3000+
if (cur_pb->child_pbs) {
29863001
for (int i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) {
2987-
if (cur_pb->child_pbs[i] != nullptr) {
3002+
if (cur_pb->child_pbs[i]) {
29883003
for (int j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) {
29893004
commit_lookahead_pins_used(&cur_pb->child_pbs[i][j]);
29903005
}

vpr/src/pack/pack_types.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Defines core data structures used in packing
88
*/
99
#include <map>
10-
#include <unordered_set>
10+
#include <unordered_map>
1111
#include <vector>
1212

1313
#include "arch_types.h"
@@ -66,8 +66,8 @@ struct t_pb_stats {
6666
std::map<AtomNetId, int> num_pins_of_net_in_pb;
6767

6868
/* Record of pins of class used */
69-
std::vector<std::unordered_set<AtomNetId>> input_pins_used; /* [0..pb_graph_node->num_pin_classes-1] nets using this input pin class */
70-
std::vector<std::unordered_set<AtomNetId>> output_pins_used; /* [0..pb_graph_node->num_pin_classes-1] nets using this output pin class */
69+
std::vector<std::unordered_map<size_t, AtomNetId>> input_pins_used; /* [0..pb_graph_node->num_pin_classes-1] nets using this input pin class */
70+
std::vector<std::unordered_map<size_t, AtomNetId>> output_pins_used; /* [0..pb_graph_node->num_pin_classes-1] nets using this output pin class */
7171

7272
/* Use vector because array size is expected to be small so runtime should be faster using vector than map despite the O(N) vs O(log(n)) behaviour.*/
7373
std::vector<std::vector<AtomNetId>> lookahead_input_pins_used; /* [0..pb_graph_node->num_pin_classes-1] vector of input pins of this class that are speculatively used */

0 commit comments

Comments
 (0)