Skip to content

Commit ab363fc

Browse files
vpr: Changed input_pins_used/output_pins_used from vectors to hash tables (std::unordered_set)
1 parent 5ce7ca1 commit ab363fc

File tree

2 files changed

+19
-44
lines changed

2 files changed

+19
-44
lines changed

vpr/src/pack/cluster.cpp

Lines changed: 15 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -1028,26 +1028,19 @@ static t_pack_molecule *get_free_molecule_with_most_ext_inputs_for_cluster(
10281028
* TODO: Analyze if this function is useful in more detail, also, should probably not include clock in input count
10291029
*/
10301030

1031-
int ext_inps;
1032-
int i, j;
1033-
t_pack_molecule *molecule;
1034-
10351031
int inputs_avail = 0;
10361032

1037-
for (i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) {
1038-
for (j = 0; j < cur_pb->pb_graph_node->input_pin_class_size[i]; j++) {
1039-
if (cur_pb->pb_stats->input_pins_used[i][j])
1040-
inputs_avail++;
1041-
}
1033+
for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) {
1034+
inputs_avail += cur_pb->pb_stats->input_pins_used[i].size();
10421035
}
10431036

1044-
molecule = nullptr;
1037+
t_pack_molecule* molecule = nullptr;
10451038

10461039
if (inputs_avail >= unclustered_list_head_size) {
10471040
inputs_avail = unclustered_list_head_size - 1;
10481041
}
10491042

1050-
for (ext_inps = inputs_avail; ext_inps >= 0; ext_inps--) {
1043+
for (int ext_inps = inputs_avail; ext_inps >= 0; ext_inps--) {
10511044
molecule = get_molecule_by_num_ext_inputs(
10521045
ext_inps, LEAVE_CLUSTERED, cluster_placement_stats_ptr);
10531046
if (molecule != nullptr) {
@@ -1063,30 +1056,21 @@ static void alloc_and_load_pb_stats(t_pb *pb) {
10631056
/* Call this routine when starting to fill up a new cluster. It resets *
10641057
* the gain vector, etc. */
10651058

1066-
int i;
1067-
10681059
pb->pb_stats = new t_pb_stats;
10691060

10701061
/* If statement below is for speed. If nets are reasonably low-fanout, *
10711062
* only a relatively small number of blocks will be marked, and updating *
10721063
* only those atom block structures will be fastest. If almost all blocks *
10731064
* have been touched it should be faster to just run through them all *
10741065
* in order (less addressing and better cache locality). */
1075-
pb->pb_stats->input_pins_used = std::vector<std::vector<AtomNetId>>(pb->pb_graph_node->num_input_pin_class);
1076-
pb->pb_stats->output_pins_used = std::vector<std::vector<AtomNetId>>(pb->pb_graph_node->num_output_pin_class);
1066+
pb->pb_stats->input_pins_used = std::vector<std::unordered_set<AtomNetId>>(pb->pb_graph_node->num_input_pin_class);
1067+
pb->pb_stats->output_pins_used = std::vector<std::unordered_set<AtomNetId>>(pb->pb_graph_node->num_output_pin_class);
10771068
pb->pb_stats->lookahead_input_pins_used = std::vector<std::vector<AtomNetId>>(pb->pb_graph_node->num_input_pin_class);
10781069
pb->pb_stats->lookahead_output_pins_used = std::vector<std::vector<AtomNetId>>(pb->pb_graph_node->num_output_pin_class);
10791070
pb->pb_stats->num_feasible_blocks = NOT_VALID;
10801071
pb->pb_stats->feasible_blocks = (t_pack_molecule**) vtr::calloc(AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE, sizeof(t_pack_molecule *));
10811072

10821073
pb->pb_stats->tie_break_high_fanout_net = AtomNetId::INVALID();
1083-
for (i = 0; i < pb->pb_graph_node->num_input_pin_class; i++) {
1084-
pb->pb_stats->input_pins_used[i] = std::vector<AtomNetId>(pb->pb_graph_node->input_pin_class_size[i]);
1085-
}
1086-
1087-
for (i = 0; i < pb->pb_graph_node->num_output_pin_class; i++) {
1088-
pb->pb_stats->output_pins_used[i] = std::vector<AtomNetId>(pb->pb_graph_node->output_pin_class_size[i]);
1089-
}
10901074

10911075
pb->pb_stats->gain.clear();
10921076
pb->pb_stats->timinggain.clear();
@@ -2978,36 +2962,30 @@ static bool check_lookahead_pins_used(t_pb *cur_pb, t_ext_pin_util max_external_
29782962

29792963
/* Speculation successful, commit input/output pins used */
29802964
static void commit_lookahead_pins_used(t_pb *cur_pb) {
2981-
int i, j;
2982-
int ipin;
2965+
29832966
const t_pb_type *pb_type = cur_pb->pb_graph_node->pb_type;
29842967

29852968
if (pb_type->num_modes > 0 && cur_pb->name != nullptr) {
2986-
for (i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) {
2987-
ipin = 0;
2969+
for (int i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) {
29882970
VTR_ASSERT(cur_pb->pb_stats->lookahead_input_pins_used[i].size() <= (unsigned int)cur_pb->pb_graph_node->input_pin_class_size[i]);
2989-
for (j = 0; j < (int) cur_pb->pb_stats->lookahead_input_pins_used[i].size(); j++) {
2971+
for (size_t j = 0; j < cur_pb->pb_stats->lookahead_input_pins_used[i].size(); j++) {
29902972
VTR_ASSERT(cur_pb->pb_stats->lookahead_input_pins_used[i][j]);
2991-
cur_pb->pb_stats->input_pins_used[i][ipin] = cur_pb->pb_stats->lookahead_input_pins_used[i][j];
2992-
ipin++;
2973+
cur_pb->pb_stats->input_pins_used[i].insert(cur_pb->pb_stats->lookahead_input_pins_used[i][j]);
29932974
}
29942975
}
29952976

2996-
for (i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) {
2997-
ipin = 0;
2977+
for (int i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) {
29982978
VTR_ASSERT(cur_pb->pb_stats->lookahead_output_pins_used[i].size() <= (unsigned int)cur_pb->pb_graph_node->output_pin_class_size[i]);
2999-
for (j = 0; j < (int) cur_pb->pb_stats->lookahead_output_pins_used[i].size(); j++) {
2979+
for (size_t j = 0; j < cur_pb->pb_stats->lookahead_output_pins_used[i].size(); j++) {
30002980
VTR_ASSERT(cur_pb->pb_stats->lookahead_output_pins_used[i][j]);
3001-
cur_pb->pb_stats->output_pins_used[i][ipin] = cur_pb->pb_stats->lookahead_output_pins_used[i][j];
3002-
ipin++;
2981+
cur_pb->pb_stats->output_pins_used[i].insert(cur_pb->pb_stats->lookahead_output_pins_used[i][j]);
30032982
}
30042983
}
30052984

30062985
if (cur_pb->child_pbs != nullptr) {
3007-
for (i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children;
3008-
i++) {
2986+
for (int i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; i++) {
30092987
if (cur_pb->child_pbs[i] != nullptr) {
3010-
for (j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) {
2988+
for (int j = 0; j < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; j++) {
30112989
commit_lookahead_pins_used(&cur_pb->child_pbs[i][j]);
30122990
}
30132991
}

vpr/src/pack/pack_types.h

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
* Defines core data structures used in packing
88
*/
99
#include <map>
10+
#include <unordered_set>
1011
#include <vector>
1112

1213
#include "arch_types.h"
@@ -64,13 +65,9 @@ struct t_pb_stats {
6465
* currently open pb? */
6566
std::map<AtomNetId, int> num_pins_of_net_in_pb;
6667

67-
/* Record of pins of class used
68-
* TODO: Jason Luu: Should really be using hash table for this for speed,
69-
* too lazy to write one now, performance isn't too bad since I'm at
70-
* most iterating over the number of pins of a pb which is effectively
71-
* a constant for reasonable architectures */
72-
std::vector<std::vector<AtomNetId>> input_pins_used; /* [0..pb_graph_node->num_pin_classes-1][0..pin_class_size] number of input pins of this class that are used */
73-
std::vector<std::vector<AtomNetId>> output_pins_used; /* [0..pb_graph_node->num_pin_classes-1][0..pin_class_size] number of output pins of this class that are used */
68+
/* Record of pins of class used */
69+
std::vector<std::unordered_set<AtomNetId>> input_pins_used; /* [0..pb_graph_node->num_pin_classes-1] nets using this input pin class */
70+
std::vector<std::unordered_set<AtomNetId>> output_pins_used; /* [0..pb_graph_node->num_pin_classes-1] nets using this output pin class */
7471

7572
/* Use vector because array size is expected to be small so runtime should be faster using vector than map despite the O(N) vs O(log(n)) behaviour.*/
7673
std::vector<std::vector<AtomNetId>> lookahead_input_pins_used; /* [0..pb_graph_node->num_pin_classes-1] vector of input pins of this class that are speculatively used */

0 commit comments

Comments
 (0)