Skip to content

Commit 33c131a

Browse files
authored
Merge pull request #2816 from AlexandreSinger/feature-greedy-clusterer-class
[Packer] Created GreedyClusterer Class
2 parents 3fa3148 + 2232cd4 commit 33c131a

File tree

8 files changed

+283
-164
lines changed

8 files changed

+283
-164
lines changed

vpr/src/analytical_place/full_legalizer.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
#include "ShowSetup.h"
1818
#include "ap_netlist_fwd.h"
1919
#include "check_netlist.h"
20-
#include "cluster.h"
2120
#include "cluster_legalizer.h"
2221
#include "cluster_util.h"
2322
#include "clustered_netlist.h"

vpr/src/base/vpr_api.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,6 @@
6464
#include "check_route.h"
6565
#include "constant_nets.h"
6666
#include "atom_netlist_utils.h"
67-
#include "cluster.h"
6867
#include "output_clustering.h"
6968
#include "vpr_constraints_reader.h"
7069
#include "place_constraints.h"

vpr/src/pack/cluster.h

Lines changed: 0 additions & 32 deletions
This file was deleted.

vpr/src/pack/cluster_util.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1879,6 +1879,32 @@ void print_pb_type_count_recurr(t_pb_type* pb_type, size_t max_name_chars, size_
18791879
}
18801880
}
18811881

1882+
// Logs a "Pb types usage" report for the given clustered netlist: usage counts
// are gathered from every cluster block's pb and then printed for each logical
// block type in the device context that has a pb_type.
void print_pb_type_count(const ClusteredNetlist& clb_nlist) {
1883+
auto& device_ctx = g_vpr_ctx.device();
1884+
1885+
// Usage count keyed by pb type; filled in by update_pb_type_count below.
std::map<t_pb_type*, int> pb_type_count;
1886+
1887+
// Largest value returned by update_pb_type_count over all cluster blocks.
size_t max_depth = 0;
1888+
for (ClusterBlockId blk : clb_nlist.blocks()) {
1889+
size_t pb_max_depth = update_pb_type_count(clb_nlist.block_pb(blk), pb_type_count, 0);
1890+
1891+
max_depth = std::max(max_depth, pb_max_depth);
1892+
}
1893+
1894+
// Length of the longest name among the pb types that were counted.
size_t max_pb_type_name_chars = 0;
1895+
for (auto& pb_type : pb_type_count) {
1896+
max_pb_type_name_chars = std::max(max_pb_type_name_chars, strlen(pb_type.first->name));
1897+
}
1898+
1899+
VTR_LOG("\nPb types usage...\n");
1900+
for (const auto& logical_block_type : device_ctx.logical_block_types) {
1901+
// Logical block types without a pb_type have nothing to report.
if (!logical_block_type.pb_type) continue;
1902+
1903+
// NOTE(review): the width argument is longest name + max depth, presumably
// so names stay aligned when deeper levels are indented — confirm against
// print_pb_type_count_recurr.
print_pb_type_count_recurr(logical_block_type.pb_type, max_pb_type_name_chars + max_depth, 0, pb_type_count);
1904+
}
1905+
VTR_LOG("\n");
1906+
}
1907+
18821908
t_logical_block_type_ptr identify_logic_block_type(std::map<const t_model*, std::vector<t_logical_block_type_ptr>>& primitive_candidate_block_types) {
18831909
std::string lut_name = ".names";
18841910

vpr/src/pack/cluster_util.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,11 @@ void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_
481481

482482
void print_pb_type_count_recurr(t_pb_type* type, size_t max_name_chars, size_t curr_depth, std::map<t_pb_type*, int>& pb_type_count);
483483

484+
/**
485+
* Print the total number of used physical blocks for each pb type in the architecture
486+
*/
487+
void print_pb_type_count(const ClusteredNetlist& clb_nlist);
488+
484489
/*
485490
* @brief This function identifies the logic block type which is defined by the
486491
* block type which has a lut primitive.

vpr/src/pack/cluster.cpp renamed to vpr/src/pack/greedy_clusterer.cpp

Lines changed: 72 additions & 108 deletions
Large diffs are not rendered by default.

vpr/src/pack/greedy_clusterer.h

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
/**
2+
* @file
3+
* @author Alex Singer
4+
* @date November 2024
5+
* @brief The declarations of the Greedy Clusterer class which is used to
6+
* encapsulate the process of greedy clustering.
7+
*/
8+
9+
#pragma once
10+
11+
#include <map>
12+
#include <unordered_set>
13+
#include "physical_types.h"
14+
15+
// Forward declarations
16+
class AtomNetId;
17+
class AtomNetlist;
18+
class AttractionInfo;
19+
class ClusterLegalizer;
20+
class Prepacker;
21+
struct t_analysis_opts;
22+
struct t_clustering_data;
23+
struct t_pack_high_fanout_thresholds;
24+
struct t_packer_opts;
25+
26+
/**
27+
* @brief A clusterer that generates clusters by greedily choosing the clusters
28+
* which appear to have the best gain for a given neighbor.
29+
*
30+
* This clusterer generates one cluster at a time by finding candidate molecules
31+
* and selecting the molecule with the highest gain.
32+
*
33+
* The clusterer takes an Atom Netlist which has been pre-packed into pack
34+
* patterns (e.g. carry chains) as input and produces a set of legal clusters
35+
* of these pack molecules as output. Legality here means that it was able to
36+
* find a valid intra-lb route for the inputs of the clusters, through the
37+
* internal molecules, and to the outputs of the clusters.
38+
*/
39+
class GreedyClusterer {
40+
public:
41+
/**
42+
* @brief Constructor of the Greedy Clusterer class.
43+
*
44+
* The clusterer may be invoked many times during the packing flow. This
45+
* constructor will pre-compute information before clustering which can
46+
* improve the performance of the clusterer.
47+
*
48+
* @param packer_opts
49+
* Options passed by the user to configure the packing and
50+
* clustering algorithms.
51+
* @param analysis_opts
52+
* Options passed by the user to configure timing analysis in
53+
* the clusterer.
54+
* @param atom_netlist
55+
* The atom netlist to cluster over.
56+
* @param arch
57+
* The architecture to cluster over.
58+
* @param high_fanout_thresholds
59+
* The thresholds for what to consider as a high-fanout net
60+
* for each logical block type. The clusterer will not consider
61+
* nets with fanout higher than this to be important in
62+
* candidate block selection (gain computation).
63+
* A reason for it being per block type is that some blocks,
64+
* like RAMs, have weak gains to other RAM primitives due to
65+
* fairly high fanout address nets, so a higher fanout
66+
* threshold for them is useful in generating a more dense
67+
* packing.
68+
* @param is_clock
69+
* The set of clock nets in the Atom Netlist.
70+
* @param is_global
71+
* The set of global nets in the Atom Netlist. These will be
72+
* routed on special dedicated networks, and hence are less
73+
* relevant to locality / attraction.
74+
*/
75+
GreedyClusterer(const t_packer_opts& packer_opts,
76+
const t_analysis_opts& analysis_opts,
77+
const AtomNetlist& atom_netlist,
78+
const t_arch* arch,
79+
const t_pack_high_fanout_thresholds& high_fanout_thresholds,
80+
const std::unordered_set<AtomNetId>& is_clock,
81+
const std::unordered_set<AtomNetId>& is_global);
82+
83+
/**
84+
* @brief Performs clustering on the pack molecules formed by the prepacker.
85+
*
86+
* The clustering is contained within the Cluster Legalizer.
87+
*
88+
* @param cluster_legalizer
89+
* The cluster legalizer which is used to create clusters and
90+
* grow clusters by adding molecules to a cluster.
91+
* @param prepacker
92+
* The prepacker object which contains the pack molecules that
93+
* are atoms which are pre-packed before the main clustering
94+
* (due to pack patterns, e.g. carry chains).
95+
* @param allow_unrelated_clustering
96+
* Allows primitives which have no attraction to the given
97+
* cluster to be packed into it. This can lead to a denser
98+
* packing, but tends to be bad for wirelength and timing.
99+
* @param balance_block_type_utilization
100+
* When true, tries to create clusters that balance the logical
101+
* block type utilization. This is useful when some primitives
102+
* have multiple logical block types to which they can cluster,
103+
* e.g. multiple sizes of physical RAMs exist on the chip.
104+
* @param attraction_groups
105+
* Information on the attraction groups used during the
106+
* clustering process. These are groups of primitives that have
107+
* extra attraction to each other; currently they are used to
108+
* guide the clusterer when it must cluster some parts of a
109+
* design densely due to user placement/floorplanning
110+
* constraints. They are created if some floorplan regions are
111+
* overfilled after a clustering attempt.
112+
*
113+
* @return num_used_type_instances
114+
* The number of used logical blocks of each type by the
115+
* clustering. This information may be useful when detecting
116+
* if the clustering can fit on the device.
117+
*/
118+
std::map<t_logical_block_type_ptr, size_t>
119+
do_clustering(ClusterLegalizer& cluster_legalizer,
120+
Prepacker& prepacker,
121+
bool allow_unrelated_clustering,
122+
bool balance_block_type_utilization,
123+
AttractionInfo& attraction_groups);
124+
125+
private:
126+
/*
127+
* When attraction groups are created, the purpose is to pack more densely by adding more molecules
128+
* from the cluster's attraction group to the cluster. In a normal flow, (when attraction groups are
129+
* not on), the cluster keeps being packed until the get_molecule routines return either a repeated
130+
* molecule or a nullptr. When attraction groups are on, we want to keep exploring molecules for the
131+
* cluster until a nullptr is returned. So, the number of repeated molecules is changed from 1 to 500,
132+
* effectively making the clusterer pack a cluster until a nullptr is returned.
133+
*/
134+
static constexpr int attraction_groups_max_repeated_molecules_ = 500;
135+
136+
// NOTE(review): several members below are declared as references (`&`) and
// arch_ is a raw pointer, so this class does not own those objects.
// Presumably they are bound from the constructor arguments and must outlive
// the clusterer — confirm against the constructor definition and callers.
/// @brief The packer options used to configure the clusterer.
137+
const t_packer_opts& packer_opts_;
138+
139+
/// @brief The analysis options used to configure timing analysis within the
140+
/// clusterer.
141+
const t_analysis_opts& analysis_opts_;
142+
143+
/// @brief The atom netlist to cluster over.
144+
const AtomNetlist& atom_netlist_;
145+
146+
/// @brief The device architecture to cluster onto.
147+
const t_arch* arch_ = nullptr;
148+
149+
/// @brief The high-fanout thresholds per logical block type. Used to ignore
150+
/// certain nets when calculating the gain for the next candidate
151+
/// molecule to cluster.
152+
const t_pack_high_fanout_thresholds& high_fanout_thresholds_;
153+
154+
/// @brief A set of atom nets which are considered as clocks.
155+
const std::unordered_set<AtomNetId>& is_clock_;
156+
157+
/// @brief A set of atom nets which are considered as global nets.
158+
const std::unordered_set<AtomNetId>& is_global_;
159+
160+
/// @brief Pre-computed logical block types for each model in the architecture.
161+
std::map<const t_model*, std::vector<t_logical_block_type_ptr>> primitive_candidate_block_types_;
162+
};
163+

vpr/src/pack/pack.cpp

Lines changed: 17 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
#include <unordered_set>
22

33
#include "SetupGrid.h"
4-
#include "cluster.h"
54
#include "cluster_legalizer.h"
65
#include "cluster_util.h"
6+
#include "constraints_report.h"
77
#include "globals.h"
8+
#include "greedy_clusterer.h"
89
#include "pack.h"
910
#include "prepack.h"
1011
#include "vpr_context.h"
@@ -29,7 +30,6 @@ bool try_pack(t_packer_opts* packer_opts,
2930
const DeviceContext& device_ctx = g_vpr_ctx.device();
3031

3132
std::unordered_set<AtomNetId> is_clock, is_global;
32-
t_clustering_data clustering_data;
3333
VTR_LOG("Begin packing '%s'.\n", packer_opts->circuit_file_name.c_str());
3434

3535
is_clock = alloc_and_load_is_clock();
@@ -91,7 +91,6 @@ bool try_pack(t_packer_opts* packer_opts,
9191
}
9292

9393
int pack_iteration = 1;
94-
bool floorplan_regions_overfull = false;
9594

9695
// Initialize the cluster legalizer.
9796
ClusterLegalizer cluster_legalizer(atom_ctx.nlist,
@@ -110,27 +109,25 @@ bool try_pack(t_packer_opts* packer_opts,
110109
VTR_LOG("Packing with pin utilization targets: %s\n", cluster_legalizer.get_target_external_pin_util().to_string().c_str());
111110
VTR_LOG("Packing with high fanout thresholds: %s\n", high_fanout_thresholds.to_string().c_str());
112111

113-
while (true) {
114-
free_clustering_data(*packer_opts, clustering_data);
115-
112+
// Initialize the greedy clusterer.
113+
GreedyClusterer clusterer(*packer_opts,
114+
*analysis_opts,
115+
atom_ctx.nlist,
116+
arch,
117+
high_fanout_thresholds,
118+
is_clock,
119+
is_global);
116120

121+
while (true) {
117122
//Cluster the netlist
118123
// num_used_type_instances: A map used to save the number of used
119124
// instances from each logical block type.
120125
std::map<t_logical_block_type_ptr, size_t> num_used_type_instances;
121-
num_used_type_instances = do_clustering(*packer_opts,
122-
*analysis_opts,
123-
arch,
124-
prepacker,
125-
cluster_legalizer,
126-
is_clock,
127-
is_global,
128-
allow_unrelated_clustering,
129-
balance_block_type_util,
130-
attraction_groups,
131-
floorplan_regions_overfull,
132-
high_fanout_thresholds,
133-
clustering_data);
126+
num_used_type_instances = clusterer.do_clustering(cluster_legalizer,
127+
prepacker,
128+
allow_unrelated_clustering,
129+
balance_block_type_util,
130+
attraction_groups);
134131

135132
//Try to size/find a device
136133
bool fits_on_device = try_size_device_grid(*arch, num_used_type_instances, packer_opts->target_device_utilization, packer_opts->device_layout);
@@ -139,6 +136,7 @@ bool try_pack(t_packer_opts* packer_opts,
139136
* is not dense enough and there are floorplan constraints, it is presumed that the constraints are the cause
140137
* of the floorplan not fitting, so attraction groups are turned on for later iterations.
141138
*/
139+
bool floorplan_regions_overfull = floorplan_constraints_regions_overfull(cluster_legalizer);
142140
bool floorplan_not_fitting = (floorplan_regions_overfull || g_vpr_ctx.floorplanning().constraints.get_num_partitions() > 0);
143141

144142
if (fits_on_device && !floorplan_regions_overfull) {
@@ -261,9 +259,6 @@ bool try_pack(t_packer_opts* packer_opts,
261259
//check clustering and output it
262260
check_and_output_clustering(cluster_legalizer, *packer_opts, is_clock, arch);
263261

264-
// Free Data Structures
265-
free_clustering_data(*packer_opts, clustering_data);
266-
267262
VTR_LOG("\n");
268263
VTR_LOG("Netlist conversion complete.\n");
269264
VTR_LOG("\n");

0 commit comments

Comments
 (0)