Skip to content

Commit d52240a

Browse files
Merge pull request #2892 from AlexandreSinger/feature-ap-flow-full-legalizer
[AP] Added APPack to the AP Flow as a Full Legalizer
2 parents 0cf372a + 25e2450 commit d52240a

File tree

20 files changed

+500
-96
lines changed

20 files changed

+500
-96
lines changed

doc/src/vpr/command_line_usage.rst

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,8 @@ VPR runs all stages of (pack, place, route, and analysis) if none of :option:`--
8989
as such, the :option:`--pack` and :option:`--place` options should not be set when this option is set.
9090
This flow requires that the device has a fixed size and some of the primitive blocks are fixed somewhere on the device grid.
9191

92+
.. seealso:: See :ref:`analytical_placement_options` for the options for this flow.
93+
9294
.. seealso:: See :ref:`Fixed FPGA Grid Layout <fixed_arch_grid_layout>` and :option:`--device` for how to fix the device size.
9395

9496
.. seealso:: See :ref:`VPR Placement Constraints <placement_constraints>` for how to fix primitive blocks in a design to the device grid.
@@ -1163,6 +1165,40 @@ The following options are only used when FPGA device and netlist contain a NoC r
11631165

11641166
**Default:** ``vpr_noc_placement_output.txt``
11651167

1168+
1169+
.. _analytical_placement_options:
1170+
1171+
Analytical Placement Options
1172+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1173+
Instead of Packing atoms into clusters and placing the clusters into valid tile
1174+
sites on the FPGA, Analytical Placement uses analytical techniques to place atoms
1175+
on the FPGA device by relaxing the constraints on where they can be placed. This
1176+
atom-level placement is then legalized into a clustered placement and passed into
1177+
the router in VPR.
1178+
1179+
Analytical Placement is generally split into three stages:
1180+
1181+
* Global Placement: Uses analytical techniques to place atoms on the FPGA grid.
1182+
1183+
* Full Legalization: Legalizes a flat (atom) placement into legal clusters placed on the FPGA grid.
1184+
1185+
* Detailed Placement: While keeping the clusters legal, performs optimizations on the clustered placement.
1186+
1187+
.. warning::
1188+
1189+
Analytical Placement is experimental and under active development.
1190+
1191+
.. option:: --ap_full_legalizer {naive | appack}
1192+
1193+
Controls which Full Legalizer to use in the AP Flow.
1194+
1195+
* ``naive`` Use a Naive Full Legalizer which will try to create clusters exactly where their atoms are placed.
1196+
1197+
* ``appack`` Use APPack, which takes the Packer in VPR and uses the flat atom placement to create better clusters.
1198+
1199+
**Default:** ``appack``
1200+
1201+
11661202
.. _router_options:
11671203

11681204
Router Options
@@ -1179,7 +1215,7 @@ VPR uses a negotiated congestion algorithm (based on Pathfinder) to perform rout
11791215
This means that during the routing stage, all nets, both intra- and inter-cluster, are routed directly from one primitive pin to another primitive pin.
11801216
This increases routing time but can improve routing quality by re-arranging LUT inputs and exposing additional optimization opportunities in architectures with local intra-cluster routing that is not a full crossbar.
11811217

1182-
**Default:** ``OFF`
1218+
**Default:** ``off``
11831219

11841220
.. option:: --max_router_iterations <int>
11851221

vpr/src/analytical_place/analytical_placement_flow.cpp

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -93,17 +93,15 @@ void run_analytical_placement_flow(t_vpr_setup& vpr_setup) {
9393
device_ctx.grid.get_num_layers()));
9494

9595
// Run the Full Legalizer.
96-
FullLegalizer full_legalizer(ap_netlist,
97-
vpr_setup,
98-
device_ctx.grid,
99-
device_ctx.arch,
100-
atom_nlist,
101-
prepacker,
102-
device_ctx.logical_block_types,
103-
vpr_setup.PackerRRGraph,
104-
device_ctx.arch->models,
105-
device_ctx.arch->model_library,
106-
vpr_setup.PackerOpts);
107-
full_legalizer.legalize(p_placement);
96+
const t_ap_opts& ap_opts = vpr_setup.APOpts;
97+
std::unique_ptr<FullLegalizer> full_legalizer = make_full_legalizer(ap_opts.full_legalizer_type,
98+
ap_netlist,
99+
atom_nlist,
100+
prepacker,
101+
vpr_setup,
102+
*device_ctx.arch,
103+
device_ctx.grid,
104+
device_ctx.logical_block_types);
105+
full_legalizer->legalize(p_placement);
108106
}
109107

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
/**
2+
* @file
3+
* @author Alex Singer
4+
* @date February 2025
5+
* @brief Enumerations used by the Analytical Placement Flow.
6+
*/
7+
8+
#pragma once
9+
10+
/**
11+
* @brief The type of a Full Legalizer.
12+
*
13+
* The Analytical Placement flow may implement different Full Legalizers. This
14+
* enum can select between these different Full Legalizers.
15+
*/
16+
enum class e_ap_full_legalizer {
17+
Naive, ///< The Naive Full Legalizer (NaiveFullLegalizer), which clusters atoms placed in the same tile and tries to place them in that tile according to the flat placement.
18+
APPack ///< The APPack Full Legalizer (APPack), which runs the Packer and then the Placer, passing the flat placement to each of them as a hint.
19+
};
20+

vpr/src/analytical_place/full_legalizer.cpp

Lines changed: 114 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,13 @@
1010
#include "full_legalizer.h"
1111

1212
#include <list>
13+
#include <memory>
1314
#include <unordered_set>
1415
#include <vector>
1516

17+
#include "FlatPlacementInfo.h"
18+
#include "ap_flow_enums.h"
19+
#include "device_grid.h"
1620
#include "partial_placement.h"
1721
#include "ShowSetup.h"
1822
#include "ap_netlist_fwd.h"
@@ -25,6 +29,7 @@
2529
#include "logic_types.h"
2630
#include "pack.h"
2731
#include "physical_types.h"
32+
#include "place.h"
2833
#include "place_and_route.h"
2934
#include "place_constraints.h"
3035
#include "place_macro.h"
@@ -42,6 +47,38 @@
4247
#include "vtr_time.h"
4348
#include "vtr_vector.h"
4449

50+
51+
/**
 * @brief Factory which builds the Full Legalizer selected by the given type.
 *
 * Every concrete Full Legalizer is constructed from the same set of arguments,
 * which are passed through to its constructor unchanged. A type which matches
 * none of the known Full Legalizers is reported as a fatal VPR error.
 *
 *  @param full_legalizer_type  Selects which Full Legalizer to construct.
 *  @param ap_netlist           Passed through to the legalizer's constructor.
 *  @param atom_netlist         Passed through to the legalizer's constructor.
 *  @param prepacker            Passed through to the legalizer's constructor.
 *  @param vpr_setup            Passed through to the legalizer's constructor.
 *  @param arch                 Passed through to the legalizer's constructor.
 *  @param device_grid          Passed through to the legalizer's constructor.
 *  @param logical_block_types  Passed through to the legalizer's constructor.
 *
 *  @return An owning pointer to the newly constructed Full Legalizer.
 */
std::unique_ptr<FullLegalizer> make_full_legalizer(e_ap_full_legalizer full_legalizer_type,
                                                   const APNetlist& ap_netlist,
                                                   const AtomNetlist& atom_netlist,
                                                   const Prepacker& prepacker,
                                                   t_vpr_setup& vpr_setup,
                                                   const t_arch& arch,
                                                   const DeviceGrid& device_grid,
                                                   const std::vector<t_logical_block_type>& logical_block_types) {
    // Naive Full Legalizer.
    if (full_legalizer_type == e_ap_full_legalizer::Naive) {
        return std::make_unique<NaiveFullLegalizer>(ap_netlist,
                                                    atom_netlist,
                                                    prepacker,
                                                    vpr_setup,
                                                    arch,
                                                    device_grid,
                                                    logical_block_types);
    }

    // APPack Full Legalizer.
    if (full_legalizer_type == e_ap_full_legalizer::APPack) {
        return std::make_unique<APPack>(ap_netlist,
                                        atom_netlist,
                                        prepacker,
                                        vpr_setup,
                                        arch,
                                        device_grid,
                                        logical_block_types);
    }

    // Any other value does not name a Full Legalizer known to this factory.
    VPR_FATAL_ERROR(VPR_ERROR_AP,
                    "Unrecognized full legalizer type");
}
81+
4582
namespace {
4683

4784
/// @brief A unique ID for each root tile on the device.
@@ -239,24 +276,24 @@ static LegalizationClusterId create_new_cluster(PackMoleculeId seed_molecule_id,
239276
return LegalizationClusterId();
240277
}
241278

242-
void FullLegalizer::create_clusters(const PartialPlacement& p_placement) {
279+
void NaiveFullLegalizer::create_clusters(const PartialPlacement& p_placement) {
243280
// PACKING:
244281
// Initialize the cluster legalizer (Packing)
245282
// FIXME: The legalization strategy is currently set to full. Should handle
246283
// this better to make it faster.
247-
t_pack_high_fanout_thresholds high_fanout_thresholds(packer_opts_.high_fanout_threshold);
284+
t_pack_high_fanout_thresholds high_fanout_thresholds(vpr_setup_.PackerOpts.high_fanout_threshold);
248285
ClusterLegalizer cluster_legalizer(atom_netlist_,
249286
prepacker_,
250287
logical_block_types_,
251-
lb_type_rr_graphs_,
252-
user_models_,
253-
library_models_,
254-
packer_opts_.target_external_pin_util,
288+
vpr_setup_.PackerRRGraph,
289+
arch_.models,
290+
arch_.model_library,
291+
vpr_setup_.PackerOpts.target_external_pin_util,
255292
high_fanout_thresholds,
256293
ClusterLegalizationStrategy::FULL,
257-
packer_opts_.enable_pin_feasibility_filter,
258-
packer_opts_.feasible_block_array_size,
259-
packer_opts_.pack_verbosity);
294+
vpr_setup_.PackerOpts.enable_pin_feasibility_filter,
295+
vpr_setup_.PackerOpts.feasible_block_array_size,
296+
vpr_setup_.PackerOpts.pack_verbosity);
260297
// Create clusters for each tile.
261298
// Start by giving each root tile a unique ID.
262299
size_t grid_width = device_grid_.width();
@@ -330,24 +367,24 @@ void FullLegalizer::create_clusters(const PartialPlacement& p_placement) {
330367

331368
// Check and output the clustering.
332369
std::unordered_set<AtomNetId> is_clock = alloc_and_load_is_clock();
333-
check_and_output_clustering(cluster_legalizer, packer_opts_, is_clock, arch_);
370+
check_and_output_clustering(cluster_legalizer, vpr_setup_.PackerOpts, is_clock, &arch_);
334371
// Reset the cluster legalizer. This is required to load the packing.
335372
cluster_legalizer.reset();
336373
// Regenerate the clustered netlist from the file generated previously.
337374
// FIXME: This writing and loading from a file is wasteful. Should generate
338375
// the clusters directly from the cluster legalizer.
339-
vpr_load_packing(vpr_setup_, *arch_);
376+
vpr_load_packing(vpr_setup_, arch_);
340377
load_cluster_constraints();
341378
const ClusteredNetlist& clb_nlist = g_vpr_ctx.clustering().clb_nlist;
342379

343380
// Verify the packing and print some info
344-
check_netlist(packer_opts_.pack_verbosity);
381+
check_netlist(vpr_setup_.PackerOpts.pack_verbosity);
345382
writeClusteredNetlistStats(vpr_setup_.FileNameOpts.write_block_usage);
346383
print_pb_type_count(clb_nlist);
347384
}
348385

349-
void FullLegalizer::place_clusters(const ClusteredNetlist& clb_nlist,
350-
const PartialPlacement& p_placement) {
386+
void NaiveFullLegalizer::place_clusters(const ClusteredNetlist& clb_nlist,
387+
const PartialPlacement& p_placement) {
351388
// PLACING:
352389
// Create a lookup from the AtomBlockId to the APBlockId
353390
vtr::vector<AtomBlockId, APBlockId> atom_to_ap_block(atom_netlist_.blocks().size());
@@ -409,7 +446,7 @@ void FullLegalizer::place_clusters(const ClusteredNetlist& clb_nlist,
409446
// - This may be needed to perform SA. Not needed right now.
410447
}
411448

412-
void FullLegalizer::legalize(const PartialPlacement& p_placement) {
449+
void NaiveFullLegalizer::legalize(const PartialPlacement& p_placement) {
413450
// Create a scoped timer for the full legalizer
414451
vtr::ScopedStartFinishTimer full_legalizer_timer("AP Full Legalizer");
415452

@@ -449,3 +486,65 @@ void FullLegalizer::legalize(const PartialPlacement& p_placement) {
449486
post_place_sync();
450487
}
451488

489+
void APPack::legalize(const PartialPlacement& p_placement) {
490+
// Create a scoped timer for the full legalizer
491+
vtr::ScopedStartFinishTimer full_legalizer_timer("AP Full Legalizer");
492+
493+
// Convert the Partial Placement (APNetlist) to a flat placement (AtomNetlist).
494+
FlatPlacementInfo flat_placement_info(atom_netlist_);
495+
for (APBlockId ap_blk_id : ap_netlist_.blocks()) {
496+
PackMoleculeId mol_id = ap_netlist_.block_molecule(ap_blk_id);
497+
const t_pack_molecule& mol = prepacker_.get_molecule(mol_id);
498+
for (AtomBlockId atom_blk_id : mol.atom_block_ids) {
499+
if (!atom_blk_id.is_valid())
500+
continue;
501+
flat_placement_info.blk_x_pos[atom_blk_id] = p_placement.block_x_locs[ap_blk_id];
502+
flat_placement_info.blk_y_pos[atom_blk_id] = p_placement.block_y_locs[ap_blk_id];
503+
flat_placement_info.blk_layer[atom_blk_id] = p_placement.block_layer_nums[ap_blk_id];
504+
flat_placement_info.blk_sub_tile[atom_blk_id] = p_placement.block_sub_tiles[ap_blk_id];
505+
}
506+
}
507+
508+
// Run the Packer stage with the flat placement as a hint.
509+
try_pack(&vpr_setup_.PackerOpts,
510+
&vpr_setup_.AnalysisOpts,
511+
arch_,
512+
vpr_setup_.RoutingArch,
513+
vpr_setup_.user_models,
514+
vpr_setup_.library_models,
515+
vpr_setup_.PackerRRGraph,
516+
flat_placement_info);
517+
518+
// The Packer stores the clusters into a .net file. Load the packing file.
519+
// FIXME: This should be removed. Reading from a file is strange.
520+
vpr_load_packing(vpr_setup_, arch_);
521+
load_cluster_constraints();
522+
const ClusteredNetlist& clb_nlist = g_vpr_ctx.clustering().clb_nlist;
523+
524+
// Verify the packing and print some info
525+
check_netlist(vpr_setup_.PackerOpts.pack_verbosity);
526+
writeClusteredNetlistStats(vpr_setup_.FileNameOpts.write_block_usage);
527+
print_pb_type_count(clb_nlist);
528+
529+
// Pass the clustering into the Placer with the flat placement as a hint.
530+
// TODO: This should only be the initial placer. Running the full SA would
531+
// be more of a Detailed Placer.
532+
const auto& placement_net_list = (const Netlist<>&)clb_nlist;
533+
try_place(placement_net_list,
534+
vpr_setup_.PlacerOpts,
535+
vpr_setup_.RouterOpts,
536+
vpr_setup_.AnalysisOpts,
537+
vpr_setup_.NocOpts,
538+
arch_.Chans,
539+
&vpr_setup_.RoutingArch,
540+
vpr_setup_.Segments,
541+
arch_.directs,
542+
flat_placement_info,
543+
false /* is_flat */);
544+
545+
// TODO: This was taken from vpr_api. Not sure why it is needed. Should be
546+
// made part of the placement and verify placement should check for
547+
// it.
548+
post_place_sync();
549+
}
550+

0 commit comments

Comments
 (0)