Skip to content

Commit 9d920ec

Browse files
authored
Merge pull request #2808 from AlexandreSinger/feature-ap-testing
[AP] Testing Infrastructure
2 parents 66f35d9 + 4650ba1 commit 9d920ec

23 files changed

+6583
-7
lines changed

vpr/src/analytical_place/full_legalizer.cpp

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "logic_types.h"
2727
#include "pack.h"
2828
#include "physical_types.h"
29+
#include "place_and_route.h"
2930
#include "place_constraints.h"
3031
#include "place_macro.h"
3132
#include "verify_clustering.h"
@@ -103,9 +104,6 @@ class APClusterPlacer {
103104
g_vpr_ctx.mutable_placement().cube_bb = false;
104105
g_vpr_ctx.mutable_placement().compressed_block_grids = create_compressed_block_grids();
105106

106-
// Initialize the macros
107-
blk_loc_registry.mutable_place_macros().alloc_and_load_placement_macros(directs);
108-
109107
// TODO: The next few steps will be basically a direct copy of the initial
110108
// placement code since it does everything we need! It would be nice
111109
// to share the code.
@@ -133,6 +131,13 @@ class APClusterPlacer {
133131
const ClusteringContext& cluster_ctx = g_vpr_ctx.clustering();
134132
const auto& block_locs = g_vpr_ctx.placement().block_locs();
135133
auto& blk_loc_registry = g_vpr_ctx.mutable_placement().mutable_blk_loc_registry();
134+
// If this block has already been placed, just return true.
135+
// TODO: This should be investigated further. What I think is happening
136+
// is that a macro is being placed which contains another cluster.
137+
// This must be a carry chain. May need to rewrite the algorithm
138+
// below to use macros instead of clusters.
139+
if (is_block_placed(clb_blk_id, block_locs))
140+
return true;
136141
VTR_ASSERT(!is_block_placed(clb_blk_id, block_locs) && "Block already placed. Is this intentional?");
137142
t_pl_macro pl_macro = get_macro(clb_blk_id);
138143
t_pl_loc to_loc;
@@ -170,6 +175,10 @@ class APClusterPlacer {
170175
bool exhaustively_place_cluster(ClusterBlockId clb_blk_id) {
171176
const auto& block_locs = g_vpr_ctx.placement().block_locs();
172177
auto& blk_loc_registry = g_vpr_ctx.mutable_placement().mutable_blk_loc_registry();
178+
// If this block has already been placed, just return true.
179+
// TODO: See similar comment above.
180+
if (is_block_placed(clb_blk_id, block_locs))
181+
return true;
173182
VTR_ASSERT(!is_block_placed(clb_blk_id, block_locs) && "Block already placed. Is this intentional?");
174183
t_pl_macro pl_macro = get_macro(clb_blk_id);
175184
const PartitionRegion& pr = is_cluster_constrained(clb_blk_id) ? g_vpr_ctx.floorplanning().cluster_constraints[clb_blk_id] : get_device_partition_region();
@@ -346,6 +355,10 @@ void FullLegalizer::place_clusters(const ClusteredNetlist& clb_nlist,
346355
for (APBlockId ap_blk_id : ap_netlist_.blocks()) {
347356
const t_pack_molecule* blk_mol = ap_netlist_.block_molecule(ap_blk_id);
348357
for (AtomBlockId atom_blk_id : blk_mol->atom_block_ids) {
358+
// See issue #2791, some of the atom_block_ids may be invalid. They
359+
// can safely be ignored.
360+
if (!atom_blk_id.is_valid())
361+
continue;
349362
// Ensure that this block is not in any other AP block. That would
350363
// be weird.
351364
VTR_ASSERT(!atom_to_ap_block[atom_blk_id].is_valid());
@@ -429,5 +442,10 @@ void FullLegalizer::legalize(const PartialPlacement& p_placement) {
429442
"Aborting program.\n",
430443
num_placement_errors);
431444
}
445+
446+
// TODO: This was taken from vpr_api. Not sure why it is needed. Should be
447+
// made part of the placement and verify placement should check for
448+
// it.
449+
post_place_sync();
432450
}
433451

vpr/src/analytical_place/partial_legalizer.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ static inline PrimitiveVector get_primitive_mass(APBlockId blk_id,
7676
PrimitiveVector mass;
7777
const t_pack_molecule* mol = netlist.block_molecule(blk_id);
7878
for (AtomBlockId atom_blk_id : mol->atom_block_ids) {
79+
// See issue #2791, some of the atom_block_ids may be invalid. They can
80+
// safely be ignored.
81+
if (!atom_blk_id.is_valid())
82+
continue;
7983
const t_model* model = g_vpr_ctx.atom().nlist.block_model(atom_blk_id);
8084
VTR_ASSERT_DEBUG(model->index >= 0);
8185
mass.add_val_to_dim(get_model_mass(model), model->index);
@@ -354,6 +358,8 @@ void FlowBasedLegalizer::compute_neighbors_of_bin(LegalizerBinId src_bin_id, siz
354358
// Create visited flags for each bin. Set the source to visited.
355359
vtr::vector_map<LegalizerBinId, bool> bin_visited(bins_.size(), false);
356360
bin_visited[src_bin_id] = true;
361+
// Create a distance count for each bin from the src.
362+
vtr::vector_map<LegalizerBinId, unsigned> bin_distance(bins_.size(), 0);
357363
// Flags to check if a specific model has been found in the given direction.
358364
// In this case, direction is the direction of the largest component of the
359365
// manhattan distance between the source bin and the target bin.
@@ -401,6 +407,11 @@ void FlowBasedLegalizer::compute_neighbors_of_bin(LegalizerBinId src_bin_id, siz
401407
// Pop the bin from the queue.
402408
LegalizerBinId bin_id = q.front();
403409
q.pop();
410+
// If the distance of this bin from the source is too large, do not
411+
// explore.
412+
unsigned curr_bin_dist = bin_distance[bin_id];
413+
if (curr_bin_dist > max_bin_neighbor_dist_)
414+
continue;
404415
// Get the direct neighbors of the bin (neighbors that are directly
405416
// touching).
406417
auto direct_neighbors = get_direct_neighbors_of_bin(bin_id, bins_, tile_bin_);
@@ -431,6 +442,8 @@ void FlowBasedLegalizer::compute_neighbors_of_bin(LegalizerBinId src_bin_id, siz
431442
}
432443
// Mark this bin as visited and push it onto the queue.
433444
bin_visited[dir_neighbor_bin_id] = true;
445+
// Update the distance.
446+
bin_distance[dir_neighbor_bin_id] = curr_bin_dist + 1;
434447
// FIXME: This may be inefficient since it will do an entire BFS of
435448
// the grid if a neighbor of a given type does not exist in
436449
// a specific direction. Should add a check to see if it is
@@ -506,6 +519,7 @@ FlowBasedLegalizer::FlowBasedLegalizer(const APNetlist& netlist)
506519
tile_bin_[x][y] = new_bin_id;
507520
}
508521
}
522+
509523
// Get the number of models in the device.
510524
size_t num_models = get_num_models();
511525
// Connect the bins.
@@ -524,10 +538,14 @@ FlowBasedLegalizer::FlowBasedLegalizer(const APNetlist& netlist)
524538
compute_neighbors_of_bin(tile_bin_[x][y], num_models);
525539
}
526540
}
541+
527542
// Pre-compute the masses of the APBlocks
543+
VTR_LOGV(log_verbosity_ >= 10, "Pre-computing the block masses...\n");
528544
for (APBlockId blk_id : netlist.blocks()) {
529545
block_masses_.insert(blk_id, get_primitive_mass(blk_id, netlist));
530546
}
547+
VTR_LOGV(log_verbosity_ >= 10, "Finished pre-computing the block masses.\n");
548+
531549
// Initialize the block_bins.
532550
block_bins_.resize(netlist.blocks().size(), LegalizerBinId::INVALID());
533551
}

vpr/src/analytical_place/partial_legalizer.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,18 @@ class FlowBasedLegalizer : public PartialLegalizer {
196196
/// enough space to flow blocks.
197197
static constexpr size_t max_num_iterations_ = 100;
198198

199+
/// @brief The maximum number of hops away a neighbor of a bin can be, where
200+
/// a hop is the minimum number of bins you need to pass through to
201+
/// get to this neighbor (manhattan distance in bins-space).
202+
///
203+
/// This is used to speed up the computation of the neighbors of bins since
204+
/// it reduces the amount of the graph that needs to be explored.
205+
///
206+
/// TODO: This may need to be made per primitive type since some types may
207+
/// need to explore more of the architecture than others to find
208+
/// sufficient neighbors.
209+
static constexpr unsigned max_bin_neighbor_dist_ = 4;
210+
199211
/// @brief A vector of all the bins in the legalizer.
200212
vtr::vector_map<LegalizerBinId, LegalizerBin> bins_;
201213

vtr_flow/arch/timing/k6_frac_N10_40nm.xml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,35 @@
6666
<!--Fill with 'clb'-->
6767
<fill type="clb" priority="10"/>
6868
</auto_layout>
69+
<!--
70+
This architecture is commonly used for the MCNC Benchmark Suite. Below
71+
are a set of fixed layouts which were found to work well for these
72+
benchmarks. They were found by finding the minimum device size for each
73+
benchmark and categorizing the benchmarks into the different fixed
74+
layouts. Each fixed layout was chosen to be around 1.5x larger than the
75+
previous.
76+
-->
77+
<fixed_layout name="mcnc_small" width="11" height="11">
78+
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
79+
<perimeter type="io" priority="100"/>
80+
<corners type="EMPTY" priority="101"/>
81+
<!--Fill with 'clb'-->
82+
<fill type="clb" priority="10"/>
83+
</fixed_layout>
84+
<fixed_layout name="mcnc_medium" width="16" height="16">
85+
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
86+
<perimeter type="io" priority="100"/>
87+
<corners type="EMPTY" priority="101"/>
88+
<!--Fill with 'clb'-->
89+
<fill type="clb" priority="10"/>
90+
</fixed_layout>
91+
<fixed_layout name="mcnc_large" width="22" height="22">
92+
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
93+
<perimeter type="io" priority="100"/>
94+
<corners type="EMPTY" priority="101"/>
95+
<!--Fill with 'clb'-->
96+
<fill type="clb" priority="10"/>
97+
</fixed_layout>
6998
</layout>
7099
<device>
71100
<!-- VB & JL: Using Ian Kuon's transistor sizing and drive strength data for routing, at 40 nm. Ian used BPTM

vtr_flow/arch/timing/k6_frac_N10_frac_chain_mem32K_40nm.xml

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,79 @@
237237
<col type="memory" startx="2" starty="1" repeatx="8" priority="20"/>
238238
<col type="EMPTY" startx="2" repeatx="8" starty="1" priority="19"/>
239239
</auto_layout>
240+
<!--
241+
This architecture is commonly used for the VTR Benchmark Suite. Below
242+
are a set of fixed layouts which were found to work well for these
243+
benchmarks. They were found by finding the minimum device size for each
244+
benchmark and categorizing the benchmarks into the different fixed
245+
layouts. Each fixed layout was chosen to be around 1.5x larger than the
246+
previous.
247+
-->
248+
<fixed_layout name="vtr_extra_small" width="20" height="20">
249+
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
250+
<perimeter type="io" priority="100"/>
251+
<corners type="EMPTY" priority="101"/>
252+
<!--Fill with 'clb'-->
253+
<fill type="clb" priority="10"/>
254+
<!--Column of 'mult_36' with 'EMPTY' blocks wherever a 'mult_36' does not fit. Vertical offset by 1 for perimeter.-->
255+
<col type="mult_36" startx="6" starty="1" repeatx="8" priority="20"/>
256+
<col type="EMPTY" startx="6" repeatx="8" starty="1" priority="19"/>
257+
<!--Column of 'memory' with 'EMPTY' blocks wherever a 'memory' does not fit. Vertical offset by 1 for perimeter.-->
258+
<col type="memory" startx="2" starty="1" repeatx="8" priority="20"/>
259+
<col type="EMPTY" startx="2" repeatx="8" starty="1" priority="19"/>
260+
</fixed_layout>
261+
<fixed_layout name="vtr_small" width="30" height="30">
262+
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
263+
<perimeter type="io" priority="100"/>
264+
<corners type="EMPTY" priority="101"/>
265+
<!--Fill with 'clb'-->
266+
<fill type="clb" priority="10"/>
267+
<!--Column of 'mult_36' with 'EMPTY' blocks wherever a 'mult_36' does not fit. Vertical offset by 1 for perimeter.-->
268+
<col type="mult_36" startx="6" starty="1" repeatx="8" priority="20"/>
269+
<col type="EMPTY" startx="6" repeatx="8" starty="1" priority="19"/>
270+
<!--Column of 'memory' with 'EMPTY' blocks wherever a 'memory' does not fit. Vertical offset by 1 for perimeter.-->
271+
<col type="memory" startx="2" starty="1" repeatx="8" priority="20"/>
272+
<col type="EMPTY" startx="2" repeatx="8" starty="1" priority="19"/>
273+
</fixed_layout>
274+
<fixed_layout name="vtr_medium" width="42" height="42">
275+
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
276+
<perimeter type="io" priority="100"/>
277+
<corners type="EMPTY" priority="101"/>
278+
<!--Fill with 'clb'-->
279+
<fill type="clb" priority="10"/>
280+
<!--Column of 'mult_36' with 'EMPTY' blocks wherever a 'mult_36' does not fit. Vertical offset by 1 for perimeter.-->
281+
<col type="mult_36" startx="6" starty="1" repeatx="8" priority="20"/>
282+
<col type="EMPTY" startx="6" repeatx="8" starty="1" priority="19"/>
283+
<!--Column of 'memory' with 'EMPTY' blocks wherever a 'memory' does not fit. Vertical offset by 1 for perimeter.-->
284+
<col type="memory" startx="2" starty="1" repeatx="8" priority="20"/>
285+
<col type="EMPTY" startx="2" repeatx="8" starty="1" priority="19"/>
286+
</fixed_layout>
287+
<fixed_layout name="vtr_large" width="65" height="65">
288+
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
289+
<perimeter type="io" priority="100"/>
290+
<corners type="EMPTY" priority="101"/>
291+
<!--Fill with 'clb'-->
292+
<fill type="clb" priority="10"/>
293+
<!--Column of 'mult_36' with 'EMPTY' blocks wherever a 'mult_36' does not fit. Vertical offset by 1 for perimeter.-->
294+
<col type="mult_36" startx="6" starty="1" repeatx="8" priority="20"/>
295+
<col type="EMPTY" startx="6" repeatx="8" starty="1" priority="19"/>
296+
<!--Column of 'memory' with 'EMPTY' blocks wherever a 'memory' does not fit. Vertical offset by 1 for perimeter.-->
297+
<col type="memory" startx="2" starty="1" repeatx="8" priority="20"/>
298+
<col type="EMPTY" startx="2" repeatx="8" starty="1" priority="19"/>
299+
</fixed_layout>
300+
<fixed_layout name="vtr_extra_large" width="105" height="105">
301+
<!--Perimeter of 'io' blocks with 'EMPTY' blocks at corners-->
302+
<perimeter type="io" priority="100"/>
303+
<corners type="EMPTY" priority="101"/>
304+
<!--Fill with 'clb'-->
305+
<fill type="clb" priority="10"/>
306+
<!--Column of 'mult_36' with 'EMPTY' blocks wherever a 'mult_36' does not fit. Vertical offset by 1 for perimeter.-->
307+
<col type="mult_36" startx="6" starty="1" repeatx="8" priority="20"/>
308+
<col type="EMPTY" startx="6" repeatx="8" starty="1" priority="19"/>
309+
<!--Column of 'memory' with 'EMPTY' blocks wherever a 'memory' does not fit. Vertical offset by 1 for perimeter.-->
310+
<col type="memory" startx="2" starty="1" repeatx="8" priority="20"/>
311+
<col type="EMPTY" startx="2" repeatx="8" starty="1" priority="19"/>
312+
</fixed_layout>
240313
</layout>
241314
<device>
242315
<!-- VB & JL: Using Ian Kuon's transistor sizing and drive strength data for routing, at 40 nm. Ian used BPTM
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
%include "common/pass_requirements.vpr_status.txt"
2+
%include "timing/pass_requirements.vpr_ap.txt"
3+
%include "timing/pass_requirements.vpr_route_fixed_chan_width.txt"
4+
5+
%include "common/pass_requirements.vtr_benchmarks.txt"
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# This collects QoR data that is interesting for the AP flow running on a fixed
2+
# channel width.
3+
4+
vpr_status;output.txt;vpr_status=(.*)
5+
total_wirelength;vpr.out;\s*Total wirelength: (\d+)
6+
# Final critical path delay (least slack): 6.34202 ns, Fmax: 157.678 MHz
7+
crit_path_delay;vpr.out;Critical path: (.*) ns
8+
ap_runtime;vpr.out;Analytical Placement took (.*) seconds
9+
pack_runtime;vpr.out;Packing took (.*) seconds
10+
# TODO: Figure out how to match Placement and not Analytical Placement better.
11+
place_runtime;vpr.out;^(?!.*\bAnalytical\b).*Placement took (.*) seconds
12+
route_runtime;vpr.out;Routing took (.*) seconds
13+
total_runtime;vpr.out;The entire flow of VPR took (.*) seconds
14+
num_clb;vpr.out;Netlist clb blocks:\s*(\d+)
15+

vtr_flow/scripts/python_libs/vtr/task.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,7 @@ def parse_circuit_constraint_list(
325325
"arch",
326326
"device",
327327
"constraints",
328+
"route_chan_width",
328329
]
329330
)
330331

@@ -792,6 +793,10 @@ def apply_cmd_line_circuit_constraints(cmd, circuit, config):
792793
circuit_vpr_constraints = config.circuit_constraints[circuit]["constraints"]
793794
if circuit_vpr_constraints is not None:
794795
cmd += ["--read_vpr_constraints", circuit_vpr_constraints]
796+
# Check if the circuit has constrained route channel width.
797+
constrained_route_w = config.circuit_constraints[circuit]["route_chan_width"]
798+
if constrained_route_w is not None:
799+
cmd += ["--route_chan_width", constrained_route_w]
795800

796801
def resolve_vtr_source_file(config, filename, base_dir=""):
797802
"""

0 commit comments

Comments
 (0)