Skip to content

Commit 332a9b6

Browse files
Merge pull request #2951 from verilog-to-routing/fix_device_util_report
[Pack] Device Util Output
2 parents 16b900f + 2c604bd commit 332a9b6

File tree

9 files changed

+170
-122
lines changed

9 files changed

+170
-122
lines changed

vpr/src/analytical_place/analytical_placement_flow.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "user_place_constraints.h"
2222
#include "vpr_context.h"
2323
#include "vpr_types.h"
24+
#include "stats.h"
2425
#include "vtr_assert.h"
2526
#include "vtr_time.h"
2627

@@ -188,6 +189,12 @@ void run_analytical_placement_flow(t_vpr_setup& vpr_setup) {
188189
device_ctx.grid);
189190
full_legalizer->legalize(p_placement);
190191

192+
// Print the number of resources in netlist and number of resources available in architecture
193+
float target_device_utilization = vpr_setup.PackerOpts.target_device_utilization;
194+
print_resource_usage();
195+
// Print the device utilization
196+
print_device_utilization(target_device_utilization);
197+
191198
// Run the Detailed Placer.
192199
std::unique_ptr<DetailedPlacer> detailed_placer = make_detailed_placer(ap_opts.detailed_placer_type,
193200
g_vpr_ctx.placement().blk_loc_registry(),

vpr/src/base/SetupGrid.cpp

Lines changed: 1 addition & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "vtr_assert.h"
1616
#include "vtr_math.h"
1717
#include "vtr_log.h"
18+
#include "stats.h"
1819

1920
#include "vpr_types.h"
2021
#include "vpr_error.h"
@@ -755,59 +756,6 @@ static void CheckGrid(const DeviceGrid& grid) {
755756
}
756757
}
757758

758-
// Calculates the device utilization: the area required by the given block
// instances divided by the total area of the grid, both in tile units.
//   grid            - device grid whose tiles are counted
//   instance_counts - number of instances of each logical block type
// Returns instance_area / grid_area; no guard is applied for a zero grid area.
float calculate_device_utilization(const DeviceGrid& grid, const std::map<t_logical_block_type_ptr, size_t>& instance_counts) {
759-
//Record the resources of the grid (number of tiles of each physical type)
760-
std::map<t_physical_tile_type_ptr, size_t> grid_resources;
761-
for (int layer_num = 0; layer_num < grid.get_num_layers(); ++layer_num) {
762-
for (int x = 0; x < (int)grid.width(); ++x) {
763-
for (int y = 0; y < (int)grid.height(); ++y) {
764-
int width_offset = grid.get_width_offset({x, y, layer_num});
765-
int height_offset = grid.get_height_offset({x, y, layer_num});
766-
//Only count locations where both offsets are zero, so each tile is counted once
//(presumably the root location of a multi-location tile -- confirm against DeviceGrid)
if (width_offset == 0 && height_offset == 0) {
767-
const auto& type = grid.get_physical_type({x, y, layer_num});
768-
++grid_resources[type];
769-
}
770-
}
771-
}
772-
}
773-
774-
//Determine the area of grid in tile units
775-
float grid_area = 0.;
776-
for (auto& kv : grid_resources) {
777-
t_physical_tile_type_ptr type = kv.first;
778-
size_t count = kv.second;
779-
780-
float type_area = type->width * type->height;
781-
782-
grid_area += type_area * count;
783-
}
784-
785-
//Determine the area of instances in tile units
786-
float instance_area = 0.;
787-
for (auto& kv : instance_counts) {
788-
//Empty block types do not contribute to the instance area
if (is_empty_type(kv.first)) {
789-
continue;
790-
}
791-
792-
t_physical_tile_type_ptr type = pick_physical_type(kv.first);
793-
794-
size_t count = kv.second;
795-
796-
float type_area = type->width * type->height;
797-
798-
//Instances of multi-capacity blocks take up less space:
//the tile area is divided by the tile capacity (skipped when capacity is 0)
799-
if (type->capacity != 0) {
800-
type_area /= type->capacity;
801-
}
802-
803-
instance_area += type_area * count;
804-
}
805-
806-
//Ratio of required instance area to available grid area
float utilization = instance_area / grid_area;
807-
808-
return utilization;
809-
}
810-
811759
size_t count_grid_tiles(const DeviceGrid& grid) {
812760
return grid.get_num_layers() * grid.width() * grid.height();
813761
}

vpr/src/base/SetupGrid.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,6 @@ DeviceGrid create_device_grid(const std::string& layout_name,
2727
size_t min_width,
2828
size_t min_height);
2929

30-
/**
31-
* @brief Calculate the device utilization
32-
*
33-
* Calculate the device utilization (i.e. fraction of used grid tiles)
34-
* for the specified grid and resource requirements
35-
*/
36-
float calculate_device_utilization(const DeviceGrid& grid, const std::map<t_logical_block_type_ptr, size_t>& instance_counts);
37-
3830
/**
3931
* @brief Returns the effective size of the device
4032
* (size of the bounding box of non-empty grid tiles)

vpr/src/base/stats.cpp

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -448,3 +448,127 @@ int count_netlist_clocks() {
448448
//Since std::set does not include duplicates, the number of clocks is the size of the set
449449
return static_cast<int>(clock_names.size());
450450
}
451+
452+
float calculate_device_utilization(const DeviceGrid& grid, const std::map<t_logical_block_type_ptr, size_t>& instance_counts) {
453+
//Record the resources of the grid
454+
std::map<t_physical_tile_type_ptr, size_t> grid_resources;
455+
for (int layer_num = 0; layer_num < grid.get_num_layers(); ++layer_num) {
456+
for (int x = 0; x < (int)grid.width(); ++x) {
457+
for (int y = 0; y < (int)grid.height(); ++y) {
458+
int width_offset = grid.get_width_offset({x, y, layer_num});
459+
int height_offset = grid.get_height_offset({x, y, layer_num});
460+
if (width_offset == 0 && height_offset == 0) {
461+
const auto& type = grid.get_physical_type({x, y, layer_num});
462+
++grid_resources[type];
463+
}
464+
}
465+
}
466+
}
467+
468+
//Determine the area of grid in tile units
469+
float grid_area = 0.;
470+
for (auto& kv : grid_resources) {
471+
t_physical_tile_type_ptr type = kv.first;
472+
size_t count = kv.second;
473+
474+
float type_area = type->width * type->height;
475+
476+
grid_area += type_area * count;
477+
}
478+
479+
//Determine the area of instances in tile units
480+
float instance_area = 0.;
481+
for (auto& kv : instance_counts) {
482+
if (is_empty_type(kv.first)) {
483+
continue;
484+
}
485+
486+
t_physical_tile_type_ptr type = pick_physical_type(kv.first);
487+
488+
size_t count = kv.second;
489+
490+
float type_area = type->width * type->height;
491+
492+
//Instances of multi-capaicty blocks take up less space
493+
if (type->capacity != 0) {
494+
type_area /= type->capacity;
495+
}
496+
497+
instance_area += type_area * count;
498+
}
499+
500+
float utilization = instance_area / grid_area;
501+
502+
return utilization;
503+
}
504+
505+
void print_resource_usage() {
506+
auto& device_ctx = g_vpr_ctx.device();
507+
const auto& clb_netlist = g_vpr_ctx.clustering().clb_nlist;
508+
std::map<t_logical_block_type_ptr, size_t> num_type_instances;
509+
for (auto blk_id : clb_netlist.blocks()) {
510+
num_type_instances[clb_netlist.block_type(blk_id)]++;
511+
}
512+
513+
VTR_LOG("\n");
514+
VTR_LOG("Resource usage...\n");
515+
for (const auto& type : device_ctx.logical_block_types) {
516+
if (is_empty_type(&type)) continue;
517+
size_t num_instances = num_type_instances.count(&type) > 0 ? num_type_instances.at(&type) : 0;
518+
VTR_LOG("\tNetlist\n\t\t%d\tblocks of type: %s\n",
519+
num_instances, type.name.c_str());
520+
521+
VTR_LOG("\tArchitecture\n");
522+
for (const auto equivalent_tile : type.equivalent_tiles) {
523+
//get the number of equivalent tile across all layers
524+
num_instances = device_ctx.grid.num_instances(equivalent_tile, -1);
525+
526+
VTR_LOG("\t\t%d\tblocks of type: %s\n",
527+
num_instances, equivalent_tile->name.c_str());
528+
}
529+
}
530+
VTR_LOG("\n");
531+
}
532+
533+
void print_device_utilization(const float target_device_utilization) {
534+
auto& device_ctx = g_vpr_ctx.device();
535+
const auto& clb_netlist = g_vpr_ctx.clustering().clb_nlist;
536+
std::map<t_logical_block_type_ptr, size_t> num_type_instances;
537+
for (auto blk_id : clb_netlist.blocks()) {
538+
num_type_instances[clb_netlist.block_type(blk_id)]++;
539+
}
540+
541+
float device_utilization = calculate_device_utilization(device_ctx.grid, num_type_instances);
542+
VTR_LOG("Device Utilization: %.2f (target %.2f)\n", device_utilization, target_device_utilization);
543+
for (const auto& type : device_ctx.physical_tile_types) {
544+
if (is_empty_type(&type)) {
545+
continue;
546+
}
547+
548+
if (device_ctx.grid.num_instances(&type, -1) != 0) {
549+
VTR_LOG("\tPhysical Tile %s:\n", type.name.c_str());
550+
551+
auto equivalent_sites = get_equivalent_sites_set(&type);
552+
553+
for (auto logical_block : equivalent_sites) {
554+
float util = 0.;
555+
size_t num_inst = device_ctx.grid.num_instances(&type, -1);
556+
if (num_inst != 0) {
557+
size_t num_netlist_instances = num_type_instances.count(logical_block) > 0 ? num_type_instances.at(logical_block) : 0;
558+
util = float(num_netlist_instances) / num_inst;
559+
}
560+
VTR_LOG("\tBlock Utilization: %.2f Logical Block: %s\n", util, logical_block->name.c_str());
561+
}
562+
}
563+
}
564+
VTR_LOG("\n");
565+
566+
if (!device_ctx.grid.limiting_resources().empty()) {
567+
std::vector<std::string> limiting_block_names;
568+
for (auto blk_type : device_ctx.grid.limiting_resources()) {
569+
limiting_block_names.emplace_back(blk_type->name);
570+
}
571+
VTR_LOG("FPGA size limited by block type(s): %s\n", vtr::join(limiting_block_names, " ").c_str());
572+
VTR_LOG("\n");
573+
}
574+
}

vpr/src/base/stats.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <limits>
44
#include <algorithm>
55
#include "vpr_types.h"
6+
#include "netlist.h"
67

78
void routing_stats(const Netlist<>& net_list,
89
bool full_stats,
@@ -23,6 +24,25 @@ void get_num_bends_and_length(ParentNetId inet, int* bends, int* length, int* se
2324

2425
int count_netlist_clocks();
2526

27+
/**
28+
* @brief Calculate the device utilization
29+
*
30+
* Calculate the device utilization (i.e. fraction of used grid tiles)
31+
* for the specified grid and resource requirements
32+
*/
33+
float calculate_device_utilization(const DeviceGrid& grid, const std::map<t_logical_block_type_ptr, size_t>& instance_counts);
34+
35+
/**
36+
* @brief Prints the number of resources in the netlist and the number of available resources in the architecture.
37+
*/
38+
void print_resource_usage();
39+
40+
/**
41+
* @brief Prints the device utilization
42+
* @param target_device_utilization The target device utilization set by the user
43+
*/
44+
void print_device_utilization(const float target_device_utilization);
45+
2646
/**
2747
* @brief template functions must be defined in header, or explicitly
2848
* instantiated in definition file (defeats the point of template)

vpr/src/base/vpr_api.cpp

Lines changed: 14 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -384,9 +384,16 @@ bool vpr_flow(t_vpr_setup& vpr_setup, t_arch& arch) {
384384
}
385385
}
386386

387-
// For the time being, we decided to create the flat graph after placement is done. Thus, the is_flat parameter for this function
388-
//, since it is called before routing, should be false.
389-
vpr_create_device(vpr_setup, arch, false);
387+
vpr_create_device(vpr_setup, arch);
388+
// If packing is not skipped, cluster netlist contain valid information, so
389+
// we can print the resource usage and device utilization
390+
if (vpr_setup.PackerOpts.doPacking != STAGE_SKIP) {
391+
float target_device_utilization = vpr_setup.PackerOpts.target_device_utilization;
392+
// Print the number of resources in netlist and number of resources available in architecture
393+
print_resource_usage();
394+
// Print the device utilization
395+
print_device_utilization(target_device_utilization);
396+
}
390397

391398
// TODO: Placer still assumes that cluster net list is used - graphics can not work with flat routing yet
392399
vpr_init_graphics(vpr_setup, arch, false);
@@ -449,7 +456,7 @@ bool vpr_flow(t_vpr_setup& vpr_setup, t_arch& arch) {
449456
return route_status.success();
450457
}
451458

452-
void vpr_create_device(t_vpr_setup& vpr_setup, const t_arch& arch, bool is_flat) {
459+
void vpr_create_device(t_vpr_setup& vpr_setup, const t_arch& arch) {
453460
vtr::ScopedStartFinishTimer timer("Create Device");
454461
vpr_create_device_grid(vpr_setup, arch);
455462

@@ -458,7 +465,9 @@ void vpr_create_device(t_vpr_setup& vpr_setup, const t_arch& arch, bool is_flat)
458465
vpr_setup_noc(vpr_setup, arch);
459466

460467
if (vpr_setup.PlacerOpts.place_chan_width != NO_FIXED_CHANNEL_WIDTH) {
461-
vpr_create_rr_graph(vpr_setup, arch, vpr_setup.PlacerOpts.place_chan_width, is_flat);
468+
// The RR graph built by this function should contain only the intra-cluster resources.
469+
// If the flat router is used, additional resources are added when routing begins.
470+
vpr_create_rr_graph(vpr_setup, arch, vpr_setup.PlacerOpts.place_chan_width, false);
462471
}
463472
}
464473

@@ -498,59 +507,6 @@ void vpr_create_device_grid(const t_vpr_setup& vpr_setup, const t_arch& Arch) {
498507
*/
499508
size_t num_grid_tiles = count_grid_tiles(device_ctx.grid);
500509
VTR_LOG("FPGA sized to %zu x %zu: %zu grid tiles (%s)\n", device_ctx.grid.width(), device_ctx.grid.height(), num_grid_tiles, device_ctx.grid.name().c_str());
501-
502-
VTR_LOG("\n");
503-
VTR_LOG("Resource usage...\n");
504-
for (const auto& type : device_ctx.logical_block_types) {
505-
if (is_empty_type(&type)) continue;
506-
507-
VTR_LOG("\tNetlist\n\t\t%d\tblocks of type: %s\n",
508-
num_type_instances[&type], type.name.c_str());
509-
510-
VTR_LOG("\tArchitecture\n");
511-
for (const auto equivalent_tile : type.equivalent_tiles) {
512-
auto num_instances = 0;
513-
//get the number of equivalent tile across all layers
514-
num_instances = (int)device_ctx.grid.num_instances(equivalent_tile, -1);
515-
516-
VTR_LOG("\t\t%d\tblocks of type: %s\n",
517-
num_instances, equivalent_tile->name.c_str());
518-
}
519-
}
520-
VTR_LOG("\n");
521-
522-
float device_utilization = calculate_device_utilization(device_ctx.grid, num_type_instances);
523-
VTR_LOG("Device Utilization: %.2f (target %.2f)\n", device_utilization, target_device_utilization);
524-
for (const auto& type : device_ctx.physical_tile_types) {
525-
if (is_empty_type(&type)) {
526-
continue;
527-
}
528-
529-
if (device_ctx.grid.num_instances(&type, -1) != 0) {
530-
VTR_LOG("\tPhysical Tile %s:\n", type.name.c_str());
531-
532-
auto equivalent_sites = get_equivalent_sites_set(&type);
533-
534-
for (auto logical_block : equivalent_sites) {
535-
float util = 0.;
536-
size_t num_inst = device_ctx.grid.num_instances(&type, -1);
537-
if (num_inst != 0) {
538-
util = float(num_type_instances[logical_block]) / num_inst;
539-
}
540-
VTR_LOG("\tBlock Utilization: %.2f Logical Block: %s\n", util, logical_block->name.c_str());
541-
}
542-
}
543-
}
544-
VTR_LOG("\n");
545-
546-
if (!device_ctx.grid.limiting_resources().empty()) {
547-
std::vector<std::string> limiting_block_names;
548-
for (auto blk_type : device_ctx.grid.limiting_resources()) {
549-
limiting_block_names.emplace_back(blk_type->name);
550-
}
551-
VTR_LOG("FPGA size limited by block type(s): %s\n", vtr::join(limiting_block_names, " ").c_str());
552-
VTR_LOG("\n");
553-
}
554510
}
555511

556512
void vpr_setup_clock_networks(t_vpr_setup& vpr_setup, const t_arch& Arch) {

vpr/src/base/vpr_api.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ void vpr_analysis(const Netlist<>& net_list,
138138
/* Device creating */
139139

140140
///@brief Create the device (grid + rr graph)
141-
void vpr_create_device(t_vpr_setup& vpr_setup, const t_arch& Arch, bool is_flat);
141+
void vpr_create_device(t_vpr_setup& vpr_setup, const t_arch& Arch);
142142

143143
///@brief Create the device grid
144144
void vpr_create_device_grid(const t_vpr_setup& vpr_setup, const t_arch& Arch);

vpr/src/pack/pack.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "vpr_context.h"
1919
#include "vpr_error.h"
2020
#include "vpr_types.h"
21+
#include "stats.h"
2122
#include "vtr_assert.h"
2223
#include "vtr_log.h"
2324

0 commit comments

Comments
 (0)