Merge pull request #2951 from verilog-to-routing/fix_device_util_report

AlexandreSinger · web-flow · commit 332a9b66581c · 2025-03-31T09:48:56.000-04:00
[Pack] Device Util Output
diff --git a/vpr/src/analytical_place/analytical_placement_flow.cpp b/vpr/src/analytical_place/analytical_placement_flow.cpp
@@ -21,6 +21,7 @@
 #include "user_place_constraints.h"
 #include "vpr_context.h"
 #include "vpr_types.h"
+#include "stats.h"
 #include "vtr_assert.h"
 #include "vtr_time.h"
 
@@ -188,6 +189,12 @@ void run_analytical_placement_flow(t_vpr_setup& vpr_setup) {
                                                                         device_ctx.grid);
     full_legalizer->legalize(p_placement);
 
+    // Print the number of resources in netlist and number of resources available in architecture
+    float target_device_utilization = vpr_setup.PackerOpts.target_device_utilization;
+    print_resource_usage();
+    // Print the device utilization
+    print_device_utilization(target_device_utilization);
+
     // Run the Detailed Placer.
     std::unique_ptr<DetailedPlacer> detailed_placer = make_detailed_placer(ap_opts.detailed_placer_type,
                                                                            g_vpr_ctx.placement().blk_loc_registry(),
diff --git a/vpr/src/base/SetupGrid.cpp b/vpr/src/base/SetupGrid.cpp
@@ -15,6 +15,7 @@
 #include "vtr_assert.h"
 #include "vtr_math.h"
 #include "vtr_log.h"
+#include "stats.h"
 
 #include "vpr_types.h"
 #include "vpr_error.h"
@@ -755,59 +756,6 @@ static void CheckGrid(const DeviceGrid& grid) {
     }
 }
 
-float calculate_device_utilization(const DeviceGrid& grid, const std::map<t_logical_block_type_ptr, size_t>& instance_counts) {
-    //Record the resources of the grid
-    std::map<t_physical_tile_type_ptr, size_t> grid_resources;
-    for (int layer_num = 0; layer_num < grid.get_num_layers(); ++layer_num) {
-        for (int x = 0; x < (int)grid.width(); ++x) {
-            for (int y = 0; y < (int)grid.height(); ++y) {
-                int width_offset = grid.get_width_offset({x, y, layer_num});
-                int height_offset = grid.get_height_offset({x, y, layer_num});
-                if (width_offset == 0 && height_offset == 0) {
-                    const auto& type = grid.get_physical_type({x, y, layer_num});
-                    ++grid_resources[type];
-                }
-            }
-        }
-    }
-
-    //Determine the area of grid in tile units
-    float grid_area = 0.;
-    for (auto& kv : grid_resources) {
-        t_physical_tile_type_ptr type = kv.first;
-        size_t count = kv.second;
-
-        float type_area = type->width * type->height;
-
-        grid_area += type_area * count;
-    }
-
-    //Determine the area of instances in tile units
-    float instance_area = 0.;
-    for (auto& kv : instance_counts) {
-        if (is_empty_type(kv.first)) {
-            continue;
-        }
-
-        t_physical_tile_type_ptr type = pick_physical_type(kv.first);
-
-        size_t count = kv.second;
-
-        float type_area = type->width * type->height;
-
-        //Instances of multi-capaicty blocks take up less space
-        if (type->capacity != 0) {
-            type_area /= type->capacity;
-        }
-
-        instance_area += type_area * count;
-    }
-
-    float utilization = instance_area / grid_area;
-
-    return utilization;
-}
-
 size_t count_grid_tiles(const DeviceGrid& grid) {
     return grid.get_num_layers() * grid.width() * grid.height();
 }
diff --git a/vpr/src/base/SetupGrid.h b/vpr/src/base/SetupGrid.h
@@ -27,14 +27,6 @@ DeviceGrid create_device_grid(const std::string& layout_name,
                               size_t min_width,
                               size_t min_height);
 
-/**
- * @brief Calculate the device utilization
- *
- * Calculate the device utilization (i.e. fraction of used grid tiles)
- * foor the specified grid and resource requirements
- */
-float calculate_device_utilization(const DeviceGrid& grid, const std::map<t_logical_block_type_ptr, size_t>& instance_counts);
-
 /**
  * @brief Returns the effective size of the device
  *        (size of the bounding box of non-empty grid tiles)
diff --git a/vpr/src/base/stats.cpp b/vpr/src/base/stats.cpp
@@ -448,3 +448,127 @@ int count_netlist_clocks() {
     //Since std::set does not include duplicates, the number of clocks is the size of the set
     return static_cast<int>(clock_names.size());
 }
+
+float calculate_device_utilization(const DeviceGrid& grid, const std::map<t_logical_block_type_ptr, size_t>& instance_counts) {
+    //Record the resources of the grid: count each physical tile type once per grid location whose width and height offsets are both zero
+    std::map<t_physical_tile_type_ptr, size_t> grid_resources;
+    for (int layer_num = 0; layer_num < grid.get_num_layers(); ++layer_num) {
+        for (int x = 0; x < (int)grid.width(); ++x) {
+            for (int y = 0; y < (int)grid.height(); ++y) {
+                int width_offset = grid.get_width_offset({x, y, layer_num});
+                int height_offset = grid.get_height_offset({x, y, layer_num});
+                if (width_offset == 0 && height_offset == 0) {
+                    const auto& type = grid.get_physical_type({x, y, layer_num});
+                    ++grid_resources[type];
+                }
+            }
+        }
+    }
+
+    //Determine the area of the grid in tile units (sum over tile types of width * height * count)
+    float grid_area = 0.;
+    for (auto& kv : grid_resources) {
+        t_physical_tile_type_ptr type = kv.first;
+        size_t count = kv.second;
+
+        float type_area = type->width * type->height;
+
+        grid_area += type_area * count;
+    }
+
+    //Determine the area of the netlist instances in tile units
+    float instance_area = 0.;
+    for (auto& kv : instance_counts) {
+        if (is_empty_type(kv.first)) {
+            continue;
+        }
+        //NOTE(review): pick_physical_type presumably chooses the tile used to size this logical block type -- confirm
+        t_physical_tile_type_ptr type = pick_physical_type(kv.first);
+
+        size_t count = kv.second;
+
+        float type_area = type->width * type->height;
+
+        //Instances of multi-capacity blocks take up less space: the tile area is divided by its capacity (skipped when capacity is 0)
+        if (type->capacity != 0) {
+            type_area /= type->capacity;
+        }
+
+        instance_area += type_area * count;
+    }
+    //NOTE: grid_area is not checked against zero before this division
+    float utilization = instance_area / grid_area;
+
+    return utilization;
+}
+
+void print_resource_usage() {
+    // Count how many blocks of each logical block type the clustered netlist contains.
+    auto& device_ctx = g_vpr_ctx.device();
+    const auto& clb_netlist = g_vpr_ctx.clustering().clb_nlist;
+    std::map<t_logical_block_type_ptr, size_t> num_type_instances;
+    for (auto blk_id : clb_netlist.blocks()) {
+        num_type_instances[clb_netlist.block_type(blk_id)]++;
+    }
+
+    VTR_LOG("\n");
+    VTR_LOG("Resource usage...\n");
+    for (const auto& type : device_ctx.logical_block_types) {
+        if (is_empty_type(&type)) continue;
+        // Logical block types with no blocks in the netlist are reported with a count of 0.
+        size_t num_netlist_instances = num_type_instances.count(&type) > 0 ? num_type_instances.at(&type) : 0;
+        // Both counts are size_t, so they are printed with %zu; %d would mismatch the argument type.
+        VTR_LOG("\tNetlist\n\t\t%zu\tblocks of type: %s\n", num_netlist_instances, type.name.c_str());
+
+        VTR_LOG("\tArchitecture\n");
+        for (const auto equivalent_tile : type.equivalent_tiles) {
+            // Get the number of instances of this equivalent tile across all layers.
+            size_t num_tile_instances = device_ctx.grid.num_instances(equivalent_tile, -1);
+            VTR_LOG("\t\t%zu\tblocks of type: %s\n", num_tile_instances, equivalent_tile->name.c_str());
+        }
+    }
+    VTR_LOG("\n");
+}
+
+void print_device_utilization(const float target_device_utilization) {
+    // Count how many blocks of each logical block type the clustered netlist contains.
+    auto& device_ctx = g_vpr_ctx.device();
+    const auto& clb_netlist = g_vpr_ctx.clustering().clb_nlist;
+    std::map<t_logical_block_type_ptr, size_t> num_type_instances;
+    for (auto blk_id : clb_netlist.blocks()) {
+        num_type_instances[clb_netlist.block_type(blk_id)]++;
+    }
+
+    float device_utilization = calculate_device_utilization(device_ctx.grid, num_type_instances);
+    VTR_LOG("Device Utilization: %.2f (target %.2f)\n", device_utilization, target_device_utilization);
+    for (const auto& type : device_ctx.physical_tile_types) {
+        if (is_empty_type(&type)) {
+            continue;
+        }
+
+        // Instances of this tile across all layers; loop-invariant, so it is looked up once per tile type.
+        const size_t num_inst = device_ctx.grid.num_instances(&type, -1);
+        if (num_inst == 0) {
+            continue;
+        }
+
+        VTR_LOG("\tPhysical Tile %s:\n", type.name.c_str());
+        // For each equivalent site of this tile, report the ratio of netlist blocks of that
+        // logical type to the number of tile instances (num_inst is known to be non-zero here).
+        for (auto logical_block : get_equivalent_sites_set(&type)) {
+            size_t num_netlist_instances = num_type_instances.count(logical_block) > 0 ? num_type_instances.at(logical_block) : 0;
+            float util = float(num_netlist_instances) / num_inst;
+            VTR_LOG("\tBlock Utilization: %.2f Logical Block: %s\n", util, logical_block->name.c_str());
+        }
+    }
+    VTR_LOG("\n");
+
+    if (!device_ctx.grid.limiting_resources().empty()) {
+        std::vector<std::string> limiting_block_names;
+        for (auto blk_type : device_ctx.grid.limiting_resources()) {
+            limiting_block_names.emplace_back(blk_type->name);
+        }
+        VTR_LOG("FPGA size limited by block type(s): %s\n", vtr::join(limiting_block_names, " ").c_str());
+        VTR_LOG("\n");
+    }
+}
diff --git a/vpr/src/base/stats.h b/vpr/src/base/stats.h
@@ -3,6 +3,7 @@
 #include <limits>
 #include <algorithm>
 #include "vpr_types.h"
+#include "netlist.h"
 
 void routing_stats(const Netlist<>& net_list,
                    bool full_stats,
@@ -23,6 +24,25 @@ void get_num_bends_and_length(ParentNetId inet, int* bends, int* length, int* se
 
 int count_netlist_clocks();
 
+/**
+ * @brief Calculate the device utilization
+ *
+ * Calculate the device utilization (i.e. fraction of used grid tiles)
+ * for the specified grid and resource requirements
+ */
+float calculate_device_utilization(const DeviceGrid& grid, const std::map<t_logical_block_type_ptr, size_t>& instance_counts);
+
+/**
+ * @brief Prints the number of resources in the netlist and the number of available resources in the architecture.
+ */
+void print_resource_usage();
+
+/**
+ * @brief Prints the device utilization
+ * @param target_device_utilization The target device utilization set by the user
+ */
+void print_device_utilization(const float target_device_utilization);
+
 /**
  * @brief template functions must be defined in header, or explicitely
  *        instantiated in definition file (defeats the point of template)
diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp
@@ -384,9 +384,16 @@ bool vpr_flow(t_vpr_setup& vpr_setup, t_arch& arch) {
         }
     }
 
-    // For the time being, we decided to create the flat graph after placement is done. Thus, the is_flat parameter for this function
-    //, since it is called before routing, should be false.
-    vpr_create_device(vpr_setup, arch, false);
+    vpr_create_device(vpr_setup, arch);
+    // If packing is not skipped, the cluster netlist contains valid information, so
+    // we can print the resource usage and device utilization.
+    if (vpr_setup.PackerOpts.doPacking != STAGE_SKIP) {
+        float target_device_utilization = vpr_setup.PackerOpts.target_device_utilization;
+        // Print the number of resources in netlist and number of resources available in architecture
+        print_resource_usage();
+        // Print the device utilization
+        print_device_utilization(target_device_utilization);
+    }
 
     // TODO: Placer still assumes that cluster net list is used - graphics can not work with flat routing yet
     vpr_init_graphics(vpr_setup, arch, false);
@@ -449,7 +456,7 @@ bool vpr_flow(t_vpr_setup& vpr_setup, t_arch& arch) {
     return route_status.success();
 }
 
-void vpr_create_device(t_vpr_setup& vpr_setup, const t_arch& arch, bool is_flat) {
+void vpr_create_device(t_vpr_setup& vpr_setup, const t_arch& arch) {
     vtr::ScopedStartFinishTimer timer("Create Device");
     vpr_create_device_grid(vpr_setup, arch);
 
@@ -458,7 +465,9 @@ void vpr_create_device(t_vpr_setup& vpr_setup, const t_arch& arch, bool is_flat)
     vpr_setup_noc(vpr_setup, arch);
 
     if (vpr_setup.PlacerOpts.place_chan_width != NO_FIXED_CHANNEL_WIDTH) {
-        vpr_create_rr_graph(vpr_setup, arch, vpr_setup.PlacerOpts.place_chan_width, is_flat);
+        // The RR graph built by this function should contain only the intra-cluster resources.
+        // If the flat router is used, additional resources are added when routing begins.
+        vpr_create_rr_graph(vpr_setup, arch, vpr_setup.PlacerOpts.place_chan_width, false);
     }
 }
 
@@ -498,59 +507,6 @@ void vpr_create_device_grid(const t_vpr_setup& vpr_setup, const t_arch& Arch) {
      */
     size_t num_grid_tiles = count_grid_tiles(device_ctx.grid);
     VTR_LOG("FPGA sized to %zu x %zu: %zu grid tiles (%s)\n", device_ctx.grid.width(), device_ctx.grid.height(), num_grid_tiles, device_ctx.grid.name().c_str());
-
-    VTR_LOG("\n");
-    VTR_LOG("Resource usage...\n");
-    for (const auto& type : device_ctx.logical_block_types) {
-        if (is_empty_type(&type)) continue;
-
-        VTR_LOG("\tNetlist\n\t\t%d\tblocks of type: %s\n",
-                num_type_instances[&type], type.name.c_str());
-
-        VTR_LOG("\tArchitecture\n");
-        for (const auto equivalent_tile : type.equivalent_tiles) {
-            auto num_instances = 0;
-            //get the number of equivalent tile across all layers
-            num_instances = (int)device_ctx.grid.num_instances(equivalent_tile, -1);
-
-            VTR_LOG("\t\t%d\tblocks of type: %s\n",
-                    num_instances, equivalent_tile->name.c_str());
-        }
-    }
-    VTR_LOG("\n");
-
-    float device_utilization = calculate_device_utilization(device_ctx.grid, num_type_instances);
-    VTR_LOG("Device Utilization: %.2f (target %.2f)\n", device_utilization, target_device_utilization);
-    for (const auto& type : device_ctx.physical_tile_types) {
-        if (is_empty_type(&type)) {
-            continue;
-        }
-
-        if (device_ctx.grid.num_instances(&type, -1) != 0) {
-            VTR_LOG("\tPhysical Tile %s:\n", type.name.c_str());
-
-            auto equivalent_sites = get_equivalent_sites_set(&type);
-
-            for (auto logical_block : equivalent_sites) {
-                float util = 0.;
-                size_t num_inst = device_ctx.grid.num_instances(&type, -1);
-                if (num_inst != 0) {
-                    util = float(num_type_instances[logical_block]) / num_inst;
-                }
-                VTR_LOG("\tBlock Utilization: %.2f Logical Block: %s\n", util, logical_block->name.c_str());
-            }
-        }
-    }
-    VTR_LOG("\n");
-
-    if (!device_ctx.grid.limiting_resources().empty()) {
-        std::vector<std::string> limiting_block_names;
-        for (auto blk_type : device_ctx.grid.limiting_resources()) {
-            limiting_block_names.emplace_back(blk_type->name);
-        }
-        VTR_LOG("FPGA size limited by block type(s): %s\n", vtr::join(limiting_block_names, " ").c_str());
-        VTR_LOG("\n");
-    }
 }
 
 void vpr_setup_clock_networks(t_vpr_setup& vpr_setup, const t_arch& Arch) {
diff --git a/vpr/src/base/vpr_api.h b/vpr/src/base/vpr_api.h
@@ -138,7 +138,7 @@ void vpr_analysis(const Netlist<>& net_list,
 /* Device creating */
 
 ///@brief Create the device (grid + rr graph)
-void vpr_create_device(t_vpr_setup& vpr_setup, const t_arch& Arch, bool is_flat);
+void vpr_create_device(t_vpr_setup& vpr_setup, const t_arch& Arch);
 
 ///@brief Create the device grid
 void vpr_create_device_grid(const t_vpr_setup& vpr_setup, const t_arch& Arch);
diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp
@@ -18,6 +18,7 @@
 #include "vpr_context.h"
 #include "vpr_error.h"
 #include "vpr_types.h"
+#include "stats.h"
 #include "vtr_assert.h"
 #include "vtr_log.h"
 
diff --git a/vpr/test/test_vpr.cpp b/vpr/test/test_vpr.cpp