Add a pre-pack timing report.

litghost · kmurray · commit 4219024c97d2 · 2019-06-18T15:33:45.000-04:00
This timing report provides insight into how the packer criticality
values were computed.

Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com>
diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp
@@ -498,8 +498,10 @@ bool vpr_pack(t_vpr_setup& vpr_setup, const t_arch& arch) {
                                  + wtoi_switch_del); /* multiply by 4 to get a more conservative estimate */
     }
 
-    return try_pack(&vpr_setup.PackerOpts, &arch, vpr_setup.user_models,
-                    vpr_setup.library_models, inter_cluster_delay, vpr_setup.PackerRRGraph);
+    return try_pack(&vpr_setup.PackerOpts, &vpr_setup.AnalysisOpts,
+                    &arch, vpr_setup.user_models,
+                    vpr_setup.library_models, inter_cluster_delay,
+                    vpr_setup.PackerRRGraph);
 }
 
 void vpr_load_packing(t_vpr_setup& vpr_setup, const t_arch& arch) {
diff --git a/vpr/src/pack/cluster.cpp b/vpr/src/pack/cluster.cpp
@@ -64,9 +64,12 @@ using namespace std;
 #include "lb_type_rr_graph.h"
 
 #include "timing_info.h"
+#include "timing_reports.h"
 #include "PreClusterDelayCalculator.h"
+#include "PreClusterTimingGraphResolver.h"
 #include "tatum/echo_writer.hpp"
 #include "tatum/report/graphviz_dot_writer.hpp"
+#include "tatum/TimingReporter.hpp"
 
 #define AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE 30 /* This value is used to determine the max size of the priority queue for candidates that pass the early filter legality test but not the more detailed routing test */
 #define AAPACK_MAX_HIGH_FANOUT_EXPLORE 10       /* For high-fanout nets that are ignored, consider a maximum of this many sinks, must be less than AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE */
@@ -339,6 +342,7 @@ static t_pb_graph_pin* get_driver_pb_graph_pin(const t_pb* driver_pb, const Atom
 /*****************************************/
 /*globally accessible function*/
 std::map<t_type_ptr, size_t> do_clustering(const t_packer_opts& packer_opts,
+                                           const t_analysis_opts& analysis_opts,
                                            const t_arch* arch,
                                            t_pack_molecule* molecule_head,
                                            int num_models,
@@ -467,6 +471,21 @@ std::map<t_type_ptr, size_t> do_clustering(const t_packer_opts& packer_opts,
                               *timing_ctx.graph, *timing_ctx.constraints, *clustering_delay_calc, timing_info->analyzer());
         }
 
+        {
+            auto& timing_ctx = g_vpr_ctx.timing();
+            PreClusterTimingGraphResolver resolver(atom_ctx.nlist,
+                                                   atom_ctx.lookup, *timing_ctx.graph, *clustering_delay_calc);
+            resolver.set_detail_level(analysis_opts.timing_report_detail);
+
+            tatum::TimingReporter timing_reporter(resolver, *timing_ctx.graph,
+                                                  *timing_ctx.constraints);
+
+            timing_reporter.report_timing_setup(
+                "pre_pack.report_timing.setup.rpt",
+                *timing_info->setup_analyzer(),
+                analysis_opts.timing_report_npaths);
+        }
+
         //Calculate true criticalities of each block
         for (AtomBlockId blk : atom_ctx.nlist.blocks()) {
             for (AtomPinId in_pin : atom_ctx.nlist.block_input_pins(blk)) {
diff --git a/vpr/src/pack/cluster.h b/vpr/src/pack/cluster.h
@@ -10,6 +10,7 @@
 #include "atom_netlist_fwd.h"
 
 std::map<t_type_ptr, size_t> do_clustering(const t_packer_opts& packer_opts,
+                                           const t_analysis_opts& analysis_opts,
                                            const t_arch* arch,
                                            t_pack_molecule* molecule_head,
                                            int num_models,
diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp
@@ -36,6 +36,7 @@ static t_pack_high_fanout_thresholds parse_high_fanout_thresholds(std::vector<st
 static std::string high_fanout_thresholds_to_string(const t_pack_high_fanout_thresholds& hf_thresholds);
 
 bool try_pack(t_packer_opts* packer_opts,
+              const t_analysis_opts* analysis_opts,
               const t_arch* arch,
               const t_model* user_models,
               const t_model* library_models,
@@ -121,15 +122,18 @@ bool try_pack(t_packer_opts* packer_opts,
 
     while (true) {
         //Cluster the netlist
-        auto num_type_instances = do_clustering(*packer_opts, arch, list_of_pack_molecules, num_models,
-                                                is_clock,
-                                                atom_molecules,
-                                                expected_lowest_cost_pb_gnode,
-                                                allow_unrelated_clustering,
-                                                balance_block_type_util,
-                                                lb_type_rr_graphs,
-                                                target_external_pin_util,
-                                                high_fanout_thresholds);
+        auto num_type_instances = do_clustering(
+            *packer_opts,
+            *analysis_opts,
+            arch, list_of_pack_molecules, num_models,
+            is_clock,
+            atom_molecules,
+            expected_lowest_cost_pb_gnode,
+            allow_unrelated_clustering,
+            balance_block_type_util,
+            lb_type_rr_graphs,
+            target_external_pin_util,
+            high_fanout_thresholds);
 
         //Try to size/find a device
         bool fits_on_device = try_size_device_grid(*arch, num_type_instances, packer_opts->target_device_utilization, packer_opts->device_layout);
diff --git a/vpr/src/pack/pack.h b/vpr/src/pack/pack.h
@@ -2,6 +2,7 @@
 #define PACK_H
 
 bool try_pack(t_packer_opts* packer_opts,
+              const t_analysis_opts* analysis_opts,
               const t_arch* arch,
               const t_model* user_models,
               const t_model* library_models,
diff --git a/vpr/src/timing/PreClusterTimingGraphResolver.cpp b/vpr/src/timing/PreClusterTimingGraphResolver.cpp
@@ -0,0 +1,121 @@
+#include "PreClusterTimingGraphResolver.h"
+#include "atom_netlist.h"
+#include "atom_lookup.h"
+
+PreClusterTimingGraphResolver::PreClusterTimingGraphResolver(
+    const AtomNetlist& netlist,
+    const AtomLookup& netlist_lookup,
+    const tatum::TimingGraph& timing_graph,
+    const tatum::DelayCalculator& delay_calc)
+    : netlist_(netlist)
+    , netlist_lookup_(netlist_lookup)
+    , timing_graph_(timing_graph)
+    , delay_calc_(delay_calc) {}
+
+std::string PreClusterTimingGraphResolver::node_name(tatum::NodeId node) const {
+    AtomPinId pin = netlist_lookup_.tnode_atom_pin(node);
+
+    return netlist_.pin_name(pin);
+}
+
+std::string PreClusterTimingGraphResolver::node_type_name(tatum::NodeId node) const {
+    AtomPinId pin = netlist_lookup_.tnode_atom_pin(node);
+    AtomBlockId blk = netlist_.pin_block(pin);
+
+    std::string name = netlist_.block_model(blk)->name;
+
+    if (detail_level() == e_timing_report_detail::AGGREGATED) {
+        //Annotate primitive grid location, if known
+        auto& atom_ctx = g_vpr_ctx.atom();
+        auto& place_ctx = g_vpr_ctx.placement();
+        ClusterBlockId cb = atom_ctx.lookup.atom_clb(blk);
+        if (cb && place_ctx.block_locs.count(cb)) {
+            int x = place_ctx.block_locs[cb].loc.x;
+            int y = place_ctx.block_locs[cb].loc.y;
+            name += " at (" + std::to_string(x) + "," + std::to_string(y) + ")";
+        }
+    }
+
+    return name;
+}
+
+tatum::EdgeDelayBreakdown PreClusterTimingGraphResolver::edge_delay_breakdown(tatum::EdgeId edge, tatum::DelayType tatum_delay_type) const {
+    tatum::EdgeDelayBreakdown delay_breakdown;
+
+    if (edge && detail_level() == e_timing_report_detail::AGGREGATED) {
+        auto edge_type = timing_graph_.edge_type(edge);
+
+        DelayType delay_type; //TODO: should unify vpr/tatum DelayType
+        if (tatum_delay_type == tatum::DelayType::MAX) {
+            delay_type = DelayType::MAX;
+        } else {
+            VTR_ASSERT(tatum_delay_type == tatum::DelayType::MIN);
+            delay_type = DelayType::MIN;
+        }
+
+        if (edge_type == tatum::EdgeType::INTERCONNECT) {
+            tatum::DelayComponent inter_cluster;
+            inter_cluster.type_name = "inter-cluster net delay estimate";
+            inter_cluster.delay = delay_calc_.max_edge_delay(timing_graph_, edge);
+            delay_breakdown.components.push_back(inter_cluster);
+        } else {
+            //Primitive edge
+            //
+            tatum::DelayComponent component;
+
+            tatum::NodeId node = timing_graph_.edge_sink_node(edge);
+
+            AtomPinId atom_pin = netlist_lookup_.tnode_atom_pin(node);
+            AtomBlockId atom_blk = netlist_.pin_block(atom_pin);
+
+            //component.inst_name = netlist_.block_name(atom_blk);
+
+            component.type_name = "primitive '";
+            component.type_name += netlist_.block_model(atom_blk)->name;
+            component.type_name += "'";
+
+            if (edge_type == tatum::EdgeType::PRIMITIVE_COMBINATIONAL) {
+                component.type_name += " combinational delay";
+
+                if (delay_type == DelayType::MAX) {
+                    component.delay = delay_calc_.max_edge_delay(timing_graph_, edge);
+                } else {
+                    VTR_ASSERT(delay_type == DelayType::MIN);
+                    component.delay = delay_calc_.min_edge_delay(timing_graph_, edge);
+                }
+            } else if (edge_type == tatum::EdgeType::PRIMITIVE_CLOCK_LAUNCH) {
+                if (delay_type == DelayType::MAX) {
+                    component.type_name += " Tcq_max";
+                    component.delay = delay_calc_.max_edge_delay(timing_graph_, edge);
+                } else {
+                    VTR_ASSERT(delay_type == DelayType::MIN);
+                    component.type_name += " Tcq_min";
+                    component.delay = delay_calc_.min_edge_delay(timing_graph_, edge);
+                }
+
+            } else {
+                VTR_ASSERT(edge_type == tatum::EdgeType::PRIMITIVE_CLOCK_CAPTURE);
+
+                if (delay_type == DelayType::MAX) {
+                    component.type_name += " Tsu";
+                    component.delay = delay_calc_.setup_time(timing_graph_, edge);
+                } else {
+                    component.type_name += " Thld";
+                    component.delay = delay_calc_.hold_time(timing_graph_, edge);
+                }
+            }
+
+            delay_breakdown.components.push_back(component);
+        }
+    }
+
+    return delay_breakdown;
+}
+
+e_timing_report_detail PreClusterTimingGraphResolver::detail_level() const {
+    return detail_level_;
+}
+
+void PreClusterTimingGraphResolver::set_detail_level(e_timing_report_detail report_detail) {
+    detail_level_ = report_detail;
+}
diff --git a/vpr/src/timing/PreClusterTimingGraphResolver.h b/vpr/src/timing/PreClusterTimingGraphResolver.h
@@ -0,0 +1,34 @@
+#ifndef VPR_PRE_CLUSTER_TIMING_GRAPH_RESOLVER_H_
+#define VPR_PRE_CLUSTER_TIMING_GRAPH_RESOLVER_H_
+
+#include "tatum/TimingGraphNameResolver.hpp"
+#include "atom_netlist_fwd.h"
+#include "atom_lookup.h"
+#include "AnalysisDelayCalculator.h"
+
+class PreClusterTimingGraphResolver : public tatum::TimingGraphNameResolver {
+  public:
+    PreClusterTimingGraphResolver(
+        const AtomNetlist& netlist,
+        const AtomLookup& netlist_lookup,
+        const tatum::TimingGraph& timing_graph,
+        const tatum::DelayCalculator& delay_calc);
+
+    std::string node_name(tatum::NodeId node) const override;
+    std::string node_type_name(tatum::NodeId node) const override;
+
+    tatum::EdgeDelayBreakdown edge_delay_breakdown(tatum::EdgeId edge, tatum::DelayType delay_type) const override;
+
+    void set_detail_level(e_timing_report_detail report_detail);
+
+  private:
+    e_timing_report_detail detail_level() const;
+
+    const AtomNetlist& netlist_;
+    const AtomLookup& netlist_lookup_;
+    const tatum::TimingGraph& timing_graph_;
+    const tatum::DelayCalculator& delay_calc_;
+    e_timing_report_detail detail_level_ = e_timing_report_detail::NETLIST;
+};
+
+#endif /* VPR_PRE_CLUSTER_TIMING_GRAPH_RESOLVER_H_ */

Original file line number	Diff line number	Diff line change
`@@ -498,8 +498,10 @@ bool vpr_pack(t_vpr_setup& vpr_setup, const t_arch& arch) {`
`498`	`498`	`+ wtoi_switch_del); /* multiply by 4 to get a more conservative estimate */`
`499`	`499`	`}`
`500`	`500`
`501`		`- return try_pack(&vpr_setup.PackerOpts, &arch, vpr_setup.user_models,`
`502`		`- vpr_setup.library_models, inter_cluster_delay, vpr_setup.PackerRRGraph);`
	`501`	`+ return try_pack(&vpr_setup.PackerOpts, &vpr_setup.AnalysisOpts,`
	`502`	`+ &arch, vpr_setup.user_models,`
	`503`	`+ vpr_setup.library_models, inter_cluster_delay,`
	`504`	`+ vpr_setup.PackerRRGraph);`
`503`	`505`	`}`
`504`	`506`
`505`	`507`	`void vpr_load_packing(t_vpr_setup& vpr_setup, const t_arch& arch) {`