From 4df0c9c45aec8c59d372655ad7ad99a3430f908e Mon Sep 17 00:00:00 2001 From: aman26kbm Date: Wed, 7 Jul 2021 16:47:53 -0500 Subject: [PATCH 1/3] Modifications to the power estimation documentation: 1. Fixing the heading hierarchy 2. Changing function name from power_calc_primitive to power_usage_primitive 3. Adding some clarity in the flow steps and what's the name of the output file. 4. Adding example command of run_vtr_flow with -power enabled 5. Adding that the power model doesn't model single-bit adders in logic blocks. --- doc/src/vtr/power_estimation/index.rst | 32 +++++++++++++++++--------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/doc/src/vtr/power_estimation/index.rst b/doc/src/vtr/power_estimation/index.rst index e971e9978d3..9651b66286e 100644 --- a/doc/src/vtr/power_estimation/index.rst +++ b/doc/src/vtr/power_estimation/index.rst @@ -43,6 +43,16 @@ $VTR_ROOT/vtrflow/tech/* See :ref:`power_technology_properties` for information on how to generate an XML file for your own SPICE technology model. +In this mode, the VTR will run ODIN->ABC->ACE->VPR. The ACE stage is additional and specific to this power estimation flow. Using run_vtr_flow.py will automatically run ACE 2.0 to generate activity information and a new BLIF file (see :ref:`power_ace` for details). + +The final power estimates will be available in a file named .power in the result directory. + +Here is an example command: + +.. code-block:: + $VTR_ROOT/vtr_flow/scripts/run_vtr_flow.py ../benchmarks/verilog/diffeq1.v ../arch/timing/k6_frac_N10_frac_chain_depop50_mem32K_40nm.xml -power -cmos_tech ../tech/PTM_45nm/45nm.xml -temp_dir power_try_45nm + + VPR ~~~ @@ -133,7 +143,7 @@ where * ````: Is the activity file to be created. * ````: The new BLIF file. - This will be functionally identical in function to the ABC blif; however, since ABC does not maintain internal node names, a new BLIF must be produced with node names that match the activity file. 
+ This will be functionally identical to the ABC blif; however, since ABC does not maintain internal node names, a new BLIF must be produced with node names that match the activity file. This blif file is fed to the subsequent parts of the flow (to VPR). If a user is using run_vtr_flow.py (which will run ACE 2.0 underneath if the options mentioned earlier like -power are used), then the flow will copy this ACE2-generated blif file (.ace.blif) to .pre-vpr.blif and then launch VPR with this new file. User’s may with to use their own activity estimation tool. The produced activity file must contain one line for each net in the BLIF file, in the following format:: @@ -202,7 +212,7 @@ Other methods of estimation: ``specify-size`` -~~~~~~~~~~~~~~~~ +"""""""""""""""" This estimation method provides a detailed transistor level modelling of CLBs, and will provide the most accurate power estimations. For each ``pb_type``, power estimation accounts for the following components (see :numref:`fig_power_sample_block`). @@ -257,13 +267,13 @@ If necessary, the user can seperate a port into multiple ports with different wi For all child ``pb_types``, the algorithm performs a recursive call. Eventually ``pb_types`` will be reached that have no children. These are primitives, such as flip-flops, LUTs, or other hard-blocks. -The power model includes functions to perform transistor-level power estimation for flip-flops and LUTs. +The power model includes functions to perform transistor-level power estimation for flip-flops and LUTs (Note: the power model doesn't, by default, include power estimation for single-bit adders that are commonly found in logic blocks of modern FPGAs). If the user wishes to use a design with other primitive types (memories, multipliers, etc), they must provide an equivalent function. -If the user makes such a function, the ``power_calc_primitive`` function should be modified to call it. 
+If the user makes such a function, the ``power_usage_primitive`` function should be modified to call it. Alternatively, these blocks can be configured to use higher-level power estimation methods. ``auto-size`` -~~~~~~~~~~~~~ +"""""""""""""""" This estimation method also performs detailed transistor-level modelling. It is almost identical to the ``specify-size`` method described above. The only difference is that the local wire capacitance and buffers are automatically inserted for all pins, when necessary. @@ -274,7 +284,7 @@ This is equivalent to using the ``specify-size`` method with the ``wire_length=a Although not as accurate as user-provided buffer and wire sizes, it is capable of automatically capturing trends in power dissipation as architectures are modified. ``pin-toggle`` -~~~~~~~~~~~~~~ +"""""""""""""""" This method allows users to specify the dynamic power of a block in terms of the energy per toggle (in Joules) of each input, output or clock pin for the ``pb_type``. The static power is provided as an absolute (in Watts). This is done using the following construct: @@ -304,7 +314,7 @@ It is assumed that the power usage specified here includes power of all child `` No further recursive power estimation will be performed. ``C-internal`` -~~~~~~~~~~~~~~ +"""""""""""""""" This method allows the users to specify the dynamic power of a block in terms of the internal capacitance of the block. The activity will be averaged across all of the input pins, and will be supplied with the internal capacitance to the standard equation: @@ -327,7 +337,7 @@ It is assumed that the power usage specified here includes power of all child `` No further recursive power estimation will be performed. ``absolute`` -~~~~~~~~~~~~ +"""""""""""""""" This method is the most basic power estimation method, and allows users to specify both the dynamic and static power of a block as absolute values (in Watts). 
This is done using the following construct: @@ -345,12 +355,12 @@ It is assumed that the power usage specified here includes power of all child `` No further recursive power estimation will be performed. Global Routing --------------- +~~~~~~~~~~~~~~ Global routing consists of switch boxes and input connection boxes. Switch Boxes -~~~~~~~~~~~~ +"""""""""""""""" Switch boxes are modelled as the following components (:numref:`fig_power_sb`): @@ -389,7 +399,7 @@ The user may override this method by providing the buffer size as shown below: The size is the drive strength of the buffer, relative to a minimum-sized inverter. Input Connection Boxes -~~~~~~~~~~~~~~~~~~~~~~ +"""""""""""""""""""""" Input connection boxes are modelled as the following components (:numref:`fig_power_cb`): From 2655b6719be7d2a2877fd7af8d142181bac31251 Mon Sep 17 00:00:00 2001 From: aman26kbm Date: Wed, 14 Jul 2021 21:14:31 -0500 Subject: [PATCH 2/3] Adding support for multiple input ports on the CLB. This is common in newer arch files. 
--- vpr/src/power/power_sizing.cpp | 38 +++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/vpr/src/power/power_sizing.cpp b/vpr/src/power/power_sizing.cpp index db8a9cca2cc..1a31bc492f2 100644 --- a/vpr/src/power/power_sizing.cpp +++ b/vpr/src/power/power_sizing.cpp @@ -79,24 +79,28 @@ static double power_count_transistors_connectionbox() { auto& power_ctx = g_vpr_ctx.power(); auto type = find_most_common_block_type(device_ctx.grid); - VTR_ASSERT(type->pb_graph_head->num_input_ports == 1); - inputs = type->pb_graph_head->num_input_pins[0]; - - /* Buffers from Tracks */ - buffer_size = power_ctx.commonly_used->max_seg_to_IPIN_fanout - * (power_ctx.commonly_used->NMOS_1X_C_d - / power_ctx.commonly_used->INV_1X_C_in) - / power_ctx.arch->logical_effort_factor; - buffer_size = std::max(1.0F, buffer_size); - transistor_cnt += power_ctx.solution_inf.channel_width - * power_count_transistors_buffer(buffer_size); - - /* Muxes to IPINs */ - transistor_cnt += inputs - * power_count_transistors_mux( - power_get_mux_arch(power_ctx.commonly_used->max_IPIN_fanin, - power_ctx.arch->mux_transistor_size)); + //For each port on the most common block, look at the number of + //input pins this port has and estimate the transistor count based + //on the size muxes that drive these input pins. 
+ for (int i=0; i < type->pb_graph_head->num_input_ports; i++) { + inputs = type->pb_graph_head->num_input_pins[i]; + + /* Buffers from Tracks */ + buffer_size = power_ctx.commonly_used->max_seg_to_IPIN_fanout + * (power_ctx.commonly_used->NMOS_1X_C_d + / power_ctx.commonly_used->INV_1X_C_in) + / power_ctx.arch->logical_effort_factor; + buffer_size = std::max(1.0F, buffer_size); + transistor_cnt += power_ctx.solution_inf.channel_width + * power_count_transistors_buffer(buffer_size); + + /* Muxes to IPINs */ + transistor_cnt += inputs + * power_count_transistors_mux( + power_get_mux_arch(power_ctx.commonly_used->max_IPIN_fanin, + power_ctx.arch->mux_transistor_size)); + } return transistor_cnt; } From 26259af847e59da500920de94b3aeeceb4102187 Mon Sep 17 00:00:00 2001 From: aman26kbm Date: Thu, 15 Jul 2021 13:15:47 -0500 Subject: [PATCH 3/3] Formatting changes --- vpr/src/power/power_sizing.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vpr/src/power/power_sizing.cpp b/vpr/src/power/power_sizing.cpp index 1a31bc492f2..de5749a9f7b 100644 --- a/vpr/src/power/power_sizing.cpp +++ b/vpr/src/power/power_sizing.cpp @@ -80,10 +80,10 @@ static double power_count_transistors_connectionbox() { auto type = find_most_common_block_type(device_ctx.grid); - //For each port on the most common block, look at the number of - //input pins this port has and estimate the transistor count based + //For each port on the most common block, look at the number of + //input pins this port has and estimate the transistor count based //on the size muxes that drive these input pins. - for (int i=0; i < type->pb_graph_head->num_input_ports; i++) { + for (int i = 0; i < type->pb_graph_head->num_input_ports; i++) { inputs = type->pb_graph_head->num_input_pins[i]; /* Buffers from Tracks */