diff --git a/vpr/src/analytical_place/analytical_placement_flow.cpp b/vpr/src/analytical_place/analytical_placement_flow.cpp index ca5f72772eb..9b334402114 100644 --- a/vpr/src/analytical_place/analytical_placement_flow.cpp +++ b/vpr/src/analytical_place/analytical_placement_flow.cpp @@ -12,6 +12,7 @@ #include "atom_netlist.h" #include "full_legalizer.h" #include "gen_ap_netlist_from_atoms.h" +#include "global_placer.h" #include "globals.h" #include "partial_legalizer.h" #include "partial_placement.h" @@ -58,7 +59,6 @@ static void print_ap_netlist_stats(const APNetlist& netlist) { } void run_analytical_placement_flow(t_vpr_setup& vpr_setup) { - (void)vpr_setup; // Start an overall timer for the Analytical Placement flow. vtr::ScopedStartFinishTimer timer("Analytical Placement"); @@ -79,16 +79,9 @@ void run_analytical_placement_flow(t_vpr_setup& vpr_setup) { print_ap_netlist_stats(ap_netlist); // Run the Global Placer - // For now, just runs the solver and partial legalizer 10 times arbitrarily. - PartialPlacement p_placement(ap_netlist); - std::unique_ptr solver = make_analytical_solver(e_analytical_solver::QP_HYBRID, - ap_netlist); - std::unique_ptr legalizer = make_partial_legalizer(e_partial_legalizer::FLOW_BASED, - ap_netlist); - for (size_t i = 0; i < 10; i++) { - solver->solve(i, p_placement); - legalizer->legalize(p_placement); - } + std::unique_ptr global_placer = make_global_placer(e_global_placer::SimPL, + ap_netlist); + PartialPlacement p_placement = global_placer->place(); // Verify that the partial placement is valid before running the full // legalizer. diff --git a/vpr/src/analytical_place/full_legalizer.cpp b/vpr/src/analytical_place/full_legalizer.cpp index 3f32255dcb8..933a4d559f1 100644 --- a/vpr/src/analytical_place/full_legalizer.cpp +++ b/vpr/src/analytical_place/full_legalizer.cpp @@ -394,8 +394,6 @@ void FullLegalizer::place_clusters(const ClusteredNetlist& clb_nlist, // FIXME: Allocate and load moveable blocks? 
// - This may be needed to perform SA. Not needed right now. - - // TODO: Check initial placement legality } void FullLegalizer::legalize(const PartialPlacement& p_placement) {
diff --git a/vpr/src/analytical_place/global_placer.cpp b/vpr/src/analytical_place/global_placer.cpp
new file mode 100644
index 00000000000..d2fbfe42d11
--- /dev/null
+++ b/vpr/src/analytical_place/global_placer.cpp
@@ -0,0 +1,134 @@
+/**
+ * @file
+ * @author Alex Singer
+ * @date October 2024
+ * @brief The definitions of the global placers used in the AP flow and their
+ *        base class.
+ */
+
+#include "global_placer.h"
+#include <cstdio>
+#include <memory>
+#include "analytical_solver.h"
+#include "ap_netlist.h"
+#include "partial_legalizer.h"
+#include "partial_placement.h"
+#include "vpr_error.h"
+#include "vtr_log.h"
+#include "vtr_time.h"
+
+std::unique_ptr<GlobalPlacer> make_global_placer(e_global_placer placer_type,
+                                                 const APNetlist& netlist) {
+    // Based on the placer type passed in, build the global placer.
+    switch (placer_type) {
+        case e_global_placer::SimPL:
+            return std::make_unique<SimPLGlobalPlacer>(netlist);
+        default:
+            VPR_FATAL_ERROR(VPR_ERROR_AP,
+                            "Unrecognized global placer type");
+
+    }
+}
+
+SimPLGlobalPlacer::SimPLGlobalPlacer(const APNetlist& netlist) : GlobalPlacer(netlist) {
+    // This can be a long method. Good to time this to see how long it takes to
+    // construct the global placer.
+    vtr::ScopedStartFinishTimer global_placer_building_timer("Constructing Global Placer");
+    // Build the solver.
+    solver_ = make_analytical_solver(e_analytical_solver::QP_HYBRID,
+                                     netlist);
+    // Build the partial legalizer
+    partial_legalizer_ = make_partial_legalizer(e_partial_legalizer::FLOW_BASED,
+                                                netlist);
+}
+
+/**
+ * @brief Helper method to print the header of the per-iteration status updates
+ *        of the global placer.
+ */
+static void print_SimPL_status_header() {
+    VTR_LOG("---- ---------------- ---------------- ----------- -------------- ----------\n");
+    VTR_LOG("Iter Lower Bound HPWL Upper Bound HPWL Solver Time Legalizer Time Total Time\n");
+    VTR_LOG(" (sec) (sec) (sec)\n");
+    VTR_LOG("---- ---------------- ---------------- ----------- -------------- ----------\n");
+}
+
+/**
+ * @brief Helper method to print the per-iteration status of the global placer.
+ */
+static void print_SimPL_status(size_t iteration,
+                               double lb_hpwl,
+                               double ub_hpwl,
+                               float solver_time,
+                               float legalizer_time,
+                               float total_time) {
+    // Iteration
+    VTR_LOG("%4zu", iteration);
+
+    // Lower Bound HPWL
+    VTR_LOG(" %16.2f", lb_hpwl);
+
+    // Upper Bound HPWL
+    VTR_LOG(" %16.2f", ub_hpwl);
+
+    // Solver runtime
+    VTR_LOG(" %11.3f", solver_time);
+
+    // Legalizer runtime
+    VTR_LOG(" %14.3f", legalizer_time);
+
+    // Total runtime
+    VTR_LOG(" %10.3f", total_time);
+
+    VTR_LOG("\n");
+
+    fflush(stdout);
+}
+
+PartialPlacement SimPLGlobalPlacer::place() {
+    // Create a timer to time the entire global placement time.
+    vtr::ScopedStartFinishTimer global_placer_time("AP Global Placer");
+    // Create a timer to keep track of how long the solver and legalizer take.
+    vtr::Timer runtime_timer;
+    // Print the status header.
+    if (log_verbosity_ >= 1)
+        print_SimPL_status_header();
+    // Initialize the partial placement object.
+    PartialPlacement p_placement(netlist_);
+    // Run the global placer.
+    for (size_t i = 0; i < max_num_iterations_; i++) {
+        float iter_start_time = runtime_timer.elapsed_sec();
+
+        // Run the solver.
+        float solver_start_time = runtime_timer.elapsed_sec();
+        solver_->solve(i, p_placement);
+        float solver_end_time = runtime_timer.elapsed_sec();
+        double lb_hpwl = p_placement.get_hpwl(netlist_);
+
+        // Run the legalizer.
+        float legalizer_start_time = runtime_timer.elapsed_sec();
+        partial_legalizer_->legalize(p_placement);
+        float legalizer_end_time = runtime_timer.elapsed_sec();
+        double ub_hpwl = p_placement.get_hpwl(netlist_);
+
+        // Print some stats
+        if (log_verbosity_ >= 1) {
+            float iter_end_time = runtime_timer.elapsed_sec();
+            print_SimPL_status(i, lb_hpwl, ub_hpwl,
+                               solver_end_time - solver_start_time,
+                               legalizer_end_time - legalizer_start_time,
+                               iter_end_time - iter_start_time);
+        }
+
+        // Exit condition: stop once the UB/LB HPWL gap (relative to the UB) is
+        // small enough. A zero UB counts as no gap; dividing by it could give NaN.
+        double hpwl_relative_gap = (ub_hpwl > 0.0) ? (ub_hpwl - lb_hpwl) / ub_hpwl : 0.0;
+        if (hpwl_relative_gap < target_hpwl_relative_gap_)
+            break;
+    }
+    // Return the placement from the final iteration.
+    // TODO: investigate saving the best solution found so far. It should be
+    //       cheap to save a copy of the PartialPlacement object.
+    return p_placement;
+}
+
diff --git a/vpr/src/analytical_place/global_placer.h b/vpr/src/analytical_place/global_placer.h
new file mode 100644
index 00000000000..0628b9b0d50
--- /dev/null
+++ b/vpr/src/analytical_place/global_placer.h
@@ -0,0 +1,143 @@
+/**
+ * @file
+ * @author Alex Singer
+ * @date October 2024
+ * @brief The declarations of the Global Placer base class which is used to
+ *        define the functionality of all global placers in the AP flow.
+ *
+ * A Global Placer creates a Partial Placement given only the netlist and the
+ * architecture. It uses analytical techniques (i.e. efficient numerical
+ * minimization of an objective function of a placement) to find a placement
+ * that optimizes for objectives subject to some of the constraints of the FPGA
+ * architecture.
+ */
+
+#pragma once
+
+#include <memory>
+
+// Forward declarations
+class APNetlist;
+class AnalyticalSolver;
+class PartialPlacement;
+class PartialLegalizer;
+
+/**
+ * @brief Enumeration of all of the global placers currently implemented in VPR.
+ */
+enum class e_global_placer {
+    SimPL // Global placer based on the SimPL paper.
+};
+
+/**
+ * @brief The Global Placer base class
+ *
+ * This declares the functionality that all Global Placers will use. This
+ * provides a standard interface for the global placers so they can be used
+ * interchangeably. This makes it very easy to test and compare different global
+ * placers.
+ */
+class GlobalPlacer {
+public:
+    virtual ~GlobalPlacer() = default;
+
+    /**
+     * @brief Constructor of the base GlobalPlacer class
+     *
+     * @param netlist Netlist of the design at some abstraction level;
+     *                typically this would have some atoms and groups of
+     *                atoms (in a pack pattern).
+     * @param log_verbosity The verbosity of log messages in the Global
+     *                      Placer.
+     */
+    GlobalPlacer(const APNetlist& netlist, int log_verbosity = 1)
+        : netlist_(netlist),
+          log_verbosity_(log_verbosity) {}
+
+    /**
+     * @brief Perform global placement on the given netlist.
+     *
+     * The role of a global placer is to try and find a placement for the given
+     * netlist which optimizes some objective function and is mostly legal.
+     */
+    virtual PartialPlacement place() = 0;
+
+protected:
+
+    /// @brief The APNetlist the global placer is placing.
+    const APNetlist& netlist_;
+
+    /// @brief The setting of how verbose the log messages should be in the
+    ///        global placer. Anything larger than zero will display per
+    ///        iteration status messages.
+    int log_verbosity_;
+};
+
+/**
+ * @brief A factory method which creates a Global Placer of the given type.
+ */
+std::unique_ptr<GlobalPlacer> make_global_placer(e_global_placer placer_type,
+                                                 const APNetlist& netlist);
+
+/**
+ * @brief A Global Placer based on the SimPL work for analytical ASIC placement.
+ *        https://doi.org/10.1145/2461256.2461279
+ *
+ * This placement technique uses a solver to generate a placement that optimizes
+ * over some objective function and is likely very illegal (has many overlapping
+ * blocks and blocks in the wrong places). This solution represents the "lower-
+ * bound" on the solution quality.
+ *
+ * This technique passes this "lower-bound" solution into a legalizer, which
+ * tries to find the closest legal solution to the lower-bound solution (by
+ * spreading out blocks and placing them in legal positions). This often
+ * destroys the quality of the lower-bound solution, and is considered an
+ * "upper-bound" on the solution quality.
+ *
+ * Each iteration of this global placer, the upper-bound solution is fed into
+ * the solver as a "hint" to what a legal solution looks like. This allows the
+ * solver to produce another placement which will make decisions knowing where
+ * the blocks will end-up in the legal solution. This worsens the quality of
+ * the lower-bound solution; however, after passing this solution back into
+ * the legalizer, this will likely improve the quality of the upper-bound
+ * solution.
+ *
+ * Over several iterations the upper-bound and lower-bound solutions will
+ * approach each other until a good quality, mostly-legal solution is found.
+ */
+class SimPLGlobalPlacer : public GlobalPlacer {
+private:
+
+    /// @brief The maximum number of iterations the global placer can perform.
+    static constexpr size_t max_num_iterations_ = 100;
+
+    /// @brief The target relative gap between the HPWL of the upper-bound and
+    ///        lower-bound placements. The placer will stop if the difference
+    ///        between the two bounds, normalized to the upper-bound, is smaller
+    ///        than this number.
+    static constexpr double target_hpwl_relative_gap_ = 0.10;
+
+    /// @brief The solver which generates the lower-bound placement.
+    std::unique_ptr<AnalyticalSolver> solver_;
+
+    /// @brief The legalizer which generates the upper-bound placement.
+    std::unique_ptr<PartialLegalizer> partial_legalizer_;
+
+public:
+
+    /**
+     * @brief Constructor for the SimPL Global Placer
+     *
+     * Constructs the solver and partial legalizer.
+ */ + SimPLGlobalPlacer(const APNetlist& netlist); + + /** + * @brief Run a SimPL-like global placement algorithm + * + * This iteratively runs the solver and legalizer until a good quality and + * mostly-legal placement is found. + */ + PartialPlacement place() final; +}; + diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/config/golden_results.txt index b34357d3497..bdc120a773e 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/config/golden_results.txt @@ -1,5 +1,5 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time 
min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time -fixed_k6_frac_N8_22nm.xml single_wire.v common 4.25 vpr 70.91 MiB -1 -1 0.18 16276 1 0.39 -1 -1 29812 -1 -1 0 1 0 0 success v8.0.0-11571-g5eb3aa508 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T00:28:35 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 72608 1 1 0 2 0 1 2 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 70.9 MiB 0.14 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 70.9 MiB 0.14 70.9 MiB 0.10 8 14 1 6.79088e+06 0 166176. 575.005 0.36 0.00138004 0.00129992 20206 45088 -1 19 1 1 1 194 45 0.7726 nan -0.7726 -0.7726 0 0 202963. 702.294 0.09 0.00 0.07 -1 -1 0.09 0.00122838 0.00119736 -fixed_k6_frac_N8_22nm.xml single_ff.v common 4.68 vpr 71.03 MiB -1 -1 0.20 16236 1 0.39 -1 -1 29696 -1 -1 1 2 0 0 success v8.0.0-11571-g5eb3aa508 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T00:28:35 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 72736 2 1 3 3 1 3 4 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 71.0 MiB 0.14 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 71.0 MiB 0.14 71.0 MiB 0.10 20 31 1 6.79088e+06 13472 414966. 
1435.87 0.63 0.00135413 0.0012936 22510 95286 -1 35 1 2 2 213 52 0.942216 0.942216 -1.68896 -0.942216 0 0 503264. 1741.40 0.17 0.00 0.14 -1 -1 0.17 0.00127341 0.00123431 -fixed_k6_frac_N8_22nm.xml ch_intrinsics.v common 6.69 vpr 71.67 MiB -1 -1 0.46 18220 3 0.40 -1 -1 33084 -1 -1 40 99 3 0 success v8.0.0-11571-g5eb3aa508 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T00:28:35 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 73388 99 130 240 229 1 247 272 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 71.7 MiB 0.28 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 71.7 MiB 0.28 71.7 MiB 0.21 32 3122 15 6.79088e+06 2.18288e+06 586450. 2029.24 1.84 0.271358 0.247517 24814 144142 -1 2952 30 711 1121 349988 188928 2.0466 2.0466 -154.346 -2.0466 -0.04337 -0.04337 744469. 2576.02 0.25 0.25 0.22 -1 -1 0.25 0.102379 0.0937273 -fixed_k6_frac_N8_22nm.xml diffeq1.v common 32.17 vpr 74.24 MiB -1 -1 0.75 23104 15 0.61 -1 -1 34204 -1 -1 74 162 0 5 success v8.0.0-11571-g5eb3aa508 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T00:28:35 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 76024 162 96 817 258 1 797 337 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 74.2 MiB 1.01 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 74.2 MiB 1.01 74.2 MiB 0.60 60 15916 46 6.79088e+06 2.97693e+06 1.01997e+06 3529.29 24.68 3.56948 3.3772 29998 257685 -1 13617 19 4413 11558 1499556 342325 21.9657 21.9657 -1806.56 -21.9657 0 0 1.27783e+06 4421.56 0.40 0.79 0.44 -1 -1 0.40 0.334496 0.31821 +fixed_k6_frac_N8_22nm.xml single_wire.v common 2.23 vpr 70.88 MiB -1 -1 0.13 15888 1 0.16 -1 -1 29628 -1 -1 0 1 0 0 success v8.0.0-11573-g5ea68eac9 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T14:02:52 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 72576 1 1 0 2 0 1 2 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 
70.9 MiB 0.13 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 70.9 MiB 0.13 70.9 MiB 0.09 8 14 1 6.79088e+06 0 166176. 575.005 0.37 0.001421 0.00133975 20206 45088 -1 19 1 1 1 194 45 0.7726 nan -0.7726 -0.7726 0 0 202963. 702.294 0.09 0.00 0.07 -1 -1 0.09 0.00128701 0.00125353 +fixed_k6_frac_N8_22nm.xml single_ff.v common 2.68 vpr 70.80 MiB -1 -1 0.14 16336 1 0.16 -1 -1 29640 -1 -1 1 2 0 0 success v8.0.0-11573-g5ea68eac9 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T14:02:52 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 72500 2 1 3 3 1 3 4 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 70.8 MiB 0.14 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 70.8 MiB 0.14 70.8 MiB 0.09 20 31 1 6.79088e+06 13472 414966. 1435.87 0.65 0.00147526 0.00140277 22510 95286 -1 35 1 2 2 213 52 0.942216 0.942216 -1.68896 -0.942216 0 0 503264. 1741.40 0.18 0.00 0.15 -1 -1 0.18 0.00127433 0.00123146 +fixed_k6_frac_N8_22nm.xml ch_intrinsics.v common 4.51 vpr 71.77 MiB -1 -1 0.47 18200 3 0.09 -1 -1 33188 -1 -1 41 99 3 0 success v8.0.0-11573-g5ea68eac9 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T14:02:52 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 73496 99 130 240 229 1 247 273 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 71.8 MiB 0.30 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 71.8 MiB 0.30 71.8 MiB 0.23 32 3145 19 6.79088e+06 2.19635e+06 586450. 2029.24 1.54 0.232941 0.21265 24814 144142 -1 2897 11 635 1003 105957 24691 2.0466 2.0466 -155.681 -2.0466 -0.21204 -0.16867 744469. 
2576.02 0.27 0.09 0.22 -1 -1 0.27 0.046062 0.042439 +fixed_k6_frac_N8_22nm.xml diffeq1.v common 30.51 vpr 74.25 MiB -1 -1 0.73 23000 15 0.35 -1 -1 34316 -1 -1 65 162 0 5 success v8.0.0-11573-g5ea68eac9 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T14:02:52 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 76028 162 96 817 258 1 792 328 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 74.2 MiB 1.81 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 74.2 MiB 1.81 74.2 MiB 0.61 68 16708 27 6.79088e+06 2.85568e+06 1.14541e+06 3963.36 22.39 3.52495 3.33579 31438 289477 -1 14397 19 4042 10567 1489293 328107 22.3059 22.3059 -1909.12 -22.3059 0 0 1.42693e+06 4937.46 0.51 0.78 0.51 -1 -1 0.51 0.316463 0.300694