diff --git a/vpr/src/analytical_place/analytical_solver.cpp b/vpr/src/analytical_place/analytical_solver.cpp index 5e0c3009766..60217dab2e3 100644 --- a/vpr/src/analytical_place/analytical_solver.cpp +++ b/vpr/src/analytical_place/analytical_solver.cpp @@ -12,6 +12,8 @@ #include #include #include +#include "device_grid.h" +#include "flat_placement_types.h" #include "partial_placement.h" #include "ap_netlist.h" #include "vpr_error.h" @@ -36,14 +38,16 @@ #endif // EIGEN_INSTALLED std::unique_ptr make_analytical_solver(e_analytical_solver solver_type, - const APNetlist& netlist) { + const APNetlist& netlist, + const DeviceGrid& device_grid) { // Based on the solver type passed in, build the solver. switch (solver_type) { case e_analytical_solver::QP_HYBRID: #ifdef EIGEN_INSTALLED - return std::make_unique(netlist); + return std::make_unique(netlist, device_grid); #else (void)netlist; + (void)device_grid; VPR_FATAL_ERROR(VPR_ERROR_AP, "QP Hybrid Solver requires the Eigen library"); break; @@ -64,8 +68,11 @@ AnalyticalSolver::AnalyticalSolver(const APNetlist& netlist) // row ID from [0, num_moveable_blocks) for each moveable block in the // netlist. num_moveable_blocks_ = 0; + num_fixed_blocks_ = 0; size_t current_row_id = 0; for (APBlockId blk_id : netlist.blocks()) { + if (netlist.block_mobility(blk_id) == APBlockMobility::FIXED) + num_fixed_blocks_++; if (netlist.block_mobility(blk_id) != APBlockMobility::MOVEABLE) continue; APRowId new_row_id = APRowId(current_row_id); @@ -155,10 +162,10 @@ void QPHybridSolver::init_linear_system() { } // Initialize the linear system with zeros. 
- size_t num_variables = num_moveable_blocks_ + num_star_nodes; - A_sparse = Eigen::SparseMatrix(num_variables, num_variables); - b_x = Eigen::VectorXd::Zero(num_variables); - b_y = Eigen::VectorXd::Zero(num_variables); + num_variables_ = num_moveable_blocks_ + num_star_nodes; + A_sparse = Eigen::SparseMatrix(num_variables_, num_variables_); + b_x = Eigen::VectorXd::Zero(num_variables_); + b_y = Eigen::VectorXd::Zero(num_variables_); // Create a list of triplets that will be used to create the sparse // coefficient matrix. This is the method recommended by Eigen to initialize @@ -254,7 +261,54 @@ void QPHybridSolver::update_linear_system_with_anchors( } } +void QPHybridSolver::init_guesses(const DeviceGrid& device_grid) { + // If the number of fixed blocks is zero, initialize the guesses to the + // center of the device. + if (num_fixed_blocks_ == 0) { + guess_x = Eigen::VectorXd::Constant(num_variables_, device_grid.width() / 2.0); + guess_y = Eigen::VectorXd::Constant(num_variables_, device_grid.height() / 2.0); + return; + } + + // Compute the centroid of all fixed blocks in the netlist. + t_flat_pl_loc centroid({0.0f, 0.0f, 0.0f}); + unsigned num_blks_summed = 0; + for (APBlockId blk_id : netlist_.blocks()) { + // We only get the centroid of fixed blocks since these are the only + // blocks with positions that we know. + if (netlist_.block_mobility(blk_id) != APBlockMobility::FIXED) + continue; + // Get the flat location of the fixed block. + APFixedBlockLoc fixed_blk_loc = netlist_.block_loc(blk_id); + VTR_ASSERT_SAFE(fixed_blk_loc.x != APFixedBlockLoc::UNFIXED_DIM); + VTR_ASSERT_SAFE(fixed_blk_loc.y != APFixedBlockLoc::UNFIXED_DIM); + VTR_ASSERT_SAFE(fixed_blk_loc.layer_num != APFixedBlockLoc::UNFIXED_DIM); + t_flat_pl_loc flat_blk_loc; + flat_blk_loc.x = fixed_blk_loc.x; + flat_blk_loc.y = fixed_blk_loc.y; + flat_blk_loc.layer = fixed_blk_loc.layer_num; + // Accumulate into the centroid. 
+ centroid += flat_blk_loc; + num_blks_summed++; + } + // Divide the sum by the number of fixed blocks. + VTR_ASSERT_SAFE(num_blks_summed == num_fixed_blocks_); + centroid /= static_cast(num_blks_summed); + + // Set the guesses to the centroid location. + guess_x = Eigen::VectorXd::Constant(num_variables_, centroid.x); + guess_y = Eigen::VectorXd::Constant(num_variables_, centroid.y); +} + void QPHybridSolver::solve(unsigned iteration, PartialPlacement& p_placement) { + // In the first iteration, if the number of fixed blocks is 0, set the + // placement to be equal to the guess. The solver below will just set the + // solution to the zero vector if we do not set it to the guess directly. + if (iteration == 0 && num_fixed_blocks_ == 0) { + store_solution_into_placement(guess_x, guess_y, p_placement); + return; + } + // Create a temporary linear system which will contain the original linear // system which may be updated to include the anchor points. Eigen::SparseMatrix A_sparse_diff = Eigen::SparseMatrix(A_sparse); @@ -280,14 +334,24 @@ void QPHybridSolver::solve(unsigned iteration, PartialPlacement& p_placement) { cg.compute(A_sparse_diff); VTR_ASSERT(cg.info() == Eigen::Success && "Conjugate Gradient failed at compute!"); // Use the solver to solve for x and y using the constant vectors - // TODO: Use solve with guess to make this faster. Use the previous placement - // as a guess. - Eigen::VectorXd x = cg.solve(b_x_diff); + Eigen::VectorXd x = cg.solveWithGuess(b_x_diff, guess_x); VTR_ASSERT(cg.info() == Eigen::Success && "Conjugate Gradient failed at solving b_x!"); - Eigen::VectorXd y = cg.solve(b_y_diff); + Eigen::VectorXd y = cg.solveWithGuess(b_y_diff, guess_y); VTR_ASSERT(cg.info() == Eigen::Success && "Conjugate Gradient failed at solving b_y!"); // Write the results back into the partial placement object. + store_solution_into_placement(x, y, p_placement); + + // Update the guess. The guess for the next iteration is the solution in + // this iteration. 
+ guess_x = x; + guess_y = y; +} + +void QPHybridSolver::store_solution_into_placement(const Eigen::VectorXd& x_soln, + const Eigen::VectorXd& y_soln, + PartialPlacement& p_placement) { + // NOTE: The first [0, num_moveable_blocks_) rows always represent the // moveable APBlocks. The star nodes always come after and are ignored // in the solution. @@ -296,8 +360,23 @@ void QPHybridSolver::solve(unsigned iteration, PartialPlacement& p_placement) { APBlockId blk_id = row_id_to_blk_id_[row_id]; VTR_ASSERT_DEBUG(blk_id.is_valid()); VTR_ASSERT_DEBUG(netlist_.block_mobility(blk_id) == APBlockMobility::MOVEABLE); - p_placement.block_x_locs[blk_id] = x[row_id_idx]; - p_placement.block_y_locs[blk_id] = y[row_id_idx]; + // Due to the iterative nature of CG, it is possible for the solver to + // overstep 0 and return a negative number by an incredibly small margin. + // Clamp the number to 0 in this case. + // TODO: Should investigate good bounds on this, the bounds below were + // chosen since any difference higher than 1e-9 would concern me. + double x_pos = x_soln[row_id_idx]; + if (x_pos < 0.0) { + VTR_ASSERT_SAFE(std::abs(x_pos) < negative_soln_tolerance_); + x_pos = 0.0; + } + double y_pos = y_soln[row_id_idx]; + if (y_pos < 0.0) { + VTR_ASSERT_SAFE(std::abs(y_pos) < negative_soln_tolerance_); + y_pos = 0.0; + } + p_placement.block_x_locs[blk_id] = x_pos; + p_placement.block_y_locs[blk_id] = y_pos; } } diff --git a/vpr/src/analytical_place/analytical_solver.h b/vpr/src/analytical_place/analytical_solver.h index 02e5bafd8b1..dee0eb4674a 100644 --- a/vpr/src/analytical_place/analytical_solver.h +++ b/vpr/src/analytical_place/analytical_solver.h @@ -9,7 +9,8 @@ #pragma once #include -#include "ap_netlist_fwd.h" +#include "ap_netlist.h" +#include "device_grid.h" #include "vtr_strong_id.h" #include "vtr_vector.h" @@ -98,6 +99,9 @@ class AnalyticalSolver { /// when allocating matrices. size_t num_moveable_blocks_ = 0; + /// @brief The number of fixed blocks in the netlist. 
+ size_t num_fixed_blocks_ = 0; + /// @brief A lookup between a moveable APBlock and its linear ID from /// [0, num_moveable_blocks). Fixed blocks will return an invalid row /// ID. This is useful when knowing which row in the matrix @@ -114,7 +118,8 @@ class AnalyticalSolver { * @brief A factory method which creates an Analytical Solver of the given type. */ std::unique_ptr make_analytical_solver(e_analytical_solver solver_type, - const APNetlist& netlist); + const APNetlist& netlist, + const DeviceGrid& device_grid); // The Eigen library is used to solve matrix equations in the following solvers. // The solver cannot be built if Eigen is not installed. @@ -170,6 +175,14 @@ class QPHybridSolver : public AnalyticalSolver { /// weights to grow slower. static constexpr double anchor_weight_exp_fac_ = 5.0; + /// @brief Due to the iterative nature of the Conjugate Gradient method, the + /// solver may overstep 0 to give a slightly negative solution. This + /// is ok, and we can just clamp the position to 0. However, negative + /// values that are too large may be indicative of an issue in the + /// formulation. This value is how negative we tolerate the positions + /// to be. + static constexpr double negative_soln_tolerance_ = 1e-9; + /** * @brief Initializes the linear system of Ax = b_x and Ay = b_y based on * the APNetlist and the fixed APBlock locations. @@ -180,6 +193,14 @@ class QPHybridSolver : public AnalyticalSolver { */ void init_linear_system(); + /** + * @brief Initializes the guesses which will be used in the solver. + * + * The guesses will be used as starting points for the CG solver. The better + * these guesses are, the faster the solver will converge. + */ + void init_guesses(const DeviceGrid& device_grid); + /** * @brief Helper method to update the linear system with anchors to the * current partial placement. 
@@ -209,6 +230,14 @@ class QPHybridSolver : public AnalyticalSolver { PartialPlacement& p_placement, unsigned iteration); + /** + * @brief Store the x and y solutions in Eigen's vectors into the partial + * placement object. + */ + void store_solution_into_placement(const Eigen::VectorXd& x_soln, + const Eigen::VectorXd& y_soln, + PartialPlacement& p_placement); + // The following variables represent the linear system without any anchor // points. These are filled in the constructor and never modified. // When the anchor-points are taken into consideration, the diagonal of the @@ -224,6 +253,15 @@ class QPHybridSolver : public AnalyticalSolver { Eigen::VectorXd b_x; /// @brief The constant vector in the y dimension for the linear system. Eigen::VectorXd b_y; + /// @brief The number of variables in the solver. This is the sum of the + /// number of moveable blocks in the netlist and the number of star + /// nodes that exist. + size_t num_variables_ = 0; + + /// @brief The current guess for the x positions of the blocks. + Eigen::VectorXd guess_x; + /// @brief The current guess for the y positions of the blocks. + Eigen::VectorXd guess_y; public: /** @@ -231,12 +269,15 @@ class QPHybridSolver : public AnalyticalSolver { * * Initializes internal data and constructs the initial linear system. */ - QPHybridSolver(const APNetlist& netlist) + QPHybridSolver(const APNetlist& netlist, const DeviceGrid& device_grid) : AnalyticalSolver(netlist) { // Initializing the linear system only depends on the netlist and fixed // block locations. Both are provided by the netlist, allowing this to // be initialized in the constructor. init_linear_system(); + + // Initialize the guesses for the first iteration. 
+ init_guesses(device_grid); } /** diff --git a/vpr/src/analytical_place/global_placer.cpp b/vpr/src/analytical_place/global_placer.cpp index 0bcbb8f7052..a89a1fb8ef4 100644 --- a/vpr/src/analytical_place/global_placer.cpp +++ b/vpr/src/analytical_place/global_placer.cpp @@ -77,7 +77,8 @@ SimPLGlobalPlacer::SimPLGlobalPlacer(e_partial_legalizer partial_legalizer_type, // Build the solver. VTR_LOGV(log_verbosity_ >= 10, "\tBuilding the solver...\n"); solver_ = make_analytical_solver(e_analytical_solver::QP_HYBRID, - ap_netlist_); + ap_netlist_, + device_grid); // Build the density manager used by the partial legalizer. VTR_LOGV(log_verbosity_ >= 10, "\tBuilding the density manager...\n"); diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/no_fixed_blocks/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/no_fixed_blocks/config/config.txt new file mode 100755 index 00000000000..7678dea6b00 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/no_fixed_blocks/config/config.txt @@ -0,0 +1,51 @@ +############################################################################### +# Configuration file for running the MCNC benchmarks through the AP flow. +# +# The AP flow requires that each circuit contains fixed blocks and is fixed +# to a specific device size. The device sizes here were chosen to match the +# device sizes of the default VTR flow. 
+############################################################################### + +# Path to directory of circuits to use +circuits_dir=benchmarks/verilog + +# Path to directory of architectures to use +archs_dir=arch/timing + +# Add architectures to list to sweep +arch_list_add=k6_frac_N10_frac_chain_mem32K_40nm.xml + +# Add circuits to list to sweep +circuit_list_add=boundtop.v +circuit_list_add=ch_intrinsics.v +circuit_list_add=or1200.v +circuit_list_add=spree.v +circuit_list_add=stereovision3.v + +# Constrain the circuits to their devices +circuit_constraint_list_add=(stereovision3.v, device=vtr_extra_small) +circuit_constraint_list_add=(ch_intrinsics.v, device=vtr_extra_small) +circuit_constraint_list_add=(spree.v, device=vtr_extra_small) +circuit_constraint_list_add=(boundtop.v, device=vtr_extra_small) +circuit_constraint_list_add=(or1200.v, device=vtr_small) + +# Constrain the circuits to their channel widths +# 1.3 * minW +circuit_constraint_list_add=(stereovision3.v, route_chan_width=44) +circuit_constraint_list_add=(ch_intrinsics.v, route_chan_width=52) +circuit_constraint_list_add=(spree.v, route_chan_width=78) +circuit_constraint_list_add=(boundtop.v, route_chan_width=50) +circuit_constraint_list_add=(or1200.v, route_chan_width=118) + +# Parse info and how to parse +parse_file=vpr_fixed_chan_width.txt + +# How to parse QoR info +qor_parse_file=qor_ap_fixed_chan_width.txt + +# Pass requirements +pass_requirements_file=pass_requirements_ap_fixed_chan_width.txt + +# Pass the script params while writing the vpr constraints. 
+script_params=-track_memory_usage -crit_path_router_iterations 100 --analytical_place --route + diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/no_fixed_blocks/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/no_fixed_blocks/config/golden_results.txt new file mode 100644 index 00000000000..cc6c8605186 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/no_fixed_blocks/config/golden_results.txt @@ -0,0 +1,6 @@ +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time initial_placed_wirelength_est placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time initial_placed_CPD_est placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time routed_wirelength avg_routed_wirelength routed_wiresegment avg_routed_wiresegment total_nets_routed total_connections_routed total_heap_pushes total_heap_pops logic_block_area_total logic_block_area_used routing_area_total routing_area_per_tile crit_path_route_success_iteration num_rr_graph_nodes num_rr_graph_edges collapsed_nodes critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay 
setup_TNS setup_WNS hold_TNS hold_WNS create_rr_graph_time create_intra_cluster_rr_graph_time adding_internal_edges route_mem crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time router_lookahead_mem tile_lookahead_computation_time router_lookahead_computation_time +k6_frac_N10_frac_chain_mem32K_40nm.xml boundtop.v common 15.01 vpr 82.36 MiB -1 -1 9.68 47504 3 0.64 -1 -1 38420 -1 -1 47 196 1 0 success v8.0.0-12284-g51bddabcb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-20T13:31:45 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 84332 196 193 800 0 1 602 437 20 20 400 -1 vtr_extra_small -1 -1 10070 3652 132997 35694 84550 12753 82.4 MiB 1.72 0.01 3.2653 2.38057 -1112.29 -2.38057 2.38057 0.06 0.00193977 0.00167903 0.171366 0.148678 82.4 MiB 1.72 82.4 MiB 0.91 5647 9.52277 1600 2.69814 1586 2429 170107 42789 2.07112e+07 3.08102e+06 1.26946e+06 3173.65 12 38988 203232 -1 2.67914 2.67914 -1215.24 -2.67914 0 0 0.19 -1 -1 82.4 MiB 0.11 0.257849 0.228742 82.4 MiB -1 0.06 +k6_frac_N10_frac_chain_mem32K_40nm.xml ch_intrinsics.v common 2.43 vpr 77.15 MiB -1 -1 0.25 22148 3 0.07 -1 -1 36668 -1 -1 68 99 1 0 success v8.0.0-12284-g51bddabcb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-20T13:31:45 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 79000 99 130 264 0 1 224 298 20 20 400 -1 vtr_extra_small -1 -1 3122 687 77908 28584 34410 14914 77.1 MiB 0.68 0.01 3.02031 1.87385 -123.342 -1.87385 1.87385 0.06 0.00108168 0.000960787 0.0635174 0.0563103 77.1 MiB 0.68 77.1 MiB 0.30 1330 8.06061 400 2.42424 409 668 26732 7858 2.07112e+07 4.21279e+06 1.31074e+06 3276.84 11 39388 210115 -1 1.99317 1.99317 -137.96 -1.99317 0 0 0.19 -1 -1 77.1 MiB 0.04 0.0899682 0.0807575 77.1 MiB -1 0.06 +k6_frac_N10_frac_chain_mem32K_40nm.xml or1200.v common 44.46 vpr 128.95 MiB -1 -1 3.76 64528 8 3.02 -1 -1 44620 -1 -1 248 385 2 1 success 
v8.0.0-12284-g51bddabcb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-20T13:31:45 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 132040 385 362 3324 0 1 2366 998 30 30 900 -1 vtr_small -1 -1 70396 30779 584243 232629 326108 25506 128.9 MiB 15.88 0.07 12.6272 9.30433 -9557.78 -9.30433 9.30433 0.27 0.00915945 0.00804694 1.17687 1.02821 128.9 MiB 15.88 128.9 MiB 8.51 42328 18.0043 10832 4.60740 9932 32198 1817849 333160 4.8774e+07 1.48577e+07 6.56785e+06 7297.61 14 120772 1084977 -1 9.70184 9.70184 -9948.19 -9.70184 0 0 1.27 -1 -1 128.9 MiB 0.82 1.65611 1.47053 128.9 MiB -1 0.27 +k6_frac_N10_frac_chain_mem32K_40nm.xml spree.v common 11.31 vpr 85.20 MiB -1 -1 2.11 35716 16 0.42 -1 -1 38820 -1 -1 61 45 3 1 success v8.0.0-12284-g51bddabcb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-20T13:31:45 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 87240 45 32 936 0 1 769 142 20 20 400 -1 vtr_extra_small -1 -1 14278 6632 31222 9013 20166 2043 85.2 MiB 3.44 0.01 14.223 10.6451 -7116.98 -10.6451 10.6451 0.08 0.00250891 0.00209652 0.234093 0.194408 85.2 MiB 3.44 85.2 MiB 2.40 11049 14.4243 2887 3.76893 3469 9247 708024 169456 2.07112e+07 5.32753e+06 1.91495e+06 4787.38 18 44576 305072 -1 10.6885 10.6885 -7341.62 -10.6885 0 0 0.34 -1 -1 85.2 MiB 0.28 0.384243 0.329459 85.2 MiB -1 0.08 +k6_frac_N10_frac_chain_mem32K_40nm.xml stereovision3.v common 2.20 vpr 76.85 MiB -1 -1 0.46 26500 4 0.11 -1 -1 36476 -1 -1 15 11 0 0 success v8.0.0-12284-g51bddabcb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-20T13:31:45 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 78692 11 2 140 0 2 87 28 20 20 400 -1 vtr_extra_small -1 -1 1104 346 1498 346 953 199 76.8 MiB 0.42 0.00 3.08719 2.10685 -179.409 -2.10685 1.95087 0.06 0.000442371 0.000365901 0.0198711 0.0167916 76.8 MiB 0.42 76.8 MiB 0.28 569 7.02469 145 1.79012 164 245 5326 1432 
2.07112e+07 808410 1.12964e+06 2824.09 7 37792 180905 -1 2.08145 1.90829 -178.671 -2.08145 0 0 0.16 -1 -1 76.8 MiB 0.02 0.0370597 0.0326026 76.8 MiB -1 0.06