diff --git a/vpr/src/analytical_place/analytical_solver.cpp b/vpr/src/analytical_place/analytical_solver.cpp index d085caa7d7d..5e0c3009766 100644 --- a/vpr/src/analytical_place/analytical_solver.cpp +++ b/vpr/src/analytical_place/analytical_solver.cpp @@ -236,41 +236,17 @@ void QPHybridSolver::init_linear_system() { A_sparse.setFromTriplets(tripletList.begin(), tripletList.end()); } -/** - * @brief Helper method to update the linear system with anchors to the current - * partial placement. - * - * For each moveable block (with row = i) in the netlist: - * A[i][i] = A[i][i] + coeff_pseudo_anchor; - * b[i] = b[i] + pos[block(i)] * coeff_pseudo_anchor; - * Where coeff_pseudo_anchor grows with each iteration. - * - * This is basically a fast way of adding a connection between all moveable - * blocks in the netlist and their target fixed placement location. - * - * See add_connection_to_system. - * - * @param A_sparse_diff The ceofficient matrix to update. - * @param b_x_diff The x-dimension constant vector to update. - * @param b_y_diff The y-dimension constant vector to update. - * @param p_placement The location the moveable blocks should be anchored - * to. - * @param num_moveable_blocks The number of moveable blocks in the netlist. - * @param row_id_to_blk_id Lookup for the row id from the APBlock Id. - * @param iteration The current iteration of the Global Placer. - */ -static inline void update_linear_system_with_anchors(Eigen::SparseMatrix& A_sparse_diff, - Eigen::VectorXd& b_x_diff, - Eigen::VectorXd& b_y_diff, - PartialPlacement& p_placement, - size_t num_moveable_blocks, - vtr::vector row_id_to_blk_id, - unsigned iteration) { +void QPHybridSolver::update_linear_system_with_anchors( + Eigen::SparseMatrix& A_sparse_diff, + Eigen::VectorXd& b_x_diff, + Eigen::VectorXd& b_y_diff, + PartialPlacement& p_placement, + unsigned iteration) { // Anchor weights grow exponentially with iteration. 
- double coeff_pseudo_anchor = 0.01 * std::exp((double)iteration / 5); - for (size_t row_id_idx = 0; row_id_idx < num_moveable_blocks; row_id_idx++) { + double coeff_pseudo_anchor = anchor_weight_mult_ * std::exp((double)iteration / anchor_weight_exp_fac_); + for (size_t row_id_idx = 0; row_id_idx < num_moveable_blocks_; row_id_idx++) { APRowId row_id = APRowId(row_id_idx); - APBlockId blk_id = row_id_to_blk_id[row_id]; + APBlockId blk_id = row_id_to_blk_id_[row_id]; double pseudo_w = coeff_pseudo_anchor; A_sparse_diff.coeffRef(row_id_idx, row_id_idx) += pseudo_w; b_x_diff(row_id_idx) += pseudo_w * p_placement.block_x_locs[blk_id]; @@ -289,8 +265,7 @@ void QPHybridSolver::solve(unsigned iteration, PartialPlacement& p_placement) { // anchor-points (fixed block positions). if (iteration != 0) { update_linear_system_with_anchors(A_sparse_diff, b_x_diff, b_y_diff, - p_placement, num_moveable_blocks_, - row_id_to_blk_id_, iteration); + p_placement, iteration); } // Verify that the constant vectors are valid. VTR_ASSERT_DEBUG(!b_x_diff.hasNaN() && "b_x has NaN!"); diff --git a/vpr/src/analytical_place/analytical_solver.h b/vpr/src/analytical_place/analytical_solver.h index bab26ab3b9d..02e5bafd8b1 100644 --- a/vpr/src/analytical_place/analytical_solver.h +++ b/vpr/src/analytical_place/analytical_solver.h @@ -155,6 +155,21 @@ class QPHybridSolver : public AnalyticalSolver { /// sparse. static constexpr size_t star_num_pins_threshold = 3; + // The following constants are used to configure the anchor weighting. + // The weights of anchors grow exponentially each iteration by the following + // function: + // anchor_w = anchor_weight_mult_ * e^(iter / anchor_weight_exp_fac_) + // The numbers below were empirically found to work well. + + /// @brief Multiplier for the anchor weight. The smaller this number is, the + /// weaker the anchors will be at the start. 
+ static constexpr double anchor_weight_mult_ = 0.001; + + /// @brief Factor for controlling the growth of the exponential term in the + /// weight factor function. Larger numbers will cause the anchor + /// weights to grow slower. + static constexpr double anchor_weight_exp_fac_ = 5.0; + /** * @brief Initializes the linear system of Ax = b_x and Ay = b_y based on * the APNetlist and the fixed APBlock locations. @@ -165,6 +180,35 @@ class QPHybridSolver : public AnalyticalSolver { */ void init_linear_system(); + /** + * @brief Helper method to update the linear system with anchors to the + * current partial placement. + * + * For each moveable block (with row = i) in the netlist: + * A[i][i] = A[i][i] + coeff_pseudo_anchor; + * b[i] = b[i] + pos[block(i)] * coeff_pseudo_anchor; + * Where coeff_pseudo_anchor grows with each iteration. + * + * This is basically a fast way of adding a connection between all moveable + * blocks in the netlist and their target fixed placement location. + * + * See add_connection_to_system. + * + * @param A_sparse_diff The coefficient matrix to update. + * @param b_x_diff The x-dimension constant vector to update. + * @param b_y_diff The y-dimension constant vector to update. + * @param p_placement The location the moveable blocks should be + * anchored to. + * @param num_moveable_blocks The number of moveable blocks in the netlist. + * @param row_id_to_blk_id Lookup for the row id from the APBlock Id. + * @param iteration The current iteration of the Global Placer. + */ + void update_linear_system_with_anchors(Eigen::SparseMatrix& A_sparse_diff, + Eigen::VectorXd& b_x_diff, + Eigen::VectorXd& b_y_diff, + PartialPlacement& p_placement, + unsigned iteration); + // The following variables represent the linear system without any anchor // points. These are filled in the constructor and never modified. 
// When the anchor-points are taken into consideration, the diagonal of the diff --git a/vpr/src/analytical_place/flat_placement_bins.h b/vpr/src/analytical_place/flat_placement_bins.h index 89cd8900eb8..e94ed958312 100644 --- a/vpr/src/analytical_place/flat_placement_bins.h +++ b/vpr/src/analytical_place/flat_placement_bins.h @@ -111,7 +111,6 @@ class FlatPlacementBins { inline const vtr::Rect& bin_region(FlatPlacementBinId bin_id) const { VTR_ASSERT(bin_id.is_valid()); return bin_region_[bin_id]; - ; } /** diff --git a/vpr/src/analytical_place/flat_placement_density_manager.cpp b/vpr/src/analytical_place/flat_placement_density_manager.cpp index 11209e19759..f4c37b191c8 100644 --- a/vpr/src/analytical_place/flat_placement_density_manager.cpp +++ b/vpr/src/analytical_place/flat_placement_density_manager.cpp @@ -80,6 +80,7 @@ FlatPlacementDensityManager::FlatPlacementDensityManager(const APNetlist& ap_net auto tile_type = device_grid.get_physical_type(tile_loc); int tw = tile_type->width; int th = tile_type->height; + VTR_ASSERT_SAFE(tw != 0 && th != 0); vtr::Rect new_bin_region(vtr::Point(x, y), vtr::Point(x + tw, y + th)); @@ -162,6 +163,10 @@ void FlatPlacementDensityManager::remove_block_from_bin(APBlockId blk_id, } void FlatPlacementDensityManager::import_placement_into_bins(const PartialPlacement& p_placement) { + // Empty the bins such that all blocks are no longer within the bins. + empty_bins(); + + // Insert each block in the netlist into their bin based on their placement. // TODO: Maybe import the fixed block locations in the constructor and then // only import the moveable block locations. for (APBlockId blk_id : ap_netlist_.blocks()) { @@ -215,9 +220,9 @@ void FlatPlacementDensityManager::empty_bins() { // Reset all of the bins and their utilizations. 
for (FlatPlacementBinId bin_id : bins_.bins()) { bins_.remove_all_blocks_from_bin(bin_id); - bin_utilization_[bin_id] = PrimitiveVector(); - bin_overfill_[bin_id] = calc_bin_overfill(bin_utilization_[bin_id], bin_capacity_[bin_id]); - bin_underfill_[bin_id] = calc_bin_underfill(bin_utilization_[bin_id], bin_capacity_[bin_id]); + bin_utilization_[bin_id].clear(); + bin_overfill_[bin_id].clear(); + bin_underfill_[bin_id] = bin_capacity_[bin_id]; } // Once all the bins are reset, all bins should be empty; therefore no bins // are overfilled. diff --git a/vpr/src/analytical_place/flat_placement_density_manager.h b/vpr/src/analytical_place/flat_placement_density_manager.h index d2038bbe34a..ad3977589c1 100644 --- a/vpr/src/analytical_place/flat_placement_density_manager.h +++ b/vpr/src/analytical_place/flat_placement_density_manager.h @@ -185,6 +185,9 @@ class FlatPlacementDensityManager { * @brief Import the given flat placement into the bins. * * This will place AP blocks into the bins that they are placed over. + * + * This will reset the bins before importing the placement. Anything inside + * the bins will be removed. */ void import_placement_into_bins(const PartialPlacement& p_placement); diff --git a/vpr/src/analytical_place/flat_placement_mass_calculator.cpp b/vpr/src/analytical_place/flat_placement_mass_calculator.cpp index a7e34120357..c99aaf29339 100644 --- a/vpr/src/analytical_place/flat_placement_mass_calculator.cpp +++ b/vpr/src/analytical_place/flat_placement_mass_calculator.cpp @@ -234,6 +234,7 @@ static void print_capacities(const std::vector& logical_block_t VTR_LOG("\n"); } VTR_LOG("\n"); + // TODO: Print the masses of each model. 
} FlatPlacementMassCalculator::FlatPlacementMassCalculator(const APNetlist& ap_netlist, diff --git a/vpr/src/analytical_place/global_placer.cpp b/vpr/src/analytical_place/global_placer.cpp index 4c51e454f18..0bcbb8f7052 100644 --- a/vpr/src/analytical_place/global_placer.cpp +++ b/vpr/src/analytical_place/global_placer.cpp @@ -13,12 +13,16 @@ #include "analytical_solver.h" #include "ap_flow_enums.h" #include "ap_netlist.h" +#include "ap_netlist_fwd.h" #include "atom_netlist.h" #include "device_grid.h" +#include "flat_placement_bins.h" #include "flat_placement_density_manager.h" +#include "globals.h" #include "partial_legalizer.h" #include "partial_placement.h" #include "physical_types.h" +#include "primitive_vector.h" #include "vpr_error.h" #include "vtr_log.h" #include "vtr_time.h" @@ -90,9 +94,74 @@ SimPLGlobalPlacer::SimPLGlobalPlacer(e_partial_legalizer partial_legalizer_type, partial_legalizer_ = make_partial_legalizer(partial_legalizer_type, ap_netlist_, density_manager_, + prepacker, log_verbosity_); } +/** + * @brief Helper method to print the statistics on the given partial placement. + */ +static void print_placement_stats(const PartialPlacement& p_placement, + const APNetlist& ap_netlist, + FlatPlacementDensityManager& density_manager) { + // Print the placement HPWL + VTR_LOG("\tPlacement HPWL: %f\n", p_placement.get_hpwl(ap_netlist)); + + // Print density information. Need to reset the density manager to ensure + // the data is valid. + density_manager.import_placement_into_bins(p_placement); + + // Print the number of overfilled bins. 
+ size_t num_overfilled_bins = density_manager.get_overfilled_bins().size(); + VTR_LOG("\tNumber of overfilled bins: %zu\n", num_overfilled_bins); + + // Print the average overfill + float total_overfill = 0.0f; + for (FlatPlacementBinId bin_id : density_manager.get_overfilled_bins()) { + total_overfill += density_manager.get_bin_overfill(bin_id).manhattan_norm(); + } + float avg_overfill = 0.0f; + if (num_overfilled_bins != 0) + avg_overfill = total_overfill / static_cast(num_overfilled_bins); + VTR_LOG("\tAverage overfill magnitude: %f\n", avg_overfill); + + // Print the number of overfilled tiles per type. + const auto& physical_tile_types = g_vpr_ctx.device().physical_tile_types; + const auto& device_grid = g_vpr_ctx.device().grid; + std::vector overfilled_tiles_by_type(physical_tile_types.size(), 0); + for (FlatPlacementBinId bin_id : density_manager.get_overfilled_bins()) { + const auto& bin_region = density_manager.flat_placement_bins().bin_region(bin_id); + auto tile_loc = t_physical_tile_loc((int)bin_region.xmin(), + (int)bin_region.ymin(), + 0); + auto tile_type = device_grid.get_physical_type(tile_loc); + overfilled_tiles_by_type[tile_type->index]++; + } + VTR_LOG("\tOverfilled bins by tile type:\n"); + for (size_t type_idx = 0; type_idx < physical_tile_types.size(); type_idx++) { + VTR_LOG("\t\t%10s: %zu\n", + physical_tile_types[type_idx].name.c_str(), + overfilled_tiles_by_type[type_idx]); + } + + // Count the number of blocks that were placed in a bin which they cannot + // physically be placed into (according to their mass). + unsigned num_misplaced_blocks = 0; + for (FlatPlacementBinId bin_id : density_manager.get_overfilled_bins()) { + for (APBlockId ap_blk_id : density_manager.flat_placement_bins().bin_contained_blocks(bin_id)) { + // Get the blk mass and project it onto the capacity of its bin. 
+ PrimitiveVector blk_mass = density_manager.mass_calculator().get_block_mass(ap_blk_id); + PrimitiveVector projected_mass = blk_mass; + projected_mass.project(density_manager.get_bin_capacity(bin_id)); + // If the projected mass does not match its mass, this implies that + // this block does not belong in this bin. + if (projected_mass != blk_mass) + num_misplaced_blocks++; + } + } + VTR_LOG("\tNumber of blocks in an incompatible bin: %zu\n", num_misplaced_blocks); +} + /** * @brief Helper method to print the header of the per-iteration status updates * of the global placer. @@ -177,6 +246,13 @@ PartialPlacement SimPLGlobalPlacer::place() { if (hpwl_relative_gap < target_hpwl_relative_gap_) break; } + + // Print some statistics on the final placement. + VTR_LOG("Placement after Global Placement:\n"); + print_placement_stats(p_placement, + ap_netlist_, + *density_manager_); + // Return the placement from the final iteration. // TODO: investigate saving the best solution found so far. It should be // cheap to save a copy of the PartialPlacement object. diff --git a/vpr/src/analytical_place/global_placer.h b/vpr/src/analytical_place/global_placer.h index 9a120c8e5a8..196de86220c 100644 --- a/vpr/src/analytical_place/global_placer.h +++ b/vpr/src/analytical_place/global_placer.h @@ -116,7 +116,8 @@ class SimPLGlobalPlacer : public GlobalPlacer { /// lower-bound placements. The placer will stop if the difference /// between the two bounds, normalized to the upper-bound, is smaller /// than this number. - static constexpr double target_hpwl_relative_gap_ = 0.10; + /// This number was empirically found to work well. + static constexpr double target_hpwl_relative_gap_ = 0.05; /// @brief The solver which generates the lower-bound placement. 
std::unique_ptr solver_; diff --git a/vpr/src/analytical_place/model_grouper.cpp b/vpr/src/analytical_place/model_grouper.cpp new file mode 100644 index 00000000000..0aca963c96a --- /dev/null +++ b/vpr/src/analytical_place/model_grouper.cpp @@ -0,0 +1,184 @@ +/** + * @file + * @author Alex Singer + * @date March 2025 + * @brief Implementation of a model grouper class which groups models together + * which must be legalized together in a flat placement. + */ + +#include "model_grouper.h" +#include +#include +#include +#include +#include "cad_types.h" +#include "logic_types.h" +#include "prepack.h" +#include "vtr_assert.h" +#include "vtr_log.h" + +/** + * @brief Recursive helper function which gets the models in the given pattern + * block. + * + * @param pattern_block + * The pattern block to get the models of. + * @param models + * A set of the models found so far. + * @param block_visited + * A vector of flags for each pattern block to signify which blocks have + * been visited. + */ +static void get_pattern_models_recurr(t_pack_pattern_block* pattern_block, + std::unordered_set& models, + std::vector& block_visited) { + // If the pattern block is invalid or this block has been visited, return. + if (pattern_block == nullptr || block_visited[pattern_block->block_id]) { + return; + } + + // Mark this block as visited and insert its model into the models vector. + block_visited[pattern_block->block_id] = true; + models.insert(pattern_block->pb_type->model->index); + + // Go through this block's connections and get their pattern models. + t_pack_pattern_connections* connection = pattern_block->connections; + while (connection != nullptr) { + get_pattern_models_recurr(connection->from_block, models, block_visited); + get_pattern_models_recurr(connection->to_block, models, block_visited); + connection = connection->next; + } +} + +/** + * @brief Entry point into the recursive function above. Gets the models in + * the given pack pattern. 
+ */ +static std::unordered_set get_pattern_models(const t_pack_patterns& pack_pattern) { + std::unordered_set models_in_pattern; + + // Initialize the visited flags for each block to false. + std::vector block_visited(pack_pattern.num_blocks, false); + // Begin the recursion with the root block. + get_pattern_models_recurr(pack_pattern.root_block, models_in_pattern, block_visited); + + return models_in_pattern; +} + +ModelGrouper::ModelGrouper(const Prepacker& prepacker, + t_model* user_models, + t_model* library_models, + int log_verbosity) { + /** + * Group the models together based on their pack patterns. If model A and + * model B form a pattern, and model B and model C form a pattern, then + * models A, B, and C are in a group together. + * + * An efficient way to find this is to represent this problem as a graph, + * where each node is a model and each edge is a relationship where a model + * is in a pack pattern with another model. We can then perform BFS to find + * the connected sub-graphs which will be the groups. + */ + + // Get the number of models + // TODO: Clean up the models vectors in VTR. + std::unordered_map model_name; + unsigned num_models = 0; + t_model* model = library_models; + while (model != nullptr) { + model_name[model->index] = model->name; + num_models++; + model = model->next; + } + model = user_models; + while (model != nullptr) { + model_name[model->index] = model->name; + num_models++; + model = model->next; + } + + // Create an adjacency list for the edges. An edge is formed where two + // models share a pack pattern together. + std::vector> adj_list(num_models); + for (const t_pack_patterns& pack_pattern : prepacker.get_all_pack_patterns()) { + // Get the models within this pattern. + auto models_in_pattern = get_pattern_models(pack_pattern); + VTR_ASSERT_SAFE(!models_in_pattern.empty()); + + // Debug print the models within the pattern. 
+ if (log_verbosity >= 20) { + VTR_LOG("Pattern: %s\n\t", pack_pattern.name); + for (int model_idx : models_in_pattern) { + VTR_LOG("%s ", model_name[model_idx]); + } + VTR_LOG("\n"); + } + + // Connect each of the models to the first model in the pattern. Since + // we only care if there exist a path from each model to another, we do + // not need to connect the models in a clique. + int first_model_idx = *models_in_pattern.begin(); + for (int model_idx : models_in_pattern) { + adj_list[model_idx].insert(first_model_idx); + adj_list[first_model_idx].insert(model_idx); + } + } + + // Perform BFS to group the models. + VTR_LOGV(log_verbosity >= 20, + "Finding model groups...\n"); + std::queue node_queue; + model_group_id_.resize(num_models, ModelGroupId::INVALID()); + for (int model_idx = 0; model_idx < (int)num_models; model_idx++) { + // If this model is already in a group, skip it. + if (model_group_id_[model_idx].is_valid()) { + VTR_LOGV(log_verbosity >= 20, + "\t(%d -> %d)\n", model_idx, model_group_id_[model_idx]); + continue; + } + + ModelGroupId group_id = ModelGroupId(group_ids_.size()); + // Put the model in this group and push to the queue. + model_group_id_[model_idx] = group_id; + node_queue.push(model_idx); + + while (!node_queue.empty()) { + // Pop a node from the queue, and explore its neighbors. + int node_model_idx = node_queue.front(); + node_queue.pop(); + for (int neighbor_model_idx : adj_list[node_model_idx]) { + // If this neighbor is already in this group, skip it. + if (model_group_id_[neighbor_model_idx].is_valid()) { + VTR_ASSERT_SAFE(model_group_id_[neighbor_model_idx] == group_id); + continue; + } + // Put the neighbor in this group and push it to the queue. 
+ model_group_id_[neighbor_model_idx] = group_id; + node_queue.push(neighbor_model_idx); + } + } + + VTR_LOGV(log_verbosity >= 20, + "\t(%d -> %d)\n", model_idx, model_group_id_[model_idx]); + group_ids_.push_back(group_id); + } + + // Create a lookup between each group and the models it contains. + groups_.resize(groups().size()); + for (int model_idx = 0; model_idx < (int)num_models; model_idx++) { + groups_[model_group_id_[model_idx]].push_back(model_idx); + } + + // Debug printing for each group. + if (log_verbosity >= 20) { + for (ModelGroupId group_id : groups()) { + const std::vector& group = groups_[group_id]; + VTR_LOG("Group %zu:\n", group_id); + VTR_LOG("\tSize = %zu\n", group.size()); + VTR_LOG("\tContained models:\n"); + for (int model_idx : group) { + VTR_LOG("\t\t%s\n", model_name[model_idx]); + } + } + } +} diff --git a/vpr/src/analytical_place/model_grouper.h b/vpr/src/analytical_place/model_grouper.h new file mode 100644 index 00000000000..d5a9113d6c1 --- /dev/null +++ b/vpr/src/analytical_place/model_grouper.h @@ -0,0 +1,115 @@ +/** + * @file + * @author Alex Singer + * @date March 2025 + * @brief Declaration of a model grouper class which groups together models + * that must be legalized together in a flat placement. + */ + +#pragma once + +#include +#include "vtr_assert.h" +#include "vtr_range.h" +#include "vtr_strong_id.h" +#include "vtr_vector.h" +#include "vtr_vector_map.h" + +// Forward declarations. +class Prepacker; +struct t_model; + +/// @brief Tag for the ModelGroupId +struct model_group_id_tag; + +/// @brief A unique ID of a group of models created by the ModelGrouper class. +typedef vtr::StrongId ModelGroupId; + +/** + * @brief A manager class for grouping together models that must be legalized + * together in a flat placement due to how they form molecules with each + * other. + * + * When performing legalization of a flat placement, it is desirable to split + * the problem into independent legalization problems. 
We cannot place all of + * the blocks of different model types independently since some blocks are made + * of multiple different types of models. We wish to find the minimum number of + * models that we need to legalize at the same time. + * + * This class groups models together based on the pack patterns that they can + * form in the prepacker. If model A and model B can form a pack pattern, and + * model B and model C can form a pack pattern, then models A, B, and C form a + * group and must be legalized together. + * + * This class also manages what models each group contains and the group of each + * model, where the user can use IDs to get relevant information. + */ +class ModelGrouper { + public: + // Iterator for the model group IDs + typedef typename vtr::vector_map::const_iterator group_iterator; + + // Range for the model group IDs + typedef typename vtr::Range group_range; + + public: + ModelGrouper() = delete; + + /** + * @brief Constructor for the model grouper class. Groups are formed here. + * + * @param prepacker + * The prepacker used to create molecules in the flat placement. This + * provides the pack patterns for forming the groups. + * @param user_models + * Linked list of user-provided models. + * @param library_models + * Linked list of library models. + * @param log_verbosity + * The verbosity of log messages in the grouper class. + */ + ModelGrouper(const Prepacker& prepacker, + t_model* user_models, + t_model* library_models, + int log_verbosity); + + /** + * @brief Returns a list of all valid group IDs. + */ + inline group_range groups() const { + return vtr::make_range(group_ids_.begin(), group_ids_.end()); + } + + /** + * @brief Gets the group ID of the given model. 
+ */ + inline ModelGroupId get_model_group_id(int model_index) const { + VTR_ASSERT_SAFE_MSG(model_index < (int)model_group_id_.size(), + "Model index outside of range for model_group_id_"); + ModelGroupId group_id = model_group_id_[model_index]; + VTR_ASSERT_SAFE_MSG(group_id.is_valid(), + "Model is not in a group"); + return group_id; + } + + /** + * @brief Gets the models in the given group. + */ + inline const std::vector& get_models_in_group(ModelGroupId group_id) const { + VTR_ASSERT_SAFE_MSG(group_id.is_valid(), + "Invalid group id"); + VTR_ASSERT_SAFE_MSG(groups_[group_id].size() != 0, + "Group is empty"); + return groups_[group_id]; + } + + private: + /// @brief List of all group IDs. + vtr::vector_map group_ids_; + + /// @brief A lookup between models and the group ID that contains them. + std::vector model_group_id_; + + /// @brief A lookup between each group ID and the models in that group. + vtr::vector> groups_; +}; diff --git a/vpr/src/analytical_place/partial_legalizer.cpp b/vpr/src/analytical_place/partial_legalizer.cpp index f91f66b74e0..3d5d8dd25e9 100644 --- a/vpr/src/analytical_place/partial_legalizer.cpp +++ b/vpr/src/analytical_place/partial_legalizer.cpp @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include #include @@ -23,22 +25,27 @@ #include "flat_placement_density_manager.h" #include "flat_placement_mass_calculator.h" #include "globals.h" +#include "model_grouper.h" #include "partial_placement.h" #include "physical_types.h" +#include "prepack.h" #include "primitive_vector.h" #include "vpr_context.h" #include "vpr_error.h" #include "vtr_assert.h" #include "vtr_geometry.h" #include "vtr_log.h" +#include "vtr_math.h" #include "vtr_prefix_sum.h" #include "vtr_strong_id.h" +#include "vtr_time.h" #include "vtr_vector.h" #include "vtr_vector_map.h" std::unique_ptr make_partial_legalizer(e_partial_legalizer legalizer_type, const APNetlist& netlist, std::shared_ptr density_manager, + const Prepacker& prepacker, int 
log_verbosity) { // Based on the partial legalizer type passed in, build the partial legalizer. switch (legalizer_type) { @@ -49,6 +56,7 @@ std::unique_ptr make_partial_legalizer(e_partial_legalizer leg case e_partial_legalizer::BI_PARTITIONING: return std::make_unique(netlist, density_manager, + prepacker, log_verbosity); default: VPR_FATAL_ERROR(VPR_ERROR_AP, @@ -592,8 +600,6 @@ static void print_flow_based_legalizer_status(size_t iteration, void FlowBasedLegalizer::legalize(PartialPlacement& p_placement) { VTR_LOGV(log_verbosity_ >= 10, "Running Flow-Based Legalizer\n"); - // Reset the bins from the previous iteration and prepare for this iteration. - density_manager_->empty_bins(); // Import the partial placement into bins. density_manager_->import_placement_into_bins(p_placement); // Verify that the placement was imported correctly. @@ -696,104 +702,375 @@ void FlowBasedLegalizer::legalize(PartialPlacement& p_placement) { density_manager_->export_placement_from_bins(p_placement); } -// This namespace contains enums and classes used for bi-partitioning. -namespace { +PerModelPrefixSum2D::PerModelPrefixSum2D(const FlatPlacementDensityManager& density_manager, + t_model* user_models, + t_model* library_models, + std::function lookup) { + // Get the number of models in the architecture. + // TODO: We really need to clean up how models are stored in VPR... + t_model* cur = user_models; + int num_models = 0; + while (cur != nullptr) { + num_models++; + cur = cur->next; + } + cur = library_models; + while (cur != nullptr) { + num_models++; + cur = cur->next; + } -/** - * @brief Enum for the direction of a partition. - */ -enum class e_partition_dir { - VERTICAL, - HORIZONTAL -}; + // Get the size that the prefix sums should be. + size_t width, height, layers; + std::tie(width, height, layers) = density_manager.get_overall_placeable_region_size(); -/** - * @brief Spatial window used to spread the blocks contained within. 
- * - * This window's region is identified and grown until it has enough space to - * accomodate the blocks stored within. This window is then successivly - * partitioned until it is small enough (blocks are not too dense). - */ -struct SpreadingWindow { - /// @brief The blocks contained within this window. - std::vector contained_blocks; + // Create each of the prefix sums. + model_prefix_sum_.resize(num_models); + for (int model_index = 0; model_index < num_models; model_index++) { + model_prefix_sum_[model_index] = vtr::PrefixSum2D( + width, + height, + [&](size_t x, size_t y) { + return lookup(model_index, x, y); + }); + } +} - /// @brief The 2D region of space that this window covers. - vtr::Rect region; -}; +float PerModelPrefixSum2D::get_model_sum(int model_index, + const vtr::Rect& region) const { + VTR_ASSERT_SAFE(model_index < (int)model_prefix_sum_.size() && model_index >= 0); + // Get the sum over the given region. + return model_prefix_sum_[model_index].get_sum(region.xmin(), + region.ymin(), + region.xmax() - 1, + region.ymax() - 1); +} -} // namespace +PrimitiveVector PerModelPrefixSum2D::get_sum(const std::vector& model_indices, + const vtr::Rect& region) const { + PrimitiveVector res; + for (int model_index : model_indices) { + VTR_ASSERT_SAFE(res.get_dim_val(model_index) == 0.0f); + res.set_dim_val(model_index, get_model_sum(model_index, region)); + } + return res; +} BiPartitioningPartialLegalizer::BiPartitioningPartialLegalizer( const APNetlist& netlist, std::shared_ptr density_manager, + const Prepacker& prepacker, int log_verbosity) : PartialLegalizer(netlist, log_verbosity) - , density_manager_(density_manager) {} + , density_manager_(density_manager) + , model_grouper_(prepacker, + g_vpr_ctx.device().arch->models, + g_vpr_ctx.device().arch->model_library, + log_verbosity) { + // Compute the capacity prefix sum. Capacity is assumed to not change + // between iterations of the partial legalizer. 
+    capacity_prefix_sum_ = PerModelPrefixSum2D(
+        *density_manager,
+        g_vpr_ctx.device().arch->models,
+        g_vpr_ctx.device().arch->model_library,
+        [&](int model_index, size_t x, size_t y) {
+            // Get the bin at this grid location.
+            FlatPlacementBinId bin_id = density_manager_->get_bin(x, y, 0);
+            // Get the capacity of the bin for this model.
+            float cap = density_manager_->get_bin_capacity(bin_id).get_dim_val(model_index);
+            VTR_ASSERT_SAFE(cap >= 0.0f);
+            // Bins may be large, but the prefix sum assumes a 1x1 grid of
+            // values. Normalize by the area of the bin to turn this into
+            // a 1x1 bin equivalent.
+            const vtr::Rect<double>& bin_region = density_manager_->flat_placement_bins().bin_region(bin_id);
+            float bin_area = bin_region.width() * bin_region.height();
+            VTR_ASSERT_SAFE(!vtr::isclose(bin_area, 0.f));
+            return cap / bin_area;
+        });
+}
+
+void BiPartitioningPartialLegalizer::legalize(PartialPlacement& p_placement) {
+    VTR_LOGV(log_verbosity_ >= 10, "Running Bi-Partitioning Legalizer\n");
+
+    // Prepare the density manager.
+    density_manager_->import_placement_into_bins(p_placement);
+
+    // Quick return. If there are no overfilled bins, there is nothing to spread.
+    if (density_manager_->get_overfilled_bins().size() == 0) {
+        VTR_LOGV(log_verbosity_ >= 10, "No overfilled bins. Nothing to legalize.\n");
+        return;
+    }
+
+    if (log_verbosity_ >= 10) {
+        size_t num_overfilled_bins = density_manager_->get_overfilled_bins().size();
+        VTR_LOG("\tNumber of overfilled bins before legalization: %zu\n",
+                num_overfilled_bins);
+        // FIXME: Make this a method in the density manager class.
+        float avg_overfill = 0.f;
+        for (FlatPlacementBinId overfilled_bin_id : density_manager_->get_overfilled_bins()) {
+            avg_overfill += density_manager_->get_bin_overfill(overfilled_bin_id).manhattan_norm();
+        }
+        VTR_LOG("\t\tAverage overfill per overfilled bin: %f\n",
+                avg_overfill / static_cast<float>(num_overfilled_bins));
+    }
+
+    // 1) Identify the groups that need to be spread
+    std::unordered_set<ModelGroupId> groups_to_spread;
+    for (FlatPlacementBinId overfilled_bin_id : density_manager_->get_overfilled_bins()) {
+        // Get the overfilled models in this bin.
+        const PrimitiveVector& overfill = density_manager_->get_bin_overfill(overfilled_bin_id);
+        std::vector<int> overfilled_models = overfill.get_non_zero_dims();
+        // For each model, insert its group into the set. Set will handle dupes.
+        for (int model_index : overfilled_models) {
+            groups_to_spread.insert(model_grouper_.get_model_group_id(model_index));
+        }
+    }
+
+    // 2) For each group, identify non-overlapping windows and spread
+    vtr::Timer runtime_timer;
+    float window_identification_time = 0.0f;
+    float window_spreading_time = 0.0f;
+    for (ModelGroupId group_id : groups_to_spread) {
+        VTR_LOGV(log_verbosity_ >= 10, "\tSpreading group %zu\n", group_id);
+        // Identify non-overlapping spreading windows.
+        float window_identification_start_time = runtime_timer.elapsed_sec();
+        auto non_overlapping_windows = identify_non_overlapping_windows(group_id);
+        window_identification_time += runtime_timer.elapsed_sec() - window_identification_start_time;
+        VTR_ASSERT(non_overlapping_windows.size() != 0);
+
+        // Spread the blocks over the non-overlapping windows.
+        float window_spreading_start_time = runtime_timer.elapsed_sec();
+        spread_over_windows(non_overlapping_windows, p_placement, group_id);
+        window_spreading_time += runtime_timer.elapsed_sec() - window_spreading_start_time;
+    }
+
+    // FIXME: Remove this duplicate code...
+    if (log_verbosity_ >= 10) {
+        size_t num_overfilled_bins = density_manager_->get_overfilled_bins().size();
+        VTR_LOG("\tNumber of overfilled bins after legalization: %zu\n",
+                num_overfilled_bins);
+        // FIXME: Make this a method in the density manager class.
+        float avg_overfill = 0.f;
+        for (FlatPlacementBinId overfilled_bin_id : density_manager_->get_overfilled_bins()) {
+            avg_overfill += density_manager_->get_bin_overfill(overfilled_bin_id).manhattan_norm();
+        }
+        VTR_LOG("\t\tAverage overfill per overfilled bin: %f\n", // Report 0 instead of NaN (0/0) when no bin is left overfilled.
+                (num_overfilled_bins == 0) ? 0.f : avg_overfill / static_cast<float>(num_overfilled_bins));
+        VTR_LOG("\tTime spent identifying windows: %g\n", window_identification_time);
+        VTR_LOG("\tTime spent spreading windows: %g\n", window_spreading_time);
+    }
+
+    // Export the legalized placement to the partial placement.
+    density_manager_->export_placement_from_bins(p_placement);
+}
+
+std::vector<SpreadingWindow> BiPartitioningPartialLegalizer::identify_non_overlapping_windows(ModelGroupId group_id) {
+
+    // 1) Cluster the overfilled bins. This will make creating minimum spanning
+    //    windows more efficient.
+    auto overfilled_bin_clusters = get_overfilled_bin_clusters(group_id);
+
+    // 2) For each of the overfilled bin clusters, create a minimum window such
+    //    that there is enough space in the window for the atoms inside.
+    auto windows = get_min_windows_around_clusters(overfilled_bin_clusters, group_id);
+
+    // 3) Merge overlapping windows.
+    merge_overlapping_windows(windows);
+
+    // TODO: Investigate shrinking the windows.
+
+    // 4) Move the blocks out of their bins and into the windows.
+    move_blocks_into_windows(windows, group_id);
+
+    return windows;
+}
 
 /**
- * @brief Identify spreading windows which contain overfilled bins on the device
- *        and do not overlap.
+ * @brief Helper method to check if the given PrimitiveVector has any values
+ *        in the model dimensions in the given group.
  *
- * This process is split into 3 stages:
- *  1) Identify overfilled bins and grow windows around them.
These windows - * will grow until there is just enough space to accomodate the blocks - * within the window (capacity of the window is larger than the utilization). - * 2) Merge overlapping windows. - * 3) Move the blocks within these window regions from their bins into - * their windows. This updates the current utilization of bins, making - * spreading easier. + * This method assumes the vector is non-negative. If the vector had any negative + * dimensions, it does not make sense to ask if it is in the group or not. */ -static std::vector identify_non_overlapping_windows( - const APNetlist& netlist, - FlatPlacementDensityManager& density_manager) { - // Identify overfilled bins - const std::unordered_set& overfilled_bins = density_manager.get_overfilled_bins(); - - // Create a prefix sum for the capacity. - // We will need to get the capacity of 2D regions of the device very often - // in the algorithm below. This greatly improves the time complexity. - // TODO: This should not change between iterations of spreading. This can - // be moved to the constructor. - size_t width, height, layers; - std::tie(width, height, layers) = density_manager.get_overall_placeable_region_size(); - vtr::PrefixSum2D capacity_prefix_sum(width, height, [&](size_t x, size_t y) { - FlatPlacementBinId bin_id = density_manager.get_bin(x, y, 0); - // For now we take the L1 norm of the bin divided by its area. - // The L1 norm is just a count of the number of primitives that - // can fit into the bin (without caring for primitive type). We - // divide by area such that large bins (1x4 for example) get - // normalized to 1x1 regions. 
- const vtr::Rect& bin_region = density_manager.flat_placement_bins().bin_region(bin_id); - float bin_area = bin_region.width() * bin_region.height(); - return density_manager.get_bin_capacity(bin_id).manhattan_norm() / bin_area; - }); +static bool is_vector_in_group(const PrimitiveVector& vec, + ModelGroupId group_id, + const ModelGrouper& model_grouper) { + VTR_ASSERT_SAFE(vec.is_non_negative()); + const std::vector& models_in_group = model_grouper.get_models_in_group(group_id); + for (int model_index : models_in_group) { + float dim_val = vec.get_dim_val(model_index); + if (dim_val != 0.0f) + return true; + } + return false; +} - // Create a prefix sum for the utilization. - // The utilization of the bins will change between routing iterations, so - // this prefix sum must be recomputed. - vtr::PrefixSum2D utilization_prefix_sum(width, height, [&](size_t x, size_t y) { - FlatPlacementBinId bin_id = density_manager.get_bin(x, y, 0); - // This is computed the same way as the capacity prefix sum above. - const vtr::Rect& bin_region = density_manager.flat_placement_bins().bin_region(bin_id); - float bin_area = bin_region.width() * bin_region.height(); - return density_manager.get_bin_utilization(bin_id).manhattan_norm() / bin_area; - }); +/** + * @brief Checks if the overfilled models in the given overfilled bin is in the + * given model group. + * + * This method does not check if the bin could be in the given group (for + * example the capacity), this checks if the overfilled blocks are in the group. 
+ */ +static bool is_overfilled_bin_in_group(FlatPlacementBinId overfilled_bin_id, + ModelGroupId group_id, + const FlatPlacementDensityManager& density_manager, + const ModelGrouper& model_grouper) { + const PrimitiveVector& bin_overfill = density_manager.get_bin_overfill(overfilled_bin_id); + VTR_ASSERT_SAFE(bin_overfill.is_non_zero()); + return is_vector_in_group(bin_overfill, group_id, model_grouper); +} + +/** + * @brief Checks if the given AP block is in the given model group. + * + * An AP block is in a model group if it contains any models in the model group. + */ +static bool is_block_in_group(APBlockId blk_id, + ModelGroupId group_id, + const FlatPlacementDensityManager& density_manager, + const ModelGrouper& model_grouper) { + const PrimitiveVector& blk_mass = density_manager.mass_calculator().get_block_mass(blk_id); + return is_vector_in_group(blk_mass, group_id, model_grouper); +} + +std::vector BiPartitioningPartialLegalizer::get_overfilled_bin_clusters( + ModelGroupId group_id) { + // Use BFS over the overfilled bins to cluster them. + std::vector overfilled_bin_clusters; + // Maintain the distance from the last overfilled bin + vtr::vector dist(density_manager_->flat_placement_bins().bins().size(), -1); + for (FlatPlacementBinId overfilled_bin_id : density_manager_->get_overfilled_bins()) { + // If this bin is not overfilled with the models in the group, skip. + if (!is_overfilled_bin_in_group(overfilled_bin_id, + group_id, + *density_manager_, + model_grouper_)) { + continue; + } + // If this bin is already in a cluster, skip. + if (dist[overfilled_bin_id] != -1) + continue; + dist[overfilled_bin_id] = 0; + // Collect nearby bins into a vector. + FlatPlacementBinCluster nearby_bins; + nearby_bins.push_back(overfilled_bin_id); + // Create a queue and insert the overfilled bin into it. + std::queue bin_queue; + bin_queue.push(overfilled_bin_id); + while (!bin_queue.empty()) { + // Pop a bin from queue. 
+ FlatPlacementBinId bin_node = bin_queue.front(); + bin_queue.pop(); + // If the node's distance from an overfilled bin exceeds the max gap, + // do not explore its neighbors. + if (dist[bin_node] > max_bin_cluster_gap_) + continue; + // Explore the neighbors of this bin. + for (FlatPlacementBinId neighbor : get_direct_neighbors_of_bin(bin_node, *density_manager_)) { + int neighbor_dist = dist[bin_node] + 1; + // If this neighbor has already been explored with a better distance, + // do not explore it. + if (dist[neighbor] != -1 && dist[neighbor] <= neighbor_dist) + continue; + // If the neighbor is an overfilled bin that we care about, add + // it to the list of nearby bins and set its distance to 0. + if (density_manager_->bin_is_overfilled(neighbor) + && is_overfilled_bin_in_group(neighbor, group_id, *density_manager_, model_grouper_)) { + nearby_bins.push_back(neighbor); + dist[neighbor] = 0; + } else { + dist[neighbor] = neighbor_dist; + } + // Enqueue the neighbor. + bin_queue.push(neighbor); + } + } + + // Move the cluster into the vector of overfilled bin clusters. + overfilled_bin_clusters.push_back(std::move(nearby_bins)); + } + + return overfilled_bin_clusters; +} - // 1) For each of the overfilled bins, create and store a minimum window. - // TODO: This is a very simple algorithm which currently only uses the number - // of primitives within the regions, not the primitive types. Need to - // investigate this further. +/** + * @brief Helper method to decide if the given region's utilization is higher + * than its capacity. + */ +static bool is_region_overfilled(const vtr::Rect& region, + const PerModelPrefixSum2D& capacity_prefix_sum, + const PerModelPrefixSum2D& utilization_prefix_sum, + const std::vector& model_indices) { + // Go through each model in the model group we are interested in. + for (int model_index : model_indices) { + // Get the capacity of this region for this model.
+ float region_model_capacity = capacity_prefix_sum.get_model_sum(model_index, + region); + // Get the utilization of this region for this model. + float region_model_utilization = utilization_prefix_sum.get_model_sum(model_index, + region); + // If the utilization is higher than the capacity, then this region is + // overfilled. + // TODO: Look into adding some head room to account for rounding. + if (region_model_utilization > region_model_capacity) + return true; + } + + // If the utilization is less than or equal to the capacity for each model + // then this region is not overfilled. + return false; +} + +std::vector BiPartitioningPartialLegalizer::get_min_windows_around_clusters( + const std::vector& overfilled_bin_clusters, + ModelGroupId group_id) { // TODO: Currently, we greedily grow the region by 1 in all directions until // the capacity is larger than the utilization. This may not produce // the minimum window. Should investigate "touching-up" the windows. + // FIXME: It may be a good idea to sort the bins by their overfill here. Then + // we can check for overlap as we go. + + // Get the width, height, and number of layers for the spreading region. + // This is used by the growing part of this routine to prevent the windows + // from outgrowing the device. + size_t width, height, layers; + std::tie(width, height, layers) = density_manager_->get_overall_placeable_region_size(); + + // Precompute a prefix sum for the current utilization of each 1x1 region + // of the device. This needs to be recomputed every time the bins are + // modified, so it is recomputed here. + PerModelPrefixSum2D utilization_prefix_sum( + *density_manager_, + g_vpr_ctx.device().arch->models, + g_vpr_ctx.device().arch->model_library, + [&](int model_index, size_t x, size_t y) { + FlatPlacementBinId bin_id = density_manager_->get_bin(x, y, 0); + // This is computed the same way as the capacity prefix sum above. 
+ const vtr::Rect& bin_region = density_manager_->flat_placement_bins().bin_region(bin_id); + float bin_area = bin_region.width() * bin_region.height(); + float util = density_manager_->get_bin_utilization(bin_id).get_dim_val(model_index); + VTR_ASSERT_SAFE(util >= 0.0f); + return util / bin_area; + }); + + // Create windows for each overfilled bin cluster. std::vector windows; - for (FlatPlacementBinId bin_id : overfilled_bins) { - // Create a new window for this bin. + for (const std::vector& overfilled_bin_cluster : overfilled_bin_clusters) { + // Create a new window for this cluster of bins. SpreadingWindow new_window; - // Initialize the region to the region of the bin. - new_window.region = density_manager.flat_placement_bins().bin_region(bin_id); + + // Set the region of the window to the bounding box of the cluster of bins. + size_t num_bins_in_cluster = overfilled_bin_cluster.size(); + VTR_ASSERT_SAFE(num_bins_in_cluster != 0); vtr::Rect& region = new_window.region; + region = density_manager_->flat_placement_bins().bin_region(overfilled_bin_cluster[0]); + for (size_t i = 1; i < num_bins_in_cluster; i++) { + region = vtr::bounding_box(region, + density_manager_->flat_placement_bins().bin_region(overfilled_bin_cluster[i])); + } + + // Grow the region until it is just large enough to not overfill while (true) { // Grow the region by 1 on all sides. double new_xmin = std::clamp(region.xmin() - 1.0, 0.0, width); @@ -807,28 +1084,25 @@ static std::vector identify_non_overlapping_windows( break; } - // If the utilization is lower than the capacity, stop growing. 
region.set_xmin(new_xmin); region.set_xmax(new_xmax); region.set_ymin(new_ymin); region.set_ymax(new_ymax); - float region_capacity = capacity_prefix_sum.get_sum(region.xmin(), - region.ymin(), - region.xmax() - 1, - region.ymax() - 1); - - float region_utilization = utilization_prefix_sum.get_sum(region.xmin(), - region.ymin(), - region.xmax() - 1, - region.ymax() - 1); - if (region_utilization < region_capacity) + + // If the region is no longer overfilled, stop growing. + if (!is_region_overfilled(region, capacity_prefix_sum_, utilization_prefix_sum, model_grouper_.get_models_in_group(group_id))) break; } // Insert this window into the list of windows. windows.emplace_back(std::move(new_window)); } - // 2) Merge overlapping bins and store into new array. + return windows; +} + +void BiPartitioningPartialLegalizer::merge_overlapping_windows( + std::vector& windows) { + // Merge overlapping windows. // TODO: This is a very basic merging process which will identify the // minimum region containing both windows; however, after merging it // is very likely that this window will now be too large. Need to @@ -877,7 +1151,14 @@ static std::vector identify_non_overlapping_windows( non_overlapping_windows.emplace_back(std::move(windows[i])); } - // 3) Move the blocks out of their bins and into the windows. + // Store the results into the input window. + windows = std::move(non_overlapping_windows); +} + +void BiPartitioningPartialLegalizer::move_blocks_into_windows( + std::vector& non_overlapping_windows, + ModelGroupId group_id) { + // Move the blocks from their bins into the windows that should contain them. // TODO: It may be good for debugging to check if the windows have nothing // to move. This may indicate a problem (overfilled bins of fixed // blocks, overlapping windows, etc.). 
@@ -891,49 +1172,56 @@ static std::vector identify_non_overlapping_windows( for (size_t x = lower_x; x <= upper_x; x++) { for (size_t y = lower_y; y <= upper_y; y++) { // Get all of the movable blocks from the bin. - FlatPlacementBinId bin_id = density_manager.get_bin(x, y, 0); std::vector moveable_blks; - moveable_blks.reserve(density_manager.flat_placement_bins().bin_contained_blocks(bin_id).size()); - for (APBlockId blk_id : density_manager.flat_placement_bins().bin_contained_blocks(bin_id)) { - if (netlist.block_mobility(blk_id) == APBlockMobility::MOVEABLE) - moveable_blks.push_back(blk_id); + FlatPlacementBinId bin_id = density_manager_->get_bin(x, y, 0); + const auto& bin_contained_blocks = density_manager_->flat_placement_bins().bin_contained_blocks(bin_id); + moveable_blks.reserve(bin_contained_blocks.size()); + for (APBlockId blk_id : bin_contained_blocks) { + // If this block is not moveable, do not move it. + if (netlist_.block_mobility(blk_id) != APBlockMobility::MOVEABLE) + continue; + // If this block is not in the group, do not move it. + if (!is_block_in_group(blk_id, group_id, *density_manager_, model_grouper_)) + continue; + + moveable_blks.push_back(blk_id); } // Remove the moveable blocks from their bins and store into // the windows. for (APBlockId blk_id : moveable_blks) { - density_manager.remove_block_from_bin(blk_id, bin_id); + density_manager_->remove_block_from_bin(blk_id, bin_id); window.contained_blocks.push_back(blk_id); } } } } - - return non_overlapping_windows; } -void BiPartitioningPartialLegalizer::legalize(PartialPlacement& p_placement) { - VTR_LOGV(log_verbosity_ >= 10, "Running Bi-Partitioning Legalizer\n"); - - // Prepare the density manager. - density_manager_->empty_bins(); - density_manager_->import_placement_into_bins(p_placement); - - // Quick return. If there are no overfilled bins, there is nothing to spread. 
- if (density_manager_->get_overfilled_bins().size() == 0) { - VTR_LOGV(log_verbosity_ >= 10, "No overfilled bins. Nothing to legalize.\n"); - return; +void BiPartitioningPartialLegalizer::spread_over_windows(std::vector& non_overlapping_windows, + const PartialPlacement& p_placement, + ModelGroupId group_id) { + if (log_verbosity_ >= 10) { + VTR_LOG("\tIdentified %zu non-overlapping spreading windows.\n", + non_overlapping_windows.size()); + + if (log_verbosity_ >= 20) { + for (const SpreadingWindow& window : non_overlapping_windows) { + VTR_LOG("\t\t[(%.1f, %.1f), (%.1f, %.1f)]\n", + window.region.xmin(), window.region.ymin(), + window.region.xmax(), window.region.ymax()); + PrimitiveVector window_capacity = capacity_prefix_sum_.get_sum(model_grouper_.get_models_in_group(group_id), + window.region); + VTR_LOG("\t\t\tCapacity: %f\n", + window_capacity.manhattan_norm()); + VTR_LOG("\t\t\tNumber of contained blocks: %zu\n", + window.contained_blocks.size()); + } + } } - // Identify non-overlapping spreading windows. - std::vector initial_windows = identify_non_overlapping_windows(netlist_, *density_manager_); - VTR_ASSERT(initial_windows.size() != 0); - VTR_LOGV(log_verbosity_ >= 10, - "\tIdentified %zu non-overlapping spreading windows.\n", - initial_windows.size()); - // Insert the windows into a queue for spreading. std::queue window_queue; - for (SpreadingWindow& window : initial_windows) { + for (SpreadingWindow& window : non_overlapping_windows) { window_queue.push(std::move(window)); } @@ -971,101 +1259,256 @@ void BiPartitioningPartialLegalizer::legalize(PartialPlacement& p_placement) { } // 2) Partition the window. - // Select the partition direction. - // To keep it simple, we partition the direction which would cut the - // region the most. - // TODO: Should explore making the partition line based on the capacity - // of the two partitioned regions. 
We may want to cut the - // region in half such that the mass of the atoms contained within - // the two future regions is equal. - e_partition_dir partition_dir = e_partition_dir::VERTICAL; - if (window.region.height() > window.region.width()) - partition_dir = e_partition_dir::HORIZONTAL; - - // To keep it simple, just cut the space in half. - // TODO: Should investigate other cutting techniques. Cutting perfectly - // in half may not be the most efficient technique. - SpreadingWindow lower_window; - SpreadingWindow upper_window; - if (partition_dir == e_partition_dir::VERTICAL) { - // Find the x-coordinate of a cut line directly in the middle of the - // region. We floor this to prevent fractional cut lines. - double pivot_x = std::floor((window.region.xmin() + window.region.xmax()) / 2.0); - - // Cut the region at this cut line. - lower_window.region = vtr::Rect(vtr::Point(window.region.xmin(), - window.region.ymin()), - vtr::Point(pivot_x, - window.region.ymax())); - - upper_window.region = vtr::Rect(vtr::Point(pivot_x, - window.region.ymin()), - vtr::Point(window.region.xmax(), - window.region.ymax())); - } else { - VTR_ASSERT(partition_dir == e_partition_dir::HORIZONTAL); - // Similarly in the y direction, find the non-fractional y coordinate - // to make a horizontal cut. - double pivot_y = std::floor((window.region.ymin() + window.region.ymax()) / 2.0); - - // Then cut the window. - lower_window.region = vtr::Rect(vtr::Point(window.region.xmin(), - window.region.ymin()), - vtr::Point(window.region.xmax(), - pivot_y)); - - upper_window.region = vtr::Rect(vtr::Point(window.region.xmin(), - pivot_y), - vtr::Point(window.region.xmax(), - window.region.ymax())); - } + auto partitioned_window = partition_window(window); // 3) Partition the blocks. - // For now, just evenly partition the blocks based on their solved - // positions. - // TODO: This is a huge simplification. 
We do not even know if the lower - // partition has space for the blocks that want to be on that side! - // Instead of just using x/y position, we also need to take into - // account the mass of the blocks and ensure that there is enough - // capacity for the given block's mass. One idea is to partition - // the blocks using this basic approach and then fixing up any - // blocks that should not be on the given side (due to type or - // capacity constraints). - if (partition_dir == e_partition_dir::VERTICAL) { - // Sort the blocks in the window by the x coordinate. - std::sort(window.contained_blocks.begin(), window.contained_blocks.end(), [&](APBlockId a, APBlockId b) { - return p_placement.block_x_locs[a] < p_placement.block_x_locs[b]; - }); + partition_blocks_in_window(window, partitioned_window, group_id, p_placement); - } else { - VTR_ASSERT(partition_dir == e_partition_dir::HORIZONTAL); - // Sort the blocks in the window by the y coordinate. - std::sort(window.contained_blocks.begin(), window.contained_blocks.end(), [&](APBlockId a, APBlockId b) { - return p_placement.block_y_locs[a] < p_placement.block_y_locs[b]; - }); + // 4) Enqueue the new windows. + window_queue.push(std::move(partitioned_window.lower_window)); + window_queue.push(std::move(partitioned_window.upper_window)); + + // Pop the top element off the queue. This will invalidate the window + // object. 
+ window_queue.pop(); + } + + if (log_verbosity_ >= 10) { + VTR_LOG("\t%zu finalized windows.\n", + finished_windows.size()); + + if (log_verbosity_ >= 30) { + for (const SpreadingWindow& window : finished_windows) { + VTR_LOG("\t\t[(%.1f, %.1f), (%.1f, %.1f)]\n", + window.region.xmin(), window.region.ymin(), + window.region.xmax(), window.region.ymax()); + PrimitiveVector window_capacity = capacity_prefix_sum_.get_sum(model_grouper_.get_models_in_group(group_id), + window.region); + VTR_LOG("\t\t\tCapacity: %f\n", + window_capacity.manhattan_norm()); + VTR_LOG("\t\t\tNumber of contained blocks: %zu\n", + window.contained_blocks.size()); + } } + } + + // Move the blocks into the bins. + move_blocks_out_of_windows(finished_windows); + + // Verify that the bins are valid after moving blocks back from windows. + VTR_ASSERT_SAFE(density_manager_->verify()); +} + +PartitionedWindow BiPartitioningPartialLegalizer::partition_window(SpreadingWindow& window) { + PartitionedWindow partitioned_window; + + // Select the partition direction. + // To keep it simple, we partition the direction which would cut the + // region the most. + // TODO: Should explore making the partition line based on the capacity + // of the two partitioned regions. We may want to cut the + // region in half such that the mass of the atoms contained within + // the two future regions is equal. + partitioned_window.partition_dir = e_partition_dir::VERTICAL; + if (window.region.height() > window.region.width()) + partitioned_window.partition_dir = e_partition_dir::HORIZONTAL; + + // To keep it simple, just cut the space in half. + // TODO: Should investigate other cutting techniques. Cutting perfectly + // in half may not be the most efficient technique. 
+    SpreadingWindow& lower_window = partitioned_window.lower_window;
+    SpreadingWindow& upper_window = partitioned_window.upper_window;
+    partitioned_window.pivot_pos = 0.f;
+    if (partitioned_window.partition_dir == e_partition_dir::VERTICAL) {
+        // Find the x-coordinate of a cut line directly in the middle of the
+        // region. We floor this to prevent fractional cut lines.
+        double pivot_x = std::floor((window.region.xmin() + window.region.xmax()) / 2.0);
+
+        // Cut the region at this cut line.
+        lower_window.region = vtr::Rect<double>(vtr::Point<double>(window.region.xmin(),
+                                                                   window.region.ymin()),
+                                                vtr::Point<double>(pivot_x,
+                                                                   window.region.ymax()));
+
+        upper_window.region = vtr::Rect<double>(vtr::Point<double>(pivot_x,
+                                                                   window.region.ymin()),
+                                                vtr::Point<double>(window.region.xmax(),
+                                                                   window.region.ymax()));
+        partitioned_window.pivot_pos = pivot_x;
+    } else {
+        VTR_ASSERT(partitioned_window.partition_dir == e_partition_dir::HORIZONTAL);
+        // Similarly in the y direction, find the non-fractional y coordinate
+        // to make a horizontal cut.
+        double pivot_y = std::floor((window.region.ymin() + window.region.ymax()) / 2.0);
+
+        // Then cut the window.
+        lower_window.region = vtr::Rect<double>(vtr::Point<double>(window.region.xmin(),
+                                                                   window.region.ymin()),
+                                                vtr::Point<double>(window.region.xmax(),
+                                                                   pivot_y));
+
+        upper_window.region = vtr::Rect<double>(vtr::Point<double>(window.region.xmin(),
+                                                                   pivot_y),
+                                                vtr::Point<double>(window.region.xmax(),
+                                                                   window.region.ymax()));
+        partitioned_window.pivot_pos = pivot_y;
+    }
+
+    return partitioned_window;
+}
+
+void BiPartitioningPartialLegalizer::partition_blocks_in_window(
+    SpreadingWindow& window,
+    PartitionedWindow& partitioned_window,
+    ModelGroupId group_id,
+    const PartialPlacement& p_placement) {
+
+    SpreadingWindow& lower_window = partitioned_window.lower_window;
+    SpreadingWindow& upper_window = partitioned_window.upper_window;
+
+    // Get the capacity of each window partition.
+    const std::vector<int>& model_indices = model_grouper_.get_models_in_group(group_id);
+    PrimitiveVector lower_window_capacity = capacity_prefix_sum_.get_sum(model_indices,
+                                                                         lower_window.region);
+    PrimitiveVector upper_window_capacity = capacity_prefix_sum_.get_sum(model_indices,
+                                                                         upper_window.region);
+
+    // Due to the division by the area, we may get numerical underflows /
+    // overflows which accumulate. If they accumulate in the positive
+    // direction, it is not a big deal; but in the negative direction it
+    // will cause problems with the algorithm below. Clamp any negative
+    // numbers to 0.
+    lower_window_capacity.relu();
+    upper_window_capacity.relu();
+    PrimitiveVector lower_window_underfill = lower_window_capacity;
+    PrimitiveVector upper_window_underfill = upper_window_capacity;
+    VTR_ASSERT_SAFE(lower_window_underfill.is_non_negative());
+    VTR_ASSERT_SAFE(upper_window_underfill.is_non_negative());
+
+    // FIXME: We need to take into account the current utilization of the
+    //        fixed blocks... We need to take into account that they are there.
+    //        Currently we assume the underfill is the capacity
+    //        Without this, we may overfill blocks which have fixed blocks in
+    //        them.
+
+    // If the lower window has no space, put all of the blocks in the upper window.
+    // NOTE: We give some room due to numerical overflows from the prefix sum.
+    if (lower_window_underfill.manhattan_norm() < 0.01f) {
+        upper_window.contained_blocks = std::move(window.contained_blocks);
+        return;
+    }
+    // If the upper window has no space, put all of the blocks in the lower window.
+    if (upper_window_underfill.manhattan_norm() < 0.01f) {
+        lower_window.contained_blocks = std::move(window.contained_blocks);
+        return;
+    }
-        // Find the pivot block position.
-        size_t pivot = window.contained_blocks.size() / 2;
 
+    // Reserve space in each of the windows to make insertion faster.
+    upper_window.contained_blocks.reserve(window.contained_blocks.size());
+    lower_window.contained_blocks.reserve(window.contained_blocks.size());
+
+    // Sort the blocks and get the pivot index. The pivot index is the index in
+    // the windows contained block which decides which sub-window the block
+    // wants to be in. The blocks at indices [0, pivot) want to be in the lower
+    // window, blocks at indices [pivot, num_blks) want to be in the upper window.
+    // This want is based on the solved positions of the blocks.
+    size_t pivot;
+    if (partitioned_window.partition_dir == e_partition_dir::VERTICAL) {
+        // Sort the blocks in the window by the x coordinate.
+        std::sort(window.contained_blocks.begin(), window.contained_blocks.end(), [&](APBlockId a, APBlockId b) {
+            return p_placement.block_x_locs[a] < p_placement.block_x_locs[b];
+        });
+        auto upper = std::upper_bound(window.contained_blocks.begin(),
+                                      window.contained_blocks.end(),
+                                      partitioned_window.pivot_pos,
+                                      [&](double value, APBlockId blk_id) {
+                                          return value < p_placement.block_x_locs[blk_id];
+                                      });
+        pivot = std::distance(window.contained_blocks.begin(), upper);
+    } else {
+        VTR_ASSERT(partitioned_window.partition_dir == e_partition_dir::HORIZONTAL);
+        // Sort the blocks in the window by the y coordinate.
+        std::sort(window.contained_blocks.begin(), window.contained_blocks.end(), [&](APBlockId a, APBlockId b) {
+            return p_placement.block_y_locs[a] < p_placement.block_y_locs[b];
+        });
+        auto upper = std::upper_bound(window.contained_blocks.begin(),
+                                      window.contained_blocks.end(),
+                                      partitioned_window.pivot_pos,
+                                      [&](double value, APBlockId blk_id) {
+                                          return value < p_placement.block_y_locs[blk_id];
+                                      });
+        pivot = std::distance(window.contained_blocks.begin(), upper);
+    }
 
-        // Copy the blocks to the windows based on the pivot.
-        for (size_t i = 0; i < pivot; i++) {
+    // Try to place the blocks that want to be in the lower window from lower
+    // to upper.
+    std::vector<APBlockId> unplaced_blocks;
+    for (size_t i = 0; i < pivot; i++) {
+        const PrimitiveVector& blk_mass = density_manager_->mass_calculator().get_block_mass(window.contained_blocks[i]);
+        VTR_ASSERT_SAFE(lower_window_underfill.is_non_negative());
+        // Try to put the blk in the window.
+        lower_window_underfill -= blk_mass;
+        if (lower_window_underfill.is_non_negative())
+            // If the underfill is not negative, then we can add it to the window.
             lower_window.contained_blocks.push_back(window.contained_blocks[i]);
+        else {
+            // If the underfill went negative, undo the addition and mark this
+            // block as unplaced.
+            lower_window_underfill += blk_mass;
+            unplaced_blocks.push_back(window.contained_blocks[i]);
         }
-        for (size_t i = pivot; i < window.contained_blocks.size(); i++) {
+    }
+    // Try to place the blocks that want to be in the upper window from upper
+    // to lower.
+    // NOTE: This needs to be an int in case the pivot is 0.
+    for (int i = static_cast<int>(window.contained_blocks.size()) - 1; i >= static_cast<int>(pivot); i--) {
+        const PrimitiveVector& blk_mass = density_manager_->mass_calculator().get_block_mass(window.contained_blocks[i]);
+        VTR_ASSERT_SAFE(upper_window_underfill.is_non_negative());
+        upper_window_underfill -= blk_mass;
+        if (upper_window_underfill.is_non_negative())
             upper_window.contained_blocks.push_back(window.contained_blocks[i]);
+        else {
+            upper_window_underfill += blk_mass;
+            unplaced_blocks.push_back(window.contained_blocks[i]);
         }
+    }
 
-        // 4) Enqueue the new windows.
-        window_queue.push(std::move(lower_window));
-        window_queue.push(std::move(upper_window));
-
-        // Pop the top element off the queue. This will invalidate the window
-        // object.
-        window_queue.pop();
+    // Handle the unplaced blocks.
+    // To handle these blocks, we will try to balance the overfill in both
+    // windows. To do this we sort the unplaced blocks by largest mass to
+    // smallest mass. Then we place each block in the bin with the highest
+    // underfill.
+    std::sort(unplaced_blocks.begin(),
+              unplaced_blocks.end(),
+              [&](APBlockId a, APBlockId b) {
+                  const auto& blk_a_mass = density_manager_->mass_calculator().get_block_mass(a);
+                  const auto& blk_b_mass = density_manager_->mass_calculator().get_block_mass(b);
+                  return blk_a_mass.manhattan_norm() > blk_b_mass.manhattan_norm();
+              });
+    for (APBlockId blk_id : unplaced_blocks) {
+        // Project a copy of each window's underfill onto the mass. This gives
+        // the underfill in the dimensions the mass cares about (originals stay intact).
+        const PrimitiveVector& blk_mass = density_manager_->mass_calculator().get_block_mass(blk_id);
+        PrimitiveVector projected_lower_window_underfill = lower_window_underfill;
+        projected_lower_window_underfill.project(blk_mass);
+        PrimitiveVector projected_upper_window_underfill = upper_window_underfill;
+        projected_upper_window_underfill.project(blk_mass);
+        // Put the block in the window with a higher underfill. This tries to
+        // balance the overfill as much as possible. This works even if the
+        // overfill becomes negative.
+        if (projected_lower_window_underfill.manhattan_norm() >= projected_upper_window_underfill.manhattan_norm()) {
+            lower_window.contained_blocks.push_back(blk_id);
+            lower_window_underfill -= blk_mass;
+        } else {
+            upper_window.contained_blocks.push_back(blk_id);
+            upper_window_underfill -= blk_mass;
+        }
     }
+}
+
+void BiPartitioningPartialLegalizer::move_blocks_out_of_windows(
+    std::vector<SpreadingWindow>& finished_windows) {
 
-    // Move the blocks into the bins.
     for (const SpreadingWindow& window : finished_windows) {
         // Get the bin at the center of the window.
         vtr::Point<double> center = get_center_of_rect(window.region);
@@ -1079,10 +1522,4 @@ void BiPartitioningPartialLegalizer::legalize(PartialPlacement& p_placement) {
             density_manager_->insert_block_into_bin(blk_id, bin_id);
         }
     }
-
-    // Verify that the bins are valid before export.
-    VTR_ASSERT(density_manager_->verify());
-
-    // Export the legalized placement to the partial placement.
- density_manager_->export_placement_from_bins(p_placement); } diff --git a/vpr/src/analytical_place/partial_legalizer.h b/vpr/src/analytical_place/partial_legalizer.h index 2921465fae3..a6cf5d30bc9 100644 --- a/vpr/src/analytical_place/partial_legalizer.h +++ b/vpr/src/analytical_place/partial_legalizer.h @@ -13,12 +13,16 @@ #pragma once +#include #include #include #include "ap_netlist_fwd.h" #include "flat_placement_bins.h" #include "flat_placement_density_manager.h" +#include "model_grouper.h" #include "primitive_vector.h" +#include "vtr_geometry.h" +#include "vtr_prefix_sum.h" #include "vtr_vector.h" // Forward declarations @@ -90,6 +94,7 @@ class PartialLegalizer { std::unique_ptr make_partial_legalizer(e_partial_legalizer legalizer_type, const APNetlist& netlist, std::shared_ptr density_manager, + const Prepacker& prepacker, int log_verbosity); /** @@ -240,6 +245,97 @@ class FlowBasedLegalizer : public PartialLegalizer { void legalize(PartialPlacement& p_placement) final; }; +/** + * @brief A cluster of flat placement bins. + */ +typedef typename std::vector FlatPlacementBinCluster; + +/** + * @brief Enum for the direction of a partition. + */ +enum class e_partition_dir { + VERTICAL, + HORIZONTAL +}; + +/** + * @brief Spatial window used to spread the blocks contained within. + * + * This window's region is identified and grown until it has enough space to + * accommodate the blocks stored within. This window is then successively + * partitioned until it is small enough (blocks are not too dense). + */ +struct SpreadingWindow { + /// @brief The blocks contained within this window. + std::vector contained_blocks; + + /// @brief The 2D region of space that this window covers. + vtr::Rect region; +}; + +/** + * @brief Struct to hold the information from partitioning a window. Contains + * the two window partitions and some information about how they were + * generated. + */ +struct PartitionedWindow { + /// @brief The direction of the partition.
+ e_partition_dir partition_dir; + + /// @brief The position that the parent window was split at. + double pivot_pos; + + /// @brief The lower window. This is the left partition when the direction + /// is vertical, and the bottom partition when the direction is + /// horizontal. + SpreadingWindow lower_window; + + /// @brief The upper window. This is the right partition when the direction + /// is vertical, and the top partition when the direction is + /// horizontal. + SpreadingWindow upper_window; +}; + +/** + * @brief Wrapper class around the prefix sum class which creates a prefix sum + * for each model type and has helper methods for getting the sums over + * regions. + */ +class PerModelPrefixSum2D { + public: + PerModelPrefixSum2D() = default; + + /** + * @brief Construct prefix sums for each of the models in the architecture. + * + * Uses the density manager to get the size of the placeable region. + * + * The lookup is a lambda used to populate the prefix sum. It provides + * the model index, x, and y to be populated. + */ + PerModelPrefixSum2D(const FlatPlacementDensityManager& density_manager, + t_model* user_models, + t_model* library_models, + std::function lookup); + + /** + * @brief Get the sum for a given model over the given region. + */ + float get_model_sum(int model_index, + const vtr::Rect& region) const; + + /** + * @brief Get the multi-dimensional sum over the given model indices over + * the given region. + */ + PrimitiveVector get_sum(const std::vector& model_indices, + const vtr::Rect& region) const; + + private: + /// @brief Per-Model Prefix Sums + std::vector> model_prefix_sum_; +}; + /** * @brief A bi-paritioning spreading full legalizer.
* @@ -258,6 +354,19 @@ class FlowBasedLegalizer : public PartialLegalizer { * GPlace3.0: https://doi.org/10.1145/3233244 */ class BiPartitioningPartialLegalizer : public PartialLegalizer { + private: + /// @brief The maximum gap between overfilled bins we can have in a flat + /// placement bin cluster. For example, if this is set to 1, we will + /// allow two overfilled bins to be clustered together if they only + /// have 1 non-overfilled bin of gap between them. + /// The rationale behind this is that it allows us to predict that the windows + /// created for each cluster will overlap if they are within some gap distance. + /// Increasing this number too much may cluster bins together too much and + /// create large windows; decreasing this number will put more pressure on + /// the window generation code, which can increase window size and runtime. + /// TODO: Should this be distance instead of number of bins? + static constexpr int max_bin_cluster_gap_ = 1; + public: /** * @brief Constructor for the bi-partitioning partial legalizer. @@ -267,6 +376,7 @@ class BiPartitioningPartialLegalizer : public PartialLegalizer { */ BiPartitioningPartialLegalizer(const APNetlist& netlist, std::shared_ptr density_manager, + const Prepacker& prepacker, int log_verbosity); /** @@ -278,8 +388,130 @@ class BiPartitioningPartialLegalizer : public PartialLegalizer { */ void legalize(PartialPlacement& p_placement) final; + private: + // ======================================================================== + // Identifying spreading windows + // ======================================================================== + + /** + * @brief Identify spreading windows which contain overfilled bins in the + * given model group on the device and do not overlap. + * + * This process is split into 4 stages: + * 1) Overfilled bins are identified and clustered. + * 2) Grow windows around the overfilled bin clusters.
These windows + * will grow until there is just enough space to accommodate the blocks + * within the window (capacity of the window is larger than the utilization). + * 3) Merge overlapping windows. + * 4) Move the blocks within these window regions from their bins into + * their windows. This updates the current utilization of bins, making + * spreading easier. + * + * We identify non-overlapping windows for different model groups independently + * for a few reasons: + * - Each model group, by design, can be spread independently of each other. + * This reduces the problem size by the number of groups. + * - Without model groups, one block placed on the wrong side of the chip + * may create a window the size of the entire chip! This would rip up and + * spread all the blocks in the chip, which is very expensive. + * - This allows us to ignore block models which are already in legal + * positions. + */ + std::vector identify_non_overlapping_windows(ModelGroupId group_id); + + /** + * @brief Identifies clusters of overfilled bins for the given model group. + * + * This locates clusters of overfilled bins which are within a given + * distance from each other. + */ + std::vector get_overfilled_bin_clusters(ModelGroupId group_id); + + /** + * @brief Creates and grows minimum spanning windows around the given + * overfilled bin clusters. + * + * Here, minimum means that the windows are just large enough such that the + * capacity of the bins within the window is larger than the utilization for + * the given model group. + */ + std::vector get_min_windows_around_clusters( + const std::vector& overfilled_bin_clusters, + ModelGroupId group_id); + + /** + * @brief Merges overlapping windows in the given vector of windows. + * + * The resulting merged windows are stored in the given windows object. + */ + void merge_overlapping_windows(std::vector& windows); + + /** + * @brief Moves the blocks out of their bins and into their window.
+ * + * Only blocks in the given model group will be moved. + */ + void move_blocks_into_windows(std::vector& non_overlapping_windows, + ModelGroupId group_id); + + // ======================================================================== + // Spreading blocks over windows + // ======================================================================== + + /** + * @brief Spread the blocks over each of the given non-overlapping windows. + * + * The partial placement solution from the solver is used to decide which + * window partition to put a block into. The model group this window is + * spreading over can make it more efficient to make decisions. + */ + void spread_over_windows(std::vector& non_overlapping_windows, + const PartialPlacement& p_placement, + ModelGroupId group_id); + + /** + * @brief Partition the given window into two sub-windows. + * + * We return extra information about how the window was created; for example, + * the direction of the partition (vertical / horizontal) and the position + * of the cut. + */ + PartitionedWindow partition_window(SpreadingWindow& window); + + /** + * @brief Partition the blocks in the given window into the partitioned + * windows. + * + * This is kept separate from splitting the physical window region for + * cleanliness. After this point, the window will not have any atoms in + * it. + */ + void partition_blocks_in_window(SpreadingWindow& window, + PartitionedWindow& partitioned_window, + ModelGroupId group_id, + const PartialPlacement& p_placement); + + /** + * @brief Move the blocks out of the given windows and put them back into + * the correct bin according to the window that contains them. + */ + void move_blocks_out_of_windows(std::vector& finished_windows); + private: /// @brief The density manager which manages the capacity and utilization /// of regions of the device. std::shared_ptr density_manager_; + + /// @brief Grouper object which handles grouping together models which must + /// be spread together.
Models are grouped based on the pack patterns + /// that they can form with each other. + ModelGrouper model_grouper_; + + /// @brief The prefix sum for the capacity of the device, as given by the + /// density manager. We will need to get the capacity of 2D regions + /// of the device very often for this partial legalizer. This data + /// structure greatly improves the time complexity of this operation. + /// + /// This is populated in the constructor and not modified. + PerModelPrefixSum2D capacity_prefix_sum_; }; diff --git a/vpr/src/analytical_place/primitive_vector.h b/vpr/src/analytical_place/primitive_vector.h index 3297d417915..d76ae8b509d 100644 --- a/vpr/src/analytical_place/primitive_vector.h +++ b/vpr/src/analytical_place/primitive_vector.h @@ -10,8 +10,11 @@ #pragma once +#include #include #include +#include +#include "vtr_log.h" /** * @brief A sparse vector class to store an M-dimensional quantity of primitives @@ -48,9 +51,24 @@ class PrimitiveVector { * This is a common enough feature to use its own setter. */ inline void add_val_to_dim(float val, size_t dim) { - if (data_.count(dim) == 0) - data_[dim] = 0.f; - data_[dim] += val; + auto it = data_.find(dim); + if (it == data_.end()) + data_.insert({dim, val}); + else { + it->second += val; + } + } + + /** + * @brief Subtract the value from the given dimension. + */ + inline void subtract_val_from_dim(float val, size_t dim) { + auto it = data_.find(dim); + if (it == data_.end()) + data_.insert({dim, -1.0f * val}); + else { + it->second -= val; + } } /** @@ -104,19 +122,26 @@ class PrimitiveVector { */ inline PrimitiveVector& operator+=(const PrimitiveVector& rhs) { for (const auto& p : rhs.data_) { - float dim_val = get_dim_val(p.first); - set_dim_val(p.first, dim_val + p.second); + add_val_to_dim(p.second, p.first); } return *this; } + /** + * @brief Element-wise addition of this with rhs.
+ */ + inline PrimitiveVector operator+(const PrimitiveVector& rhs) const { + PrimitiveVector res = *this; + res += rhs; + return res; + } + /** * @brief Element-wise de-accumulation of rhs into this. */ inline PrimitiveVector& operator-=(const PrimitiveVector& rhs) { for (const auto& p : rhs.data_) { - float dim_val = get_dim_val(p.first); - set_dim_val(p.first, dim_val - p.second); + subtract_val_from_dim(p.second, p.first); } return *this; } @@ -140,6 +165,25 @@ class PrimitiveVector { return *this; } + /** + * @brief Element-wise division by a scalar. rhs is not checked for zero. + */ + inline PrimitiveVector& operator/=(float rhs) { + for (auto& p : data_) { + p.second /= rhs; + } + return *this; + } + + /** + * @brief Element-wise division by a scalar; implemented via operator/=. + */ + inline PrimitiveVector operator/(float rhs) const { + PrimitiveVector res = *this; + res /= rhs; + return res; + } + /** * @brief Returns true if any dimension of this vector is less than any * dimension of rhs; false otherwise. @@ -168,12 +212,11 @@ class PrimitiveVector { * is positive, it will not change. */ inline void relu() { - for (auto& p : data_) { - // TODO: Should remove the zero elements from the map to improve - // efficiency. - if (p.second < 0.f) - p.second = 0.f; - } + std::erase_if(data_, [](const std::pair& p) { + // Note: we erase the non-positive entries from the map to improve the + // performance of future operations on this vector. + return p.second <= 0.0f; + }); } /** @@ -234,12 +277,36 @@ class PrimitiveVector { inline void project(const PrimitiveVector& dir) { // For each dimension of this vector, if that dimension is zero in dir // set the dimension to zero. + std::erase_if(data_, [&](const std::pair& p) { + return dir.get_dim_val(p.first) == 0.0f; + }); + } + + /** + * @brief Gets the non-zero dimensions of this vector. + */ + inline std::vector get_non_zero_dims() const { + std::vector non_zero_dims; for (auto& p : data_) { - // TODO: Instead of zeroing the dimension, it should be removed - // from the map.
- if (dir.get_dim_val(p.first) == 0.f) - p.second = 0.f; + if (p.second != 0.0f) + non_zero_dims.push_back(p.first); } + return non_zero_dims; + } + + /** + * @brief Returns true if this and other do not share any non-zero dimensions. + */ + inline bool are_dims_disjoint(const PrimitiveVector& other) const { + for (const auto& p : other.data_) { + // If this and other are both non-zero in a dimension, then they are not + // disjoint. + if (p.second != 0.0f && get_dim_val(p.first) != 0.0f) { + return false; + } + } + // If they do not share any non-zero dimensions, then they are disjoint. + return true; } /** @@ -268,4 +335,13 @@ class PrimitiveVector { } return res; } + + /** + * @brief Debug printing method; logs one "(dim, value)" line per stored entry. + */ + inline void print() const { + for (const auto& p : data_) { + VTR_LOG("(%zu, %f)\n", p.first, p.second); + } + } }; diff --git a/vpr/src/pack/appack_context.h b/vpr/src/pack/appack_context.h index 9ec11fb3273..fac548d6360 100644 --- a/vpr/src/pack/appack_context.h +++ b/vpr/src/pack/appack_context.h @@ -62,14 +62,16 @@ struct t_appack_options { // We use the following gain attenuation function: // attenuation = { 1 - (quad_fac * d)^2 if d < dist_th // { 1 / sqrt(d - sqrt_offset) if d >= dist_th + // The numbers below were empirically found to work well. + // Distance threshold which decides when to use quadratic decay or inverted // sqrt decay. If the distance is less than this threshold, quadratic decay // is used. Inverted sqrt is used otherwise. - float dist_th = 1.0f; + float dist_th = 5.0f; // Horizontal offset to the inverted sqrt decay. - float sqrt_offset = -2.9f; + float sqrt_offset = -1.1f; // Scaling factor for the quadratic decay term.
- float quad_fac = 0.7f; + float quad_fac = 0.1543f; // =========== Candidate selection distance ============================ // // When selecting candidates, what distance from the cluster will we diff --git a/vpr/src/pack/prepack.h b/vpr/src/pack/prepack.h index e521908d251..5222046ddb6 100644 --- a/vpr/src/pack/prepack.h +++ b/vpr/src/pack/prepack.h @@ -286,6 +286,13 @@ class Prepacker { return chain_info_.size(); } + /** + * @brief Get a list of all the pack patterns in the architecture. + */ + inline const std::vector& get_all_pack_patterns() const { + return list_of_pack_patterns; + } + private: /** * Pre-pack atoms in netlist to molecules diff --git a/vpr/test/test_ap_primitive_vector.cpp b/vpr/test/test_ap_primitive_vector.cpp index 7a29334e939..425f4e20f35 100644 --- a/vpr/test/test_ap_primitive_vector.cpp +++ b/vpr/test/test_ap_primitive_vector.cpp @@ -8,6 +8,7 @@ * PrimitiveVector object are working as expected. */ +#include #include "catch2/catch_test_macros.hpp" #include "primitive_vector.h" @@ -310,6 +311,60 @@ TEST_CASE("test_ap_primitive_vector_verify", "[vpr_ap]") { res = PrimitiveVector::max(vec2, vec1); REQUIRE(res == golden); } + + SECTION("Test more operators and methods") { + PrimitiveVector vec1, vec2; + + // Subtract value from dimension + vec1.set_dim_val(0, 5.f); + vec1.subtract_val_from_dim(3.f, 0); + REQUIRE(vec1.get_dim_val(0) == 2.f); + + // Element-wise addition operator + vec1.clear(); + vec1.set_dim_val(0, 1.f); + vec1.set_dim_val(1, 2.f); + vec2.clear(); + vec2.set_dim_val(0, 3.f); + vec2.set_dim_val(1, 4.f); + PrimitiveVector vec_sum = vec1 + vec2; + REQUIRE(vec_sum.get_dim_val(0) == 4.f); + REQUIRE(vec_sum.get_dim_val(1) == 6.f); + + // Element-wise division operator + vec1.clear(); + vec1.set_dim_val(0, 10.f); + vec1.set_dim_val(1, 20.f); + vec1 /= 2.f; + REQUIRE(vec1.get_dim_val(0) == 5.f); + REQUIRE(vec1.get_dim_val(1) == 10.f); + + // Element-wise division operator (const) + vec1.clear(); + vec1.set_dim_val(0, 10.f); + 
vec1.set_dim_val(1, 20.f); + PrimitiveVector vec_div = vec1 / 2.f; + REQUIRE(vec_div.get_dim_val(0) == 5.f); + REQUIRE(vec_div.get_dim_val(1) == 10.f); + + // Get non-zero dimensions + vec1.clear(); + vec1.set_dim_val(0, 1.f); + vec1.set_dim_val(2, 3.f); + std::vector non_zero_dims = vec1.get_non_zero_dims(); + REQUIRE(std::find(non_zero_dims.begin(), non_zero_dims.end(), 0) != non_zero_dims.end()); + REQUIRE(std::find(non_zero_dims.begin(), non_zero_dims.end(), 2) != non_zero_dims.end()); + REQUIRE(std::find(non_zero_dims.begin(), non_zero_dims.end(), 1) == non_zero_dims.end()); + + // Test orthogonal vectors + vec1.clear(); + vec2.clear(); + vec1.set_dim_val(0, 1.f); + vec2.set_dim_val(1, 2.f); + REQUIRE(vec1.are_dims_disjoint(vec2)); + vec2.set_dim_val(0, 3.f); + REQUIRE(!vec1.are_dims_disjoint(vec2)); + } } } // namespace diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/bipartitioning_global_placer/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/bipartitioning_global_placer/config/golden_results.txt index 54b30cafac6..f132845c781 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/bipartitioning_global_placer/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/bipartitioning_global_placer/config/golden_results.txt @@ -1,5 +1,5 @@ - arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est 
total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time routed_wirelength avg_routed_wirelength routed_wiresegment avg_routed_wiresegment total_nets_routed total_connections_routed total_heap_pushes total_heap_pops logic_block_area_total logic_block_area_used routing_area_total routing_area_per_tile crit_path_route_success_iteration num_rr_graph_nodes num_rr_graph_edges collapsed_nodes critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS create_rr_graph_time create_intra_cluster_rr_graph_time adding_internal_edges route_mem crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time router_lookahead_mem tile_lookahead_computation_time router_lookahead_computation_time - k6_frac_N10_40nm.xml apex4.pre-vpr.blif common 6.15 vpr 74.52 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 86 9 -1 -1 success v8.0.0-12210-g8eeb433b5-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-01T22:46:27 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 76312 9 19 897 28 0 768 114 16 16 256 -1 mcnc_medium -1 -1 7446 10050 1525 5847 2678 74.5 MiB 1.98 0.01 5.22187 -85.9445 -5.22187 nan 0.05 0.00204197 0.00165471 0.0922108 0.0778153 74.5 MiB 1.98 74.5 MiB 1.87 12280 16.0104 3195 4.16558 8207 35340 1711962 391448 1.05632e+07 4.63488e+06 1.26944e+06 4958.75 35 28900 206586 -1 5.82297 nan -93.0212 -5.82297 0 0 0.19 -1 -1 74.5 MiB 0.60 0.268738 0.231571 74.5 MiB -1 0.05 - k6_frac_N10_40nm.xml des.pre-vpr.blif common 2.87 vpr 75.93 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 53 256 -1 -1 success v8.0.0-12210-g8eeb433b5-dirty release 
VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-01T22:46:27 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 77752 256 245 954 501 0 711 554 22 22 484 -1 mcnc_large -1 -1 8904 66500 1807 14947 49746 75.9 MiB 0.88 0.01 4.19633 -806.67 -4.19633 nan 0.07 0.00209601 0.00184942 0.0749397 0.0672821 75.9 MiB 0.88 75.9 MiB 0.87 12620 17.7496 3382 4.75668 3608 8619 480767 96513 2.15576e+07 2.85638e+06 1.49107e+06 3080.73 15 47664 245996 -1 4.54897 nan -867.702 -4.54897 0 0 0.22 -1 -1 75.9 MiB 0.19 0.170591 0.156391 75.9 MiB -1 0.07 - k6_frac_N10_40nm.xml ex1010.pre-vpr.blif common 19.24 vpr 103.73 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 289 10 -1 -1 success v8.0.0-12210-g8eeb433b5-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-01T22:46:27 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 106224 10 10 2659 20 0 2320 309 22 22 484 -1 mcnc_large -1 -1 33337 60861 15622 40285 4954 103.7 MiB 7.37 0.03 7.08906 -67.526 -7.08906 nan 0.15 0.00509718 0.00406142 0.35604 0.28949 103.7 MiB 7.37 103.7 MiB 7.07 48698 20.9905 12433 5.35905 17466 71913 3700066 508136 2.15576e+07 1.55754e+07 3.51389e+06 7260.09 20 64568 594370 -1 7.09981 nan -68.5294 -7.09981 0 0 0.63 -1 -1 103.7 MiB 1.13 0.742979 0.632564 103.7 MiB -1 0.15 - k6_frac_N10_40nm.xml seq.pre-vpr.blif common 5.15 vpr 75.82 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 85 41 -1 -1 success v8.0.0-12210-g8eeb433b5-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-01T22:46:27 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 77640 41 35 1006 76 0 827 161 16 16 256 -1 mcnc_medium -1 -1 8073 13708 1574 6075 6059 75.8 MiB 1.93 0.01 5.2078 -150.175 -5.2078 nan 0.05 0.00241319 0.00198256 0.0910059 0.0770604 75.8 MiB 1.93 75.8 MiB 1.81 13112 15.8549 3429 4.14631 6281 26105 949531 164260 1.05632e+07 4.58099e+06 1.26944e+06 4958.75 19 28900 206586 -1 5.48717 nan -159.221 -5.48717 0 0 0.19 -1 -1 75.8 MiB 0.33 
0.222488 0.193946 75.8 MiB -1 0.05 + arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time initial_placed_wirelength_est placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time initial_placed_CPD_est placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time routed_wirelength avg_routed_wirelength routed_wiresegment avg_routed_wiresegment total_nets_routed total_connections_routed total_heap_pushes total_heap_pops logic_block_area_total logic_block_area_used routing_area_total routing_area_per_tile crit_path_route_success_iteration num_rr_graph_nodes num_rr_graph_edges collapsed_nodes critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS create_rr_graph_time create_intra_cluster_rr_graph_time adding_internal_edges route_mem crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time router_lookahead_mem tile_lookahead_computation_time router_lookahead_computation_time + k6_frac_N10_40nm.xml apex4.pre-vpr.blif common 4.94 vpr 74.77 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 80 9 -1 -1 success v8.0.0-12284-g0a886e4da-dirty release 
VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-19T20:42:32 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 76564 9 19 897 28 0 624 108 16 16 256 -1 mcnc_medium -1 -1 10315 6596 9617 1559 5516 2542 74.8 MiB 1.83 0.01 6.75959 5.07271 -83.5391 -5.07271 nan 0.05 0.00162447 0.001265 0.077793 0.0643277 74.8 MiB 1.83 74.8 MiB 1.37 11052 17.7400 2817 4.52167 5101 22566 851127 138852 1.05632e+07 4.31152e+06 1.26944e+06 4958.75 19 28900 206586 -1 5.37355 nan -88.7113 -5.37355 0 0 0.20 -1 -1 74.8 MiB 0.27 0.190594 0.164391 74.8 MiB -1 0.05 + k6_frac_N10_40nm.xml des.pre-vpr.blif common 2.43 vpr 75.06 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 59 256 -1 -1 success v8.0.0-12284-g0a886e4da-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-19T20:42:32 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 76860 256 245 954 501 0 589 560 22 22 484 -1 mcnc_large -1 -1 10234 7797 51314 1070 11670 38574 75.1 MiB 0.67 0.01 6.53248 4.02447 -785.149 -4.02447 nan 0.07 0.00226809 0.00205398 0.0618196 0.0559082 75.1 MiB 0.67 75.1 MiB 0.37 10533 17.8829 2862 4.85908 2507 5465 336298 76364 2.15576e+07 3.17975e+06 1.49107e+06 3080.73 19 47664 245996 -1 4.35047 nan -842.961 -4.35047 0 0 0.22 -1 -1 75.1 MiB 0.19 0.175627 0.161726 75.1 MiB -1 0.07 + k6_frac_N10_40nm.xml ex1010.pre-vpr.blif common 18.05 vpr 102.53 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 283 10 -1 -1 success v8.0.0-12284-g0a886e4da-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-19T20:42:32 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 104988 10 10 2659 20 0 1537 303 22 22 484 -1 mcnc_large -1 -1 38269 26758 56238 15119 35900 5219 102.5 MiB 7.03 0.02 10.0331 6.59208 -63.1998 -6.59208 nan 0.16 0.00681329 0.00553283 0.410131 0.342368 102.5 MiB 7.03 102.5 MiB 4.80 40340 26.2459 10213 6.64476 10566 57669 2722491 354615 2.15576e+07 1.5252e+07 3.51389e+06 7260.09 18 64568 594370 -1 6.59758 nan -64.3078 
-6.59758 0 0 0.64 -1 -1 102.5 MiB 0.98 0.800154 0.691255 102.5 MiB -1 0.16 + k6_frac_N10_40nm.xml seq.pre-vpr.blif common 4.80 vpr 75.61 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 87 41 -1 -1 success v8.0.0-12284-g0a886e4da-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-19T20:42:32 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 77428 41 35 1006 76 0 667 163 16 16 256 -1 mcnc_medium -1 -1 11495 7037 12623 1276 5735 5612 75.6 MiB 1.80 0.01 6.34209 4.94158 -140.443 -4.94158 nan 0.05 0.00182801 0.00144126 0.0708206 0.0592281 75.6 MiB 1.80 75.6 MiB 1.33 11301 16.9430 2961 4.43928 4738 21343 723412 125961 1.05632e+07 4.68878e+06 1.26944e+06 4958.75 18 28900 206586 -1 5.29948 nan -148.755 -5.29948 0 0 0.19 -1 -1 75.6 MiB 0.26 0.191646 0.16645 75.6 MiB -1 0.05 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/none_detailed_placer/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/none_detailed_placer/config/golden_results.txt index 787b532b0b7..6597f69926e 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/none_detailed_placer/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/none_detailed_placer/config/golden_results.txt @@ -1,4 +1,4 @@ -arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap 
place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time routed_wirelength avg_routed_wirelength routed_wiresegment avg_routed_wiresegment total_nets_routed total_connections_routed total_heap_pushes total_heap_pops logic_block_area_total logic_block_area_used routing_area_total routing_area_per_tile crit_path_route_success_iteration num_rr_graph_nodes num_rr_graph_edges collapsed_nodes critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS create_rr_graph_time create_intra_cluster_rr_graph_time adding_internal_edges route_mem crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time router_lookahead_mem tile_lookahead_computation_time router_lookahead_computation_time -k6_frac_N10_40nm.xml apex4.pre-vpr.blif common 4.74 vpr 74.21 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 86 9 -1 -1 success v8.0.0-12241-g26615cb38 release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-12T19:05:19 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 75996 9 19 897 28 0 768 114 16 16 256 -1 mcnc_medium -1 -1 -1 -1 -1 -1 -1 74.2 MiB 1.66 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 74.2 MiB 1.66 74.2 MiB 1.55 17094 22.2868 4573 5.96219 5603 20605 894991 145381 1.05632e+07 4.63488e+06 1.26944e+06 4958.75 18 28900 206586 -1 6.8999 nan -108.582 -6.8999 0 0 0.19 -1 -1 74.2 MiB 0.30 0.11634 0.103759 74.2 MiB -1 0.05 -k6_frac_N10_40nm.xml des.pre-vpr.blif common 2.23 vpr 74.90 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 53 256 -1 -1 success v8.0.0-12241-g26615cb38 release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-12T19:05:19 srivatsan-Precision-Tower-5810 
/home/alex/vtr-verilog-to-routing 76700 256 245 954 501 0 711 554 22 22 484 -1 mcnc_large -1 -1 -1 -1 -1 -1 -1 74.9 MiB 0.48 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 74.9 MiB 0.48 74.9 MiB 0.47 14934 21.0042 3961 5.57103 3454 8241 562985 107042 2.15576e+07 2.85638e+06 1.49107e+06 3080.73 15 47664 245996 -1 5.95192 nan -973.234 -5.95192 0 0 0.22 -1 -1 74.9 MiB 0.20 0.0953982 0.0888954 74.9 MiB -1 0.07 -k6_frac_N10_40nm.xml seq.pre-vpr.blif common 4.68 vpr 75.69 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 85 41 -1 -1 success v8.0.0-12241-g26615cb38 release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-12T19:05:19 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 77504 41 35 1006 76 0 827 161 16 16 256 -1 mcnc_medium -1 -1 -1 -1 -1 -1 -1 75.7 MiB 1.57 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 75.7 MiB 1.57 75.7 MiB 1.44 19170 23.1802 5187 6.27207 6058 23325 1081692 174542 1.05632e+07 4.58099e+06 1.26944e+06 4958.75 18 28900 206586 -1 6.76552 nan -194.633 -6.76552 0 0 0.20 -1 -1 75.7 MiB 0.35 0.128528 0.115002 75.7 MiB -1 0.06 + arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time initial_placed_wirelength_est placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time initial_placed_CPD_est placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time 
place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time routed_wirelength avg_routed_wirelength routed_wiresegment avg_routed_wiresegment total_nets_routed total_connections_routed total_heap_pushes total_heap_pops logic_block_area_total logic_block_area_used routing_area_total routing_area_per_tile crit_path_route_success_iteration num_rr_graph_nodes num_rr_graph_edges collapsed_nodes critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS create_rr_graph_time create_intra_cluster_rr_graph_time adding_internal_edges route_mem crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time router_lookahead_mem tile_lookahead_computation_time router_lookahead_computation_time + k6_frac_N10_40nm.xml apex4.pre-vpr.blif common 4.57 vpr 74.60 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 80 9 -1 -1 success v8.0.0-12284-g0a886e4da-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-19T20:42:32 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 76392 9 19 897 28 0 624 108 16 16 256 -1 mcnc_medium -1 -1 -1 -1 -1 -1 -1 -1 74.6 MiB 1.65 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 74.6 MiB 1.65 74.6 MiB 1.48 14371 23.0674 3784 6.07384 4075 16657 665456 103737 1.05632e+07 4.31152e+06 1.26944e+06 4958.75 17 28900 206586 -1 6.63192 nan -103.794 -6.63192 0 0 0.19 -1 -1 74.6 MiB 0.27 0.120295 0.107523 74.6 MiB -1 0.05 + k6_frac_N10_40nm.xml des.pre-vpr.blif common 1.93 vpr 75.64 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 59 256 -1 -1 success v8.0.0-12284-g0a886e4da-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-19T20:42:32 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 77456 256 245 954 501 0 589 560 22 22 484 -1 mcnc_large -1 -1 -1 -1 -1 -1 -1 -1 75.6 MiB 0.38 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 75.6 MiB 0.38 75.6 MiB 0.37 
12828 21.7793 3449 5.85569 2290 4763 363294 72848 2.15576e+07 3.17975e+06 1.49107e+06 3080.73 12 47664 245996 -1 6.32147 nan -1032.91 -6.32147 0 0 0.22 -1 -1 75.6 MiB 0.16 0.08541 0.0798207 75.6 MiB -1 0.07 + k6_frac_N10_40nm.xml seq.pre-vpr.blif common 4.34 vpr 75.52 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 87 41 -1 -1 success v8.0.0-12284-g0a886e4da-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-19T20:42:32 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 77332 41 35 1006 76 0 667 163 16 16 256 -1 mcnc_medium -1 -1 -1 -1 -1 -1 -1 -1 75.5 MiB 1.46 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 75.5 MiB 1.46 75.5 MiB 1.27 15928 23.8801 4303 6.45127 4201 18009 720686 116311 1.05632e+07 4.68878e+06 1.26944e+06 4958.75 16 28900 206586 -1 6.42149 nan -177.756 -6.42149 0 0 0.20 -1 -1 75.5 MiB 0.28 0.122598 0.110096 75.5 MiB -1 0.05