diff --git a/doc/src/vpr/command_line_usage.rst b/doc/src/vpr/command_line_usage.rst index c77a07fa000..a40c4042432 100644 --- a/doc/src/vpr/command_line_usage.rst +++ b/doc/src/vpr/command_line_usage.rst @@ -1188,6 +1188,16 @@ Analytical Placement is generally split into three stages: Analytical Placement is experimental and under active development. +.. option:: --ap_global_placer {quadratic-bipartitioning-lookahead | quadratic-flowbased-lookahead} + + Controls which Global Placer to use in the AP Flow. + + * ``quadratic-bipartitioning-lookahead`` Use a Global Placer which uses a quadratic solver and a bi-partitioning lookahead legalizer. Anchor points are used to spread the solved solution to the legalized solution. + + * ``quadratic-flowbased-lookahead`` Use a Global Placer which uses a quadratic solver and a multi-commodity-flow-based lookahead legalizer. Anchor points are used to spread the solved solution to the legalized solution. + + **Default:** ``quadratic-bipartitioning-lookahead`` + .. option:: --ap_full_legalizer {naive | appack} Controls which Full Legalizer to use in the AP Flow. @@ -1208,6 +1218,23 @@ Analytical Placement is generally split into three stages: **Default:** ``annealer`` +.. option:: --ap_verbosity + + Controls the verbosity of the AP flow output. + Larger values produce more detailed output, which may be useful for + debugging the algorithms in the AP flow. + + * ``1 <= verbosity < 10`` Print standard, stage-level messages. This will + print messages at the GP, FL, or DP level. + + * ``10 <= verbosity < 20`` Print more detailed messages of what is happening + within stages. For example, show high-level information on the legalization + iterations within the Global Placer. + + * ``20 <= verbosity`` Print very detailed messages on intra-stage algorithms. + + **Default:** ``1`` + .. 
_router_options: diff --git a/libs/libvtrutil/src/vtr_geometry.h b/libs/libvtrutil/src/vtr_geometry.h index 44a39764b5a..3c45a7a8379 100644 --- a/libs/libvtrutil/src/vtr_geometry.h +++ b/libs/libvtrutil/src/vtr_geometry.h @@ -180,6 +180,9 @@ class Rect { ///@brief Returns true if other is contained within the rectangle (including all edges) bool contains(const Rect& other) const; + ///@brief Returns true if other strictly overlaps this rectangle (two rectangles that only share an edge do not overlap) + bool strictly_overlaps(const Rect& other) const; + /** * @brief Checks whether the rectangle is empty * diff --git a/libs/libvtrutil/src/vtr_geometry.tpp b/libs/libvtrutil/src/vtr_geometry.tpp index e2dcc9d3a3c..6d421a856db 100644 --- a/libs/libvtrutil/src/vtr_geometry.tpp +++ b/libs/libvtrutil/src/vtr_geometry.tpp @@ -187,6 +187,12 @@ bool Rect::contains(const Rect& other) const { && other.ymin() >= ymin() && other.ymax() <= ymax(); } +template +bool Rect::strictly_overlaps(const Rect& other) const { + return xmin() < other.xmax() && xmax() > other.xmin() + && ymax() > other.ymin() && ymin() < other.ymax(); +} + template bool Rect::empty() const { return xmax() <= xmin() || ymax() <= ymin(); diff --git a/vpr/src/analytical_place/analytical_placement_flow.cpp b/vpr/src/analytical_place/analytical_placement_flow.cpp index 089995e1cad..ca35516b463 100644 --- a/vpr/src/analytical_place/analytical_placement_flow.cpp +++ b/vpr/src/analytical_place/analytical_placement_flow.cpp @@ -114,7 +114,11 @@ static void convert_flat_to_partial_placement(const FlatPlacementInfo& flat_plac * @brief If a flat placement is provided, skips the Global Placer and * converts it to a partial placement. Otherwise, runs the Global Placer. 
*/ -static PartialPlacement run_global_placer(const AtomNetlist& atom_nlist, const APNetlist& ap_netlist, const Prepacker& prepacker, const DeviceContext& device_ctx) { +static PartialPlacement run_global_placer(const t_ap_opts& ap_opts, + const AtomNetlist& atom_nlist, + const APNetlist& ap_netlist, + const Prepacker& prepacker, + const DeviceContext& device_ctx) { if (g_vpr_ctx.atom().flat_placement_info().valid) { VTR_LOG("Flat Placement is provided in the AP flow, skipping the Global Placement.\n"); PartialPlacement p_placement(ap_netlist); @@ -125,13 +129,14 @@ static PartialPlacement run_global_placer(const AtomNetlist& atom_nlist, const A return p_placement; } else { // Run the Global Placer - std::unique_ptr global_placer = make_global_placer(e_global_placer::SimPL, + std::unique_ptr global_placer = make_global_placer(ap_opts.global_placer_type, ap_netlist, prepacker, atom_nlist, device_ctx.grid, device_ctx.logical_block_types, - device_ctx.physical_tile_types); + device_ctx.physical_tile_types, + ap_opts.log_verbosity); return global_placer->place(); } } @@ -156,7 +161,9 @@ void run_analytical_placement_flow(t_vpr_setup& vpr_setup) { print_ap_netlist_stats(ap_netlist); // Run the Global Placer. - PartialPlacement p_placement = run_global_placer(atom_nlist, + const t_ap_opts& ap_opts = vpr_setup.APOpts; + PartialPlacement p_placement = run_global_placer(ap_opts, + atom_nlist, ap_netlist, prepacker, device_ctx); @@ -171,7 +178,6 @@ void run_analytical_placement_flow(t_vpr_setup& vpr_setup) { device_ctx.grid.get_num_layers())); // Run the Full Legalizer. 
- const t_ap_opts& ap_opts = vpr_setup.APOpts; std::unique_ptr full_legalizer = make_full_legalizer(ap_opts.full_legalizer_type, ap_netlist, atom_nlist, diff --git a/vpr/src/analytical_place/ap_flow_enums.h b/vpr/src/analytical_place/ap_flow_enums.h index 6a1148499c1..0c30d74234f 100644 --- a/vpr/src/analytical_place/ap_flow_enums.h +++ b/vpr/src/analytical_place/ap_flow_enums.h @@ -7,6 +7,18 @@ #pragma once +/** + * @brief The type of a Global Placer. + * + * The Analytical Placement flow may implement different Global Placers. This + * enum can select between these different Global Placers. + */ +enum class e_ap_global_placer { + // Global placers based on the SimPL paper. + SimPL_BiParitioning, ///< Global Placer based on the SimPL technique to Global Placement. Uses a quadratic solver and a bi-partitioning Partial Legalizer. + SimPL_FlowBased ///< Global Placer based on the SimPL technique to Global Placement. Uses a quadratic solver and a multi-commodity-flow-based Partial Legalizer. +}; + /** * @brief The type of a Full Legalizer. * diff --git a/vpr/src/analytical_place/global_placer.cpp b/vpr/src/analytical_place/global_placer.cpp index 9a60e7fa4ae..7c667295c02 100644 --- a/vpr/src/analytical_place/global_placer.cpp +++ b/vpr/src/analytical_place/global_placer.cpp @@ -11,6 +11,7 @@ #include #include #include "analytical_solver.h" +#include "ap_flow_enums.h" #include "ap_netlist.h" #include "atom_netlist.h" #include "device_grid.h" @@ -22,22 +23,34 @@ #include "vtr_log.h" #include "vtr_time.h" -std::unique_ptr make_global_placer(e_global_placer placer_type, +std::unique_ptr make_global_placer(e_ap_global_placer placer_type, const APNetlist& ap_netlist, const Prepacker& prepacker, const AtomNetlist& atom_netlist, const DeviceGrid& device_grid, const std::vector& logical_block_types, - const std::vector& physical_tile_types) { + const std::vector& physical_tile_types, + int log_verbosity) { // Based on the placer type passed in, build the global placer.
switch (placer_type) { - case e_global_placer::SimPL: - return std::make_unique(ap_netlist, + case e_ap_global_placer::SimPL_BiParitioning: + return std::make_unique(e_partial_legalizer::BI_PARTITIONING, + ap_netlist, prepacker, atom_netlist, device_grid, logical_block_types, - physical_tile_types); + physical_tile_types, + log_verbosity); + case e_ap_global_placer::SimPL_FlowBased: + return std::make_unique(e_partial_legalizer::FLOW_BASED, + ap_netlist, + prepacker, + atom_netlist, + device_grid, + logical_block_types, + physical_tile_types, + log_verbosity); default: VPR_FATAL_ERROR(VPR_ERROR_AP, "Unrecognized global placer type"); @@ -45,13 +58,15 @@ std::unique_ptr make_global_placer(e_global_placer placer_type, } } -SimPLGlobalPlacer::SimPLGlobalPlacer(const APNetlist& ap_netlist, +SimPLGlobalPlacer::SimPLGlobalPlacer(e_partial_legalizer partial_legalizer_type, + const APNetlist& ap_netlist, const Prepacker& prepacker, const AtomNetlist& atom_netlist, const DeviceGrid& device_grid, const std::vector& logical_block_types, - const std::vector& physical_tile_types) - : GlobalPlacer(ap_netlist) { + const std::vector& physical_tile_types, + int log_verbosity) + : GlobalPlacer(ap_netlist, log_verbosity) { // This can be a long method. Good to time this to see how long it takes to // construct the global placer. 
vtr::ScopedStartFinishTimer global_placer_building_timer("Constructing Global Placer"); @@ -67,9 +82,10 @@ SimPLGlobalPlacer::SimPLGlobalPlacer(const APNetlist& ap_netlist, physical_tile_types, log_verbosity_); // Build the partial legalizer - partial_legalizer_ = make_partial_legalizer(e_partial_legalizer::FLOW_BASED, + partial_legalizer_ = make_partial_legalizer(partial_legalizer_type, ap_netlist_, - density_manager_); + density_manager_, + log_verbosity_); } /** diff --git a/vpr/src/analytical_place/global_placer.h b/vpr/src/analytical_place/global_placer.h index 91753ff10ff..82bad07b4d9 100644 --- a/vpr/src/analytical_place/global_placer.h +++ b/vpr/src/analytical_place/global_placer.h @@ -15,7 +15,9 @@ #pragma once #include +#include "ap_flow_enums.h" #include "flat_placement_density_manager.h" +#include "partial_legalizer.h" // Forward declarations class APNetlist; @@ -24,13 +26,6 @@ class PartialLegalizer; class Prepacker; struct PartialPlacement; -/** - * @brief Enumeration of all of the global placers currently implemented in VPR. - */ -enum class e_global_placer { - SimPL // Global placer based on the SimPL paper. -}; - /** * @brief The Global Placer base class * @@ -52,7 +47,7 @@ class GlobalPlacer { * @param log_verbosity The verbosity of log messages in the Global * Placer. */ - GlobalPlacer(const APNetlist& ap_netlist, int log_verbosity = 1) + GlobalPlacer(const APNetlist& ap_netlist, int log_verbosity) : ap_netlist_(ap_netlist), log_verbosity_(log_verbosity) {} @@ -78,13 +73,14 @@ class GlobalPlacer { /** * @brief A factory method which creates a Global Placer of the given type. 
*/ -std::unique_ptr make_global_placer(e_global_placer placer_type, +std::unique_ptr make_global_placer(e_ap_global_placer placer_type, const APNetlist& ap_netlist, const Prepacker& prepacker, const AtomNetlist& atom_netlist, const DeviceGrid& device_grid, const std::vector& logical_block_types, - const std::vector& physical_tile_types); + const std::vector& physical_tile_types, + int log_verbosity); /** * @brief A Global Placer based on the SimPL work for analytical ASIC placement. @@ -140,12 +136,14 @@ class SimPLGlobalPlacer : public GlobalPlacer { * * Constructs the solver and partial legalizer. */ - SimPLGlobalPlacer(const APNetlist& ap_netlist, + SimPLGlobalPlacer(e_partial_legalizer partial_legalizer_type, + const APNetlist& ap_netlist, const Prepacker& prepacker, const AtomNetlist& atom_netlist, const DeviceGrid& device_grid, const std::vector& logical_block_types, - const std::vector& physical_tile_types); + const std::vector& physical_tile_types, + int log_verbosity); /** * @brief Run a SimPL-like global placement algorithm diff --git a/vpr/src/analytical_place/partial_legalizer.cpp b/vpr/src/analytical_place/partial_legalizer.cpp index 3a912090ec8..072cd8ebd19 100644 --- a/vpr/src/analytical_place/partial_legalizer.cpp +++ b/vpr/src/analytical_place/partial_legalizer.cpp @@ -31,17 +31,25 @@ #include "vtr_assert.h" #include "vtr_geometry.h" #include "vtr_log.h" +#include "vtr_prefix_sum.h" #include "vtr_strong_id.h" #include "vtr_vector.h" #include "vtr_vector_map.h" std::unique_ptr make_partial_legalizer(e_partial_legalizer legalizer_type, const APNetlist& netlist, - std::shared_ptr density_manager) { + std::shared_ptr density_manager, + int log_verbosity) { // Based on the partial legalizer type passed in, build the partial legalizer. 
switch (legalizer_type) { case e_partial_legalizer::FLOW_BASED: - return std::make_unique(netlist, density_manager); + return std::make_unique(netlist, + density_manager, + log_verbosity); + case e_partial_legalizer::BI_PARTITIONING: + return std::make_unique(netlist, + density_manager, + log_verbosity); default: VPR_FATAL_ERROR(VPR_ERROR_AP, "Unrecognized partial legalizer type"); @@ -252,8 +260,9 @@ void FlowBasedLegalizer::compute_neighbors_of_bin(FlatPlacementBinId src_bin_id, } FlowBasedLegalizer::FlowBasedLegalizer(const APNetlist& netlist, - std::shared_ptr density_manager) - : PartialLegalizer(netlist) + std::shared_ptr density_manager, + int log_verbosity) + : PartialLegalizer(netlist, log_verbosity) , density_manager_(density_manager) , bin_neighbors_(density_manager_->flat_placement_bins().bins().size()) { @@ -692,3 +701,395 @@ void FlowBasedLegalizer::legalize(PartialPlacement &p_placement) { density_manager_->export_placement_from_bins(p_placement); } +// This namespace contains enums and classes used for bi-partitioning. +namespace { + +/** + * @brief Enum for the direction of a partition. + */ +enum class e_partition_dir { + VERTICAL, + HORIZONTAL +}; + +/** + * @brief Spatial window used to spread the blocks contained within. + * + * This window's region is identified and grown until it has enough space to + * accommodate the blocks stored within. This window is then successively + * partitioned until it is small enough (blocks are not too dense). + */ +struct SpreadingWindow { + /// @brief The blocks contained within this window. + std::vector contained_blocks; + + /// @brief The 2D region of space that this window covers.
+ vtr::Rect region; +}; + +} // namespace + +BiPartitioningPartialLegalizer::BiPartitioningPartialLegalizer( + const APNetlist& netlist, + std::shared_ptr density_manager, + int log_verbosity) + : PartialLegalizer(netlist, log_verbosity) + , density_manager_(density_manager) {} + +/** + * @brief Identify spreading windows which contain overfilled bins on the device + * and do not overlap. + * + * This process is split into 3 stages: + * 1) Identify overfilled bins and grow windows around them. These windows + * will grow until there is just enough space to accommodate the blocks + * within the window (capacity of the window is larger than the utilization). + * 2) Merge overlapping windows. + * 3) Move the blocks within these window regions from their bins into + * their windows. This updates the current utilization of bins, making + * spreading easier. + */ +static std::vector identify_non_overlapping_windows( + const APNetlist& netlist, + FlatPlacementDensityManager& density_manager) { + // Identify overfilled bins + const std::unordered_set& overfilled_bins = density_manager.get_overfilled_bins(); + + // Create a prefix sum for the capacity. + // We will need to get the capacity of 2D regions of the device very often + // in the algorithm below. This greatly improves the time complexity. + // TODO: This should not change between iterations of spreading. This can + // be moved to the constructor. + size_t width, height, layers; + std::tie(width, height, layers) = density_manager.get_overall_placeable_region_size(); + vtr::PrefixSum2D capacity_prefix_sum(width, height, [&](size_t x, size_t y) { + FlatPlacementBinId bin_id = density_manager.get_bin(x, y, 0); + // For now we take the L1 norm of the bin divided by its area. + // The L1 norm is just a count of the number of primitives that + // can fit into the bin (without caring for primitive type). We + // divide by area such that large bins (1x4 for example) get + // normalized to 1x1 regions.
+ const vtr::Rect& bin_region = density_manager.flat_placement_bins().bin_region(bin_id); + float bin_area = bin_region.width() * bin_region.height(); + return density_manager.get_bin_capacity(bin_id).manhattan_norm() / bin_area; + }); + + // Create a prefix sum for the utilization. + // The utilization of the bins will change between spreading iterations, so + // this prefix sum must be recomputed. + vtr::PrefixSum2D utilization_prefix_sum(width, height, [&](size_t x, size_t y) { + FlatPlacementBinId bin_id = density_manager.get_bin(x, y, 0); + // This is computed the same way as the capacity prefix sum above. + const vtr::Rect& bin_region = density_manager.flat_placement_bins().bin_region(bin_id); + float bin_area = bin_region.width() * bin_region.height(); + return density_manager.get_bin_utilization(bin_id).manhattan_norm() / bin_area; + }); + + // 1) For each of the overfilled bins, create and store a minimum window. + // TODO: This is a very simple algorithm which currently only uses the number + // of primitives within the regions, not the primitive types. Need to + // investigate this further. + // TODO: Currently, we greedily grow the region by 1 in all directions until + // the capacity is larger than the utilization. This may not produce + // the minimum window. Should investigate "touching-up" the windows. + std::vector windows; + for (FlatPlacementBinId bin_id : overfilled_bins) { + // Create a new window for this bin. + SpreadingWindow new_window; + // Initialize the region to the region of the bin. + new_window.region = density_manager.flat_placement_bins().bin_region(bin_id); + vtr::Rect& region = new_window.region; + while (true) { + // Grow the region by 1 on all sides.
+ double new_xmin = std::clamp(region.xmin() - 1.0, 0.0, width); + double new_xmax = std::clamp(region.xmax() + 1.0, 0.0, width); + double new_ymin = std::clamp(region.ymin() - 1.0, 0.0, height); + double new_ymax = std::clamp(region.ymax() + 1.0, 0.0, height); + + // If the region did not grow, exit. This is a maximal bin. + // TODO: Maybe print warning. + if (new_xmin == region.xmin() && new_xmax == region.xmax() && + new_ymin == region.ymin() && new_ymax == region.ymax()) { + break; + } + + // If the utilization is lower than the capacity, stop growing. + region.set_xmin(new_xmin); + region.set_xmax(new_xmax); + region.set_ymin(new_ymin); + region.set_ymax(new_ymax); + float region_capacity = capacity_prefix_sum.get_sum(region.xmin(), + region.ymin(), + region.xmax() - 1, + region.ymax() - 1); + + float region_utilization = utilization_prefix_sum.get_sum(region.xmin(), + region.ymin(), + region.xmax() - 1, + region.ymax() - 1); + if (region_utilization < region_capacity) + break; + } + // Insert this window into the list of windows. + windows.emplace_back(std::move(new_window)); + } + + // 2) Merge overlapping bins and store into new array. + // TODO: This is a very basic merging process which will identify the + // minimum region containing both windows; however, after merging it + // is very likely that this window will now be too large. Need to + // investigate shrinking the windows after merging. + // TODO: I am not sure if it is possible, but after merging 2 windows, the + // new window may overlap with another window that has been already + // created. This should not cause issues with the algorithm since one + // of the new windows will just be empty, but it is not ideal. + // FIXME: This loop is O(N^2) with the number of overfilled bins which may + // get expensive as the circuit sizes increase. Should investigate + // spatial sorting structures (like kd-trees) to help keep this fast. + // Another idea is to merge windows early on (before growing them). 
+ std::vector non_overlapping_windows; + size_t num_windows = windows.size(); + // Need to keep track of which windows have been merged or not to prevent + // merging windows multiple times. + std::vector finished_window(num_windows, false); + for (size_t i = 0; i < num_windows; i++) { + // If the window has already been finished (merged), nothing to do. + if (finished_window[i]) + continue; + + // Check for overlaps between this window and the future windows and + // update the region accordingly. + vtr::Rect& region = windows[i].region; + for (size_t j = i + 1; j < num_windows; j++) { + // No need to check windows which have already finished. + if (finished_window[j]) + continue; + // Check for overlap + if (region.strictly_overlaps(windows[j].region)) { + // If overlap, merge with this region and mark the window as + // finished. + // Here, the merged region is the bounding box around the two + // regions. + region = vtr::bounding_box(region, windows[j].region); + finished_window[j] = true; + } + } + + // This is not strictly necessary, but marking this window as finished + // is just a nice, clean thing to do. + finished_window[i] = true; + + // Move this window into the new list of non-overlapping windows. + non_overlapping_windows.emplace_back(std::move(windows[i])); + } + + // 3) Move the blocks out of their bins and into the windows. + // TODO: It may be good for debugging to check if the windows have nothing + // to move. This may indicate a problem (overfilled bins of fixed + // blocks, overlapping windows, etc.). + for (SpreadingWindow& window : non_overlapping_windows) { + // Iterate over all bins that this window covers. + // TODO: This is a bit crude and should somehow be made more robust. 
+ size_t lower_x = window.region.xmin(); + size_t upper_x = window.region.xmax() - 1; + size_t lower_y = window.region.ymin(); + size_t upper_y = window.region.ymax() - 1; + for (size_t x = lower_x; x <= upper_x; x++) { + for (size_t y = lower_y; y <= upper_y; y++) { + // Get all of the movable blocks from the bin. + FlatPlacementBinId bin_id = density_manager.get_bin(x, y, 0); + std::vector moveable_blks; + moveable_blks.reserve(density_manager.flat_placement_bins().bin_contained_blocks(bin_id).size()); + for (APBlockId blk_id : density_manager.flat_placement_bins().bin_contained_blocks(bin_id)) { + if (netlist.block_mobility(blk_id) == APBlockMobility::MOVEABLE) + moveable_blks.push_back(blk_id); + } + // Remove the moveable blocks from their bins and store into + // the windows. + for (APBlockId blk_id : moveable_blks) { + density_manager.remove_block_from_bin(blk_id, bin_id); + window.contained_blocks.push_back(blk_id); + } + } + } + } + + return non_overlapping_windows; +} + +void BiPartitioningPartialLegalizer::legalize(PartialPlacement& p_placement) { + VTR_LOGV(log_verbosity_ >= 10, "Running Bi-Partitioning Legalizer\n"); + + // Prepare the density manager. + density_manager_->empty_bins(); + density_manager_->import_placement_into_bins(p_placement); + + // Quick return. If there are no overfilled bins, there is nothing to spread. + if (density_manager_->get_overfilled_bins().size() == 0) { + VTR_LOGV(log_verbosity_ >= 10, "No overfilled bins. Nothing to legalize.\n"); + return; + } + + // Identify non-overlapping spreading windows. + std::vector initial_windows = identify_non_overlapping_windows(netlist_, *density_manager_); + VTR_ASSERT(initial_windows.size() != 0); + VTR_LOGV(log_verbosity_ >= 10, + "\tIdentified %zu non-overlapping spreading windows.\n", + initial_windows.size()); + + // Insert the windows into a queue for spreading. 
+ std::queue window_queue; + for (SpreadingWindow& window : initial_windows) { + window_queue.push(std::move(window)); + } + + // For each window in the queue: + // 1) If the window is small enough, do not partition further. + // 2) Partition the window + // 3) Partition the blocks into the window partitions + // 4) Insert the new windows into the queue + std::vector finished_windows; + while (!window_queue.empty()) { + // Get a reference to the front of the queue but do not pop it yet. We + // can save time from having to copy the element out since these windows + // contain vectors. + SpreadingWindow& window = window_queue.front(); + + // Check if the window is empty. This can happen when there is odd + // numbers of blocks or when things do not perfectly fit. + if (window.contained_blocks.empty()) { + // If the window does not contain any blocks, pop it from the queue + // and do not put it in finished windows. There is no point + // operating on it further. + window_queue.pop(); + continue; + } + + // 1) Check if the window is small enough (one bin in size). + // TODO: Perhaps we can make this stopping criteria more intelligent. + // Like stopping when we know there is only one bin within the + // window. + double window_area = window.region.width() * window.region.height(); + if (window_area <= 1.0) { + finished_windows.emplace_back(std::move(window)); + window_queue.pop(); + continue; + } + + // 2) Partition the window. + // Select the partition direction. + // To keep it simple, we partition the direction which would cut the + // region the most. + // TODO: Should explore making the partition line based on the capacity + // of the two partitioned regions. We may want to cut the + // region in half such that the mass of the atoms contained within + // the two future regions is equal. 
+ e_partition_dir partition_dir = e_partition_dir::VERTICAL; + if (window.region.height() > window.region.width()) + partition_dir = e_partition_dir::HORIZONTAL; + + // To keep it simple, just cut the space in half. + // TODO: Should investigate other cutting techniques. Cutting perfectly + // in half may not be the most efficient technique. + SpreadingWindow lower_window; + SpreadingWindow upper_window; + if (partition_dir == e_partition_dir::VERTICAL) { + // Find the x-coordinate of a cut line directly in the middle of the + // region. We floor this to prevent fractional cut lines. + double pivot_x = std::floor((window.region.xmin() + window.region.xmax()) / 2.0); + + // Cut the region at this cut line. + lower_window.region = vtr::Rect(vtr::Point(window.region.xmin(), + window.region.ymin()), + vtr::Point(pivot_x, + window.region.ymax())); + + upper_window.region = vtr::Rect(vtr::Point(pivot_x, + window.region.ymin()), + vtr::Point(window.region.xmax(), + window.region.ymax())); + } else { + VTR_ASSERT(partition_dir == e_partition_dir::HORIZONTAL); + // Similarly in the y direction, find the non-fractional y coordinate + // to make a horizontal cut. + double pivot_y = std::floor((window.region.ymin() + window.region.ymax()) / 2.0); + + // Then cut the window. + lower_window.region = vtr::Rect(vtr::Point(window.region.xmin(), + window.region.ymin()), + vtr::Point(window.region.xmax(), + pivot_y)); + + upper_window.region = vtr::Rect(vtr::Point(window.region.xmin(), + pivot_y), + vtr::Point(window.region.xmax(), + window.region.ymax())); + } + + // 3) Partition the blocks. + // For now, just evenly partition the blocks based on their solved + // positions. + // TODO: This is a huge simplification. We do not even know if the lower + // partition has space for the blocks that want to be on that side! 
+ // Instead of just using x/y position, we also need to take into + // account the mass of the blocks and ensure that there is enough + // capacity for the given block's mass. One idea is to partition + // the blocks using this basic approach and then fixing up any + // blocks that should not be on the given side (due to type or + // capacity constraints). + if (partition_dir == e_partition_dir::VERTICAL) { + // Sort the blocks in the window by the x coordinate. + std::sort(window.contained_blocks.begin(), window.contained_blocks.end(), [&](APBlockId a, APBlockId b) { + return p_placement.block_x_locs[a] < p_placement.block_x_locs[b]; + }); + + } else { + VTR_ASSERT(partition_dir == e_partition_dir::HORIZONTAL); + // Sort the blocks in the window by the y coordinate. + std::sort(window.contained_blocks.begin(), window.contained_blocks.end(), [&](APBlockId a, APBlockId b) { + return p_placement.block_y_locs[a] < p_placement.block_y_locs[b]; + }); + } + + // Find the pivot block position. + size_t pivot = window.contained_blocks.size() / 2; + + // Copy the blocks to the windows based on the pivot. + for (size_t i = 0; i < pivot; i++) { + lower_window.contained_blocks.push_back(window.contained_blocks[i]); + } + for (size_t i = pivot; i < window.contained_blocks.size(); i++) { + upper_window.contained_blocks.push_back(window.contained_blocks[i]); + } + + // 4) Enqueue the new windows. + window_queue.push(std::move(lower_window)); + window_queue.push(std::move(upper_window)); + + // Pop the top element off the queue. This will invalidate the window + // object. + window_queue.pop(); + } + + // Move the blocks into the bins. + for (const SpreadingWindow& window : finished_windows) { + // Get the bin at the center of the window. + vtr::Point center = get_center_of_rect(window.region); + FlatPlacementBinId bin_id = density_manager_->get_bin(center.x(), center.y(), 0); + + // Move all blocks in the window into this bin. 
+ for (APBlockId blk_id : window.contained_blocks) { + // Note: The blocks should have been removed from their original + // bins when they were put into the windows. There are asserts + // within the density manager class which will verify this. + density_manager_->insert_block_into_bin(blk_id, bin_id); + } + } + + // Verify that the bins are valid before export. + VTR_ASSERT(density_manager_->verify()); + + // Export the legalized placement to the partial placement. + density_manager_->export_placement_from_bins(p_placement); +} + diff --git a/vpr/src/analytical_place/partial_legalizer.h b/vpr/src/analytical_place/partial_legalizer.h index 784258a0945..11311312ae5 100644 --- a/vpr/src/analytical_place/partial_legalizer.h +++ b/vpr/src/analytical_place/partial_legalizer.h @@ -31,7 +31,8 @@ struct PartialPlacement; * VPR. */ enum class e_partial_legalizer { - FLOW_BASED // Multi-commodity flow-based partial legalizer. + FLOW_BASED, // Multi-commodity flow-based partial legalizer. + BI_PARTITIONING // Bi-partitioning partial legalizer. }; /** @@ -52,8 +53,7 @@ class PartialLegalizer { * * Currently just copies the parameters into the class as member varaibles. */ - PartialLegalizer(const APNetlist& netlist, - int log_verbosity = 1) + PartialLegalizer(const APNetlist& netlist, int log_verbosity) : netlist_(netlist), log_verbosity_(log_verbosity) {} @@ -90,7 +90,8 @@ class PartialLegalizer { */ std::unique_ptr make_partial_legalizer(e_partial_legalizer legalizer_type, const APNetlist& netlist, - std::shared_ptr density_manager); + std::shared_ptr density_manager, + int log_verbosity); /** * @brief A multi-commodity flow-based spreading partial legalizer. @@ -222,14 +223,15 @@ class FlowBasedLegalizer : public PartialLegalizer { public: /** - * @brief Construcotr for the flow-based legalizer. + * @brief Constructor for the flow-based legalizer. * * Builds all of the bins, computing their capacities based on the device * description. Builds the connectivity of bins.
Computes the mass of all * blocks in the netlist. */ FlowBasedLegalizer(const APNetlist& netlist, - std::shared_ptr density_manager); + std::shared_ptr density_manager, + int log_verbosity); /** * @brief Performs flow-based spreading on the given partial placement. @@ -240,3 +242,47 @@ class FlowBasedLegalizer : public PartialLegalizer { void legalize(PartialPlacement &p_placement) final; }; +/** + * @brief A bi-partitioning spreading partial legalizer. + * + * This creates minimum spanning windows around overfilled bins in the device + * such that the capacity of the bins within the window is just higher than the + * current utilization of the bins within the window. These windows are then + * split in both region and contained atoms. This spatially spreads out the + * atoms within each window. This splitting continues until the windows are + * small enough and the atoms are placed. The benefit of this approach is that + * it cuts the problem size for each partition, which can yield improved + * performance when there is a lot of overfill. + * + * This technique is based on the lookahead legalizer in SimPL and the window- + * based legalization found in GPlace3.0. + * SimPL: https://doi.org/10.1145/2461256.2461279 + * GPlace3.0: https://doi.org/10.1145/3233244 + */ +class BiPartitioningPartialLegalizer : public PartialLegalizer { +public: + /** + * @brief Constructor for the bi-partitioning partial legalizer. + * + * Uses the provided density manager to identify the capacity and + * utilization of regions of the device. + */ + BiPartitioningPartialLegalizer(const APNetlist& netlist, + std::shared_ptr density_manager, + int log_verbosity); + + /** + * @brief Perform bi-partitioning spreading on the given partial placement. + * + * @param p_placement + * The placement to legalize. The result of the partial legalizer + * will be stored in this object.
+ */ + void legalize(PartialPlacement& p_placement) final; + +private: + /// @brief The density manager which manages the capacity and utilization + /// of regions of the device. + std::shared_ptr density_manager_; +}; + diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index 0e06319e383..430359d3706 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -547,8 +547,10 @@ static void SetupAnnealSched(const t_options& Options, */ void SetupAPOpts(const t_options& options, t_ap_opts& apOpts) { + apOpts.global_placer_type = options.ap_global_placer.value(); apOpts.full_legalizer_type = options.ap_full_legalizer.value(); apOpts.detailed_placer_type = options.ap_detailed_placer.value(); + apOpts.log_verbosity = options.ap_verbosity.value(); } /** diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp index 730490fe2be..d70797c0548 100644 --- a/vpr/src/base/ShowSetup.cpp +++ b/vpr/src/base/ShowSetup.cpp @@ -595,6 +595,18 @@ static void ShowPlacerOpts(const t_placer_opts& PlacerOpts) { } static void ShowAnalyticalPlacerOpts(const t_ap_opts& APOpts) { + VTR_LOG("AnalyticalPlacerOpts.global_placer_type: "); + switch (APOpts.global_placer_type) { + case e_ap_global_placer::SimPL_BiParitioning: + VTR_LOG("quadratic-bipartitioning-lookahead\n"); + break; + case e_ap_global_placer::SimPL_FlowBased: + VTR_LOG("quadratic-flowbased-lookahead\n"); + break; + default: + VPR_FATAL_ERROR(VPR_ERROR_UNKNOWN, "Unknown global_placer_type\n"); + } + VTR_LOG("AnalyticalPlacerOpts.full_legalizer_type: "); switch (APOpts.full_legalizer_type) { case e_ap_full_legalizer::Naive: @@ -621,6 +633,8 @@ static void ShowAnalyticalPlacerOpts(const t_ap_opts& APOpts) { default: VPR_FATAL_ERROR(VPR_ERROR_UNKNOWN, "Unknown detailed_placer_type\n"); } + + VTR_LOG("AnalyticalPlacerOpts.log_verbosity: %d\n", APOpts.log_verbosity); } static void ShowNetlistOpts(const t_netlist_opts& NetlistOpts) { diff --git a/vpr/src/base/read_options.cpp 
b/vpr/src/base/read_options.cpp index bd9d400a80e..433e0b8f641 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -134,6 +134,41 @@ struct ParseCircuitFormat { } }; +struct ParseAPGlobalPlacer { + ConvertedValue from_str(const std::string& str) { + ConvertedValue conv_value; + if (str == "quadratic-bipartitioning-lookahead") + conv_value.set_value(e_ap_global_placer::SimPL_BiParitioning); + else if (str == "quadratic-flowbased-lookahead") + conv_value.set_value(e_ap_global_placer::SimPL_FlowBased); + else { + std::stringstream msg; + msg << "Invalid conversion from '" << str << "' to e_ap_global_placer (expected one of: " << argparse::join(default_choices(), ", ") << ")"; + conv_value.set_error(msg.str()); + } + return conv_value; + } + + ConvertedValue to_str(e_ap_global_placer val) { + ConvertedValue conv_value; + switch (val) { + case e_ap_global_placer::SimPL_BiParitioning: + conv_value.set_value("quadratic-bipartitioning-lookahead"); + break; + case e_ap_global_placer::SimPL_FlowBased: + conv_value.set_value("quadratic-flowbased-lookahead"); + break; + default: + VTR_ASSERT(false); + } + return conv_value; + } + + std::vector default_choices() { + return {"quadratic-bipartitioning-lookahead", "quadratic-flowbased-lookahead"}; + } +}; + struct ParseAPFullLegalizer { ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; @@ -1822,6 +1857,14 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio auto& ap_grp = parser.add_argument_group("analytical placement options"); + ap_grp.add_argument(args.ap_global_placer, "--ap_global_placer") + .help( + "Controls which Global Placer to use in the AP Flow.\n" + " * quadratic-bipartitioning-lookahead: Use a Global Placer which uses a quadratic solver and a bi-partitioning lookahead legalizer. 
Anchor points are used to spread the solved solution to the legalized solution.\n" + " * quadratic-flowbased-lookahead: Use a Global Placer which uses a quadratic solver and a multi-commodity-flow-based lookahead legalizer. Anchor points are used to spread the solved solution to the legalized solution.") + .default_value("quadratic-bipartitioning-lookahead") + .show_in(argparse::ShowIn::HELP_ONLY); + ap_grp.add_argument(args.ap_full_legalizer, "--ap_full_legalizer") .help( "Controls which Full Legalizer to use in the AP Flow.\n" @@ -1839,6 +1882,14 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .default_value("annealer") .show_in(argparse::ShowIn::HELP_ONLY); + ap_grp.add_argument(args.ap_verbosity, "--ap_verbosity") + .help( + "Controls how verbose the AP flow's log messages will be. Higher " + "values produce more output (useful for debugging the AP " + "algorithms).") + .default_value("1") + .show_in(argparse::ShowIn::HELP_ONLY); + auto& pack_grp = parser.add_argument_group("packing options"); pack_grp.add_argument(args.connection_driven_clustering, "--connection_driven_clustering") diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index fa4f51d345c..8a2befd8884 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -97,8 +97,10 @@ struct t_options { argparse::ArgValue netlist_verbosity; /* Analytical Placement options */ + argparse::ArgValue ap_global_placer; argparse::ArgValue ap_full_legalizer; argparse::ArgValue ap_detailed_placer; + argparse::ArgValue ap_verbosity; /* Clustering options */ argparse::ArgValue connection_driven_clustering; diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 038f4d38c47..8182a86d6a3 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -1049,17 +1049,26 @@ struct t_placer_opts { * @param doAnalyticalPlacement * True if analytical placement is supposed to be done in the CAD * flow. False if otherwise. 
+ * @param global_placer_type + * The type of global placer the AP flow will use. * @param full_legalizer_type * The type of full legalizer the AP flow will use. * @param detailed_placer_type * The type of detailed placter the AP flow will use. + * @param log_verbosity + * The verbosity level of log messages in the AP flow, with higher + * values leading to more verbose messages. */ struct t_ap_opts { e_stage_action doAP; + e_ap_global_placer global_placer_type; + e_ap_full_legalizer full_legalizer_type; e_ap_detailed_placer detailed_placer_type; + + int log_verbosity; }; /****************************************************************** diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/config/golden_results.txt index 99d9cf853e2..458c87244df 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/config/golden_results.txt @@ -1,5 +1,5 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time 
place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time - fixed_k6_frac_N8_22nm.xml single_wire.v common 2.25 vpr 75.57 MiB -1 -1 0.11 20616 1 0.02 -1 -1 33172 -1 -1 0 1 0 0 success v8.0.0-12163-g0dba7016b-dirty Release VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-6.8.0-51-generic x86_64 2025-02-19T17:54:19 haydar-Precision-5820-Tower /home/haydar/vtr-verilog-to-routing 77384 1 1 0 2 0 1 2 17 17 289 -1 unnamed_device -1 -1 2 3 0 0 3 75.6 MiB 0.82 0.00 0.2714 -0.2714 -0.2714 nan 0.60 1.0195e-05 5.861e-06 7.0627e-05 4.5591e-05 75.6 MiB 0.82 75.6 MiB 0.78 8 16 1 6.79088e+06 0 166176. 575.005 0.22 0.0015764 0.00149137 20206 45088 -1 18 1 1 1 141 56 0.7726 nan -0.7726 -0.7726 0 0 202963. 
702.294 0.02 0.00 0.06 -1 -1 0.02 0.00154357 0.00147507 - fixed_k6_frac_N8_22nm.xml single_ff.v common 2.07 vpr 75.57 MiB -1 -1 0.11 21004 1 0.02 -1 -1 33328 -1 -1 1 2 0 0 success v8.0.0-12163-g0dba7016b-dirty Release VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-6.8.0-51-generic x86_64 2025-02-19T17:54:19 haydar-Precision-5820-Tower /home/haydar/vtr-verilog-to-routing 77388 2 1 3 3 1 3 4 17 17 289 -1 unnamed_device -1 -1 22 9 3 1 5 75.6 MiB 0.70 0.00 0.74674 -1.4524 -0.74674 0.74674 0.55 1.7604e-05 1.0829e-05 0.000109132 7.4428e-05 75.6 MiB 0.70 75.6 MiB 0.69 20 27 1 6.79088e+06 13472 414966. 1435.87 0.36 0.00134255 0.00124027 22510 95286 -1 26 1 2 2 102 24 0.691615 0.691615 -1.31306 -0.691615 0 0 503264. 1741.40 0.04 0.00 0.12 -1 -1 0.04 0.00165403 0.00156635 - fixed_k6_frac_N8_22nm.xml ch_intrinsics.v common 2.78 vpr 76.11 MiB -1 -1 0.25 22288 3 0.07 -1 -1 36924 -1 -1 32 99 1 0 success v8.0.0-12163-g0dba7016b-dirty Release VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-6.8.0-51-generic x86_64 2025-02-19T17:54:19 haydar-Precision-5820-Tower /home/haydar/vtr-verilog-to-routing 77936 99 130 240 229 1 229 262 17 17 289 -1 unnamed_device -1 -1 883 19536 1068 3887 14581 76.1 MiB 0.67 0.00 1.86512 -124.45 -1.86512 1.86512 0.39 0.000560506 0.000504742 0.0161729 0.0146716 76.1 MiB 0.67 76.1 MiB 0.66 32 1890 11 6.79088e+06 979104 586450. 2029.24 0.47 0.0893208 0.0810973 24814 144142 -1 1712 13 543 802 57386 17520 1.9213 1.9213 -143.517 -1.9213 -0.04337 -0.04337 744469. 
2576.02 0.06 0.05 0.19 -1 -1 0.06 0.053443 0.048647 - fixed_k6_frac_N8_22nm.xml diffeq1.v common 12.29 vpr 77.89 MiB -1 -1 0.61 27152 15 0.49 -1 -1 38004 -1 -1 47 162 0 5 success v8.0.0-12163-g0dba7016b-dirty Release VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-6.8.0-51-generic x86_64 2025-02-19T17:54:19 haydar-Precision-5820-Tower /home/haydar/vtr-verilog-to-routing 79760 162 96 817 258 1 740 310 17 17 289 -1 unnamed_device -1 -1 7006 24414 236 6771 17407 77.9 MiB 1.84 0.01 21.8698 -1649.28 -21.8698 21.8698 0.44 0.00183251 0.0016749 0.0693904 0.0634239 77.9 MiB 1.84 77.9 MiB 1.10 60 14847 46 6.79088e+06 2.61318e+06 1.01997e+06 3529.29 6.71 1.05045 0.971299 29998 257685 -1 12402 16 3793 9643 1173029 292327 21.3427 21.3427 -1635.12 -21.3427 0 0 1.27783e+06 4421.56 0.06 0.28 0.21 -1 -1 0.06 0.146101 0.135935 + fixed_k6_frac_N8_22nm.xml single_wire.v common 1.44 vpr 75.50 MiB -1 -1 0.06 20472 1 0.01 -1 -1 33044 -1 -1 0 1 0 0 success v8.0.0-12213-gab28a6e25-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-02T13:12:27 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 77316 1 1 0 2 0 1 2 17 17 289 -1 unnamed_device -1 -1 2 3 0 0 3 75.5 MiB 0.54 0.00 0.2714 -0.2714 -0.2714 nan 0.42 7.631e-06 4.788e-06 5.8704e-05 4.0805e-05 75.5 MiB 0.54 75.5 MiB 0.54 8 18 1 6.79088e+06 0 166176. 575.005 0.14 0.000842308 0.000774609 20206 45088 -1 18 1 1 1 114 40 0.7726 nan -0.7726 -0.7726 0 0 202963. 
702.294 0.01 0.00 0.04 -1 -1 0.01 0.00078696 0.000736601 + fixed_k6_frac_N8_22nm.xml single_ff.v common 1.60 vpr 75.69 MiB -1 -1 0.07 20716 1 0.02 -1 -1 33068 -1 -1 1 2 0 0 success v8.0.0-12213-gab28a6e25-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-02T13:12:27 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 77508 2 1 3 3 1 3 4 17 17 289 -1 unnamed_device -1 -1 22 9 3 4 2 75.7 MiB 0.52 0.00 0.74674 -1.4524 -0.74674 0.74674 0.40 1.3477e-05 8.826e-06 8.6874e-05 6.3274e-05 75.7 MiB 0.52 75.7 MiB 0.52 20 27 1 6.79088e+06 13472 414966. 1435.87 0.24 0.000930532 0.000852446 22510 95286 -1 30 1 2 2 163 35 0.74674 0.74674 -1.43836 -0.74674 0 0 503264. 1741.40 0.03 0.00 0.08 -1 -1 0.03 0.00102904 0.00095538 + fixed_k6_frac_N8_22nm.xml ch_intrinsics.v common 3.50 vpr 76.36 MiB -1 -1 0.24 22124 3 0.07 -1 -1 36540 -1 -1 61 99 1 0 success v8.0.0-12213-gab28a6e25-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-02T13:12:27 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 78188 99 130 240 229 1 226 291 17 17 289 -1 unnamed_device -1 -1 838 18607 1985 1801 14821 76.4 MiB 0.69 0.00 1.6707 -127.089 -1.6707 1.6707 0.40 0.000610649 0.000537736 0.0140862 0.0125271 76.4 MiB 0.69 76.4 MiB 0.67 34 1822 16 6.79088e+06 1.36979e+06 618332. 2139.56 1.43 0.168901 0.149844 25102 150614 -1 1682 13 569 976 59358 18627 2.0466 2.0466 -143.074 -2.0466 -0.04337 -0.04337 787024. 
2723.27 0.04 0.03 0.13 -1 -1 0.04 0.0351297 0.0316135 + fixed_k6_frac_N8_22nm.xml diffeq1.v common 8.53 vpr 79.32 MiB -1 -1 0.37 26988 15 0.31 -1 -1 37340 -1 -1 47 162 0 5 success v8.0.0-12213-gab28a6e25-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-02T13:12:27 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 81220 162 96 817 258 1 719 310 17 17 289 -1 unnamed_device -1 -1 6797 28606 496 8327 19783 79.3 MiB 1.10 0.01 21.005 -1600.44 -21.005 21.005 0.39 0.0020065 0.00179053 0.066982 0.0598287 79.3 MiB 1.10 79.3 MiB 1.03 58 14305 35 6.79088e+06 2.61318e+06 997811. 3452.63 4.70 0.808118 0.727985 29710 251250 -1 12039 17 3605 8667 1087317 276398 20.4866 20.4866 -1547.03 -20.4866 0 0 1.25153e+06 4330.55 0.06 0.27 0.24 -1 -1 0.06 0.158734 0.14406 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/bipartitioning_global_placer/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/bipartitioning_global_placer/config/config.txt new file mode 100644 index 00000000000..d5c911fe715 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/bipartitioning_global_placer/config/config.txt @@ -0,0 +1,54 @@ +############################################################################### +# Configuration file for running the MCNC benchmarks through the AP flow. +# +# The AP flow requires that each circuit contains fixed blocks and is fixed +# to a specific device size. The device sizes here were chosen to match the +# device sizes of the default VTR flow. 
+############################################################################### + +# Path to directory of circuits to use +circuits_dir=benchmarks/blif/wiremap6 + +# Path to directory of architectures to use +archs_dir=arch/timing + +# Add architectures to list to sweep +arch_list_add=k6_frac_N10_40nm.xml + +# Add circuits to list to sweep +circuit_list_add=apex4.pre-vpr.blif +circuit_list_add=des.pre-vpr.blif +circuit_list_add=ex1010.pre-vpr.blif +circuit_list_add=seq.pre-vpr.blif + +# Constrain the circuits to their devices +circuit_constraint_list_add=(apex4.pre-vpr.blif, device=mcnc_medium) +circuit_constraint_list_add=(seq.pre-vpr.blif, device=mcnc_medium) +circuit_constraint_list_add=(des.pre-vpr.blif, device=mcnc_large) +circuit_constraint_list_add=(ex1010.pre-vpr.blif, device=mcnc_large) + +# Constrain the IOs +circuit_constraint_list_add=(apex4.pre-vpr.blif, constraints=../../../../../mcnc/constraints/apex4_io_constraint.xml) +circuit_constraint_list_add=(seq.pre-vpr.blif, constraints=../../../../../mcnc/constraints/seq_io_constraint.xml) +circuit_constraint_list_add=(des.pre-vpr.blif, constraints=../../../../../mcnc/constraints/des_io_constraint.xml) +circuit_constraint_list_add=(ex1010.pre-vpr.blif, constraints=../../../../../mcnc/constraints/ex1010_io_constraint.xml) + +# Constrain the circuits to their channel widths +# 1.3 * minW +circuit_constraint_list_add=(apex4.pre-vpr.blif, route_chan_width=78) +circuit_constraint_list_add=(seq.pre-vpr.blif, route_chan_width=78) +circuit_constraint_list_add=(des.pre-vpr.blif, route_chan_width=44) +circuit_constraint_list_add=(ex1010.pre-vpr.blif, route_chan_width=114) + +# Parse info and how to parse +parse_file=vpr_fixed_chan_width.txt + +# How to parse QoR info +qor_parse_file=qor_ap_fixed_chan_width.txt + +# Pass requirements +pass_requirements_file=pass_requirements_ap_fixed_chan_width.txt + +# Pass the script params while writing the vpr constraints. 
+script_params=-starting_stage vpr -track_memory_usage --analytical_place --ap_global_placer quadratic-bipartitioning-lookahead --route + diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/bipartitioning_global_placer/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/bipartitioning_global_placer/config/golden_results.txt new file mode 100644 index 00000000000..54b30cafac6 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/bipartitioning_global_placer/config/golden_results.txt @@ -0,0 +1,5 @@ + arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time routed_wirelength avg_routed_wirelength routed_wiresegment avg_routed_wiresegment total_nets_routed total_connections_routed total_heap_pushes total_heap_pops logic_block_area_total logic_block_area_used routing_area_total routing_area_per_tile crit_path_route_success_iteration num_rr_graph_nodes num_rr_graph_edges collapsed_nodes critical_path_delay 
geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS create_rr_graph_time create_intra_cluster_rr_graph_time adding_internal_edges route_mem crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time router_lookahead_mem tile_lookahead_computation_time router_lookahead_computation_time + k6_frac_N10_40nm.xml apex4.pre-vpr.blif common 6.15 vpr 74.52 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 86 9 -1 -1 success v8.0.0-12210-g8eeb433b5-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-01T22:46:27 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 76312 9 19 897 28 0 768 114 16 16 256 -1 mcnc_medium -1 -1 7446 10050 1525 5847 2678 74.5 MiB 1.98 0.01 5.22187 -85.9445 -5.22187 nan 0.05 0.00204197 0.00165471 0.0922108 0.0778153 74.5 MiB 1.98 74.5 MiB 1.87 12280 16.0104 3195 4.16558 8207 35340 1711962 391448 1.05632e+07 4.63488e+06 1.26944e+06 4958.75 35 28900 206586 -1 5.82297 nan -93.0212 -5.82297 0 0 0.19 -1 -1 74.5 MiB 0.60 0.268738 0.231571 74.5 MiB -1 0.05 + k6_frac_N10_40nm.xml des.pre-vpr.blif common 2.87 vpr 75.93 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 53 256 -1 -1 success v8.0.0-12210-g8eeb433b5-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-01T22:46:27 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 77752 256 245 954 501 0 711 554 22 22 484 -1 mcnc_large -1 -1 8904 66500 1807 14947 49746 75.9 MiB 0.88 0.01 4.19633 -806.67 -4.19633 nan 0.07 0.00209601 0.00184942 0.0749397 0.0672821 75.9 MiB 0.88 75.9 MiB 0.87 12620 17.7496 3382 4.75668 3608 8619 480767 96513 2.15576e+07 2.85638e+06 1.49107e+06 3080.73 15 47664 245996 -1 4.54897 nan -867.702 -4.54897 0 0 0.22 -1 -1 75.9 MiB 0.19 0.170591 0.156391 75.9 MiB -1 0.07 + k6_frac_N10_40nm.xml ex1010.pre-vpr.blif common 19.24 vpr 103.73 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 289 10 -1 -1 success v8.0.0-12210-g8eeb433b5-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on 
Linux-6.8.0-49-generic x86_64 2025-03-01T22:46:27 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 106224 10 10 2659 20 0 2320 309 22 22 484 -1 mcnc_large -1 -1 33337 60861 15622 40285 4954 103.7 MiB 7.37 0.03 7.08906 -67.526 -7.08906 nan 0.15 0.00509718 0.00406142 0.35604 0.28949 103.7 MiB 7.37 103.7 MiB 7.07 48698 20.9905 12433 5.35905 17466 71913 3700066 508136 2.15576e+07 1.55754e+07 3.51389e+06 7260.09 20 64568 594370 -1 7.09981 nan -68.5294 -7.09981 0 0 0.63 -1 -1 103.7 MiB 1.13 0.742979 0.632564 103.7 MiB -1 0.15 + k6_frac_N10_40nm.xml seq.pre-vpr.blif common 5.15 vpr 75.82 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 85 41 -1 -1 success v8.0.0-12210-g8eeb433b5-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-01T22:46:27 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 77640 41 35 1006 76 0 827 161 16 16 256 -1 mcnc_medium -1 -1 8073 13708 1574 6075 6059 75.8 MiB 1.93 0.01 5.2078 -150.175 -5.2078 nan 0.05 0.00241319 0.00198256 0.0910059 0.0770604 75.8 MiB 1.93 75.8 MiB 1.81 13112 15.8549 3429 4.14631 6281 26105 949531 164260 1.05632e+07 4.58099e+06 1.26944e+06 4958.75 19 28900 206586 -1 5.48717 nan -159.221 -5.48717 0 0 0.19 -1 -1 75.8 MiB 0.33 0.222488 0.193946 75.8 MiB -1 0.05 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/flowbased_global_placer/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/flowbased_global_placer/config/config.txt new file mode 100644 index 00000000000..fbe0c8d91e1 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/flowbased_global_placer/config/config.txt @@ -0,0 +1,54 @@ +############################################################################### +# Configuration file for running the MCNC benchmarks through the AP flow. +# +# The AP flow requires that each circuit contains fixed blocks and is fixed +# to a specific device size. 
The device sizes here were chosen to match the +# device sizes of the default VTR flow. +############################################################################### + +# Path to directory of circuits to use +circuits_dir=benchmarks/blif/wiremap6 + +# Path to directory of architectures to use +archs_dir=arch/timing + +# Add architectures to list to sweep +arch_list_add=k6_frac_N10_40nm.xml + +# Add circuits to list to sweep +circuit_list_add=apex4.pre-vpr.blif +circuit_list_add=des.pre-vpr.blif +circuit_list_add=ex1010.pre-vpr.blif +circuit_list_add=seq.pre-vpr.blif + +# Constrain the circuits to their devices +circuit_constraint_list_add=(apex4.pre-vpr.blif, device=mcnc_medium) +circuit_constraint_list_add=(seq.pre-vpr.blif, device=mcnc_medium) +circuit_constraint_list_add=(des.pre-vpr.blif, device=mcnc_large) +circuit_constraint_list_add=(ex1010.pre-vpr.blif, device=mcnc_large) + +# Constrain the IOs +circuit_constraint_list_add=(apex4.pre-vpr.blif, constraints=../../../../../mcnc/constraints/apex4_io_constraint.xml) +circuit_constraint_list_add=(seq.pre-vpr.blif, constraints=../../../../../mcnc/constraints/seq_io_constraint.xml) +circuit_constraint_list_add=(des.pre-vpr.blif, constraints=../../../../../mcnc/constraints/des_io_constraint.xml) +circuit_constraint_list_add=(ex1010.pre-vpr.blif, constraints=../../../../../mcnc/constraints/ex1010_io_constraint.xml) + +# Constrain the circuits to their channel widths +# 1.3 * minW +circuit_constraint_list_add=(apex4.pre-vpr.blif, route_chan_width=78) +circuit_constraint_list_add=(seq.pre-vpr.blif, route_chan_width=78) +circuit_constraint_list_add=(des.pre-vpr.blif, route_chan_width=44) +circuit_constraint_list_add=(ex1010.pre-vpr.blif, route_chan_width=114) + +# Parse info and how to parse +parse_file=vpr_fixed_chan_width.txt + +# How to parse QoR info +qor_parse_file=qor_ap_fixed_chan_width.txt + +# Pass requirements +pass_requirements_file=pass_requirements_ap_fixed_chan_width.txt + +# Pass the script params 
while writing the vpr constraints. +script_params=-starting_stage vpr -track_memory_usage --analytical_place --ap_global_placer quadratic-flowbased-lookahead --route + diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/flowbased_global_placer/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/flowbased_global_placer/config/golden_results.txt new file mode 100644 index 00000000000..f9888561aa3 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/flowbased_global_placer/config/golden_results.txt @@ -0,0 +1,5 @@ + arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time routed_wirelength avg_routed_wirelength routed_wiresegment avg_routed_wiresegment total_nets_routed total_connections_routed total_heap_pushes total_heap_pops logic_block_area_total logic_block_area_used routing_area_total routing_area_per_tile crit_path_route_success_iteration num_rr_graph_nodes num_rr_graph_edges collapsed_nodes critical_path_delay 
geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS create_rr_graph_time create_intra_cluster_rr_graph_time adding_internal_edges route_mem crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time router_lookahead_mem tile_lookahead_computation_time router_lookahead_computation_time + k6_frac_N10_40nm.xml apex4.pre-vpr.blif common 18.75 vpr 75.08 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 87 9 -1 -1 success v8.0.0-12163-g0dba7016b-dirty Release VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-6.8.0-51-generic x86_64 2025-02-19T17:54:19 haydar-Precision-5820-Tower /home/haydar/vtr-verilog-to-routing 76880 9 19 897 28 0 769 115 16 16 256 -1 mcnc_medium -1 -1 7781 7927 1041 4888 1998 75.1 MiB 8.49 0.00 5.26771 -87.3979 -5.26771 nan 0.07 0.00145318 0.00120431 0.0949999 0.0830081 75.1 MiB 8.49 75.1 MiB 2.39 13434 17.4922 3475 4.52474 7093 28842 1188919 203711 1.05632e+07 4.68878e+06 1.26944e+06 4958.75 23 28900 206586 -1 5.71261 nan -95.5795 -5.71261 0 0 0.39 -1 -1 75.1 MiB 0.37 0.230347 0.204562 75.1 MiB -1 0.07 + k6_frac_N10_40nm.xml des.pre-vpr.blif common 4.20 vpr 76.21 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 54 256 -1 -1 success v8.0.0-12163-g0dba7016b-dirty Release VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-6.8.0-51-generic x86_64 2025-02-19T17:54:19 haydar-Precision-5820-Tower /home/haydar/vtr-verilog-to-routing 78040 256 245 954 501 0 681 555 22 22 484 -1 mcnc_large -1 -1 8645 46155 520 10044 35591 76.2 MiB 1.26 0.02 4.22842 -791.908 -4.22842 nan 0.13 0.00324274 0.00298957 0.0892817 0.0813978 76.2 MiB 1.26 76.2 MiB 1.23 12137 17.8223 3273 4.80617 3335 8120 476273 103444 2.15576e+07 2.91028e+06 1.49107e+06 3080.73 17 47664 245996 -1 4.53302 nan -861.08 -4.53302 0 0 0.36 -1 -1 76.2 MiB 0.33 0.245742 0.227797 76.2 MiB -1 0.13 + k6_frac_N10_40nm.xml ex1010.pre-vpr.blif common 79.07 vpr 104.32 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 291 10 -1 -1 success v8.0.0-12163-g0dba7016b-dirty Release VTR_ASSERT_LEVEL=2 GNU 11.4.0 on 
Linux-6.8.0-51-generic x86_64 2025-02-19T17:54:19 haydar-Precision-5820-Tower /home/haydar/vtr-verilog-to-routing 106820 10 10 2659 20 0 2325 311 22 22 484 -1 mcnc_large -1 -1 34854 66650 18085 43254 5311 104.3 MiB 40.00 0.04 6.83404 -66.0421 -6.83404 nan 0.24 0.0118021 0.00931383 0.817421 0.662705 104.3 MiB 40.00 104.3 MiB 9.54 53952 23.2052 13843 5.95398 18928 79526 5166116 753331 2.15576e+07 1.56832e+07 3.51389e+06 7260.09 23 64568 594370 -1 7.23867 nan -69.2309 -7.23867 0 0 1.06 -1 -1 104.3 MiB 1.94 1.44039 1.21281 104.3 MiB -1 0.24 + k6_frac_N10_40nm.xml seq.pre-vpr.blif common 21.04 vpr 76.02 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 87 41 -1 -1 success v8.0.0-12163-g0dba7016b-dirty Release VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-6.8.0-51-generic x86_64 2025-02-19T17:54:19 haydar-Precision-5820-Tower /home/haydar/vtr-verilog-to-routing 77844 41 35 1006 76 0 831 163 16 16 256 -1 mcnc_medium -1 -1 8267 12178 1040 5903 5235 76.0 MiB 9.43 0.01 5.26834 -151.935 -5.26834 nan 0.07 0.0028483 0.00231985 0.108866 0.0944881 76.0 MiB 9.43 76.0 MiB 2.55 13808 16.6161 3630 4.36823 7616 32394 1282541 221555 1.05632e+07 4.68878e+06 1.26944e+06 4958.75 25 28900 206586 -1 5.51169 nan -162.882 -5.51169 0 0 0.33 -1 -1 76.0 MiB 0.67 0.361937 0.321485 76.0 MiB -1 0.07 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/vtr_chain/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/vtr_chain/config/golden_results.txt index c0bd692e6ed..9c992863e74 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/vtr_chain/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/vtr_chain/config/golden_results.txt @@ -1,6 +1,6 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status 
vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time routed_wirelength avg_routed_wirelength routed_wiresegment avg_routed_wiresegment total_nets_routed total_connections_routed total_heap_pushes total_heap_pops logic_block_area_total logic_block_area_used routing_area_total routing_area_per_tile crit_path_route_success_iteration num_rr_graph_nodes num_rr_graph_edges collapsed_nodes critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS create_rr_graph_time create_intra_cluster_rr_graph_time adding_internal_edges route_mem crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time router_lookahead_mem tile_lookahead_computation_time router_lookahead_computation_time - k6_frac_N10_frac_chain_mem32K_40nm.xml boundtop.v common 15.09 vpr 82.79 MiB -1 -1 9.51 48304 3 0.60 -1 -1 41228 -1 -1 46 196 1 0 success v8.0.0-12163-g0dba7016b-dirty Release VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-6.8.0-51-generic x86_64 2025-02-19T17:54:19 haydar-Precision-5820-Tower /home/haydar/vtr-verilog-to-routing 84780 196 193 800 389 1 665 436 20 20 400 -1 vtr_extra_small -1 -1 4964 40084 671 10309 29104 82.8 MiB 1.85 0.01 3.49337 -1278.16 -3.49337 3.49337 0.14 0.00306243 0.00279899 0.128489 0.118473 82.8 MiB 1.85 82.8 
MiB 1.82 7124 10.8598 1964 2.99390 1886 2963 178423 46654 2.07112e+07 3.02712e+06 1.26946e+06 3173.65 13 38988 203232 -1 3.79763 3.79763 -1389.8 -3.79763 0 0 0.19 -1 -1 82.8 MiB 0.23 0.296095 0.276291 82.8 MiB -1 0.14 - k6_frac_N10_frac_chain_mem32K_40nm.xml ch_intrinsics.v common 3.54 vpr 76.63 MiB -1 -1 0.42 21976 3 0.14 -1 -1 36796 -1 -1 33 99 1 0 success v8.0.0-12163-g0dba7016b-dirty Release VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-6.8.0-51-generic x86_64 2025-02-19T17:54:19 haydar-Precision-5820-Tower /home/haydar/vtr-verilog-to-routing 78468 99 130 240 229 1 240 263 20 20 400 -1 vtr_extra_small -1 -1 984 15419 841 1822 12756 76.6 MiB 0.91 0.00 2.29621 -138.88 -2.29621 2.29621 0.12 0.000937633 0.000847352 0.0248974 0.0229777 76.6 MiB 0.91 76.6 MiB 0.79 1620 8.95028 464 2.56354 420 610 30907 8504 2.07112e+07 2.3265e+06 1.31074e+06 3276.84 10 39388 210115 -1 2.24226 2.24226 -158.984 -2.24226 0 0 0.40 -1 -1 76.6 MiB 0.06 0.0666377 0.0622772 76.6 MiB -1 0.12 - k6_frac_N10_frac_chain_mem32K_40nm.xml or1200.v common 78.75 vpr 130.79 MiB -1 -1 5.27 64812 8 2.94 -1 -1 45732 -1 -1 237 385 2 1 success v8.0.0-12163-g0dba7016b-dirty Release VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-6.8.0-51-generic x86_64 2025-02-19T17:54:19 haydar-Precision-5820-Tower /home/haydar/vtr-verilog-to-routing 133928 385 362 3324 747 1 2893 987 30 30 900 -1 vtr_small -1 -1 37756 158171 7353 52146 98672 130.8 MiB 37.93 0.05 9.23417 -10895.6 -9.23417 9.23417 0.43 0.010524 0.00938066 0.652661 0.57584 130.8 MiB 37.93 130.8 MiB 16.21 51523 17.9024 13155 4.57088 13893 42699 2399166 421650 4.8774e+07 1.42649e+07 6.56785e+06 7297.61 22 120772 1084977 -1 9.43895 9.43895 -11503.6 -9.43895 0 0 1.09 -1 -1 130.8 MiB 1.41 1.44341 1.30338 130.8 MiB -1 0.43 - k6_frac_N10_frac_chain_mem32K_40nm.xml spree.v common 30.13 vpr 85.89 MiB -1 -1 3.42 35668 16 0.66 -1 -1 38188 -1 -1 61 45 3 1 success v8.0.0-12163-g0dba7016b-dirty Release VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-6.8.0-51-generic x86_64 2025-02-19T17:54:19 
haydar-Precision-5820-Tower /home/haydar/vtr-verilog-to-routing 87948 45 32 936 77 1 877 142 20 20 400 -1 vtr_extra_small -1 -1 8150 10132 342 5474 4316 85.9 MiB 13.87 0.01 11.604 -7183.31 -11.604 11.604 0.12 0.00408429 0.00353135 0.1504 0.130171 85.9 MiB 13.87 85.9 MiB 4.85 14198 16.2449 3595 4.11327 4591 12199 998795 227151 2.07112e+07 5.32753e+06 1.91495e+06 4787.38 18 44576 305072 -1 12.3029 12.3029 -7889.9 -12.3029 0 0 0.50 -1 -1 85.9 MiB 0.52 0.392268 0.351555 85.9 MiB -1 0.12 - k6_frac_N10_frac_chain_mem32K_40nm.xml stereovision3.v common 3.81 vpr 76.37 MiB -1 -1 0.78 26668 4 0.17 -1 -1 36060 -1 -1 14 11 0 0 success v8.0.0-12163-g0dba7016b-dirty Release VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-6.8.0-51-generic x86_64 2025-02-19T17:54:19 haydar-Precision-5820-Tower /home/haydar/vtr-verilog-to-routing 78204 11 2 140 13 2 110 27 20 20 400 -1 vtr_extra_small -1 -1 463 947 88 465 394 76.4 MiB 0.81 0.00 2.10685 -180.654 -2.10685 2.09683 0.09 0.000595701 0.000502088 0.0227461 0.019995 76.4 MiB 0.81 76.4 MiB 0.56 644 6.19231 188 1.80769 263 414 8183 2301 2.07112e+07 754516 1.12964e+06 2824.09 11 37792 180905 -1 2.22067 2.12928 -180.288 -2.22067 0 0 0.25 -1 -1 76.4 MiB 0.05 0.0628381 0.0574908 76.4 MiB -1 0.09 + k6_frac_N10_frac_chain_mem32K_40nm.xml boundtop.v common 13.72 vpr 82.71 MiB -1 -1 9.40 48008 3 0.64 -1 -1 38464 -1 -1 49 196 1 0 success v8.0.0-12213-gab28a6e25-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-02T13:12:27 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 84696 196 193 800 389 1 626 439 20 20 400 -1 vtr_extra_small -1 -1 5038 43781 994 11657 31130 82.7 MiB 1.20 0.01 2.64203 -1161.38 -2.64203 2.64203 0.06 0.00194633 0.00168528 0.0656636 0.0576391 82.7 MiB 1.20 82.7 MiB 1.19 7055 11.4344 1950 3.16045 1755 2899 209816 51700 2.07112e+07 3.18881e+06 1.26946e+06 3173.65 12 38988 203232 -1 2.88892 2.88892 -1279.55 -2.88892 0 0 0.18 -1 -1 82.7 MiB 0.12 0.151559 0.137113 82.7 MiB -1 0.06 + 
k6_frac_N10_frac_chain_mem32K_40nm.xml ch_intrinsics.v common 1.92 vpr 76.88 MiB -1 -1 0.24 22164 3 0.07 -1 -1 36924 -1 -1 50 99 1 0 success v8.0.0-12213-gab28a6e25-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-02T13:12:27 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 78728 99 130 240 229 1 225 280 20 20 400 -1 vtr_extra_small -1 -1 969 25900 2089 2818 20993 76.9 MiB 0.43 0.00 2.12472 -144.562 -2.12472 2.12472 0.06 0.000592342 0.000524403 0.0208203 0.0186466 76.9 MiB 0.43 76.9 MiB 0.42 1536 9.25301 447 2.69277 327 496 23592 6723 2.07112e+07 3.2427e+06 1.31074e+06 3276.84 9 39388 210115 -1 2.28989 2.28989 -164.008 -2.28989 0 0 0.19 -1 -1 76.9 MiB 0.03 0.0413714 0.0377043 76.9 MiB -1 0.06 + k6_frac_N10_frac_chain_mem32K_40nm.xml or1200.v common 38.14 vpr 130.88 MiB -1 -1 3.66 65032 8 3.02 -1 -1 44616 -1 -1 244 385 2 1 success v8.0.0-12213-gab28a6e25-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-02T13:12:27 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 134016 385 362 3324 747 1 2821 994 30 30 900 -1 vtr_small -1 -1 35299 179482 11454 58782 109246 130.9 MiB 12.55 0.07 8.95676 -9967.32 -8.95676 8.95676 0.27 0.00948948 0.00834711 0.432455 0.38169 130.9 MiB 12.55 130.9 MiB 12.14 47492 16.9252 12129 4.32252 12487 37871 2052105 372049 4.8774e+07 1.46421e+07 6.56785e+06 7297.61 18 120772 1084977 -1 9.20319 9.20319 -10439.6 -9.20319 0 0 1.26 -1 -1 130.9 MiB 0.95 0.988978 0.894836 130.9 MiB -1 0.26 + k6_frac_N10_frac_chain_mem32K_40nm.xml spree.v common 10.70 vpr 86.04 MiB -1 -1 2.13 35476 16 0.41 -1 -1 38688 -1 -1 61 45 3 1 success v8.0.0-12213-gab28a6e25-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-02T13:12:27 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 88104 45 32 936 77 1 832 142 20 20 400 -1 vtr_extra_small -1 -1 7549 10872 603 5576 4693 86.0 MiB 3.14 0.01 10.8385 -7022.95 -10.8385 10.8385 0.08 
0.00253207 0.00206492 0.0927064 0.0774267 86.0 MiB 3.14 86.0 MiB 3.00 12644 15.2521 3237 3.90470 3989 10661 964169 235619 2.07112e+07 5.32753e+06 1.91495e+06 4787.38 16 44576 305072 -1 11.5449 11.5449 -7513.22 -11.5449 0 0 0.31 -1 -1 86.0 MiB 0.31 0.231829 0.203147 86.0 MiB -1 0.08 + k6_frac_N10_frac_chain_mem32K_40nm.xml stereovision3.v common 2.19 vpr 76.48 MiB -1 -1 0.46 26504 4 0.11 -1 -1 36344 -1 -1 13 11 0 0 success v8.0.0-12213-gab28a6e25-dirty release VTR_ASSERT_LEVEL=3 GNU 13.2.0 on Linux-6.8.0-49-generic x86_64 2025-03-02T13:12:27 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing 78320 11 2 140 13 2 81 26 20 20 400 -1 vtr_extra_small -1 -1 353 1014 97 516 401 76.5 MiB 0.39 0.00 2.10685 -173.976 -2.10685 1.95087 0.05 0.000439282 0.000361743 0.015698 0.0134088 76.5 MiB 0.39 76.5 MiB 0.36 498 6.64000 128 1.70667 163 286 6168 1566 2.07112e+07 700622 1.12964e+06 2824.09 13 37792 180905 -1 2.19809 1.96041 -173.241 -2.19809 0 0 0.16 -1 -1 76.5 MiB 0.02 0.0365314 0.0321375 76.5 MiB -1 0.05 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/task_list.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/task_list.txt index f1bc95cf17b..57ca61bb798 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/task_list.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/task_list.txt @@ -1,6 +1,8 @@ regression_tests/vtr_reg_strong/basic_ap regression_tests/vtr_reg_strong/strong_ap/mcnc regression_tests/vtr_reg_strong/strong_ap/vtr_chain +regression_tests/vtr_reg_strong/strong_ap/bipartitioning_global_placer +regression_tests/vtr_reg_strong/strong_ap/flowbased_global_placer regression_tests/vtr_reg_strong/strong_ap/naive_full_legalizer regression_tests/vtr_reg_strong/strong_ap/appack_full_legalizer regression_tests/vtr_reg_strong/strong_ap/annealer_detailed_placer