diff --git a/vpr/src/analytical_place/analytical_placement_flow.cpp b/vpr/src/analytical_place/analytical_placement_flow.cpp index 7df79bf16e6..139c05e61b5 100644 --- a/vpr/src/analytical_place/analytical_placement_flow.cpp +++ b/vpr/src/analytical_place/analytical_placement_flow.cpp @@ -8,19 +8,21 @@ #include "analytical_placement_flow.h" #include "ap_netlist.h" #include "atom_netlist.h" +#include "full_legalizer.h" #include "gen_ap_netlist_from_atoms.h" #include "globals.h" +#include "partial_placement.h" #include "prepack.h" #include "user_place_constraints.h" #include "vpr_context.h" -#include "vpr_error.h" #include "vpr_types.h" +#include "vtr_assert.h" #include "vtr_time.h" void run_analytical_placement_flow(t_vpr_setup& vpr_setup) { (void)vpr_setup; // Start an overall timer for the Analytical Placement flow. - vtr::ScopedStartFinishTimer timer("Analytical Placement Flow"); + vtr::ScopedStartFinishTimer timer("Analytical Placement"); // The global state used/modified by this flow. const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist; @@ -37,9 +39,38 @@ void run_analytical_placement_flow(t_vpr_setup& vpr_setup) { prepacker, constraints); - // AP is currently under-construction. Fail gracefully just in case this - // is somehow being called. - VPR_FATAL_ERROR(VPR_ERROR_AP, - "Analytical Placement flow not implemented yet"); + // Run the Global Placer + // For now, just put all the moveable blocks at the center of the device + // grid. This will be replaced later. This is just for testing. 
+ PartialPlacement p_placement(ap_netlist); + const size_t device_width = device_ctx.grid.width(); + const size_t device_height = device_ctx.grid.height(); + double device_center_x = static_cast<double>(device_width) / 2.0; + double device_center_y = static_cast<double>(device_height) / 2.0; + for (APBlockId ap_blk_id : ap_netlist.blocks()) { + if (ap_netlist.block_mobility(ap_blk_id) != APBlockMobility::MOVEABLE) + continue; + // If the APBlock is moveable, put it at the center of the device. + p_placement.block_x_locs[ap_blk_id] = device_center_x; + p_placement.block_y_locs[ap_blk_id] = device_center_y; + } + VTR_ASSERT(p_placement.verify(ap_netlist, + device_width, + device_height, + device_ctx.grid.get_num_layers())); + + // Run the Full Legalizer. + FullLegalizer full_legalizer(ap_netlist, + vpr_setup, + device_ctx.grid, + device_ctx.arch, + atom_nlist, + prepacker, + device_ctx.logical_block_types, + vpr_setup.PackerRRGraph, + device_ctx.arch->models, + device_ctx.arch->model_library, + vpr_setup.PackerOpts); + full_legalizer.legalize(p_placement); } diff --git a/vpr/src/analytical_place/full_legalizer.cpp b/vpr/src/analytical_place/full_legalizer.cpp new file mode 100644 index 00000000000..73fd7af3152 --- /dev/null +++ b/vpr/src/analytical_place/full_legalizer.cpp @@ -0,0 +1,397 @@ +/** + * @file + * @author Alex Singer + * @date September 2024 + * @brief Implements the full legalizer in the AP flow. 
+ */ + +#include "full_legalizer.h" +#include +#include +#include +#include +#include "partial_placement.h" +#include "ShowSetup.h" +#include "ap_netlist_fwd.h" +#include "check_netlist.h" +#include "cluster.h" +#include "cluster_legalizer.h" +#include "cluster_util.h" +#include "clustered_netlist.h" +#include "globals.h" +#include "initial_placement.h" +#include "logic_types.h" +#include "pack.h" +#include "physical_types.h" +#include "place_constraints.h" +#include "vpr_api.h" +#include "vpr_context.h" +#include "vpr_error.h" +#include "vpr_types.h" +#include "vtr_assert.h" +#include "vtr_ndmatrix.h" +#include "vtr_strong_id.h" +#include "vtr_time.h" +#include "vtr_vector.h" + +namespace { + +/// @brief A unique ID for each root tile on the device. +/// +/// This is used for putting the molecules in bins for packing. +// FIXME: Bring this into the device_grid. +// - Maybe this can be called DeviceRootTileId or something. +struct device_tile_id_tag {}; +typedef vtr::StrongId DeviceTileId; + +/** + * @brief Helper class to place cluster in the AP context. + * + * A lot of this code was lifted from the Initial Placer within the placement + * flow. + * TODO: Should try to do the same thing we did with the ClusterLegalizer to + * unify the two flows and make it more stable! + */ +class APClusterPlacer { +private: + // Get the macro for the given cluster block. + t_pl_macro get_macro(ClusterBlockId clb_blk_id) { + // Basically stolen from initial_placement.cpp:place_one_block + // TODO: Make this a cleaner interface and share the code. + int imacro; + get_imacro_from_iblk(&imacro, clb_blk_id, g_vpr_ctx.placement().pl_macros); + // If this block is part of a macro, return it. + if (imacro != -1) + return g_vpr_ctx.placement().pl_macros[imacro]; + // If not, create a "fake" macro with a single element. 
+ t_pl_macro_member macro_member; + t_pl_offset block_offset(0, 0, 0, 0); + macro_member.blk_index = clb_blk_id; + macro_member.offset = block_offset; + + t_pl_macro pl_macro; + pl_macro.members.push_back(macro_member); + return pl_macro; + } + +public: + /** + * @brief Constructor for the APClusterPlacer + * + * Initializes internal and global state necessary to place clusters on the + * FPGA device. + */ + APClusterPlacer() { + // FIXME: This was stolen from place/place.cpp + // it used a static method, just taking what I think I will need. + + auto& block_locs = g_vpr_ctx.mutable_placement().mutable_block_locs(); + auto& grid_blocks = g_vpr_ctx.mutable_placement().mutable_grid_blocks(); + auto& blk_loc_registry = g_vpr_ctx.mutable_placement().mutable_blk_loc_registry(); + init_placement_context(block_locs, grid_blocks); + + // stolen from place/place.cpp:alloc_and_load_try_swap_structs + // FIXME: set cube_bb to false by hand, should be passed in. + g_vpr_ctx.mutable_placement().cube_bb = false; + g_vpr_ctx.mutable_placement().compressed_block_grids = create_compressed_block_grids(); + + // Initialize the macros + const t_arch* arch = g_vpr_ctx.device().arch; + g_vpr_ctx.mutable_placement().pl_macros = alloc_and_load_placement_macros(arch->Directs, arch->num_directs); + + // TODO: The next few steps will be basically a direct copy of the initial + // placement code since it does everything we need! It would be nice + // to share the code. + + // Clear the grid locations (stolen from initial_placement) + clear_all_grid_locs(blk_loc_registry); + + // Deal with the placement constraints. + propagate_place_constraints(); + + mark_fixed_blocks(blk_loc_registry); + + alloc_and_load_compressed_cluster_constraints(); + } + + /** + * @brief Given a cluster and tile it wants to go into, try to place the + * cluster at this tile's position. 
+ */ + bool place_cluster(ClusterBlockId clb_blk_id, + const t_physical_tile_loc& tile_loc, + int sub_tile) { + const DeviceContext& device_ctx = g_vpr_ctx.device(); + // FIXME: THIS MUST TAKE INTO ACCOUNT THE CONSTRAINTS AS WELL!!! + // - Right now it is just implied. + // - Will work but is unstable. + const auto& block_locs = g_vpr_ctx.placement().block_locs(); + auto& blk_loc_registry = g_vpr_ctx.mutable_placement().mutable_blk_loc_registry(); + VTR_ASSERT(!is_block_placed(clb_blk_id, block_locs) && "Block already placed. Is this intentional?"); + t_pl_macro pl_macro = get_macro(clb_blk_id); + t_pl_loc to_loc; + to_loc.x = tile_loc.x; + to_loc.y = tile_loc.y; + to_loc.layer = tile_loc.layer_num; + // Special case where the tile has no sub-tiles. It just cannot be placed. + if (device_ctx.grid.get_physical_type(tile_loc)->sub_tiles.size() == 0) + return false; + VTR_ASSERT(sub_tile >= 0 && sub_tile < device_ctx.grid.get_physical_type(tile_loc)->capacity); + // FIXME: Do this better. + // - May need to try all the sub-tiles in a location. + // - https://github.com/AlexandreSinger/vtr-verilog-to-routing/blob/feature-analytical-placer/vpr/src/place/initial_placement.cpp#L755 + to_loc.sub_tile = sub_tile; + return try_place_macro(pl_macro, to_loc, blk_loc_registry); + } + + // This is not the best way of doing things, but it's the simplest. Given a + // cluster, just find somewhere for it to go. + // TODO: Make this like the initial placement code where we first try + // centroid, then random, then exhaustive. + bool exhaustively_place_cluster(ClusterBlockId clb_blk_id) { + const auto& block_locs = g_vpr_ctx.placement().block_locs(); + auto& blk_loc_registry = g_vpr_ctx.mutable_placement().mutable_blk_loc_registry(); + VTR_ASSERT(!is_block_placed(clb_blk_id, block_locs) && "Block already placed. Is this intentional?"); + t_pl_macro pl_macro = get_macro(clb_blk_id); + const PartitionRegion& pr = is_cluster_constrained(clb_blk_id) ? 
g_vpr_ctx.floorplanning().cluster_constraints[clb_blk_id] : get_device_partition_region(); + t_logical_block_type_ptr block_type = g_vpr_ctx.clustering().clb_nlist.block_type(clb_blk_id); + // FIXME: We really should get this from the place context, not the device context. + // - Stealing it for now to get this to work. + enum e_pad_loc_type pad_loc_type = g_vpr_ctx.device().pad_loc_type; + return try_place_macro_exhaustively(pl_macro, pr, block_type, pad_loc_type, blk_loc_registry); + } +}; + +} // namespace + +/** + * @brief Create a new cluster for the given seed molecule using the cluster + * legalizer. + * + * @param seed_molecule The molecule to use as a starting + * point for the cluster. + * @param cluster_legalizer A cluster legalizer object to build + * the cluster. + * @param primitive_candidate_block_types A list of candidate block types for + * the given molecule. + */ +static LegalizationClusterId create_new_cluster(t_pack_molecule* seed_molecule, + ClusterLegalizer& cluster_legalizer, + const std::map>& primitive_candidate_block_types) { + const AtomContext& atom_ctx = g_vpr_ctx.atom(); + // This was stolen from pack/cluster_util.cpp:start_new_cluster + // It tries to find a block type and mode for the given molecule. + // TODO: This should take into account the tile this molecule wants to be + // placed into. + // TODO: The original implementation sorted based on balance. Perhaps this + // should do the same. 
+ AtomBlockId root_atom = seed_molecule->atom_block_ids[seed_molecule->root]; + const t_model* root_model = atom_ctx.nlist.block_model(root_atom); + + auto itr = primitive_candidate_block_types.find(root_model); + VTR_ASSERT(itr != primitive_candidate_block_types.end()); + const std::vector& candidate_types = itr->second; + + for (t_logical_block_type_ptr type : candidate_types) { + int num_modes = type->pb_graph_head->pb_type->num_modes; + for (int mode = 0; mode < num_modes; mode++) { + e_block_pack_status pack_status = e_block_pack_status::BLK_STATUS_UNDEFINED; + LegalizationClusterId new_cluster_id; + std::tie(pack_status, new_cluster_id) = cluster_legalizer.start_new_cluster(seed_molecule, type, mode); + if (pack_status == e_block_pack_status::BLK_PASSED) + return new_cluster_id; + } + } + // This should never happen. + VPR_FATAL_ERROR(VPR_ERROR_AP, + "Unable to create a cluster for the given seed molecule"); + return LegalizationClusterId(); +} + +void FullLegalizer::create_clusters(const PartialPlacement& p_placement) { + // PACKING: + // Initialize the cluster legalizer (Packing) + // FIXME: The legalization strategy is currently set to full. Should handle + // this better to make it faster. + t_pack_high_fanout_thresholds high_fanout_thresholds(packer_opts_.high_fanout_threshold); + ClusterLegalizer cluster_legalizer(atom_netlist_, + prepacker_, + logical_block_types_, + lb_type_rr_graphs_, + user_models_, + library_models_, + packer_opts_.target_external_pin_util, + high_fanout_thresholds, + ClusterLegalizationStrategy::FULL, + packer_opts_.enable_pin_feasibility_filter, + packer_opts_.feasible_block_array_size, + packer_opts_.pack_verbosity); + // Create clusters for each tile. + // Start by giving each root tile a unique ID. 
+ size_t grid_width = device_grid_.width(); + size_t grid_height = device_grid_.height(); + vtr::NdMatrix tile_grid({grid_width, grid_height}); + size_t num_device_tiles = 0; + for (size_t x = 0; x < grid_width; x++) { + for (size_t y = 0; y < grid_height; y++) { + // Ignoring 3D placement for now. + t_physical_tile_loc tile_loc(x, y, 0); + // Ignore non-root locations + size_t width_offset = device_grid_.get_width_offset(tile_loc); + size_t height_offset = device_grid_.get_height_offset(tile_loc); + if (width_offset != 0 || height_offset != 0) { + tile_grid[x][y] = tile_grid[x - width_offset][y - height_offset]; + continue; + } + tile_grid[x][y] = DeviceTileId(num_device_tiles); + num_device_tiles++; + } + } + // Next, collect the AP blocks which will go into each root tile + VTR_ASSERT_SAFE(p_placement.verify_locs(ap_netlist_, grid_width, grid_height)); + vtr::vector> blocks_in_tiles(num_device_tiles); + for (APBlockId ap_blk_id : ap_netlist_.blocks()) { + // FIXME: Add these conversions to the PartialPlacement class. + t_physical_tile_loc tile_loc = p_placement.get_containing_tile_loc(ap_blk_id); + VTR_ASSERT(p_placement.block_layer_nums[ap_blk_id] == 0); + DeviceTileId tile_id = tile_grid[tile_loc.x][tile_loc.y]; + blocks_in_tiles[tile_id].push_back(ap_blk_id); + } + // Create the legalized clusters per tile. + std::map> + primitive_candidate_block_types = identify_primitive_candidate_block_types(); + for (size_t tile_id_idx = 0; tile_id_idx < num_device_tiles; tile_id_idx++) { + DeviceTileId tile_id = DeviceTileId(tile_id_idx); + // Create the molecule list + std::list mol_list; + for (APBlockId ap_blk_id : blocks_in_tiles[tile_id]) { + // FIXME: The netlist stores a const pointer to mol; but the cluster + // legalizer does not accept this. Need to fix one or the other. + // For now, using const_cast. 
+ t_pack_molecule* mol = const_cast<t_pack_molecule*>(ap_netlist_.block_molecule(ap_blk_id)); + mol_list.push_back(mol); + } + // Clustering algorithm: Create clusters one at a time. + while (!mol_list.empty()) { + // Arbitrarily choose the first molecule as a seed molecule. + t_pack_molecule* seed_mol = mol_list.front(); + mol_list.pop_front(); + // Use the seed molecule to create a cluster for this tile. + LegalizationClusterId new_cluster_id = create_new_cluster(seed_mol, cluster_legalizer, primitive_candidate_block_types); + // Insert all molecules that you can into the cluster. + // NOTE: If the mol_list was somehow sorted, we can just stop at + // first failure! + auto it = mol_list.begin(); + while (it != mol_list.end()) { + t_pack_molecule* mol = *it; + if (!cluster_legalizer.is_molecule_compatible(mol, new_cluster_id)) { + ++it; + continue; + } + // Try to insert it. If successful, remove from list. + e_block_pack_status pack_status = cluster_legalizer.add_mol_to_cluster(mol, new_cluster_id); + if (pack_status == e_block_pack_status::BLK_PASSED) { + it = mol_list.erase(it); + } else { + ++it; + } + } + // Once all molecules have been inserted, clean the cluster. + cluster_legalizer.clean_cluster(new_cluster_id); + } + } + + // Check and output the clustering. + std::unordered_set is_clock = alloc_and_load_is_clock(); + check_and_output_clustering(cluster_legalizer, packer_opts_, is_clock, arch_); + // Reset the cluster legalizer. This is required to load the packing. + cluster_legalizer.reset(); + // Regenerate the clustered netlist from the file generated previously. + // FIXME: This writing and loading from a file is wasteful. Should generate + // the clusters directly from the cluster legalizer. 
+ vpr_load_packing(vpr_setup_, *arch_); + load_cluster_constraints(); + const ClusteredNetlist& clb_nlist = g_vpr_ctx.clustering().clb_nlist; + + // Verify the packing and print some info + check_netlist(packer_opts_.pack_verbosity); + writeClusteredNetlistStats(vpr_setup_.FileNameOpts.write_block_usage); + print_pb_type_count(clb_nlist); +} + +void FullLegalizer::place_clusters(const ClusteredNetlist& clb_nlist, + const PartialPlacement& p_placement) { + // PLACING: + // Create a lookup from the AtomBlockId to the APBlockId + vtr::vector<AtomBlockId, APBlockId> atom_to_ap_block(atom_netlist_.blocks().size()); + for (APBlockId ap_blk_id : ap_netlist_.blocks()) { + const t_pack_molecule* blk_mol = ap_netlist_.block_molecule(ap_blk_id); + for (AtomBlockId atom_blk_id : blk_mol->atom_block_ids) { + // Skip invalid IDs (unsafe as an index); an atom must be in only one AP block. + if (!atom_blk_id.is_valid()) continue; + VTR_ASSERT(!atom_to_ap_block[atom_blk_id].is_valid()); + atom_to_ap_block[atom_blk_id] = ap_blk_id; + } + } + // Move the clusters to where they want to be first. + // TODO: The fixed clusters should probably be moved first for legality + // reasons. + APClusterPlacer ap_cluster_placer; + std::vector<ClusterBlockId> unplaced_clusters; + for (ClusterBlockId cluster_blk_id : clb_nlist.blocks()) { + // Assume that the cluster will always want to be placed wherever the + // first atom in the cluster wants to be placed. + // FIXME: This assumption does not always hold! Will need to unify the + // cluster legalizer and the clustered netlist! 
+ const std::unordered_set& atoms_in_cluster = g_vpr_ctx.clustering().atoms_lookup[cluster_blk_id]; + VTR_ASSERT(atoms_in_cluster.size() > 0); + AtomBlockId first_atom_blk = *atoms_in_cluster.begin(); + APBlockId first_ap_blk = atom_to_ap_block[first_atom_blk]; + size_t blk_sub_tile = p_placement.block_sub_tiles[first_ap_blk]; + t_physical_tile_loc tile_loc = p_placement.get_containing_tile_loc(first_ap_blk); + bool placed = ap_cluster_placer.place_cluster(cluster_blk_id, tile_loc, blk_sub_tile); + if (placed) + continue; + // FIXME: Should now try all sub-tiles at this tile location. + // - May need to try all the sub-tiles in a location. + // - however this may need to be done after. + // - https://github.com/AlexandreSinger/vtr-verilog-to-routing/blob/feature-analytical-placer/vpr/src/place/initial_placement.cpp#L755 + + // Add to list of unplaced clusters. + unplaced_clusters.push_back(cluster_blk_id); + } + + // Any clusters that were not placed previously are exhaustively placed. + for (ClusterBlockId clb_blk_id : unplaced_clusters) { + bool success = ap_cluster_placer.exhaustively_place_cluster(clb_blk_id); + if (!success) { + VPR_FATAL_ERROR(VPR_ERROR_AP, + "Unable to find valid place for cluster in AP placement!"); + } + } + + // Print some statistics about what happened here. This will be useful to + // improve other algorithms. + VTR_LOG("Number of clusters which needed to be moved: %zu\n", unplaced_clusters.size()); + + // TODO: Print a breakdown per block type. We may find that specific block + // types are always conflicting. + + // FIXME: Allocate and load moveable blocks? + // - This may be needed to perform SA. Not needed right now. + + // TODO: Check initial placement legality +} + +void FullLegalizer::legalize(const PartialPlacement& p_placement) { + // Create a scoped timer for the full legalizer + vtr::ScopedStartFinishTimer full_legalizer_timer("AP Full Legalizer"); + + // Pack the atoms into clusters based on the partial placement. 
+ create_clusters(p_placement); + const ClusteredNetlist& clb_nlist = g_vpr_ctx.clustering().clb_nlist; + + // Place the clusters based on where the atoms want to be placed. + place_clusters(clb_nlist, p_placement); +} + diff --git a/vpr/src/analytical_place/full_legalizer.h b/vpr/src/analytical_place/full_legalizer.h new file mode 100644 index 00000000000..e5f72586b28 --- /dev/null +++ b/vpr/src/analytical_place/full_legalizer.h @@ -0,0 +1,110 @@ +/** + * @file + * @author Alex Singer + * @date September 2024 + * @brief Defines the FullLegalizer class which takes a partial AP placement + * and generates a fully legal clustering and placement which can be + * routed by VTR. + */ + +#pragma once + +#include <vector> + +// Forward declarations +class APNetlist; +class AtomNetlist; +class ClusteredNetlist; +class DeviceGrid; +class PartialPlacement; +class Prepacker; +struct t_arch; +struct t_lb_type_rr_node; +struct t_logical_block_type; +struct t_model; +struct t_packer_opts; +struct t_vpr_setup; + +/** + * @brief The full legalizer in an AP flow + * + * Given a valid partial placement (of any level of legality), will produce a + * fully legal clustering and placement for use in the rest of the VTR flow. + */ +class FullLegalizer { +public: + /** + * @brief Constructor of the Full Legalizer class. + * + * Brings in all the necessary state here. This is the state needed from the + * AP Context, the Packer Context, and the Placer Context. 
+ */ + FullLegalizer(const APNetlist& ap_netlist, + t_vpr_setup& vpr_setup, + const DeviceGrid& device_grid, + const t_arch* arch, + const AtomNetlist& atom_netlist, + const Prepacker& prepacker, + const std::vector& logical_block_types, + std::vector* lb_type_rr_graphs, + const t_model* user_models, + const t_model* library_models, + const t_packer_opts& packer_opts) + : ap_netlist_(ap_netlist), + vpr_setup_(vpr_setup), + device_grid_(device_grid), + arch_(arch), + atom_netlist_(atom_netlist), + prepacker_(prepacker), + logical_block_types_(logical_block_types), + lb_type_rr_graphs_(lb_type_rr_graphs), + user_models_(user_models), + library_models_(library_models), + packer_opts_(packer_opts) {} + + /** + * @brief Perform legalization on the given partial placement solution + * + * @param p_placement A valid partial placement (passes verify method). + * This implies that all blocks are placed on the + * device grid and fixed blocks are observed. + */ + void legalize(const PartialPlacement& p_placement); + +private: + /** + * @brief Helper method to create the clusters from the given partial + * placement. + * TODO: Should return a ClusteredNetlist object, but need to wait until + * it is separated from load_cluster_constraints. + */ + void create_clusters(const PartialPlacement& p_placement); + + /** + * @brief Helper method to place the clusters based on the given partial + * placement. + */ + void place_clusters(const ClusteredNetlist& clb_nlist, + const PartialPlacement& p_placement); + + // AP Context Info + const APNetlist& ap_netlist_; + // Overall Setup Info + // FIXME: I do not like bringing all of this in. Perhaps clean up the methods + // that use it. 
+ t_vpr_setup& vpr_setup_; + // Device Context Info + const DeviceGrid& device_grid_; + const t_arch* arch_; + // Packing Context Info + const AtomNetlist& atom_netlist_; + const Prepacker& prepacker_; + const std::vector& logical_block_types_; + std::vector* lb_type_rr_graphs_; + const t_model* user_models_; + const t_model* library_models_; + const t_packer_opts& packer_opts_; + // Placement Context Info + // TODO: Populate this once the placer is cleaned up some. +}; + diff --git a/vpr/src/analytical_place/partial_placement.cpp b/vpr/src/analytical_place/partial_placement.cpp index 491764bf186..aa755e36911 100644 --- a/vpr/src/analytical_place/partial_placement.cpp +++ b/vpr/src/analytical_place/partial_placement.cpp @@ -12,7 +12,7 @@ bool PartialPlacement::verify_locs(const APNetlist& netlist, size_t grid_width, - size_t grid_height) { + size_t grid_height) const { // Make sure all of the loc values are there. if (block_x_locs.size() != netlist.blocks().size()) return false; @@ -43,7 +43,7 @@ bool PartialPlacement::verify_locs(const APNetlist& netlist, } bool PartialPlacement::verify_layer_nums(const APNetlist& netlist, - size_t grid_num_layers) { + size_t grid_num_layers) const { // Make sure all of the layer nums are there if (block_layer_nums.size() != netlist.blocks().size()) return false; @@ -62,7 +62,7 @@ bool PartialPlacement::verify_layer_nums(const APNetlist& netlist, return true; } -bool PartialPlacement::verify_sub_tiles(const APNetlist& netlist) { +bool PartialPlacement::verify_sub_tiles(const APNetlist& netlist) const { // Make sure all of the sub tiles are there if (block_sub_tiles.size() != netlist.blocks().size()) return false; @@ -88,7 +88,7 @@ bool PartialPlacement::verify_sub_tiles(const APNetlist& netlist) { bool PartialPlacement::verify(const APNetlist& netlist, size_t grid_width, size_t grid_height, - size_t grid_num_layers) { + size_t grid_num_layers) const { // Check that all the other verify methods passed. 
if (!verify_locs(netlist, grid_width, grid_height)) return false; diff --git a/vpr/src/analytical_place/partial_placement.h b/vpr/src/analytical_place/partial_placement.h index 304e94c0784..132fd42d919 100644 --- a/vpr/src/analytical_place/partial_placement.h +++ b/vpr/src/analytical_place/partial_placement.h @@ -12,7 +12,9 @@ #pragma once +#include #include "ap_netlist.h" +#include "physical_types.h" #include "vtr_vector.h" /** @@ -94,6 +96,44 @@ struct PartialPlacement { } } + /** + * @brief Get the location of the physical tile that contains the given + * AP block. + * + * VTR uses an integer grid. In AP, we consider a tile at (1,1) to be + * centered at (1.5,1.5). When converting from doubles back to integer + * tiles, we simply take the floor, so the tile above would receive all + * points from [(1,1) to (2,2)). When converting fixed blocks from the + * integral VPR grid to the AP locations, we should therefore add (0.5,0.5) + * to them so they are centered in their grid tiles (assuming the tiles are + * 1x1). + * + * FIXME: Ideally this should return an ID to the tile, not a location. + * This is important since there is a distinction between the two. + * We know a block will be at that tile, but it would not be at the + * corner of the block (likely it would be at the center). + */ + inline t_physical_tile_loc get_containing_tile_loc(APBlockId blk_id) const { + // We take the floor here since we want to know which tile contains this + // block. On a grid, if the block is located at x=0.99999, it would still + // be in the first tile. This is because we assume that the blocks will + // ultimately end up in the center of the tile, not at the top left + // corner of it. The physical tile loc is just a way of identifying that + // tile. + // TODO: This may be a bit more complicated than this. This assumes that + // all tiles are 1x1, but it could be the case that this is on + // the edge of a much larger block. 
In reality this should try + // to go into the tile where it is closest to the center. What + // is written here is not necessarily wrong, but it may put blocks + // which are on the edge of large blocks into the large blocks. + // However, this may not even matter if the partial legalizer is + // doing its job! + int tile_x_loc = std::floor(block_x_locs[blk_id]); + int tile_y_loc = std::floor(block_y_locs[blk_id]); + int tile_layer = std::floor(block_layer_nums[blk_id]); + return t_physical_tile_loc(tile_x_loc, tile_y_loc, tile_layer); + } + /** * @brief Verify the block_x_locs and block_y_locs vectors * @@ -108,7 +148,7 @@ struct PartialPlacement { */ bool verify_locs(const APNetlist& netlist, size_t grid_width, - size_t grid_height); + size_t grid_height) const; /** * @brief Verify the block_layer_nums vector @@ -121,7 +161,8 @@ struct PartialPlacement { * @param netlist The APNetlist used to generate this placement * @param grid_num_layers The number of layers in the device grid */ - bool verify_layer_nums(const APNetlist& netlist, size_t grid_num_layers); + bool verify_layer_nums(const APNetlist& netlist, + size_t grid_num_layers) const; /** * @brief Verify the sub_tiles @@ -131,7 +172,7 @@ struct PartialPlacement { * * @param netlist The APNetlist used to generate this placement */ - bool verify_sub_tiles(const APNetlist& netlist); + bool verify_sub_tiles(const APNetlist& netlist) const; /** * @brief Verify the entire partial placement object @@ -146,6 +187,6 @@ struct PartialPlacement { bool verify(const APNetlist& netlist, size_t grid_width, size_t grid_height, - size_t grid_num_layers); + size_t grid_num_layers) const; }; diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index cf1c41669db..cee5d919c5b 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -411,6 +411,15 @@ bool vpr_flow(t_vpr_setup& vpr_setup, t_arch& arch) { // TODO: Make this return a bool if the placement was successful or not. 
run_analytical_placement_flow(vpr_setup); } + // Print the placement generated by AP to a .place file. + auto& filename_opts = vpr_setup.FileNameOpts; + auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& block_locs = g_vpr_ctx.placement().block_locs(); + auto& placement_id = g_vpr_ctx.mutable_placement().placement_id; + placement_id = print_place(filename_opts.NetFile.c_str(), + cluster_ctx.clb_nlist.netlist_id().c_str(), + filename_opts.PlaceFile.c_str(), + block_locs); } bool is_flat = vpr_setup.RouterOpts.flat_routing; diff --git a/vpr/src/pack/cluster_legalizer.cpp b/vpr/src/pack/cluster_legalizer.cpp index a37e541ae7a..1d56a7db8cc 100644 --- a/vpr/src/pack/cluster_legalizer.cpp +++ b/vpr/src/pack/cluster_legalizer.cpp @@ -38,6 +38,27 @@ #include "vtr_vector.h" #include "vtr_vector_map.h" +/** + * @brief Counts the total number of logic models that the architecture can + * implement. + * + * @param user_models A linked list of logic models. + * @return The total number of models in the linked list + */ +static size_t count_models(const t_model* user_models) { + if (user_models == nullptr) + return 0; + + size_t n_models = 0; + const t_model* cur_model = user_models; + while (cur_model != nullptr) { + n_models++; + cur_model = cur_model->next; + } + + return n_models; +} + /* * @brief Gets the max cluster size that any logical block can have. * @@ -1636,7 +1657,8 @@ ClusterLegalizer::ClusterLegalizer(const AtomNetlist& atom_netlist, const Prepacker& prepacker, const std::vector& logical_block_types, std::vector* lb_type_rr_graphs, - size_t num_models, + const t_model* user_models, + const t_model* library_models, const std::vector& target_external_pin_util_str, const t_pack_high_fanout_thresholds& high_fanout_thresholds, ClusterLegalizationStrategy cluster_legalization_strategy, @@ -1661,7 +1683,7 @@ ClusterLegalizer::ClusterLegalizer(const AtomNetlist& atom_netlist, // Get a reference to the rr graphs. 
lb_type_rr_graphs_ = lb_type_rr_graphs; // Get the number of models in the architecture. - num_models_ = num_models; + num_models_ = count_models(user_models) + count_models(library_models); // Find all NoC router atoms. std::vector noc_atoms = find_noc_router_atoms(atom_netlist); update_noc_reachability_partitions(noc_atoms, diff --git a/vpr/src/pack/cluster_legalizer.h b/vpr/src/pack/cluster_legalizer.h index ddf4288a220..1eed13ebd99 100644 --- a/vpr/src/pack/cluster_legalizer.h +++ b/vpr/src/pack/cluster_legalizer.h @@ -215,10 +215,10 @@ class ClusterLegalizer { * different cluster types. A reference is stored * in the class to be used to allocate and load * the router data. - * @param num_models The total number of models in the architecture. - * This is the sum of the number of the user and - * library models. Used internally to allocate data - * structures. + * @param user_models A linked list of the user models. Used to allocate + * an internal structure. + * @param library_models A linked list of the library models. Used to + * allocate an internal structure. * @param target_external_pin_util_str A string used to initialize the * target external pin utilization of * each cluster type. 
@@ -246,7 +246,8 @@ class ClusterLegalizer { const Prepacker& prepacker, const std::vector& logical_block_types, std::vector* lb_type_rr_graphs, - size_t num_models, + const t_model* user_models, + const t_model* library_models, const std::vector& target_external_pin_util_str, const t_pack_high_fanout_thresholds& high_fanout_thresholds, ClusterLegalizationStrategy cluster_legalization_strategy, diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index a4a1dcc09ee..6da6e3ffbf7 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -18,14 +18,6 @@ static bool try_size_device_grid(const t_arch& arch, float target_device_utilization, const std::string& device_layout_name); -/** - * @brief Counts the total number of logic models that the architecture can implement. - * - * @param user_models A linked list of logic models. - * @return int The total number of models in the linked list - */ -static int count_models(const t_model* user_models); - bool try_pack(t_packer_opts* packer_opts, const t_analysis_opts* analysis_opts, const t_arch* arch, @@ -40,9 +32,6 @@ bool try_pack(t_packer_opts* packer_opts, t_clustering_data clustering_data; VTR_LOG("Begin packing '%s'.\n", packer_opts->circuit_file_name.c_str()); - /* determine number of models in the architecture */ - size_t num_models = count_models(user_models) + count_models(library_models); - is_clock = alloc_and_load_is_clock(); is_global.insert(is_clock.begin(), is_clock.end()); @@ -109,7 +98,8 @@ bool try_pack(t_packer_opts* packer_opts, prepacker, device_ctx.logical_block_types, lb_type_rr_graphs, - num_models, + user_models, + library_models, packer_opts->target_external_pin_util, high_fanout_thresholds, ClusterLegalizationStrategy::SKIP_INTRA_LB_ROUTE, @@ -369,18 +359,3 @@ static bool try_size_device_grid(const t_arch& arch, return fits_on_device; } -static int count_models(const t_model* user_models) { - if (user_models == nullptr) { - return 0; - } - - const t_model* cur_model = user_models; - 
int n_models = 0; - - while (cur_model) { - n_models++; - cur_model = cur_model->next; - } - - return n_models; -} diff --git a/vpr/src/place/initial_placement.cpp b/vpr/src/place/initial_placement.cpp index 8636de52759..4a6bdd0c533 100644 --- a/vpr/src/place/initial_placement.cpp +++ b/vpr/src/place/initial_placement.cpp @@ -43,15 +43,6 @@ static constexpr int SORT_WEIGHT_PER_TILES_OUTSIDE_OF_PR = 100; static void clear_block_type_grid_locs(const std::unordered_set& unplaced_blk_types_index, BlkLocRegistry& blk_loc_registry); -/** - * @brief Initializes the grid to empty. It also initialized the location for - * all blocks to unplaced. - * - * @param blk_loc_registry Placement block location information. To be filled with the location - * where pl_macro is placed. - */ -static void clear_all_grid_locs(BlkLocRegistry& blk_loc_registry); - /** * @brief Control routine for placing a macro. * First iteration of place_marco performs the following steps to place a macro: @@ -1150,7 +1141,7 @@ static void clear_block_type_grid_locs(const std::unordered_set& unplaced_b } } -static void clear_all_grid_locs(BlkLocRegistry& blk_loc_registry) { +void clear_all_grid_locs(BlkLocRegistry& blk_loc_registry) { auto& device_ctx = g_vpr_ctx.device(); std::unordered_set blk_types_to_be_cleared; diff --git a/vpr/src/place/initial_placement.h b/vpr/src/place/initial_placement.h index bb1d413bd64..6cc391335e2 100644 --- a/vpr/src/place/initial_placement.h +++ b/vpr/src/place/initial_placement.h @@ -8,6 +8,8 @@ #include "vpr_types.h" #include "vtr_vector_map.h" +class BlkLocRegistry; + /* The maximum number of tries when trying to place a macro at a * * random location before trying exhaustive placement - find the first * * legal position and place it during initial placement. 
*/ @@ -152,4 +154,14 @@ bool place_one_block(const ClusterBlockId blk_id, std::vector* blk_types_empty_locs_in_grid, vtr::vector* block_scores, BlkLocRegistry& blk_loc_registry); + +/** + * @brief Initializes the grid to empty. It also initializes the location for + * all blocks to unplaced. + * + * @param blk_loc_registry Placement block location information. To be filled with the location + * where pl_macro is placed. + */ +void clear_all_grid_locs(BlkLocRegistry& blk_loc_registry); + #endif diff --git a/vpr/src/place/place_macro.h b/vpr/src/place/place_macro.h index f0707663091..71c6a720773 100644 --- a/vpr/src/place/place_macro.h +++ b/vpr/src/place/place_macro.h @@ -135,7 +135,9 @@ #define PLACE_MACRO_H #include +#include "clustered_netlist_fwd.h" #include "physical_types.h" +#include "vpr_types.h" /* These are the placement macro structure. * It is in the form of array of structs instead of diff --git a/vtr_flow/parse/parse_config/common/vpr.ap.txt b/vtr_flow/parse/parse_config/common/vpr.ap.txt new file mode 100644 index 00000000000..f4f76fdf8bf --- /dev/null +++ b/vtr_flow/parse/parse_config/common/vpr.ap.txt @@ -0,0 +1,7 @@ +# VPR Run-time Metrics +## Entire AP Flow +ap_mem;vpr.out;.*Analytical Placement took.*\(max_rss (.*), .*\) +ap_time;vpr.out;\s*Analytical Placement took (.*) seconds +## Full Legalizer +ap_full_legalizer_mem;vpr.out;.*AP Full Legalizer took.*\(max_rss (.*), .*\) +ap_full_legalizer_time;vpr.out;\s*AP Full Legalizer took (.*) seconds diff --git a/vtr_flow/parse/parse_config/timing/vpr.ap.txt b/vtr_flow/parse/parse_config/timing/vpr.ap.txt new file mode 100644 index 00000000000..726ce4fb236 --- /dev/null +++ b/vtr_flow/parse/parse_config/timing/vpr.ap.txt @@ -0,0 +1 @@ +%include "../common/vpr.ap.txt" diff --git a/vtr_flow/parse/parse_config/vpr_standard.txt b/vtr_flow/parse/parse_config/vpr_standard.txt index ecf38d37a8d..1b64fbece0a 100644 --- a/vtr_flow/parse/parse_config/vpr_standard.txt +++ 
b/vtr_flow/parse/parse_config/vpr_standard.txt @@ -12,5 +12,6 @@ %include "common/vpr.common.txt" %include "timing/vpr.pack.txt" %include "timing/vpr.place.txt" +%include "timing/vpr.ap.txt" %include "timing/vpr.route_min_chan_width.txt" %include "timing/vpr.route_relaxed_chan_width.txt" diff --git a/vtr_flow/parse/pass_requirements/common/pass_requirements.vpr_ap.txt b/vtr_flow/parse/pass_requirements/common/pass_requirements.vpr_ap.txt new file mode 100644 index 00000000000..4e0013f1d32 --- /dev/null +++ b/vtr_flow/parse/pass_requirements/common/pass_requirements.vpr_ap.txt @@ -0,0 +1,4 @@ +#Common metrics for VPR analytical placement + +#Run-time +ap_time;RangeAbs(0.10,10.0,3) diff --git a/vtr_flow/parse/pass_requirements/pass_requirements_ap.txt b/vtr_flow/parse/pass_requirements/pass_requirements_ap.txt new file mode 100644 index 00000000000..a2eddc6ec21 --- /dev/null +++ b/vtr_flow/parse/pass_requirements/pass_requirements_ap.txt @@ -0,0 +1,6 @@ +%include "common/pass_requirements.vpr_status.txt" +%include "timing/pass_requirements.vpr_ap.txt" +%include "timing/pass_requirements.vpr_route_min_chan_width.txt" +%include "timing/pass_requirements.vpr_route_relaxed_chan_width.txt" + +%include "common/pass_requirements.vtr_benchmarks.txt" diff --git a/vtr_flow/parse/pass_requirements/timing/pass_requirements.vpr_ap.txt b/vtr_flow/parse/pass_requirements/timing/pass_requirements.vpr_ap.txt new file mode 100644 index 00000000000..457648adc1b --- /dev/null +++ b/vtr_flow/parse/pass_requirements/timing/pass_requirements.vpr_ap.txt @@ -0,0 +1,2 @@ +#Common metrics for VPR analytical placement with timing +%include "../common/pass_requirements.vpr_ap.txt" diff --git a/vtr_flow/parse/qor_config/qor_standard.txt b/vtr_flow/parse/qor_config/qor_standard.txt index 7aa3c13b1fc..4b52cb63991 100644 --- a/vtr_flow/parse/qor_config/qor_standard.txt +++ b/vtr_flow/parse/qor_config/qor_standard.txt @@ -7,6 +7,7 @@ total_wirelength;vpr.out;\s*Total wirelength:\s*(\d+) 
total_runtime;vpr.out;The entire flow of VPR took (.*) seconds #pack_time;vpr.out;Packing took (.*) seconds #place_time;vpr.out;Placement took (.*) seconds +#ap_time;vpr.out;Analytical Placement took (.*) seconds #route_time;vpr.out;Routing took (.*) seconds #num_pre_packed_nets;vpr.out;Total Nets: (\d+) #num_pre_packed_blocks;vpr.out;Total Blocks: (\d+) diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics/ch_intrinsics_fixed_io.xml b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics/ch_intrinsics_fixed_io.xml new file mode 100644 index 00000000000..ea61f7a9fb6 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics/ch_intrinsics_fixed_io.xml @@ -0,0 +1,691 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics/config/config.txt new file mode 100644 index 00000000000..ccc52ba6cc7 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics/config/config.txt @@ -0,0 +1,28 @@ +############################################## +# Configuration file for running experiments +############################################## + +# Path to directory of circuits to use +circuits_dir=benchmarks/verilog + +# Path to directory of architectures to use +archs_dir=arch/timing/fixed_size + +# Add circuits to list to sweep +circuit_list_add=ch_intrinsics.v + +# Add architectures to list to sweep +arch_list_add=fixed_k6_frac_N8_22nm.xml + +# Parse info and how to parse +parse_file=vpr_standard.txt + +# How to parse QoR info +qor_parse_file=qor_standard.txt + +# Pass requirements +pass_requirements_file=pass_requirements_ap.txt + +# Script parameters +script_params_common=-track_memory_usage --analytical_place --route --device "unnamed_device" --read_vpr_constraints ../../../../ch_intrinsics_fixed_io.xml + diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics/config/golden_results.txt new file mode 100644 index 00000000000..2bb6c04d5aa --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics/config/golden_results.txt @@ -0,0 +1,2 @@ +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories 
num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time +fixed_k6_frac_N8_22nm.xml ch_intrinsics.v common 9.00 vpr 72.32 MiB -1 -1 0.46 18500 3 0.09 -1 -1 33256 -1 -1 11 99 1 0 success v8.0.0-11425-g2f84f81f9 release VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic 
x86_64 2024-09-27T10:26:58 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 74060 99 130 344 474 1 250 241 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 40 3176 21 6.79088e+06 696192 706193. 2443.58 5.46 0.516448 0.473535 26254 175826 -1 3100 60 1045 1843 1532128 1054081 2.28022 2.28022 -155.657 -2.28022 0 0 926341. 3205.33 0.26 0.78 0.26 -1 -1 0.26 0.189493 0.174147 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/diffeq1/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/diffeq1/config/config.txt new file mode 100644 index 00000000000..7e7eaa7fd2a --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/diffeq1/config/config.txt @@ -0,0 +1,28 @@ +############################################## +# Configuration file for running experiments +############################################## + +# Path to directory of circuits to use +circuits_dir=benchmarks/verilog + +# Path to directory of architectures to use +archs_dir=arch/timing/fixed_size + +# Add circuits to list to sweep +circuit_list_add=diffeq1.v + +# Add architectures to list to sweep +arch_list_add=fixed_k6_frac_N8_22nm.xml + +# Parse info and how to parse +parse_file=vpr_standard.txt + +# How to parse QoR info +qor_parse_file=qor_standard.txt + +# Pass requirements +pass_requirements_file=pass_requirements_ap.txt + +# Script parameters +script_params_common=-track_memory_usage --analytical_place --route --device "unnamed_device" --read_vpr_constraints ../../../../diffeq1_fixed_io.xml + diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/diffeq1/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/diffeq1/config/golden_results.txt new file mode 100644 index 00000000000..d72558a15fc --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/diffeq1/config/golden_results.txt @@ -0,0 +1,2 @@ +arch 
circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time 
crit_path_total_timing_analysis_time crit_path_total_sta_time +fixed_k6_frac_N8_22nm.xml diffeq1.v common 30.54 vpr 73.96 MiB -1 -1 0.74 23360 15 0.35 -1 -1 34316 -1 -1 43 162 0 5 success v8.0.0-11425-g2f84f81f9 release VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-09-27T10:26:58 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 75732 162 96 1009 950 1 739 306 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 76 15726 25 6.79088e+06 2.5593e+06 1.25153e+06 4330.55 24.75 2.70597 2.57178 32878 320202 -1 14591 16 3409 7635 1299474 276683 21.3784 21.3784 -1758.79 -21.3784 0 0 1.55119e+06 5367.45 0.45 0.60 0.54 -1 -1 0.45 0.254682 0.242955 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/diffeq1/diffeq1_fixed_io.xml b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/diffeq1/diffeq1_fixed_io.xml new file mode 100644 index 00000000000..5ad650937dd --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/diffeq1/diffeq1_fixed_io.xml @@ -0,0 +1,778 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_ff/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_ff/config/config.txt new file mode 100644 index 00000000000..65ff8dbdc1e --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_ff/config/config.txt @@ -0,0 +1,28 @@ +############################################## +# Configuration file for running experiments +############################################## + +# Path to directory of circuits to use +circuits_dir=benchmarks/verilog + +# Path to directory of architectures to use +archs_dir=arch/timing/fixed_size + +# Add circuits to list to sweep +circuit_list_add=single_ff.v + +# Add architectures to list to sweep +arch_list_add=fixed_k6_frac_N8_22nm.xml + +# Parse info and how to parse +parse_file=vpr_standard.txt + +# How to parse QoR info +qor_parse_file=qor_standard.txt + +# Pass requirements +pass_requirements_file=pass_requirements_ap.txt + +# Script parameters +script_params_common=-track_memory_usage --analytical_place --route --device "unnamed_device" --read_vpr_constraints ../../../../single_ff_fixed_io.xml + diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_ff/config/golden_results.txt 
b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_ff/config/golden_results.txt new file mode 100644 index 00000000000..33b0707cba4 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_ff/config/golden_results.txt @@ -0,0 +1,2 @@ +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total 
crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time +fixed_k6_frac_N8_22nm.xml single_ff.v common 3.59 vpr 71.37 MiB -1 -1 0.13 17020 1 0.02 -1 -1 29764 -1 -1 1 2 0 0 success v8.0.0-11425-g2f84f81f9 release VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-09-27T10:26:58 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 73080 2 1 3 4 1 3 4 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 20 32 1 6.79088e+06 13472 414966. 1435.87 1.99 0.00160215 0.00145751 22510 95286 -1 26 1 2 2 148 27 0.691615 0.691615 -1.31306 -0.691615 0 0 503264. 1741.40 0.16 0.00 0.13 -1 -1 0.16 0.00116321 0.00112664 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_ff/single_ff_fixed_io.xml b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_ff/single_ff_fixed_io.xml new file mode 100644 index 00000000000..20aebb5be4f --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_ff/single_ff_fixed_io.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_wire/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_wire/config/config.txt new file mode 100644 index 00000000000..6877712cbba --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_wire/config/config.txt @@ -0,0 +1,28 @@ +############################################## +# Configuration file for running experiments +############################################## + +# Path to directory of circuits to use +circuits_dir=benchmarks/verilog + +# Path to directory of architectures 
to use +archs_dir=arch/timing/fixed_size + +# Add circuits to list to sweep +circuit_list_add=single_wire.v + +# Add architectures to list to sweep +arch_list_add=fixed_k6_frac_N8_22nm.xml + +# Parse info and how to parse +parse_file=vpr_standard.txt + +# How to parse QoR info +qor_parse_file=qor_standard.txt + +# Pass requirements +pass_requirements_file=pass_requirements_ap.txt + +# Script parameters +script_params_common=-track_memory_usage --analytical_place --route --device "unnamed_device" --read_vpr_constraints ../../../../single_wire_fixed_io.xml + diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_wire/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_wire/config/golden_results.txt new file mode 100644 index 00000000000..c59c3914961 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_wire/config/golden_results.txt @@ -0,0 +1,2 @@ +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time min_chan_width routed_wirelength 
min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time +fixed_k6_frac_N8_22nm.xml single_wire.v common 1.76 vpr 71.40 MiB -1 -1 0.12 16768 1 0.02 -1 -1 30048 -1 -1 0 1 0 0 success v8.0.0-11425-g2f84f81f9 release VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-09-27T10:26:58 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing 73116 1 1 1 2 0 1 2 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 8 14 1 6.79088e+06 0 166176. 575.005 0.34 0.00123577 0.00119358 20206 45088 -1 19 1 1 1 194 45 0.7726 nan -0.7726 -0.7726 0 0 202963. 
702.294 0.10 0.00 0.07 -1 -1 0.10 0.00143806 0.00140336 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_wire/single_wire_fixed_io.xml b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_wire/single_wire_fixed_io.xml new file mode 100644 index 00000000000..4ad6fdaac2d --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_wire/single_wire_fixed_io.xml @@ -0,0 +1,10 @@ + + + + + + + + + + \ No newline at end of file diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/task_list.txt b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/task_list.txt new file mode 100644 index 00000000000..f51df8eb63c --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/task_list.txt @@ -0,0 +1,5 @@ +# This extra task list is for running just the basic_ap tasks in isolation. +regression_tests/vtr_reg_basic/basic_ap/single_wire +regression_tests/vtr_reg_basic/basic_ap/single_ff +#regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics +regression_tests/vtr_reg_basic/basic_ap/diffeq1 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/task_list.txt b/vtr_flow/tasks/regression_tests/vtr_reg_basic/task_list.txt index 386b06be76d..99aa38f49c4 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_basic/task_list.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_basic/task_list.txt @@ -1,3 +1,7 @@ +regression_tests/vtr_reg_basic/basic_ap/single_wire +regression_tests/vtr_reg_basic/basic_ap/single_ff +#regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics +regression_tests/vtr_reg_basic/basic_ap/diffeq1 regression_tests/vtr_reg_basic/basic_no_timing regression_tests/vtr_reg_basic/basic_timing regression_tests/vtr_reg_basic/basic_timing_no_sdc