diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index e1599c4ae1d..5842fa7185e 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -613,6 +613,8 @@ static void SetupPlacerOpts(const t_options& Options, t_placer_opts* PlacerOpts) PlacerOpts->place_agent_algorithm = Options.place_agent_algorithm; PlacerOpts->place_constraint_expand = Options.place_constraint_expand; PlacerOpts->place_constraint_subtile = Options.place_constraint_subtile; + PlacerOpts->floorplan_num_horizontal_partitions = Options.floorplan_num_horizontal_partitions; + PlacerOpts->floorplan_num_vertical_partitions = Options.floorplan_num_vertical_partitions; } static void SetupAnalysisOpts(const t_options& Options, t_analysis_opts& analysis_opts) { diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 5ffaace39a4..d233d209129 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -2024,6 +2024,22 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg .default_value("off") .show_in(argparse::ShowIn::HELP_ONLY); + place_grp.add_argument(args.floorplan_num_horizontal_partitions, "--floorplan_num_horizontal_partitions") + .help( + "An argument used for generating test constraints files. Specifies how many partitions to " + "make in the horizontal dimension. Must be used in conjunction with " + "--floorplan_num_vertical_partitions") + .default_value("0") + .show_in(argparse::ShowIn::HELP_ONLY); + + place_grp.add_argument(args.floorplan_num_vertical_partitions, "--floorplan_num_vertical_partitions") + .help( + "An argument used for generating test constraints files. Specifies how many partitions to " + "make in the vertical dimension. 
Must be used in conjunction with " + "--floorplan_num_horizontal_partitions") + .default_value("0") + .show_in(argparse::ShowIn::HELP_ONLY); + /* * place_grp.add_argument(args.place_timing_cost_func, "--place_timing_cost_func") * .help( diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index 94e8ac797c0..485c43fcdf1 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -135,6 +135,8 @@ struct t_options { argparse::ArgValue place_crit_limit; argparse::ArgValue place_constraint_expand; argparse::ArgValue place_constraint_subtile; + argparse::ArgValue floorplan_num_horizontal_partitions; + argparse::ArgValue floorplan_num_vertical_partitions; /* Timing-driven placement options only */ argparse::ArgValue PlaceTimingTradeoff; diff --git a/vpr/src/base/region.h b/vpr/src/base/region.h index 415060ada6a..75a25f5071d 100644 --- a/vpr/src/base/region.h +++ b/vpr/src/base/region.h @@ -58,6 +58,10 @@ class Region { */ bool is_loc_in_reg(t_pl_loc loc); + bool operator==(const Region& reg) const { + return (reg.get_region_rect() == this->get_region_rect() && reg.get_sub_tile() == this->get_sub_tile()); + } + private: //may need to include zmin, zmax for future use in 3D FPGA designs vtr::Rect region_bounds; ///< xmin, ymin, xmax, ymax inclusive @@ -88,4 +92,19 @@ Region intersection(const Region& r1, const Region& r2); ///@brief Used to print data from a Region void print_region(FILE* fp, Region region); +namespace std { +template<> +struct hash { + std::size_t operator()(const Region& reg) const noexcept { + vtr::Rect rect = reg.get_region_rect(); + std::size_t seed = std::hash{}(rect.xmin()); + vtr::hash_combine(seed, rect.ymin()); + vtr::hash_combine(seed, rect.xmax()); + vtr::hash_combine(seed, rect.ymax()); + vtr::hash_combine(seed, reg.get_sub_tile()); + return seed; + } +}; +} // namespace std + #endif /* REGION_H */ diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index dc589d75b8d..b3bd0406843 100644 --- 
a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -632,7 +632,8 @@ bool vpr_place_flow(t_vpr_setup& vpr_setup, const t_arch& arch) { //Write out a vpr floorplanning constraints file if the option is specified if (!filename_opts.write_vpr_constraints_file.empty()) { - write_vpr_floorplan_constraints(filename_opts.write_vpr_constraints_file.c_str(), placer_opts.place_constraint_expand, placer_opts.place_constraint_subtile); + write_vpr_floorplan_constraints(filename_opts.write_vpr_constraints_file.c_str(), placer_opts.place_constraint_expand, placer_opts.place_constraint_subtile, + placer_opts.floorplan_num_horizontal_partitions, placer_opts.floorplan_num_vertical_partitions); } return true; diff --git a/vpr/src/base/vpr_constraints.cpp b/vpr/src/base/vpr_constraints.cpp index 1434c94bfd5..95c7e7b7358 100644 --- a/vpr/src/base/vpr_constraints.cpp +++ b/vpr/src/base/vpr_constraints.cpp @@ -2,13 +2,12 @@ #include "partition.h" void VprConstraints::add_constrained_atom(const AtomBlockId blk_id, const PartitionId part_id) { - constrained_atoms.insert({blk_id, part_id}); - auto got = constrained_atoms.find(blk_id); /** - * Each atom can only be in one partition. If the atoms already has a partition id assigned to it, - * the id will be switched to the new part_id being passed in instead + * Each atom can only be in one partition. If the atom is not found in constrained_atoms, it + * will be added with its partition id. + * If the atom is already in constrained_atoms, the partition id will be updated. 
*/ if (got == constrained_atoms.end()) { constrained_atoms.insert({blk_id, part_id}); diff --git a/vpr/src/base/vpr_constraints_writer.cpp b/vpr/src/base/vpr_constraints_writer.cpp index acfc281786f..e77d5268652 100644 --- a/vpr/src/base/vpr_constraints_writer.cpp +++ b/vpr/src/base/vpr_constraints_writer.cpp @@ -17,12 +17,16 @@ #include #include "vpr_constraints_writer.h" +#include "region.h" -void write_vpr_floorplan_constraints(const char* file_name, int expand, bool subtile) { - //Fill in the constraints object to be printed out. +void write_vpr_floorplan_constraints(const char* file_name, int expand, bool subtile, int horizontal_partitions, int vertical_partitions) { VprConstraints constraints; - setup_vpr_floorplan_constraints(constraints, expand, subtile); + if (horizontal_partitions != 0 && vertical_partitions != 0) { + setup_vpr_floorplan_constraints_cutpoints(constraints, horizontal_partitions, vertical_partitions); + } else { + setup_vpr_floorplan_constraints_one_loc(constraints, expand, subtile); + } VprConstraintsSerializer writer(constraints); @@ -39,7 +43,7 @@ void write_vpr_floorplan_constraints(const char* file_name, int expand, bool sub } } -void setup_vpr_floorplan_constraints(VprConstraints& constraints, int expand, bool subtile) { +void setup_vpr_floorplan_constraints_one_loc(VprConstraints& constraints, int expand, bool subtile) { auto& cluster_ctx = g_vpr_ctx.clustering(); auto& place_ctx = g_vpr_ctx.placement(); ClusterAtomsLookup atoms_lookup; @@ -83,3 +87,134 @@ void setup_vpr_floorplan_constraints(VprConstraints& constraints, int expand, bo part_id++; } } + +void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, int horizontal_cutpoints, int vertical_cutpoints) { + auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& place_ctx = g_vpr_ctx.placement(); + auto& device_ctx = g_vpr_ctx.device(); + ClusterAtomsLookup atoms_lookup; + + //calculate the cutpoint values according to the grid size + //load two arrays - one for 
horizontal cutpoints and one for vertical + + std::vector horizontal_cuts; + + std::vector vertical_cuts; + + int horizontal_interval = device_ctx.grid.width() / horizontal_cutpoints; + VTR_LOG("Device grid width is %d, horizontal interval is %d\n", device_ctx.grid.width(), horizontal_interval); + + unsigned int horizontal_point = horizontal_interval; + horizontal_cuts.push_back(0); + int num_horizontal_cuts = 0; + while (num_horizontal_cuts < horizontal_cutpoints - 1) { + horizontal_cuts.push_back(horizontal_point); + horizontal_point = horizontal_point + horizontal_interval; + num_horizontal_cuts++; + } + //Add in the last point after you exit the while loop + horizontal_cuts.push_back(device_ctx.grid.width()); + + int vertical_interval = device_ctx.grid.height() / vertical_cutpoints; + VTR_LOG("Device grid height is %d, vertical interval is %d\n", device_ctx.grid.height(), vertical_interval); + + unsigned int vertical_point = vertical_interval; + vertical_cuts.push_back(0); + int num_vertical_cuts = 0; + while (num_vertical_cuts < vertical_cutpoints - 1) { + vertical_cuts.push_back(vertical_point); + vertical_point = vertical_point + vertical_interval; + num_vertical_cuts++; + } + //Add in the last point after you exit the while loop + vertical_cuts.push_back(device_ctx.grid.height()); + + //Create floorplan regions based on the cutpoints + std::unordered_map> region_atoms; + + for (unsigned int i = 0; i < horizontal_cuts.size() - 1; i++) { + int xmin = horizontal_cuts[i]; + int xmax = horizontal_cuts[i + 1] - 1; + + for (unsigned int j = 0; j < vertical_cuts.size() - 1; j++) { + int ymin = vertical_cuts[j]; + int ymax = vertical_cuts[j + 1] - 1; + + Region reg; + reg.set_region_rect(xmin, ymin, xmax, ymax); + std::vector atoms; + + region_atoms.insert({reg, atoms}); + } + } + + /* + * For each cluster block, see which region it belongs to, and add its atoms to the + * appropriate region accordingly + */ + for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { +
std::vector atoms = atoms_lookup.atoms_in_cluster(blk_id); + int num_atoms = atoms.size(); + int x = place_ctx.block_locs[blk_id].loc.x; + int y = place_ctx.block_locs[blk_id].loc.y; + int width = device_ctx.grid.width(); + int height = device_ctx.grid.height(); + VTR_ASSERT(x >= 0 && x < width); + VTR_ASSERT(y >= 0 && y < height); + int xminimum = 0, yminimum = 0, xmaximum = 0, ymaximum = 0; + + for (unsigned int h = 1; h < horizontal_cuts.size(); h++) { + if (x < horizontal_cuts[h]) { + xmaximum = horizontal_cuts[h] - 1; + xminimum = horizontal_cuts[h - 1]; + break; + } + } + + for (unsigned int v = 1; v < vertical_cuts.size(); v++) { + if (y < vertical_cuts[v]) { + ymaximum = vertical_cuts[v] - 1; + yminimum = vertical_cuts[v - 1]; + break; + } + } + + Region current_reg; + current_reg.set_region_rect(xminimum, yminimum, xmaximum, ymaximum); + + auto got = region_atoms.find(current_reg); + + VTR_ASSERT(got != region_atoms.end()); + + for (int at = 0; at < num_atoms; at++) { + got->second.push_back(atoms[at]); + } + } + + int num_partitions = 0; + for (auto region : region_atoms) { + Partition part; + PartitionId partid(num_partitions); + std::string part_name = "Part" + std::to_string(num_partitions); + vtr::Rect rect = region.first.get_region_rect(); + create_partition(part, part_name, rect.xmin(), rect.ymin(), rect.xmax(), rect.ymax()); + constraints.add_partition(part); + + for (unsigned int k = 0; k < region.second.size(); k++) { + constraints.add_constrained_atom(region.second[k], partid); + } + + num_partitions++; + } +} + +void create_partition(Partition& part, std::string part_name, int xmin, int ymin, int xmax, int ymax) { + part.set_name(part_name); + PartitionRegion part_pr; + Region part_region; + part_region.set_region_rect(xmin, ymin, xmax, ymax); + std::vector part_regions; + part_regions.push_back(part_region); + part_pr.set_partition_region(part_regions); + part.set_part_region(part_pr); +} diff --git a/vpr/src/base/vpr_constraints_writer.h 
b/vpr/src/base/vpr_constraints_writer.h index 8b56ee041ef..756f8c17c29 100644 --- a/vpr/src/base/vpr_constraints_writer.h +++ b/vpr/src/base/vpr_constraints_writer.h @@ -10,7 +10,16 @@ * Routines related to writing out the file are in vpr/src/base/vpr_constraints_serializer.h. For more information on how * the writing interface works, refer to vpr/src/route/SCHEMA_GENERATOR.md * + * The option --write_vpr_constraints can be used to generate the constraints files. * + * The routines in this file are currently used to generate floorplan constraints for testing purposes. + * The constraints files they generate are used to determine whether VPR is correctly adhering to + * floorplan constraints during its packing and placement stages. + * + * The placer options --floorplan_num_horizontal_partitions (int) and --floorplan_num_vertical_partitions (int) can be used + * to specify how many partitions should be created in the test constraints file. + * For example, if both options are 2, the constraints file will split the grid into quadrants, dividing the blocks between + * four partitions - two partitions in the horizontal dimension, and two partitions in the vertical dimension. */ #ifndef VPR_SRC_BASE_VPR_CONSTRAINTS_WRITER_H_ @@ -26,8 +35,16 @@ * @param subtile Specifies whether to write out the constraint regions with or without * subtile values. */ -void write_vpr_floorplan_constraints(const char* file_name, int expand, bool subtile); +void write_vpr_floorplan_constraints(const char* file_name, int expand, bool subtile, int horizontal_partitions, int vertical_partitions); + +//Generate constraints which lock all blocks to one location. +void setup_vpr_floorplan_constraints_one_loc(VprConstraints& constraints, int expand, bool subtile); + +/* Generate constraints which divide the grid into partitions according to the horizontal and vertical partition values passed in + * and lock down blocks to their appropriate partition.
+ */ +void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, int horizontal_cutpoints, int vertical_cutpoints); -void setup_vpr_floorplan_constraints(VprConstraints& constraints, int expand, bool subtile); +void create_partition(Partition& part, std::string part_name, int xmin, int ymin, int xmax, int ymax); #endif /* VPR_SRC_BASE_VPR_CONSTRAINTS_WRITER_H_ */ diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 5e80dbc0542..a8173679de7 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -398,6 +398,8 @@ struct FloorplanningContext : public Context { * The constraints on each cluster are computed during the clustering process and can change. */ vtr::vector cluster_constraints; + + std::vector overfull_regions; }; /** diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 8668ed32b16..aa68331f353 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -1076,6 +1076,8 @@ struct t_placer_opts { float place_crit_limit; int place_constraint_expand; bool place_constraint_subtile; + int floorplan_num_horizontal_partitions; + int floorplan_num_vertical_partitions; /** * @brief Tile types that should be used during delay sampling. 
diff --git a/vpr/src/pack/attraction_groups.cpp b/vpr/src/pack/attraction_groups.cpp index d3f02f42805..3509900f23a 100644 --- a/vpr/src/pack/attraction_groups.cpp +++ b/vpr/src/pack/attraction_groups.cpp @@ -18,7 +18,6 @@ AttractionInfo::AttractionInfo(bool attraction_groups_on) { if (attraction_groups_on) { for (int ipart = 0; ipart < num_parts; ipart++) { PartitionId partid(ipart); - Partition part = floorplanning_ctx.constraints.get_partition(partid); AttractionGroup group_info; group_info.group_atoms = floorplanning_ctx.constraints.get_part_atoms(partid); @@ -38,11 +37,74 @@ AttractionInfo::AttractionInfo(bool attraction_groups_on) { atom_attraction_group[att_group.group_atoms[iatom]] = group_id; } } + + att_group_pulls = 1; } } -const AttractionGroup& AttractionInfo::get_attraction_group_info(const AttractGroupId group_id) { - return attraction_groups[group_id]; +void AttractionInfo::create_att_groups_for_overfull_regions() { + auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); + auto& atom_ctx = g_vpr_ctx.atom(); + int num_parts = floorplanning_ctx.constraints.get_num_partitions(); + + //clear the data structures before continuing + atom_attraction_group.clear(); + attraction_groups.clear(); + + //Initialize every atom to have no attraction group id + int num_atoms = atom_ctx.nlist.blocks().size(); + + atom_attraction_group.resize(num_atoms); + fill(atom_attraction_group.begin(), atom_attraction_group.end(), AttractGroupId::INVALID()); + + auto& overfull_regions = floorplanning_ctx.overfull_regions; + PartitionRegion overfull_regions_pr; + for (unsigned int i = 0; i < overfull_regions.size(); i++) { + overfull_regions_pr.add_to_part_region(overfull_regions[i]); + } + /* + * Create a PartitionRegion that contains all the overfull regions so that you can + * make an attraction group for any partition that intersects with any of these regions + */ + + /* + * Create an attraction group for each partition with an overfull region.
+ */ + + for (int ipart = 0; ipart < num_parts; ipart++) { + PartitionId partid(ipart); + + Partition part = floorplanning_ctx.constraints.get_partition(partid); + auto& pr_regions = part.get_part_region(); + + PartitionRegion intersect_pr; + + intersect_pr = intersection(overfull_regions_pr, pr_regions); + + if (!intersect_pr.empty()) { + AttractionGroup group_info; + group_info.group_atoms = floorplanning_ctx.constraints.get_part_atoms(partid); + + attraction_groups.push_back(group_info); + } + } + + //Then, fill in the group id for the atoms that do have an attraction group + int num_att_grps = attraction_groups.size(); + + for (int igroup = 0; igroup < num_att_grps; igroup++) { + AttractGroupId group_id(igroup); + + AttractionGroup att_group = attraction_groups[group_id]; + + for (unsigned int iatom = 0; iatom < att_group.group_atoms.size(); iatom++) { + atom_attraction_group[att_group.group_atoms[iatom]] = group_id; + } + } + + att_group_pulls = 1; + + VTR_LOG("%d clustering attraction groups created. \n", num_att_grps); } void AttractionInfo::set_attraction_group_info(AttractGroupId group_id, const AttractionGroup& group_info) { diff --git a/vpr/src/pack/attraction_groups.h b/vpr/src/pack/attraction_groups.h index 80ff6b5b7c2..af85af26144 100644 --- a/vpr/src/pack/attraction_groups.h +++ b/vpr/src/pack/attraction_groups.h @@ -41,15 +41,7 @@ struct AttractionGroup { * Atoms belonging to this attraction group will receive this gain if they * are potential candidates to be put in a cluster with the same attraction group. */ - float gain = 5; - - /* - * If the group is made up from a partition of atoms that are confined to a size one spot - * (i.e. one x, y grid location), the clusterer will immediately put all atoms in the group - * into the same cluster - */ - /* TODO: Add the code in the clusterer that will do the above steps. 
*/ - //bool must_be_packed_in_one_cluster = false; + float gain = 0.08; }; class AttractionInfo { @@ -58,10 +50,12 @@ class AttractionInfo { //If no constraints were specified, then no attraction groups will be created. AttractionInfo(bool attraction_groups_on); + void create_att_groups_for_overfull_regions(); + //Setters and getters for the class AttractGroupId get_atom_attraction_group(const AtomBlockId atom_id); - const AttractionGroup& get_attraction_group_info(const AttractGroupId group_id); + AttractionGroup& get_attraction_group_info(const AttractGroupId group_id); void set_atom_attraction_group(const AtomBlockId atom_id, const AttractGroupId group_id); @@ -75,6 +69,10 @@ class AttractionInfo { int num_attraction_groups(); + int get_att_group_pulls(); + + void set_att_group_pulls(int num_pulls); + private: //Store each atom's attraction group assuming each atom is in at most one attraction group //Atoms with no attraction group will have AttractGroupId::INVALID @@ -82,8 +80,24 @@ class AttractionInfo { //Store atoms and gain value that belong to each attraction group vtr::vector attraction_groups; + + /* When packing a cluster with molecules, we have various ways of seeking candidate + * molecules for the cluster. The att_group_pulls value is a way of keeping count of how many + * times a cluster has searched for candidate molecules from its attraction group. We can increase + * this value if we want to pack the cluster more densely (i.e. fill it with more molecules from + * its attraction group).
+ */ + int att_group_pulls = 1; }; +inline int AttractionInfo::get_att_group_pulls() { + return att_group_pulls; +} + +inline void AttractionInfo::set_att_group_pulls(int num_pulls) { + att_group_pulls = num_pulls; +} + inline AttractGroupId AttractionInfo::get_atom_attraction_group(const AtomBlockId atom_id) { return atom_attraction_group[atom_id]; } @@ -105,4 +119,8 @@ inline void AttractionInfo::set_attraction_group_gain(const AttractGroupId group attraction_groups[group_id].gain = new_gain; } +inline AttractionGroup& AttractionInfo::get_attraction_group_info(const AttractGroupId group_id) { + return attraction_groups[group_id]; +} + #endif /* VPR_SRC_PACK_ATTRACTION_GROUPS_H_ */ diff --git a/vpr/src/pack/cluster.cpp b/vpr/src/pack/cluster.cpp index 5451c4da997..e27bd617a54 100644 --- a/vpr/src/pack/cluster.cpp +++ b/vpr/src/pack/cluster.cpp @@ -71,9 +71,21 @@ #include "tatum/report/graphviz_dot_writer.hpp" #include "tatum/TimingReporter.hpp" +#include "constraints_report.h" + #define AAPACK_MAX_HIGH_FANOUT_EXPLORE 10 /* For high-fanout nets that are ignored, consider a maximum of this many sinks, must be less than packer_opts.feasible_block_array_size */ #define AAPACK_MAX_TRANSITIVE_EXPLORE 40 /* When investigating transitive fanout connections in packing, consider a maximum of this many molecules, must be less than packer_opts.feasible_block_array_size */ +/* + * When attraction groups are created, the purpose is to pack more densely by adding more molecules + * from the cluster's attraction group to the cluster. In a normal flow, (when attraction groups are + * not on), the cluster keeps being packed until the get_molecule routines return either a repeated + * molecule or a nullptr. When attraction groups are on, we want to keep exploring molecules for the + * cluster until a nullptr is returned. So, the number of repeated molecules is changed from 1 to 500, + * effectively making the clusterer pack a cluster until a nullptr is returned. 
+ */ +#define ATTRACTION_GROUPS_MAX_REPEATED_MOLECULES 500 + //Constant allowing all cluster pins to be used const t_ext_pin_util FULL_EXTERNAL_PIN_UTIL(1., 1.); @@ -108,7 +120,11 @@ static bool is_atom_blk_in_pb(const AtomBlockId blk_id, const t_pb* pb); static void add_molecule_to_pb_stats_candidates(t_pack_molecule* molecule, std::map& gain, t_pb* pb, - int max_queue_size); + int max_queue_size, + AttractionInfo& attraction_groups); + +static void remove_molecule_from_pb_stats_candidates(t_pack_molecule* molecule, + t_pb* pb); static void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats, t_cluster_placement_stats** cluster_placement_stats, @@ -150,7 +166,12 @@ static void print_pack_status(int num_clb, int num_molecules_processed, int& mols_since_last_print, int device_width, - int device_height); + int device_height, + AttractionInfo& attraction_groups); + +static void rebuild_attraction_groups(AttractionInfo& attraction_groups); + +static void record_molecule_failure(t_pack_molecule* molecule, t_pb* pb); static enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_placement_stats_ptr, const std::multimap& atom_molecules, @@ -171,7 +192,9 @@ static enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* clu static void try_fill_cluster(const t_packer_opts& packer_opts, t_cluster_placement_stats* cur_cluster_placement_stats_ptr, const std::multimap& atom_molecules, + t_pack_molecule*& prev_molecule, t_pack_molecule*& next_molecule, + int& num_same_molecules, t_pb_graph_node** primitives_list, t_cluster_progress_stats& cluster_stats, int num_clb, @@ -188,6 +211,7 @@ static void try_fill_cluster(const t_packer_opts& packer_opts, t_lb_router_data* router_data, t_ext_pin_util target_external_pin_util, PartitionRegion& temp_cluster_pr, + std::map>& primitive_candidate_block_types, e_block_pack_status& block_pack_status); static t_pack_molecule* save_cluster_routing_and_pick_new_seed(const t_packer_opts& 
packer_opts, @@ -291,23 +315,28 @@ static t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, const ClusterBlockId cluster_index, bool prioritize_transitive_connectivity, int transitive_fanout_threshold, - const int feasible_block_array_size); + const int feasible_block_array_size, + std::map>& primitive_candidate_block_types); static void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, const std::multimap& atom_molecules, - const int feasible_block_array_size); + const int feasible_block_array_size, + AttractionInfo& attraction_groups); static void add_cluster_molecule_candidates_by_highfanout_connectivity(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, const std::multimap& atom_molecules, - const int feasible_block_array_size); + const int feasible_block_array_size, + AttractionInfo& attraction_groups); static void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, const std::multimap& atom_molecules, AttractionInfo& attraction_groups, - const int feasible_block_array_size); + const int feasible_block_array_size, + ClusterBlockId clb_index, + std::map>& primitive_candidate_block_types); static void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, @@ -315,7 +344,8 @@ static void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur vtr::vector>& clb_inter_blk_nets, const ClusterBlockId cluster_index, int transitive_fanout_threshold, - const int feasible_block_array_size); + const int feasible_block_array_size, + AttractionInfo& attraction_groups); static bool check_free_primitives_for_molecule_atoms(t_pack_molecule* molecule, t_cluster_placement_stats* cluster_placement_stats_ptr); @@ -330,7 +360,8 @@ static t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, t_cluster_placement_stats* 
cluster_placement_stats_ptr, vtr::vector>& clb_inter_blk_nets, ClusterBlockId cluster_index, - int verbosity); + int verbosity, + std::map>& primitive_candidate_block_types); static void mark_all_molecules_valid(t_pack_molecule* molecule_head); @@ -347,7 +378,7 @@ static std::vector initialize_seed_atoms(const e_cluster_seed seed_ static t_pack_molecule* get_highest_gain_seed_molecule(int* seedindex, const std::multimap& atom_molecules, const std::vector seed_atoms); -static float get_molecule_gain(t_pack_molecule* molecule, std::map& blk_gain); +static float get_molecule_gain(t_pack_molecule* molecule, std::map& blk_gain, AttractGroupId cluster_attraction_group_id, AttractionInfo& attraction_groups, int num_molecule_failures); static int compare_molecule_gain(const void* a, const void* b); int net_sinks_reachable_in_cluster(const t_pb_graph_pin* driver_pb_gpin, const int depth, const AtomNetId net_id); @@ -400,7 +431,8 @@ std::map do_clustering(const t_packer_opts& pa std::vector* lb_type_rr_graphs, const t_ext_pin_util_targets& ext_pin_util_targets, const t_pack_high_fanout_thresholds& high_fanout_thresholds, - AttractionInfo& attraction_groups) { + AttractionInfo& attraction_groups, + bool& floorplan_regions_overfull) { /* Does the actual work of clustering multiple netlist blocks * * into clusters. */ @@ -411,6 +443,12 @@ std::map do_clustering(const t_packer_opts& pa * */ + /* This routine returns a map that details the number of used block type instances. + * The bool floorplan_regions_overfull also acts as a return value - it is set to + * true when one or more floorplan regions have more blocks assigned to them than + * they can fit. 
+ */ + /**************************************************************** * Initialization *****************************************************************/ @@ -560,7 +598,8 @@ std::map do_clustering(const t_packer_opts& pa cluster_stats.num_molecules_processed, cluster_stats.mols_since_last_print, device_ctx.grid.width(), - device_ctx.grid.height()); + device_ctx.grid.height(), + attraction_groups); VTR_LOGV(verbosity > 2, "Complex block %d: '%s' (%s) ", num_clb, @@ -601,15 +640,35 @@ std::map do_clustering(const t_packer_opts& pa cur_cluster_placement_stats_ptr, clb_inter_blk_nets, clb_index, - verbosity); + packer_opts.pack_verbosity, + primitive_candidate_block_types); prev_molecule = istart; - while (next_molecule != nullptr && prev_molecule != next_molecule) { + + /* + * When attraction groups are created, the purpose is to pack more densely by adding more molecules + * from the cluster's attraction group to the cluster. In a normal flow, (when attraction groups are + * not on), the cluster keeps being packed until the get_molecule routines return either a repeated + * molecule or a nullptr. When attraction groups are on, we want to keep exploring molecules for the + * cluster until a nullptr is returned. So, the number of repeated molecules allowed is increased to a + * large value. 
+ */ + int max_num_repeated_molecules = 0; + if (attraction_groups.num_attraction_groups() > 0) { + max_num_repeated_molecules = ATTRACTION_GROUPS_MAX_REPEATED_MOLECULES; + } else { + max_num_repeated_molecules = 1; + } + int num_repeated_molecules = 0; + + while (next_molecule != nullptr && num_repeated_molecules < max_num_repeated_molecules) { prev_molecule = next_molecule; try_fill_cluster(packer_opts, cur_cluster_placement_stats_ptr, atom_molecules, + prev_molecule, next_molecule, + num_repeated_molecules, primitives_list, cluster_stats, num_clb, @@ -626,6 +685,7 @@ std::map do_clustering(const t_packer_opts& pa router_data, target_ext_pin_util, temp_cluster_pr, + primitive_candidate_block_types, block_pack_status); } @@ -648,7 +708,7 @@ std::map do_clustering(const t_packer_opts& pa } //check clustering and output it - check_and_output_clustering(packer_opts, is_clock, arch, num_clb, intra_lb_routing); + check_and_output_clustering(packer_opts, is_clock, arch, num_clb, intra_lb_routing, floorplan_regions_overfull); // Free Data Structures free_clustering_data(packer_opts, intra_lb_routing, hill_climbing_inputs_avail, cluster_placement_stats, @@ -671,7 +731,9 @@ static void print_pack_status(int num_clb, int num_molecules_processed, int& mols_since_last_print, int device_width, - int device_height) { + int device_height, + AttractionInfo& attraction_groups) { + //Print a packing update each time another 4% of molecules have been packed. const float print_frequency = 0.04; double percentage = (num_molecules_processed / (double)tot_num_molecules) * 100; @@ -695,6 +757,33 @@ static void print_pack_status(int num_clb, VTR_LOG("\n"); fflush(stdout); mols_since_last_print = 0; + if (attraction_groups.num_attraction_groups() > 0) { + rebuild_attraction_groups(attraction_groups); + } + } +} + +/* + * Periodically rebuild the attraction groups to reflect which atoms in them + * are still available for new clusters (i.e. 
remove the atoms that have already + * been packed from the attraction group). + */ +static void rebuild_attraction_groups(AttractionInfo& attraction_groups) { + auto& atom_ctx = g_vpr_ctx.atom(); + + for (int igroup = 0; igroup < attraction_groups.num_attraction_groups(); igroup++) { + AttractGroupId group_id(igroup); + AttractionGroup& group = attraction_groups.get_attraction_group_info(group_id); + AttractionGroup new_att_group_info; + + for (AtomBlockId atom : group.group_atoms) { + //If the ClusterBlockId is anything other than invalid, the atom has been packed already + if (atom_ctx.lookup.atom_clb(atom) == ClusterBlockId::INVALID()) { + new_att_group_info.group_atoms.push_back(atom); + } + } + + attraction_groups.set_attraction_group_info(group_id, new_att_group_info); } } @@ -712,25 +801,75 @@ static bool is_atom_blk_in_pb(const AtomBlockId blk_id, const t_pb* pb) { return false; } +/* Remove blk from list of feasible blocks sorted according to gain + * Useful for removing blocks that are repeatedly failing. 
If a block + * has been found to be illegal, we don't repeatedly consider it.*/ +static void remove_molecule_from_pb_stats_candidates(t_pack_molecule* molecule, + t_pb* pb) { + int molecule_index; + bool found_molecule = false; + + //find the molecule index + for (int i = 0; i < pb->pb_stats->num_feasible_blocks; i++) { + if (pb->pb_stats->feasible_blocks[i] == molecule) { + found_molecule = true; + molecule_index = i; + } + } + + //if it is not in the array, return + if (found_molecule == false) { + return; + } + + //Otherwise, shift the molecules while removing the specified molecule + for (int j = molecule_index; j < pb->pb_stats->num_feasible_blocks - 1; j++) { + pb->pb_stats->feasible_blocks[j] = pb->pb_stats->feasible_blocks[j + 1]; + } + pb->pb_stats->num_feasible_blocks--; +} + /* Add blk to list of feasible blocks sorted according to gain */ static void add_molecule_to_pb_stats_candidates(t_pack_molecule* molecule, std::map& gain, t_pb* pb, - int max_queue_size) { + int max_queue_size, + AttractionInfo& attraction_groups) { int i, j; + int num_molecule_failures = 0; + + AttractGroupId cluster_att_grp = pb->pb_stats->attraction_grp_id; + + /* When the clusterer packs with attraction groups the goal is to + * pack more densely. Removing failed molecules to make room for the exploration of + * more molecules helps to achieve this purpose. 
+ */ + if (attraction_groups.num_attraction_groups() > 0) { + auto got = pb->pb_stats->atom_failures.find(molecule->atom_block_ids[0]); + if (got == pb->pb_stats->atom_failures.end()) { + num_molecule_failures = 0; + } else { + num_molecule_failures = got->second; + } + + if (num_molecule_failures > 0) { + remove_molecule_from_pb_stats_candidates(molecule, pb); + return; + } + } for (i = 0; i < pb->pb_stats->num_feasible_blocks; i++) { if (pb->pb_stats->feasible_blocks[i] == molecule) { - return; /* already in queue, do nothing */ + return; // already in queue, do nothing } } if (pb->pb_stats->num_feasible_blocks >= max_queue_size - 1) { /* maximum size for array, remove smallest gain element and sort */ - if (get_molecule_gain(molecule, gain) > get_molecule_gain(pb->pb_stats->feasible_blocks[0], gain)) { + if (get_molecule_gain(molecule, gain, cluster_att_grp, attraction_groups, num_molecule_failures) > get_molecule_gain(pb->pb_stats->feasible_blocks[0], gain, cluster_att_grp, attraction_groups, num_molecule_failures)) { /* single loop insertion sort */ for (j = 0; j < pb->pb_stats->num_feasible_blocks - 1; j++) { - if (get_molecule_gain(molecule, gain) <= get_molecule_gain(pb->pb_stats->feasible_blocks[j + 1], gain)) { + if (get_molecule_gain(molecule, gain, cluster_att_grp, attraction_groups, num_molecule_failures) <= get_molecule_gain(pb->pb_stats->feasible_blocks[j + 1], gain, cluster_att_grp, attraction_groups, num_molecule_failures)) { pb->pb_stats->feasible_blocks[j] = molecule; break; } else { @@ -744,7 +883,7 @@ static void add_molecule_to_pb_stats_candidates(t_pack_molecule* molecule, } else { /* Expand array and single loop insertion sort */ for (j = pb->pb_stats->num_feasible_blocks - 1; j >= 0; j--) { - if (get_molecule_gain(pb->pb_stats->feasible_blocks[j], gain) > get_molecule_gain(molecule, gain)) { + if (get_molecule_gain(pb->pb_stats->feasible_blocks[j], gain, cluster_att_grp, attraction_groups, num_molecule_failures) > 
get_molecule_gain(molecule, gain, cluster_att_grp, attraction_groups, num_molecule_failures)) { pb->pb_stats->feasible_blocks[j + 1] = pb->pb_stats->feasible_blocks[j]; } else { pb->pb_stats->feasible_blocks[j + 1] = molecule; @@ -1074,12 +1213,16 @@ static void alloc_and_load_pb_stats(t_pb* pb, const int feasible_block_array_siz pb->pb_stats->tie_break_high_fanout_net = AtomNetId::INVALID(); + pb->pb_stats->pulled_from_atom_groups = 0; + pb->pb_stats->num_att_group_atoms_used = 0; + pb->pb_stats->gain.clear(); pb->pb_stats->timinggain.clear(); pb->pb_stats->connectiongain.clear(); pb->pb_stats->sharinggain.clear(); pb->pb_stats->hillgain.clear(); pb->pb_stats->transitive_fanout_candidates.clear(); + pb->pb_stats->atom_failures.clear(); pb->pb_stats->num_pins_of_net_in_pb.clear(); @@ -1160,7 +1303,14 @@ static bool cleanup_pb(t_pb* pb) { } /** - * Try pack molecule into current cluster + * Performs legality checks to see whether the selected molecule can be + * packed into the current cluster. The legality checks are related to + * floorplanning, pin feasibility, and routing (if detailed route + * checking is enabled). The routine returns BLK_PASSED if the molecule + * can be packed in the cluster. If the block passes, the routine commits + * it to the current cluster and updates the appropriate data structures. + * Otherwise, it returns the appropriate failed pack status based on which + * legality check the molecule failed. */ static enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* cluster_placement_stats_ptr, const std::multimap& atom_molecules, @@ -1212,6 +1362,8 @@ static enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* clu // macros that limit placement flexibility. 
if (cluster_placement_stats_ptr->has_long_chain && molecule->is_chain() && molecule->chain_info->is_long_chain) { VTR_LOGV(verbosity > 4, "\t\t\tFAILED Placement Feasibility Filter: Only one long chain per cluster is allowed\n"); + //Record the failure of this molecule in the current pb stats + record_molecule_failure(molecule, pb); return BLK_FAILED_FEASIBLE; } @@ -1227,6 +1379,8 @@ static enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* clu temp_cluster_pr, cluster_pr_needs_update); if (block_pack_status == BLK_FAILED_FLOORPLANNING) { + //Record the failure of this molecule in the current pb stats + record_molecule_failure(molecule, pb); return block_pack_status; } if (cluster_pr_needs_update == true) { @@ -1373,6 +1527,9 @@ static enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* clu } } + //Record the failure of this molecule in the current pb stats + record_molecule_failure(molecule, pb); + /* Packing failed, but a part of the pb tree is still allocated and pbs have their modes set. * Before trying to pack next molecule the unused pbs need to be freed and, the most important, * their modes reset. This task is performed by the cleanup_pb() function below. */ @@ -1390,6 +1547,22 @@ static enum e_block_pack_status try_pack_molecule(t_cluster_placement_stats* clu return block_pack_status; } +/* Record the failure of the molecule in this cluster in the current pb stats. + * If a molecule fails repeatedly, its gain will be penalized if packing with + * attraction groups on. */ +static void record_molecule_failure(t_pack_molecule* molecule, t_pb* pb) { + //Only have to record the failure for the first atom in the molecule.
+ //The convention when checking if a molecule has failed to pack in the cluster + //is to check whether the first atoms has been recorded as having failed + + auto got = pb->pb_stats->atom_failures.find(molecule->atom_block_ids[0]); + if (got == pb->pb_stats->atom_failures.end()) { + pb->pb_stats->atom_failures.insert({molecule->atom_block_ids[0], 1}); + } else { + got->second++; + } +} + /** * Try place atom block into current primitive location */ @@ -1706,7 +1879,9 @@ static void update_connection_gain_values(const AtomNetId net_id, const AtomBloc static void try_fill_cluster(const t_packer_opts& packer_opts, t_cluster_placement_stats* cur_cluster_placement_stats_ptr, const std::multimap& atom_molecules, + t_pack_molecule*& prev_molecule, t_pack_molecule*& next_molecule, + int& num_same_molecules, t_pb_graph_node** primitives_list, t_cluster_progress_stats& cluster_stats, int num_clb, @@ -1723,6 +1898,7 @@ static void try_fill_cluster(const t_packer_opts& packer_opts, t_lb_router_data* router_data, t_ext_pin_util target_ext_pin_util, PartitionRegion& temp_cluster_pr, + std::map>& primitive_candidate_block_types, e_block_pack_status& block_pack_status) { auto& atom_ctx = g_vpr_ctx.atom(); auto& device_ctx = g_vpr_ctx.mutable_device(); @@ -1780,7 +1956,11 @@ static void try_fill_cluster(const t_packer_opts& packer_opts, &cluster_stats.num_unrelated_clustering_attempts, cur_cluster_placement_stats_ptr, clb_inter_blk_nets, - clb_index, packer_opts.pack_verbosity); + clb_index, packer_opts.pack_verbosity, + primitive_candidate_block_types); + if (prev_molecule == next_molecule) { + num_same_molecules++; + } return; } @@ -1801,7 +1981,8 @@ static void try_fill_cluster(const t_packer_opts& packer_opts, cluster_stats.num_molecules_processed, cluster_stats.mols_since_last_print, device_ctx.grid.width(), - device_ctx.grid.height()); + device_ctx.grid.height(), + attraction_groups); update_cluster_stats(next_molecule, clb_index, is_clock, //Set of all clocks @@ -1827,7 
+2008,12 @@ static void try_fill_cluster(const t_packer_opts& packer_opts, cur_cluster_placement_stats_ptr, clb_inter_blk_nets, clb_index, - packer_opts.pack_verbosity); + packer_opts.pack_verbosity, + primitive_candidate_block_types); + + if (prev_molecule == next_molecule) { + num_same_molecules++; + } } static t_pack_molecule* save_cluster_routing_and_pick_new_seed(const t_packer_opts& packer_opts, @@ -1893,7 +2079,10 @@ static void free_data_and_requeue_used_mols_if_illegal(const ClusterBlockId& clb int& num_clb, int& seedindex) { auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); + auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); + PartitionRegion empty_pr; + floorplanning_ctx.cluster_constraints[clb_index] = empty_pr; num_used_type_instances[cluster_ctx.clb_nlist.block_type(clb_index)]--; revalid_molecules(cluster_ctx.clb_nlist.block_pb(clb_index), atom_molecules); cluster_ctx.clb_nlist.remove_block(clb_index); @@ -2061,13 +2250,6 @@ static void update_total_gain(float alpha, float beta, bool timing_driven, bool cur_pb->pb_stats->sharinggain[blk_id] = 0; } - AttractGroupId atom_grp_id = attraction_groups.get_atom_attraction_group(blk_id); - if (atom_grp_id != AttractGroupId::INVALID() && atom_grp_id == cluster_att_grp_id) { - //increase gain of atom based on attraction group gain - float att_grp_gain = attraction_groups.get_attraction_group_gain(atom_grp_id); - cur_pb->pb_stats->gain[blk_id] += att_grp_gain; - } - /* Todo: This was used to explore different normalization options, can * be made more efficient once we decide on which one to use*/ int num_used_input_pins = atom_ctx.nlist.block_input_pins(blk_id).size(); @@ -2094,6 +2276,13 @@ static void update_total_gain(float alpha, float beta, bool timing_driven, bool * cur_pb->pb_stats->timinggain[blk_id] + (1.0 - alpha) * (float)cur_pb->pb_stats->gain[blk_id]; } + + AttractGroupId atom_grp_id = attraction_groups.get_atom_attraction_group(blk_id); + if (atom_grp_id != AttractGroupId::INVALID() 
&& atom_grp_id == cluster_att_grp_id) { + //increase gain of atom based on attraction group gain + float att_grp_gain = attraction_groups.get_attraction_group_gain(atom_grp_id); + cur_pb->pb_stats->gain[blk_id] += att_grp_gain; + } } } @@ -2352,9 +2541,9 @@ static void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats if (molecule->type == MOLECULE_FORCED_PACK) { VPR_FATAL_ERROR(VPR_ERROR_PACK, "Can not find any logic block that can implement molecule.\n" - "\tPattern %s %s\n", + "\tPattern %s %s (%d). Root model is %s\n", molecule->pack_pattern->name, - root_atom_name.c_str()); + root_atom_name.c_str(), root_atom, root_model->name); } else { VPR_FATAL_ERROR(VPR_ERROR_PACK, "Can not find any logic block that can implement molecule.\n" @@ -2398,7 +2587,8 @@ static t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, const ClusterBlockId cluster_index, bool prioritize_transitive_connectivity, int transitive_fanout_threshold, - const int feasible_block_array_size) { + const int feasible_block_array_size, + std::map>& primitive_candidate_block_types) { /* * This routine populates a list of feasible blocks outside the cluster, then returns the best candidate for the cluster. * If there are no feasible blocks it returns a nullptr. @@ -2411,38 +2601,45 @@ static t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, // 1. Find unpacked molecules based on criticality and strong connectedness (connected by low fanout nets) with current cluster if (cur_pb->pb_stats->num_feasible_blocks == NOT_VALID) { - add_cluster_molecule_candidates_by_connectivity_and_timing(cur_pb, cluster_placement_stats_ptr, atom_molecules, feasible_block_array_size); + add_cluster_molecule_candidates_by_connectivity_and_timing(cur_pb, cluster_placement_stats_ptr, atom_molecules, feasible_block_array_size, attraction_groups); } if (prioritize_transitive_connectivity) { // 2. Find unpacked molecules based on transitive connections (eg. 
2 hops away) with current cluster if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->explore_transitive_fanout) { add_cluster_molecule_candidates_by_transitive_connectivity(cur_pb, cluster_placement_stats_ptr, atom_molecules, clb_inter_blk_nets, - cluster_index, transitive_fanout_threshold, feasible_block_array_size); + cluster_index, transitive_fanout_threshold, feasible_block_array_size, attraction_groups); } // 3. Find unpacked molecules based on weak connectedness (connected by high fanout nets) with current cluster if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->tie_break_high_fanout_net) { - add_cluster_molecule_candidates_by_highfanout_connectivity(cur_pb, cluster_placement_stats_ptr, atom_molecules, feasible_block_array_size); + add_cluster_molecule_candidates_by_highfanout_connectivity(cur_pb, cluster_placement_stats_ptr, atom_molecules, feasible_block_array_size, attraction_groups); } } else { //Reverse order // 3. Find unpacked molecules based on weak connectedness (connected by high fanout nets) with current cluster if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->tie_break_high_fanout_net) { - add_cluster_molecule_candidates_by_highfanout_connectivity(cur_pb, cluster_placement_stats_ptr, atom_molecules, feasible_block_array_size); + add_cluster_molecule_candidates_by_highfanout_connectivity(cur_pb, cluster_placement_stats_ptr, atom_molecules, feasible_block_array_size, attraction_groups); } // 2. Find unpacked molecules based on transitive connections (eg. 2 hops away) with current cluster if (cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->explore_transitive_fanout) { add_cluster_molecule_candidates_by_transitive_connectivity(cur_pb, cluster_placement_stats_ptr, atom_molecules, clb_inter_blk_nets, - cluster_index, transitive_fanout_threshold, feasible_block_array_size); + cluster_index, transitive_fanout_threshold, feasible_block_array_size, attraction_groups); } } - // 4. 
Find unpacked molecules based on attraction group of the current cluster (if the cluster has an attraction group) - add_cluster_molecule_candidates_by_attraction_group(cur_pb, cluster_placement_stats_ptr, atom_molecules, attraction_groups, feasible_block_array_size); - /* Grab highest gain molecule */ t_pack_molecule* molecule = nullptr; + if (cur_pb->pb_stats->num_feasible_blocks == 0) { + /* + * No suitable molecules were found from the above functions - if + * attraction groups were created, explore the attraction groups to see if + * any suitable molecules can be found. + */ + add_cluster_molecule_candidates_by_attraction_group(cur_pb, cluster_placement_stats_ptr, atom_molecules, attraction_groups, + feasible_block_array_size, cluster_index, primitive_candidate_block_types); + } + if (cur_pb->pb_stats->num_feasible_blocks > 0) { cur_pb->pb_stats->num_feasible_blocks--; int index = cur_pb->pb_stats->num_feasible_blocks; @@ -2458,7 +2655,8 @@ static t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb, static void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, const std::multimap& atom_molecules, - const int feasible_block_array_size) { + const int feasible_block_array_size, + AttractionInfo& attraction_groups) { VTR_ASSERT(cur_pb->pb_stats->num_feasible_blocks == NOT_VALID); cur_pb->pb_stats->num_feasible_blocks = 0; @@ -2475,7 +2673,7 @@ static void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); if (success) { add_molecule_to_pb_stats_candidates(molecule, - cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size); + cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); } } } @@ -2487,7 +2685,8 @@ static void add_cluster_molecule_candidates_by_connectivity_and_timing(t_pb* cur static void 
add_cluster_molecule_candidates_by_highfanout_connectivity(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, const std::multimap& atom_molecules, - const int feasible_block_array_size) { + const int feasible_block_array_size, + AttractionInfo& attraction_groups) { /* Because the packer ignores high fanout nets when marking what blocks * to consider, use one of the ignored high fanout net to fill up lightly * related blocks */ @@ -2513,7 +2712,7 @@ static void add_cluster_molecule_candidates_by_highfanout_connectivity(t_pb* cur bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); if (success) { add_molecule_to_pb_stats_candidates(molecule, - cur_pb->pb_stats->gain, cur_pb, std::min(feasible_block_array_size, AAPACK_MAX_HIGH_FANOUT_EXPLORE)); + cur_pb->pb_stats->gain, cur_pb, std::min(feasible_block_array_size, AAPACK_MAX_HIGH_FANOUT_EXPLORE), attraction_groups); count++; } } @@ -2527,33 +2726,105 @@ static void add_cluster_molecule_candidates_by_highfanout_connectivity(t_pb* cur * If the current cluster being packed has an attraction group associated with it * (i.e. there are atoms in it that belong to an attraction group), this routine adds molecules * from the associated attraction group to the list of feasible blocks for the cluster. + * Attraction groups can be very large, so we only add some randomly selected molecules for efficiency + * if the number of atoms in the group is 500 or more. Therefore, the molecules added to the candidates + * will vary each time you call this function.
*/ static void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, const std::multimap& atom_molecules, AttractionInfo& attraction_groups, - const int feasible_block_array_size) { + const int feasible_block_array_size, + ClusterBlockId clb_index, + std::map>& primitive_candidate_block_types) { auto& atom_ctx = g_vpr_ctx.atom(); + auto& cluster_ctx = g_vpr_ctx.clustering(); + + auto cluster_type = cluster_ctx.clb_nlist.block_type(clb_index); + + /* + * For each cluster, we want to explore the attraction group molecules as potential + * candidates for the cluster a limited number of times. This limit is imposed because + * if the cluster belongs to a very large attraction group, we could potentially search + * through its attraction group molecules for a very long time. + * Defining a number of times to search through the attraction groups (i.e. number of + * attraction group pulls) determines how many times we search through the cluster's attraction + * group molecules for candidate molecules. 
+ */ + int num_pulls = attraction_groups.get_att_group_pulls(); + if (cur_pb->pb_stats->pulled_from_atom_groups < num_pulls) { + cur_pb->pb_stats->pulled_from_atom_groups++; + } else { + return; + } AttractGroupId grp_id = cur_pb->pb_stats->attraction_grp_id; - if (grp_id != AttractGroupId::INVALID()) { - AttractionGroup group = attraction_groups.get_attraction_group_info(grp_id); + if (grp_id == AttractGroupId::INVALID()) { + return; + } - for (AtomBlockId blk_id : group.group_atoms) { - if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID()) { - auto rng = atom_molecules.equal_range(blk_id); + AttractionGroup& group = attraction_groups.get_attraction_group_info(grp_id); + int num_available_atoms = group.group_atoms.size(); + if (num_available_atoms == 0) { + return; + } + + if (num_available_atoms < 500) { + for (AtomBlockId atom_id : group.group_atoms) { + const auto& atom_model = atom_ctx.nlist.block_model(atom_id); + auto itr = primitive_candidate_block_types.find(atom_model); + VTR_ASSERT(itr != primitive_candidate_block_types.end()); + std::vector& candidate_types = itr->second; + + //Only consider molecules that are unpacked and of the correct type + if (atom_ctx.lookup.atom_clb(atom_id) == ClusterBlockId::INVALID() + && std::find(candidate_types.begin(), candidate_types.end(), cluster_type) != candidate_types.end()) { + auto rng = atom_molecules.equal_range(atom_id); for (const auto& kv : vtr::make_range(rng.first, rng.second)) { t_pack_molecule* molecule = kv.second; if (molecule->valid) { bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); if (success) { add_molecule_to_pb_stats_candidates(molecule, - cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size); + cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); } } } } } + return; + } + + int min = 0; + int max = num_available_atoms - 1; + + for (int j = 0; j < 500; j++) { + std::random_device rd; + std::mt19937 
gen(rd()); + std::uniform_int_distribution<> distr(min, max); + int selected_atom = distr(gen); + + AtomBlockId blk_id = group.group_atoms[selected_atom]; + const auto& atom_model = atom_ctx.nlist.block_model(blk_id); + auto itr = primitive_candidate_block_types.find(atom_model); + VTR_ASSERT(itr != primitive_candidate_block_types.end()); + std::vector& candidate_types = itr->second; + + //Only consider molecules that are unpacked and of the correct type + if (atom_ctx.lookup.atom_clb(blk_id) == ClusterBlockId::INVALID() + && std::find(candidate_types.begin(), candidate_types.end(), cluster_type) != candidate_types.end()) { + auto rng = atom_molecules.equal_range(blk_id); + for (const auto& kv : vtr::make_range(rng.first, rng.second)) { + t_pack_molecule* molecule = kv.second; + if (molecule->valid) { + bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); + if (success) { + add_molecule_to_pb_stats_candidates(molecule, + cur_pb->pb_stats->gain, cur_pb, feasible_block_array_size, attraction_groups); + } + } + } + } } } @@ -2564,7 +2835,8 @@ static void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur vtr::vector>& clb_inter_blk_nets, const ClusterBlockId cluster_index, int transitive_fanout_threshold, - const int feasible_block_array_size) { + const int feasible_block_array_size, + AttractionInfo& attraction_groups) { //TODO: For now, only done by fan-out; should also consider fan-in cur_pb->pb_stats->explore_transitive_fanout = false; @@ -2582,7 +2854,7 @@ static void add_cluster_molecule_candidates_by_transitive_connectivity(t_pb* cur bool success = check_free_primitives_for_molecule_atoms(molecule, cluster_placement_stats_ptr); if (success) { add_molecule_to_pb_stats_candidates(molecule, - cur_pb->pb_stats->gain, cur_pb, std::min(feasible_block_array_size, AAPACK_MAX_TRANSITIVE_EXPLORE)); + cur_pb->pb_stats->gain, cur_pb, std::min(feasible_block_array_size, AAPACK_MAX_TRANSITIVE_EXPLORE), 
attraction_groups); } } } @@ -2621,7 +2893,8 @@ static t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, t_cluster_placement_stats* cluster_placement_stats_ptr, vtr::vector>& clb_inter_blk_nets, ClusterBlockId cluster_index, - int verbosity) { + int verbosity, + std::map>& primitive_candidate_block_types) { /* Finds the block with the greatest gain that satisfies the * input, clock and capacity constraints of a cluster that are * passed in. If no suitable block is found it returns ClusterBlockId::INVALID(). @@ -2634,7 +2907,7 @@ static t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb, auto best_molecule = get_highest_gain_molecule(cur_pb, atom_molecules, attraction_groups, NOT_HILL_CLIMBING, cluster_placement_stats_ptr, clb_inter_blk_nets, cluster_index, prioritize_transitive_connectivity, - transitive_fanout_threshold, feasible_block_array_size); + transitive_fanout_threshold, feasible_block_array_size, primitive_candidate_block_types); /* If no blocks have any gain to the current cluster, the code above * * will not find anything. 
However, another atom block with no inputs in * @@ -2959,13 +3232,15 @@ static t_pack_molecule* get_highest_gain_seed_molecule(int* seedindex, const std * + molecule_base_gain*some_factor * - introduced_input_nets_of_unrelated_blocks_pulled_in_by_molecule*some_other_factor */ -static float get_molecule_gain(t_pack_molecule* molecule, std::map& blk_gain) { +static float get_molecule_gain(t_pack_molecule* molecule, std::map& blk_gain, AttractGroupId cluster_attraction_group_id, AttractionInfo& attraction_groups, int num_molecule_failures) { float gain; int i; int num_introduced_inputs_of_indirectly_related_block; auto& atom_ctx = g_vpr_ctx.atom(); gain = 0; + float attraction_group_penalty = 0.1; + num_introduced_inputs_of_indirectly_related_block = 0; for (i = 0; i < get_array_size_of_molecule(molecule); i++) { auto blk_id = molecule->atom_block_ids[i]; @@ -2994,12 +3269,23 @@ static float get_molecule_gain(t_pack_molecule* molecule, std::mapbase_gain * 0.0001; /* Use base gain as tie breaker TODO: need to sweep this value and perhaps normalize */ gain -= num_introduced_inputs_of_indirectly_related_block * (0.001); + if (num_molecule_failures > 0 && attraction_groups.num_attraction_groups() > 0) { + gain -= 0.1 * num_molecule_failures; + } + return gain; } diff --git a/vpr/src/pack/cluster.h b/vpr/src/pack/cluster.h index db30f75750d..f63c0a0eab5 100644 --- a/vpr/src/pack/cluster.h +++ b/vpr/src/pack/cluster.h @@ -22,7 +22,8 @@ std::map do_clustering(const t_packer_opts& pa std::vector* lb_type_rr_graphs, const t_ext_pin_util_targets& ext_pin_util_targets, const t_pack_high_fanout_thresholds& high_fanout_thresholds, - AttractionInfo& attraction_groups); + AttractionInfo& attraction_groups, + bool& floorplan_regions_overfull); int get_cluster_of_block(int blkidx); diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index a36a4d39bfe..3cc1bec440f 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -255,7 +255,8 @@ 
void check_and_output_clustering(const t_packer_opts& packer_opts, const std::unordered_set& is_clock, const t_arch* arch, const int& num_clb, - const vtr::vector*>& intra_lb_routing) { + const vtr::vector*>& intra_lb_routing, + bool& floorplan_regions_overfull) { auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); VTR_ASSERT(num_clb == (int)cluster_ctx.clb_nlist.blocks().size()); @@ -267,6 +268,9 @@ void check_and_output_clustering(const t_packer_opts& packer_opts, output_clustering(intra_lb_routing, packer_opts.global_clocks, is_clock, arch->architecture_id, packer_opts.output_file.c_str(), false); + //check_floorplan_regions(floorplan_regions_overfull); + floorplan_regions_overfull = floorplan_constraints_regions_overfull(); + VTR_ASSERT(cluster_ctx.clb_nlist.blocks().size() == intra_lb_routing.size()); } @@ -313,4 +317,4 @@ bool check_cluster_legality(const int& verbosity, is_cluster_legal = true; } return is_cluster_legal; -} \ No newline at end of file +} diff --git a/vpr/src/pack/cluster_util.h b/vpr/src/pack/cluster_util.h index 1359d7be9c9..bff9510c0c2 100644 --- a/vpr/src/pack/cluster_util.h +++ b/vpr/src/pack/cluster_util.h @@ -3,6 +3,7 @@ #include "pack_types.h" #include "echo_files.h" #include "vpr_utils.h" +#include "constraints_report.h" #include "timing_info.h" #include "PreClusterDelayCalculator.h" @@ -83,12 +84,13 @@ void free_clustering_data(const t_packer_opts& packer_opts, t_molecule_link* memory_pool, t_pb_graph_node** primitives_list); -//check ckustering legality and output it +//check clustering legality and output it void check_and_output_clustering(const t_packer_opts& packer_opts, const std::unordered_set& is_clock, const t_arch* arch, const int& num_clb, - const vtr::vector*>& intra_lb_routing); + const vtr::vector*>& intra_lb_routing, + bool& floorplan_regions_overfull); void get_max_cluster_size_and_pb_depth(int& max_cluster_size, int& max_pb_depth); diff --git a/vpr/src/pack/constraints_report.cpp 
b/vpr/src/pack/constraints_report.cpp new file mode 100644 index 00000000000..77e612cc8b3 --- /dev/null +++ b/vpr/src/pack/constraints_report.cpp @@ -0,0 +1,60 @@ +#include "constraints_report.h" + +//To-do: alter this routine to check whether whole PartitionRegions are full instead of individual Regions +bool floorplan_constraints_regions_overfull() { + GridTileLookup grid_tiles; + + auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); + auto& device_ctx = g_vpr_ctx.device(); + + auto& block_types = device_ctx.logical_block_types; + + std::unordered_map> regions_count_info; + + for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { + if (!is_cluster_constrained(blk_id)) { + continue; + } + t_logical_block_type_ptr bt = cluster_ctx.clb_nlist.block_type(blk_id); + + PartitionRegion pr = floorplanning_ctx.cluster_constraints[blk_id]; + std::vector regions = pr.get_partition_region(); + + for (unsigned int i_reg = 0; i_reg < regions.size(); i_reg++) { + Region current_reg = regions[i_reg]; + + auto got = regions_count_info.find(current_reg); + + if (got == regions_count_info.end()) { + std::vector block_type_counts(block_types.size(), 0); + + block_type_counts[bt->index]++; + + regions_count_info.insert({current_reg, block_type_counts}); + + } else { + got->second[bt->index]++; + } + } + } + + bool floorplan_regions_overfull = false; + + for (auto& region_info : regions_count_info) { + vtr::Rect rect = region_info.first.get_region_rect(); + for (unsigned int j = 0; j < block_types.size(); j++) { + int num_assigned_blocks = region_info.second[j]; + int num_tiles = 0; + num_tiles = grid_tiles.region_tile_count(region_info.first, &block_types[j]); + if (num_assigned_blocks > num_tiles) { + floorplan_regions_overfull = true; + floorplanning_ctx.overfull_regions.push_back(region_info.first); + VTR_LOG("\n \nRegion (%d, %d) to (%d, %d) st %d \n", rect.xmin(), rect.ymin(), rect.xmax(), rect.ymax(), 
region_info.first.get_sub_tile()); + VTR_LOG("Assigned %d blocks of type %s, but only has %d tiles of that type\n", num_assigned_blocks, block_types[j].name, num_tiles); + } + } + } + + return floorplan_regions_overfull; +} diff --git a/vpr/src/pack/constraints_report.h b/vpr/src/pack/constraints_report.h new file mode 100644 index 00000000000..c41107f8c78 --- /dev/null +++ b/vpr/src/pack/constraints_report.h @@ -0,0 +1,21 @@ +/* Perform a check at the end of each packing iteration to see whether any + * floorplan regions have been packed with too many clusters. + */ + +#ifndef VPR_SRC_PACK_CONSTRAINTS_REPORT_H_ +#define VPR_SRC_PACK_CONSTRAINTS_REPORT_H_ + +#include "globals.h" +#include "grid_tile_lookup.h" +#include "place_constraints.h" + +/* + * Check if any constraints regions are overfull, + * i.e. the region contains more clusters of a certain type + * than it has room for. + * If the region is overfull, a message is printed saying which + * region is overfull, and by how many clusters. + */ +bool floorplan_constraints_regions_overfull(); + +#endif /* VPR_SRC_PACK_CONSTRAINTS_REPORT_H_ */ diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index a362f0dc547..28bcc7ac84f 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -100,7 +100,10 @@ bool try_pack(t_packer_opts* packer_opts, expected_lowest_cost_pb_gnode, list_of_packing_patterns.size())); - AttractionInfo attraction_groups(packer_opts->use_attraction_groups); + /* We keep attraction groups off in the first iteration, and + * only turn on in later iterations if some floorplan regions turn out to be overfull. 
+ */ + AttractionInfo attraction_groups(false); VTR_LOG("%d attraction groups were created during prepacking.\n", attraction_groups.num_attraction_groups()); VTR_LOG("Finish prepacking.\n"); @@ -130,6 +133,7 @@ bool try_pack(t_packer_opts* packer_opts, } int pack_iteration = 1; + bool floorplan_regions_overfull = false; while (true) { //Cluster the netlist @@ -145,14 +149,21 @@ bool try_pack(t_packer_opts* packer_opts, lb_type_rr_graphs, target_external_pin_util, high_fanout_thresholds, - attraction_groups); + attraction_groups, + floorplan_regions_overfull); //Try to size/find a device bool fits_on_device = try_size_device_grid(*arch, num_type_instances, packer_opts->target_device_utilization, packer_opts->device_layout); - if (fits_on_device) { + /* We use this bool to determine the cause for the clustering not being dense enough. If the clustering + * is not dense enough and there are floorplan constraints, it is presumed that the constraints are the cause + * of the floorplan not fitting, so attraction groups are turned on for later iterations. + */ + bool floorplan_not_fitting = (floorplan_regions_overfull || g_vpr_ctx.mutable_floorplanning().constraints.get_num_partitions() > 0); + + if (fits_on_device && !floorplan_regions_overfull) { break; //Done - } else if (pack_iteration == 1) { + } else if (pack_iteration == 1 && !floorplan_not_fitting) { //1st pack attempt was unsucessful (i.e. not dense enough) and we have control of unrelated clustering // //Turn it on to increase packing density @@ -167,9 +178,28 @@ bool try_pack(t_packer_opts* packer_opts, VTR_LOG("Packing failed to fit on device. Re-packing with: unrelated_logic_clustering=%s balance_block_type_util=%s\n", (allow_unrelated_clustering ? "true" : "false"), (balance_block_type_util ? "true" : "false")); + } else if (pack_iteration == 1 && floorplan_not_fitting) { + VTR_LOG("Floorplan regions are overfull: trying to pack again using cluster attraction groups. 
\n"); + attraction_groups.create_att_groups_for_overfull_regions(); + attraction_groups.set_att_group_pulls(1); + + } else if (pack_iteration >= 2 && pack_iteration < 5 && floorplan_not_fitting) { + VTR_LOG("Floorplan regions are overfull: trying to pack again with more attraction groups exploration and higher target pin utilization. \n"); + attraction_groups.create_att_groups_for_overfull_regions(); + VTR_LOG("Pack iteration is %d\n", pack_iteration); + attraction_groups.set_att_group_pulls(4); + t_ext_pin_util pin_util(1.0, 1.0); + target_external_pin_util.set_block_pin_util("clb", pin_util); + } else { //Unable to pack densely enough: Give Up + if (floorplan_regions_overfull) { + VPR_FATAL_ERROR(VPR_ERROR_OTHER, + "Failed to find pack clusters densely enough to fit in the designated floorplan regions.\n" + "The floorplan regions may need to be expanded to run successfully. \n"); + } + //No suitable device found std::string resource_reqs; std::string resource_avail; @@ -201,6 +231,8 @@ bool try_pack(t_packer_opts* packer_opts, for (auto net : g_vpr_ctx.atom().nlist.nets()) { g_vpr_ctx.mutable_atom().lookup.set_atom_clb_net(net, ClusterNetId::INVALID()); } + g_vpr_ctx.mutable_floorplanning().cluster_constraints.clear(); + //attraction_groups.reset_attraction_groups(); ++pack_iteration; } diff --git a/vpr/src/pack/pack_types.h b/vpr/src/pack/pack_types.h index 987d407936d..ce7e2bba557 100644 --- a/vpr/src/pack/pack_types.h +++ b/vpr/src/pack/pack_types.h @@ -52,6 +52,17 @@ struct t_pb_stats { * used by all other child pbs in this parent pb. 
*/ std::map hillgain; + /* + * stores the number of times atoms have failed to be packed into the cluster + * key: root block id of the molecule, value: number of times the molecule has failed to be packed into the cluster + */ + std::map atom_failures; + + int pulled_from_atom_groups; + int num_att_group_atoms_used; + + std::vector available_att_group_atoms; + std::vector marked_nets; //List of nets with the num_pins_of_net_in_pb and gain entries altered std::vector marked_blocks; //List of blocks with the num_pins_of_net_in_pb and gain entries altered diff --git a/vpr/src/place/grid_tile_lookup.cpp b/vpr/src/place/grid_tile_lookup.cpp index 231e997b272..fec54fa0940 100644 --- a/vpr/src/place/grid_tile_lookup.cpp +++ b/vpr/src/place/grid_tile_lookup.cpp @@ -1,18 +1,5 @@ #include "grid_tile_lookup.h" -void GridTileLookup::initialize_grid_tile_matrices() { - auto& device_ctx = g_vpr_ctx.device(); - - //Will store the max number of tile locations for each logical block type - max_placement_locations.resize(device_ctx.logical_block_types.size()); - - for (const auto& type : device_ctx.logical_block_types) { - vtr::NdMatrix type_count({device_ctx.grid.width(), device_ctx.grid.height()}); - fill_type_matrix(&type, type_count); - block_type_matrices.push_back(type_count); - } -} - void GridTileLookup::fill_type_matrix(t_logical_block_type_ptr block_type, vtr::NdMatrix& type_count) { auto& device_ctx = g_vpr_ctx.device(); @@ -29,9 +16,10 @@ void GridTileLookup::fill_type_matrix(t_logical_block_type_ptr block_type, vtr:: for (int i_col = type_count.dim_size(0) - 1; i_col >= 0; i_col--) { for (int j_row = type_count.dim_size(1) - 1; j_row >= 0; j_row--) { auto& tile = device_ctx.grid[i_col][j_row].type; + const t_grid_tile& grid_tile = device_ctx.grid[i_col][j_row]; type_count[i_col][j_row] = 0; - if (is_tile_compatible(tile, block_type)) { + if (is_tile_compatible(tile, block_type) && grid_tile.height_offset == 0 && grid_tile.width_offset == 0) { for (const auto& sub_tile 
: tile->sub_tiles) { if (is_sub_tile_compatible(tile, block_type, sub_tile.capacity.low)) { type_count[i_col][j_row] = sub_tile.capacity.total(); @@ -71,13 +59,23 @@ int GridTileLookup::total_type_tiles(t_logical_block_type_ptr block_type) { * The region with subtile case is taken care of by a helper routine, region_with_subtile_count(). */ int GridTileLookup::region_tile_count(const Region& reg, t_logical_block_type_ptr block_type) { - vtr::Rect reg_rect = reg.get_region_rect(); + auto& device_ctx = g_vpr_ctx.device(); int subtile = reg.get_sub_tile(); - int xmin = reg_rect.xmin(); - int ymin = reg_rect.ymin(); - int xmax = reg_rect.xmax(); - int ymax = reg_rect.ymax(); + /*Intersect the region with the grid, in case the region passed in goes out of bounds + * By intersecting with the grid, we ensure that we are only counting tiles for the part of the + * region that fits on the grid.*/ + Region grid_reg; + grid_reg.set_region_rect(0, 0, device_ctx.grid.width() - 1, device_ctx.grid.height() - 1); + Region intersect_reg; + intersect_reg = intersection(reg, grid_reg); + + vtr::Rect intersect_rect = intersect_reg.get_region_rect(); + + int xmin = intersect_rect.xmin(); + int ymin = intersect_rect.ymin(); + int xmax = intersect_rect.xmax(); + int ymax = intersect_rect.ymax(); auto& type_grid = block_type_matrices[block_type->index]; int xdim = type_grid.dim_size(0); @@ -132,12 +130,3 @@ int GridTileLookup::region_with_subtile_count(const Region& reg, t_logical_block return num_sub_tiles; } - -void GridTileLookup::print_type_matrix(vtr::NdMatrix& type_count) { - for (int i_col = type_count.dim_size(0) - 1; i_col >= 0; i_col--) { - for (int j_row = type_count.dim_size(1) - 1; j_row >= 0; j_row--) { - VTR_LOG("%d ", type_count[i_col][j_row]); - } - VTR_LOG("\n"); - } -} diff --git a/vpr/src/place/grid_tile_lookup.h b/vpr/src/place/grid_tile_lookup.h index edf4388f3ea..a014e0d5786 100644 --- a/vpr/src/place/grid_tile_lookup.h +++ b/vpr/src/place/grid_tile_lookup.h @@ 
-15,13 +15,22 @@ class GridTileLookup { public: - vtr::NdMatrix& get_type_grid(t_logical_block_type_ptr block_type); + GridTileLookup() { + auto& device_ctx = g_vpr_ctx.device(); - void initialize_grid_tile_matrices(); + //Will store the max number of tile locations for each logical block type + max_placement_locations.resize(device_ctx.logical_block_types.size()); - void fill_type_matrix(t_logical_block_type_ptr block_type, vtr::NdMatrix& type_count); + for (const auto& type : device_ctx.logical_block_types) { + vtr::NdMatrix type_count({device_ctx.grid.width(), device_ctx.grid.height()}); + fill_type_matrix(&type, type_count); + block_type_matrices.push_back(type_count); + } + } - void print_type_matrix(vtr::NdMatrix& type_count); + vtr::NdMatrix& get_type_grid(t_logical_block_type_ptr block_type); + + void fill_type_matrix(t_logical_block_type_ptr block_type, vtr::NdMatrix& type_count); int region_tile_count(const Region& reg, t_logical_block_type_ptr block_type); diff --git a/vpr/src/place/initial_placement.cpp b/vpr/src/place/initial_placement.cpp index 4f4d7c89405..777bf50c027 100644 --- a/vpr/src/place/initial_placement.cpp +++ b/vpr/src/place/initial_placement.cpp @@ -97,6 +97,33 @@ void print_sorted_blocks(const std::vector& sorted_blocks, const static void place_all_blocks(const std::vector& sorted_blocks, enum e_pad_loc_type pad_loc_type); +/** + * @brief If any blocks are unplaced after initial placement, this routine + * prints an error message showing the names, types, and IDs of the unplaced blocks + */ +static void print_unplaced_blocks(); + +static void print_unplaced_blocks() { + auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& place_ctx = g_vpr_ctx.placement(); + + int unplaced_blocks = 0; + + for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { + if (place_ctx.block_locs[blk_id].loc.x == INVALID_X) { + VTR_LOG("Block %s (# %d) of type %s could not be placed during initial placement\n", cluster_ctx.clb_nlist.block_name(blk_id).c_str(), blk_id, 
cluster_ctx.clb_nlist.block_type(blk_id)->name); + unplaced_blocks++; + } + } + + if (unplaced_blocks > 0) { + VPR_FATAL_ERROR(VPR_ERROR_PLACE, + "%d blocks could not be placed during initial placement, no spaces were available for them on the grid.\n" + "If VPR was run with floorplan constraints, the constraints may be too tight.\n", + unplaced_blocks); + } +} + static bool is_loc_on_chip(t_pl_loc& loc) { auto& device_ctx = g_vpr_ctx.device(); @@ -408,13 +435,7 @@ static void place_macro(int macros_max_num_tries, t_pl_macro pl_macro, enum e_pa tried_types.push_back(tile_type->name); } std::string tried_types_str = "{" + vtr::join(tried_types, ", ") + "}"; - - // Error out - VPR_FATAL_ERROR(VPR_ERROR_PLACE, - "Initial placement failed.\n" - "Could not place macro length %zu with head block %s (#%zu); not enough free locations of type(s) %s.\n" - "Please manually size the FPGA because VPR can't do this yet.\n", - pl_macro.members.size(), cluster_ctx.clb_nlist.block_name(blk_id).c_str(), size_t(blk_id), tried_types_str.c_str()); + break; } } } @@ -434,7 +455,6 @@ static vtr::vector assign_block_scores() { //GridTileLookup class provides info needed for calculating number of tiles covered by a region GridTileLookup grid_tiles; - grid_tiles.initialize_grid_tile_matrices(); /* * For the blocks with no floorplan constraints, and the blocks that are not part of macros, @@ -584,6 +604,9 @@ void initial_placement(enum e_pad_loc_type pad_loc_type, const char* constraints //Place the blocks in sorted order place_all_blocks(sorted_blocks, pad_loc_type); + //if any blocks remain unplaced, print an error + print_unplaced_blocks(); + #ifdef VERBOSE VTR_LOG("At end of initial_placement.\n"); if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_INITIAL_CLB_PLACEMENT)) {