diff --git a/doc/src/vpr/command_line_usage.rst b/doc/src/vpr/command_line_usage.rst index 9e1ab94ae72..fc575e08a13 100644 --- a/doc/src/vpr/command_line_usage.rst +++ b/doc/src/vpr/command_line_usage.rst @@ -381,6 +381,9 @@ Use the options below to override this default naming behaviour. .. option:: --write_placement_delay_lookup Writes the placement delay lookup to the specified file. +.. option:: --write_initial_place_file + + Writes out the placement chosen by the initial placement algorithm to the specified file. .. option:: --outfile_prefix @@ -769,6 +772,19 @@ If any of init_t, exit_t or alpha_t is specified, the user schedule, with a fixe **Default:** ``criticality_timing`` +.. option:: --place_bounding_box_mode {auto_bb | cube_bb | per_layer_bb} + + Specifies the type of wirelength estimator used during placement. For single-layer architectures, cube_bb (a 3D bounding box) is always used (and is the same as per_layer_bb). + For 3D architectures, cube_bb is appropriate if signals can cross between layers at switch blocks; if signals can only cross between layers at output pins, per_layer_bb (one bounding box per layer) is more accurate and appropriate. + + ``auto_bb``: The bounding box type is determined automatically based on the cross-layer connections. + + ``cube_bb``: The ``cube_bb`` bounding box is used to estimate the wirelength. + + ``per_layer_bb``: The ``per_layer_bb`` bounding box is used to estimate the wirelength. + + **Default:** ``auto_bb`` + .. option:: --place_chan_width Tells VPR how many tracks a channel of relative width 1 is expected to need to complete routing of this circuit. diff --git a/libs/librrgraph/src/base/rr_graph_utils.cpp b/libs/librrgraph/src/base/rr_graph_utils.cpp index 6552c8c214b..11b6a569c9e 100644 --- a/libs/librrgraph/src/base/rr_graph_utils.cpp +++ b/libs/librrgraph/src/base/rr_graph_utils.cpp @@ -5,13 +5,9 @@ ***************************************************************************/ #include #include -#include #include "rr_graph_utils.h" -#include "vtr_memory.h" -#include "vtr_time.h" - #include "vpr_error.h" #include "rr_graph_obj.h" @@ -119,4 +115,27 @@ vtr::vector> get_fan_in_list(const RRGraphView& }); return node_fan_in_list; +} + +bool inter_layer_connections_limited_to_opin(const RRGraphView& rr_graph) { + bool limited_to_opin = true; + for (const auto& from_node : rr_graph.nodes()) { + for (t_edge_size edge : rr_graph.edges(from_node)) { + RRNodeId to_node = rr_graph.edge_sink_node(from_node, edge); + int from_layer = rr_graph.node_layer(from_node); + int to_layer = rr_graph.node_layer(to_node); + + if (from_layer != to_layer) { + if (rr_graph.node_type(from_node) != e_rr_type::OPIN) { + limited_to_opin = false; + break; + } + } + } + if (!limited_to_opin) { + break; + } + } + + return limited_to_opin; } \ No newline at end of file diff --git a/libs/librrgraph/src/base/rr_graph_utils.h b/libs/librrgraph/src/base/rr_graph_utils.h index 0725bcd0cf9..6ef6148c1fa 100644 --- a/libs/librrgraph/src/base/rr_graph_utils.h +++ b/libs/librrgraph/src/base/rr_graph_utils.h @@ -48,4 +48,12 @@ vtr::vector> get_fan_in_list(const RRGraphView& int seg_index_of_cblock(const RRGraphView& rr_graph, t_rr_type from_rr_type, int to_node); int seg_index_of_sblock(const RRGraphView& rr_graph, int from_node, int to_node); +/** + * @brief This function checks whether all inter-die connections are from OPINs. Returns "true" + * if that is the case. Can be used for multiple purposes. For example, to determine which type of bounding + * box should be used to estimate the wirelength of a net. + * @param rr_graph The routing resource graph to check. + * @return True if every inter-layer connection starts from an OPIN. + */ +bool inter_layer_connections_limited_to_opin(const RRGraphView& rr_graph); #endif \ No newline at end of file diff --git a/libs/librrgraph/src/base/rr_graph_view.h b/libs/librrgraph/src/base/rr_graph_view.h index 1ff9a33115e..9940ef85ee3 100644 --- a/libs/librrgraph/src/base/rr_graph_view.h +++ b/libs/librrgraph/src/base/rr_graph_view.h @@ -234,7 +234,7 @@ class RRGraphView { } /** @brief Get string of information about routing resource node. The string will contain the following information. - * type, side, x_low, x_high, y_low, y_high, length, direction, segment_name + * type, side, x_low, x_high, y_low, y_high, length, direction, segment_name, layer num * This function is inlined for runtime optimization. */ inline const std::string node_coordinate_to_string(RRNodeId node) const { @@ -242,6 +242,7 @@ class RRGraphView { std::string start_y; //start y-coordinate std::string end_x; //end x-coordinate std::string end_y; //end y-coordinate + std::string layer_num_str; //layer number std::string arrow; //direction arrow std::string coordinate_string = node_type_string(node); //write the component's type as a routing resource node coordinate_string += ":" + std::to_string(size_t(node)) + " "; //add the index of the routing resource node @@ -256,12 +257,14 @@ class RRGraphView { coordinate_string += ")"; //add the side of the routing resource node // For OPINs and IPINs the starting and ending coordinate are identical, so we can just arbitrarily assign the start to larger values // and the end to the lower coordinate - start_x = " (" + std::to_string(node_xhigh(node)) + ","; //start and end coordinates are the same for OPINs and IPINs - start_y = std::to_string(node_yhigh(node)) + ")"; + start_x = " (" + std::to_string(node_xhigh(node)) + ","; //start and end coordinates are the same for OPINs and IPINs + start_y = std::to_string(node_yhigh(node)) + ","; + layer_num_str = std::to_string(node_layer(node)) + ")"; } else if (node_type(node) == SOURCE || node_type(node) == SINK) { // For SOURCE and SINK the starting and ending coordinate are identical, so just use start - start_x = "(" + std::to_string(node_xhigh(node)) + ","; - start_y = std::to_string(node_yhigh(node)) + ")"; + start_x = " (" + std::to_string(node_xhigh(node)) + ","; + start_y = std::to_string(node_yhigh(node)) + ","; + layer_num_str = std::to_string(node_layer(node)) + ")"; } else if (node_type(node) == CHANX || node_type(node) == CHANY) { //for channels, we would like to describe the component with segment specific information RRIndexedDataId cost_index = node_cost_index(node); int seg_index = rr_indexed_data_[cost_index].seg_index; @@ -272,26 +275,29 @@ class RRGraphView { arrow = "->"; //we will point the coordinates from start to finish, left to right if (node_direction(node) == Direction::DEC) { //signal travels along decreasing direction + start_x = " (" + std::to_string(node_xhigh(node)) + ","; //start coordinates have large value - start_y = std::to_string(node_yhigh(node)) + ")"; - end_x = "(" + std::to_string(node_xlow(node)) + ","; //end coordinates have smaller value - end_y = std::to_string(node_ylow(node)) + ")"; + start_y = std::to_string(node_yhigh(node)) + ","; + end_x = " (" + std::to_string(node_xlow(node)) + ","; //end coordinates have smaller value + end_y = std::to_string(node_ylow(node)) + ","; + layer_num_str = std::to_string(node_layer(node)) + ")"; } else { // 
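How this helper feeds the new auto_bb mode is not shown in this diff; below is a minimal sketch of the likely glue logic, under the assumption that the decision follows the documentation above (use_cube_bb is a hypothetical name, not part of the patch):

enum e_place_bounding_box_mode { AUTO_BB, CUBE_BB, PER_LAYER_BB };

// 'limited_to_opin' would be the result of calling
// inter_layer_connections_limited_to_opin(rr_graph).
bool use_cube_bb(e_place_bounding_box_mode mode, int num_layers, bool limited_to_opin) {
    if (mode == CUBE_BB) return true;
    if (mode == PER_LAYER_BB) return false;
    // AUTO_BB: a single-die device always uses the cube (3D) bounding box; a
    // multi-die device uses it only when layers can be crossed at switch
    // blocks, i.e. when inter-layer connections are NOT limited to OPINs.
    return num_layers == 1 || !limited_to_opin;
}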
signal travels in increasing direction, stays at same point, or can travel both directions start_x = " (" + std::to_string(node_xlow(node)) + ","; //start coordinates have smaller value - start_y = std::to_string(node_ylow(node)) + ")"; - end_x = "(" + std::to_string(node_xhigh(node)) + ","; //end coordinates have larger value - end_y = std::to_string(node_yhigh(node)) + ")"; + start_y = std::to_string(node_ylow(node)) + ","; + end_x = " (" + std::to_string(node_xhigh(node)) + ","; //end coordinates have larger value + end_y = std::to_string(node_yhigh(node)) + ","; + layer_num_str = std::to_string(node_layer(node)) + ")"; //layer number if (node_direction(node) == Direction::BIDIR) { arrow = "<->"; //indicate that signal can travel both direction } } } - coordinate_string += start_x + start_y; //Write the starting coordinates + coordinate_string += start_x + start_y + layer_num_str; //Write the starting coordinates coordinate_string += arrow; //Indicate the direction - coordinate_string += end_x + end_y; //Write the end coordinates + coordinate_string += end_x + end_y + layer_num_str; //Write the end coordinates return coordinate_string; } diff --git a/utils/route_diag/src/main.cpp b/utils/route_diag/src/main.cpp index 571c17c30e6..0cf1c901d23 100644 --- a/utils/route_diag/src/main.cpp +++ b/utils/route_diag/src/main.cpp @@ -84,6 +84,8 @@ static void do_one_route(const Netlist<>& net_list, bounding_box.xmax = device_ctx.grid.width() + 1; bounding_box.ymin = 0; bounding_box.ymax = device_ctx.grid.height() + 1; + bounding_box.layer_min = 0; + bounding_box.layer_max = device_ctx.grid.get_num_layers() - 1; t_conn_cost_params cost_params; cost_params.criticality = router_opts.max_criticality; @@ -203,9 +205,12 @@ static void profile_source(const Netlist<>& net_list, vtr::ScopedStartFinishTimer delay_timer(vtr::string_fmt( "Routing Src: %d Sink: %d", source_rr_node, sink_rr_node)); - successfully_routed = profiler.calculate_delay(RRNodeId(source_rr_node), RRNodeId(sink_rr_node), - router_opts, - &delays[sink_x][sink_y]); + + successfully_routed = profiler.calculate_delay(RRNodeId(source_rr_node), + RRNodeId(sink_rr_node), + router_opts, + &delays[sink_x][sink_y], + layer_num); } if (successfully_routed) { diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index b5492a1f8ec..e596bd51c43 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -622,6 +622,8 @@ static void SetupPlacerOpts(const t_options& Options, t_placer_opts* PlacerOpts) PlacerOpts->constraints_file = Options.constraints_file; + PlacerOpts->write_initial_place_file = Options.write_initial_place_file; + PlacerOpts->pad_loc_type = Options.pad_loc_type; PlacerOpts->place_chan_width = Options.PlaceChanWidth; @@ -661,6 +663,7 @@ static void SetupPlacerOpts(const t_options& Options, t_placer_opts* PlacerOpts) PlacerOpts->place_static_move_prob = Options.place_static_move_prob; PlacerOpts->place_static_notiming_move_prob = Options.place_static_notiming_move_prob; PlacerOpts->place_high_fanout_net = Options.place_high_fanout_net; + PlacerOpts->place_bounding_box_mode = Options.place_bounding_box_mode; PlacerOpts->RL_agent_placement = Options.RL_agent_placement; PlacerOpts->place_agent_multistate = Options.place_agent_multistate; PlacerOpts->place_checkpointing = Options.place_checkpointing; diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 72800d8b1c0..32929c4fc9a 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -430,6 +430,41 @@ struct 
ParsePlaceAlgorithm { } }; +struct ParsePlaceBoundingBox { + ConvertedValue from_str(std::string str) { + ConvertedValue conv_value; + if (str == "auto_bb") { + conv_value.set_value(AUTO_BB); + } else if (str == "cube_bb") { + conv_value.set_value(CUBE_BB); + } else if (str == "per_layer_bb") { + conv_value.set_value(PER_LAYER_BB); + } else { + std::stringstream msg; + msg << "Invalid conversion from '" << str << "' to e_place_bounding_box_mode (expected one of: " << argparse::join(default_choices(), ", ") << ")"; + conv_value.set_error(msg.str()); + } + return conv_value; + } + + ConvertedValue to_str(e_place_bounding_box_mode val) { + ConvertedValue conv_value; + if (val == AUTO_BB) { + conv_value.set_value("auto_bb"); + } else if (val == CUBE_BB) { + conv_value.set_value("cube_bb"); + } else { + VTR_ASSERT(val == PER_LAYER_BB); + conv_value.set_value("per_layer_bb"); + } + return conv_value; + } + + std::vector default_choices() { + return {"auto_bb", "cube_bb", "per_layer_bb"}; + } +}; + struct ParsePlaceAgentAlgorithm { ConvertedValue from_str(std::string str) { ConvertedValue conv_value; @@ -1569,6 +1604,11 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg .metavar("RR_GRAPH_FILE") .show_in(argparse::ShowIn::HELP_ONLY); + file_grp.add_argument(args.write_initial_place_file, "--write_initial_place_file") + .help("Writes out the placement chosen by the initial placement algorithm to the specified file.") + .metavar("INITIAL_PLACE_FILE") + .show_in(argparse::ShowIn::HELP_ONLY); + file_grp.add_argument(args.read_vpr_constraints_file, "--read_vpr_constraints") .help("Reads the floorplanning constraints that packing and placement must respect from the specified XML file.") .show_in(argparse::ShowIn::HELP_ONLY); @@ -2007,6 +2047,20 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg .default_value("10") .show_in(argparse::ShowIn::HELP_ONLY); + place_grp.add_argument(args.place_bounding_box_mode, "--place_bounding_box_mode") + .help( + "Specifies the type of bounding box to be used in 3D architectures.\n" + "\n" + "MODE options:\n" + " auto_bb : Automatically determine the appropriate bounding box based on the connections between layers.\n" + " cube_bb : Use 3D bounding boxes.\n" + " per_layer_bb : Use per-layer bounding boxes.\n" + "\n" + "Choose one of the available modes to define the behavior of bounding boxes in your 3D architecture. The default mode is 'auto_bb'.") + .default_value("auto_bb") + .choices({"auto_bb", "cube_bb", "per_layer_bb"}) + .show_in(argparse::ShowIn::HELP_ONLY); + place_grp.add_argument(args.RL_agent_placement, "--RL_agent_placement") .help( "Uses a Reinforcement Learning (RL) agent in choosing the appropiate move type in placement." 
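As a quick check of the converter added above, its string<->enum mapping round-trips losslessly; a self-contained sketch (plain std::string and an exception stand in for VTR's ConvertedValue wrapper):

#include <cassert>
#include <stdexcept>
#include <string>

enum e_place_bounding_box_mode { AUTO_BB, CUBE_BB, PER_LAYER_BB };

// Mirrors ParsePlaceBoundingBox::from_str().
e_place_bounding_box_mode bb_mode_from_str(const std::string& s) {
    if (s == "auto_bb") return AUTO_BB;
    if (s == "cube_bb") return CUBE_BB;
    if (s == "per_layer_bb") return PER_LAYER_BB;
    throw std::runtime_error("Invalid conversion from '" + s + "' to e_place_bounding_box_mode");
}

// Mirrors ParsePlaceBoundingBox::to_str().
std::string bb_mode_to_str(e_place_bounding_box_mode m) {
    if (m == AUTO_BB) return "auto_bb";
    if (m == CUBE_BB) return "cube_bb";
    return "per_layer_bb";
}

int main() {
    for (const char* s : {"auto_bb", "cube_bb", "per_layer_bb"}) {
        assert(bb_mode_to_str(bb_mode_from_str(s)) == s); // lossless round-trip
    }
}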
diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index 06da9282f94..d1edc5ef2b2 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -27,6 +27,7 @@ struct t_options { argparse::ArgValue constraints_file; argparse::ArgValue write_rr_graph_file; argparse::ArgValue read_rr_graph_file; + argparse::ArgValue write_initial_place_file; argparse::ArgValue read_vpr_constraints_file; argparse::ArgValue write_vpr_constraints_file; @@ -127,6 +128,7 @@ struct t_options { argparse::ArgValue> place_static_move_prob; argparse::ArgValue> place_static_notiming_move_prob; argparse::ArgValue place_high_fanout_net; + argparse::ArgValue place_bounding_box_mode; argparse::ArgValue RL_agent_placement; argparse::ArgValue place_agent_multistate; diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index 9f379f84e42..65519d5775f 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -452,6 +452,8 @@ void vpr_create_device_grid(const t_vpr_setup& vpr_setup, const t_arch& Arch) { float target_device_utilization = vpr_setup.PackerOpts.target_device_utilization; device_ctx.grid = create_device_grid(vpr_setup.device_layout, Arch.grid_layouts, num_type_instances, target_device_utilization); + VTR_ASSERT_MSG(device_ctx.grid.get_num_layers() <= MAX_NUM_LAYERS, "Number of layers should be less than or equal to MAX_NUM_LAYERS. If you need more layers, please increase the value of MAX_NUM_LAYERS in vpr_types.h"); + /* *Report on the device */ diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 82e7be31249..a07a73e2827 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -402,6 +402,12 @@ struct PlacementContext : public Context { * placer_debug_net or placer_debug_block parameters in the command line. */ bool f_placer_debug = false; + + /** + * Set this variable to true if a cube (3D) bounding box is used in placement. If it is false, + * a per-layer bounding box is used. For 2D architectures, the cube bounding box is always used. + */ + bool cube_bb = false; }; /** diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 93ef759bb88..2784c5e63da 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -65,6 +65,14 @@ //#define VERBOSE //Prints additional intermediate data +/* + * Maximum number of device layers supported. + * For certain data structures, such as `num_sink_pin_layer` in the placer context, dynamically allocating + * memory based on the number of layers can lead to a performance hit due to additional pointer chasing and + * cache locality concerns. A compile-time constant lets such structures use fixed-size arrays instead. + */ +constexpr int MAX_NUM_LAYERS = 2; + /** * @brief For update_screen. Denotes importance of update. 
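To make the MAX_NUM_LAYERS rationale concrete, a small standalone comparison (hypothetical structs, not VPR's actual data structures): a compile-time layer bound lets per-layer counters live inline in the owning object rather than behind a heap pointer.

#include <array>
#include <vector>

constexpr int MAX_NUM_LAYERS = 2;

// One heap allocation per net; every access pays an extra pointer chase.
struct NetSinkLayerCountsDynamic {
    std::vector<int> sink_count_per_layer; // sized at runtime to num_layers
};

// Counters stored inline; consecutive nets stay contiguous in memory.
struct NetSinkLayerCountsFixed {
    std::array<int, MAX_NUM_LAYERS> sink_count_per_layer{};
};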
* @@ -570,48 +578,79 @@ struct t_net_power { }; /** - * @brief Stores the bounding box of a net in terms of the minimum and - * maximum coordinates of the blocks forming the net, clipped to - * the region: (1..device_ctx.grid.width()-2, 1..device_ctx.grid.height()-1) + * @brief Stores a 3D bounding box in terms of the minimum and + * maximum coordinates: x, y, layer */ struct t_bb { t_bb() = default; - t_bb(int xmin_, int xmax_, int ymin_, int ymax_) + t_bb(int xmin_, int xmax_, int ymin_, int ymax_, int layer_min_, int layer_max_) + : xmin(xmin_) + , xmax(xmax_) + , ymin(ymin_) + , ymax(ymax_) + , layer_min(layer_min_) + , layer_max(layer_max_) { + VTR_ASSERT(xmax_ >= xmin_); + VTR_ASSERT(ymax_ >= ymin_); + VTR_ASSERT(layer_max_ >= layer_min_); + } + int xmin = OPEN; + int xmax = OPEN; + int ymin = OPEN; + int ymax = OPEN; + int layer_min = OPEN; + int layer_max = OPEN; +}; + +/** + * @brief Stores a 2D bounding box in terms of the minimum and maximum x and y + * @note layer_num indicates the layer that the bounding box is on. + */ +struct t_2D_bb { + t_2D_bb() = default; + t_2D_bb(int xmin_, int xmax_, int ymin_, int ymax_, int layer_num_) : xmin(xmin_) , xmax(xmax_) , ymin(ymin_) - , ymax(ymax_) { + , ymax(ymax_) + , layer_num(layer_num_) { VTR_ASSERT(xmax_ >= xmin_); VTR_ASSERT(ymax_ >= ymin_); + VTR_ASSERT(layer_num_ >= 0); } int xmin = OPEN; int xmax = OPEN; int ymin = OPEN; int ymax = OPEN; + int layer_num = OPEN; }; /** * @brief An offset between placement locations (t_pl_loc) - * + * @note When two offsets are compared with operator<, their layer offsets must be equal. * x: x-offset * y: y-offset - * z: z-offset + * sub_tile: sub_tile-offset + * layer: layer-offset */ struct t_pl_offset { t_pl_offset() = default; - t_pl_offset(int xoffset, int yoffset, int sub_tile_offset) + t_pl_offset(int xoffset, int yoffset, int sub_tile_offset, int layer_offset) : x(xoffset) , y(yoffset) - , sub_tile(sub_tile_offset) {} + , sub_tile(sub_tile_offset) + , layer(layer_offset) {} int x = 0; int y = 0; int sub_tile = 0; + int layer = 0; t_pl_offset& operator+=(const t_pl_offset& rhs) { x += rhs.x; y += rhs.y; sub_tile += rhs.sub_tile; + layer += rhs.layer; return *this; } @@ -619,6 +658,7 @@ struct t_pl_offset { x -= rhs.x; y -= rhs.y; sub_tile -= rhs.sub_tile; + layer -= rhs.layer; return *this; } @@ -633,18 +673,19 @@ struct t_pl_offset { } friend t_pl_offset operator-(const t_pl_offset& other) { - return t_pl_offset(-other.x, -other.y, -other.sub_tile); + return t_pl_offset(-other.x, -other.y, -other.sub_tile, -other.layer); } friend t_pl_offset operator+(const t_pl_offset& other) { - return t_pl_offset(+other.x, +other.y, +other.sub_tile); + return t_pl_offset(+other.x, +other.y, +other.sub_tile, +other.layer); } friend bool operator<(const t_pl_offset& lhs, const t_pl_offset& rhs) { + VTR_ASSERT(lhs.layer == rhs.layer); return std::tie(lhs.x, lhs.y, lhs.sub_tile) < std::tie(rhs.x, rhs.y, rhs.sub_tile); } friend bool operator==(const t_pl_offset& lhs, const t_pl_offset& rhs) { - return std::tie(lhs.x, lhs.y, lhs.sub_tile) == std::tie(rhs.x, rhs.y, rhs.sub_tile); + return std::tie(lhs.x, lhs.y, lhs.sub_tile, lhs.layer) == std::tie(rhs.x, rhs.y, rhs.sub_tile, rhs.layer); } friend bool operator!=(const t_pl_offset& lhs, const t_pl_offset& rhs) { @@ -659,6 +700,7 @@ struct hash { std::size_t seed = std::hash{}(v.x); vtr::hash_combine(seed, v.y); vtr::hash_combine(seed, v.sub_tile); + vtr::hash_combine(seed, v.layer); return seed; } }; @@ -688,7 +730,7 @@ struct t_pl_loc { int layer = OPEN; t_pl_loc& 
operator+=(const t_pl_offset& rhs) { - VTR_ASSERT(this->layer != OPEN); + layer += rhs.layer; x += rhs.x; y += rhs.y; sub_tile += rhs.sub_tile; @@ -696,7 +738,7 @@ struct t_pl_loc { t_pl_loc& operator-=(const t_pl_offset& rhs) { - VTR_ASSERT(this->layer != OPEN); + layer -= rhs.layer; x -= rhs.x; y -= rhs.y; sub_tile -= rhs.sub_tile; @@ -720,8 +762,10 @@ struct t_pl_loc { } friend t_pl_offset operator-(const t_pl_loc& lhs, const t_pl_loc& rhs) { - VTR_ASSERT(lhs.layer == rhs.layer); - return {lhs.x - rhs.x, lhs.y - rhs.y, lhs.sub_tile - rhs.sub_tile}; + return {lhs.x - rhs.x, + lhs.y - rhs.y, + lhs.sub_tile - rhs.sub_tile, + lhs.layer - rhs.layer}; } friend bool operator<(const t_pl_loc& lhs, const t_pl_loc& rhs) { @@ -745,6 +789,7 @@ struct hash { std::size_t seed = std::hash{}(v.x); vtr::hash_combine(seed, v.y); vtr::hash_combine(seed, v.sub_tile); + vtr::hash_combine(seed, v.layer); return seed; } }; @@ -974,6 +1019,12 @@ enum e_place_algorithm { SLACK_TIMING_PLACE }; +enum e_place_bounding_box_mode { + AUTO_BB, + CUBE_BB, + PER_LAYER_BB +}; + /** * @brief Provides a wrapper around enum e_place_algorithm. * @@ -1109,6 +1160,9 @@ enum class e_place_delta_delay_algorithm { * @param constraints_file * File that specifies locations of locked down (constrained) * blocks for placement. Empty string means no constraints file. + * @param write_initial_place_file * Write the initial placement into this file. Empty string means + * the initial placement is not written. * @param pad_loc_file * File to read pad locations from if pad_loc_type is USER. * @param place_freq @@ -1151,6 +1205,7 @@ struct t_placer_opts { int place_chan_width; enum e_pad_loc_type pad_loc_type; std::string constraints_file; + std::string write_initial_place_file; enum pfreq place_freq; int recompute_crit_iter; int inner_loop_recompute_divider; @@ -1186,6 +1241,7 @@ struct t_placer_opts { bool place_agent_multistate; bool place_checkpointing; int place_high_fanout_net; + e_place_bounding_box_mode place_bounding_box_mode; e_agent_algorithm place_agent_algorithm; float place_agent_epsilon; float place_agent_gamma; diff --git a/vpr/src/noc/noc_storage.cpp b/vpr/src/noc/noc_storage.cpp index 6104226a605..70c92878f82 100644 --- a/vpr/src/noc/noc_storage.cpp +++ b/vpr/src/noc/noc_storage.cpp @@ -131,7 +131,7 @@ void NocStorage::set_device_grid_width(int grid_width) { void NocStorage::set_device_grid_spec(int grid_width, int grid_height) { device_grid_width = grid_width; - num_layer_blocks = grid_width * grid_height; + layer_num_grid_locs = grid_width * grid_height; return; } @@ -235,7 +235,7 @@ NocLinkId NocStorage::get_parallel_link(NocLinkId current_link) const { int NocStorage::generate_router_key_from_grid_location(int grid_position_x, int grid_position_y, int layer_position) const { // calculate the key value - return (num_layer_blocks * layer_position + device_grid_width * grid_position_y + grid_position_x); + return (layer_num_grid_locs * layer_position + device_grid_width * grid_position_y + grid_position_x); } void NocStorage::echo_noc(char* file_name) const { diff --git a/vpr/src/noc/noc_storage.h b/vpr/src/noc/noc_storage.h index d490b529324..f35f0121eb2 100644 --- a/vpr/src/noc/noc_storage.h +++ b/vpr/src/noc/noc_storage.h @@ -141,7 +141,13 @@ class NocStorage { * */ int device_grid_width; - int num_layer_blocks; + /** + * @brief Internal reference to the number of grid locations on each layer (width * height). 
This is necessary + * to compute a unique key for a given grid location which we can then use + * to get the corresponding physical (hard) router at the given grid + * location using 'grid_location_to_router_id'. + */ + int layer_num_grid_locs; // prevent "copying" of this object NocStorage(const NocStorage&) = delete; diff --git a/vpr/src/place/centroid_move_generator.cpp b/vpr/src/place/centroid_move_generator.cpp index cab42663a7b..f1316701998 100644 --- a/vpr/src/place/centroid_move_generator.cpp +++ b/vpr/src/place/centroid_move_generator.cpp @@ -38,6 +38,9 @@ e_create_move CentroidMoveGenerator::propose_move(t_pl_blocks_to_be_moved& block /* Calculate the centroid location*/ calculate_centroid_loc(b_from, false, centroid, nullptr); + // The centroid location is not necessarily a valid location, and the downstream routines expect a valid + // layer for the "to" location. So if the layer is not valid, we set it to the same layer as the "from" location. + to.layer = (centroid.layer < 0) ? from.layer : centroid.layer; /* Find a location near the weighted centroid_loc */ if (!find_to_loc_centroid(cluster_from_type, from, centroid, range_limiters, to, b_from)) { return e_create_move::ABORT; } diff --git a/vpr/src/place/critical_uniform_move_generator.cpp b/vpr/src/place/critical_uniform_move_generator.cpp index 264656d8329..9fbc93a7645 100644 --- a/vpr/src/place/critical_uniform_move_generator.cpp +++ b/vpr/src/place/critical_uniform_move_generator.cpp @@ -28,7 +28,7 @@ e_create_move CriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); t_pl_loc to; - + to.layer = from.layer; if (!find_to_loc_uniform(cluster_from_type, rlim, from, to, b_from)) { return e_create_move::ABORT; } diff --git a/vpr/src/place/directed_moves_util.cpp b/vpr/src/place/directed_moves_util.cpp index db49fc88486..330f1904368 100644 --- a/vpr/src/place/directed_moves_util.cpp +++ b/vpr/src/place/directed_moves_util.cpp @@ -25,6 +25,7 @@ void calculate_centroid_loc(ClusterBlockId b_from, bool timing_weights, t_pl_loc float acc_weight = 0; float acc_x = 0; float acc_y = 0; + float acc_layer = 0; float weight = 1; int from_block_layer_num = g_vpr_ctx.placement().block_locs[b_from].loc.layer; @@ -65,6 +66,7 @@ void calculate_centroid_loc(ClusterBlockId b_from, bool timing_weights, t_pl_loc acc_x += tile_loc.x * weight; acc_y += tile_loc.y * weight; + acc_layer += tile_loc.layer_num * weight; acc_weight += weight; } } @@ -84,6 +86,7 @@ void calculate_centroid_loc(ClusterBlockId b_from, bool timing_weights, t_pl_loc acc_x += tile_loc.x * weight; acc_y += tile_loc.y * weight; + acc_layer += tile_loc.layer_num * weight; acc_weight += weight; } } @@ -91,8 +94,7 @@ void calculate_centroid_loc(ClusterBlockId b_from, bool timing_weights, t_pl_loc //Calculate the centroid location centroid.x = acc_x / acc_weight; centroid.y = acc_y / acc_weight; - // TODO: For now, we don't move the centroid to a different layer - centroid.layer = from_block_layer_num; + centroid.layer = acc_layer / acc_weight; } static std::map available_reward_function = { diff --git a/vpr/src/place/feasible_region_move_generator.cpp b/vpr/src/place/feasible_region_move_generator.cpp index 8baf52f8a46..995c2a37836 100644 --- a/vpr/src/place/feasible_region_move_generator.cpp +++ b/vpr/src/place/feasible_region_move_generator.cpp @@ -33,6 +33,8 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved& /* Calculate the feasible region */ t_pl_loc to; + // Currently, we don't change 
the layer for this move + to.layer = from.layer; int ipin; ClusterBlockId bnum; int max_x, min_x, max_y, min_y; @@ -101,6 +103,9 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved& FR_coords.ymin = std::min(from.y, max_y); FR_coords.ymax = std::max(from.y, yt); } + + FR_coords.layer_min = from.layer; + FR_coords.layer_max = from.layer; VTR_ASSERT(FR_coords.ymin <= FR_coords.ymax); t_range_limiters range_limiters{rlim, diff --git a/vpr/src/place/initial_placement.cpp b/vpr/src/place/initial_placement.cpp index 66f47358ff5..c80d5ff245b 100644 --- a/vpr/src/place/initial_placement.cpp +++ b/vpr/src/place/initial_placement.cpp @@ -307,11 +307,10 @@ static bool find_centroid_neighbor(t_pl_loc& centroid_loc, t_logical_block_type_ int first_rlim = 15; auto search_range = get_compressed_grid_target_search_range(compressed_block_grid, - compressed_centroid_loc, - first_rlim, - num_layers); + compressed_centroid_loc[centroid_loc_layer_num], + first_rlim); - int delta_cx = search_range[centroid_loc_layer_num].xmax - search_range[centroid_loc_layer_num].xmin; + int delta_cx = search_range.xmax - search_range.xmin; //Block has not been placed yet, so the "from" coords will be (-1, -1) int cx_from = OPEN; @@ -323,7 +322,7 @@ static bool find_centroid_neighbor(t_pl_loc& centroid_loc, t_logical_block_type_ bool legal = find_compatible_compressed_loc_in_range(block_type, delta_cx, {cx_from, cy_from, layer_from}, - search_range[centroid_loc_layer_num], + search_range, to_compressed_loc, false, centroid_loc_layer_num); @@ -426,9 +425,8 @@ static std::vector find_centroid_loc(t_pl_macro pl_macro, t_pl_l centroid.y = acc_y / acc_weight; if (find_layer) { auto max_element = std::max_element(layer_count.begin(), layer_count.end()); - VTR_ASSERT(*max_element != 0); - auto index = std::distance(layer_count.begin(), max_element); - centroid.layer = static_cast(index); + VTR_ASSERT((*max_element) != 0); + centroid.layer = (int)std::distance(layer_count.begin(), max_element); } else { centroid.layer = head_layer_num; } @@ -634,11 +632,13 @@ static bool try_random_placement(t_pl_macro pl_macro, PartitionRegion& pr, t_log t_physical_tile_loc to_compressed_loc; bool legal; + legal = find_compatible_compressed_loc_in_range(block_type, delta_cx, {cx_from, cy_from, reg_coord.layer_num}, {min_compressed_loc.x, max_compressed_loc.x, - min_compressed_loc.y, max_compressed_loc.y}, + min_compressed_loc.y, max_compressed_loc.y, + reg_coord.layer_num, reg_coord.layer_num}, to_compressed_loc, false, reg_coord.layer_num); @@ -1097,7 +1097,7 @@ bool place_one_block(const ClusterBlockId& blk_id, //If it does not belong to a macro, create a macro with the one block and then pass to the placement routines //This is done so that the initial placement flow can be the same whether the block belongs to a macro or not t_pl_macro_member macro_member; - t_pl_offset block_offset(0, 0, 0); + t_pl_offset block_offset(0, 0, 0, 0); macro_member.blk_index = blk_id; macro_member.offset = block_offset; diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp index a0853978f2b..324d0cd3e44 100644 --- a/vpr/src/place/median_move_generator.cpp +++ b/vpr/src/place/median_move_generator.cpp @@ -5,9 +5,9 @@ #include "placer_globals.h" #include "move_utils.h" -static bool get_bb_incrementally(ClusterNetId net_id, t_bb* bb_coord_new, int xold, int yold, int xnew, int ynew); +static bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, int xold, int yold, int xnew, int ynew); 
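The union_2d_bb / union_2d_bb_incr helpers relied on in the hunks below are not part of this diff; their intent is sketched here under the assumption that they simply merge one 2D box per layer into a single 3D box (field layout taken from t_bb / t_2D_bb above):

#include <algorithm>
#include <climits>
#include <vector>

struct t_2D_bb { int xmin, xmax, ymin, ymax, layer_num; };
struct t_bb { int xmin, xmax, ymin, ymax, layer_min, layer_max; };

t_bb union_2d_bb_sketch(const std::vector<t_2D_bb>& layer_bbs) {
    t_bb u{INT_MAX, INT_MIN, INT_MAX, INT_MIN, INT_MAX, INT_MIN};
    // Assumes every entry is valid; the real helper would also have to skip
    // layers that contain none of the net's pins.
    for (const t_2D_bb& bb : layer_bbs) {
        u.xmin = std::min(u.xmin, bb.xmin);
        u.xmax = std::max(u.xmax, bb.xmax);
        u.ymin = std::min(u.ymin, bb.ymin);
        u.ymax = std::max(u.ymax, bb.ymax);
        u.layer_min = std::min(u.layer_min, bb.layer_num);
        u.layer_max = std::max(u.layer_max, bb.layer_num);
    }
    return u;
}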
-static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb* bb_coord_new, ClusterBlockId block_id, bool& skip_net); +static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb& bb_coord_new, ClusterBlockId block_id, bool& skip_net); e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_affected, t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, const PlacerCriticalities* /*criticalities*/) { //Find a movable block based on blk_type @@ -28,15 +28,20 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_ auto& device_ctx = g_vpr_ctx.device(); auto& place_move_ctx = g_placer_ctx.mutable_move(); + const int num_layers = device_ctx.grid.get_num_layers(); + bool is_multi_layer = (num_layers > 1); + t_pl_loc from = place_ctx.block_locs[b_from].loc; + int from_layer = from.layer; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); - auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer}); + auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from_layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); /* Calculate the median region */ t_pl_loc to; - t_bb coords, limit_coords; + t_bb coords(OPEN, OPEN, OPEN, OPEN, OPEN, OPEN); + t_bb limit_coords; ClusterBlockId bnum; int pnum, xnew, xold, ynew, yold; @@ -44,6 +49,7 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_ //reused to save allocation time place_move_ctx.X_coord.clear(); place_move_ctx.Y_coord.clear(); + std::vector layer_blk_cnt(num_layers, 0); //true if the net is a feedback from the block to itself bool skip_net; @@ -61,10 +67,17 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_ continue; if (cluster_ctx.clb_nlist.net_sinks(net_id).size() < SMALL_NET) { //calculate the bb from scratch - get_bb_from_scratch_excluding_block(net_id, &coords, b_from, skip_net); + get_bb_from_scratch_excluding_block(net_id, coords, b_from, skip_net); if (skip_net) continue; } else { + t_bb union_bb; + const bool& cube_bb = g_vpr_ctx.placement().cube_bb; + if (!cube_bb) { + union_bb = union_2d_bb(place_move_ctx.layer_bb_coords[net_id]); + } + + const auto& net_bb_coords = cube_bb ? 
place_move_ctx.bb_coords[net_id] : union_bb; //use the incremental update of the bb bnum = cluster_ctx.clb_nlist.pin_block(pin_id); pnum = tile_pin_index(pin_id); @@ -76,20 +89,20 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_ //To calulate the bb incrementally while excluding the moving block //assume that the moving block is moved to a non-critical coord of the bb - if (place_move_ctx.bb_coords[net_id].xmin == xold) { - xnew = place_move_ctx.bb_coords[net_id].xmax; + if (net_bb_coords.xmin == xold) { + xnew = net_bb_coords.xmax; } else { - xnew = place_move_ctx.bb_coords[net_id].xmin; + xnew = net_bb_coords.xmin; } - if (place_move_ctx.bb_coords[net_id].ymin == yold) { - ynew = place_move_ctx.bb_coords[net_id].ymax; + if (net_bb_coords.ymin == yold) { + ynew = net_bb_coords.ymax; } else { - ynew = place_move_ctx.bb_coords[net_id].ymin; + ynew = net_bb_coords.ymin; } - if (!get_bb_incrementally(net_id, &coords, xold, yold, xnew, ynew)) { - get_bb_from_scratch_excluding_block(net_id, &coords, b_from, skip_net); + if (!get_bb_incrementally(net_id, coords, xold, yold, xnew, ynew)) { + get_bb_from_scratch_excluding_block(net_id, coords, b_from, skip_net); if (skip_net) continue; } @@ -99,6 +112,17 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_ place_move_ctx.X_coord.push_back(coords.xmax); place_move_ctx.Y_coord.push_back(coords.ymin); place_move_ctx.Y_coord.push_back(coords.ymax); + if (is_multi_layer) { + for (int layer_num = 0; layer_num < num_layers; layer_num++) { + layer_blk_cnt[layer_num] += place_move_ctx.num_sink_pin_layer[size_t(net_id)][layer_num]; + } + // If the pin under consideration is a sink, the moving block itself was counted in layer_blk_cnt + // above; since the block is moving away, remove its contribution + if (cluster_ctx.clb_nlist.pin_type(pin_id) == PinType::SINK) { + VTR_ASSERT_SAFE(layer_blk_cnt[from_layer] > 0); + layer_blk_cnt[from_layer]--; + } + } } if ((place_move_ctx.X_coord.empty()) || (place_move_ctx.Y_coord.empty())) { @@ -125,10 +149,20 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_ t_pl_loc median_point; median_point.x = (limit_coords.xmin + limit_coords.xmax) / 2; median_point.y = (limit_coords.ymin + limit_coords.ymax) / 2; - // TODO: When placer is updated to support moving blocks between dice, this needs to be changed. Currently, we only move blocks within a die. - median_point.layer = from.layer; - if (!find_to_loc_centroid(cluster_from_type, from, median_point, range_limiters, to, b_from)) + + // Before calling find_to_loc_centroid, a valid layer should be assigned to the "to" location. If there are multiple layers, the layer + // with the highest number of sinks is used. Otherwise, the same layer as the "from" location is assigned. + if (is_multi_layer) { + int layer_num = std::distance(layer_blk_cnt.begin(), std::max_element(layer_blk_cnt.begin(), layer_blk_cnt.end())); + median_point.layer = layer_num; + to.layer = layer_num; + } else { + median_point.layer = from.layer; + to.layer = from.layer; + } + if (!find_to_loc_centroid(cluster_from_type, from, median_point, range_limiters, to, b_from)) { return e_create_move::ABORT; + } e_create_move create_move = ::create_move(blocks_affected, b_from, to); @@ -150,17 +184,16 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_ * Currently assumes channels on both sides of the CLBs forming the * * edges of the bounding box can be used. Essentially, I am assuming * * the pins always lie on the outside of the bounding box. 
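The target-layer selection above is an argmax over per-layer sink counts; in isolation (hypothetical helper name):

#include <algorithm>
#include <iterator>
#include <vector>

int pick_layer_with_most_sinks(const std::vector<int>& layer_blk_cnt) {
    return (int)std::distance(layer_blk_cnt.begin(),
                              std::max_element(layer_blk_cnt.begin(), layer_blk_cnt.end()));
}
// e.g. pick_layer_with_most_sinks({3, 7}) == 1: prefer moving toward layer 1.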
*/ -static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb* bb_coord_new, ClusterBlockId block_id, bool& skip_net) { +static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb& bb_coord_new, ClusterBlockId block_id, bool& skip_net) { //TODO: account for multiple physical pin instances per logical pin skip_net = true; - int xmin = 0; - int xmax = 0; - int ymin = 0; - int ymax = 0; + int xmin = OPEN; + int xmax = OPEN; + int ymin = OPEN; + int ymax = OPEN; - int x, y; int pnum; auto& cluster_ctx = g_vpr_ctx.clustering(); @@ -173,14 +206,13 @@ static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb* bb_co if (bnum != block_id) { skip_net = false; pnum = net_pin_to_tile_pin_index(net_id, 0); - x = place_ctx.block_locs[bnum].loc.x + physical_tile_type(bnum)->pin_width_offset[pnum]; - y = place_ctx.block_locs[bnum].loc.y + physical_tile_type(bnum)->pin_height_offset[pnum]; - - xmin = x; - ymin = y; - xmax = x; - ymax = y; + int src_x = place_ctx.block_locs[bnum].loc.x + physical_tile_type(bnum)->pin_width_offset[pnum]; + int src_y = place_ctx.block_locs[bnum].loc.y + physical_tile_type(bnum)->pin_height_offset[pnum]; + xmin = src_x; + ymin = src_y; + xmax = src_x; + ymax = src_y; first_block = true; } @@ -190,8 +222,9 @@ static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb* bb_co if (bnum == block_id) continue; skip_net = false; - x = place_ctx.block_locs[bnum].loc.x + physical_tile_type(bnum)->pin_width_offset[pnum]; - y = place_ctx.block_locs[bnum].loc.y + physical_tile_type(bnum)->pin_height_offset[pnum]; + const auto& block_loc = place_ctx.block_locs[bnum].loc; + int x = block_loc.x + physical_tile_type(bnum)->pin_width_offset[pnum]; + int y = block_loc.y + physical_tile_type(bnum)->pin_height_offset[pnum]; if (!first_block) { xmin = x; @@ -199,6 +232,7 @@ static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb* bb_co xmax = x; ymax = y; first_block = true; + continue; } if (x < xmin) { xmin = x; @@ -220,11 +254,10 @@ static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb* bb_co * channel immediately to the left of the bounding box, I want to * * clip to 1 in both directions as well (since minimum channel index * * is 0). See route_common.cpp for a channel diagram. */ - - bb_coord_new->xmin = std::max(std::min(xmin, device_ctx.grid.width() - 2), 1); //-2 for no perim channels - bb_coord_new->ymin = std::max(std::min(ymin, device_ctx.grid.height() - 2), 1); //-2 for no perim channels - bb_coord_new->xmax = std::max(std::min(xmax, device_ctx.grid.width() - 2), 1); //-2 for no perim channels - bb_coord_new->ymax = std::max(std::min(ymax, device_ctx.grid.height() - 2), 1); //-2 for no perim channels + bb_coord_new.xmin = std::max(std::min(xmin, device_ctx.grid.width() - 2), 1); //-2 for no perim channels + bb_coord_new.ymin = std::max(std::min(ymin, device_ctx.grid.height() - 2), 1); //-2 for no perim channels + bb_coord_new.xmax = std::max(std::min(xmax, device_ctx.grid.width() - 2), 1); //-2 for no perim channels + bb_coord_new.ymax = std::max(std::min(ymax, device_ctx.grid.height() - 2), 1); //-2 for no perim channels } /* @@ -240,11 +273,9 @@ static void get_bb_from_scratch_excluding_block(ClusterNetId net_id, t_bb* bb_co * the pins always lie on the outside of the bounding box. * * The x and y coordinates are the pin's x and y coordinates. */ /* IO blocks are considered to be one cell in for simplicity. 
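The rewritten get_bb_incrementally below applies one rule per axis: if the moved pin sat on a bounding-box edge and was the only pin on that edge, the new edge position cannot be known locally and the caller must rebuild the box from scratch; otherwise the edge stays put or is pushed outward. A 1D sketch of that rule (edge-pin counts assumed maintained elsewhere, as bb_num_on_edges is in VPR):

#include <algorithm>

struct Edge1D { int min, max, cnt_min, cnt_max; }; // extremes and pin counts on them

// Returns false when a from-scratch recomputation is required.
bool update_1d(Edge1D& e, int old_pos, int new_pos) {
    if (new_pos < old_pos) {                                  // moving toward min
        if (old_pos == e.max && e.cnt_max == 1) return false; // sole pin on max edge
        e.min = std::min(e.min, new_pos);                     // max edge unaffected
    } else if (new_pos > old_pos) {                           // moving toward max
        if (old_pos == e.min && e.cnt_min == 1) return false; // sole pin on min edge
        e.max = std::max(e.max, new_pos);                     // min edge unaffected
    }
    return true;                                              // no motion: box unchanged
}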
*/ -static bool get_bb_incrementally(ClusterNetId net_id, t_bb* bb_coord_new, int xold, int yold, int xnew, int ynew) { +static bool get_bb_incrementally(ClusterNetId net_id, t_bb& bb_coord_new, int xold, int yold, int xnew, int ynew) { //TODO: account for multiple physical pin instances per logical pin - const t_bb *curr_bb_edge, *curr_bb_coord; - auto& device_ctx = g_vpr_ctx.device(); auto& place_move_ctx = g_placer_ctx.move(); @@ -253,9 +284,19 @@ static bool get_bb_incrementally(ClusterNetId net_id, t_bb* bb_coord_new, int xo xold = std::max(std::min(xold, device_ctx.grid.width() - 2), 1); //-2 for no perim channels yold = std::max(std::min(yold, device_ctx.grid.height() - 2), 1); //-2 for no perim channels - /* The net had NOT been updated before, could use the old values */ - curr_bb_coord = &(place_move_ctx.bb_coords[net_id]); - curr_bb_edge = &(place_move_ctx.bb_num_on_edges[net_id]); + t_bb union_bb_edge; + t_bb union_bb; + const bool& cube_bb = g_vpr_ctx.placement().cube_bb; + if (!cube_bb) { + std::tie(union_bb_edge, union_bb) = union_2d_bb_incr(place_move_ctx.layer_bb_num_on_edges[net_id], + place_move_ctx.layer_bb_coords[net_id]); + } + + /* In this move, we use a 3D bounding box. Thus, if the per-layer BB is used by the placer, we need to take the union of the per-layer BBs + * and use it for the rest of the operations in this move. + */ + const t_bb& curr_bb_edge = cube_bb ? place_move_ctx.bb_num_on_edges[net_id] : union_bb_edge; + const t_bb& curr_bb_coord = cube_bb ? place_move_ctx.bb_coords[net_id] : union_bb; /* Check if I can update the bounding box incrementally. */ @@ -263,24 +304,24 @@ static bool get_bb_incrementally(ClusterNetId net_id, t_bb* bb_coord_new, int xo /* Update the xmax fields for coordinates and number of edges first. */ - if (xold == curr_bb_coord->xmax) { /* Old position at xmax. */ - if (curr_bb_edge->xmax == 1) { + if (xold == curr_bb_coord.xmax) { /* Old position at xmax. */ + if (curr_bb_edge.xmax == 1) { return false; } else { - bb_coord_new->xmax = curr_bb_coord->xmax; + bb_coord_new.xmax = curr_bb_coord.xmax; } } else { /* Move to left, old postion was not at xmax. */ - bb_coord_new->xmax = curr_bb_coord->xmax; + bb_coord_new.xmax = curr_bb_coord.xmax; } /* Now do the xmin fields for coordinates and number of edges. */ - if (xnew < curr_bb_coord->xmin) { /* Moved past xmin */ - bb_coord_new->xmin = xnew; - } else if (xnew == curr_bb_coord->xmin) { /* Moved to xmin */ - bb_coord_new->xmin = xnew; + if (xnew < curr_bb_coord.xmin) { /* Moved past xmin */ + bb_coord_new.xmin = xnew; + } else if (xnew == curr_bb_coord.xmin) { /* Moved to xmin */ + bb_coord_new.xmin = xnew; } else { /* Xmin unchanged. */ - bb_coord_new->xmin = curr_bb_coord->xmin; + bb_coord_new.xmin = curr_bb_coord.xmin; } /* End of move to left case. */ @@ -288,29 +329,29 @@ static bool get_bb_incrementally(ClusterNetId net_id, t_bb* bb_coord_new, int xo /* Update the xmin fields for coordinates and number of edges first. */ - if (xold == curr_bb_coord->xmin) { /* Old position at xmin. */ - if (curr_bb_edge->xmin == 1) { + if (xold == curr_bb_coord.xmin) { /* Old position at xmin. */ + if (curr_bb_edge.xmin == 1) { return false; } else { - bb_coord_new->xmin = curr_bb_coord->xmin; + bb_coord_new.xmin = curr_bb_coord.xmin; } } else { /* Move to right, old position was not at xmin. */ - bb_coord_new->xmin = curr_bb_coord->xmin; + bb_coord_new.xmin = curr_bb_coord.xmin; } /* Now do the xmax fields for coordinates and number of edges. */ - if (xnew > curr_bb_coord->xmax) { /* Moved past xmax. 
*/ - bb_coord_new->xmax = xnew; - } else if (xnew == curr_bb_coord->xmax) { /* Moved to xmax */ - bb_coord_new->xmax = xnew; + if (xnew > curr_bb_coord.xmax) { /* Moved past xmax. */ + bb_coord_new.xmax = xnew; + } else if (xnew == curr_bb_coord.xmax) { /* Moved to xmax */ + bb_coord_new.xmax = xnew; } else { /* Xmax unchanged. */ - bb_coord_new->xmax = curr_bb_coord->xmax; + bb_coord_new.xmax = curr_bb_coord.xmax; } /* End of move to right case. */ } else { /* xnew == xold -- no x motion. */ - bb_coord_new->xmin = curr_bb_coord->xmin; - bb_coord_new->xmax = curr_bb_coord->xmax; + bb_coord_new.xmin = curr_bb_coord.xmin; + bb_coord_new.xmax = curr_bb_coord.xmax; } /* Now account for the y-direction motion. */ @@ -319,24 +360,24 @@ static bool get_bb_incrementally(ClusterNetId net_id, t_bb* bb_coord_new, int xo /* Update the ymax fields for coordinates and number of edges first. */ - if (yold == curr_bb_coord->ymax) { /* Old position at ymax. */ - if (curr_bb_edge->ymax == 1) { + if (yold == curr_bb_coord.ymax) { /* Old position at ymax. */ + if (curr_bb_edge.ymax == 1) { return false; } else { - bb_coord_new->ymax = curr_bb_coord->ymax; + bb_coord_new.ymax = curr_bb_coord.ymax; } } else { /* Move down, old postion was not at ymax. */ - bb_coord_new->ymax = curr_bb_coord->ymax; + bb_coord_new.ymax = curr_bb_coord.ymax; } /* Now do the ymin fields for coordinates and number of edges. */ - if (ynew < curr_bb_coord->ymin) { /* Moved past ymin */ - bb_coord_new->ymin = ynew; - } else if (ynew == curr_bb_coord->ymin) { /* Moved to ymin */ - bb_coord_new->ymin = ynew; + if (ynew < curr_bb_coord.ymin) { /* Moved past ymin */ + bb_coord_new.ymin = ynew; + } else if (ynew == curr_bb_coord.ymin) { /* Moved to ymin */ + bb_coord_new.ymin = ynew; } else { /* ymin unchanged. */ - bb_coord_new->ymin = curr_bb_coord->ymin; + bb_coord_new.ymin = curr_bb_coord.ymin; } /* End of move down case. */ @@ -344,30 +385,30 @@ static bool get_bb_incrementally(ClusterNetId net_id, t_bb* bb_coord_new, int xo /* Update the ymin fields for coordinates and number of edges first. */ - if (yold == curr_bb_coord->ymin) { /* Old position at ymin. */ - if (curr_bb_edge->ymin == 1) { + if (yold == curr_bb_coord.ymin) { /* Old position at ymin. */ + if (curr_bb_edge.ymin == 1) { return false; } else { - bb_coord_new->ymin = curr_bb_coord->ymin; + bb_coord_new.ymin = curr_bb_coord.ymin; } } else { /* Moved up, old position was not at ymin. */ - bb_coord_new->ymin = curr_bb_coord->ymin; + bb_coord_new.ymin = curr_bb_coord.ymin; } /* Now do the ymax fields for coordinates and number of edges. */ - if (ynew > curr_bb_coord->ymax) { /* Moved past ymax. */ - bb_coord_new->ymax = ynew; - } else if (ynew == curr_bb_coord->ymax) { /* Moved to ymax */ - bb_coord_new->ymax = ynew; + if (ynew > curr_bb_coord.ymax) { /* Moved past ymax. */ + bb_coord_new.ymax = ynew; + } else if (ynew == curr_bb_coord.ymax) { /* Moved to ymax */ + bb_coord_new.ymax = ynew; } else { /* ymax unchanged. */ - bb_coord_new->ymax = curr_bb_coord->ymax; + bb_coord_new.ymax = curr_bb_coord.ymax; } /* End of move up case. */ } else { /* ynew == yold -- no y motion. 
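Further down, move_utils.cpp extends is_legal_swap_to_location with a layer clause; stripped of VPR context, the whole test is a pure bounds check (hypothetical free function, written here only to show the shape of the condition):

struct Loc { int x, y, layer; };

// A target is rejected when any coordinate, including the new layer field,
// falls outside the device.
bool in_device_bounds(const Loc& to, int width, int height, int num_layers) {
    return to.x >= 0 && to.x < width
        && to.y >= 0 && to.y < height
        && to.layer >= 0 && to.layer < num_layers;
}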
*/ - bb_coord_new->ymin = curr_bb_coord->ymin; - bb_coord_new->ymax = curr_bb_coord->ymax; + bb_coord_new.ymin = curr_bb_coord.ymin; + bb_coord_new.ymax = curr_bb_coord.ymax; } return true; } diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp index 57419340317..ca10cfc500b 100644 --- a/vpr/src/place/move_utils.cpp +++ b/vpr/src/place/move_utils.cpp @@ -454,7 +454,9 @@ bool is_legal_swap_to_location(ClusterBlockId blk, t_pl_loc to) { auto& place_ctx = g_vpr_ctx.placement(); if (to.x < 0 || to.x >= int(device_ctx.grid.width()) - || to.y < 0 || to.y >= int(device_ctx.grid.height())) { + || to.y < 0 || to.y >= int(device_ctx.grid.height()) + || to.layer < 0 + || to.layer >= int(device_ctx.grid.get_num_layers())) { return false; } @@ -753,7 +755,8 @@ bool find_to_loc_uniform(t_logical_block_type_ptr type, //Retrieve the compressed block grid for this block type const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[type->index]; const int num_layers = g_vpr_ctx.device().grid.get_num_layers(); - const int from_layer_num = from.layer; + const int to_layer_num = get_random_layer(type); + VTR_ASSERT(to_layer_num != OPEN); //Determine the coordinates in the compressed grid space of the current block std::vector compressed_locs = get_compressed_loc(compressed_block_grid, @@ -761,11 +764,10 @@ bool find_to_loc_uniform(t_logical_block_type_ptr type, num_layers); //Determine the valid compressed grid location ranges - std::vector search_range = get_compressed_grid_target_search_range(compressed_block_grid, - compressed_locs, - rlim, - num_layers); - int delta_cx = search_range[from_layer_num].xmax - search_range[from_layer_num].xmin; + t_bb search_range = get_compressed_grid_target_search_range(compressed_block_grid, + compressed_locs[to_layer_num], + rlim); + int delta_cx = search_range.xmax - search_range.xmin; t_physical_tile_loc to_compressed_loc; bool legal = false; @@ -774,9 +776,9 @@ bool find_to_loc_uniform(t_logical_block_type_ptr type, if (is_cluster_constrained(b_from)) { bool intersect = intersect_range_limit_with_floorplan_constraints(type, b_from, - search_range[from_layer_num], + search_range, delta_cx, - from_layer_num); + to_layer_num); if (!intersect) { return false; } @@ -784,11 +786,11 @@ bool find_to_loc_uniform(t_logical_block_type_ptr type, //TODO: For now, we only move the blocks on the same tile legal = find_compatible_compressed_loc_in_range(type, delta_cx, - compressed_locs[from_layer_num], - search_range[from_layer_num], + compressed_locs[to_layer_num], + search_range, to_compressed_loc, false, - from_layer_num); + to_layer_num); if (!legal) { //No valid position found @@ -808,8 +810,8 @@ bool find_to_loc_uniform(t_logical_block_type_ptr type, VTR_ASSERT_MSG(grid.get_height_offset({to.x, to.y, to.layer}) == 0, "Should be at block base location"); VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tSearch range %dx%dx%d x %dx%dx%d - Legal position at %d,%d,%d is found\n", - search_range[from_layer_num].xmin, search_range[from_layer_num].ymin, from_layer_num, - search_range[from_layer_num].xmax, search_range[from_layer_num].ymax, from_layer_num, + search_range.xmin, search_range.ymin, search_range.layer_min, + search_range.xmax, search_range.ymax, search_range.layer_max, to.x, to.y, to.layer); return true; } @@ -829,7 +831,8 @@ bool find_to_loc_median(t_logical_block_type_ptr blk_type, t_pl_loc& to_loc, ClusterBlockId b_from) { int num_layers = g_vpr_ctx.device().grid.get_num_layers(); - int from_layer_num = from_loc.layer; + 
const int to_layer_num = to_loc.layer; + VTR_ASSERT(to_layer_num != OPEN); const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[blk_type->index]; //Determine the coordinates in the compressed grid space of the current block @@ -842,25 +845,27 @@ bool find_to_loc_median(t_logical_block_type_ptr blk_type, //Determine the valid compressed grid location ranges std::vector min_compressed_loc = get_compressed_loc_approx(compressed_block_grid, - {limit_coords->xmin, limit_coords->ymin, 0, from_layer_num}, + {limit_coords->xmin, limit_coords->ymin, 0, to_layer_num}, num_layers); std::vector max_compressed_loc = get_compressed_loc_approx(compressed_block_grid, - {limit_coords->xmax, limit_coords->ymax, 0, from_layer_num}, + {limit_coords->xmax, limit_coords->ymax, 0, to_layer_num}, num_layers); - VTR_ASSERT(min_compressed_loc[from_layer_num].x >= 0); - VTR_ASSERT(static_cast(compressed_block_grid.get_num_columns(from_layer_num)) - 1 - max_compressed_loc[from_layer_num].x >= 0); - VTR_ASSERT(max_compressed_loc[from_layer_num].x >= min_compressed_loc[from_layer_num].x); - int delta_cx = max_compressed_loc[from_layer_num].x - min_compressed_loc[from_layer_num].x; + VTR_ASSERT(min_compressed_loc[to_layer_num].x >= 0); + VTR_ASSERT(static_cast(compressed_block_grid.get_num_columns(to_layer_num)) - 1 - max_compressed_loc[to_layer_num].x >= 0); + VTR_ASSERT(max_compressed_loc[to_layer_num].x >= min_compressed_loc[to_layer_num].x); + int delta_cx = max_compressed_loc[to_layer_num].x - min_compressed_loc[to_layer_num].x; - VTR_ASSERT(min_compressed_loc[from_layer_num].y >= 0); - VTR_ASSERT(static_cast(compressed_block_grid.get_num_rows(from_layer_num)) - 1 - max_compressed_loc[from_layer_num].y >= 0); - VTR_ASSERT(max_compressed_loc[from_layer_num].y >= min_compressed_loc[from_layer_num].y); + VTR_ASSERT(min_compressed_loc[to_layer_num].y >= 0); + VTR_ASSERT(static_cast(compressed_block_grid.get_num_rows(to_layer_num)) - 1 - max_compressed_loc[to_layer_num].y >= 0); + VTR_ASSERT(max_compressed_loc[to_layer_num].y >= min_compressed_loc[to_layer_num].y); - t_bb search_range(min_compressed_loc[from_layer_num].x, - max_compressed_loc[from_layer_num].x, - min_compressed_loc[from_layer_num].y, - max_compressed_loc[from_layer_num].y); + t_bb search_range(min_compressed_loc[to_layer_num].x, + max_compressed_loc[to_layer_num].x, + min_compressed_loc[to_layer_num].y, + max_compressed_loc[to_layer_num].y, + to_layer_num, + to_layer_num); t_physical_tile_loc to_compressed_loc; bool legal = false; @@ -870,7 +875,7 @@ bool find_to_loc_median(t_logical_block_type_ptr blk_type, b_from, search_range, delta_cx, - from_layer_num); + to_layer_num); if (!intersect) { return false; } @@ -878,11 +883,11 @@ bool find_to_loc_median(t_logical_block_type_ptr blk_type, legal = find_compatible_compressed_loc_in_range(blk_type, delta_cx, - from_compressed_locs[from_layer_num], + from_compressed_locs[to_layer_num], search_range, to_compressed_loc, true, - from_layer_num); + to_layer_num); if (!legal) { //No valid position found @@ -902,8 +907,8 @@ bool find_to_loc_median(t_logical_block_type_ptr blk_type, VTR_ASSERT_MSG(grid.get_height_offset({to_loc.x, to_loc.y, to_loc.layer}) == 0, "Should be at block base location"); VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tSearch range %dx%dx%d x %dx%dx%d - Legal position at %d,%d,%d is found\n", - search_range.xmin, search_range.ymin, from_layer_num, - search_range.xmax, search_range.ymax, from_layer_num, + search_range.xmin, search_range.ymin, 
search_range.layer_min, + search_range.xmax, search_range.ymax, search_range.layer_max, to_loc.x, to_loc.y, to_loc.layer); return true; } @@ -916,7 +921,8 @@ bool find_to_loc_centroid(t_logical_block_type_ptr blk_type, ClusterBlockId b_from) { //Retrieve the compressed block grid for this block type const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[blk_type->index]; - const int from_layer_num = from_loc.layer; + const int to_layer_num = to_loc.layer; + VTR_ASSERT(to_layer_num >= 0); const int num_layers = g_vpr_ctx.device().grid.get_num_layers(); std::vector from_compressed_loc = get_compressed_loc(compressed_block_grid, @@ -930,23 +936,21 @@ bool find_to_loc_centroid(t_logical_block_type_ptr blk_type, //Determine the valid compressed grid location ranges int delta_cx; - std::vector search_range; + t_bb search_range; // If we are early in the anneal and the range limit still big enough --> search around the center location that the move proposed // If not --> search around the current location of the block but in the direction of the center location that the move proposed if (range_limiters.original_rlim > 0.15 * range_limiters.first_rlim) { search_range = get_compressed_grid_target_search_range(compressed_block_grid, - centroid_compressed_loc, - std::min(range_limiters.original_rlim, range_limiters.dm_rlim), - num_layers); + centroid_compressed_loc[to_layer_num], + std::min(range_limiters.original_rlim, range_limiters.dm_rlim)); } else { search_range = get_compressed_grid_bounded_search_range(compressed_block_grid, - from_compressed_loc, - centroid_compressed_loc, - std::min(range_limiters.original_rlim, range_limiters.dm_rlim), - num_layers); + from_compressed_loc[to_layer_num], + centroid_compressed_loc[to_layer_num], + std::min(range_limiters.original_rlim, range_limiters.dm_rlim)); } - delta_cx = search_range[from_layer_num].xmax - search_range[from_layer_num].xmin; + delta_cx = search_range.xmax - search_range.xmin; t_physical_tile_loc to_compressed_loc; bool legal = false; @@ -954,9 +958,9 @@ bool find_to_loc_centroid(t_logical_block_type_ptr blk_type, if (is_cluster_constrained(b_from)) { bool intersect = intersect_range_limit_with_floorplan_constraints(blk_type, b_from, - search_range[from_layer_num], + search_range, delta_cx, - from_layer_num); + to_layer_num); if (!intersect) { return false; } @@ -965,11 +969,11 @@ bool find_to_loc_centroid(t_logical_block_type_ptr blk_type, //TODO: For now, we only move the blocks on the same tile legal = find_compatible_compressed_loc_in_range(blk_type, delta_cx, - from_compressed_loc[from_layer_num], - search_range[from_layer_num], + from_compressed_loc[to_layer_num], + search_range, to_compressed_loc, false, - from_layer_num); + to_layer_num); if (!legal) { //No valid position found @@ -989,8 +993,8 @@ bool find_to_loc_centroid(t_logical_block_type_ptr blk_type, VTR_ASSERT_MSG(grid.get_height_offset({to_loc.x, to_loc.y, to_loc.layer}) == 0, "Should be at block base location"); VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tSearch range %dx%dx%d x %dx%dx%d - Legal position at %d,%d,%d is found\n", - search_range[from_layer_num].xmin, search_range[from_layer_num].ymin, from_layer_num, - search_range[from_layer_num].xmax, search_range[from_layer_num].ymax, from_layer_num, + search_range.xmin, search_range.ymin, search_range.layer_min, + search_range.xmax, search_range.ymax, search_range.layer_max, to_loc.x, to_loc.y, to_loc.layer); return true; } @@ -999,8 +1003,8 @@ bool 
@@ -999,8 +1003,8 @@ bool find_to_loc_centroid(t_logical_block_type_ptr blk_type,
 static const std::array move_type_strings = {
     "Uniform",
     "Median",
-    "W. Centroid",
     "Centroid",
+    "W. Centroid",
     "W. Median",
     "Crit. Uniform",
     "Feasible Region",
@@ -1133,10 +1137,10 @@ std::vector get_compressed_loc(const t_compressed_block_gri
     //TODO: This function currently only determines the compressed location for the same layer as grid_loc - it should be updated to cover all layers
     std::vector<t_physical_tile_loc> compressed_locs(num_layers);

-    for (int layer_num = 0; layer_num < num_layers; ++layer_num) {
-        if (layer_num != grid_loc.layer) {
-            continue;
-        }
+    const auto& compatible_layers = compressed_block_grid.get_layer_nums();
+
+    for (const auto& layer_num : compatible_layers) {
+        // This would cause a problem if two blocks of the same type are on different x/y locations of different layers
         compressed_locs[layer_num] = compressed_block_grid.grid_loc_to_compressed_loc({grid_loc.x, grid_loc.y, layer_num});
     }
@@ -1146,91 +1150,78 @@ std::vector get_compressed_loc(const t_compressed_block_gri
 std::vector<t_physical_tile_loc> get_compressed_loc_approx(const t_compressed_block_grid& compressed_block_grid,
                                                            t_pl_loc grid_loc,
                                                            int num_layers) {
-    //TODO: This function currently only determine the compressed location for the same layer as grid_loc - it should be updated to cover all layers
     std::vector<t_physical_tile_loc> compressed_locs(num_layers);

-    for (int layer_num = 0; layer_num < num_layers; ++layer_num) {
-        if (layer_num != grid_loc.layer) {
-            continue;
-        }
+    const auto& compatible_layers = compressed_block_grid.get_layer_nums();
+
+    for (const auto& layer_num : compatible_layers) {
         compressed_locs[layer_num] = compressed_block_grid.grid_loc_to_compressed_loc_approx({grid_loc.x, grid_loc.y, layer_num});
     }

     return compressed_locs;
 }

-std::vector<t_bb> get_compressed_grid_target_search_range(const t_compressed_block_grid& compressed_block_grid,
-                                                          const std::vector<t_physical_tile_loc>& compressed_locs,
-                                                          float rlim,
-                                                          int num_layers) {
-    std::vector<t_bb> search_ranges(num_layers, t_bb());
-    for (int layer_num = 0; layer_num < num_layers; ++layer_num) {
-        const auto& layer_loc = compressed_locs[layer_num];
-        //TODO: This if condition is added because blocks are only moved in the same layer. After the update, this condition should be replaced with an assertion
-        if (layer_loc.x == OPEN || layer_loc.y == OPEN || layer_loc.layer_num == OPEN) {
-            //No valid compressed location for this layer
-            continue;
-        }
-        int rlim_x_max_range = std::min((int)compressed_block_grid.get_num_columns(layer_num), rlim);
-        int rlim_y_max_range = std::min((int)compressed_block_grid.get_num_rows(layer_num), rlim); /* for aspect_ratio != 1 case. */
+t_bb get_compressed_grid_target_search_range(const t_compressed_block_grid& compressed_block_grid,
+                                             const t_physical_tile_loc& compressed_loc,
+                                             float rlim) {
+    t_bb search_ranges;
+    int layer_num = compressed_loc.layer_num;
+    VTR_ASSERT(compressed_loc.x != OPEN && compressed_loc.y != OPEN && compressed_loc.layer_num != OPEN);

-    search_ranges[layer_num].xmin = std::max(0, layer_loc.x - rlim_x_max_range);
-    search_ranges[layer_num].xmax = std::min(compressed_block_grid.get_num_columns(layer_num) - 1, layer_loc.x + rlim_x_max_range);
+    int rlim_x_max_range = std::min((int)compressed_block_grid.get_num_columns(layer_num), rlim);
+    int rlim_y_max_range = std::min((int)compressed_block_grid.get_num_rows(layer_num), rlim); /* for aspect_ratio != 1 case.
*/ - search_ranges[layer_num].ymin = std::max(0, layer_loc.y - rlim_y_max_range); - search_ranges[layer_num].ymax = std::min(compressed_block_grid.get_num_rows(layer_num) - 1, layer_loc.y + rlim_y_max_range); - } + search_ranges.xmin = std::max(0, compressed_loc.x - rlim_x_max_range); + search_ranges.xmax = std::min(compressed_block_grid.get_num_columns(layer_num) - 1, compressed_loc.x + rlim_x_max_range); + + search_ranges.ymin = std::max(0, compressed_loc.y - rlim_y_max_range); + search_ranges.ymax = std::min(compressed_block_grid.get_num_rows(layer_num) - 1, compressed_loc.y + rlim_y_max_range); + + search_ranges.layer_min = compressed_loc.layer_num; + search_ranges.layer_max = compressed_loc.layer_num; return search_ranges; } -std::vector get_compressed_grid_bounded_search_range(const t_compressed_block_grid& compressed_block_grid, - const std::vector& from_compressed_loc, - const std::vector& target_compressed_loc, - float rlim, - int num_layers) { - std::vector search_range(num_layers, t_bb()); +t_bb get_compressed_grid_bounded_search_range(const t_compressed_block_grid& compressed_block_grid, + const t_physical_tile_loc& from_compressed_loc, + const t_physical_tile_loc& target_compressed_loc, + float rlim) { + t_bb search_range; int min_cx, max_cx, min_cy, max_cy; - for (int layer_num = 0; layer_num < num_layers; layer_num++) { - //TODO: This if condition is added because blocks are only moved in the same layer. After the update, this condition should be replaced with an assertion - if (from_compressed_loc[layer_num].x == OPEN || from_compressed_loc[layer_num].y == OPEN || from_compressed_loc[layer_num].layer_num == OPEN) { - continue; - } - VTR_ASSERT(from_compressed_loc[layer_num].layer_num == layer_num); - VTR_ASSERT(target_compressed_loc[layer_num].layer_num == layer_num); + //TODO: This if condition is added because blocks are only moved in the same layer. After the update, this condition should be replaced with an assertion + VTR_ASSERT(from_compressed_loc.x != OPEN && from_compressed_loc.y != OPEN && from_compressed_loc.layer_num != OPEN); + VTR_ASSERT(target_compressed_loc.x != OPEN && target_compressed_loc.y != OPEN && target_compressed_loc.layer_num != OPEN); - int rlim_x_max_range = std::min(compressed_block_grid.get_num_columns(layer_num), rlim); - int rlim_y_max_range = std::min(compressed_block_grid.get_num_rows(layer_num), rlim); /* for aspect_ratio != 1 case. */ + int layer_num = target_compressed_loc.layer_num; + int rlim_x_max_range = std::min(compressed_block_grid.get_num_columns(layer_num), rlim); + int rlim_y_max_range = std::min(compressed_block_grid.get_num_rows(layer_num), rlim); /* for aspect_ratio != 1 case. 
*/
-        int cx_from = from_compressed_loc[layer_num].x;
-        int cy_from = from_compressed_loc[layer_num].y;
-        if (cx_from == OPEN || cy_from == OPEN) {
-            continue;
-        }
-
-        int cx_centroid = target_compressed_loc[layer_num].x;
-        int cy_centroid = target_compressed_loc[layer_num].y;
+    int cx_from = from_compressed_loc.x;
+    int cy_from = from_compressed_loc.y;
-        if (cx_centroid < cx_from) {
-            min_cx = std::max(0, cx_from - rlim_x_max_range);
-            max_cx = cx_from;
-        } else {
-            min_cx = cx_from;
-            max_cx = std::min(compressed_block_grid.get_num_columns(layer_num) - 1, cx_from + rlim_x_max_range);
-        }
-        if (cy_centroid < cy_from) {
-            min_cy = std::max(0, cy_from - rlim_y_max_range);
-            max_cy = cy_from;
-        } else {
-            min_cy = cy_from;
-            max_cy = std::min(compressed_block_grid.get_num_rows(layer_num) - 1, cy_from + rlim_y_max_range);
-        }
+    int cx_centroid = target_compressed_loc.x;
+    int cy_centroid = target_compressed_loc.y;
-        search_range[layer_num] = t_bb(min_cx, max_cx, min_cy, max_cy);
+    if (cx_centroid < cx_from) {
+        min_cx = std::max(0, cx_from - rlim_x_max_range);
+        max_cx = cx_from;
+    } else {
+        min_cx = cx_from;
+        max_cx = std::min(compressed_block_grid.get_num_columns(layer_num) - 1, cx_from + rlim_x_max_range);
+    }
+    if (cy_centroid < cy_from) {
+        min_cy = std::max(0, cy_from - rlim_y_max_range);
+        max_cy = cy_from;
+    } else {
+        min_cy = cy_from;
+        max_cy = std::min(compressed_block_grid.get_num_rows(layer_num) - 1, cy_from + rlim_y_max_range);
     }
+    search_range = t_bb(min_cx, max_cx, min_cy, max_cy, layer_num, layer_num);
+
     return search_range;
 }

@@ -1299,3 +1290,150 @@ std::string e_move_result_to_string(e_move_result move_outcome) {
     std::string move_result_to_string[] = {"Rejected", "Accepted", "Aborted"};
     return move_result_to_string[move_outcome];
 }
+
+int find_free_layer(t_logical_block_type_ptr logical_block, const t_pl_loc& loc) {
+    const auto& device_ctx = g_vpr_ctx.device();
+    const auto& place_ctx = g_vpr_ctx.placement();
+
+    // TODO: Compatible layer vector should be shuffled first, and then iterated through
+    int free_layer = loc.layer;
+    VTR_ASSERT(loc.layer != OPEN);
+    if (device_ctx.grid.get_num_layers() > 1) {
+        const auto& compatible_layers = place_ctx.compressed_block_grids[logical_block->index].get_layer_nums();
+        if (compatible_layers.size() > 1) {
+            if (place_ctx.grid_blocks.block_at_location(loc) != EMPTY_BLOCK_ID) {
+                for (const auto& layer : compatible_layers) {
+                    if (layer != free_layer) {
+                        // Check the same x-y location on the candidate layer, not on loc's own layer
+                        t_pl_loc layer_loc = loc;
+                        layer_loc.layer = layer;
+                        if (place_ctx.grid_blocks.block_at_location(layer_loc) == EMPTY_BLOCK_ID) {
+                            free_layer = layer;
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    return free_layer;
+}
+
+int get_random_layer(t_logical_block_type_ptr logical_block) {
+    const auto& compatible_layers = g_vpr_ctx.placement().compressed_block_grids[logical_block->index].get_layer_nums();
+    VTR_ASSERT(!compatible_layers.empty());
+    int layer_num = OPEN;
+    if (compatible_layers.size() == 1) {
+        layer_num = compatible_layers[0];
+    } else {
+        layer_num = compatible_layers[vtr::irand(compatible_layers.size() - 1)];
+    }
+
+    return layer_num;
+}
+
+t_bb union_2d_bb(const std::vector<t_2D_bb>& bb_vec) {
+    t_bb merged_bb;
+
+    // Not all 2d_bbs are valid. Thus, if one of the coordinates in the 2D_bb is not valid (equal to OPEN),
+    // we need to skip it.
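The union_2d_bb loop that follows merges the valid per-layer 2D boxes of a net into one 3D box, skipping layers whose box is unset. A self-contained sketch of the same merge under simplified stand-in types (OPEN modeled as -1; the field names mirror the diff, everything else is hypothetical):

```cpp
#include <vector>

constexpr int kOpen = -1; // stand-in for VPR's OPEN sentinel

struct Bb2d { int xmin = kOpen, xmax = kOpen, ymin = kOpen, ymax = kOpen, layer_num = kOpen; };
struct Bb3d { int xmin = kOpen, xmax = kOpen, ymin = kOpen, ymax = kOpen, layer_min = kOpen, layer_max = kOpen; };

// Union the valid per-layer 2D boxes into one 3D box, skipping invalid entries.
Bb3d union_2d_bb_sketch(const std::vector<Bb2d>& bb_vec) {
    Bb3d merged;
    for (const Bb2d& bb : bb_vec) {
        if (bb.xmin == kOpen) continue; // this layer holds no pins of the net
        if (merged.xmin == kOpen || bb.xmin < merged.xmin) merged.xmin = bb.xmin;
        if (merged.xmax == kOpen || bb.xmax > merged.xmax) merged.xmax = bb.xmax;
        if (merged.ymin == kOpen || bb.ymin < merged.ymin) merged.ymin = bb.ymin;
        if (merged.ymax == kOpen || bb.ymax > merged.ymax) merged.ymax = bb.ymax;
        if (merged.layer_min == kOpen || bb.layer_num < merged.layer_min) merged.layer_min = bb.layer_num;
        if (merged.layer_max == kOpen || bb.layer_num > merged.layer_max) merged.layer_max = bb.layer_num;
    }
    return merged;
}
```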
+ for (const auto& layer_bb : bb_vec) { + if (layer_bb.xmin == OPEN) { + VTR_ASSERT_SAFE(layer_bb.xmax == OPEN); + VTR_ASSERT_SAFE(layer_bb.ymin == OPEN); + VTR_ASSERT_SAFE(layer_bb.ymax == OPEN); + VTR_ASSERT_SAFE(layer_bb.layer_num == OPEN); + continue; + } + if (merged_bb.xmin == OPEN || layer_bb.xmin < merged_bb.xmin) { + merged_bb.xmin = layer_bb.xmin; + } + if (merged_bb.xmax == OPEN || layer_bb.xmax > merged_bb.xmax) { + merged_bb.xmax = layer_bb.xmax; + } + if (merged_bb.ymin == OPEN || layer_bb.ymin < merged_bb.ymin) { + merged_bb.ymin = layer_bb.ymin; + } + if (merged_bb.ymax == OPEN || layer_bb.ymax > merged_bb.ymax) { + merged_bb.ymax = layer_bb.ymax; + } + if (merged_bb.layer_min == OPEN || layer_bb.layer_num < merged_bb.layer_min) { + merged_bb.layer_min = layer_bb.layer_num; + } + if (merged_bb.layer_max == OPEN || layer_bb.layer_num > merged_bb.layer_max) { + merged_bb.layer_max = layer_bb.layer_num; + } + } + + return merged_bb; +} + +std::pair union_2d_bb_incr(const std::vector& num_edge_vec, + const std::vector& bb_vec) { + t_bb merged_num_edge; + t_bb merged_bb; + + for (const auto& layer_bb : bb_vec) { + if (layer_bb.xmin == OPEN) { + VTR_ASSERT_SAFE(layer_bb.xmax == OPEN); + VTR_ASSERT_SAFE(layer_bb.ymin == OPEN); + VTR_ASSERT_SAFE(layer_bb.ymax == OPEN); + VTR_ASSERT_SAFE(layer_bb.layer_num == OPEN); + continue; + } + if (merged_bb.xmin == OPEN || layer_bb.xmin <= merged_bb.xmin) { + if (layer_bb.xmin == merged_bb.xmin) { + VTR_ASSERT_SAFE(merged_num_edge.xmin != OPEN); + merged_num_edge.xmin += num_edge_vec[layer_bb.layer_num].xmin; + } else { + merged_num_edge.xmin = num_edge_vec[layer_bb.layer_num].xmin; + } + merged_bb.xmin = layer_bb.xmin; + } + if (merged_bb.xmax == OPEN || layer_bb.xmax >= merged_bb.xmax) { + if (layer_bb.xmax == merged_bb.xmax) { + VTR_ASSERT_SAFE(merged_num_edge.xmax != OPEN); + merged_num_edge.xmax += num_edge_vec[layer_bb.layer_num].xmax; + } else { + merged_num_edge.xmax = num_edge_vec[layer_bb.layer_num].xmax; + } + merged_bb.xmax = layer_bb.xmax; + } + if (merged_bb.ymin == OPEN || layer_bb.ymin <= merged_bb.ymin) { + if (layer_bb.ymin == merged_bb.ymin) { + VTR_ASSERT_SAFE(merged_num_edge.ymin != OPEN); + merged_num_edge.ymin += num_edge_vec[layer_bb.layer_num].ymin; + } else { + merged_num_edge.ymin = num_edge_vec[layer_bb.layer_num].ymin; + } + merged_bb.ymin = layer_bb.ymin; + } + if (merged_bb.ymax == OPEN || layer_bb.ymax >= merged_bb.ymax) { + if (layer_bb.ymax == merged_bb.ymax) { + VTR_ASSERT_SAFE(merged_num_edge.ymax != OPEN); + merged_num_edge.ymax += num_edge_vec[layer_bb.layer_num].ymax; + } else { + merged_num_edge.ymax = num_edge_vec[layer_bb.layer_num].ymax; + } + merged_bb.ymax = layer_bb.ymax; + } + if (merged_bb.layer_min == OPEN || layer_bb.layer_num <= merged_bb.layer_min) { + if (layer_bb.layer_num == merged_bb.layer_min) { + VTR_ASSERT_SAFE(merged_num_edge.layer_min != OPEN); + merged_num_edge.layer_min += num_edge_vec[layer_bb.layer_num].layer_num; + } else { + merged_num_edge.layer_min = num_edge_vec[layer_bb.layer_num].layer_num; + } + merged_bb.layer_min = layer_bb.layer_num; + } + if (merged_bb.layer_max == OPEN || layer_bb.layer_num >= merged_bb.layer_max) { + if (layer_bb.layer_num == merged_bb.layer_max) { + VTR_ASSERT_SAFE(merged_num_edge.layer_max != OPEN); + merged_num_edge.layer_max += num_edge_vec[layer_bb.layer_num].layer_num; + } else { + merged_num_edge.layer_max = num_edge_vec[layer_bb.layer_num].layer_num; + } + merged_bb.layer_max = layer_bb.layer_num; + } + } + + return 
std::make_pair(merged_num_edge, merged_bb);
+}
diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h
index 72b53b6b02a..f9369acd4f7 100644
--- a/vpr/src/place/move_utils.h
+++ b/vpr/src/place/move_utils.h
@@ -275,13 +275,11 @@ std::vector get_compressed_loc_approx(const t_compressed_bl
  * @param compressed_block_grid
  * @param compressed_locs
  * @param rlim
- * @param num_layers
 * @return A compressed search range for the layer of the given compressed location
 */
-std::vector<t_bb> get_compressed_grid_target_search_range(const t_compressed_block_grid& compressed_block_grid,
-                                                          const std::vector<t_physical_tile_loc>& compressed_locs,
-                                                          float rlim,
-                                                          int num_layers);
+t_bb get_compressed_grid_target_search_range(const t_compressed_block_grid& compressed_block_grid,
+                                             const t_physical_tile_loc& compressed_locs,
+                                             float rlim);

 /**
  * @brief This function calculates the search range based on the given rlim value and the number of columns/rows *
@@ -294,14 +292,12 @@ std::vector get_compressed_grid_target_search_range(const t_compressed_blo
  * @param from_compressed_loc
  * @param target_compressed_loc
  * @param rlim
- * @param num_layers
 * @return
 */
-std::vector<t_bb> get_compressed_grid_bounded_search_range(const t_compressed_block_grid& compressed_block_grid,
-                                                           const std::vector<t_physical_tile_loc>& from_compressed_loc,
-                                                           const std::vector<t_physical_tile_loc>& target_compressed_loc,
-                                                           float rlim,
-                                                           int num_layers);
+t_bb get_compressed_grid_bounded_search_range(const t_compressed_block_grid& compressed_block_grid,
+                                              const t_physical_tile_loc& from_compressed_loc,
+                                              const t_physical_tile_loc& target_compressed_loc,
+                                              float rlim);

 /*
  * If the block to be moved (b_from) has a floorplan constraint, this routine changes the max and min coords *
@@ -328,6 +324,37 @@ bool intersect_range_limit_with_floorplan_constraints(t_logical_block_type_ptr t

 std::string e_move_result_to_string(e_move_result move_outcome);

+/**
+ * @brief Iterate over all layers that have a physical tile at the x-y location specified by "loc" that can accommodate "logical_block".
+ * If the location on the layer of "loc" is empty, return that layer. Otherwise,
+ * return a layer that is not occupied at that location. If there isn't any, return the layer of "loc".
+ *
+ * @param logical_block
+ * @param loc
+ * @return
+ */
+int find_free_layer(t_logical_block_type_ptr logical_block, const t_pl_loc& loc);
+
+int get_random_layer(t_logical_block_type_ptr logical_block);
+
+/**
+ * @brief Iterate over all layers and get the minimum and maximum x and y over the layers that have a valid bounding box. Set the layer min and max
+ * based on the layers that have a valid BB.
+ * @param tbb_vec
+ * @return 3D bounding box
+ */
+t_bb union_2d_bb(const std::vector<t_2D_bb>& tbb_vec);
+
+/**
+ * @brief Iterate over all layers and get the minimum and maximum x and y over the layers that have a valid bounding box. Create the "num_edge" in a similar way. This data structure
+ * stores how many blocks are on each edge of the BB. Set the layer min and max based on the layers that have a valid BB.
+ * @param num_edge_vec + * @param bb_vec + * @return num_edge, 3D bb + */ +std::pair union_2d_bb_incr(const std::vector& num_edge_vec, + const std::vector& bb_vec); + #ifdef VTR_ENABLE_DEBUG_LOGGING /** * @brief If the block ID passed to the placer_debug_net parameter of the command line is equal to blk_id, or if any of the nets diff --git a/vpr/src/place/noc_place_utils.cpp b/vpr/src/place/noc_place_utils.cpp index 5e68e7d1462..17d96dd3677 100644 --- a/vpr/src/place/noc_place_utils.cpp +++ b/vpr/src/place/noc_place_utils.cpp @@ -495,7 +495,7 @@ e_create_move propose_router_swap(t_pl_blocks_to_be_moved& blocks_affected, floa // now choose a compatible block to swap with t_pl_loc to; - + to.layer = from.layer; if (!find_to_loc_uniform(cluster_from_type, rlim, from, to, b_from)) { return e_create_move::ABORT; } diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 52e4e5e6ff1..51dfce9ee32 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -148,7 +148,9 @@ static vtr::NdMatrix chany_place_cost_fac({0, 0}); //[0...device_ctx.g /* The following arrays are used by the try_swap function for speed. */ /* [0...cluster_ctx.clb_nlist.nets().size()-1] */ -static vtr::vector ts_bb_coord_new, ts_bb_edge_new; +static vtr::vector ts_bb_edge_new, ts_bb_coord_new; +static vtr::vector> layer_ts_bb_edge_new, layer_ts_bb_coord_new; +static vtr::Matrix ts_layer_sink_pin_count; static std::vector ts_nets_to_update; /* These file-scoped variables keep track of the number of swaps * @@ -252,13 +254,22 @@ std::unique_ptr f_move_stats_file(nullptr, void print_clb_placement(const char* fname); #endif +/** + * @brief determine the type of the bounding box used by the placer to predict the wirelength + * + * @param place_bb_mode The bounding box mode passed by the CLI + * @param rr_graph The routing resource graph + */ +static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode, + const RRGraphView& rr_graph); + static void alloc_and_load_placement_structs(float place_cost_exp, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts, t_direct_inf* directs, int num_directs); -static void alloc_and_load_try_swap_structs(); +static void alloc_and_load_try_swap_structs(const bool cube_bb); static void free_try_swap_structs(); static void free_placement_structs(const t_placer_opts& placer_opts, const t_noc_opts& noc_opts); @@ -269,7 +280,11 @@ static void free_fast_cost_update(); static double comp_bb_cost(e_cost_methods method); -static void update_move_nets(int num_nets_affected); +static double comp_layer_bb_cost(e_cost_methods method); + +static void update_move_nets(int num_nets_affected, + const bool cube_bb); + static void reset_move_nets(int num_nets_affected); static e_move_result try_swap(const t_annealing_state* state, @@ -304,7 +319,20 @@ static int check_placement_consistency(); static int check_block_placement_consistency(); static int check_macro_placement_consistency(); -static float starting_t(const t_annealing_state* state, t_placer_costs* costs, t_annealing_sched annealing_sched, const PlaceDelayModel* delay_model, PlacerCriticalities* criticalities, PlacerSetupSlacks* setup_slacks, SetupTimingInfo* timing_info, MoveGenerator& move_generator, ManualMoveGenerator& manual_move_generator, NetPinTimingInvalidator* pin_timing_invalidator, t_pl_blocks_to_be_moved& blocks_affected, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts, MoveTypeStat& move_type_stat); +static float starting_t(const t_annealing_state* state, + t_placer_costs* costs, + 
t_annealing_sched annealing_sched, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + SetupTimingInfo* timing_info, + MoveGenerator& move_generator, + ManualMoveGenerator& manual_move_generator, + NetPinTimingInvalidator* pin_timing_invalidator, + t_pl_blocks_to_be_moved& blocks_affected, + const t_placer_opts& placer_opts, + const t_noc_opts& noc_opts, + MoveTypeStat& move_type_stat); static int count_connections(); @@ -326,9 +354,69 @@ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks); static e_move_result assess_swap(double delta_c, double t); -static void get_non_updateable_bb(ClusterNetId net_id, t_bb* bb_coord_new); - -static void update_bb(ClusterNetId net_id, t_bb* bb_coord_new, t_bb* bb_edge_new, int xold, int yold, int xnew, int ynew); +static void get_non_updateable_bb(ClusterNetId net_id, + t_bb& bb_coord_new, + vtr::NdMatrixProxy num_sink_pin_layer); + +static void get_non_updateable_layer_bb(ClusterNetId net_id, + std::vector& bb_coord_new, + vtr::NdMatrixProxy num_sink_layer); + +static void update_bb(ClusterNetId net_id, + t_bb& bb_edge_new, + t_bb& bb_coord_new, + vtr::NdMatrixProxy num_sink_pin_layer_new, + t_physical_tile_loc pin_old_loc, + t_physical_tile_loc pin_new_loc, + bool src_pin); + +static void update_layer_bb(ClusterNetId net_id, + std::vector& bb_edge_new, + std::vector& bb_coord_new, + vtr::NdMatrixProxy bb_pin_sink_count_new, + t_physical_tile_loc pin_old_loc, + t_physical_tile_loc pin_new_loc, + bool is_output_pin); + +static inline void update_bb_same_layer(ClusterNetId net_id, + const t_physical_tile_loc& pin_old_loc, + const t_physical_tile_loc& pin_new_loc, + const std::vector& curr_bb_edge, + const std::vector& curr_bb_coord, + vtr::NdMatrixProxy bb_pin_sink_count_new, + std::vector& bb_edge_new, + std::vector& bb_coord_new); + +static inline void update_bb_layer_changed(ClusterNetId net_id, + const t_physical_tile_loc& pin_old_loc, + const t_physical_tile_loc& pin_new_loc, + const std::vector& curr_bb_edge, + const std::vector& curr_bb_coord, + vtr::NdMatrixProxy bb_pin_sink_count_new, + std::vector& bb_edge_new, + std::vector& bb_coord_new); + +static void update_bb_pin_sink_count(ClusterNetId net_id, + const t_physical_tile_loc& pin_old_loc, + const t_physical_tile_loc& pin_new_loc, + const vtr::NdMatrixProxy curr_layer_pin_sink_count, + vtr::NdMatrixProxy bb_pin_sink_count_new, + bool is_output_pin); + +static inline void update_bb_edge(ClusterNetId net_id, + std::vector& bb_edge_new, + std::vector& bb_coord_new, + vtr::NdMatrixProxy bb_layer_pin_sink_count, + const int& old_num_block_on_edge, + const int& old_edge_coord, + int& new_num_block_on_edge, + int& new_edge_coord); + +static void add_block_to_bb(const t_physical_tile_loc& new_pin_loc, + const t_2D_bb& bb_edge_old, + const t_2D_bb& bb_coord_old, + t_2D_bb& bb_edge_new, + t_2D_bb& bb_coord_new); static int find_affected_nets_and_update_costs( const t_place_algorithm& place_algorithm, @@ -345,6 +433,13 @@ static void update_net_bb(const ClusterNetId net, int iblk, const ClusterBlockId blk, const ClusterPinId blk_pin); + +static void update_net_layer_bb(const ClusterNetId net, + const t_pl_blocks_to_be_moved& blocks_affected, + int iblk, + const ClusterBlockId blk, + const ClusterPinId blk_pin); + static void update_td_delta_costs(const PlaceDelayModel* delay_model, const PlacerCriticalities& criticalities, const ClusterNetId net, @@ -356,11 +451,27 @@ static void 
update_placement_cost_normalization_factors(t_placer_costs* costs, c static double get_total_cost(t_placer_costs* costs, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts); -static double get_net_cost(ClusterNetId net_id, t_bb* bb_ptr); +static double get_net_cost(ClusterNetId net_id, const t_bb& bbptr); + +static double get_net_layer_cost(ClusterNetId /* net_id */, + const std::vector& bbptr, + const vtr::NdMatrixProxy layer_pin_sink_count); -static void get_bb_from_scratch(ClusterNetId net_id, t_bb* coords, t_bb* num_on_edges); +static void get_bb_from_scratch(ClusterNetId net_id, + t_bb& coords, + t_bb& num_on_edges, + vtr::NdMatrixProxy num_sink_pin_layer); -static double get_net_wirelength_estimate(ClusterNetId net_id, t_bb* bbptr); +static void get_layer_bb_from_scratch(ClusterNetId net_id, + std::vector& num_on_edges, + std::vector& coords, + vtr::NdMatrixProxy layer_pin_sink_count); + +static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bbptr); + +static double get_net_layer_wirelength_estimate(ClusterNetId /* net_id */, + const std::vector& bbptr, + const vtr::NdMatrixProxy layer_pin_sink_count); static void free_try_swap_arrays(); @@ -508,6 +619,7 @@ void try_place(const Netlist<>& net_list, if (placer_opts.place_algorithm.is_timing_driven()) { /*do this before the initial placement to avoid messing up the initial placement */ place_delay_model = alloc_lookups_and_delay_model(net_list, + device_ctx.arch_switch_inf, chan_width_dist, placer_opts, router_opts, @@ -523,6 +635,14 @@ void try_place(const Netlist<>& net_list, } } + g_vpr_ctx.mutable_placement().cube_bb = is_cube_bb(placer_opts.place_bounding_box_mode, + device_ctx.rr_graph); + const auto& cube_bb = g_vpr_ctx.placement().cube_bb; + + VTR_LOG("\n"); + VTR_LOG("Bounding box mode is %s\n", (cube_bb ? "Cube" : "Per-layer")); + VTR_LOG("\n"); + int move_lim = 1; move_lim = (int)(annealing_sched.inner_num * pow(net_list.blocks().size(), 1.3333)); @@ -549,6 +669,12 @@ void try_place(const Netlist<>& net_list, placer_opts.constraints_file.c_str(), noc_opts.noc); + if (!placer_opts.write_initial_place_file.empty()) { + print_place(nullptr, + nullptr, + (placer_opts.write_initial_place_file + ".init.place").c_str()); + } + #ifdef ENABLE_ANALYTIC_PLACE /* * Analytic Placer: @@ -578,7 +704,12 @@ void try_place(const Netlist<>& net_list, /* Gets initial cost and loads bounding boxes. 
*/
     if (placer_opts.place_algorithm.is_timing_driven()) {
-        costs.bb_cost = comp_bb_cost(NORMAL);
+        if (cube_bb) {
+            costs.bb_cost = comp_bb_cost(NORMAL);
+        } else {
+            VTR_ASSERT_SAFE(!cube_bb);
+            costs.bb_cost = comp_layer_bb_cost(NORMAL);
+        }

         first_crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */
@@ -658,7 +789,12 @@ void try_place(const Netlist<>& net_list,
         VTR_ASSERT(placer_opts.place_algorithm == BOUNDING_BOX_PLACE);

         /* Total cost is the same as wirelength cost normalized*/
-        costs.bb_cost = comp_bb_cost(NORMAL);
+        if (cube_bb) {
+            costs.bb_cost = comp_bb_cost(NORMAL);
+        } else {
+            VTR_ASSERT_SAFE(!cube_bb);
+            costs.bb_cost = comp_layer_bb_cost(NORMAL);
+        }
         costs.bb_cost_norm = 1 / costs.bb_cost;

         /* Timing cost and normalization factors are not used */
@@ -684,8 +820,11 @@ void try_place(const Netlist<>& net_list,
     costs.cost = get_total_cost(&costs, placer_opts, noc_opts);

     //Sanity check that initial placement is legal
-    check_place(costs, place_delay_model.get(), placer_criticalities.get(),
-                placer_opts.place_algorithm, noc_opts);
+    check_place(costs,
+                place_delay_model.get(),
+                placer_criticalities.get(),
+                placer_opts.place_algorithm,
+                noc_opts);

     //Initial placement statistics
     VTR_LOG("Initial placement cost: %g bb_cost: %g td_cost: %g\n", costs.cost,
@@ -985,8 +1124,11 @@ void try_place(const Netlist<>& net_list,
         place_sync_external_block_connections(block_id);
     }

-    check_place(costs, place_delay_model.get(), placer_criticalities.get(),
-                placer_opts.place_algorithm, noc_opts);
+    check_place(costs,
+                place_delay_model.get(),
+                placer_criticalities.get(),
+                placer_opts.place_algorithm,
+                noc_opts);

     //Some stats
     VTR_LOG("\n");
@@ -1134,7 +1276,8 @@ static void placement_inner_loop(const t_annealing_state* state,
         e_move_result swap_result = try_swap(state, costs, move_generator, manual_move_generator,
                                              timing_info, pin_timing_invalidator,
                                              blocks_affected, delay_model, criticalities, setup_slacks,
-                                             placer_opts, noc_opts, move_type_stat, place_algorithm, timing_bb_factor, manual_move_enabled);
+                                             placer_opts, noc_opts, move_type_stat, place_algorithm,
+                                             timing_bb_factor, manual_move_enabled);

         if (swap_result == ACCEPTED) {
             /* Move was accepted. Update statistics that are useful for the annealing schedule. */
@@ -1292,7 +1435,20 @@ static int count_connections() {
 }
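The cube/per-layer split seen above is the core of this change: comp_bb_cost works on one 3D box per net, while comp_layer_bb_cost works on one 2D box per layer. Stripped of VPR's crossing-count and channel-capacity weighting (applied in get_net_cost and get_net_layer_cost later in this file), the two wirelength estimates differ roughly as in this hypothetical sketch (stand-in Pin type, not the project's data structures):

```cpp
#include <algorithm>
#include <map>
#include <vector>

struct Pin { int x, y, layer; };

// Semi-perimeter of one box over all pins (the "cube" flavour; the layer span
// is ignored in this simplified sketch). Assumes at least one pin.
int cube_bb_hpwl(const std::vector<Pin>& pins) {
    int xmin = pins[0].x, xmax = pins[0].x, ymin = pins[0].y, ymax = pins[0].y;
    for (const Pin& p : pins) {
        xmin = std::min(xmin, p.x); xmax = std::max(xmax, p.x);
        ymin = std::min(ymin, p.y); ymax = std::max(ymax, p.y);
    }
    return (xmax - xmin + 1) + (ymax - ymin + 1);
}

// Sum of semi-perimeters of one box per layer (the "per-layer" flavour).
int per_layer_bb_hpwl(const std::vector<Pin>& pins) {
    std::map<int, std::vector<Pin>> by_layer;
    for (const Pin& p : pins) by_layer[p.layer].push_back(p);
    int total = 0;
    for (const auto& entry : by_layer) total += cube_bb_hpwl(entry.second);
    return total;
}
```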
 ///@brief Find the starting temperature for the annealing loop.
-static float starting_t(const t_annealing_state* state, t_placer_costs* costs, t_annealing_sched annealing_sched, const PlaceDelayModel* delay_model, PlacerCriticalities* criticalities, PlacerSetupSlacks* setup_slacks, SetupTimingInfo* timing_info, MoveGenerator& move_generator, ManualMoveGenerator& manual_move_generator, NetPinTimingInvalidator* pin_timing_invalidator, t_pl_blocks_to_be_moved& blocks_affected, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts, MoveTypeStat& move_type_stat) {
+static float starting_t(const t_annealing_state* state,
+                        t_placer_costs* costs,
+                        t_annealing_sched annealing_sched,
+                        const PlaceDelayModel* delay_model,
+                        PlacerCriticalities* criticalities,
+                        PlacerSetupSlacks* setup_slacks,
+                        SetupTimingInfo* timing_info,
+                        MoveGenerator& move_generator,
+                        ManualMoveGenerator& manual_move_generator,
+                        NetPinTimingInvalidator* pin_timing_invalidator,
+                        t_pl_blocks_to_be_moved& blocks_affected,
+                        const t_placer_opts& placer_opts,
+                        const t_noc_opts& noc_opts,
+                        MoveTypeStat& move_type_stat) {
     if (annealing_sched.type == USER_SCHED) {
         return (annealing_sched.init_t);
     }
@@ -1370,7 +1526,8 @@ static float starting_t(const t_annealing_state* state, t_placer_costs* costs, t
     return init_temp;
 }

-static void update_move_nets(int num_nets_affected) {
+static void update_move_nets(int num_nets_affected,
+                             const bool cube_bb) {
     /* update net cost functions and reset flags. */
     auto& cluster_ctx = g_vpr_ctx.clustering();
     auto& place_move_ctx = g_placer_ctx.mutable_move();
@@ -1379,9 +1536,23 @@ static void update_move_nets(int num_nets_affected) {
          inet_affected++) {
         ClusterNetId net_id = ts_nets_to_update[inet_affected];

-        place_move_ctx.bb_coords[net_id] = ts_bb_coord_new[net_id];
-        if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET)
-            place_move_ctx.bb_num_on_edges[net_id] = ts_bb_edge_new[net_id];
+        if (cube_bb) {
+            place_move_ctx.bb_coords[net_id] = ts_bb_coord_new[net_id];
+        } else {
+            place_move_ctx.layer_bb_coords[net_id] = layer_ts_bb_coord_new[net_id];
+        }
+
+        for (int layer_num = 0; layer_num < g_vpr_ctx.device().grid.get_num_layers(); layer_num++) {
+            place_move_ctx.num_sink_pin_layer[size_t(net_id)][layer_num] = ts_layer_sink_pin_count[size_t(net_id)][layer_num];
+        }
+
+        if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET) {
+            if (cube_bb) {
+                place_move_ctx.bb_num_on_edges[net_id] = ts_bb_edge_new[net_id];
+            } else {
+                place_move_ctx.layer_bb_num_on_edges[net_id] = layer_ts_bb_edge_new[net_id];
+            }
+        }

         net_cost[net_id] = proposed_net_cost[net_id];
@@ -1637,7 +1808,8 @@ static e_move_result try_swap(const t_annealing_state* state,
         }

         /* Update net cost functions and reset flags. */
-        update_move_nets(num_nets_affected);
+        update_move_nets(num_nets_affected,
+                         g_vpr_ctx.placement().cube_bb);

         /* Update clb data structures since we kept the move. */
         commit_move_blocks(blocks_affected);
@@ -1746,6 +1918,37 @@ static e_move_result try_swap(const t_annealing_state* state,
     return move_outcome;
 }

+static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode,
+                       const RRGraphView& rr_graph) {
+    bool cube_bb;
+    const int number_layers = g_vpr_ctx.device().grid.get_num_layers();
+
+    // If the FPGA has only one layer, then we can only use the cube bounding box
+    if (number_layers == 1) {
+        cube_bb = true;
+    } else {
+        VTR_ASSERT(number_layers > 1);
+        if (place_bb_mode == AUTO_BB) {
+            // If auto_bb is used, we analyze the RR graph to see whether there is any inter-layer connection that does not
+            // originate from an OPIN.
If there is any, cube BB is chosen, otherwise, per-layer bb is chosen. + if (inter_layer_connections_limited_to_opin(rr_graph)) { + cube_bb = false; + } else { + cube_bb = true; + } + } else if (place_bb_mode == CUBE_BB) { + // The user has specifically asked for CUBE_BB + cube_bb = true; + } else { + // The user has specifically asked for PER_LAYER_BB + VTR_ASSERT_SAFE(place_bb_mode == PER_LAYER_BB); + cube_bb = false; + } + } + + return cube_bb; +} + /** * @brief Find all the nets and pins affected by this swap and update costs. * @@ -1781,6 +1984,8 @@ static int find_affected_nets_and_update_costs( int num_affected_nets = 0; + const auto& cube_bb = g_vpr_ctx.placement().cube_bb; + /* Go through all the blocks moved. */ for (int iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) { ClusterBlockId blk = blocks_affected.moved_blocks[iblk].block_num; @@ -1800,7 +2005,11 @@ static int find_affected_nets_and_update_costs( record_affected_net(net_id, num_affected_nets); /* Update the net bounding boxes. */ - update_net_bb(net_id, blocks_affected, iblk, blk, blk_pin); + if (cube_bb) { + update_net_bb(net_id, blocks_affected, iblk, blk, blk_pin); + } else { + update_net_layer_bb(net_id, blocks_affected, iblk, blk, blk_pin); + } if (place_algorithm.is_timing_driven()) { /* Determine the change in connection delay and timing cost. */ @@ -1816,8 +2025,15 @@ static int find_affected_nets_and_update_costs( inet_affected++) { ClusterNetId net_id = ts_nets_to_update[inet_affected]; - proposed_net_cost[net_id] = get_net_cost(net_id, - &ts_bb_coord_new[net_id]); + if (cube_bb) { + proposed_net_cost[net_id] = get_net_cost(net_id, + ts_bb_coord_new[net_id]); + } else { + proposed_net_cost[net_id] = get_net_layer_cost(net_id, + layer_ts_bb_coord_new[net_id], + ts_layer_sink_pin_count[size_t(net_id)]); + } + bb_delta_c += proposed_net_cost[net_id] - net_cost[net_id]; } @@ -1855,24 +2071,78 @@ static void update_net_bb(const ClusterNetId net, //For small nets brute-force bounding box update is faster if (bb_updated_before[net] == NOT_UPDATED_YET) { //Only once per-net - get_non_updateable_bb(net, &ts_bb_coord_new[net]); + get_non_updateable_bb(net, + ts_bb_coord_new[net], + ts_layer_sink_pin_count[size_t(net)]); } } else { //For large nets, update bounding box incrementally int iblk_pin = tile_pin_index(blk_pin); + bool src_pin = cluster_ctx.clb_nlist.pin_type(blk_pin) == PinType::DRIVER; t_physical_tile_type_ptr blk_type = physical_tile_type(blk); int pin_width_offset = blk_type->pin_width_offset[iblk_pin]; int pin_height_offset = blk_type->pin_height_offset[iblk_pin]; //Incremental bounding box update - update_bb(net, &ts_bb_coord_new[net], &ts_bb_edge_new[net], - blocks_affected.moved_blocks[iblk].old_loc.x + pin_width_offset, - blocks_affected.moved_blocks[iblk].old_loc.y - + pin_height_offset, - blocks_affected.moved_blocks[iblk].new_loc.x + pin_width_offset, - blocks_affected.moved_blocks[iblk].new_loc.y - + pin_height_offset); + t_physical_tile_loc pin_old_loc( + blocks_affected.moved_blocks[iblk].old_loc.x + pin_width_offset, + blocks_affected.moved_blocks[iblk].old_loc.y + pin_height_offset, + blocks_affected.moved_blocks[iblk].old_loc.layer); + t_physical_tile_loc pin_new_loc( + blocks_affected.moved_blocks[iblk].new_loc.x + pin_width_offset, + blocks_affected.moved_blocks[iblk].new_loc.y + pin_height_offset, + blocks_affected.moved_blocks[iblk].new_loc.layer); + update_bb(net, + ts_bb_edge_new[net], + ts_bb_coord_new[net], + ts_layer_sink_pin_count[size_t(net)], + pin_old_loc, + pin_new_loc, 
+ src_pin); + } +} + +static void update_net_layer_bb(const ClusterNetId net, + const t_pl_blocks_to_be_moved& blocks_affected, + int iblk, + const ClusterBlockId blk, + const ClusterPinId blk_pin) { + auto& cluster_ctx = g_vpr_ctx.clustering(); + + if (cluster_ctx.clb_nlist.net_sinks(net).size() < SMALL_NET) { + //For small nets brute-force bounding box update is faster + + if (bb_updated_before[net] == NOT_UPDATED_YET) { //Only once per-net + get_non_updateable_layer_bb(net, + layer_ts_bb_coord_new[net], + ts_layer_sink_pin_count[size_t(net)]); + } + } else { + //For large nets, update bounding box incrementally + int iblk_pin = tile_pin_index(blk_pin); + + t_physical_tile_type_ptr blk_type = physical_tile_type(blk); + int pin_width_offset = blk_type->pin_width_offset[iblk_pin]; + int pin_height_offset = blk_type->pin_height_offset[iblk_pin]; + + //Incremental bounding box update + t_physical_tile_loc pin_old_loc( + blocks_affected.moved_blocks[iblk].old_loc.x + pin_width_offset, + blocks_affected.moved_blocks[iblk].old_loc.y + pin_height_offset, + blocks_affected.moved_blocks[iblk].old_loc.layer); + t_physical_tile_loc pin_new_loc( + blocks_affected.moved_blocks[iblk].new_loc.x + pin_width_offset, + blocks_affected.moved_blocks[iblk].new_loc.y + pin_height_offset, + blocks_affected.moved_blocks[iblk].new_loc.layer); + auto pin_dir = get_pin_type_from_pin_physical_num(blk_type, iblk_pin); + update_layer_bb(net, + layer_ts_bb_edge_new[net], + layer_ts_bb_coord_new[net], + ts_layer_sink_pin_count[size_t(net)], + pin_old_loc, + pin_new_loc, + pin_dir == e_pin_type::DRIVER); } } @@ -2237,19 +2507,61 @@ static double comp_bb_cost(e_cost_methods method) { * so they can use a fast bounding box calculator. */ if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET && method == NORMAL) { - get_bb_from_scratch(net_id, &place_move_ctx.bb_coords[net_id], - &place_move_ctx.bb_num_on_edges[net_id]); + get_bb_from_scratch(net_id, + place_move_ctx.bb_coords[net_id], + place_move_ctx.bb_num_on_edges[net_id], + place_move_ctx.num_sink_pin_layer[size_t(net_id)]); } else { get_non_updateable_bb(net_id, - &place_move_ctx.bb_coords[net_id]); + place_move_ctx.bb_coords[net_id], + place_move_ctx.num_sink_pin_layer[size_t(net_id)]); } - net_cost[net_id] = get_net_cost(net_id, - &place_move_ctx.bb_coords[net_id]); + net_cost[net_id] = get_net_cost(net_id, place_move_ctx.bb_coords[net_id]); cost += net_cost[net_id]; if (method == CHECK) - expected_wirelength += get_net_wirelength_estimate(net_id, - &place_move_ctx.bb_coords[net_id]); + expected_wirelength += get_net_wirelength_estimate(net_id, place_move_ctx.bb_coords[net_id]); + } + } + + if (method == CHECK) { + VTR_LOG("\n"); + VTR_LOG("BB estimate of min-dist (placement) wire length: %.0f\n", + expected_wirelength); + } + return cost; +} + +static double comp_layer_bb_cost(e_cost_methods method) { + double cost = 0; + double expected_wirelength = 0.0; + auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& place_move_ctx = g_placer_ctx.mutable_move(); + + for (auto net_id : cluster_ctx.clb_nlist.nets()) { /* for each net ... */ + if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { /* Do only if not ignored. */ + /* Small nets don't use incremental updating on their bounding boxes, * + * so they can use a fast bounding box calculator. 
*/ + if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET + && method == NORMAL) { + get_layer_bb_from_scratch(net_id, + place_move_ctx.layer_bb_num_on_edges[net_id], + place_move_ctx.layer_bb_coords[net_id], + place_move_ctx.num_sink_pin_layer[size_t(net_id)]); + } else { + get_non_updateable_layer_bb(net_id, + place_move_ctx.layer_bb_coords[net_id], + place_move_ctx.num_sink_pin_layer[size_t(net_id)]); + } + + net_cost[net_id] = get_net_layer_cost(net_id, + place_move_ctx.layer_bb_coords[net_id], + place_move_ctx.num_sink_pin_layer[size_t(net_id)]); + cost += net_cost[net_id]; + if (method == CHECK) + expected_wirelength += get_net_layer_wirelength_estimate(net_id, + place_move_ctx.layer_bb_coords[net_id], + place_move_ctx.num_sink_pin_layer[size_t(net_id)]); } } @@ -2275,11 +2587,15 @@ static void alloc_and_load_placement_structs(float place_cost_exp, const auto& cluster_ctx = g_vpr_ctx.clustering(); auto& place_ctx = g_vpr_ctx.mutable_placement(); + const auto& cube_bb = place_ctx.cube_bb; + auto& p_timing_ctx = g_placer_ctx.mutable_timing(); auto& place_move_ctx = g_placer_ctx.mutable_move(); size_t num_nets = cluster_ctx.clb_nlist.nets().size(); + const int num_layers = device_ctx.grid.get_num_layers(); + init_placement_context(); max_pins_per_clb = 0; @@ -2323,8 +2639,21 @@ static void alloc_and_load_placement_structs(float place_cost_exp, net_cost.resize(num_nets, -1.); proposed_net_cost.resize(num_nets, -1.); - place_move_ctx.bb_coords.resize(num_nets, t_bb()); - place_move_ctx.bb_num_on_edges.resize(num_nets, t_bb()); + + if (cube_bb) { + place_move_ctx.bb_coords.resize(num_nets, t_bb()); + place_move_ctx.bb_num_on_edges.resize(num_nets, t_bb()); + } else { + VTR_ASSERT_SAFE(!cube_bb); + place_move_ctx.layer_bb_num_on_edges.resize(num_nets, std::vector(num_layers, t_2D_bb())); + place_move_ctx.layer_bb_coords.resize(num_nets, std::vector(num_layers, t_2D_bb())); + } + + place_move_ctx.num_sink_pin_layer.resize({num_nets, size_t(num_layers)}); + for (size_t flat_idx = 0; flat_idx < ts_layer_sink_pin_count.size(); flat_idx++) { + auto& elem = ts_layer_sink_pin_count.get(flat_idx); + elem = OPEN; + } /* Used to store costs for moves not yet made and to indicate when a net's * * cost has been recomputed. proposed_net_cost[inet] < 0 means net's cost hasn't * @@ -2333,7 +2662,7 @@ static void alloc_and_load_placement_structs(float place_cost_exp, alloc_and_load_for_fast_cost_update(place_cost_exp); - alloc_and_load_try_swap_structs(); + alloc_and_load_try_swap_structs(cube_bb); place_ctx.pl_macros = alloc_and_load_placement_macros(directs, num_directs); @@ -2362,8 +2691,13 @@ static void free_placement_structs(const t_placer_opts& placer_opts, const t_noc vtr::release_memory(net_cost); vtr::release_memory(proposed_net_cost); - vtr::release_memory(place_move_ctx.bb_coords); vtr::release_memory(place_move_ctx.bb_num_on_edges); + vtr::release_memory(place_move_ctx.bb_coords); + + vtr::release_memory(place_move_ctx.layer_bb_num_on_edges); + vtr::release_memory(place_move_ctx.layer_bb_coords); + + place_move_ctx.num_sink_pin_layer.clear(); vtr::release_memory(bb_updated_before); @@ -2376,15 +2710,30 @@ static void free_placement_structs(const t_placer_opts& placer_opts, const t_noc } } -static void alloc_and_load_try_swap_structs() { +static void alloc_and_load_try_swap_structs(const bool cube_bb) { /* Allocate the local bb_coordinate storage, etc. only once. */ /* Allocate with size cluster_ctx.clb_nlist.nets().size() for any number of nets affected. 
*/ auto& cluster_ctx = g_vpr_ctx.clustering(); size_t num_nets = cluster_ctx.clb_nlist.nets().size(); - ts_bb_coord_new.resize(num_nets, t_bb()); - ts_bb_edge_new.resize(num_nets, t_bb()); + const int num_layers = g_vpr_ctx.device().grid.get_num_layers(); + + if (cube_bb) { + ts_bb_edge_new.resize(num_nets, t_bb()); + ts_bb_coord_new.resize(num_nets, t_bb()); + } else { + VTR_ASSERT_SAFE(!cube_bb); + layer_ts_bb_edge_new.resize(num_nets, std::vector(num_layers, t_2D_bb())); + layer_ts_bb_coord_new.resize(num_nets, std::vector(num_layers, t_2D_bb())); + } + + ts_layer_sink_pin_count.resize({num_nets, size_t(num_layers)}); + for (size_t flat_idx = 0; flat_idx < ts_layer_sink_pin_count.size(); flat_idx++) { + auto& elem = ts_layer_sink_pin_count.get(flat_idx); + elem = OPEN; + } + ts_nets_to_update.resize(num_nets, ClusterNetId::INVALID()); auto& place_ctx = g_vpr_ctx.mutable_placement(); @@ -2392,8 +2741,11 @@ static void alloc_and_load_try_swap_structs() { } static void free_try_swap_structs() { - vtr::release_memory(ts_bb_coord_new); vtr::release_memory(ts_bb_edge_new); + vtr::release_memory(ts_bb_coord_new); + vtr::release_memory(layer_ts_bb_edge_new); + vtr::release_memory(layer_ts_bb_coord_new); + ts_layer_sink_pin_count.clear(); vtr::release_memory(ts_nets_to_update); auto& place_ctx = g_vpr_ctx.mutable_placement(); @@ -2404,8 +2756,11 @@ static void free_try_swap_structs() { * from only the block location information). It updates both the * * coordinate and number of pins on each edge information. It * * should only be called when the bounding box information is not valid. */ -static void get_bb_from_scratch(ClusterNetId net_id, t_bb* coords, t_bb* num_on_edges) { - int pnum, x, y, xmin, xmax, ymin, ymax; +static void get_bb_from_scratch(ClusterNetId net_id, + t_bb& coords, + t_bb& num_on_edges, + vtr::NdMatrixProxy num_sink_pin_layer) { + int pnum, x, y, pin_layer, xmin, xmax, ymin, ymax; int xmin_edge, xmax_edge, ymin_edge, ymax_edge; auto& cluster_ctx = g_vpr_ctx.clustering(); @@ -2433,6 +2788,10 @@ static void get_bb_from_scratch(ClusterNetId net_id, t_bb* coords, t_bb* num_on_ xmax_edge = 1; ymax_edge = 1; + for (int layer_num = 0; layer_num < grid.get_num_layers(); layer_num++) { + num_sink_pin_layer[layer_num] = 0; + } + for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { bnum = cluster_ctx.clb_nlist.pin_block(pin_id); pnum = tile_pin_index(pin_id); @@ -2440,6 +2799,7 @@ static void get_bb_from_scratch(ClusterNetId net_id, t_bb* coords, t_bb* num_on_ + physical_tile_type(bnum)->pin_width_offset[pnum]; y = place_ctx.block_locs[bnum].loc.y + physical_tile_type(bnum)->pin_height_offset[pnum]; + pin_layer = place_ctx.block_locs[bnum].loc.layer; /* Code below counts IO blocks as being within the 1..grid.width()-2, 1..grid.height()-2 clb array. * * This is because channels do not go out of the 0..grid.width()-2, 0..grid.height()-2 range, and * @@ -2476,19 +2836,134 @@ static void get_bb_from_scratch(ClusterNetId net_id, t_bb* coords, t_bb* num_on_ ymax = y; ymax_edge = 1; } + + num_sink_pin_layer[pin_layer]++; } /* Copy the coordinates and number on edges information into the proper * * structures. 
*/
-    coords->xmin = xmin;
-    coords->xmax = xmax;
-    coords->ymin = ymin;
-    coords->ymax = ymax;
+    coords.xmin = xmin;
+    coords.xmax = xmax;
+    coords.ymin = ymin;
+    coords.ymax = ymax;
+
+    num_on_edges.xmin = xmin_edge;
+    num_on_edges.xmax = xmax_edge;
+    num_on_edges.ymin = ymin_edge;
+    num_on_edges.ymax = ymax_edge;
+}
+
+/* This routine finds the bounding box of each net from scratch when the bounding box is of type per-layer (i.e. *
+ * from only the block location information). It updates the                                                      *
+ * coordinate, number of pins on each edge information, and the number of sinks on each layer. It                 *
+ * should only be called when the bounding box information is not valid.                                          */
+static void get_layer_bb_from_scratch(ClusterNetId net_id,
+                                      std::vector<t_2D_bb>& num_on_edges,
+                                      std::vector<t_2D_bb>& coords,
+                                      vtr::NdMatrixProxy<int, 1> layer_pin_sink_count) {
+    auto& device_ctx = g_vpr_ctx.device();
+    const int num_layers = device_ctx.grid.get_num_layers();
+    std::vector<int> xmin(num_layers, OPEN);
+    std::vector<int> xmax(num_layers, OPEN);
+    std::vector<int> ymin(num_layers, OPEN);
+    std::vector<int> ymax(num_layers, OPEN);
+    std::vector<int> xmin_edge(num_layers, OPEN);
+    std::vector<int> xmax_edge(num_layers, OPEN);
+    std::vector<int> ymin_edge(num_layers, OPEN);
+    std::vector<int> ymax_edge(num_layers, OPEN);
+
+    std::vector<int> num_sink_pin_layer(num_layers, 0);
+
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+    auto& place_ctx = g_vpr_ctx.placement();
+    auto& grid = device_ctx.grid;

-    num_on_edges->xmin = xmin_edge;
-    num_on_edges->xmax = xmax_edge;
-    num_on_edges->ymin = ymin_edge;
-    num_on_edges->ymax = ymax_edge;
+    ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id);
+    int pnum_src = net_pin_to_tile_pin_index(net_id, 0);
+    VTR_ASSERT(pnum_src >= 0);
+    int x_src = place_ctx.block_locs[bnum].loc.x
+                + physical_tile_type(bnum)->pin_width_offset[pnum_src];
+    int y_src = place_ctx.block_locs[bnum].loc.y
+                + physical_tile_type(bnum)->pin_height_offset[pnum_src];
+
+    x_src = max(min(x_src, grid.width() - 2), 1);
+    y_src = max(min(y_src, grid.height() - 2), 1);
+
+    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+        xmin[layer_num] = x_src;
+        ymin[layer_num] = y_src;
+        xmax[layer_num] = x_src;
+        ymax[layer_num] = y_src;
+        xmin_edge[layer_num] = 1;
+        ymin_edge[layer_num] = 1;
+        xmax_edge[layer_num] = 1;
+        ymax_edge[layer_num] = 1;
+    }
+
+    for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) {
+        bnum = cluster_ctx.clb_nlist.pin_block(pin_id);
+        int pnum = tile_pin_index(pin_id);
+        int layer = place_ctx.block_locs[bnum].loc.layer;
+        VTR_ASSERT(layer >= 0 && layer < num_layers);
+        num_sink_pin_layer[layer]++;
+        int x = place_ctx.block_locs[bnum].loc.x
+                + physical_tile_type(bnum)->pin_width_offset[pnum];
+        int y = place_ctx.block_locs[bnum].loc.y
+                + physical_tile_type(bnum)->pin_height_offset[pnum];
+
+        /* Code below counts IO blocks as being within the 1..grid.width()-2, 1..grid.height()-2 clb array. *
+         * This is because channels do not go out of the 0..grid.width()-2, 0..grid.height()-2 range, and   *
+         * I always take all channels impinging on the bounding box to be within                            *
+         * that bounding box. Hence, this "movement" of IO blocks does not affect                           *
+         * which channels are included within the bounding box, and it                                      *
+         * simplifies the code a lot.
*/ + + x = max(min(x, grid.width() - 2), 1); //-2 for no perim channels + y = max(min(y, grid.height() - 2), 1); //-2 for no perim channels + + if (x == xmin[layer]) { + xmin_edge[layer]++; + } + if (x == xmax[layer]) { /* Recall that xmin could equal xmax -- don't use else */ + xmax_edge[layer]++; + } else if (x < xmin[layer]) { + xmin[layer] = x; + xmin_edge[layer] = 1; + } else if (x > xmax[layer]) { + xmax[layer] = x; + xmax_edge[layer] = 1; + } + + if (y == ymin[layer]) { + ymin_edge[layer]++; + } + if (y == ymax[layer]) { + ymax_edge[layer]++; + } else if (y < ymin[layer]) { + ymin[layer] = y; + ymin_edge[layer] = 1; + } else if (y > ymax[layer]) { + ymax[layer] = y; + ymax_edge[layer] = 1; + } + } + + /* Copy the coordinates and number on edges information into the proper * + * structures. */ + for (int layer_num = 0; layer_num < num_layers; layer_num++) { + layer_pin_sink_count[layer_num] = num_sink_pin_layer[layer_num]; + coords[layer_num].xmin = xmin[layer_num]; + coords[layer_num].xmax = xmax[layer_num]; + coords[layer_num].ymin = ymin[layer_num]; + coords[layer_num].ymax = ymax[layer_num]; + coords[layer_num].layer_num = layer_num; + + num_on_edges[layer_num].xmin = xmin_edge[layer_num]; + num_on_edges[layer_num].xmax = xmax_edge[layer_num]; + num_on_edges[layer_num].ymin = ymin_edge[layer_num]; + num_on_edges[layer_num].ymax = ymax_edge[layer_num]; + num_on_edges[layer_num].layer_num = layer_num; + } } static double wirelength_crossing_count(size_t fanout) { @@ -2502,7 +2977,7 @@ static double wirelength_crossing_count(size_t fanout) { } } -static double get_net_wirelength_estimate(ClusterNetId net_id, t_bb* bbptr) { +static double get_net_wirelength_estimate(ClusterNetId net_id, const t_bb& bbptr) { /* WMF: Finds the estimate of wirelength due to one net by looking at * * its coordinate bounding box. */ @@ -2519,14 +2994,46 @@ static double get_net_wirelength_estimate(ClusterNetId net_id, t_bb* bbptr) { /* Cost = wire length along channel * cross_count / average * * channel capacity. Do this for x, then y direction and add. */ - ncost = (bbptr->xmax - bbptr->xmin + 1) * crossing; + ncost = (bbptr.xmax - bbptr.xmin + 1) * crossing; + + ncost += (bbptr.ymax - bbptr.ymin + 1) * crossing; + + return (ncost); +} + +static double get_net_layer_wirelength_estimate(ClusterNetId /* net_id */, + const std::vector& bbptr, + const vtr::NdMatrixProxy layer_pin_sink_count) { + /* WMF: Finds the estimate of wirelength due to one net by looking at * + * its coordinate bounding box. */ + + double ncost = 0.; + double crossing = 0.; + int num_layers = g_vpr_ctx.device().grid.get_num_layers(); + + for (int layer_num = 0; layer_num < num_layers; layer_num++) { + VTR_ASSERT(layer_pin_sink_count[layer_num] != OPEN); + if (layer_pin_sink_count[layer_num] == 0) { + continue; + } + crossing = wirelength_crossing_count(layer_pin_sink_count[layer_num] + 1); + + /* Could insert a check for xmin == xmax. In that case, assume * + * connection will be made with no bends and hence no x-cost. * + * Same thing for y-cost. */ + + /* Cost = wire length along channel * cross_count / average * + * channel capacity. Do this for x, then y direction and add. 
*/ - ncost += (bbptr->ymax - bbptr->ymin + 1) * crossing; + ncost += (bbptr[layer_num].xmax - bbptr[layer_num].xmin + 1) * crossing; + + ncost += (bbptr[layer_num].ymax - bbptr[layer_num].ymin + 1) * crossing; + } return (ncost); } -static double get_net_cost(ClusterNetId net_id, t_bb* bbptr) { +static double get_net_cost(ClusterNetId net_id, const t_bb& bbptr) { /* Finds the cost due to one net by looking at its coordinate bounding * * box. */ @@ -2543,11 +3050,45 @@ static double get_net_cost(ClusterNetId net_id, t_bb* bbptr) { /* Cost = wire length along channel * cross_count / average * * channel capacity. Do this for x, then y direction and add. */ - ncost = (bbptr->xmax - bbptr->xmin + 1) * crossing - * chanx_place_cost_fac[bbptr->ymax][bbptr->ymin - 1]; + ncost = (bbptr.xmax - bbptr.xmin + 1) * crossing + * chanx_place_cost_fac[bbptr.ymax][bbptr.ymin - 1]; - ncost += (bbptr->ymax - bbptr->ymin + 1) * crossing - * chany_place_cost_fac[bbptr->xmax][bbptr->xmin - 1]; + ncost += (bbptr.ymax - bbptr.ymin + 1) * crossing + * chany_place_cost_fac[bbptr.xmax][bbptr.xmin - 1]; + + return (ncost); +} + +static double get_net_layer_cost(ClusterNetId /* net_id */, + const std::vector& bbptr, + const vtr::NdMatrixProxy layer_pin_sink_count) { + /* Finds the cost due to one net by looking at its coordinate bounding * + * box. */ + + double ncost = 0.; + double crossing = 0.; + int num_layers = g_vpr_ctx.device().grid.get_num_layers(); + + for (int layer_num = 0; layer_num < num_layers; layer_num++) { + VTR_ASSERT(layer_pin_sink_count[layer_num] != OPEN); + if (layer_pin_sink_count[layer_num] == 0) { + continue; + } + crossing = wirelength_crossing_count(layer_pin_sink_count[layer_num] + 1); + + /* Could insert a check for xmin == xmax. In that case, assume * + * connection will be made with no bends and hence no x-cost. * + * Same thing for y-cost. */ + + /* Cost = wire length along channel * cross_count / average * + * channel capacity. Do this for x, then y direction and add. */ + + ncost += (bbptr[layer_num].xmax - bbptr[layer_num].xmin + 1) * crossing + * chanx_place_cost_fac[bbptr[layer_num].ymax][bbptr[layer_num].ymin - 1]; + + ncost += (bbptr[layer_num].ymax - bbptr[layer_num].ymin + 1) * crossing + * chany_place_cost_fac[bbptr[layer_num].xmax][bbptr[layer_num].xmin - 1]; + } return (ncost); } @@ -2559,10 +3100,12 @@ static double get_net_cost(ClusterNetId net_id, t_bb* bbptr) { * Currently assumes channels on both sides of the CLBs forming the * * edges of the bounding box can be used. Essentially, I am assuming * * the pins always lie on the outside of the bounding box. 
*/ -static void get_non_updateable_bb(ClusterNetId net_id, t_bb* bb_coord_new) { +static void get_non_updateable_bb(ClusterNetId net_id, + t_bb& bb_coord_new, + vtr::NdMatrixProxy num_sink_pin_layer) { //TODO: account for multiple physical pin instances per logical pin - int xmax, ymax, xmin, ymin, x, y; + int xmax, ymax, xmin, ymin, x, y, layer; int pnum; auto& cluster_ctx = g_vpr_ctx.clustering(); @@ -2582,6 +3125,10 @@ static void get_non_updateable_bb(ClusterNetId net_id, t_bb* bb_coord_new) { xmax = x; ymax = y; + for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { + num_sink_pin_layer[layer_num] = 0; + } + for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { bnum = cluster_ctx.clb_nlist.pin_block(pin_id); pnum = tile_pin_index(pin_id); @@ -2589,6 +3136,7 @@ static void get_non_updateable_bb(ClusterNetId net_id, t_bb* bb_coord_new) { + physical_tile_type(bnum)->pin_width_offset[pnum]; y = place_ctx.block_locs[bnum].loc.y + physical_tile_type(bnum)->pin_height_offset[pnum]; + layer = place_ctx.block_locs[bnum].loc.layer; if (x < xmin) { xmin = x; @@ -2601,6 +3149,8 @@ static void get_non_updateable_bb(ClusterNetId net_id, t_bb* bb_coord_new) { } else if (y > ymax) { ymax = y; } + + num_sink_pin_layer[layer]++; } /* Now I've found the coordinates of the bounding box. There are no * @@ -2611,13 +3161,87 @@ static void get_non_updateable_bb(ClusterNetId net_id, t_bb* bb_coord_new) { * clip to 1 in both directions as well (since minimum channel index * * is 0). See route_common.cpp for a channel diagram. */ - bb_coord_new->xmin = max(min(xmin, device_ctx.grid.width() - 2), 1); //-2 for no perim channels - bb_coord_new->ymin = max(min(ymin, device_ctx.grid.height() - 2), 1); //-2 for no perim channels - bb_coord_new->xmax = max(min(xmax, device_ctx.grid.width() - 2), 1); //-2 for no perim channels - bb_coord_new->ymax = max(min(ymax, device_ctx.grid.height() - 2), 1); //-2 for no perim channels + bb_coord_new.xmin = max(min(xmin, device_ctx.grid.width() - 2), 1); //-2 for no perim channels + bb_coord_new.ymin = max(min(ymin, device_ctx.grid.height() - 2), 1); //-2 for no perim channels + bb_coord_new.xmax = max(min(xmax, device_ctx.grid.width() - 2), 1); //-2 for no perim channels + bb_coord_new.ymax = max(min(ymax, device_ctx.grid.height() - 2), 1); //-2 for no perim channels +} + +static void get_non_updateable_layer_bb(ClusterNetId net_id, + std::vector& bb_coord_new, + vtr::NdMatrixProxy num_sink_layer) { + //TODO: account for multiple physical pin instances per logical pin + + auto& device_ctx = g_vpr_ctx.device(); + int num_layers = device_ctx.grid.get_num_layers(); + for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { + num_sink_layer[layer_num] = 0; + } + + int pnum; + + auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& place_ctx = g_vpr_ctx.placement(); + + ClusterBlockId bnum = cluster_ctx.clb_nlist.net_driver_block(net_id); + pnum = net_pin_to_tile_pin_index(net_id, 0); + + int src_x = place_ctx.block_locs[bnum].loc.x + + physical_tile_type(bnum)->pin_width_offset[pnum]; + int src_y = place_ctx.block_locs[bnum].loc.y + + physical_tile_type(bnum)->pin_height_offset[pnum]; + + std::vector xmin(num_layers, src_x); + std::vector ymin(num_layers, src_y); + std::vector xmax(num_layers, src_x); + std::vector ymax(num_layers, src_y); + + for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { + bnum = cluster_ctx.clb_nlist.pin_block(pin_id); + pnum = tile_pin_index(pin_id); + int x = 
place_ctx.block_locs[bnum].loc.x + + physical_tile_type(bnum)->pin_width_offset[pnum]; + int y = place_ctx.block_locs[bnum].loc.y + + physical_tile_type(bnum)->pin_height_offset[pnum]; + + int layer_num = place_ctx.block_locs[bnum].loc.layer; + num_sink_layer[layer_num]++; + if (x < xmin[layer_num]) { + xmin[layer_num] = x; + } else if (x > xmax[layer_num]) { + xmax[layer_num] = x; + } + + if (y < ymin[layer_num]) { + ymin[layer_num] = y; + } else if (y > ymax[layer_num]) { + ymax[layer_num] = y; + } + } + + /* Now I've found the coordinates of the bounding box. There are no * + * channels beyond device_ctx.grid.width()-2 and * + * device_ctx.grid.height() - 2, so I want to clip to that. As well,* + * since I'll always include the channel immediately below and the * + * channel immediately to the left of the bounding box, I want to * + * clip to 1 in both directions as well (since minimum channel index * + * is 0). See route_common.cpp for a channel diagram. */ + for (int layer_num = 0; layer_num < num_layers; layer_num++) { + bb_coord_new[layer_num].layer_num = layer_num; + bb_coord_new[layer_num].xmin = max(min(xmin[layer_num], device_ctx.grid.width() - 2), 1); //-2 for no perim channels + bb_coord_new[layer_num].ymin = max(min(ymin[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels + bb_coord_new[layer_num].xmax = max(min(xmax[layer_num], device_ctx.grid.width() - 2), 1); //-2 for no perim channels + bb_coord_new[layer_num].ymax = max(min(ymax[layer_num], device_ctx.grid.height() - 2), 1); //-2 for no perim channels + } } -static void update_bb(ClusterNetId net_id, t_bb* bb_coord_new, t_bb* bb_edge_new, int xold, int yold, int xnew, int ynew) { +static void update_bb(ClusterNetId net_id, + t_bb& bb_edge_new, + t_bb& bb_coord_new, + vtr::NdMatrixProxy num_sink_pin_layer_new, + t_physical_tile_loc pin_old_loc, + t_physical_tile_loc pin_new_loc, + bool src_pin) { /* Updates the bounding box of a net by storing its coordinates in * * the bb_coord_new data structure and the number of blocks on each * * edge in the bb_edge_new data structure. This routine should only * @@ -2636,174 +3260,546 @@ static void update_bb(ClusterNetId net_id, t_bb* bb_coord_new, t_bb* bb_edge_new auto& device_ctx = g_vpr_ctx.device(); auto& place_move_ctx = g_placer_ctx.move(); - xnew = max(min(xnew, device_ctx.grid.width() - 2), 1); //-2 for no perim channels - ynew = max(min(ynew, device_ctx.grid.height() - 2), 1); //-2 for no perim channels - xold = max(min(xold, device_ctx.grid.width() - 2), 1); //-2 for no perim channels - yold = max(min(yold, device_ctx.grid.height() - 2), 1); //-2 for no perim channels + const int num_layers = device_ctx.grid.get_num_layers(); + + pin_new_loc.x = max(min(pin_new_loc.x, device_ctx.grid.width() - 2), 1); //-2 for no perim channels + pin_new_loc.y = max(min(pin_new_loc.y, device_ctx.grid.height() - 2), 1); //-2 for no perim channels + pin_old_loc.x = max(min(pin_old_loc.x, device_ctx.grid.width() - 2), 1); //-2 for no perim channels + pin_old_loc.y = max(min(pin_old_loc.y, device_ctx.grid.height() - 2), 1); //-2 for no perim channels /* Check if the net had been updated before. */ if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) { /* The net had been updated from scratch, DO NOT update again! */ return; - } else if (bb_updated_before[net_id] == NOT_UPDATED_YET) { + } + + vtr::NdMatrixProxy curr_num_sink_pin_layer = (bb_updated_before[net_id] == NOT_UPDATED_YET) ? 
place_move_ctx.num_sink_pin_layer[size_t(net_id)] : num_sink_pin_layer_new; + + if (bb_updated_before[net_id] == NOT_UPDATED_YET) { /* The net had NOT been updated before, could use the old values */ - curr_bb_coord = &place_move_ctx.bb_coords[net_id]; curr_bb_edge = &place_move_ctx.bb_num_on_edges[net_id]; + curr_bb_coord = &place_move_ctx.bb_coords[net_id]; bb_updated_before[net_id] = UPDATED_ONCE; } else { /* The net had been updated before, must use the new values */ - curr_bb_coord = bb_coord_new; - curr_bb_edge = bb_edge_new; + curr_bb_coord = &bb_coord_new; + curr_bb_edge = &bb_edge_new; } /* Check if I can update the bounding box incrementally. */ - if (xnew < xold) { /* Move to left. */ + if (pin_new_loc.x < pin_old_loc.x) { /* Move to left. */ /* Update the xmax fields for coordinates and number of edges first. */ - if (xold == curr_bb_coord->xmax) { /* Old position at xmax. */ + if (pin_old_loc.x == curr_bb_coord->xmax) { /* Old position at xmax. */ if (curr_bb_edge->xmax == 1) { - get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new); + get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer_new); bb_updated_before[net_id] = GOT_FROM_SCRATCH; return; } else { - bb_edge_new->xmax = curr_bb_edge->xmax - 1; - bb_coord_new->xmax = curr_bb_coord->xmax; + bb_edge_new.xmax = curr_bb_edge->xmax - 1; + bb_coord_new.xmax = curr_bb_coord->xmax; } } else { /* Move to left, old postion was not at xmax. */ - bb_coord_new->xmax = curr_bb_coord->xmax; - bb_edge_new->xmax = curr_bb_edge->xmax; + bb_coord_new.xmax = curr_bb_coord->xmax; + bb_edge_new.xmax = curr_bb_edge->xmax; } /* Now do the xmin fields for coordinates and number of edges. */ - if (xnew < curr_bb_coord->xmin) { /* Moved past xmin */ - bb_coord_new->xmin = xnew; - bb_edge_new->xmin = 1; - } else if (xnew == curr_bb_coord->xmin) { /* Moved to xmin */ - bb_coord_new->xmin = xnew; - bb_edge_new->xmin = curr_bb_edge->xmin + 1; + if (pin_new_loc.x < curr_bb_coord->xmin) { /* Moved past xmin */ + bb_coord_new.xmin = pin_new_loc.x; + bb_edge_new.xmin = 1; + } else if (pin_new_loc.x == curr_bb_coord->xmin) { /* Moved to xmin */ + bb_coord_new.xmin = pin_new_loc.x; + bb_edge_new.xmin = curr_bb_edge->xmin + 1; } else { /* Xmin unchanged. */ - bb_coord_new->xmin = curr_bb_coord->xmin; - bb_edge_new->xmin = curr_bb_edge->xmin; + bb_coord_new.xmin = curr_bb_coord->xmin; + bb_edge_new.xmin = curr_bb_edge->xmin; } /* End of move to left case. */ - } else if (xnew > xold) { /* Move to right. */ + } else if (pin_new_loc.x > pin_old_loc.x) { /* Move to right. */ /* Update the xmin fields for coordinates and number of edges first. */ - if (xold == curr_bb_coord->xmin) { /* Old position at xmin. */ + if (pin_old_loc.x == curr_bb_coord->xmin) { /* Old position at xmin. */ if (curr_bb_edge->xmin == 1) { - get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new); + get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer_new); bb_updated_before[net_id] = GOT_FROM_SCRATCH; return; } else { - bb_edge_new->xmin = curr_bb_edge->xmin - 1; - bb_coord_new->xmin = curr_bb_coord->xmin; + bb_edge_new.xmin = curr_bb_edge->xmin - 1; + bb_coord_new.xmin = curr_bb_coord->xmin; } } else { /* Move to right, old position was not at xmin. */ - bb_coord_new->xmin = curr_bb_coord->xmin; - bb_edge_new->xmin = curr_bb_edge->xmin; + bb_coord_new.xmin = curr_bb_coord->xmin; + bb_edge_new.xmin = curr_bb_edge->xmin; } /* Now do the xmax fields for coordinates and number of edges. */ - if (xnew > curr_bb_coord->xmax) { /* Moved past xmax. 
*/ - bb_coord_new->xmax = xnew; - bb_edge_new->xmax = 1; - } else if (xnew == curr_bb_coord->xmax) { /* Moved to xmax */ - bb_coord_new->xmax = xnew; - bb_edge_new->xmax = curr_bb_edge->xmax + 1; + if (pin_new_loc.x > curr_bb_coord->xmax) { /* Moved past xmax. */ + bb_coord_new.xmax = pin_new_loc.x; + bb_edge_new.xmax = 1; + } else if (pin_new_loc.x == curr_bb_coord->xmax) { /* Moved to xmax */ + bb_coord_new.xmax = pin_new_loc.x; + bb_edge_new.xmax = curr_bb_edge->xmax + 1; } else { /* Xmax unchanged. */ - bb_coord_new->xmax = curr_bb_coord->xmax; - bb_edge_new->xmax = curr_bb_edge->xmax; + bb_coord_new.xmax = curr_bb_coord->xmax; + bb_edge_new.xmax = curr_bb_edge->xmax; } /* End of move to right case. */ - } else { /* xnew == xold -- no x motion. */ - bb_coord_new->xmin = curr_bb_coord->xmin; - bb_coord_new->xmax = curr_bb_coord->xmax; - bb_edge_new->xmin = curr_bb_edge->xmin; - bb_edge_new->xmax = curr_bb_edge->xmax; + } else { /* pin_new_loc.x == pin_old_loc.x -- no x motion. */ + bb_coord_new.xmin = curr_bb_coord->xmin; + bb_coord_new.xmax = curr_bb_coord->xmax; + bb_edge_new.xmin = curr_bb_edge->xmin; + bb_edge_new.xmax = curr_bb_edge->xmax; } /* Now account for the y-direction motion. */ - if (ynew < yold) { /* Move down. */ + if (pin_new_loc.y < pin_old_loc.y) { /* Move down. */ /* Update the ymax fields for coordinates and number of edges first. */ - if (yold == curr_bb_coord->ymax) { /* Old position at ymax. */ + if (pin_old_loc.y == curr_bb_coord->ymax) { /* Old position at ymax. */ if (curr_bb_edge->ymax == 1) { - get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new); + get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer_new); bb_updated_before[net_id] = GOT_FROM_SCRATCH; return; } else { - bb_edge_new->ymax = curr_bb_edge->ymax - 1; - bb_coord_new->ymax = curr_bb_coord->ymax; + bb_edge_new.ymax = curr_bb_edge->ymax - 1; + bb_coord_new.ymax = curr_bb_coord->ymax; } } else { /* Move down, old postion was not at ymax. */ - bb_coord_new->ymax = curr_bb_coord->ymax; - bb_edge_new->ymax = curr_bb_edge->ymax; + bb_coord_new.ymax = curr_bb_coord->ymax; + bb_edge_new.ymax = curr_bb_edge->ymax; } /* Now do the ymin fields for coordinates and number of edges. */ - if (ynew < curr_bb_coord->ymin) { /* Moved past ymin */ - bb_coord_new->ymin = ynew; - bb_edge_new->ymin = 1; - } else if (ynew == curr_bb_coord->ymin) { /* Moved to ymin */ - bb_coord_new->ymin = ynew; - bb_edge_new->ymin = curr_bb_edge->ymin + 1; + if (pin_new_loc.y < curr_bb_coord->ymin) { /* Moved past ymin */ + bb_coord_new.ymin = pin_new_loc.y; + bb_edge_new.ymin = 1; + } else if (pin_new_loc.y == curr_bb_coord->ymin) { /* Moved to ymin */ + bb_coord_new.ymin = pin_new_loc.y; + bb_edge_new.ymin = curr_bb_edge->ymin + 1; } else { /* ymin unchanged. */ - bb_coord_new->ymin = curr_bb_coord->ymin; - bb_edge_new->ymin = curr_bb_edge->ymin; + bb_coord_new.ymin = curr_bb_coord->ymin; + bb_edge_new.ymin = curr_bb_edge->ymin; } /* End of move down case. */ - } else if (ynew > yold) { /* Moved up. */ + } else if (pin_new_loc.y > pin_old_loc.y) { /* Moved up. */ /* Update the ymin fields for coordinates and number of edges first. */ - if (yold == curr_bb_coord->ymin) { /* Old position at ymin. */ + if (pin_old_loc.y == curr_bb_coord->ymin) { /* Old position at ymin. 
 */
+        if (pin_old_loc.y == curr_bb_coord->ymin) { /* Old position at ymin. */
             if (curr_bb_edge->ymin == 1) {
-                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new);
+                get_bb_from_scratch(net_id, bb_coord_new, bb_edge_new, num_sink_pin_layer_new);
                 bb_updated_before[net_id] = GOT_FROM_SCRATCH;
                 return;
             } else {
-                bb_edge_new->ymin = curr_bb_edge->ymin - 1;
-                bb_coord_new->ymin = curr_bb_coord->ymin;
+                bb_edge_new.ymin = curr_bb_edge->ymin - 1;
+                bb_coord_new.ymin = curr_bb_coord->ymin;
             }
         } else { /* Moved up, old position was not at ymin. */
-            bb_coord_new->ymin = curr_bb_coord->ymin;
-            bb_edge_new->ymin = curr_bb_edge->ymin;
+            bb_coord_new.ymin = curr_bb_coord->ymin;
+            bb_edge_new.ymin = curr_bb_edge->ymin;
         }
 
         /* Now do the ymax fields for coordinates and number of edges. */
-        if (ynew > curr_bb_coord->ymax) { /* Moved past ymax. */
-            bb_coord_new->ymax = ynew;
-            bb_edge_new->ymax = 1;
-        } else if (ynew == curr_bb_coord->ymax) { /* Moved to ymax */
-            bb_coord_new->ymax = ynew;
-            bb_edge_new->ymax = curr_bb_edge->ymax + 1;
+        if (pin_new_loc.y > curr_bb_coord->ymax) { /* Moved past ymax. */
+            bb_coord_new.ymax = pin_new_loc.y;
+            bb_edge_new.ymax = 1;
+        } else if (pin_new_loc.y == curr_bb_coord->ymax) { /* Moved to ymax */
+            bb_coord_new.ymax = pin_new_loc.y;
+            bb_edge_new.ymax = curr_bb_edge->ymax + 1;
         } else { /* ymax unchanged. */
-            bb_coord_new->ymax = curr_bb_coord->ymax;
-            bb_edge_new->ymax = curr_bb_edge->ymax;
+            bb_coord_new.ymax = curr_bb_coord->ymax;
+            bb_edge_new.ymax = curr_bb_edge->ymax;
         } /* End of move up case. */
 
-    } else { /* ynew == yold -- no y motion. */
-        bb_coord_new->ymin = curr_bb_coord->ymin;
-        bb_coord_new->ymax = curr_bb_coord->ymax;
-        bb_edge_new->ymin = curr_bb_edge->ymin;
-        bb_edge_new->ymax = curr_bb_edge->ymax;
+    } else { /* pin_new_loc.y == pin_old_loc.y -- no y motion. */
+        bb_coord_new.ymin = curr_bb_coord->ymin;
+        bb_coord_new.ymax = curr_bb_coord->ymax;
+        bb_edge_new.ymin = curr_bb_edge->ymin;
+        bb_edge_new.ymax = curr_bb_edge->ymax;
+    }
+
+    /* Now account for the layer motion. */
+    if (num_layers > 1) {
+        /* We need to update the per-layer sink counts only if multiple layers are available */
+        for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+            num_sink_pin_layer_new[layer_num] = curr_num_sink_pin_layer[layer_num];
+        }
+        if (!src_pin) {
+            /* if the src pin is being moved, we don't need to update this data structure */
+            if (pin_old_loc.layer_num != pin_new_loc.layer_num) {
+                num_sink_pin_layer_new[pin_old_loc.layer_num] = (curr_num_sink_pin_layer)[pin_old_loc.layer_num] - 1;
+                num_sink_pin_layer_new[pin_new_loc.layer_num] = (curr_num_sink_pin_layer)[pin_new_loc.layer_num] + 1;
+            }
+        }
+    }
+
+    if (bb_updated_before[net_id] == NOT_UPDATED_YET) {
+        bb_updated_before[net_id] = UPDATED_ONCE;
+    }
+}
+
+static void update_layer_bb(ClusterNetId net_id,
+                            std::vector<t_2D_bb>& bb_edge_new,
+                            std::vector<t_2D_bb>& bb_coord_new,
+                            vtr::NdMatrixProxy<int, 1> bb_pin_sink_count_new,
+                            t_physical_tile_loc pin_old_loc,
+                            t_physical_tile_loc pin_new_loc,
+                            bool is_output_pin) {
+    /* Updates the bounding box of a net by storing its coordinates in    *
+     * the bb_coord_new data structure and the number of blocks on each   *
+     * edge in the bb_edge_new data structure.  This routine should only  *
+     * be called for large nets, since it has some overhead relative to   *
+     * just doing a brute force bounding box calculation.  The bounding   *
+     * box coordinate and edge information for inet must be valid before  *
+     * this routine is called.                                            *
+     * Currently assumes channels on both sides of the CLBs forming the   *
+     * edges of the bounding box can be used. 
Essentially, I am assuming * + * the pins always lie on the outside of the bounding box. * + * The x and y coordinates are the pin's x and y coordinates. */ + /* IO blocks are considered to be one cell in for simplicity. */ + //TODO: account for multiple physical pin instances per logical pin + const std::vector*curr_bb_edge, *curr_bb_coord; + + auto& device_ctx = g_vpr_ctx.device(); + auto& place_move_ctx = g_placer_ctx.move(); + + pin_new_loc.x = max(min(pin_new_loc.x, device_ctx.grid.width() - 2), 1); //-2 for no perim channels + pin_new_loc.y = max(min(pin_new_loc.y, device_ctx.grid.height() - 2), 1); //-2 for no perim channels + pin_old_loc.x = max(min(pin_old_loc.x, device_ctx.grid.width() - 2), 1); //-2 for no perim channels + pin_old_loc.y = max(min(pin_old_loc.y, device_ctx.grid.height() - 2), 1); //-2 for no perim channels + + /* Check if the net had been updated before. */ + if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) { + /* The net had been updated from scratch, DO NOT update again! */ + return; } + const vtr::NdMatrixProxy curr_layer_pin_sink_count = (bb_updated_before[net_id] == NOT_UPDATED_YET) ? place_move_ctx.num_sink_pin_layer[size_t(net_id)] : bb_pin_sink_count_new; + if (bb_updated_before[net_id] == NOT_UPDATED_YET) { + /* The net had NOT been updated before, could use the old values */ + curr_bb_edge = &place_move_ctx.layer_bb_num_on_edges[net_id]; + curr_bb_coord = &place_move_ctx.layer_bb_coords[net_id]; bb_updated_before[net_id] = UPDATED_ONCE; + } else { + /* The net had been updated before, must use the new values */ + curr_bb_edge = &bb_edge_new; + curr_bb_coord = &bb_coord_new; + } + + /* Check if I can update the bounding box incrementally. */ + + update_bb_pin_sink_count(net_id, + pin_old_loc, + pin_new_loc, + curr_layer_pin_sink_count, + bb_pin_sink_count_new, + is_output_pin); + + int layer_old = pin_old_loc.layer_num; + int layer_new = pin_new_loc.layer_num; + bool layer_changed = (layer_old != layer_new); + + bb_edge_new = *curr_bb_edge; + bb_coord_new = *curr_bb_coord; + + if (layer_changed) { + update_bb_layer_changed(net_id, + pin_old_loc, + pin_new_loc, + *curr_bb_edge, + *curr_bb_coord, + bb_pin_sink_count_new, + bb_edge_new, + bb_coord_new); + } else { + update_bb_same_layer(net_id, + pin_old_loc, + pin_new_loc, + *curr_bb_edge, + *curr_bb_coord, + bb_pin_sink_count_new, + bb_edge_new, + bb_coord_new); + } + + if (bb_updated_before[net_id] == NOT_UPDATED_YET) { + bb_updated_before[net_id] = UPDATED_ONCE; + } +} + +static inline void update_bb_same_layer(ClusterNetId net_id, + const t_physical_tile_loc& pin_old_loc, + const t_physical_tile_loc& pin_new_loc, + const std::vector& curr_bb_edge, + const std::vector& curr_bb_coord, + vtr::NdMatrixProxy bb_pin_sink_count_new, + std::vector& bb_edge_new, + std::vector& bb_coord_new) { + int x_old = pin_old_loc.x; + int x_new = pin_new_loc.x; + + int y_old = pin_old_loc.y; + int y_new = pin_new_loc.y; + + int layer_num = pin_old_loc.layer_num; + VTR_ASSERT_SAFE(layer_num == pin_new_loc.layer_num); + + if (x_new < x_old) { + if (x_old == curr_bb_coord[layer_num].xmax) { + update_bb_edge(net_id, + bb_edge_new, + bb_coord_new, + bb_pin_sink_count_new, + curr_bb_edge[layer_num].xmax, + curr_bb_coord[layer_num].xmax, + bb_edge_new[layer_num].xmax, + bb_coord_new[layer_num].xmax); + if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) { + return; + } + } + + if (x_new < curr_bb_coord[layer_num].xmin) { + bb_edge_new[layer_num].xmin = 1; + bb_coord_new[layer_num].xmin = x_new; + } else if (x_new == 
curr_bb_coord[layer_num].xmin) { + bb_edge_new[layer_num].xmin = curr_bb_edge[layer_num].xmin + 1; + bb_coord_new[layer_num].xmin = curr_bb_coord[layer_num].xmin; + } + + } else if (x_new > x_old) { + if (x_old == curr_bb_coord[layer_num].xmin) { + update_bb_edge(net_id, + bb_edge_new, + bb_coord_new, + bb_pin_sink_count_new, + curr_bb_edge[layer_num].xmin, + curr_bb_coord[layer_num].xmin, + bb_edge_new[layer_num].xmin, + bb_coord_new[layer_num].xmin); + if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) { + return; + } + } + + if (x_new > curr_bb_coord[layer_num].xmax) { + bb_edge_new[layer_num].xmax = 1; + bb_coord_new[layer_num].xmax = x_new; + } else if (x_new == curr_bb_coord[layer_num].xmax) { + bb_edge_new[layer_num].xmax = curr_bb_edge[layer_num].xmax + 1; + bb_coord_new[layer_num].xmax = curr_bb_coord[layer_num].xmax; + } + } + + if (y_new < y_old) { + if (y_old == curr_bb_coord[layer_num].ymax) { + update_bb_edge(net_id, + bb_edge_new, + bb_coord_new, + bb_pin_sink_count_new, + curr_bb_edge[layer_num].ymax, + curr_bb_coord[layer_num].ymax, + bb_edge_new[layer_num].ymax, + bb_coord_new[layer_num].ymax); + if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) { + return; + } + } + + if (y_new < curr_bb_coord[layer_num].ymin) { + bb_edge_new[layer_num].ymin = 1; + bb_coord_new[layer_num].ymin = y_new; + } else if (y_new == curr_bb_coord[layer_num].ymin) { + bb_edge_new[layer_num].ymin = curr_bb_edge[layer_num].ymin + 1; + bb_coord_new[layer_num].ymin = curr_bb_coord[layer_num].ymin; + } + + } else if (y_new > y_old) { + if (y_old == curr_bb_coord[layer_num].ymin) { + update_bb_edge(net_id, + bb_edge_new, + bb_coord_new, + bb_pin_sink_count_new, + curr_bb_edge[layer_num].ymin, + curr_bb_coord[layer_num].ymin, + bb_edge_new[layer_num].ymin, + bb_coord_new[layer_num].ymin); + if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) { + return; + } + } + + if (y_new > curr_bb_coord[layer_num].ymax) { + bb_edge_new[layer_num].ymax = 1; + bb_coord_new[layer_num].ymax = y_new; + } else if (y_new == curr_bb_coord[layer_num].ymax) { + bb_edge_new[layer_num].ymax = curr_bb_edge[layer_num].ymax + 1; + bb_coord_new[layer_num].ymax = curr_bb_coord[layer_num].ymax; + } + } +} + +static inline void update_bb_layer_changed(ClusterNetId net_id, + const t_physical_tile_loc& pin_old_loc, + const t_physical_tile_loc& pin_new_loc, + const std::vector& curr_bb_edge, + const std::vector& curr_bb_coord, + vtr::NdMatrixProxy bb_pin_sink_count_new, + std::vector& bb_edge_new, + std::vector& bb_coord_new) { + int x_old = pin_old_loc.x; + + int y_old = pin_old_loc.y; + + int old_layer_num = pin_old_loc.layer_num; + int new_layer_num = pin_new_loc.layer_num; + VTR_ASSERT_SAFE(old_layer_num != new_layer_num); + + if (x_old == curr_bb_coord[old_layer_num].xmax) { + update_bb_edge(net_id, + bb_edge_new, + bb_coord_new, + bb_pin_sink_count_new, + curr_bb_edge[old_layer_num].xmax, + curr_bb_coord[old_layer_num].xmax, + bb_edge_new[old_layer_num].xmax, + bb_coord_new[old_layer_num].xmax); + if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) { + return; + } + } else if (x_old == curr_bb_coord[old_layer_num].xmin) { + update_bb_edge(net_id, + bb_edge_new, + bb_coord_new, + bb_pin_sink_count_new, + curr_bb_edge[old_layer_num].xmin, + curr_bb_coord[old_layer_num].xmin, + bb_edge_new[old_layer_num].xmin, + bb_coord_new[old_layer_num].xmin); + if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) { + return; + } + } + + if (y_old == curr_bb_coord[old_layer_num].ymax) { + update_bb_edge(net_id, + bb_edge_new, + bb_coord_new, + 
bb_pin_sink_count_new, + curr_bb_edge[old_layer_num].ymax, + curr_bb_coord[old_layer_num].ymax, + bb_edge_new[old_layer_num].ymax, + bb_coord_new[old_layer_num].ymax); + if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) { + return; + } + } else if (y_old == curr_bb_coord[old_layer_num].ymin) { + update_bb_edge(net_id, + bb_edge_new, + bb_coord_new, + bb_pin_sink_count_new, + curr_bb_edge[old_layer_num].ymin, + curr_bb_coord[old_layer_num].ymin, + bb_edge_new[old_layer_num].ymin, + bb_coord_new[old_layer_num].ymin); + if (bb_updated_before[net_id] == GOT_FROM_SCRATCH) { + return; + } + } + + add_block_to_bb(pin_new_loc, + curr_bb_edge[new_layer_num], + curr_bb_coord[new_layer_num], + bb_edge_new[new_layer_num], + bb_coord_new[new_layer_num]); +} + +static void update_bb_pin_sink_count(ClusterNetId /* net_id */, + const t_physical_tile_loc& pin_old_loc, + const t_physical_tile_loc& pin_new_loc, + const vtr::NdMatrixProxy curr_layer_pin_sink_count, + vtr::NdMatrixProxy bb_pin_sink_count_new, + bool is_output_pin) { + VTR_ASSERT(curr_layer_pin_sink_count[pin_old_loc.layer_num] > 0 || is_output_pin == 1); + for (int layer_num = 0; layer_num < g_vpr_ctx.device().grid.get_num_layers(); layer_num++) { + bb_pin_sink_count_new[layer_num] = curr_layer_pin_sink_count[layer_num]; + } + if (!is_output_pin) { + bb_pin_sink_count_new[pin_old_loc.layer_num] -= 1; + bb_pin_sink_count_new[pin_new_loc.layer_num] += 1; + } +} + +static inline void update_bb_edge(ClusterNetId net_id, + std::vector& bb_edge_new, + std::vector& bb_coord_new, + vtr::NdMatrixProxy bb_layer_pin_sink_count, + const int& old_num_block_on_edge, + const int& old_edge_coord, + int& new_num_block_on_edge, + int& new_edge_coord) { + if (old_num_block_on_edge == 1) { + get_layer_bb_from_scratch(net_id, + bb_edge_new, + bb_coord_new, + bb_layer_pin_sink_count); + bb_updated_before[net_id] = GOT_FROM_SCRATCH; + return; + } else { + new_num_block_on_edge = old_num_block_on_edge - 1; + new_edge_coord = old_edge_coord; + } +} + +static void add_block_to_bb(const t_physical_tile_loc& new_pin_loc, + const t_2D_bb& bb_edge_old, + const t_2D_bb& bb_coord_old, + t_2D_bb& bb_edge_new, + t_2D_bb& bb_coord_new) { + int x_new = new_pin_loc.x; + int y_new = new_pin_loc.y; + + if (x_new > bb_coord_old.xmax) { + bb_edge_new.xmax = 1; + bb_coord_new.xmax = x_new; + } else if (x_new == bb_coord_old.xmax) { + bb_edge_new.xmax = bb_edge_old.xmax + 1; + } + + if (x_new < bb_coord_old.xmin) { + bb_edge_new.xmin = 1; + bb_coord_new.xmin = x_new; + } else if (x_new == bb_coord_old.xmin) { + bb_edge_new.xmin = bb_edge_old.xmin + 1; + } + + if (y_new > bb_coord_old.ymax) { + bb_edge_new.ymax = 1; + bb_coord_new.ymax = y_new; + } else if (y_new == bb_coord_old.ymax) { + bb_edge_new.ymax = bb_edge_old.ymax + 1; + } + + if (y_new < bb_coord_old.ymin) { + bb_edge_new.ymin = 1; + bb_coord_new.ymin = y_new; + } else if (y_new == bb_coord_old.ymin) { + bb_edge_new.ymin = bb_edge_old.ymin + 1; } } @@ -2961,7 +3957,15 @@ static int check_placement_costs(const t_placer_costs& costs, double bb_cost_check; double timing_cost_check; - bb_cost_check = comp_bb_cost(CHECK); + const auto& cube_bb = g_vpr_ctx.placement().cube_bb; + + if (cube_bb) { + bb_cost_check = comp_bb_cost(CHECK); + } else { + VTR_ASSERT_SAFE(!cube_bb); + bb_cost_check = comp_layer_bb_cost(CHECK); + } + if (fabs(bb_cost_check - costs.bb_cost) > costs.bb_cost * ERROR_TOL) { VTR_LOG_ERROR( "bb_cost_check: %g and bb_cost: %g differ in check_place.\n", @@ -3259,7 +4263,7 @@ static void 
print_placement_swaps_stats(const t_annealing_state& state) {
                 num_swap_accepted, 100 * accept_rate);
     VTR_LOG("\tSwaps rejected: %*d (%4.1f %%)\n", num_swap_print_digits,
             num_swap_rejected, 100 * reject_rate);
-    VTR_LOG("\tSwaps aborted : %*d (%4.1f %%)\n", num_swap_print_digits,
+    VTR_LOG("\tSwaps aborted: %*d (%4.1f %%)\n", num_swap_print_digits,
             num_swap_aborted, 100 * abort_rate);
 }
 
diff --git a/vpr/src/place/place_delay_model.cpp b/vpr/src/place/place_delay_model.cpp
index 44d8c4a0b49..bfdd15707f4 100644
--- a/vpr/src/place/place_delay_model.cpp
+++ b/vpr/src/place/place_delay_model.cpp
@@ -27,11 +27,20 @@
 #endif /* VTR_ENABLE_CAPNPROTO */
 
 ///@brief DeltaDelayModel methods.
-float DeltaDelayModel::delay(int from_x, int from_y, int /*from_pin*/, int to_x, int to_y, int /*to_pin*/, int layer_num) const {
-    int delta_x = std::abs(from_x - to_x);
-    int delta_y = std::abs(from_y - to_y);
-
-    return delays_[layer_num][delta_x][delta_y];
+float DeltaDelayModel::delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const {
+    int delta_x = std::abs(from_loc.x - to_loc.x);
+    int delta_y = std::abs(from_loc.y - to_loc.y);
+
+    // TODO: This is compatible with the case that only OPINs are connected to other layers.
+    // Ideally, I should check whether OPINs or IPINs are connected and use the correct layer.
+    // If both are connected, the minimum should be taken. In the case that channels are also connected,
+    // I haven't thought about what to do.
+    float cross_layer_td = 0;
+    if (from_loc.layer_num != to_loc.layer_num) {
+        VTR_ASSERT(std::isfinite(cross_layer_delay_));
+        cross_layer_td = cross_layer_delay_;
+    }
+    return delays_[to_loc.layer_num][delta_x][delta_y] + cross_layer_td;
 }
 
 void DeltaDelayModel::dump_echo(std::string filepath) const {
@@ -60,13 +69,13 @@ const DeltaDelayModel* OverrideDelayModel::base_delay_model() const {
 }
 
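As a worked example of the delta lookup above (the numbers are illustrative, not from this patch): for a source at (3, 4) on layer 0 and a sink at (6, 8) on layer 1, delta_x = 3 and delta_y = 4; if delays_[1][3][4] is 1.2e-9 s and cross_layer_delay_ is 0.5e-9 s, the returned delay is 1.2e-9 + 0.5e-9 = 1.7e-9 s. The fixed cross-layer penalty is added exactly once whenever the two endpoints sit on different layers.
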
///@brief OverrideDelayModel methods.
-float OverrideDelayModel::delay(int from_x, int from_y, int from_pin, int to_x, int to_y, int to_pin, int layer_num) const {
+float OverrideDelayModel::delay(const t_physical_tile_loc& from_loc, int from_pin, const t_physical_tile_loc& to_loc, int to_pin) const {
     //First check to if there is an override delay value
     auto& device_ctx = g_vpr_ctx.device();
     auto& grid = device_ctx.grid;
 
-    t_physical_tile_type_ptr from_type_ptr = grid.get_physical_type({from_x, from_y, layer_num});
-    t_physical_tile_type_ptr to_type_ptr = grid.get_physical_type({to_x, to_y, layer_num});
+    t_physical_tile_type_ptr from_type_ptr = grid.get_physical_type(from_loc);
+    t_physical_tile_type_ptr to_type_ptr = grid.get_physical_type(to_loc);
 
     t_override override_key;
     override_key.from_type = from_type_ptr->index;
@@ -76,8 +85,8 @@ float OverrideDelayModel::delay(int from_x, int from_y, int from_pin, int to_x,
     //Delay overrides may be different for +/- delta so do not use
     //an absolute delta for the look-up
-    override_key.delta_x = to_x - from_x;
-    override_key.delta_y = to_y - from_y;
+    override_key.delta_x = to_loc.x - from_loc.x;
+    override_key.delta_y = to_loc.y - from_loc.y;
 
     float delay_val = std::numeric_limits<float>::quiet_NaN();
     auto override_iter = delay_overrides_.find(override_key);
@@ -86,7 +95,7 @@ float OverrideDelayModel::delay(int from_x, int from_y, int from_pin, int to_x,
         delay_val = override_iter->second;
     } else {
         //Fall back to the base delay model if no override was found
-        delay_val = base_delay_model_->delay(from_x, from_y, from_pin, to_x, to_y, to_pin, layer_num);
+        delay_val = base_delay_model_->delay(from_loc, from_pin, to_loc, to_pin);
     }
 
     return delay_val;
@@ -258,7 +267,7 @@ void OverrideDelayModel::read(const std::string& file) {
     auto model = reader.getRoot();
 
     ToNdMatrix<3, VprFloatEntry, float>(&delays, model.getDelays(), ToFloat);
-    base_delay_model_ = std::make_unique<DeltaDelayModel>(delays, is_flat_);
+    base_delay_model_ = std::make_unique<DeltaDelayModel>(cross_layer_delay_, delays, is_flat_);
 
     // Reading non-scalar capnproto fields is roughly equivilant to using
     // a std::vector of the field type.  Actual type is capnp::List::Reader.
@@ -310,6 +319,7 @@ void OverrideDelayModel::write(const std::string& file) const {
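
A minimal sketch of the override-then-fallback pattern used in OverrideDelayModel::delay above; the key type here is simplified (the real t_override also carries the from/to tile types and pin classes), and the helper name is illustrative, not part of the patch:

    #include <map>
    #include <tuple>

    struct OverrideKey {
        int delta_x, delta_y;
        bool operator<(const OverrideKey& other) const {
            return std::tie(delta_x, delta_y) < std::tie(other.delta_x, other.delta_y);
        }
    };

    // Use the registered override when present; otherwise fall back to the
    // value produced by the base (delta) delay model.
    float lookup_delay(const std::map<OverrideKey, float>& overrides,
                       const OverrideKey& key,
                       float base_model_delay) {
        auto it = overrides.find(key);
        return (it != overrides.end()) ? it->second : base_model_delay;
    }
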
 
 ///@brief Initialize the placer delay model.
 std::unique_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(const Netlist<>& net_list,
+                                                               const std::vector<t_arch_switch_inf>& arch_switch_inf,
                                                                t_chan_width_dist chan_width_dist,
                                                                const t_placer_opts& placer_opts,
                                                                const t_router_opts& router_opts,
@@ -318,8 +328,16 @@ std::unique_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(const Netlist<>&
                                                                const t_direct_inf* directs,
                                                                const int num_directs,
                                                                bool is_flat) {
-    return compute_place_delay_model(placer_opts, router_opts, net_list, det_routing_arch, segment_inf,
-                                     chan_width_dist, directs, num_directs, is_flat);
+    return compute_place_delay_model(placer_opts,
+                                     router_opts,
+                                     net_list,
+                                     arch_switch_inf,
+                                     det_routing_arch,
+                                     segment_inf,
+                                     chan_width_dist,
+                                     directs,
+                                     num_directs,
+                                     is_flat);
 }
 
 /**
@@ -346,9 +364,10 @@ float comp_td_single_connection_delay(const PlaceDelayModel* delay_model, Cluste
 
     int source_x = place_ctx.block_locs[source_block].loc.x;
     int source_y = place_ctx.block_locs[source_block].loc.y;
+    int source_layer = place_ctx.block_locs[source_block].loc.layer;
 
     int sink_x = place_ctx.block_locs[sink_block].loc.x;
     int sink_y = place_ctx.block_locs[sink_block].loc.y;
-    int sink_layer_num = place_ctx.block_locs[sink_block].loc.layer;
+    int sink_layer = place_ctx.block_locs[sink_block].loc.layer;
 
     /**
      * This heuristic only considers delta_x and delta_y, a much better
      *
@@ -357,13 +376,10 @@ float comp_td_single_connection_delay(const PlaceDelayModel* delay_model, Cluste
      * In particular this approach does not accurately capture the effect
      * of fast carry-chain connections.
      */
-    delay_source_to_sink = delay_model->delay(source_x,
-                                              source_y,
+    delay_source_to_sink = delay_model->delay({source_x, source_y, source_layer},
                                               source_block_ipin,
-                                              sink_x,
-                                              sink_y,
-                                              sink_block_ipin,
-                                              sink_layer_num);
+                                              {sink_x, sink_y, sink_layer},
+                                              sink_block_ipin);
     if (delay_source_to_sink < 0) {
         VPR_ERROR(VPR_ERROR_PLACE,
                   "in comp_td_single_connection_delay: Bad delay_source_to_sink value %g from %s (at %d,%d) to %s (at %d,%d)\n"
diff --git a/vpr/src/place/place_delay_model.h b/vpr/src/place/place_delay_model.h
index 09b6969c011..b10eae12204 100644
--- a/vpr/src/place/place_delay_model.h
+++ b/vpr/src/place/place_delay_model.h
@@ -29,6 +29,7 @@ class PlaceDelayModel;
 
 ///@brief Initialize the placer delay model.
 std::unique_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(const Netlist<>& net_list,
+                                                               const std::vector<t_arch_switch_inf>& arch_switch_inf,
                                                                t_chan_width_dist chan_width_dist,
                                                                const t_placer_opts& place_opts,
                                                                const t_router_opts& router_opts,
@@ -62,7 +63,7 @@ class PlaceDelayModel {
      *
      * Either compute or read methods must be invoked before invoking delay.
      */
-    virtual float delay(int from_x, int from_y, int from_pin, int to_x, int to_y, int to_pin, int layer_num) const = 0;
+    virtual float delay(const t_physical_tile_loc& from_loc, int from_pin, const t_physical_tile_loc& to_loc, int to_pin) const = 0;
 
     ///@brief Dumps the delay model to an echo file.
     virtual void dump_echo(std::string filename) const = 0;
@@ -85,10 +86,15 @@ class PlaceDelayModel {
 
 ///@brief A simple delay model based on the distance (delta) between block locations.
 class DeltaDelayModel : public PlaceDelayModel {
   public:
-    DeltaDelayModel(bool is_flat)
-        : is_flat_(is_flat) {}
-    DeltaDelayModel(vtr::NdMatrix<float, 3> delta_delays, bool is_flat)
+    DeltaDelayModel(float min_cross_layer_delay,
+                    bool is_flat)
+        : cross_layer_delay_(min_cross_layer_delay)
+        , is_flat_(is_flat) {}
+    DeltaDelayModel(float min_cross_layer_delay,
+                    vtr::NdMatrix<float, 3> delta_delays,
+                    bool is_flat)
         : delays_(std::move(delta_delays))
+        , cross_layer_delay_(min_cross_layer_delay)
         , is_flat_(is_flat) {}
 
     void compute(
@@ -96,7 +102,7 @@ class DeltaDelayModel : public PlaceDelayModel {
         const t_placer_opts& placer_opts,
         const t_router_opts& router_opts,
         int longest_length) override;
-    float delay(int from_x, int from_y, int /*from_pin*/, int to_x, int to_y, int /*to_pin*/, int layer_num) const override;
+    float delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const override;
     void dump_echo(std::string filepath) const override;
 
     void read(const std::string& file) override;
@@ -107,13 +113,16 @@ class DeltaDelayModel : public PlaceDelayModel {
 
   private:
     vtr::NdMatrix<float, 3> delays_; // [0..num_layers-1][0..max_dx][0..max_dy]
+    float cross_layer_delay_;
     bool is_flat_;
 };
 
 class OverrideDelayModel : public PlaceDelayModel {
   public:
-    OverrideDelayModel(bool is_flat)
-        : is_flat_(is_flat) {}
+    OverrideDelayModel(float min_cross_layer_delay,
+                       bool is_flat)
+        : cross_layer_delay_(min_cross_layer_delay)
+        , is_flat_(is_flat) {}
     void compute(
         RouterDelayProfiler& route_profiler,
         const t_placer_opts& placer_opts,
@@ -121,7 +130,7 @@ class OverrideDelayModel : public PlaceDelayModel {
         int longest_length) override;
-    // returns delay from the specified (x,y) to the specified (x,y) with both endpoints on layer_num and the
-    // specified from and to pins
-    float delay(int from_x, int from_y, int from_pin, int to_x, int to_y, int to_pin, int layer_num) const override;
+    // returns delay from the specified from_loc to the specified to_loc with the
+    // specified from and to pins
+    float delay(const t_physical_tile_loc& from_loc, int from_pin, const t_physical_tile_loc& to_loc, int to_pin) const override;
     void dump_echo(std::string filepath) const override;
 
     void read(const std::string& file) override;
@@ -135,6 +144,8 @@ class OverrideDelayModel : public PlaceDelayModel {
 
   private:
     std::unique_ptr<DeltaDelayModel> base_delay_model_;
+    /* Minimum delay of cross-layer connections */
+    float cross_layer_delay_;
     bool is_flat_;
 
     void compute_override_delay_model(RouterDelayProfiler& router,
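
The placer's move context below gains per-layer bookkeeping for each net. As a rough indexing sketch of how the new num_sink_pin_layer table is consumed elsewhere in this patch (the helper function is illustrative and assumes VPR's vtr::Matrix and ClusterNetId types):

    // num_sink_pin_layer is conceptually a [net][layer] -> sink-count table,
    // kept up to date incrementally as blocks move between layers.
    int sinks_on_layer(const vtr::Matrix<int>& num_sink_pin_layer,
                       ClusterNetId net_id, int layer_num) {
        return num_sink_pin_layer[size_t(net_id)][layer_num];
    }
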
diff --git a/vpr/src/place/placer_context.h b/vpr/src/place/placer_context.h
index 23df961b144..f5e56bbf37f 100644
--- a/vpr/src/place/placer_context.h
+++ b/vpr/src/place/placer_context.h
@@ -91,11 +91,20 @@ struct PlacerRuntimeContext : public Context {
  */
 struct PlacerMoveContext : public Context {
   public:
+    // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each edge of a net's bounding box (to allow efficient updates)
+    vtr::vector<ClusterNetId, t_bb> bb_num_on_edges;
+
     // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the bounding box coordinates of a net's bounding box
     vtr::vector<ClusterNetId, t_bb> bb_coords;
 
-    // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each of a net's bounding box (to allow efficient updates)
-    vtr::vector<ClusterNetId, t_bb> bb_num_on_edges;
+    // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each edge of a net's per-layer bounding boxes (to allow efficient updates)
+    vtr::vector<ClusterNetId, std::vector<t_2D_bb>> layer_bb_num_on_edges;
+
+    // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the coordinates of a net's per-layer bounding boxes
+    vtr::vector<ClusterNetId, std::vector<t_2D_bb>> layer_bb_coords;
+
+    // [0..cluster_ctx.clb_nlist.nets().size()-1][0..num_layers-1]. Store the number of sink pins of a net on each layer
+    vtr::Matrix<int> num_sink_pin_layer;
 
     // The first range limit calculated by the anneal
     float first_rlim;
diff --git a/vpr/src/place/simpleRL_move_generator.h b/vpr/src/place/simpleRL_move_generator.h
index 9ded69055d5..de108313023 100644
--- a/vpr/src/place/simpleRL_move_generator.h
+++ b/vpr/src/place/simpleRL_move_generator.h
@@ -211,6 +211,8 @@ class SimpleRLMoveGenerator : public MoveGenerator {
      *
      * @param agent std::unique_ptr to the agent. Only EpsilonGreedyAgent and SoftmaxAgent types are accepted
      * by the constructor. If other types are passed, a compile error would be thrown.
+     *
+     * @param is_multi_layer A boolean value to indicate whether the placement is multi-layer or not
      */
     template<class T, class = typename std::enable_if<std::is_same<T, EpsilonGreedyAgent>::value || std::is_same<T, SoftmaxAgent>::value>::type>
diff --git a/vpr/src/place/timing_place_lookup.cpp b/vpr/src/place/timing_place_lookup.cpp
index 5476e24cafe..fba8b1e9c46 100644
--- a/vpr/src/place/timing_place_lookup.cpp
+++ b/vpr/src/place/timing_place_lookup.cpp
@@ -167,6 +167,7 @@ static float find_neightboring_average(vtr::NdMatrix<float, 2>& matrix, t_physic
 std::unique_ptr<PlaceDelayModel> compute_place_delay_model(const t_placer_opts& placer_opts,
                                                            const t_router_opts& router_opts,
                                                            const Netlist<>& net_list,
+                                                           const std::vector<t_arch_switch_inf>& arch_switch_inf,
                                                            t_det_routing_arch* det_routing_arch,
                                                            std::vector<t_segment_inf>& segment_inf,
                                                            t_chan_width_dist chan_width_dist,
@@ -194,10 +195,13 @@ std::unique_ptr<PlaceDelayModel> compute_place_delay_model(const t_placer_opts&
     /*now setup and compute the actual arrays */
     std::unique_ptr<PlaceDelayModel> place_delay_model;
+    float min_cross_layer_delay = get_min_cross_layer_delay(arch_switch_inf,
+                                                            segment_inf,
+                                                            det_routing_arch->wire_to_arch_ipin_switch_between_dice);
 
     if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA) {
-        place_delay_model = std::make_unique<DeltaDelayModel>(is_flat);
+        place_delay_model = std::make_unique<DeltaDelayModel>(min_cross_layer_delay, is_flat);
     } else if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA_OVERRIDE) {
-        place_delay_model = std::make_unique<OverrideDelayModel>(is_flat);
+        place_delay_model = std::make_unique<OverrideDelayModel>(min_cross_layer_delay, is_flat);
     } else {
         VTR_ASSERT_MSG(false, "Invalid placer delay model");
     }
@@ -241,7 +245,7 @@ void OverrideDelayModel::compute(
                                      longest_length,
                                      is_flat_);
 
-    base_delay_model_ = std::make_unique<DeltaDelayModel>(delays, false);
+    base_delay_model_ = std::make_unique<DeltaDelayModel>(cross_layer_delay_, delays, false);
 
     compute_override_delay_model(route_profiler, router_opts);
 }
@@ -389,7 +393,8 @@ static float route_connection_delay(
             successfully_routed = route_profiler.calculate_delay(
                 source_rr_node, sink_rr_node,
                 router_opts,
-                &net_delay_value);
+                &net_delay_value,
+                layer_num);
         }
 
         if (successfully_routed) break;
@@ -1192,7 +1197,7 @@ void OverrideDelayModel::compute_override_delay_model(
             if (sampled_rr_pairs.count({src_rr, sink_rr})) continue;
 
             float direct_connect_delay = std::numeric_limits<float>::quiet_NaN();
-            bool found_routing_path = route_profiler.calculate_delay(src_rr, sink_rr, router_opts2, &direct_connect_delay);
+            bool found_routing_path = route_profiler.calculate_delay(src_rr, sink_rr, router_opts2, &direct_connect_delay, OPEN);
 
             if (found_routing_path) {
                 set_delay_override(from_type->index, from_pin_class, to_type->index, to_pin_class, direct->x_offset, direct->y_offset, direct_connect_delay);
diff --git a/vpr/src/place/timing_place_lookup.h b/vpr/src/place/timing_place_lookup.h
index 30e1a8ae01a..f9efe7fc933 100644
--- a/vpr/src/place/timing_place_lookup.h
+++ b/vpr/src/place/timing_place_lookup.h
@@ -5,6 +5,7 @@ std::unique_ptr<PlaceDelayModel> compute_place_delay_model(const t_placer_opts& placer_opts,
                                                            const t_router_opts& router_opts,
                                                            const Netlist<>& net_list,
+                                                           const std::vector<t_arch_switch_inf>& arch_switch_inf,
                                                            t_det_routing_arch* det_routing_arch,
                                                            std::vector<t_segment_inf>& segment_inf,
                                                            t_chan_width_dist chan_width_dist,
diff --git a/vpr/src/place/uniform_move_generator.cpp b/vpr/src/place/uniform_move_generator.cpp
index 320ce987c39..6560c32af24 100644
--- a/vpr/src/place/uniform_move_generator.cpp
+++ b/vpr/src/place/uniform_move_generator.cpp
@@ -26,7 +26,6 @@ e_create_move UniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks
     VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type));
 
     t_pl_loc to;
-
     if (!find_to_loc_uniform(cluster_from_type, rlim, from, to, b_from)) {
         return e_create_move::ABORT;
     }
diff --git a/vpr/src/place/weighted_centroid_move_generator.cpp b/vpr/src/place/weighted_centroid_move_generator.cpp
index d5f62ae9d20..d33b6fa2ebe 100644
--- a/vpr/src/place/weighted_centroid_move_generator.cpp
+++ b/vpr/src/place/weighted_centroid_move_generator.cpp
@@ -38,7 +38,9 @@ e_create_move WeightedCentroidMoveGenerator::propose_move(t_pl_blocks_to_be_move
     /* Calculate the weighted centroid */
     calculate_centroid_loc(b_from, true, centroid, criticalities);
 
-    /* Find a */
+    // The centroid location is not necessarily a valid location, and the downstream code expects a valid
+    // layer for the "to" location. So if the layer is not valid, we set it to the same layer as the from location.
+    to.layer = (centroid.layer < 0) ? from.layer : centroid.layer;
     if (!find_to_loc_centroid(cluster_from_type, from, centroid, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
     }
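
The weighted-median generator in the next hunk accumulates, per layer, the sink counts of the moved block's nets and then approximates the median layer by the layer holding the most sinks (the std::max_element call in that hunk). A minimal sketch of that selection, with an illustrative helper name that is not part of the patch:

    #include <algorithm>
    #include <iterator>
    #include <vector>

    // Pick the layer holding the largest sink count (argmax over layer_blk_cnt).
    int pick_max_sink_layer(const std::vector<int>& layer_blk_cnt) {
        auto it = std::max_element(layer_blk_cnt.begin(), layer_blk_cnt.end());
        return static_cast<int>(std::distance(layer_blk_cnt.begin(), it));
    }
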
diff --git a/vpr/src/place/weighted_median_move_generator.cpp b/vpr/src/place/weighted_median_move_generator.cpp
index 17f064d4c9c..2d343cd3347 100644
--- a/vpr/src/place/weighted_median_move_generator.cpp
+++ b/vpr/src/place/weighted_median_move_generator.cpp
@@ -27,6 +27,9 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
     auto& cluster_ctx = g_vpr_ctx.clustering();
     auto& place_move_ctx = g_placer_ctx.mutable_move();
 
+    int num_layers = g_vpr_ctx.device().grid.get_num_layers();
+    bool is_multi_layer = (num_layers > 1);
+
     t_pl_loc from = place_ctx.block_locs[b_from].loc;
     auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from);
     auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer});
@@ -42,6 +45,7 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
     //reused to save allocation time
     place_move_ctx.X_coord.clear();
     place_move_ctx.Y_coord.clear();
+    std::vector<int> layer_blk_cnt(num_layers, 0);
 
     //true if the net is a feedback from the block to itself (all the net terminals are connected to the same block)
     bool skip_net;
@@ -72,6 +76,17 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
         place_move_ctx.X_coord.insert(place_move_ctx.X_coord.end(), ceil(coords.xmin.criticality * CRIT_MULT_FOR_W_MEDIAN), coords.xmin.edge);
         place_move_ctx.X_coord.insert(place_move_ctx.X_coord.end(), ceil(coords.xmax.criticality * CRIT_MULT_FOR_W_MEDIAN), coords.xmax.edge);
         place_move_ctx.Y_coord.insert(place_move_ctx.Y_coord.end(), ceil(coords.ymin.criticality * CRIT_MULT_FOR_W_MEDIAN), coords.ymin.edge);
         place_move_ctx.Y_coord.insert(place_move_ctx.Y_coord.end(), ceil(coords.ymax.criticality * CRIT_MULT_FOR_W_MEDIAN), coords.ymax.edge);
+        // If multiple layers are available, we need to keep track of how many sinks are in each layer.
+        if (is_multi_layer) {
+            for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+                layer_blk_cnt[layer_num] += place_move_ctx.num_sink_pin_layer[size_t(net_id)][layer_num];
+            }
+            // If the pin under consideration is of type sink, it is counted in place_move_ctx.num_sink_pin_layer, and we don't want to count the pin that is being moved
+            if (cluster_ctx.clb_nlist.pin_type(pin_id) != PinType::DRIVER) {
+                VTR_ASSERT(layer_blk_cnt[from.layer] > 0);
+                layer_blk_cnt[from.layer]--;
+            }
+        }
     }
 
     if ((place_move_ctx.X_coord.empty()) || (place_move_ctx.Y_coord.empty())) {
@@ -106,8 +121,17 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved&
     t_pl_loc w_median_point;
     w_median_point.x = (limit_coords.xmin + limit_coords.xmax) / 2;
     w_median_point.y = (limit_coords.ymin + limit_coords.ymax) / 2;
-    // TODO: Currently, we don't move blocks between different types of layers
-    w_median_point.layer = from.layer;
+
+    // If multiple layers are available, choose the median layer; otherwise the block stays on its current layer (layer #0).
+    //#TODO: Since we are now only considering 2 layers, the layer with the maximum number of sinks is chosen. We need to update this to get the true median.
+    if (is_multi_layer) {
+        int layer_num = std::distance(layer_blk_cnt.begin(), std::max_element(layer_blk_cnt.begin(), layer_blk_cnt.end()));
+        w_median_point.layer = layer_num;
+        to.layer = layer_num;
+    } else {
+        w_median_point.layer = from.layer;
+        to.layer = from.layer;
+    }
     if (!find_to_loc_centroid(cluster_from_type, from, w_median_point, range_limiters, to, b_from)) {
         return e_create_move::ABORT;
     }
diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp
index 62db70ed31f..b99fb60b650 100644
--- a/vpr/src/route/connection_router.cpp
+++ b/vpr/src/route/connection_router.cpp
@@ -5,58 +5,31 @@
 #include "bucket.h"
 #include "rr_graph_fwd.h"
 
-static inline bool relevant_node_to_target(const RRGraphView* rr_graph,
-                                           RRNodeId node_to_add,
-                                           RRNodeId target_node) {
-    VTR_ASSERT(rr_graph->node_type(target_node) == t_rr_type::SINK);
-    auto node_to_add_type = rr_graph->node_type(node_to_add);
-    if (node_to_add_type == t_rr_type::OPIN || node_to_add_type == t_rr_type::SOURCE || node_to_add_type == t_rr_type::CHANX || node_to_add_type == t_rr_type::CHANY || node_to_add_type == SINK) {
-        return true;
-    } else if (node_in_same_physical_tile(node_to_add, target_node)) {
-        VTR_ASSERT(node_to_add_type == IPIN);
-        return true;
-    }
-    return false;
-}
-
-inline void update_router_stats(const DeviceContext& device_ctx,
+/**
+ * @brief This function is relevant when the architecture is 3D. If inter-layer connections are only from OPINs (determined by is_inter_layer_opin_connection),
+ * then nodes (other than OPINs) which are on a different layer than the sink's layer don't need to be pushed back to the heap.
+ * @param rr_nodes
+ * @param rr_graph
+ * @param from_node
+ * @param sink_node
+ * @param is_inter_layer_opin_connection It is true if the architecture is 3D and inter-layer connections are only from OPINs. 
+ * @return + */ +static bool has_path_to_sink(const t_rr_graph_view& rr_nodes, + const RRGraphView* rr_graph, + RRNodeId from_node, + RRNodeId sink_node, + bool is_inter_layer_opin_connection); + +static bool relevant_node_to_target(const RRGraphView* rr_graph, + RRNodeId node_to_add, + RRNodeId target_node); + +static void update_router_stats(const DeviceContext& device_ctx, const RRGraphView* rr_graph, RouterStats* router_stats, RRNodeId rr_node_id, - bool is_push) { - if (is_push) { - router_stats->heap_pushes++; - } else { - router_stats->heap_pops++; - } - - auto node_type = rr_graph->node_type(rr_node_id); - VTR_ASSERT(node_type != NUM_RR_TYPES); - t_physical_tile_type_ptr physical_type = device_ctx.grid.get_physical_type({rr_graph->node_xlow(rr_node_id), - rr_graph->node_ylow(rr_node_id), - rr_graph->node_layer(rr_node_id)}); - - if (is_inter_cluster_node(physical_type, - node_type, - rr_graph->node_ptc_num(rr_node_id))) { - if (is_push) { - router_stats->inter_cluster_node_pushes++; - router_stats->inter_cluster_node_type_cnt_pushes[node_type]++; - } else { - router_stats->inter_cluster_node_pops++; - router_stats->inter_cluster_node_type_cnt_pops[node_type]++; - } - - } else { - if (is_push) { - router_stats->intra_cluster_node_pushes++; - router_stats->intra_cluster_node_type_cnt_pushes[node_type]++; - } else { - router_stats->intra_cluster_node_pops++; - router_stats->intra_cluster_node_type_cnt_pops[node_type]++; - } - } -} + bool is_push); /** return tuple */ template @@ -112,9 +85,9 @@ std::tuple ConnectionRouter::timing_driven_route_connection return std::make_tuple(false, nullptr); } - VTR_LOGV_DEBUG(router_debug_, " Routing to %d as normal net (BB: %d,%d x %d,%d)\n", sink_node, - bounding_box.xmin, bounding_box.ymin, - bounding_box.xmax, bounding_box.ymax); + VTR_LOGV_DEBUG(router_debug_, " Routing to %d as normal net (BB: %d,%d,%d x %d,%d,%d)\n", sink_node, + bounding_box.layer_min, bounding_box.xmin, bounding_box.ymin, + bounding_box.layer_max, bounding_box.xmax, bounding_box.ymax); t_heap* cheapest = timing_driven_route_connection_from_heap(sink_node, cost_params, @@ -152,6 +125,8 @@ std::tuple ConnectionRouter::timing_driven_route_connection full_device_bounding_box.ymin = 0; full_device_bounding_box.xmax = grid_.width() - 1; full_device_bounding_box.ymax = grid_.height() - 1; + full_device_bounding_box.layer_min = 0; + full_device_bounding_box.layer_max = grid_.get_num_layers() - 1; // //TODO: potential future optimization @@ -220,9 +195,9 @@ std::tuple ConnectionRouter::timing_driven_route_conne return std::make_tuple(false, false, t_heap()); } - VTR_LOGV_DEBUG(router_debug_, " Routing to %d as high fanout net (BB: %d,%d x %d,%d)\n", sink_node, - high_fanout_bb.xmin, high_fanout_bb.ymin, - high_fanout_bb.xmax, high_fanout_bb.ymax); + VTR_LOGV_DEBUG(router_debug_, " Routing to %d as high fanout net (BB: %d,%d,%d x %d,%d,%d)\n", sink_node, + high_fanout_bb.layer_min, high_fanout_bb.xmin, high_fanout_bb.ymin, + high_fanout_bb.layer_max, high_fanout_bb.xmax, high_fanout_bb.ymax); bool retry_with_full_bb = false; t_heap* cheapest; @@ -473,10 +448,12 @@ void ConnectionRouter::timing_driven_expand_neighbours(t_heap* current, t_bb target_bb; if (target_node != RRNodeId::INVALID()) { - target_bb.xmin = rr_graph_->node_xlow(target_node); - target_bb.ymin = rr_graph_->node_ylow(target_node); - target_bb.xmax = rr_graph_->node_xhigh(target_node); - target_bb.ymax = rr_graph_->node_yhigh(target_node); + target_bb.xmin = rr_graph_->node_xlow(RRNodeId(target_node)); + 
target_bb.ymin = rr_graph_->node_ylow(RRNodeId(target_node)); + target_bb.xmax = rr_graph_->node_xhigh(RRNodeId(target_node)); + target_bb.ymax = rr_graph_->node_yhigh(RRNodeId(target_node)); + target_bb.layer_min = rr_graph_->node_layer(RRNodeId(target_node)); + target_bb.layer_max = rr_graph_->node_layer(RRNodeId(target_node)); } // For each node associated with the current heap element, expand all of it's neighbors @@ -537,6 +514,9 @@ void ConnectionRouter::timing_driven_expand_neighbour(t_heap* current, int to_ylow = rr_graph_->node_ylow(to_node); int to_xhigh = rr_graph_->node_xhigh(to_node); int to_yhigh = rr_graph_->node_yhigh(to_node); + int to_layer = rr_graph_->node_layer(to_node); + + VTR_ASSERT(bounding_box.layer_max < g_vpr_ctx.device().grid.get_num_layers()); // BB-pruning // Disable BB-pruning if RCV is enabled, as this can make it harder for circuits with high negative hold slack to resolve this @@ -544,15 +524,19 @@ void ConnectionRouter::timing_driven_expand_neighbour(t_heap* current, if ((to_xhigh < bounding_box.xmin // Strictly left of BB left-edge || to_xlow > bounding_box.xmax // Strictly right of BB right-edge || to_yhigh < bounding_box.ymin // Strictly below BB bottom-edge - || to_ylow > bounding_box.ymax) // Strictly above BB top-edge + || to_ylow > bounding_box.ymax + || to_layer < bounding_box.layer_min + || to_layer > bounding_box.layer_max) // Strictly above BB top-edge && !rcv_path_manager.is_enabled()) { VTR_LOGV_DEBUG(router_debug_, " Pruned expansion of node %d edge %zu -> %d" - " (to node location %d,%dx%d,%d outside of expanded" - " net bounding box %d,%dx%d,%d)\n", + " (to node location %d,%d,%d x %d,%d,%d outside of expanded" + " net bounding box %d,%d,%d x %d,%d,%d)\n", from_node, size_t(from_edge), size_t(to_node), - to_xlow, to_ylow, to_xhigh, to_yhigh, - bounding_box.xmin, bounding_box.ymin, bounding_box.xmax, bounding_box.ymax); + to_xlow, to_ylow, to_layer, + to_xhigh, to_yhigh, to_layer, + bounding_box.xmin, bounding_box.ymin, bounding_box.layer_min, + bounding_box.xmax, bounding_box.ymax, bounding_box.layer_max); return; /* Node is outside (expanded) bounding box. 
*/ } @@ -568,14 +552,18 @@ void ConnectionRouter::timing_driven_expand_neighbour(t_heap* current, if (to_xlow < target_bb.xmin || to_ylow < target_bb.ymin || to_xhigh > target_bb.xmax - || to_yhigh > target_bb.ymax) { + || to_yhigh > target_bb.ymax + || to_layer < target_bb.layer_min + || to_layer > target_bb.layer_max) { VTR_LOGV_DEBUG(router_debug_, " Pruned expansion of node %d edge %zu -> %d" - " (to node is IPIN at %d,%dx%d,%d which does not" - " lead to target block %d,%dx%d,%d)\n", + " (to node is IPIN at %d,%d,%d x %d,%d,%d which does not" + " lead to target block %d,%d,%d x %d,%d,%d)\n", from_node, size_t(from_edge), size_t(to_node), - to_xlow, to_ylow, to_xhigh, to_yhigh, - target_bb.xmin, target_bb.ymin, target_bb.xmax, target_bb.ymax); + to_xlow, to_ylow, to_layer, + to_xhigh, to_yhigh, to_layer, + target_bb.xmin, target_bb.ymin, target_bb.layer_min, + target_bb.xmax, target_bb.ymax, target_bb.layer_max); return; } } @@ -929,6 +917,9 @@ void ConnectionRouter::add_route_tree_to_heap( /* Pre-order depth-first traversal */ // IPINs and SINKS are not re_expanded if (rt_node.re_expand) { + if (target_node.is_valid() && !has_path_to_sink(rr_nodes_, rr_graph_, RRNodeId(rt_node.inode), RRNodeId(target_node), only_opin_inter_layer)) { + return; + } add_route_tree_node_to_heap(rt_node, target_node, cost_params, @@ -1021,6 +1012,9 @@ static t_bb adjust_highfanout_bounding_box(t_bb highfanout_bb) { bb.xmax += HIGH_FANOUT_BB_FAC; bb.ymax += HIGH_FANOUT_BB_FAC; + bb.layer_min = highfanout_bb.layer_min; + bb.layer_max = highfanout_bb.layer_max; + return bb; } @@ -1054,6 +1048,8 @@ t_bb ConnectionRouter::add_high_fanout_route_tree_to_heap( highfanout_bb.xmax = rr_graph_->node_xhigh(target_node); highfanout_bb.ymin = rr_graph_->node_ylow(target_node); highfanout_bb.ymax = rr_graph_->node_yhigh(target_node); + highfanout_bb.layer_min = rr_graph_->node_layer(target_node); + highfanout_bb.layer_max = rr_graph_->node_layer(target_node); //Add existing routing starting from the target bin. 
//If the target's bin has insufficient existing routing add from the surrounding bins @@ -1077,6 +1073,9 @@ t_bb ConnectionRouter::add_high_fanout_route_tree_to_heap( continue; } + if (!has_path_to_sink(rr_nodes_, rr_graph_, RRNodeId(rt_node.inode), target_node, only_opin_inter_layer)) { + continue; + } // Put the node onto the heap add_route_tree_node_to_heap(rt_node, target_node, cost_params, true); @@ -1085,6 +1084,8 @@ t_bb ConnectionRouter::add_high_fanout_route_tree_to_heap( highfanout_bb.ymin = std::min(highfanout_bb.ymin, rr_graph_->node_ylow(rr_node_to_add)); highfanout_bb.xmax = std::max(highfanout_bb.xmax, rr_graph_->node_xhigh(rr_node_to_add)); highfanout_bb.ymax = std::max(highfanout_bb.ymax, rr_graph_->node_yhigh(rr_node_to_add)); + highfanout_bb.layer_min = std::min(highfanout_bb.layer_min, rr_graph_->node_layer(rr_node_to_add)); + highfanout_bb.layer_max = std::max(highfanout_bb.layer_max, rr_graph_->node_layer(rr_node_to_add)); if (is_flat_) { if (rr_graph_->node_type(rr_node_to_add) == CHANY || rr_graph_->node_type(rr_node_to_add) == CHANX) { chan_nodes_added++; @@ -1121,6 +1122,84 @@ t_bb ConnectionRouter::add_high_fanout_route_tree_to_heap( return bounding_box; } +static inline bool has_path_to_sink(const t_rr_graph_view& rr_nodes, + const RRGraphView* rr_graph, + RRNodeId from_node, + RRNodeId sink_node, + bool is_inter_layer_opin_connection) { + int sink_layer = rr_graph->node_layer(sink_node); + + if (rr_graph->node_layer(from_node) == sink_layer || rr_graph->node_type(from_node) == SOURCE || !is_inter_layer_opin_connection) { + return true; + } else if (rr_graph->node_type(from_node) == CHANX || rr_graph->node_type(from_node) == CHANY || rr_graph->node_type(from_node) == IPIN) { + return false; + } else { + VTR_ASSERT(rr_graph->node_type(from_node) == OPIN && is_inter_layer_opin_connection); + auto edges = rr_nodes.edge_range(from_node); + + for (RREdgeId from_edge : edges) { + RRNodeId to_node = rr_nodes.edge_sink_node(from_edge); + if (rr_graph->node_layer(to_node) == sink_layer) { + return true; + } + } + return false; + } +} + +static inline bool relevant_node_to_target(const RRGraphView* rr_graph, + RRNodeId node_to_add, + RRNodeId target_node) { + VTR_ASSERT(rr_graph->node_type(target_node) == t_rr_type::SINK); + auto node_to_add_type = rr_graph->node_type(node_to_add); + if (node_to_add_type == t_rr_type::OPIN || node_to_add_type == t_rr_type::SOURCE || node_to_add_type == t_rr_type::CHANX || node_to_add_type == t_rr_type::CHANY || node_to_add_type == SINK) { + return true; + } else if (node_in_same_physical_tile(node_to_add, target_node)) { + VTR_ASSERT(node_to_add_type == IPIN); + return true; + } + return false; +} + +static inline void update_router_stats(const DeviceContext& device_ctx, + const RRGraphView* rr_graph, + RouterStats* router_stats, + RRNodeId rr_node_id, + bool is_push) { + if (is_push) { + router_stats->heap_pushes++; + } else { + router_stats->heap_pops++; + } + + auto node_type = rr_graph->node_type(rr_node_id); + VTR_ASSERT(node_type != NUM_RR_TYPES); + t_physical_tile_type_ptr physical_type = device_ctx.grid.get_physical_type({rr_graph->node_xlow(rr_node_id), + rr_graph->node_ylow(rr_node_id), + rr_graph->node_layer(rr_node_id)}); + + if (is_inter_cluster_node(physical_type, + node_type, + rr_graph->node_ptc_num(rr_node_id))) { + if (is_push) { + router_stats->inter_cluster_node_pushes++; + router_stats->inter_cluster_node_type_cnt_pushes[node_type]++; + } else { + router_stats->inter_cluster_node_pops++; + 
router_stats->inter_cluster_node_type_cnt_pops[node_type]++; + } + + } else { + if (is_push) { + router_stats->intra_cluster_node_pushes++; + router_stats->intra_cluster_node_type_cnt_pushes[node_type]++; + } else { + router_stats->intra_cluster_node_pops++; + router_stats->intra_cluster_node_type_cnt_pops[node_type]++; + } + } +} + std::unique_ptr make_connection_router(e_heap_type heap_type, const DeviceGrid& grid, const RouterLookahead& router_lookahead, diff --git a/vpr/src/route/connection_router.h b/vpr/src/route/connection_router.h index 5834e852409..093ab8fed83 100644 --- a/vpr/src/route/connection_router.h +++ b/vpr/src/route/connection_router.h @@ -47,6 +47,7 @@ class ConnectionRouter : public ConnectionRouterInterface { , router_debug_(false) { heap_.init_heap(grid); heap_.set_prune_limit(rr_nodes_.size(), kHeapPruneFactor * rr_nodes_.size()); + only_opin_inter_layer = (grid.get_num_layers() > 1) && inter_layer_connections_limited_to_opin(*rr_graph); } // Clear's the modified list. Should be called after reset_path_costs @@ -286,6 +287,8 @@ class ConnectionRouter : public ConnectionRouterInterface { HeapImplementation heap_; bool router_debug_; + bool only_opin_inter_layer; + // The path manager for RCV, keeps track of the route tree as a set, also manages the allocation of the heap types PathManager rcv_path_manager; }; diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp index 99d116b0de6..2a2f9cb787e 100644 --- a/vpr/src/route/route_common.cpp +++ b/vpr/src/route/route_common.cpp @@ -835,6 +835,8 @@ vtr::vector load_route_bb(const Netlist<>& net_list, full_device_bounding_box.ymin = 0; full_device_bounding_box.xmax = device_ctx.grid.width() - 1; full_device_bounding_box.ymax = device_ctx.grid.height() - 1; + full_device_bounding_box.layer_min = 0; + full_device_bounding_box.layer_max = device_ctx.grid.get_num_layers() - 1; } auto nets = net_list.nets(); @@ -905,6 +907,8 @@ t_bb load_net_route_bb(const Netlist<>& net_list, int ymin = rr_graph.node_ylow(driver_rr); int xmax = rr_graph.node_xhigh(driver_rr); int ymax = rr_graph.node_yhigh(driver_rr); + int layer_min = rr_graph.node_layer(driver_rr); + int layer_max = rr_graph.node_layer(driver_rr); auto net_sinks = net_list.net_sinks(net_id); for (size_t ipin = 1; ipin < net_sinks.size() + 1; ++ipin) { //Start at 1 since looping through sinks @@ -914,10 +918,15 @@ t_bb load_net_route_bb(const Netlist<>& net_list, VTR_ASSERT(rr_graph.node_xlow(sink_rr) <= rr_graph.node_xhigh(sink_rr)); VTR_ASSERT(rr_graph.node_ylow(sink_rr) <= rr_graph.node_yhigh(sink_rr)); + VTR_ASSERT(rr_graph.node_layer(sink_rr) >= 0); + VTR_ASSERT(rr_graph.node_layer(sink_rr) <= device_ctx.grid.get_num_layers() - 1); + xmin = std::min(xmin, rr_graph.node_xlow(sink_rr)); xmax = std::max(xmax, rr_graph.node_xhigh(sink_rr)); ymin = std::min(ymin, rr_graph.node_ylow(sink_rr)); ymax = std::max(ymax, rr_graph.node_yhigh(sink_rr)); + layer_min = std::min(layer_min, rr_graph.node_layer(sink_rr)); + layer_max = std::max(layer_max, rr_graph.node_layer(sink_rr)); } /* Want the channels on all 4 sides to be usuable, even if bb_factor = 0. 
@@ -905,6 +907,8 @@ t_bb load_net_route_bb(const Netlist<>& net_list,
     int ymin = rr_graph.node_ylow(driver_rr);
     int xmax = rr_graph.node_xhigh(driver_rr);
     int ymax = rr_graph.node_yhigh(driver_rr);
+    int layer_min = rr_graph.node_layer(driver_rr);
+    int layer_max = rr_graph.node_layer(driver_rr);

     auto net_sinks = net_list.net_sinks(net_id);
     for (size_t ipin = 1; ipin < net_sinks.size() + 1; ++ipin) { //Start at 1 since looping through sinks
@@ -914,10 +918,15 @@ t_bb load_net_route_bb(const Netlist<>& net_list,
         VTR_ASSERT(rr_graph.node_xlow(sink_rr) <= rr_graph.node_xhigh(sink_rr));
         VTR_ASSERT(rr_graph.node_ylow(sink_rr) <= rr_graph.node_yhigh(sink_rr));

+        VTR_ASSERT(rr_graph.node_layer(sink_rr) >= 0);
+        VTR_ASSERT(rr_graph.node_layer(sink_rr) <= device_ctx.grid.get_num_layers() - 1);
+
         xmin = std::min(xmin, rr_graph.node_xlow(sink_rr));
         xmax = std::max(xmax, rr_graph.node_xhigh(sink_rr));
         ymin = std::min(ymin, rr_graph.node_ylow(sink_rr));
         ymax = std::max(ymax, rr_graph.node_yhigh(sink_rr));
+        layer_min = std::min(layer_min, rr_graph.node_layer(sink_rr));
+        layer_max = std::max(layer_max, rr_graph.node_layer(sink_rr));
     }

     /* Want the channels on all 4 sides to be usable, even if bb_factor = 0.
      */
@@ -933,6 +942,8 @@ t_bb load_net_route_bb(const Netlist<>& net_list,
     bb.xmax = std::min(xmax + bb_factor, device_ctx.grid.width() - 1);
     bb.ymin = std::max(ymin - bb_factor, 0);
     bb.ymax = std::min(ymax + bb_factor, device_ctx.grid.height() - 1);
+    bb.layer_min = layer_min;
+    bb.layer_max = layer_max;

     return bb;
 }
diff --git a/vpr/src/route/route_parallel.cpp b/vpr/src/route/route_parallel.cpp
index 96e6464f62b..b3a6dda3b72 100644
--- a/vpr/src/route/route_parallel.cpp
+++ b/vpr/src/route/route_parallel.cpp
@@ -985,7 +985,9 @@ RouteIterResults route_partition_tree(tbb::task_group& g,
                                0,
                                (int)(device_ctx.grid.width() - 1),
                                0,
-                               (int)(device_ctx.grid.height() - 1)};
+                               (int)(device_ctx.grid.height() - 1),
+                               0,
+                               (int)(device_ctx.grid.get_num_layers() - 1)};
             tree.root().nets.push_back(net_id);
         }
     }
diff --git a/vpr/src/route/route_timing.cpp b/vpr/src/route/route_timing.cpp
index 62930ad2555..2b497066e32 100644
--- a/vpr/src/route/route_timing.cpp
+++ b/vpr/src/route/route_timing.cpp
@@ -1908,8 +1908,10 @@ t_bb calc_current_bb(const RouteTree& tree) {
     t_bb bb;
     bb.xmin = grid.width() - 1;
     bb.ymin = grid.height() - 1;
+    bb.layer_min = grid.get_num_layers() - 1;
     bb.xmax = 0;
     bb.ymax = 0;
+    bb.layer_max = 0;

     for (auto& rt_node : tree.all_nodes()) {
         //The router interprets RR nodes which cross the boundary as being
@@ -1918,8 +1920,10 @@ t_bb calc_current_bb(const RouteTree& tree) {
         //and xlow/ylow for xmax/ymax calculations
         bb.xmin = std::min(bb.xmin, rr_graph.node_xhigh(rt_node.inode));
         bb.ymin = std::min(bb.ymin, rr_graph.node_yhigh(rt_node.inode));
+        bb.layer_min = std::min(bb.layer_min, rr_graph.node_layer(rt_node.inode));
         bb.xmax = std::max(bb.xmax, rr_graph.node_xlow(rt_node.inode));
         bb.ymax = std::max(bb.ymax, rr_graph.node_ylow(rt_node.inode));
+        bb.layer_max = std::max(bb.layer_max, rr_graph.node_layer(rt_node.inode));
     }

     VTR_ASSERT(bb.xmin <= bb.xmax);
diff --git a/vpr/src/route/router_delay_profiling.cpp b/vpr/src/route/router_delay_profiling.cpp
index 4e2274c406f..eac8fdf28c4 100644
--- a/vpr/src/route/router_delay_profiling.cpp
+++ b/vpr/src/route/router_delay_profiling.cpp
@@ -23,7 +23,11 @@ RouterDelayProfiler::RouterDelayProfiler(const Netlist<>& net_list,
                       is_flat)
     , is_flat_(is_flat) {}

-bool RouterDelayProfiler::calculate_delay(RRNodeId source_node, RRNodeId sink_node, const t_router_opts& router_opts, float* net_delay) {
+bool RouterDelayProfiler::calculate_delay(RRNodeId source_node,
+                                          RRNodeId sink_node,
+                                          const t_router_opts& router_opts,
+                                          float* net_delay,
+                                          int layer_num) {
     /* Returns true as long as it found some way to hook up this net, even if that *
      * way resulted in overuse of resources (congestion). If there is no way       *
      * to route this net, even ignoring congestion, it returns false. In this      *
@@ -54,6 +58,14 @@ bool RouterDelayProfiler::calculate_delay(RRNodeId source_node, RRNodeId sink_no
     bounding_box.xmax = device_ctx.grid.width() + 1;
     bounding_box.ymin = 0;
     bounding_box.ymax = device_ctx.grid.height() + 1;
+    // If layer_num is not specified, it means the BB should cover all layers
+    if (layer_num == OPEN) {
+        bounding_box.layer_min = 0;
+        bounding_box.layer_max = device_ctx.grid.get_num_layers() - 1;
+    } else {
+        bounding_box.layer_min = layer_num;
+        bounding_box.layer_max = layer_num;
+    }

     t_conn_cost_params cost_params;
     cost_params.criticality = 1.;
@@ -81,7 +93,7 @@ bool RouterDelayProfiler::calculate_delay(RRNodeId source_node, RRNodeId sink_no
                                                                   true);

     if (found_path) {
-        VTR_ASSERT(RRNodeId(cheapest.index) == sink_node);
+        VTR_ASSERT(cheapest.index == sink_node);
         vtr::optional rt_node_of_sink;
         std::tie(std::ignore, rt_node_of_sink) = tree.update_from_heap(&cheapest, OPEN, nullptr, is_flat_);

@@ -111,13 +123,15 @@ vtr::vector calculate_all_path_delays_from_rr_node(RRNodeId src
     vtr::vector path_delays_to(device_ctx.rr_graph.num_nodes(),
                                std::numeric_limits<float>::quiet_NaN());

-    RouteTree tree((RRNodeId(src_rr_node)));
+    RouteTree tree(src_rr_node);

     t_bb bounding_box;
     bounding_box.xmin = 0;
     bounding_box.xmax = device_ctx.grid.width() + 1;
     bounding_box.ymin = 0;
     bounding_box.ymax = device_ctx.grid.height() + 1;
+    bounding_box.layer_min = 0;
+    bounding_box.layer_max = device_ctx.grid.get_num_layers() - 1;

     t_conn_cost_params cost_params;
     cost_params.criticality = 1.;
diff --git a/vpr/src/route/router_delay_profiling.h b/vpr/src/route/router_delay_profiling.h
index d4dcbb5dac7..11d8eb25f1d 100644
--- a/vpr/src/route/router_delay_profiling.h
+++ b/vpr/src/route/router_delay_profiling.h
@@ -13,7 +13,24 @@ class RouterDelayProfiler {
     RouterDelayProfiler(const Netlist<>& net_list,
                         const RouterLookahead* lookahead,
                         bool is_flat);
-    bool calculate_delay(RRNodeId source_node, RRNodeId sink_node, const t_router_opts& router_opts, float* net_delay);
+
+    /**
+     * @brief Returns true as long as it found some way to hook up this net, even if that
+     * way resulted in overuse of resources (congestion). If there is no way
+     * to route this net, even ignoring congestion, it returns false. In this
+     * case the rr_graph is disconnected and you can give up.
+     * @param source_node
+     * @param sink_node
+     * @param router_opts
+     * @param net_delay
+     * @param layer_num
+     * @return
+     */
+    bool calculate_delay(RRNodeId source_node,
+                         RRNodeId sink_node,
+                         const t_router_opts& router_opts,
+                         float* net_delay,
+                         int layer_num);

   private:
     const Netlist<>& net_list_;
diff --git a/vpr/src/route/router_lookahead_cost_map.cpp b/vpr/src/route/router_lookahead_cost_map.cpp
index 9e3de711d9d..c00257a1ce4 100644
--- a/vpr/src/route/router_lookahead_cost_map.cpp
+++ b/vpr/src/route/router_lookahead_cost_map.cpp
@@ -397,6 +397,8 @@ std::pair CostMap::get_nearby_cost_entry(const vtr::NdMat
  * the cost map data structures, exploiting the capnp serialization.
  */
+#ifdef VTR_ENABLE_CAPNPROTO
+
 static void ToCostEntry(util::Cost_Entry* out, const VprCostEntry::Reader& in) {
     out->delay = in.getDelay();
     out->congestion = in.getCongestion();
@@ -491,3 +493,15 @@ void CostMap::write(const std::string& file) const {

     writeMessageToFile(file, &builder);
 }
+
+#else
+
+void CostMap::read(const std::string& /*file*/) {
+    VPR_FATAL_ERROR(VPR_ERROR_OTHER, "Read CostMap requires the support of capnp");
+}
+
+void CostMap::write(const std::string& /*file*/) const {
+    VPR_FATAL_ERROR(VPR_ERROR_OTHER, "Write CostMap requires the support of capnp");
+}
+
+#endif
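The read/write stubs above follow a compile-time fallback pattern: when capnp support is disabled the functions still compile and link, but fail loudly if called. A generic sketch of the same pattern; save_costs is an illustrative name, not part of this patch:

    #include <stdexcept>
    #include <string>

    #ifdef VTR_ENABLE_CAPNPROTO
    void save_costs(const std::string& file); // real capnp-backed writer defined elsewhere
    #else
    void save_costs(const std::string& /*file*/) {
        // Compiles without capnp; any attempt to use serialization fails at runtime.
        throw std::runtime_error("save_costs requires capnp support");
    }
    #endif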
diff --git a/vpr/src/route/router_lookahead_extended_map.cpp b/vpr/src/route/router_lookahead_extended_map.cpp
index fa5a9a9849e..b176372e686 100644
--- a/vpr/src/route/router_lookahead_extended_map.cpp
+++ b/vpr/src/route/router_lookahead_extended_map.cpp
@@ -419,7 +419,8 @@ std::pair ExtendedMapLookahead::run_dijkstra(RRNodeId start_node,

 // compute the cost maps for lookahead
 void ExtendedMapLookahead::compute(const std::vector& segment_inf) {
-    this->src_opin_delays = util::compute_router_src_opin_lookahead(is_flat_);
+    std::tie(this->src_opin_delays, this->src_opin_inter_layer_delays) = util::compute_router_src_opin_lookahead(is_flat_);
+
     this->chan_ipins_delays = util::compute_router_chan_ipin_lookahead();

     vtr::ScopedStartFinishTimer timer("Computing connection box lookahead map");
@@ -614,7 +615,8 @@ void ExtendedMapLookahead::write(const std::string& file) const {
 void ExtendedMapLookahead::read(const std::string& file) {
     cost_map_.read(file);

-    this->src_opin_delays = util::compute_router_src_opin_lookahead(is_flat_);
+    std::tie(this->src_opin_delays, this->src_opin_inter_layer_delays) = util::compute_router_src_opin_lookahead(is_flat_);
+
     this->chan_ipins_delays = util::compute_router_chan_ipin_lookahead();
 }

 void ExtendedMapLookahead::write(const std::string& file) const {
diff --git a/vpr/src/route/router_lookahead_extended_map.h b/vpr/src/route/router_lookahead_extended_map.h
index 424a1dfa23d..ccd3faaa2ad 100644
--- a/vpr/src/route/router_lookahead_extended_map.h
+++ b/vpr/src/route/router_lookahead_extended_map.h
@@ -19,6 +19,9 @@ class ExtendedMapLookahead : public RouterLookahead {
[hunk body not recoverable from this capture]
diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp
[index line and hunk position not recoverable from this capture]
 static void min_global_cost_map(vtr::NdMatrix& internal_opin_global_cost_map);
+/**
+ * @brief Iterate over all of the wire segments accessible from the SOURCE/OPIN (stored in src_opin_delay_map) and return the minimum cost (congestion and delay) across them to the sink
+ * @param src_opin_delay_map
+ * @param layer_num
+ * @param delta_x
+ * @param delta_y
+ * @return (delay, congestion)
+ */
+static std::pair<float, float> get_cost_from_src_opin(const std::map<int, util::t_reachable_wire_inf>& src_opin_delay_map,
+                                                      int layer_num,
+                                                      int delta_x,
+                                                      int delta_y);
+
 // Read the file and fill inter_tile_pin_primitive_pin_delay and tile_min_cost
 static void read_intra_cluster_router_lookahead(std::unordered_map& inter_tile_pin_primitive_pin_delay,
                                                 const std::string& file);
@@ -295,6 +308,10 @@ static void print_wire_cost_map(int layer_num, const std::vector&
 static void print_router_cost_map(const t_routing_cost_map& router_cost_map);

 /******** Interface class member function definitions ********/
+MapLookahead::MapLookahead(const t_det_routing_arch& det_routing_arch, bool is_flat)
+    : det_routing_arch_(det_routing_arch)
+    , is_flat_(is_flat) {}
+
 float MapLookahead::get_expected_cost(RRNodeId current_node, RRNodeId target_node, const t_conn_cost_params& params, float R_upstream) const {
     auto& device_ctx = g_vpr_ctx.device();
     const auto& rr_graph = device_ctx.rr_graph;
@@ -430,6 +447,7 @@ std::pair MapLookahead::get_expected_delay_and_cong(RRNodeId from_
     int delta_x, delta_y;
     int from_layer_num = rr_graph.node_layer(from_node);
+    int to_layer_num = rr_graph.node_layer(to_node);
     get_xy_deltas(from_node, to_node, &delta_x, &delta_y);
     delta_x = abs(delta_x);
     delta_y = abs(delta_y);
@@ -444,66 +462,31 @@ std::pair MapLookahead::get_expected_delay_and_cong(RRNodeId from_
         //reachable, we query the f_wire_cost_map (i.e. the wire lookahead) to get the final
         //delay to reach the sink.

-        t_physical_tile_type_ptr tile_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(from_node),
-                                                                                rr_graph.node_ylow(from_node),
-                                                                                from_layer_num});
+        t_physical_tile_type_ptr from_tile_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(from_node),
+                                                                                     rr_graph.node_ylow(from_node),
+                                                                                     from_layer_num});

-        auto tile_index = std::distance(&device_ctx.physical_tile_types[0], tile_type);
+        auto from_tile_index = std::distance(&device_ctx.physical_tile_types[0], from_tile_type);

         auto from_ptc = rr_graph.node_ptc_num(from_node);

-        if (this->src_opin_delays[from_layer_num][tile_index][from_ptc].empty()) {
-            //During lookahead profiling we were unable to find any wires which connected
-            //to this PTC.
-            //
-            //This can sometimes occur at very low channel widths (e.g. during min W search on
-            //small designs) where W discretization combined with fraction Fc may cause some
-            //pins/sources to be left disconnected.
-            //
-            //Such RR graphs are of course unroutable, but that should be determined by the
-            //router. So just return an arbitrary value here rather than error.
-
-            //We choose to return the largest (non-infinite) value possible, but scaled
-            //down by a large factor to maintain some dynaimc range in case this value ends
-            //up being processed (e.g. by the timing analyzer).
-            //
-            //The cost estimate should still be *extremely* large compared to a typical delay, and
-            //so should ensure that the router de-prioritizes exploring this path, but does not
-            //forbid the router from trying.
-            expected_delay_cost = std::numeric_limits<float>::max() / 1e12;
-            expected_cong_cost = std::numeric_limits<float>::max() / 1e12;
-        } else {
-            //From the current SOURCE/OPIN we look-up the wiretypes which are reachable
-            //and then add the estimates from those wire types for the distance of interest.
-            //If there are multiple options we use the minimum value.
-            for (const auto& kv : this->src_opin_delays[from_layer_num][tile_index][from_ptc]) {
-                const util::t_reachable_wire_inf& reachable_wire_inf = kv.second;
-
-                Cost_Entry wire_cost_entry;
-                if (reachable_wire_inf.wire_rr_type == SINK) {
-                    //Some pins maybe reachable via a direct (OPIN -> IPIN) connection.
-                    //In the lookahead, we treat such connections as 'special' wire types
-                    //with no delay/congestion cost
-                    wire_cost_entry.delay = 0;
-                    wire_cost_entry.congestion = 0;
-                } else {
-                    //For an actual accessible wire, we query the wire look-up to get it's
-                    //delay and congestion cost estimates
-                    wire_cost_entry = get_wire_cost_entry(reachable_wire_inf.wire_rr_type,
-                                                          reachable_wire_inf.wire_seg_index,
-                                                          from_layer_num,
-                                                          delta_x,
-                                                          delta_y);
-                }
-
-                float this_delay_cost = (params.criticality) * (reachable_wire_inf.delay + wire_cost_entry.delay);
-                float this_cong_cost = (1. - params.criticality) * (reachable_wire_inf.congestion + wire_cost_entry.congestion);
-
-                expected_delay_cost = std::min(expected_delay_cost, this_delay_cost);
-                expected_cong_cost = std::min(expected_cong_cost, this_cong_cost);
-            }
+        // Currently, we assume inter-layer connections are only from a block output pin to another layer. Thus, if the from and to layers are different,
+        // we use src_opin_inter_layer_delays.
+        if (from_layer_num == to_layer_num) {
+            std::tie(expected_delay_cost, expected_cong_cost) = get_cost_from_src_opin(src_opin_delays[from_layer_num][from_tile_index][from_ptc],
+                                                                                       from_layer_num,
+                                                                                       delta_x,
+                                                                                       delta_y);
+        } else if (from_layer_num != to_layer_num) {
+            std::tie(expected_delay_cost, expected_cong_cost) = get_cost_from_src_opin(src_opin_inter_layer_delays[from_layer_num][from_tile_index][from_ptc][to_layer_num],
+                                                                                       to_layer_num,
+                                                                                       delta_x,
+                                                                                       delta_y);
         }
+
+        expected_delay_cost *= params.criticality;
+        expected_cong_cost *= (1 - params.criticality);
+
         VTR_ASSERT_SAFE_MSG(std::isfinite(expected_delay_cost),
                             vtr::string_fmt("Lookahead failed to estimate cost from %s: %s",
                                             rr_node_arch_name(from_node, is_flat_).c_str(),
@@ -516,7 +499,6 @@ std::pair MapLookahead::get_expected_delay_and_cong(RRNodeId from_
                                 .c_str());

     } else if (from_type == CHANX || from_type == CHANY) {
-        VTR_ASSERT_SAFE(from_type == CHANX || from_type == CHANY);
         //When estimating costs from a wire, we directly look-up the result in the wire lookahead (f_wire_cost_map)

         auto from_cost_index = rr_graph.node_cost_index(from_node);
@@ -524,29 +506,34 @@ std::pair MapLookahead::get_expected_delay_and_cong(RRNodeId from_

         VTR_ASSERT(from_seg_index >= 0);

-        /* now get the expected cost from our lookahead map */
-        Cost_Entry cost_entry = get_wire_cost_entry(from_type,
-                                                    from_seg_index,
-                                                    from_layer_num,
-                                                    delta_x,
-                                                    delta_y);
-
-        float expected_delay = cost_entry.delay;
-        float expected_cong = cost_entry.congestion;
-
-        expected_delay_cost = params.criticality * expected_delay;
-        expected_cong_cost = (1.0 - params.criticality) * expected_cong;
-
-        VTR_ASSERT_SAFE_MSG(std::isfinite(expected_delay_cost),
-                            vtr::string_fmt("Lookahead failed to estimate cost from %s: %s",
-                                            rr_node_arch_name(from_node, is_flat_).c_str(),
-                                            describe_rr_node(rr_graph,
-                                                             device_ctx.grid,
-                                                             device_ctx.rr_indexed_data,
-                                                             from_node,
-                                                             is_flat_)
-                                                .c_str())
-                                .c_str());
+        // Since we assume that inter-layer connections are only from a block output pin to another layer, if the from node
+        // is of type CHANX/CHANY, and the sink node is on the other layer, there will be no path from that node to the sink
+        if (from_layer_num != to_layer_num) {
+            expected_delay_cost = std::numeric_limits<float>::max() / 1e12;
+            expected_cong_cost = std::numeric_limits<float>::max() / 1e12;
+        } else {
+            /* now get the expected cost from our lookahead map */
+            Cost_Entry cost_entry = get_wire_cost_entry(from_type,
+                                                        from_seg_index,
+                                                        from_layer_num,
+                                                        delta_x,
+                                                        delta_y);
+            expected_delay_cost = cost_entry.delay;
+            expected_cong_cost = cost_entry.congestion;
+
+            VTR_ASSERT_SAFE_MSG(std::isfinite(expected_delay_cost),
+                                vtr::string_fmt("Lookahead failed to estimate cost from %s: %s",
+                                                rr_node_arch_name(from_node, is_flat_).c_str(),
+                                                describe_rr_node(rr_graph,
+                                                                 device_ctx.grid,
+                                                                 device_ctx.rr_indexed_data,
+                                                                 from_node,
+                                                                 is_flat_)
+                                                    .c_str())
+                                    .c_str());
+        }
+        expected_delay_cost *= params.criticality;
+        expected_cong_cost *= (1 - params.criticality);
     } else if (from_type == IPIN) { /* Change if you're allowing route-throughs */
         return std::make_pair(0., device_ctx.rr_indexed_data[RRIndexedDataId(SINK_COST_INDEX)].base_cost);

     } else { /* Change this if you want to investigate route-throughs */
@@ -565,7 +552,7 @@ void MapLookahead::compute(const std::vector& segment_inf) {

     //Next, compute which wire types are accessible (and the cost to reach them)
     //from the different physical tile type's SOURCEs & OPINs
-    this->src_opin_delays = util::compute_router_src_opin_lookahead(is_flat_);
+    std::tie(this->src_opin_delays, this->src_opin_inter_layer_delays) = util::compute_router_src_opin_lookahead(is_flat_);
 }

 void MapLookahead::compute_intra_tile() {
@@ -588,7 +575,7 @@ void MapLookahead::read(const std::string& file) {

     //Next, compute which wire types are accessible (and the cost to reach them)
     //from the different physical tile type's SOURCEs & OPINs
-    this->src_opin_delays = util::compute_router_src_opin_lookahead(is_flat_);
+    std::tie(this->src_opin_delays, this->src_opin_inter_layer_delays) = util::compute_router_src_opin_lookahead(is_flat_);
 }

 void MapLookahead::read_intra_cluster(const std::string& file) {
@@ -1503,6 +1490,67 @@ static void min_global_cost_map(vtr::NdMatrix& internal_opi
     }
 }

+static std::pair<float, float> get_cost_from_src_opin(const std::map<int, util::t_reachable_wire_inf>& src_opin_delay_map,
+                                                      int layer_num,
+                                                      int delta_x,
+                                                      int delta_y) {
+    float expected_delay_cost = std::numeric_limits<float>::infinity();
+    float expected_cong_cost = std::numeric_limits<float>::infinity();
+    if (src_opin_delay_map.empty()) {
+        //During lookahead profiling we were unable to find any wires which connected
+        //to this PTC.
+        //
+        //This can sometimes occur at very low channel widths (e.g. during min W search on
+        //small designs) where W discretization combined with fraction Fc may cause some
+        //pins/sources to be left disconnected.
+        //
+        //Such RR graphs are of course unroutable, but that should be determined by the
+        //router. So just return an arbitrary value here rather than error.
+
+        //We choose to return the largest (non-infinite) value possible, but scaled
+        //down by a large factor to maintain some dynamic range in case this value ends
+        //up being processed (e.g. by the timing analyzer).
+        //
+        //The cost estimate should still be *extremely* large compared to a typical delay, and
+        //so should ensure that the router de-prioritizes exploring this path, but does not
+        //forbid the router from trying.
+        expected_delay_cost = std::numeric_limits<float>::max() / 1e12;
+        expected_cong_cost = std::numeric_limits<float>::max() / 1e12;
+    } else {
+        //From the current SOURCE/OPIN we look-up the wire types which are reachable
+        //and then add the estimates from those wire types for the distance of interest.
+        //If there are multiple options we use the minimum value.
+        for (const auto& kv : src_opin_delay_map) {
+            const util::t_reachable_wire_inf& reachable_wire_inf = kv.second;
+
+            Cost_Entry wire_cost_entry;
+            if (reachable_wire_inf.wire_rr_type == SINK) {
+                //Some pins may be reachable via a direct (OPIN -> IPIN) connection.
+                //In the lookahead, we treat such connections as 'special' wire types
+                //with no delay/congestion cost
+                wire_cost_entry.delay = 0;
+                wire_cost_entry.congestion = 0;
+            } else {
+                //For an actual accessible wire, we query the wire look-up to get its
+                //delay and congestion cost estimates
+                wire_cost_entry = get_wire_cost_entry(reachable_wire_inf.wire_rr_type,
+                                                      reachable_wire_inf.wire_seg_index,
+                                                      layer_num,
+                                                      delta_x,
+                                                      delta_y);
+            }
+
+            float this_delay_cost = reachable_wire_inf.delay + wire_cost_entry.delay;
+            float this_cong_cost = reachable_wire_inf.congestion + wire_cost_entry.congestion;
+
+            expected_delay_cost = std::min(expected_delay_cost, this_delay_cost);
+            expected_cong_cost = std::min(expected_cong_cost, this_cong_cost);
+        }
+    }
+
+    return std::make_pair(expected_delay_cost, expected_cong_cost);
+}
+
 //
 // When writing capnp targeted serialization, always allow compilation when
 // VTR_ENABLE_CAPNPROTO=OFF. Generally this means throwing an exception
@@ -1518,7 +1566,7 @@ void read_router_lookahead(const std::string& /*file*/) {
     VPR_THROW(VPR_ERROR_PLACE, "MapLookahead::read " DISABLE_ERROR);
 }

-void DeltaDelayModel::write(const std::string& /*file*/) const {
+void write_router_lookahead(const std::string& file) {
     VPR_THROW(VPR_ERROR_PLACE, "MapLookahead::write " DISABLE_ERROR);
 }
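A worked example of how the (delay, congestion) pair produced by get_cost_from_src_opin() is weighted before use, per the criticality scaling above (all values illustrative):

    float delay_est = 3.0e-9f; // seconds, from the wire cost map
    float cong_est = 12.0f;    // congestion units, from the wire cost map
    float criticality = 0.8f;  // a timing-critical connection

    float expected_delay_cost = delay_est * criticality;        // 2.4e-9
    float expected_cong_cost = cong_est * (1.0f - criticality); // 2.4

With criticality near 1 the delay term dominates; near 0, congestion does.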
diff --git a/vpr/src/route/router_lookahead_map.h b/vpr/src/route/router_lookahead_map.h
index f3e82531510..d6340acac85 100644
--- a/vpr/src/route/router_lookahead_map.h
+++ b/vpr/src/route/router_lookahead_map.h
@@ -8,9 +8,7 @@
 class MapLookahead : public RouterLookahead {
   public:
-    explicit MapLookahead(const t_det_routing_arch& det_routing_arch, bool is_flat)
-        : det_routing_arch_(det_routing_arch)
-        , is_flat_(is_flat) {}
+    explicit MapLookahead(const t_det_routing_arch& det_routing_arch, bool is_flat);

   private:
     //Look-up table from SOURCE/OPIN to CHANX/CHANY of various types
@@ -21,6 +19,9 @@ class MapLookahead : public RouterLookahead {
     std::unordered_map> tile_min_cost; // [physical_tile_type][sink_physical_num] -> cost
     // Lookup table to store the minimum cost for each dx and dy
     vtr::NdMatrix distance_based_min_cost; // [layer_num][dx][dy] -> cost
+    // [from_layer_num][tile_index][ptc_num][to_layer_num] -> (delay, congestion)
+    util::t_src_opin_inter_layer_delays src_opin_inter_layer_delays;
+
     const t_det_routing_arch& det_routing_arch_;
     bool is_flat_;
diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp
index e3141e947c3..01c0e79d16c 100644
--- a/vpr/src/route/router_lookahead_map_utils.cpp
+++ b/vpr/src/route/router_lookahead_map_utils.cpp
@@ -19,7 +19,8 @@
 #include "route_common.h"
 #include "route_timing.h"

-static void dijkstra_flood_to_wires(int itile, RRNodeId inode, util::t_src_opin_delays& src_opin_delays);
+static void dijkstra_flood_to_wires(int itile, RRNodeId inode, util::t_src_opin_delays& src_opin_delays, util::t_src_opin_inter_layer_delays& src_opin_inter_layer_delays, bool is_multi_layer);
+
 static void dijkstra_flood_to_ipins(RRNodeId node, util::t_chan_ipins_delays& chan_ipins_delays);

 static t_physical_tile_loc pick_sample_tile(int layer_num, t_physical_tile_type_ptr tile_type, t_physical_tile_loc prev);
@@ -305,21 +306,32 @@ template void expand_dijkstra_neighbours(const RRGraphView& rr_graph,
                                          std::vector, std::greater>* pq);

-t_src_opin_delays compute_router_src_opin_lookahead(bool is_flat) {
+std::pair<t_src_opin_delays, t_src_opin_inter_layer_delays> compute_router_src_opin_lookahead(bool is_flat) {
     vtr::ScopedStartFinishTimer timer("Computing src/opin lookahead");
     auto& device_ctx = g_vpr_ctx.device();
     auto& rr_graph = device_ctx.rr_graph;

-    t_src_opin_delays src_opin_delays;
+    int num_layers = device_ctx.grid.get_num_layers();
+    bool is_multi_layer = (num_layers > 1);

-    src_opin_delays.resize(device_ctx.grid.get_num_layers());
-    for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) {
+    t_src_opin_delays src_opin_delays;
+    src_opin_delays.resize(num_layers);
+    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
         src_opin_delays[layer_num].resize(device_ctx.physical_tile_types.size());
     }

+    t_src_opin_inter_layer_delays src_opin_inter_layer_delays;
+    if (is_multi_layer) {
+        src_opin_inter_layer_delays.resize(num_layers);
+        for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+            int num_physical_tiles = (int)device_ctx.physical_tile_types.size();
+            src_opin_inter_layer_delays[layer_num].resize(num_physical_tiles);
+        }
+    }
+
     //We assume that the routing connectivity of each instance of a physical tile is the same,
     //and so only measure one instance of each type
-    for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) {
+    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
         for (size_t itile = 0; itile < device_ctx.physical_tile_types.size(); ++itile) {
             if (device_ctx.grid.num_instances(&device_ctx.physical_tile_types[itile], layer_num) == 0) {
                 continue;
@@ -356,11 +368,22 @@ t_src_opin_delays compute_router_src_opin_lookahead(bool is_flat) {
                     if (ptc >= int(src_opin_delays[layer_num][itile].size())) {
                         src_opin_delays[layer_num][itile].resize(ptc + 1); //Inefficient but functional...
+                        if (is_multi_layer) {
+                            size_t old_size = src_opin_inter_layer_delays[layer_num][itile].size();
+                            src_opin_inter_layer_delays[layer_num][itile].resize(ptc + 1);
+                            for (size_t i = old_size; i < src_opin_inter_layer_delays[layer_num][itile].size(); ++i) {
+                                src_opin_inter_layer_delays[layer_num][itile][i].resize(num_layers);
+                            }
+                        }
                     }

                     //Find the wire types which are reachable from inode and record them and
                     //the cost to reach them
-                    dijkstra_flood_to_wires(itile, node_id, src_opin_delays);
+                    dijkstra_flood_to_wires(itile,
+                                            node_id,
+                                            src_opin_delays,
+                                            src_opin_inter_layer_delays,
+                                            is_multi_layer);

                     if (src_opin_delays[layer_num][itile][ptc].empty()) {
                         VTR_LOGV_DEBUG(f_router_debug, "Found no reachable wires from %s (%s) at (%d,%d)\n",
@@ -383,7 +406,7 @@ t_src_opin_delays compute_router_src_opin_lookahead(bool is_flat) {
         }
     }

-    return src_opin_delays;
+    return std::make_pair(src_opin_delays, src_opin_inter_layer_delays);
 }

 t_chan_ipins_delays compute_router_chan_ipin_lookahead() {
@@ -466,7 +489,11 @@ t_ipin_primitive_sink_delays compute_intra_tile_dijkstra(const RRGraphView& rr_g

 } // namespace util

-static void dijkstra_flood_to_wires(int itile, RRNodeId node, util::t_src_opin_delays& src_opin_delays) {
+static void dijkstra_flood_to_wires(int itile,
+                                    RRNodeId node,
+                                    util::t_src_opin_delays& src_opin_delays,
+                                    util::t_src_opin_inter_layer_delays& src_opin_inter_layer_delays,
+                                    bool is_multi_layer) {
     auto& device_ctx = g_vpr_ctx.device();
     const auto& rr_graph = device_ctx.rr_graph;

@@ -516,6 +543,7 @@ static void dijkstra_flood_to_wires(int itile, RRNodeId node, util::t_src_opin_d
         pq.pop();

         e_rr_type curr_rr_type = rr_graph.node_type(curr.node);
+        int curr_layer_num = rr_graph.node_layer(curr.node);
         if (curr_rr_type == CHANX || curr_rr_type == CHANY || curr_rr_type == SINK) {
             //We stop expansion at any CHANX/CHANY/SINK
             int seg_index;
@@ -535,12 +563,20 @@ static void dijkstra_flood_to_wires(int itile, RRNodeId node, util::t_src_opin_d
             }

             //Keep costs of the best path to reach each wire type
-            if (!src_opin_delays[node_layer_num][itile][ptc].count(seg_index)
-                || curr.delay < src_opin_delays[node_layer_num][itile][ptc][seg_index].delay) {
+            if ((!src_opin_delays[node_layer_num][itile][ptc].count(seg_index)
+                 || curr.delay < src_opin_delays[node_layer_num][itile][ptc][seg_index].delay)
+                && curr_layer_num == node_layer_num) {
                 src_opin_delays[node_layer_num][itile][ptc][seg_index].wire_rr_type = curr_rr_type;
                 src_opin_delays[node_layer_num][itile][ptc][seg_index].wire_seg_index = seg_index;
                 src_opin_delays[node_layer_num][itile][ptc][seg_index].delay = curr.delay;
                 src_opin_delays[node_layer_num][itile][ptc][seg_index].congestion = curr.congestion;
+            } else if (is_multi_layer && (!src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num].count(seg_index) || curr.delay < src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].delay)
+                       && curr_layer_num != node_layer_num) {
+                // Store a CHANX/Y node or a SINK node on another layer that is reachable by the current node.
+                src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].wire_rr_type = curr_rr_type;
+                src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].wire_seg_index = seg_index;
+                src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].delay = curr.delay;
+                src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].congestion = curr.congestion;
             }

         } else if (curr_rr_type == SOURCE || curr_rr_type == OPIN || curr_rr_type == IPIN) {
@@ -564,11 +600,6 @@ static void dijkstra_flood_to_wires(int itile, RRNodeId node, util::t_src_opin_d
             continue;
         }

-        if (rr_graph.node_layer(curr.node) != node_layer_num) {
-            //Don't change the layer
-            continue;
-        }
-
         t_pq_entry next;
         next.congestion = curr.congestion + incr_cong; //Of current node
         next.delay = curr.delay + incr_delay;          //To reach next node
diff --git a/vpr/src/route/router_lookahead_map_utils.h b/vpr/src/route/router_lookahead_map_utils.h
index 24f02df20f8..0245208fdf7 100644
--- a/vpr/src/route/router_lookahead_map_utils.h
+++ b/vpr/src/route/router_lookahead_map_utils.h
@@ -271,8 +271,11 @@ struct t_reachable_wire_inf {
 // SOURCE/OPIN of a given tile type.
 //
 // When querying this data structure, the minimum cost is computed for each delay/congestion pair, and returned
-// as the lookahead expected cost.
+// as the lookahead expected cost. [opin/src layer_num][tile_index][opin/src ptc_number] -> (delay, congestion) pair
 typedef std::vector>>> t_src_opin_delays;

+// Stores the wire segments on to_layer_num reachable from a given SOURCE/OPIN
+// [from_layer_num][tile_index][from opin/src ptc num][to_layer_num] -> (delay, congestion) pair
+typedef std::vector>>>> t_src_opin_inter_layer_delays;
+
 //[from pin ptc num][target sink ptc num]->cost
 typedef std::vector> t_ipin_primitive_sink_delays;

@@ -288,7 +291,13 @@ typedef std::vector> t_ipin_primitive_sink_d
 // and the tile's IPIN. If there are many connections to the same IPIN, the one with the minimum delay is selected.
 typedef std::vector>> t_chan_ipins_delays;

-t_src_opin_delays compute_router_src_opin_lookahead(bool is_flat);
+/**
+ * @brief For each tile, iterate over its OPINs and store which segment types are accessible from each OPIN
+ * @param is_flat
+ * @return (segments accessible on the same layer, segments accessible on other layers)
+ */
+std::pair<t_src_opin_delays, t_src_opin_inter_layer_delays> compute_router_src_opin_lookahead(bool is_flat);
+
 t_chan_ipins_delays compute_router_chan_ipin_lookahead();

 t_ipin_primitive_sink_delays compute_intra_tile_dijkstra(const RRGraphView& rr_graph,
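An indexing sketch for the two tables above; the inter-layer table simply adds the destination layer as one more dimension (the index values here are illustrative):

    int from_layer = 0, to_layer = 1; // assumed two-die device
    int tile_index = 3, ptc = 7;      // assumed valid tile type / pin indices

    const auto& same_layer_wires = src_opin_delays[from_layer][tile_index][ptc];
    const auto& cross_layer_wires = src_opin_inter_layer_delays[from_layer][tile_index][ptc][to_layer];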
diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp
index 0b0bb8f90ab..ee879fea226 100644
--- a/vpr/src/route/rr_graph.cpp
+++ b/vpr/src/route/rr_graph.cpp
@@ -673,46 +673,47 @@ void create_rr_graph(const t_graph_type graph_type,
     bool echo_enabled = getEchoEnabled() && isEchoFileEnabled(E_ECHO_RR_GRAPH_INDEXED_DATA);
     const char* echo_file_name = getEchoFileName(E_ECHO_RR_GRAPH_INDEXED_DATA);
     bool load_rr_graph = !det_routing_arch->read_rr_graph_filename.empty();
-    if (load_rr_graph) {
-        if (device_ctx.read_rr_graph_filename != det_routing_arch->read_rr_graph_filename) {
-            free_rr_graph();
-            load_rr_file(&mutable_device_ctx.rr_graph_builder,
-                         &mutable_device_ctx.rr_graph,
-                         device_ctx.physical_tile_types,
-                         segment_inf,
-                         &mutable_device_ctx.rr_indexed_data,
-                         &mutable_device_ctx.rr_rc_data,
-                         grid,
-                         device_ctx.arch_switch_inf,
-                         graph_type,
-                         device_ctx.arch,
-                         &mutable_device_ctx.chan_width,
-                         router_opts.base_cost_type,
-                         device_ctx.virtual_clock_network_root_idx,
-                         &det_routing_arch->wire_to_rr_ipin_switch,
-                         &det_routing_arch->wire_to_arch_ipin_switch_between_dice,
-                         det_routing_arch->read_rr_graph_filename.c_str(),
-                         &det_routing_arch->read_rr_graph_filename,
-                         router_opts.read_rr_edge_metadata,
-                         router_opts.do_check_rr_graph,
-                         echo_enabled,
-                         echo_file_name,
-                         is_flat);
-            if (router_opts.reorder_rr_graph_nodes_algorithm != DONT_REORDER) {
-                mutable_device_ctx.rr_graph_builder.reorder_nodes(router_opts.reorder_rr_graph_nodes_algorithm,
-                                                                  router_opts.reorder_rr_graph_nodes_threshold,
-                                                                  router_opts.reorder_rr_graph_nodes_seed);
-            }
+    if (channel_widths_unchanged(device_ctx.chan_width, nodes_per_chan) && !device_ctx.rr_graph.empty()) {
+        //No change in channel width, so skip re-building RR graph
+        if (is_flat && !device_ctx.rr_graph_is_flat) {
+            VTR_LOG("RR graph channel widths unchanged, intra-cluster resources should be added...\n");
+        } else {
+            VTR_LOG("RR graph channel widths unchanged, skipping RR graph rebuild\n");
+            return;
         }
     } else {
-        if (channel_widths_unchanged(device_ctx.chan_width, nodes_per_chan) && !device_ctx.rr_graph.empty()) {
-            //No change in channel width, so skip re-building RR graph
-            if (is_flat && !device_ctx.rr_graph_is_flat) {
-                VTR_LOG("RR graph channel widths unchanged, intra-cluster resources should be added...\n");
-            } else {
-                VTR_LOG("RR graph channel widths unchanged, skipping RR graph rebuild\n");
-                return;
+        if (load_rr_graph) {
+            if (device_ctx.read_rr_graph_filename != det_routing_arch->read_rr_graph_filename) {
+                free_rr_graph();
+
+                load_rr_file(&mutable_device_ctx.rr_graph_builder,
+                             &mutable_device_ctx.rr_graph,
+                             device_ctx.physical_tile_types,
+                             segment_inf,
+                             &mutable_device_ctx.rr_indexed_data,
+                             &mutable_device_ctx.rr_rc_data,
+                             grid,
+                             device_ctx.arch_switch_inf,
+                             graph_type,
+                             device_ctx.arch,
+                             &mutable_device_ctx.chan_width,
+                             router_opts.base_cost_type,
+                             device_ctx.virtual_clock_network_root_idx,
+                             &det_routing_arch->wire_to_rr_ipin_switch,
+                             &det_routing_arch->wire_to_arch_ipin_switch_between_dice,
+                             det_routing_arch->read_rr_graph_filename.c_str(),
+                             &det_routing_arch->read_rr_graph_filename,
+                             router_opts.read_rr_edge_metadata,
+                             router_opts.do_check_rr_graph,
+                             echo_enabled,
+                             echo_file_name,
+                             is_flat);
+                if (router_opts.reorder_rr_graph_nodes_algorithm != DONT_REORDER) {
+                    mutable_device_ctx.rr_graph_builder.reorder_nodes(router_opts.reorder_rr_graph_nodes_algorithm,
+                                                                      router_opts.reorder_rr_graph_nodes_threshold,
+                                                                      router_opts.reorder_rr_graph_nodes_seed);
+                }
             }
         } else {
             free_rr_graph();
diff --git a/vpr/src/route/rr_graph_area.cpp b/vpr/src/route/rr_graph_area.cpp
index 996723ad11e..3ac736eebd8 100644
--- a/vpr/src/route/rr_graph_area.cpp
+++ b/vpr/src/route/rr_graph_area.cpp
@@ -492,7 +492,7 @@ void count_unidir_routing_transistors(std::vector& /*segment_inf*

     VTR_LOG("\n");
     VTR_LOG("Routing area (in minimum width transistor areas)...\n");
-    VTR_LOG("\tTotal routing area: %#g, per logic tile: %#g\n", ntrans, ntrans / (float)(device_ctx.grid.width() * device_ctx.grid.height()));
+    VTR_LOG("\tTotal routing area: %#g, per logic tile: %#g\n", ntrans, ntrans / (float)(device_ctx.grid.get_num_layers() * device_ctx.grid.width() * device_ctx.grid.height()));
 }

 static float get_cblock_trans(int* num_inputs_to_cblock, int wire_to_ipin_switch, int max_inputs_to_cblock, float trans_sram_bit) {
diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp
index 6157c9b980d..b200a06ba7f 100644
--- a/vpr/src/util/vpr_utils.cpp
+++ b/vpr/src/util/vpr_utils.cpp
@@ -2508,3 +2508,25 @@ void add_pb_child_to_list(std::list& pb_list, const t_pb* parent_pb
         }
     }
 }
+
+float get_min_cross_layer_delay(const std::vector& arch_switch_inf,
+                                const std::vector& segment_inf,
+                                const int wire_to_ipin_arch_sw_id) {
+    float min_delay = std::numeric_limits<float>::max();
+
+    // Check whether the inter-layer switch type for the connection block is defined. If it is,
+    // get its delay.
+    if (wire_to_ipin_arch_sw_id != OPEN) {
+        min_delay = arch_switch_inf[wire_to_ipin_arch_sw_id].Tdel();
+    }
+
+    // Iterate over the inter-layer switch types of segments to find the minimum delay
+    for (const auto& seg_inf : segment_inf) {
+        int cross_layer_sw_arch_id = seg_inf.arch_opin_between_dice_switch;
+        if (cross_layer_sw_arch_id != OPEN) {
+            min_delay = std::min(min_delay, arch_switch_inf[cross_layer_sw_arch_id].Tdel());
+        }
+    }
+
+    return min_delay;
+}
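A usage sketch for get_min_cross_layer_delay(); the argument names mirror the signature above, and the surrounding context (loaded architecture data) is assumed:

    float min_cross_die_delay = get_min_cross_layer_delay(device_ctx.arch_switch_inf,
                                                          segment_inf,
                                                          wire_to_ipin_arch_sw_id);
    // If the architecture defines no inter-layer switch at all, the
    // std::numeric_limits<float>::max() initial value comes back unchanged.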
diff --git a/vpr/src/util/vpr_utils.h b/vpr/src/util/vpr_utils.h
index 1ba3dcb35b7..75842967cd1 100644
--- a/vpr/src/util/vpr_utils.h
+++ b/vpr/src/util/vpr_utils.h
@@ -311,4 +311,16 @@ t_arch_switch_inf create_internal_arch_sw(float delay);

 void add_pb_child_to_list(std::list& pb_list, const t_pb* parent_pb);

+/**
+ * @brief Iterates over all inter-layer switch types and returns the minimum delay among them.
+ * Useful for the router lookahead to have some estimate of the cost of crossing a layer.
+ * @param arch_switch_inf
+ * @param segment_inf
+ * @param wire_to_ipin_arch_sw_id
+ * @return
+ */
+float get_min_cross_layer_delay(const std::vector& arch_switch_inf,
+                                const std::vector& segment_inf,
+                                const int wire_to_ipin_arch_sw_id);
+
 #endif
diff --git a/vpr/test/test_connection_router.cpp b/vpr/test/test_connection_router.cpp
index 6c1b54734e3..0fef4f22a84 100644
--- a/vpr/test/test_connection_router.cpp
+++ b/vpr/test/test_connection_router.cpp
@@ -34,6 +34,8 @@ static float do_one_route(RRNodeId source_node,
     bounding_box.xmax = device_ctx.grid.width() + 1;
     bounding_box.ymin = 0;
     bounding_box.ymax = device_ctx.grid.height() + 1;
+    bounding_box.layer_min = 0;
+    bounding_box.layer_max = device_ctx.grid.get_num_layers() - 1;

     t_conn_cost_params cost_params;
     cost_params.criticality = router_opts.max_criticality;
diff --git a/vpr/test/test_place_delay_model_serdes.cpp b/vpr/test/test_place_delay_model_serdes.cpp
index 818b5cc3dfe..988b3e255b4 100644
--- a/vpr/test/test_place_delay_model_serdes.cpp
+++ b/vpr/test/test_place_delay_model_serdes.cpp
@@ -23,12 +23,17 @@ TEST_CASE("round_trip_delta_delay_model", "[vpr]") {
         }
     }

-    DeltaDelayModel model(std::move(delays), false);
+    float min_cross_layer_delay = 0.;
+
+    DeltaDelayModel model(min_cross_layer_delay,
+                          std::move(delays),
+                          false);
     const auto& delays1 = model.delays();

     model.write(kDeltaDelayBin);

-    DeltaDelayModel model2(false);
+    DeltaDelayModel model2(min_cross_layer_delay,
+                           false);
     model2.read(kDeltaDelayBin);
     const auto& delays2 = model2.delays();

@@ -61,15 +66,19 @@ TEST_CASE("round_trip_override_delay_model", "[vpr]") {
             }
         }
     }
-    OverrideDelayModel model(false);
-    auto base_model = std::make_unique(delays, false);
+    float min_cross_layer_delay = 0.;
+    OverrideDelayModel model(min_cross_layer_delay, false);
+    auto base_model = std::make_unique<DeltaDelayModel>(min_cross_layer_delay,
+                                                        delays,
+                                                        false);
     model.set_base_delay_model(std::move(base_model));

     model.set_delay_override(1, 2, 3, 4, 5, 6, -1);
     model.set_delay_override(2, 2, 3, 4, 5, 6, -2);

     model.write(kOverrideDelayBin);

-    OverrideDelayModel model2(false);
+    OverrideDelayModel model2(min_cross_layer_delay,
+                              false);
     model2.read(kOverrideDelayBin);

     const auto& delays1 = model.base_delay_model()->delays();
diff --git a/vpr/test/test_vpr_constraints.cpp b/vpr/test/test_vpr_constraints.cpp
index 30772950e19..f9a5d7e5bd4 100644
--- a/vpr/test/test_vpr_constraints.cpp
+++ b/vpr/test/test_vpr_constraints.cpp
@@ -441,7 +441,7 @@ TEST_CASE("PartRegionIntersect6", "[vpr]") {
 TEST_CASE("MacroConstraints", "[vpr]") {
     t_pl_macro pl_macro;
     PartitionRegion head_pr;
-    t_pl_offset offset(2, 1, 0);
+    t_pl_offset offset(2, 1, 0, 0);

     Region reg;
     reg.set_region_rect({5, 2, 9, 6, 0});
diff --git a/vtr_flow/arch/multi_die/aman_3d_coffe.xml b/vtr_flow/arch/multi_die/aman_3d_coffe.xml
new file mode 100644
index 00000000000..1825d967a18
--- /dev/null
+++ b/vtr_flow/arch/multi_die/aman_3d_coffe.xml
@@ -0,0 +1,1597 @@
[1597-line 3D (multi-die) architecture file; the XML markup did not survive this capture. The recoverable text shows io, clb, dsp_top, and memory block pin listings plus switch-block connectivity patterns.]
diff --git a/vtr_flow/arch/multi_die/aman_3d_limited.xml b/vtr_flow/arch/multi_die/aman_3d_limited.xml
new file mode 100644
index 00000000000..7d67b2c996c
--- /dev/null
+++ b/vtr_flow/arch/multi_die/aman_3d_limited.xml
@@ -0,0 +1,1579 @@
[1579-line 3D (multi-die) architecture file variant; the XML markup did not survive this capture. The recoverable text shows io, clb, dsp_top, and memory block pin listings plus switch-block connectivity patterns.]
diff --git a/vtr_flow/parse/parse_config/common/vpr.place.txt b/vtr_flow/parse/parse_config/common/vpr.place.txt
index 8713e8fe51f..64da113842b 100644
--- a/vtr_flow/parse/parse_config/common/vpr.place.txt
+++ b/vtr_flow/parse/parse_config/common/vpr.place.txt
@@ -1,6 +1,13 @@
 #VPR Place Metrics
 placed_wirelength_est;vpr.out;BB estimate of min-dist \(placement\) wire length: (\d+)

+#VPR placement swap metrics
+total_swap;vpr.out;Swaps called\s*:\s*(\d+)
+accepted_swap;vpr.out;\s*Swaps accepted\s*:\s*(\d+).*
+rejected_swap;vpr.out;\s*Swaps rejected\s*:\s*(\d+).*
+aborted_swap;vpr.out;\s*Swaps aborted\s*:\s*(\d+).*
+
+
 #VPR Run-time Metrics
 place_mem;vpr.out;.*Placement took.*\(max_rss (.*), .*\)
 place_time;vpr.out;\s*Placement took (.*) seconds
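The new parse entries are plain regular expressions matched against vpr.out. A self-contained sketch of how the total_swap pattern captures its metric; the exact log line text is inferred from the regex itself, not quoted from VPR output:

    #include <cassert>
    #include <regex>
    #include <string>

    int main() {
        std::string line = "Swaps called                 : 12345";
        std::regex total_swap(R"(Swaps called\s*:\s*(\d+))");
        std::smatch m;
        assert(std::regex_search(line, m, total_swap));
        // m[1].str() == "12345" becomes the total_swap metric.
        return 0;
    }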