Skip to content

Changing subtile selection in the try_centroid_placement of initial_placement #2897

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 58 additions & 12 deletions vpr/src/place/initial_placement.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,25 @@ static bool is_loc_legal(const t_pl_loc& loc,
const PartitionRegion& pr,
t_logical_block_type_ptr block_type);

/**
* @brief Helper function to choose a subtile in specified location if the type is compatible and an available one exists.
*
* @param centroid The centroid location at which the subtile will be selected using its x, y, and layer.
* @param block_type Logical block type we would like to place here.
* @param block_loc_registry Information on where other blocks have been placed.
* @param pr The PartitionRegion of the block we are trying to place - represents its floorplanning constraints;
* it is the size of the whole chip if the block is not constrained.
* @param rng A random number generator to select a subtile from the available and compatible ones.
*
* @return True if the location is on the chip, legal, and at least one available subtile is found at that location;
* false otherwise.
*/
static bool find_subtile_in_location(t_pl_loc& centroid,
t_logical_block_type_ptr block_type,
const BlkLocRegistry& blk_loc_registry,
const PartitionRegion& pr,
vtr::RngContainer& rng);

/**
* @brief Calculates a centroid location for a block based on its placed connections.
*
Expand Down Expand Up @@ -340,6 +359,39 @@ static bool is_loc_legal(const t_pl_loc& loc,
return legal;
}

bool find_subtile_in_location(t_pl_loc& centroid,
t_logical_block_type_ptr block_type,
const BlkLocRegistry& blk_loc_registry,
const PartitionRegion& pr,
vtr::RngContainer& rng) {
//check if the location is on chip and legal, if yes try to update subtile
if (is_loc_on_chip({centroid.x, centroid.y, centroid.layer}) && is_loc_legal(centroid, pr, block_type)) {
//find the compatible subtiles
const auto& device_ctx = g_vpr_ctx.device();
const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[block_type->index];
const auto& type = device_ctx.grid.get_physical_type({centroid.x, centroid.y, centroid.layer});
const auto& compatible_sub_tiles = compressed_block_grid.compatible_sub_tile_num(type->index);

//filter out the occupied subtiles
const GridBlock& grid_blocks = blk_loc_registry.grid_blocks();
std::vector<int> available_sub_tiles;
available_sub_tiles.reserve(compatible_sub_tiles.size());
for (int sub_tile : compatible_sub_tiles) {
t_pl_loc pos = {centroid.x, centroid.y, sub_tile, centroid.layer};
if (!grid_blocks.block_at_location(pos)) {
available_sub_tiles.push_back(sub_tile);
}
}

if (!available_sub_tiles.empty()) {
centroid.sub_tile = available_sub_tiles[rng.irand((int)available_sub_tiles.size() - 1)];
return true;
}
}

return false;
}

static bool find_centroid_neighbor(t_pl_loc& centroid_loc,
t_logical_block_type_ptr block_type,
bool search_for_empty,
Expand Down Expand Up @@ -551,10 +603,13 @@ static bool try_centroid_placement(const t_pl_macro& pl_macro,
t_pl_loc centroid_loc(OPEN, OPEN, OPEN, OPEN);
std::vector<ClusterBlockId> unplaced_blocks_to_update_their_score;

bool found_legal_subtile = false;

if (!flat_placement_info.valid) {
// If a flat placement is not provided, use the centroid of connected
// blocks which have already been placed.
unplaced_blocks_to_update_their_score = find_centroid_loc(pl_macro, centroid_loc, blk_loc_registry);
found_legal_subtile = find_subtile_in_location(centroid_loc, block_type, blk_loc_registry, pr, rng);
} else {
// If a flat placement is provided, use the flat placement to get the
// centroid.
Expand All @@ -567,6 +622,7 @@ static bool try_centroid_placement(const t_pl_macro& pl_macro,
if (!is_loc_on_chip({centroid_loc.x, centroid_loc.y, centroid_loc.layer}) ||
!is_loc_legal(centroid_loc, pr, block_type)) {
unplaced_blocks_to_update_their_score = find_centroid_loc(pl_macro, centroid_loc, blk_loc_registry);
found_legal_subtile = find_subtile_in_location(centroid_loc, block_type, blk_loc_registry, pr, rng);
}
}

Expand All @@ -577,9 +633,8 @@ static bool try_centroid_placement(const t_pl_macro& pl_macro,

//centroid suggestion was either occupied or does not match block type
//try to find a near location that meet these requirements
bool neighbor_legal_loc = false;
if (!is_loc_legal(centroid_loc, pr, block_type)) {
neighbor_legal_loc = find_centroid_neighbor(centroid_loc, block_type, false, blk_loc_registry, rng);
if (!found_legal_subtile) {
bool neighbor_legal_loc = find_centroid_neighbor(centroid_loc, block_type, false, blk_loc_registry, rng);
if (!neighbor_legal_loc) { //no neighbor candidate found
return false;
}
Expand All @@ -591,15 +646,6 @@ static bool try_centroid_placement(const t_pl_macro& pl_macro,
}

auto& device_ctx = g_vpr_ctx.device();
//choose the location's subtile if the centroid location is legal.
//if the location is found within the "find_centroid_neighbor", it already has a subtile
//we don't need to find one again
if (!neighbor_legal_loc) {
const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[block_type->index];
const auto& type = device_ctx.grid.get_physical_type({centroid_loc.x, centroid_loc.y, centroid_loc.layer});
const auto& compatible_sub_tiles = compressed_block_grid.compatible_sub_tile_num(type->index);
centroid_loc.sub_tile = compatible_sub_tiles[rng.irand((int)compatible_sub_tiles.size() - 1)];
}
int width_offset = device_ctx.grid.get_width_offset({centroid_loc.x, centroid_loc.y, centroid_loc.layer});
int height_offset = device_ctx.grid.get_height_offset({centroid_loc.x, centroid_loc.y, centroid_loc.layer});
VTR_ASSERT(width_offset == 0);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops crit_path_total_internal_heap_pushes crit_path_total_internal_heap_pops crit_path_total_external_heap_pushes crit_path_total_external_heap_pops crit_path_total_external_SOURCE_pushes crit_path_total_external_SOURCE_pops crit_path_total_internal_SOURCE_pushes crit_path_total_internal_SOURCE_pops crit_path_total_external_SINK_pushes crit_path_total_external_SINK_pops crit_path_total_internal_SINK_pushes crit_path_total_internal_SINK_pops crit_path_total_external_IPIN_pushes crit_path_total_external_IPIN_pops crit_path_total_internal_IPIN_pushes crit_path_total_internal_IPIN_pops crit_path_total_external_OPIN_pushes crit_path_total_external_OPIN_pops crit_path_total_internal_OPIN_pushes crit_path_total_internal_OPIN_pops crit_path_total_external_CHANX_pushes crit_path_total_external_CHANX_pops crit_path_total_internal_CHANX_pushes crit_path_total_internal_CHANX_pops crit_path_total_external_CHANY_pushes crit_path_total_external_CHANY_pops crit_path_total_internal_CHANY_pushes crit_path_total_internal_CHANY_pops crit_path_rt_node_SOURCE_pushes crit_path_rt_node_SINK_pushes crit_path_rt_node_IPIN_pushes crit_path_rt_node_OPIN_pushes crit_path_rt_node_CHANX_pushes crit_path_rt_node_CHANY_pushes crit_path_adding_all_rt crit_path_adding_high_fanout_rt crit_path_total_number_of_adding_all_rt_from_calling_high_fanout_rt critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time
k6_frac_N10_mem32K_40nm.xml multiclock_output_and_latch.v common 11.99 vpr 255.45 MiB 0.11 36912 -1 -1 1 0.05 -1 -1 34700 -1 -1 2 6 0 0 success v8.0.0-7653-g7c8f300-dirty release VTR_ASSERT_LEVEL=3 sanitizers GNU 9.4.0 on Linux-4.13.1-041301-generic x86_64 2023-04-21 14:13:39 agent-1 /home/mahmo494/RL_experiment/vtr-verilog-to-routing/vtr_flow/tasks 261584 6 1 13 14 2 8 9 4 4 16 clb auto 101.0 MiB 0.11 13 244.4 MiB 0.04 0 0.875884 -3.21653 -0.875884 0.545 0.47 0.000264546 0.000241337 0.00754986 0.00454282 20 15 7 107788 107788 10441.3 652.579 0.66 0.0136677 0.00891098 742 1670 -1 15 14 32 32 476 268 0 0 476 268 32 32 0 0 45 42 0 0 51 45 0 0 32 32 0 0 205 79 0 0 111 38 0 0 32 0 0 0 0 0 32 0 0 1.31811 0.545 -4.12048 -1.31811 0 0 13748.8 859.301 0.01 0.04 0.18 -1 -1 0.01 0.00722654 0.00593545
k6_frac_N10_mem32K_40nm.xml multiclock_reader_writer.v common 12.96 vpr 261.56 MiB 0.15 45980 -1 -1 1 0.06 -1 -1 34932 -1 -1 2 3 0 0 success v8.0.0-7653-g7c8f300-dirty release VTR_ASSERT_LEVEL=3 sanitizers GNU 9.4.0 on Linux-4.13.1-041301-generic x86_64 2023-04-21 14:13:39 agent-1 /home/mahmo494/RL_experiment/vtr-verilog-to-routing/vtr_flow/tasks 267836 3 1 25 26 2 8 6 4 4 16 clb auto 106.4 MiB 0.87 17 249.9 MiB 0.03 0 0.571 -8.64803 -0.571 0.557849 0.47 0.000543454 0.000488368 0.00346482 0.00253954 20 19 1 107788 107788 10441.3 652.579 0.67 0.0113116 0.00855232 742 1670 -1 27 1 6 6 63 36 0 0 63 36 6 6 0 0 9 6 0 0 9 9 0 0 6 6 0 0 18 3 0 0 15 6 0 0 6 0 0 0 0 0 6 0 0 0.865 0.557849 -8.82275 -0.865 0 0 13748.8 859.301 0.01 0.04 0.17 -1 -1 0.01 0.00501901 0.00409753
k6_frac_N10_mem32K_40nm.xml multiclock_separate_and_latch.v common 11.88 vpr 254.22 MiB 0.15 35980 -1 -1 1 0 -1 -1 32420 -1 -1 2 6 0 0 success v8.0.0-7653-g7c8f300-dirty release VTR_ASSERT_LEVEL=3 sanitizers GNU 9.4.0 on Linux-4.13.1-041301-generic x86_64 2023-04-21 14:13:39 agent-1 /home/mahmo494/RL_experiment/vtr-verilog-to-routing/vtr_flow/tasks 260320 6 2 10 12 2 8 10 4 4 16 clb auto 100.4 MiB 0.06 12 243.6 MiB 0.03 0 0.544641 -1.83465 -0.544641 nan 0.47 0.000504445 0.000240584 0.00477542 0.00228264 20 15 1 107788 107788 10441.3 652.579 0.64 0.00804976 0.00416003 742 1670 -1 13 1 6 6 148 96 0 0 148 96 6 6 0 0 18 16 0 0 18 18 0 0 6 6 0 0 53 27 0 0 47 23 0 0 6 0 0 0 0 0 6 0 0 0.81248 nan -2.54321 -0.81248 0 0 13748.8 859.301 0.01 0.02 0.18 -1 -1 0.01 0.00215701 0.00121245
k6_frac_N10_mem32K_40nm.xml multiclock_separate_and_latch.v common 11.88 vpr 254.22 MiB 0.15 35980 -1 -1 1 0 -1 -1 32420 -1 -1 2 6 0 0 success v8.0.0-7653-g7c8f300-dirty release VTR_ASSERT_LEVEL=3 sanitizers GNU 9.4.0 on Linux-4.13.1-041301-generic x86_64 2023-04-21 14:13:39 agent-1 /home/mahmo494/RL_experiment/vtr-verilog-to-routing/vtr_flow/tasks 260320 6 2 10 12 2 8 10 4 4 16 clb auto 100.4 MiB 0.06 12 243.6 MiB 0.03 0 0.544641 -1.83465 -0.544641 nan 0.47 0.000504445 0.000240584 0.00477542 0.00228264 20 27 1 107788 107788 10441.3 652.579 0.64 0.00804976 0.00416003 742 1670 -1 13 1 6 6 148 96 0 0 148 96 6 6 0 0 18 16 0 0 18 18 0 0 6 6 0 0 53 27 0 0 47 23 0 0 6 0 0 0 0 0 6 0 0 0.81248 nan -2.54321 -0.81248 0 0 13748.8 859.301 0.01 0.02 0.18 -1 -1 0.01 0.00215701 0.00121245
Loading