-
Notifications
You must be signed in to change notification settings - Fork 414
Changing subtile selection in the try_centroid_placement of initial_placement #2897
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
e0522b0
8cc499e
489698f
d7d6fc9
0dba701
463dd1c
6664fcb
05e2917
03b820a
8765153
b308dac
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -147,6 +147,26 @@ static bool is_loc_legal(const t_pl_loc& loc, | |
const PartitionRegion& pr, | ||
t_logical_block_type_ptr block_type); | ||
|
||
/** | ||
* @brief Helper function to choose a subtile at the specified location if a compatible and available one exists.
* | ||
* @param centroid The centroid location at which the subtile will be selected using its x,y, and layer. | ||
* @param block_type Logical block type of the macro head member. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Logical block type we would like to place here |
||
* @param block_loc_registry Placement block location information. To be filled with the location | ||
* where pl_macro is placed. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you should remove the "To be filled with the location where pl_macro is placed." |
||
* @param pr The PartitionRegion of the macro head member - represents its floorplanning constraints, is the size of | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd just say: |
||
* the whole chip if the macro is not constrained. | ||
* @param rng A random number generator to select a subtile from the available and compatible ones.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (grammatical nit): a subtile from the available and compatible ones |
||
* | ||
* @return False if the location is on the chip and legal, but no available subtile is found there. True otherwise. False leads us to | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if the location is on the chip and legal but no available subtile is found at that location. I'd delete the "False leads us to neighbour placement currently" |
||
* neighbour placement currently. | ||
*/ | ||
static bool find_subtile_in_location(t_pl_loc& centroid, | ||
t_logical_block_type_ptr block_type, | ||
const BlkLocRegistry& blk_loc_registry, | ||
const PartitionRegion& pr, | ||
vtr::RngContainer& rng); | ||
|
||
/** | ||
* @brief Calculates a centroid location for a block based on its placed connections. | ||
* | ||
|
@@ -340,6 +360,42 @@ static bool is_loc_legal(const t_pl_loc& loc, | |
return legal; | ||
} | ||
|
||
// Tries to pick a subtile for `centroid` at its (x, y, layer) and stores it in centroid.sub_tile.
// Returns false only when the location is on chip and legal but none of the compatible
// subtiles is free; returns true in every other case, including when the location is
// off chip or not legal (in which case no subtile selection is attempted at all).
bool find_subtile_in_location(t_pl_loc& centroid, | ||
t_logical_block_type_ptr block_type, | ||
const BlkLocRegistry& blk_loc_registry, | ||
const PartitionRegion& pr, | ||
vtr::RngContainer& rng) { | ||
//check if the location is on chip and legal; if yes, try to update the subtile | ||
if (is_loc_on_chip({centroid.x, centroid.y, centroid.layer}) && is_loc_legal(centroid, pr, block_type)) { | ||
//look up the subtiles of the physical tile at this location that are compatible with block_type | ||
const auto& device_ctx = g_vpr_ctx.device(); | ||
const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[block_type->index]; | ||
const auto& type = device_ctx.grid.get_physical_type({centroid.x, centroid.y, centroid.layer}); | ||
const auto& compatible_sub_tiles = compressed_block_grid.compatible_sub_tile_num(type->index); | ||
|
||
//filter out occupied subtiles (those for which block_at_location() reports a block) | ||
const GridBlock& grid_blocks = blk_loc_registry.grid_blocks(); | ||
std::vector<int> available_sub_tiles; | ||
AlexandreSinger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
available_sub_tiles.reserve(compatible_sub_tiles.size()); | ||
for (int sub_tile : compatible_sub_tiles) { | ||
t_pl_loc pos = {centroid.x, centroid.y, sub_tile, centroid.layer}; | ||
if (!grid_blocks.block_at_location(pos)) { | ||
available_sub_tiles.push_back(sub_tile); | ||
} | ||
} | ||
|
||
//If there is at least one available subtile, update the centroid. Otherwise, since the location | ||
//is legal and on chip but no subtile was found, return false so the caller can try neighbour placement. | ||
//NOTE(review): the index expression assumes rng.irand(max) can return max itself -- confirm. | ||
if (!available_sub_tiles.empty()) { | ||
centroid.sub_tile = available_sub_tiles[rng.irand((int)available_sub_tiles.size() - 1)]; | ||
} else { | ||
return false; | ||
} | ||
} | ||
|
||
return true; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A bit strange -- we return true if we find a subtile, or if we don't even try. Could we return true if we found a legal subtile and false otherwise (and change the calling code). It's a more clear interface. |
||
} | ||
|
||
static bool find_centroid_neighbor(t_pl_loc& centroid_loc, | ||
t_logical_block_type_ptr block_type, | ||
bool search_for_empty, | ||
|
@@ -551,10 +607,15 @@ static bool try_centroid_placement(const t_pl_macro& pl_macro, | |
t_pl_loc centroid_loc(OPEN, OPEN, OPEN, OPEN); | ||
std::vector<ClusterBlockId> unplaced_blocks_to_update_their_score; | ||
|
||
bool try_neighbour_due_to_subtile = false; | ||
|
||
if (!flat_placement_info.valid) { | ||
// If a flat placement is not provided, use the centroid of connected | ||
// blocks which have already been placed. | ||
unplaced_blocks_to_update_their_score = find_centroid_loc(pl_macro, centroid_loc, blk_loc_registry); | ||
if(!find_subtile_in_location(centroid_loc, block_type, blk_loc_registry, pr, rng)) { | ||
try_neighbour_due_to_subtile = true; | ||
} | ||
} else { | ||
// If a flat placement is provided, use the flat placement to get the | ||
// centroid. | ||
|
@@ -567,6 +628,9 @@ static bool try_centroid_placement(const t_pl_macro& pl_macro, | |
if (!is_loc_on_chip({centroid_loc.x, centroid_loc.y, centroid_loc.layer}) || | ||
!is_loc_legal(centroid_loc, pr, block_type)) { | ||
unplaced_blocks_to_update_their_score = find_centroid_loc(pl_macro, centroid_loc, blk_loc_registry); | ||
if(!find_subtile_in_location(centroid_loc, block_type, blk_loc_registry, pr, rng)) { | ||
try_neighbour_due_to_subtile = true; | ||
} | ||
} | ||
} | ||
|
||
AlexandreSinger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
@@ -577,9 +641,8 @@ static bool try_centroid_placement(const t_pl_macro& pl_macro, | |
|
||
//centroid suggestion was either occupied or does not match block type | ||
//try to find a near location that meet these requirements | ||
bool neighbor_legal_loc = false; | ||
if (!is_loc_legal(centroid_loc, pr, block_type)) { | ||
neighbor_legal_loc = find_centroid_neighbor(centroid_loc, block_type, false, blk_loc_registry, rng); | ||
if (!is_loc_legal(centroid_loc, pr, block_type) || try_neighbour_due_to_subtile) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you can simplify this to if (!found_legal_subtile) // pick whatever variable name makes sense It makes that routine more clear, and the logic here more straightforward. |
||
bool neighbor_legal_loc = find_centroid_neighbor(centroid_loc, block_type, false, blk_loc_registry, rng); | ||
if (!neighbor_legal_loc) { //no neighbor candidate found | ||
return false; | ||
} | ||
|
@@ -591,15 +654,6 @@ static bool try_centroid_placement(const t_pl_macro& pl_macro, | |
} | ||
|
||
auto& device_ctx = g_vpr_ctx.device(); | ||
//choose the location's subtile if the centroid location is legal. | ||
//if the location is found within the "find_centroid_neighbor", it already has a subtile | ||
//we don't need to find one again | ||
if (!neighbor_legal_loc) { | ||
AlexandreSinger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[block_type->index]; | ||
const auto& type = device_ctx.grid.get_physical_type({centroid_loc.x, centroid_loc.y, centroid_loc.layer}); | ||
const auto& compatible_sub_tiles = compressed_block_grid.compatible_sub_tile_num(type->index); | ||
centroid_loc.sub_tile = compatible_sub_tiles[rng.irand((int)compatible_sub_tiles.size() - 1)]; | ||
} | ||
int width_offset = device_ctx.grid.get_width_offset({centroid_loc.x, centroid_loc.y, centroid_loc.layer}); | ||
int height_offset = device_ctx.grid.get_height_offset({centroid_loc.x, centroid_loc.y, centroid_loc.layer}); | ||
VTR_ASSERT(width_offset == 0); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops crit_path_total_internal_heap_pushes crit_path_total_internal_heap_pops crit_path_total_external_heap_pushes crit_path_total_external_heap_pops crit_path_total_external_SOURCE_pushes crit_path_total_external_SOURCE_pops crit_path_total_internal_SOURCE_pushes crit_path_total_internal_SOURCE_pops crit_path_total_external_SINK_pushes crit_path_total_external_SINK_pops crit_path_total_internal_SINK_pushes crit_path_total_internal_SINK_pops crit_path_total_external_IPIN_pushes crit_path_total_external_IPIN_pops 
crit_path_total_internal_IPIN_pushes crit_path_total_internal_IPIN_pops crit_path_total_external_OPIN_pushes crit_path_total_external_OPIN_pops crit_path_total_internal_OPIN_pushes crit_path_total_internal_OPIN_pops crit_path_total_external_CHANX_pushes crit_path_total_external_CHANX_pops crit_path_total_internal_CHANX_pushes crit_path_total_internal_CHANX_pops crit_path_total_external_CHANY_pushes crit_path_total_external_CHANY_pops crit_path_total_internal_CHANY_pushes crit_path_total_internal_CHANY_pops crit_path_rt_node_SOURCE_pushes crit_path_rt_node_SINK_pushes crit_path_rt_node_IPIN_pushes crit_path_rt_node_OPIN_pushes crit_path_rt_node_CHANX_pushes crit_path_rt_node_CHANY_pushes crit_path_adding_all_rt crit_path_adding_high_fanout_rt crit_path_total_number_of_adding_all_rt_from_calling_high_fanout_rt critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time | ||
k6_frac_N10_mem32K_40nm.xml multiclock_output_and_latch.v common 11.99 vpr 255.45 MiB 0.11 36912 -1 -1 1 0.05 -1 -1 34700 -1 -1 2 6 0 0 success v8.0.0-7653-g7c8f300-dirty release VTR_ASSERT_LEVEL=3 sanitizers GNU 9.4.0 on Linux-4.13.1-041301-generic x86_64 2023-04-21 14:13:39 agent-1 /home/mahmo494/RL_experiment/vtr-verilog-to-routing/vtr_flow/tasks 261584 6 1 13 14 2 8 9 4 4 16 clb auto 101.0 MiB 0.11 13 244.4 MiB 0.04 0 0.875884 -3.21653 -0.875884 0.545 0.47 0.000264546 0.000241337 0.00754986 0.00454282 20 15 7 107788 107788 10441.3 652.579 0.66 0.0136677 0.00891098 742 1670 -1 15 14 32 32 476 268 0 0 476 268 32 32 0 0 45 42 0 0 51 45 0 0 32 32 0 0 205 79 0 0 111 38 0 0 32 0 0 0 0 0 32 0 0 1.31811 0.545 -4.12048 -1.31811 0 0 13748.8 859.301 0.01 0.04 0.18 -1 -1 0.01 0.00722654 0.00593545 | ||
k6_frac_N10_mem32K_40nm.xml multiclock_reader_writer.v common 12.96 vpr 261.56 MiB 0.15 45980 -1 -1 1 0.06 -1 -1 34932 -1 -1 2 3 0 0 success v8.0.0-7653-g7c8f300-dirty release VTR_ASSERT_LEVEL=3 sanitizers GNU 9.4.0 on Linux-4.13.1-041301-generic x86_64 2023-04-21 14:13:39 agent-1 /home/mahmo494/RL_experiment/vtr-verilog-to-routing/vtr_flow/tasks 267836 3 1 25 26 2 8 6 4 4 16 clb auto 106.4 MiB 0.87 17 249.9 MiB 0.03 0 0.571 -8.64803 -0.571 0.557849 0.47 0.000543454 0.000488368 0.00346482 0.00253954 20 19 1 107788 107788 10441.3 652.579 0.67 0.0113116 0.00855232 742 1670 -1 27 1 6 6 63 36 0 0 63 36 6 6 0 0 9 6 0 0 9 9 0 0 6 6 0 0 18 3 0 0 15 6 0 0 6 0 0 0 0 0 6 0 0 0.865 0.557849 -8.82275 -0.865 0 0 13748.8 859.301 0.01 0.04 0.17 -1 -1 0.01 0.00501901 0.00409753 | ||
k6_frac_N10_mem32K_40nm.xml multiclock_separate_and_latch.v common 11.88 vpr 254.22 MiB 0.15 35980 -1 -1 1 0 -1 -1 32420 -1 -1 2 6 0 0 success v8.0.0-7653-g7c8f300-dirty release VTR_ASSERT_LEVEL=3 sanitizers GNU 9.4.0 on Linux-4.13.1-041301-generic x86_64 2023-04-21 14:13:39 agent-1 /home/mahmo494/RL_experiment/vtr-verilog-to-routing/vtr_flow/tasks 260320 6 2 10 12 2 8 10 4 4 16 clb auto 100.4 MiB 0.06 12 243.6 MiB 0.03 0 0.544641 -1.83465 -0.544641 nan 0.47 0.000504445 0.000240584 0.00477542 0.00228264 20 15 1 107788 107788 10441.3 652.579 0.64 0.00804976 0.00416003 742 1670 -1 13 1 6 6 148 96 0 0 148 96 6 6 0 0 18 16 0 0 18 18 0 0 6 6 0 0 53 27 0 0 47 23 0 0 6 0 0 0 0 0 6 0 0 0.81248 nan -2.54321 -0.81248 0 0 13748.8 859.301 0.01 0.02 0.18 -1 -1 0.01 0.00215701 0.00121245 | ||
arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time 
crit_path_total_timing_analysis_time crit_path_total_sta_time | ||
k6_frac_N10_mem32K_40nm.xml multiclock_output_and_latch.v common 0.52 vpr 66.02 MiB 0.02 7040 -1 -1 1 0.06 -1 -1 35596 -1 -1 2 6 0 0 success v8.0.0-12164-g463dd1c36-dirty Release VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-6.8.0-51-generic x86_64 2025-02-20T15:42:53 haydar-Precision-5820-Tower /home/haydar/vtr-verilog-to-routing 67604 6 1 13 14 2 8 9 4 4 16 clb auto 27.5 MiB 0.01 18 27 14 11 2 66.0 MiB 0.00 0.00 1.02737 -3.59667 -1.02737 0.545 0.01 5.3292e-05 4.5019e-05 0.000306591 0.000256921 -1 -1 -1 -1 20 15 7 107788 107788 10441.3 652.579 0.01 0.00211307 0.00189921 742 1670 -1 17 14 34 34 409 236 1.40641 0.545 -4.27839 -1.40641 0 0 13748.8 859.301 0.00 0.00 0.00 -1 -1 0.00 0.00189972 0.00171949 | ||
k6_frac_N10_mem32K_40nm.xml multiclock_reader_writer.v common 0.61 vpr 65.82 MiB 0.02 6912 -1 -1 1 0.06 -1 -1 35588 -1 -1 2 3 0 0 success v8.0.0-12164-g463dd1c36-dirty Release VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-6.8.0-51-generic x86_64 2025-02-20T15:42:53 haydar-Precision-5820-Tower /home/haydar/vtr-verilog-to-routing 67396 3 1 25 26 2 8 6 4 4 16 clb auto 27.5 MiB 0.02 20 15 4 1 10 65.8 MiB 0.00 0.00 0.620042 -8.9502 -0.620042 0.557849 0.01 8.2682e-05 6.9909e-05 0.000633195 0.000565801 -1 -1 -1 -1 20 22 1 107788 107788 10441.3 652.579 0.01 0.00358735 0.00333372 742 1670 -1 27 6 18 18 703 470 0.865467 0.557849 -9.14332 -0.865467 0 0 13748.8 859.301 0.00 0.00 0.00 -1 -1 0.00 0.00327369 0.00304055 | ||
k6_frac_N10_mem32K_40nm.xml multiclock_separate_and_latch.v common 0.48 vpr 66.14 MiB 0.02 6912 -1 -1 1 0.02 -1 -1 33640 -1 -1 2 6 0 0 success v8.0.0-12164-g463dd1c36-dirty Release VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-6.8.0-51-generic x86_64 2025-02-20T15:42:53 haydar-Precision-5820-Tower /home/haydar/vtr-verilog-to-routing 67724 6 2 10 12 2 8 10 4 4 16 clb auto 27.8 MiB 0.00 19 30 14 9 7 66.1 MiB 0.00 0.00 0.620297 -2.13808 -0.620297 nan 0.01 4.1509e-05 3.0578e-05 0.000200894 0.000157769 -1 -1 -1 -1 20 27 14 107788 107788 10441.3 652.579 0.01 0.00213275 0.00183214 742 1670 -1 19 15 33 33 727 463 0.716884 nan -2.60018 -0.716884 0 0 13748.8 859.301 0.00 0.00 0.00 -1 -1 0.00 0.00208972 0.00189128 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the type is compatible and an available one exists.