Chan z prefix sum #2781
Changes from all commits
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -112,6 +112,8 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, | |
, placer_opts_(placer_opts) { | ||
const int num_layers = g_vpr_ctx.device().grid.get_num_layers(); | ||
|
||
is_multi_layer_ = num_layers > 1; | ||
|
||
// Either 3D BB or per layer BB data structure are used, not both. | ||
if (cube_bb_) { | ||
ts_bb_edge_new_.resize(num_nets, t_bb()); | ||
|
@@ -145,10 +147,11 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, | |
* been recomputed. */ | ||
bb_update_status_.resize(num_nets, NetUpdateState::NOT_UPDATED_YET); | ||
|
||
alloc_and_load_chan_w_factors_for_place_cost_(placer_opts_.place_cost_exp); | ||
alloc_and_load_chan_w_factors_for_place_cost_(); | ||
} | ||
|
||
void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_(float place_cost_exp) { | ||
void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_() { | ||
const double place_cost_exp = static_cast<double>(placer_opts_.place_cost_exp); | ||
auto& device_ctx = g_vpr_ctx.device(); | ||
|
||
const int grid_height = device_ctx.grid.height(); | ||
|
@@ -190,7 +193,7 @@ void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_(float place_c | |
} | ||
|
||
chanx_place_cost_fac_[high][low] = (high - low + 1.) / chanx_place_cost_fac_[high][low]; | ||
chanx_place_cost_fac_[high][low] = pow((double)chanx_place_cost_fac_[high][low], (double)place_cost_exp); | ||
chanx_place_cost_fac_[high][low] = pow((double)chanx_place_cost_fac_[high][low], place_cost_exp); | ||
} | ||
} | ||
|
||
|
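For context on the two-step computation kept in this hunk: earlier (unchanged) code accumulates the horizontal channel widths of every row in the span, and the lines above then invert that sum and raise it to the place cost exponent. A minimal standalone sketch of that arithmetic, assuming a hypothetical chanx_width[y] array (not part of this diff) holding the horizontal channel width at row y:

#include <cmath>
#include <vector>

// Hedged sketch: one horizontal place-cost factor, computed the way the hunk above does.
// `chanx_width` is an illustrative stand-in for the per-row channel widths VPR already tracks.
double chanx_cost_factor(const std::vector<int>& chanx_width, int low, int high, double place_cost_exp) {
    double total_width = 0.0;
    for (int y = low; y <= high; ++y)
        total_width += chanx_width[y];                        // capacity summed over rows low..high

    double inv_avg_width = (high - low + 1.0) / total_width;  // 1 / (average channel width in the span)
    return std::pow(inv_avg_width, place_cost_exp);           // sharpened by the placer cost exponent
}

A wider average channel yields a smaller factor, so spans that cross well-supplied regions are charged less per unit of bounding-box wirelength.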
@@ -220,71 +223,87 @@ void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_(float place_c | |
} | ||
|
||
chany_place_cost_fac_[high][low] = (high - low + 1.) / chany_place_cost_fac_[high][low]; | ||
chany_place_cost_fac_[high][low] = pow((double)chany_place_cost_fac_[high][low], (double)place_cost_exp); | ||
chany_place_cost_fac_[high][low] = pow((double)chany_place_cost_fac_[high][low], place_cost_exp); | ||
} | ||
} | ||
|
||
if (device_ctx.grid.get_num_layers() > 1) { | ||
alloc_and_load_for_fast_vertical_cost_update_(place_cost_exp); | ||
if (is_multi_layer_) { | ||
alloc_and_load_for_fast_vertical_cost_update_(); | ||
} | ||
} | ||
|
||
void NetCostHandler::alloc_and_load_for_fast_vertical_cost_update_(float place_cost_exp) { | ||
void NetCostHandler::alloc_and_load_for_fast_vertical_cost_update_() { | ||
const auto& device_ctx = g_vpr_ctx.device(); | ||
const auto& rr_graph = device_ctx.rr_graph; | ||
|
||
const size_t grid_height = device_ctx.grid.height(); | ||
const size_t grid_width = device_ctx.grid.width(); | ||
|
||
|
||
chanz_place_cost_fac_ = vtr::NdMatrix<float, 4>({grid_width, grid_height, grid_width, grid_height}, 0.); | ||
acc_tile_num_inter_die_conn_ = vtr::NdMatrix<int, 2>({grid_width, grid_height}, 0.); | ||
|
||
vtr::NdMatrix<float, 2> tile_num_inter_die_conn({grid_width, grid_height}, 0.); | ||
|
||
vtr::NdMatrix<float, 2> tile_num_inter_die_conn({grid_width, grid_height}, 0.); | ||
/* | ||
* Step 1: iterate over the rr-graph, recording how many edges go between layers at each (x,y) location | ||
* in the device. We count all these edges, regardless of which layers they connect. Then we divide by | ||
* the number of layers - 1 to get the average cross-layer edge count per (x,y) location -- this mirrors | ||
* what we do for the horizontal and vertical channels where we assume the channel width doesn't change | ||
* along the length of the channel. It lets us be more memory-efficient for 3D devices, and could be revisited | ||
* if someday we have architectures with widely varying connectivity between different layers in a stack. | ||
*/ | ||
|
||
/* | ||
* To calculate the accumulated number of inter-die connections, we first need the number of | ||
* inter-die connections per location. To also handle the case where the RR graph is read from a file | ||
* rather than generated from the architecture file, we obtain this number by iterating over the RR graph. | ||
* Once tile_num_inter_die_conn is populated, we can populate acc_tile_num_inter_die_conn_. First, | ||
* we fill in the first row and column. Then, we iterate over the remaining blocks and compute each entry | ||
* by adding the number of inter-die connections at that location to the accumulated counts of the blocks | ||
* below it and to its left. Since the accumulated count of the block to the lower left is included in | ||
* both of those, it is added twice and must be subtracted once. | ||
*/ | ||
for (const auto& src_rr_node : rr_graph.nodes()) { | ||
for (const auto& rr_edge_idx : rr_graph.configurable_edges(src_rr_node)) { | ||
for (const auto& rr_edge_idx : rr_graph.edges(src_rr_node)) { | ||
const auto& sink_rr_node = rr_graph.edge_sink_node(src_rr_node, rr_edge_idx); | ||
|
||
if (rr_graph.node_layer(src_rr_node) != rr_graph.node_layer(sink_rr_node)) { | ||
// We assume that the nodes driving the inter-layer connection or being driven by it | ||
// are not streched across multiple tiles | ||
// are not stretched across multiple tiles | ||
int src_x = rr_graph.node_xhigh(src_rr_node); | ||
int src_y = rr_graph.node_yhigh(src_rr_node); | ||
VTR_ASSERT(rr_graph.node_xlow(src_rr_node) == src_x && rr_graph.node_ylow(src_rr_node) == src_y); | ||
|
||
tile_num_inter_die_conn[src_x][src_y]++; | ||
} | ||
} | ||
} | ||
|
||
for (const auto& rr_edge_idx : rr_graph.non_configurable_edges(src_rr_node)) { | ||
const auto& sink_rr_node = rr_graph.edge_sink_node(src_rr_node, rr_edge_idx); | ||
if (rr_graph.node_layer(src_rr_node) != rr_graph.node_layer(sink_rr_node)) { | ||
int src_x = rr_graph.node_xhigh(src_rr_node); | ||
VTR_ASSERT(rr_graph.node_xlow(src_rr_node) == src_x && rr_graph.node_xlow(src_rr_node) == src_x); | ||
int src_y = rr_graph.node_yhigh(src_rr_node); | ||
VTR_ASSERT(rr_graph.node_ylow(src_rr_node) == src_y && rr_graph.node_ylow(src_rr_node) == src_y); | ||
tile_num_inter_die_conn[src_x][src_y]++; | ||
} | ||
int num_layers = device_ctx.grid.get_num_layers(); | ||
for (size_t x = 0; x < device_ctx.grid.width(); x++) { | ||
for (size_t y = 0; y < device_ctx.grid.height(); y++) { | ||
tile_num_inter_die_conn[x][y] /= (num_layers-1); | ||
} | ||
|
||
} | ||
|
||
for (int x_high = 0; x_high < (int)device_ctx.grid.width(); x_high++) { | ||
for (int y_high = 0; y_high < (int)device_ctx.grid.height(); y_high++) { | ||
for (int x_low = 0; x_low <= x_high; x_low++) { | ||
for (int y_low = 0; y_low <= y_high; y_low++) { | ||
int num_inter_die_conn = 0; | ||
for (int x = x_low; x <= x_high; x++) { | ||
for (int y = y_low; y <= y_high; y++) { | ||
num_inter_die_conn += tile_num_inter_die_conn[x][y]; | ||
} | ||
} | ||
int seen_num_tiles = (x_high - x_low + 1) * (y_high - y_low + 1); | ||
chanz_place_cost_fac_[x_high][y_high][x_low][y_low] = seen_num_tiles / static_cast<float>(num_inter_die_conn); | ||
|
||
chanz_place_cost_fac_[x_high][y_high][x_low][y_low] = pow( | ||
(double)chanz_place_cost_fac_[x_high][y_high][x_low][y_low], | ||
(double)place_cost_exp); | ||
} | ||
} | ||
// Step 2: Calculate prefix sum of the inter-die connectivity up to and including the channel at (x, y). | ||
acc_tile_num_inter_die_conn_[0][0] = tile_num_inter_die_conn[0][0]; | ||
|
||
// Initialize the first row and column | ||
for (size_t x = 1; x < device_ctx.grid.width(); x++) { | ||
acc_tile_num_inter_die_conn_[x][0] = acc_tile_num_inter_die_conn_[x-1][0] + | ||
tile_num_inter_die_conn[x][0]; | ||
} | ||
|
||
for (size_t y = 1; y < device_ctx.grid.height(); y++) { | ||
acc_tile_num_inter_die_conn_[0][y] = acc_tile_num_inter_die_conn_[0][y-1] + | ||
tile_num_inter_die_conn[0][y]; | ||
} | ||
|
||
for (size_t x_high = 1; x_high < device_ctx.grid.width(); x_high++) { | ||
for (size_t y_high = 1; y_high < device_ctx.grid.height(); y_high++) { | ||
acc_tile_num_inter_die_conn_[x_high][y_high] = acc_tile_num_inter_die_conn_[x_high-1][y_high] + | ||
acc_tile_num_inter_die_conn_[x_high][y_high-1] + | ||
tile_num_inter_die_conn[x_high][y_high] - | ||
acc_tile_num_inter_die_conn_[x_high-1][y_high-1]; | ||
} | ||
} | ||
} | ||
|
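The hunk above replaces the old 4-D chanz_place_cost_fac_ table with per-tile inter-die connection counts plus a 2-D inclusive prefix sum over them. A self-contained sketch of the same construction, using plain std::vector instead of vtr::NdMatrix (names here are illustrative, not the PR's API):

#include <vector>

// Hedged sketch of the 2-D inclusive prefix-sum build in the hunk above.
// acc[x][y] = sum of tile[i][j] for all 0 <= i <= x, 0 <= j <= y.
std::vector<std::vector<float>> build_prefix_sum(const std::vector<std::vector<float>>& tile) {
    const size_t width = tile.size();
    const size_t height = tile[0].size();
    std::vector<std::vector<float>> acc(width, std::vector<float>(height, 0.f));

    acc[0][0] = tile[0][0];

    // First row and first column have only one neighbor to accumulate from.
    for (size_t x = 1; x < width; ++x)
        acc[x][0] = acc[x - 1][0] + tile[x][0];
    for (size_t y = 1; y < height; ++y)
        acc[0][y] = acc[0][y - 1] + tile[0][y];

    // Interior cells: left + below + self, minus the lower-left corner that was counted twice.
    for (size_t x = 1; x < width; ++x)
        for (size_t y = 1; y < height; ++y)
            acc[x][y] = acc[x - 1][y] + acc[x][y - 1] + tile[x][y] - acc[x - 1][y - 1];

    return acc;
}

Building the table is O(width x height) once, and it lets the connection count of any rectangular region be recovered in constant time, which is what get_chanz_cost_factor_ relies on later in this diff.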
@@ -818,7 +837,7 @@ void NetCostHandler::update_bb_(ClusterNetId net_id, | |
} | ||
|
||
/* Now account for the layer motion. */ | ||
if (num_layers > 1) { | ||
if (is_multi_layer_) { | ||
/* We need to update it only if multiple layers are available */ | ||
for (int layer_num = 0; layer_num < num_layers; layer_num++) { | ||
num_sink_pin_layer_new[layer_num] = curr_num_sink_pin_layer[layer_num]; | ||
|
@@ -1402,8 +1421,6 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) { | |
|
||
const t_bb& bb = use_ts ? ts_bb_coord_new_[net_id] : placer_state_.move().bb_coords[net_id]; | ||
|
||
const bool is_multi_layer = (g_vpr_ctx.device().grid.get_num_layers() > 1); | ||
|
||
double crossing = wirelength_crossing_count(cluster_ctx.clb_nlist.net_pins(net_id).size()); | ||
|
||
/* Could insert a check for xmin == xmax. In that case, assume * | ||
|
@@ -1420,12 +1437,14 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) { | |
*/ | ||
|
||
double ncost; | ||
ncost = (bb.xmax - bb.xmin + 1) * crossing * chanx_place_cost_fac_[bb.ymax][bb.ymin - 1]; | ||
ncost += (bb.ymax - bb.ymin + 1) * crossing * chany_place_cost_fac_[bb.xmax][bb.xmin - 1]; | ||
if (is_multi_layer) { | ||
ncost += (bb.layer_max - bb.layer_min) * crossing * chanz_place_cost_fac_[bb.xmax][bb.ymax][bb.xmin][bb.ymin]; | ||
ncost = (bb.xmax - bb.xmin + 1) * chanx_place_cost_fac_[bb.ymax][bb.ymin - 1]; | ||
ncost += (bb.ymax - bb.ymin + 1) * chany_place_cost_fac_[bb.xmax][bb.xmin - 1]; | ||
if (is_multi_layer_) { | ||
ncost += (bb.layer_max - bb.layer_min) * get_chanz_cost_factor_(bb); | ||
} | ||
|
||
ncost *= crossing; | ||
|
||
return ncost; | ||
} | ||
|
||
|
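In the rewritten hunk above, the crossing correction now multiplies the summed terms once instead of being applied to each term, which is algebraically equivalent; the layer term also switches from the old 4-D lookup to get_chanz_cost_factor_. A hedged sketch of the resulting cost expression, with the three cost factors passed in as placeholders rather than looked up:

// Hedged sketch of the restructured cube bounding-box cost above.
// fac_x, fac_y, fac_z stand in for the chanx/chany/chanz cost factors the real code looks up.
double cube_bb_cost(int xmin, int xmax, int ymin, int ymax, int layer_min, int layer_max,
                    double fac_x, double fac_y, double fac_z, double crossing, bool multi_layer) {
    double cost = (xmax - xmin + 1) * fac_x      // horizontal span weighted by the x-channel factor
                + (ymax - ymin + 1) * fac_y;     // vertical span weighted by the y-channel factor
    if (multi_layer)
        cost += (layer_max - layer_min) * fac_z; // inter-die span weighted by the chan-z factor
    return cost * crossing;                      // apply the pin-count crossing correction once
}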
@@ -1526,6 +1545,39 @@ double NetCostHandler::get_net_wirelength_from_layer_bb_(ClusterNetId net_id) { | |
return ncost; | ||
} | ||
|
||
float NetCostHandler::get_chanz_cost_factor_(const t_bb& bb) { | ||
float place_cost_exp = placer_opts_.place_cost_exp; | ||
|
||
int num_inter_dir_conn; | ||
|
||
if (bb.xmin == 0 && bb.ymin == 0) { | ||
num_inter_dir_conn = acc_tile_num_inter_die_conn_[bb.xmax][bb.ymax]; | ||
} else if (bb.xmin == 0) { | ||
num_inter_dir_conn = acc_tile_num_inter_die_conn_[bb.xmax][bb.ymax] - | ||
acc_tile_num_inter_die_conn_[bb.xmax][bb.ymin-1]; | ||
} else if (bb.ymin == 0) { | ||
num_inter_dir_conn = acc_tile_num_inter_die_conn_[bb.xmax][bb.ymax] - | ||
acc_tile_num_inter_die_conn_[bb.xmin-1][bb.ymax]; | ||
} else { | ||
num_inter_dir_conn = acc_tile_num_inter_die_conn_[bb.xmax][bb.ymax] - | ||
acc_tile_num_inter_die_conn_[bb.xmin-1][bb.ymax] - | ||
acc_tile_num_inter_die_conn_[bb.xmax][bb.ymin-1] + | ||
acc_tile_num_inter_die_conn_[bb.xmin-1][bb.ymin-1]; | ||
} | ||
Review thread (resolved):
using …
Yes, that would be cleaner.
@soheilshahrouz, if you remember, my initial attempt was to use this data structure, but I encountered some memory issues with it. I'll create an issue for this and, in another PR, attempt to replace this data structure with …
I think I figured out what the problem was.
Sounds good. I'll try it once the PR is merged. |
||
|
||
float z_cost_factor; | ||
if (num_inter_dir_conn == 0) { | ||
return 1.0f; | ||
} else { | ||
int bb_num_tiles = (bb.xmax - bb.xmin + 1) * (bb.ymax - bb.ymin + 1); | ||
z_cost_factor = bb_num_tiles / static_cast<float>(num_inter_dir_conn); | ||
z_cost_factor = pow((double)z_cost_factor, (double)place_cost_exp); | ||
} | ||
|
||
return z_cost_factor; | ||
|
||
} | ||
|
||
double NetCostHandler::recompute_bb_cost_() { | ||
double cost = 0; | ||
|
||
|
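get_chanz_cost_factor_ above answers the bounding-box query with standard inclusion-exclusion over the prefix sum built earlier. A compact sketch of that query against the acc table from the construction sketch above (illustrative names, not the PR's API):

#include <cmath>
#include <vector>

// Hedged sketch of the O(1) region query plus cost factor in the function above.
// `acc` is the inclusive 2-D prefix sum; the region is [xmin, xmax] x [ymin, ymax].
float chanz_cost_factor(const std::vector<std::vector<float>>& acc,
                        int xmin, int xmax, int ymin, int ymax, double place_cost_exp) {
    // Inclusion-exclusion, guarding the x == 0 / y == 0 edges where there is no cell to subtract.
    float in_box = acc[xmax][ymax];
    if (xmin > 0) in_box -= acc[xmin - 1][ymax];
    if (ymin > 0) in_box -= acc[xmax][ymin - 1];
    if (xmin > 0 && ymin > 0) in_box += acc[xmin - 1][ymin - 1];

    if (in_box <= 0.0f)
        return 1.0f;  // no inter-die connections in the box: fall back to a neutral factor

    int num_tiles = (xmax - xmin + 1) * (ymax - ymin + 1);
    float factor = num_tiles / in_box;                        // 1 / (average connections per tile)
    return static_cast<float>(std::pow(factor, place_cost_exp));
}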
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time | ||
k6_frac_N10_40nm.xml stereovision3.v common 1.44 vpr 57.96 MiB -1 -1 0.42 25620 5 0.11 -1 -1 36164 -1 -1 7 10 -1 -1 success v8.0.0-6989-g4a9293e1e-dirty release IPO VTR_ASSERT_LEVEL=3 GNU 11.3.0 on Linux-5.15.0-58-generic x86_64 2023-02-04T01:37:29 dev /home/dev/Desktop/CAS-Atlantic/vtr-verilog-to-routing 59352 10 2 181 183 1 37 19 5 5 25 clb auto 19.7 MiB 0.05 108 58.0 MiB 0.01 0.00 1.93928 -79.1821 -1.93928 1.93928 0.02 0.000104618 8.1277e-05 0.00610425 0.00496002 24 129 10 485046 377258 28445.8 1137.83 0.13 0.0425172 0.0362734 109 8 74 103 1476 611 2.06938 2.06938 -89.2305 -2.06938 0 0 37126.9 1485.07 0.02 0.01 0.00966903 0.00924379 | ||
k6_frac_N10_40nm.xml stereovision3.v common 1.44 vpr 57.96 MiB -1 -1 0.42 25620 5 0.11 -1 -1 36164 -1 -1 7 10 -1 -1 success v8.0.0-6989-g4a9293e1e-dirty release IPO VTR_ASSERT_LEVEL=3 GNU 11.3.0 on Linux-5.15.0-58-generic x86_64 2023-02-04T01:37:29 dev /home/dev/Desktop/CAS-Atlantic/vtr-verilog-to-routing 59352 10 2 181 183 1 37 19 5 5 25 clb auto 19.7 MiB 0.05 108 58.0 MiB 0.01 0.00 1.93928 -79.1821 -1.93928 1.93928 0.02 0.000104618 8.1277e-05 0.00610425 0.00496002 26 129 10 485046 377258 34134.96 1365.396 0.13 0.0425172 0.0362734 109 8 74 103 1476 611 2.06938 2.06938 -89.2305 -2.06938 0 0 37126.9 1485.07 0.02 0.01 0.00966903 0.00924379 |