Skip to content

Commit 6bede42

Browse files
Separate NoC cost computation and normalization
1 parent 4896984 commit 6bede42

File tree

6 files changed

+207
-174
lines changed

6 files changed

+207
-174
lines changed

vpr/src/place/initial_noc_placement.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -158,11 +158,11 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
158158
t_placer_costs costs;
159159

160160
// Initialize NoC-related costs
161-
costs.noc_aggregate_bandwidth_cost = comp_noc_aggregate_bandwidth_cost();
162-
costs.noc_latency_cost = comp_noc_latency_cost(noc_opts);
163-
costs.noc_congestion_cost = comp_noc_congestion_cost(noc_opts);
161+
costs.noc_cost_terms.aggregate_bandwidth = comp_noc_aggregate_bandwidth_cost();
162+
std::tie(costs.noc_cost_terms.latency, costs.noc_cost_terms.latency_overrun) = comp_noc_latency_cost();
163+
costs.noc_cost_terms.congestion = comp_noc_congestion_cost();
164164
update_noc_normalization_factors(costs);
165-
costs.cost = calculate_noc_cost(NocCostTerms(costs), costs, noc_opts);
165+
costs.cost = calculate_noc_cost(costs.noc_cost_terms, costs.noc_cost_norm_factors, noc_opts);
166166

167167
// Maximum distance in each direction that a router can travel in a move
168168
// It is assumed that NoC routers are organized in a square grid.
@@ -214,9 +214,9 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) {
214214
if (create_move_outcome != e_create_move::ABORT) {
215215
apply_move_blocks(blocks_affected);
216216

217-
NocCostTerms noc_delta_c {0.0, 0.0, 0.0};
218-
find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_delta_c, noc_opts);
219-
double delta_cost = calculate_noc_cost(noc_delta_c, costs, noc_opts);
217+
NocCostTerms noc_delta_c;
218+
find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_delta_c);
219+
double delta_cost = calculate_noc_cost(noc_delta_c, costs.noc_cost_norm_factors, noc_opts);
220220

221221
double prob = starting_prob - i_move * prob_step;
222222
bool move_accepted = accept_noc_swap(delta_cost, prob);

vpr/src/place/noc_place_utils.cpp

Lines changed: 82 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -79,16 +79,16 @@ void reinitialize_noc_routing(const t_noc_opts& noc_opts, t_placer_costs& costs)
7979
initial_noc_routing();
8080

8181
// Initialize traffic_flow_costs
82-
costs.noc_aggregate_bandwidth_cost = comp_noc_aggregate_bandwidth_cost();
83-
costs.noc_latency_cost = comp_noc_latency_cost(noc_opts);
84-
costs.noc_congestion_cost = comp_noc_congestion_cost(noc_opts);
82+
costs.noc_cost_terms.aggregate_bandwidth = comp_noc_aggregate_bandwidth_cost();
83+
std::tie(costs.noc_cost_terms.latency, costs.noc_cost_terms.latency_overrun) = comp_noc_latency_cost();
84+
costs.noc_cost_terms.congestion = comp_noc_congestion_cost();
8585
}
8686

8787
void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected,
88-
NocCostTerms& delta_c,
89-
const t_noc_opts& noc_opts) {
88+
NocCostTerms& delta_c) {
9089
VTR_ASSERT_SAFE(delta_c.aggregate_bandwidth == 0.);
9190
VTR_ASSERT_SAFE(delta_c.latency == 0.);
91+
VTR_ASSERT(delta_c.latency_overrun == 0.);
9292
VTR_ASSERT_SAFE(delta_c.congestion == 0.);
9393
auto& noc_ctx = g_vpr_ctx.mutable_noc();
9494

@@ -122,11 +122,13 @@ void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_move
122122

123123
// calculate the new aggregate bandwidth and latency costs for the affected traffic flow
124124
proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth = calculate_traffic_flow_aggregate_bandwidth_cost(traffic_flow_route, curr_traffic_flow);
125-
proposed_traffic_flow_costs[traffic_flow_id].latency = calculate_traffic_flow_latency_cost(traffic_flow_route, noc_ctx.noc_model, curr_traffic_flow, noc_opts);
125+
std::tie(proposed_traffic_flow_costs[traffic_flow_id].latency,
126+
proposed_traffic_flow_costs[traffic_flow_id].latency_overrun) = calculate_traffic_flow_latency_cost(traffic_flow_route, noc_ctx.noc_model, curr_traffic_flow);
126127

127128
// compute how much the aggregate bandwidth and latency costs change with this swap
128129
delta_c.aggregate_bandwidth += proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth - traffic_flow_costs[traffic_flow_id].aggregate_bandwidth;
129130
delta_c.latency += proposed_traffic_flow_costs[traffic_flow_id].latency - traffic_flow_costs[traffic_flow_id].latency;
131+
delta_c.latency_overrun += proposed_traffic_flow_costs[traffic_flow_id].latency_overrun - traffic_flow_costs[traffic_flow_id].latency_overrun;
130132
}
131133

132134
// Iterate over all affected links and calculate their new congestion cost and store it
@@ -135,7 +137,7 @@ void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_move
135137
const auto& link = noc_ctx.noc_model.get_single_noc_link(link_id);
136138

137139
// calculate the new congestion cost for the link and store it
138-
proposed_link_congestion_costs[link] = calculate_link_congestion_cost(link, noc_opts);
140+
proposed_link_congestion_costs[link] = calculate_link_congestion_cost(link);
139141

140142
// compute how much the congestion cost changes with this swap
141143
delta_c.congestion += proposed_link_congestion_costs[link] - link_congestion_costs[link];
@@ -154,6 +156,7 @@ void commit_noc_costs() {
154156
// reset the proposed traffic flows costs
155157
proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth = INVALID_NOC_COST_TERM;
156158
proposed_traffic_flow_costs[traffic_flow_id].latency = INVALID_NOC_COST_TERM;
159+
proposed_traffic_flow_costs[traffic_flow_id].latency_overrun = INVALID_NOC_COST_TERM;
157160
}
158161

159162
// Iterate over all the NoC links whose bandwidth utilization was affected by the proposed move
@@ -321,12 +324,13 @@ void recompute_noc_costs(NocCostTerms& new_cost) {
321324
auto& noc_ctx = g_vpr_ctx.noc();
322325

323326
// reset the cost variables first
324-
new_cost = NocCostTerms{0.0, 0.0, 0.0};
327+
new_cost = NocCostTerms{0.0, 0.0, 0.0, 0.0};
325328

326329
// go through the costs of all the traffic flows and add them up to recompute the total costs associated with the NoC
327330
for (const auto& traffic_flow_id : noc_ctx.noc_traffic_flows_storage.get_all_traffic_flow_id()) {
328331
new_cost.aggregate_bandwidth += traffic_flow_costs[traffic_flow_id].aggregate_bandwidth;
329332
new_cost.latency += traffic_flow_costs[traffic_flow_id].latency;
333+
new_cost.latency_overrun += traffic_flow_costs[traffic_flow_id].latency_overrun;
330334
}
331335

332336
// Iterate over all NoC links and accumulate their congestion costs
@@ -339,14 +343,23 @@ void recompute_noc_costs(NocCostTerms& new_cost) {
339343

340344
void update_noc_normalization_factors(t_placer_costs& costs) {
341345
//Prevent the norm factors from going to infinity
342-
costs.noc_aggregate_bandwidth_cost_norm = std::min(1 / costs.noc_aggregate_bandwidth_cost, MAX_INV_NOC_AGGREGATE_BANDWIDTH_COST);
343-
costs.noc_latency_cost_norm = std::min(1 / costs.noc_latency_cost, MAX_INV_NOC_LATENCY_COST);
346+
costs.noc_cost_norm_factors.aggregate_bandwidth = std::min(1 / costs.noc_cost_terms.aggregate_bandwidth, MAX_INV_NOC_AGGREGATE_BANDWIDTH_COST);
347+
costs.noc_cost_norm_factors.latency = std::min(1 / costs.noc_cost_terms.latency, MAX_INV_NOC_LATENCY_COST);
344348

345-
// to avoid division by zero
346-
if (costs.noc_congestion_cost > 0.0) {
347-
costs.noc_congestion_cost_norm = std::min(1 / costs.noc_congestion_cost, MAX_INV_NOC_CONGESTION_COST);
349+
// to avoid division by zero and negative numbers
350+
// latency overrun cost may take very small negative values due to round-off error
351+
if (costs.noc_cost_terms.latency_overrun > 0.0) {
352+
costs.noc_cost_norm_factors.latency_overrun = std::min(1 / costs.noc_cost_terms.latency_overrun, MAX_INV_NOC_LATENCY_COST);
348353
} else {
349-
costs.noc_congestion_cost_norm = MAX_INV_NOC_CONGESTION_COST;
354+
costs.noc_cost_norm_factors.latency_overrun = MAX_INV_NOC_LATENCY_COST;
355+
}
356+
357+
// to avoid division by zero and negative numbers
358+
// congestion cost may take very small negative values due to round-off error
359+
if (costs.noc_cost_terms.congestion > 0.0) {
360+
costs.noc_cost_norm_factors.congestion = std::min(1 / costs.noc_cost_terms.congestion, MAX_INV_NOC_CONGESTION_COST);
361+
} else {
362+
costs.noc_cost_norm_factors.congestion = MAX_INV_NOC_CONGESTION_COST;
350363
}
351364

352365
return;
@@ -378,41 +391,43 @@ double comp_noc_aggregate_bandwidth_cost(void) {
378391
return noc_aggregate_bandwidth_cost;
379392
}
380393

381-
double comp_noc_latency_cost(const t_noc_opts& noc_opts) {
394+
std::pair<double, double> comp_noc_latency_cost() {
382395
// used to get traffic flow route information
383396
auto& noc_ctx = g_vpr_ctx.noc();
384397
// datastructure that stores all the traffic flow routes
385398
const NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage;
386399

387-
double noc_latency_cost = 0.;
400+
std::pair<double, double> noc_latency_cost_terms{0.0, 0.0};
388401

389402
// now go through each traffic flow route and calculate its
390403
// latency. Then store this in local data structures and accumulate it.
391404
for (const auto& traffic_flow_id : noc_ctx.noc_traffic_flows_storage.get_all_traffic_flow_id()) {
392405
const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id);
393406
const std::vector<NocLinkId>& curr_traffic_flow_route = noc_traffic_flows_storage.get_traffic_flow_route(traffic_flow_id);
394407

395-
double curr_traffic_flow_latency_cost = calculate_traffic_flow_latency_cost(curr_traffic_flow_route, noc_ctx.noc_model, curr_traffic_flow, noc_opts);
408+
auto [curr_traffic_flow_latency, curr_traffic_flow_latency_overrun] = calculate_traffic_flow_latency_cost(curr_traffic_flow_route, noc_ctx.noc_model, curr_traffic_flow);
396409

397-
// store the calculated latency for the current traffic flow in local datastructures (this also initializes them)
398-
traffic_flow_costs[traffic_flow_id].latency = curr_traffic_flow_latency_cost;
410+
// store the calculated latency cost terms for the current traffic flow in local datastructures (this also initializes them)
411+
traffic_flow_costs[traffic_flow_id].latency = curr_traffic_flow_latency;
412+
traffic_flow_costs[traffic_flow_id].latency_overrun = curr_traffic_flow_latency_overrun;
399413

400-
// accumulate the latency cost
401-
noc_latency_cost += curr_traffic_flow_latency_cost;
414+
// accumulate the latency cost terms
415+
noc_latency_cost_terms.first += curr_traffic_flow_latency;
416+
noc_latency_cost_terms.second += curr_traffic_flow_latency_overrun;
402417
}
403418

404-
return noc_latency_cost;
419+
return noc_latency_cost_terms;
405420
}
406421

407-
double comp_noc_congestion_cost(const t_noc_opts& noc_opts) {
422+
double comp_noc_congestion_cost() {
408423
// Used to access NoC links
409424
auto& noc_ctx = g_vpr_ctx.noc();
410425

411426
double congestion_cost = 0.;
412427

413428
// Iterate over all NoC links
414429
for (const auto& link : noc_ctx.noc_model.get_noc_links()) {
415-
double link_congestion_cost = calculate_link_congestion_cost(link, noc_opts);
430+
double link_congestion_cost = calculate_link_congestion_cost(link);
416431

417432
// store the congestion cost for this link in static data structures (this also initializes them)
418433
link_congestion_costs[link] = link_congestion_cost;
@@ -426,7 +441,7 @@ double comp_noc_congestion_cost(const t_noc_opts& noc_opts) {
426441

427442
int check_noc_placement_costs(const t_placer_costs& costs, double error_tolerance, const t_noc_opts& noc_opts) {
428443
int error = 0;
429-
NocCostTerms cost_check{0.0, 0.0, 0.0};
444+
NocCostTerms cost_check{0.0, 0.0, 0.0, 0.0};
430445

431446
// get current router block locations
432447
auto& place_ctx = g_vpr_ctx.placement();
@@ -468,8 +483,9 @@ int check_noc_placement_costs(const t_placer_costs& costs, double error_toleranc
468483
double current_flow_aggregate_bandwidth_cost = calculate_traffic_flow_aggregate_bandwidth_cost(temp_found_noc_route, curr_traffic_flow);
469484
cost_check.aggregate_bandwidth += current_flow_aggregate_bandwidth_cost;
470485

471-
double current_flow_latency_cost = calculate_traffic_flow_latency_cost(temp_found_noc_route, noc_model, curr_traffic_flow, noc_opts);
472-
cost_check.latency += current_flow_latency_cost;
486+
auto [curr_traffic_flow_latency_cost, curr_traffic_flow_latency_overrun_cost] = calculate_traffic_flow_latency_cost(temp_found_noc_route, noc_model, curr_traffic_flow);
487+
cost_check.latency += curr_traffic_flow_latency_cost;
488+
cost_check.latency_overrun += curr_traffic_flow_latency_overrun_cost;
473489

474490
// increase bandwidth utilization for the links that constitute the current flow's route
475491
for (auto& link_id : temp_found_noc_route) {
@@ -485,35 +501,46 @@ int check_noc_placement_costs(const t_placer_costs& costs, double error_toleranc
485501

486502
// Iterate over all NoC links and accumulate congestion cost
487503
for(const auto& link : temp_noc_link_storage) {
488-
cost_check.congestion += calculate_link_congestion_cost(link, noc_opts);
504+
cost_check.congestion += calculate_link_congestion_cost(link);
489505
}
490506

491507
// check whether the aggregate bandwidth placement cost is within the error tolerance
492-
if (fabs(cost_check.aggregate_bandwidth - costs.noc_aggregate_bandwidth_cost) > costs.noc_aggregate_bandwidth_cost * error_tolerance) {
508+
if (fabs(cost_check.aggregate_bandwidth - costs.noc_cost_terms.aggregate_bandwidth) > costs.noc_cost_terms.aggregate_bandwidth * error_tolerance) {
493509
VTR_LOG_ERROR(
494510
"noc_aggregate_bandwidth_cost_check: %g and noc_aggregate_bandwidth_cost: %g differ in check_noc_placement_costs.\n",
495-
cost_check.aggregate_bandwidth, costs.noc_aggregate_bandwidth_cost);
511+
cost_check.aggregate_bandwidth, costs.noc_cost_terms.aggregate_bandwidth);
496512
error++;
497513
}
498514

499515
// only check the recomputed cost if it is above our expected latency cost threshold of 1 pico-second, otherwise there is no point in checking it
500516
if (cost_check.latency > MIN_EXPECTED_NOC_LATENCY_COST) {
501517
// check whether the latency placement cost is within the error tolerance
502-
if (fabs(cost_check.latency - costs.noc_latency_cost) > costs.noc_latency_cost * error_tolerance) {
518+
if (fabs(cost_check.latency - costs.noc_cost_terms.latency) > costs.noc_cost_terms.latency * error_tolerance) {
503519
VTR_LOG_ERROR(
504520
"noc_latency_cost_check: %g and noc_latency_cost: %g differ in check_noc_placement_costs.\n",
505-
cost_check.latency, costs.noc_latency_cost);
521+
cost_check.latency, costs.noc_cost_terms.latency);
522+
error++;
523+
}
524+
}
525+
526+
// only check the recomputed cost if it is above our expected latency cost threshold of 1 pico-second, otherwise there is no point in checking it
527+
if (cost_check.latency_overrun > MIN_EXPECTED_NOC_LATENCY_COST) {
528+
// check whether the latency overrun placement cost is within the error tolerance
529+
if (fabs(cost_check.latency_overrun - costs.noc_cost_terms.latency_overrun) > costs.noc_cost_terms.latency_overrun * error_tolerance) {
530+
VTR_LOG_ERROR(
531+
"noc_latency_overrun_cost_check: %g and noc_latency_overrun_cost: %g differ in check_noc_placement_costs.\n",
532+
cost_check.latency_overrun, costs.noc_cost_terms.latency_overrun);
506533
error++;
507534
}
508535
}
509536

510537
// check the recomputed congestion cost only if it is higher than the minimum expected value
511538
if (cost_check.congestion > MIN_EXPECTED_NOC_CONGESTION_COST) {
512539
// check whether the NoC congestion cost is within the error range
513-
if (fabs(cost_check.congestion - costs.noc_congestion_cost) > costs.noc_congestion_cost * error_tolerance) {
540+
if (fabs(cost_check.congestion - costs.noc_cost_terms.congestion) > costs.noc_cost_terms.congestion * error_tolerance) {
514541
VTR_LOG_ERROR(
515542
"noc_congestion_cost_check: %g and noc_congestion_cost: %g differ in check_noc_placement_costs.\n",
516-
cost_check.congestion, costs.noc_congestion_cost);
543+
cost_check.congestion, costs.noc_cost_terms.congestion);
517544
error++;
518545
}
519546
}
@@ -528,7 +555,9 @@ double calculate_traffic_flow_aggregate_bandwidth_cost(const std::vector<NocLink
528555
return (traffic_flow_info.traffic_flow_priority * traffic_flow_info.traffic_flow_bandwidth * num_of_links_in_traffic_flow);
529556
}
530557

531-
double calculate_traffic_flow_latency_cost(const std::vector<NocLinkId>& traffic_flow_route, const NocStorage& noc_model, const t_noc_traffic_flow& traffic_flow_info, const t_noc_opts& noc_opts) {
558+
std::pair<double, double> calculate_traffic_flow_latency_cost(const std::vector<NocLinkId>& traffic_flow_route,
559+
const NocStorage& noc_model,
560+
const t_noc_traffic_flow& traffic_flow_info) {
532561
// there will always be one more router than links in a traffic flow
533562
int num_of_links_in_traffic_flow = traffic_flow_route.size();
534563
int num_of_routers_in_traffic_flow = num_of_links_in_traffic_flow + 1;
@@ -538,26 +567,28 @@ double calculate_traffic_flow_latency_cost(const std::vector<NocLinkId>& traffic
538567
double noc_link_latency = noc_model.get_noc_link_latency();
539568
double noc_router_latency = noc_model.get_noc_router_latency();
540569

541-
// calculate the traffic flow_latency
570+
// calculate the traffic flow latency
542571
double latency = (noc_link_latency * num_of_links_in_traffic_flow) + (noc_router_latency * num_of_routers_in_traffic_flow);
543572

544-
// calculate the cost
545-
double single_traffic_flow_latency_cost = (noc_opts.noc_latency_constraints_weighting * std::max(0., latency - max_latency)) + (noc_opts.noc_latency_weighting * latency);
573+
// calculate the traffic flow latency overrun
574+
double latency_overrun = std::max(latency - max_latency, 0.);
546575

547576
// scale the latency cost by its priority to indicate its importance
548-
return (single_traffic_flow_latency_cost * traffic_flow_info.traffic_flow_priority);
549-
}
577+
latency *= traffic_flow_info.traffic_flow_priority;
578+
latency_overrun *= traffic_flow_info.traffic_flow_priority;
550579

551-
double calculate_link_congestion_cost(const NocLink& link, const t_noc_opts& noc_opts) {
552-
double congested_bw_ratio, congestion_cost;
580+
return {latency, latency_overrun};
581+
}
553582

554-
congested_bw_ratio = link.get_congested_bandwidth_ratio();
555-
congestion_cost = noc_opts.noc_congestion_weighting * congested_bw_ratio;
583+
double calculate_link_congestion_cost(const NocLink& link) {
584+
double congested_bw_ratio = link.get_congested_bandwidth_ratio();
556585

557-
return congestion_cost;
586+
return congested_bw_ratio;
558587
}
559588

560-
double calculate_noc_cost(const NocCostTerms& cost_terms, const t_placer_costs& norm_factors, const t_noc_opts& noc_opts) {
589+
double calculate_noc_cost(const NocCostTerms& cost_terms,
590+
const NocCostTerms& norm_factors,
591+
const t_noc_opts& noc_opts) {
561592
double cost = 0.0;
562593

563594
/* NoC's contribution to the placement cost is a weighted sum over:
@@ -566,9 +597,10 @@ double calculate_noc_cost(const NocCostTerms& cost_terms, const t_placer_costs&
566597
* 3) Link congestion costs
567598
*/
568599
cost = noc_opts.noc_placement_weighting * (
569-
cost_terms.latency * norm_factors.noc_latency_cost_norm +
570-
cost_terms.aggregate_bandwidth * norm_factors.noc_aggregate_bandwidth_cost_norm +
571-
cost_terms.congestion * norm_factors.noc_congestion_cost_norm);
600+
cost_terms.aggregate_bandwidth * norm_factors.aggregate_bandwidth +
601+
cost_terms.latency * norm_factors.latency * noc_opts.noc_latency_constraints_weighting +
602+
cost_terms.latency_overrun * norm_factors.latency_overrun * noc_opts.noc_latency_constraints_weighting +
603+
cost_terms.congestion * norm_factors.congestion * noc_opts.noc_congestion_weighting);
572604

573605
return cost;
574606
}
@@ -692,7 +724,7 @@ bool check_for_router_swap(int user_supplied_noc_router_swap_percentage) {
692724
* we now only swap router blocks for the percentage of time the user
693725
* supplied.
694726
* */
695-
return (vtr::irand(99) < user_supplied_noc_router_swap_percentage) ? true : false;
727+
return (vtr::irand(99) < user_supplied_noc_router_swap_percentage);
696728
}
697729

698730
static bool select_random_router_cluster(ClusterBlockId& b_from, t_pl_loc& from, t_logical_block_type_ptr& cluster_from_type) {

0 commit comments

Comments
 (0)