@@ -79,16 +79,16 @@ void reinitialize_noc_routing(const t_noc_opts& noc_opts, t_placer_costs& costs)
79
79
initial_noc_routing ();
80
80
81
81
// Initialize traffic_flow_costs
82
- costs.noc_aggregate_bandwidth_cost = comp_noc_aggregate_bandwidth_cost ();
83
- costs.noc_latency_cost = comp_noc_latency_cost (noc_opts );
84
- costs.noc_congestion_cost = comp_noc_congestion_cost (noc_opts );
82
+ costs.noc_cost_terms . aggregate_bandwidth = comp_noc_aggregate_bandwidth_cost ();
83
+ std::tie ( costs.noc_cost_terms . latency , costs. noc_cost_terms . latency_overrun ) = comp_noc_latency_cost ();
84
+ costs.noc_cost_terms . congestion = comp_noc_congestion_cost ();
85
85
}
86
86
87
87
void find_affected_noc_routers_and_update_noc_costs (const t_pl_blocks_to_be_moved& blocks_affected,
88
- NocCostTerms& delta_c,
89
- const t_noc_opts& noc_opts) {
88
+ NocCostTerms& delta_c) {
90
89
VTR_ASSERT_SAFE (delta_c.aggregate_bandwidth == 0 .);
91
90
VTR_ASSERT_SAFE (delta_c.latency == 0 .);
91
+ VTR_ASSERT (delta_c.latency_overrun == 0 .);
92
92
VTR_ASSERT_SAFE (delta_c.congestion == 0 .);
93
93
auto & noc_ctx = g_vpr_ctx.mutable_noc ();
94
94
@@ -122,11 +122,13 @@ void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_move
122
122
123
123
// calculate the new aggregate bandwidth and latency costs for the affected traffic flow
124
124
proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth = calculate_traffic_flow_aggregate_bandwidth_cost (traffic_flow_route, curr_traffic_flow);
125
- proposed_traffic_flow_costs[traffic_flow_id].latency = calculate_traffic_flow_latency_cost (traffic_flow_route, noc_ctx.noc_model , curr_traffic_flow, noc_opts);
125
+ std::tie (proposed_traffic_flow_costs[traffic_flow_id].latency ,
126
+ proposed_traffic_flow_costs[traffic_flow_id].latency_overrun ) = calculate_traffic_flow_latency_cost (traffic_flow_route, noc_ctx.noc_model , curr_traffic_flow);
126
127
127
128
// compute how much the aggregate bandwidth and latency costs change with this swap
128
129
delta_c.aggregate_bandwidth += proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth - traffic_flow_costs[traffic_flow_id].aggregate_bandwidth ;
129
130
delta_c.latency += proposed_traffic_flow_costs[traffic_flow_id].latency - traffic_flow_costs[traffic_flow_id].latency ;
131
+ delta_c.latency_overrun += proposed_traffic_flow_costs[traffic_flow_id].latency_overrun - traffic_flow_costs[traffic_flow_id].latency_overrun ;
130
132
}
131
133
132
134
// Iterate over all affected links and calculate their new congestion cost and store it
@@ -135,7 +137,7 @@ void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_move
135
137
const auto & link = noc_ctx.noc_model .get_single_noc_link (link_id);
136
138
137
139
// calculate the new congestion cost for the link and store it
138
- proposed_link_congestion_costs[link] = calculate_link_congestion_cost (link, noc_opts );
140
+ proposed_link_congestion_costs[link] = calculate_link_congestion_cost (link);
139
141
140
142
// compute how much the congestion cost changes with this swap
141
143
delta_c.congestion += proposed_link_congestion_costs[link] - link_congestion_costs[link];
@@ -154,6 +156,7 @@ void commit_noc_costs() {
154
156
// reset the proposed traffic flows costs
155
157
proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth = INVALID_NOC_COST_TERM;
156
158
proposed_traffic_flow_costs[traffic_flow_id].latency = INVALID_NOC_COST_TERM;
159
+ proposed_traffic_flow_costs[traffic_flow_id].latency_overrun = INVALID_NOC_COST_TERM;
157
160
}
158
161
159
162
// Iterate over all the NoC links whose bandwidth utilization was affected by the proposed move
@@ -321,12 +324,13 @@ void recompute_noc_costs(NocCostTerms& new_cost) {
321
324
auto & noc_ctx = g_vpr_ctx.noc ();
322
325
323
326
// reset the cost variables first
324
- new_cost = NocCostTerms{0.0 , 0.0 , 0.0 };
327
+ new_cost = NocCostTerms{0.0 , 0.0 , 0.0 , 0.0 };
325
328
326
329
// go through the costs of all the traffic flows and add them up to recompute the total costs associated with the NoC
327
330
for (const auto & traffic_flow_id : noc_ctx.noc_traffic_flows_storage .get_all_traffic_flow_id ()) {
328
331
new_cost.aggregate_bandwidth += traffic_flow_costs[traffic_flow_id].aggregate_bandwidth ;
329
332
new_cost.latency += traffic_flow_costs[traffic_flow_id].latency ;
333
+ new_cost.latency_overrun += traffic_flow_costs[traffic_flow_id].latency_overrun ;
330
334
}
331
335
332
336
// Iterate over all NoC links and accumulate their congestion costs
@@ -339,14 +343,23 @@ void recompute_noc_costs(NocCostTerms& new_cost) {
339
343
340
344
void update_noc_normalization_factors (t_placer_costs& costs) {
341
345
// Prevent the norm factors from going to infinity
342
- costs.noc_aggregate_bandwidth_cost_norm = std::min (1 / costs.noc_aggregate_bandwidth_cost , MAX_INV_NOC_AGGREGATE_BANDWIDTH_COST);
343
- costs.noc_latency_cost_norm = std::min (1 / costs.noc_latency_cost , MAX_INV_NOC_LATENCY_COST);
346
+ costs.noc_cost_norm_factors . aggregate_bandwidth = std::min (1 / costs.noc_cost_terms . aggregate_bandwidth , MAX_INV_NOC_AGGREGATE_BANDWIDTH_COST);
347
+ costs.noc_cost_norm_factors . latency = std::min (1 / costs.noc_cost_terms . latency , MAX_INV_NOC_LATENCY_COST);
344
348
345
- // to avoid division by zero
346
- if (costs.noc_congestion_cost > 0.0 ) {
347
- costs.noc_congestion_cost_norm = std::min (1 / costs.noc_congestion_cost , MAX_INV_NOC_CONGESTION_COST);
349
+ // to avoid division by zero and negative numbers
350
+ // latency overrun cost may take very small negative values due to round-off error
351
+ if (costs.noc_cost_terms .latency_overrun > 0.0 ) {
352
+ costs.noc_cost_norm_factors .latency_overrun = std::min (1 / costs.noc_cost_terms .latency_overrun , MAX_INV_NOC_LATENCY_COST);
348
353
} else {
349
- costs.noc_congestion_cost_norm = MAX_INV_NOC_CONGESTION_COST;
354
+ costs.noc_cost_norm_factors .latency_overrun = MAX_INV_NOC_LATENCY_COST;
355
+ }
356
+
357
+ // to avoid division by zero and negative numbers
358
+ // congestion cost may take very small negative values due to round-off error
359
+ if (costs.noc_cost_terms .congestion > 0.0 ) {
360
+ costs.noc_cost_norm_factors .congestion = std::min (1 / costs.noc_cost_terms .congestion , MAX_INV_NOC_CONGESTION_COST);
361
+ } else {
362
+ costs.noc_cost_norm_factors .congestion = MAX_INV_NOC_CONGESTION_COST;
350
363
}
351
364
352
365
return ;
@@ -378,41 +391,43 @@ double comp_noc_aggregate_bandwidth_cost(void) {
378
391
return noc_aggregate_bandwidth_cost;
379
392
}
380
393
381
- double comp_noc_latency_cost (const t_noc_opts& noc_opts ) {
394
+ std::pair< double , double > comp_noc_latency_cost () {
382
395
// used to get traffic flow route information
383
396
auto & noc_ctx = g_vpr_ctx.noc ();
384
397
// datastructure that stores all the traffic flow routes
385
398
const NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage ;
386
399
387
- double noc_latency_cost = 0 .;
400
+ std::pair< double , double > noc_latency_cost_terms{ 0.0 , 0.0 } ;
388
401
389
402
// now go through each traffic flow route and calculate its
390
403
// latency. Then store this in local data structures and accumulate it.
391
404
for (const auto & traffic_flow_id : noc_ctx.noc_traffic_flows_storage .get_all_traffic_flow_id ()) {
392
405
const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow (traffic_flow_id);
393
406
const std::vector<NocLinkId>& curr_traffic_flow_route = noc_traffic_flows_storage.get_traffic_flow_route (traffic_flow_id);
394
407
395
- double curr_traffic_flow_latency_cost = calculate_traffic_flow_latency_cost (curr_traffic_flow_route, noc_ctx.noc_model , curr_traffic_flow, noc_opts );
408
+ auto [curr_traffic_flow_latency, curr_traffic_flow_latency_overrun] = calculate_traffic_flow_latency_cost (curr_traffic_flow_route, noc_ctx.noc_model , curr_traffic_flow);
396
409
397
- // store the calculated latency for the current traffic flow in local datastructures (this also initializes them)
398
- traffic_flow_costs[traffic_flow_id].latency = curr_traffic_flow_latency_cost;
410
+ // store the calculated latency cost terms for the current traffic flow in local datastructures (this also initializes them)
411
+ traffic_flow_costs[traffic_flow_id].latency = curr_traffic_flow_latency;
412
+ traffic_flow_costs[traffic_flow_id].latency_overrun = curr_traffic_flow_latency_overrun;
399
413
400
- // accumulate the latency cost
401
- noc_latency_cost += curr_traffic_flow_latency_cost;
414
+ // accumulate the latency cost terms
415
+ noc_latency_cost_terms.first += curr_traffic_flow_latency;
416
+ noc_latency_cost_terms.second += curr_traffic_flow_latency_overrun;
402
417
}
403
418
404
- return noc_latency_cost ;
419
+ return noc_latency_cost_terms ;
405
420
}
406
421
407
- double comp_noc_congestion_cost (const t_noc_opts& noc_opts ) {
422
+ double comp_noc_congestion_cost () {
408
423
// Used to access NoC links
409
424
auto & noc_ctx = g_vpr_ctx.noc ();
410
425
411
426
double congestion_cost = 0 .;
412
427
413
428
// Iterate over all NoC links
414
429
for (const auto & link : noc_ctx.noc_model .get_noc_links ()) {
415
- double link_congestion_cost = calculate_link_congestion_cost (link, noc_opts );
430
+ double link_congestion_cost = calculate_link_congestion_cost (link);
416
431
417
432
// store the congestion cost for this link in static data structures (this also initializes them)
418
433
link_congestion_costs[link] = link_congestion_cost;
@@ -426,7 +441,7 @@ double comp_noc_congestion_cost(const t_noc_opts& noc_opts) {
426
441
427
442
int check_noc_placement_costs (const t_placer_costs& costs, double error_tolerance, const t_noc_opts& noc_opts) {
428
443
int error = 0 ;
429
- NocCostTerms cost_check{0.0 , 0.0 , 0.0 };
444
+ NocCostTerms cost_check{0.0 , 0.0 , 0.0 , 0.0 };
430
445
431
446
// get current router block locations
432
447
auto & place_ctx = g_vpr_ctx.placement ();
@@ -468,8 +483,9 @@ int check_noc_placement_costs(const t_placer_costs& costs, double error_toleranc
468
483
double current_flow_aggregate_bandwidth_cost = calculate_traffic_flow_aggregate_bandwidth_cost (temp_found_noc_route, curr_traffic_flow);
469
484
cost_check.aggregate_bandwidth += current_flow_aggregate_bandwidth_cost;
470
485
471
- double current_flow_latency_cost = calculate_traffic_flow_latency_cost (temp_found_noc_route, noc_model, curr_traffic_flow, noc_opts);
472
- cost_check.latency += current_flow_latency_cost;
486
+ auto [curr_traffic_flow_latency_cost, curr_traffic_flow_latency_overrun_cost] = calculate_traffic_flow_latency_cost (temp_found_noc_route, noc_model, curr_traffic_flow);
487
+ cost_check.latency += curr_traffic_flow_latency_cost;
488
+ cost_check.latency_overrun += curr_traffic_flow_latency_overrun_cost;
473
489
474
490
// increase bandwidth utilization for the links that constitute the current flow's route
475
491
for (auto & link_id : temp_found_noc_route) {
@@ -485,35 +501,46 @@ int check_noc_placement_costs(const t_placer_costs& costs, double error_toleranc
485
501
486
502
// Iterate over all NoC links and accumulate congestion cost
487
503
for (const auto & link : temp_noc_link_storage) {
488
- cost_check.congestion += calculate_link_congestion_cost (link, noc_opts );
504
+ cost_check.congestion += calculate_link_congestion_cost (link);
489
505
}
490
506
491
507
// check whether the aggregate bandwidth placement cost is within the error tolerance
492
- if (fabs (cost_check.aggregate_bandwidth - costs.noc_aggregate_bandwidth_cost ) > costs.noc_aggregate_bandwidth_cost * error_tolerance) {
508
+ if (fabs (cost_check.aggregate_bandwidth - costs.noc_cost_terms . aggregate_bandwidth ) > costs.noc_cost_terms . aggregate_bandwidth * error_tolerance) {
493
509
VTR_LOG_ERROR (
494
510
" noc_aggregate_bandwidth_cost_check: %g and noc_aggregate_bandwidth_cost: %g differ in check_noc_placement_costs.\n " ,
495
- cost_check.aggregate_bandwidth , costs.noc_aggregate_bandwidth_cost );
511
+ cost_check.aggregate_bandwidth , costs.noc_cost_terms . aggregate_bandwidth );
496
512
error++;
497
513
}
498
514
499
515
// only check the recomputed cost if it is above our expected latency cost threshold of 1 pico-second, otherwise there is no point in checking it
500
516
if (cost_check.latency > MIN_EXPECTED_NOC_LATENCY_COST) {
501
517
// check whether the latency placement cost is within the error tolerance
502
- if (fabs (cost_check.latency - costs.noc_latency_cost ) > costs.noc_latency_cost * error_tolerance) {
518
+ if (fabs (cost_check.latency - costs.noc_cost_terms . latency ) > costs.noc_cost_terms . latency * error_tolerance) {
503
519
VTR_LOG_ERROR (
504
520
" noc_latency_cost_check: %g and noc_latency_cost: %g differ in check_noc_placement_costs.\n " ,
505
- cost_check.latency , costs.noc_latency_cost );
521
+ cost_check.latency , costs.noc_cost_terms .latency );
522
+ error++;
523
+ }
524
+ }
525
+
526
+ // only check the recomputed cost if it is above our expected latency cost threshold of 1 pico-second, otherwise there is no point in checking it
527
+ if (cost_check.latency_overrun > MIN_EXPECTED_NOC_LATENCY_COST) {
528
+ // check whether the latency overrun placement cost is within the error tolerance
529
+ if (fabs (cost_check.latency_overrun - costs.noc_cost_terms .latency_overrun ) > costs.noc_cost_terms .latency_overrun * error_tolerance) {
530
+ VTR_LOG_ERROR (
531
+ " noc_latency_overrun_cost_check: %g and noc_latency_overrun_cost: %g differ in check_noc_placement_costs.\n " ,
532
+ cost_check.latency_overrun , costs.noc_cost_terms .latency_overrun );
506
533
error++;
507
534
}
508
535
}
509
536
510
537
// check the recomputed congestion cost only if it is higher than the minimum expected value
511
538
if (cost_check.congestion > MIN_EXPECTED_NOC_CONGESTION_COST) {
512
539
// check whether the NoC congestion cost is within the error range
513
- if (fabs (cost_check.congestion - costs.noc_congestion_cost ) > costs.noc_congestion_cost * error_tolerance) {
540
+ if (fabs (cost_check.congestion - costs.noc_cost_terms . congestion ) > costs.noc_cost_terms . congestion * error_tolerance) {
514
541
VTR_LOG_ERROR (
515
542
" noc_congestion_cost_check: %g and noc_congestion_cost: %g differ in check_noc_placement_costs.\n " ,
516
- cost_check.congestion , costs.noc_congestion_cost );
543
+ cost_check.congestion , costs.noc_cost_terms . congestion );
517
544
error++;
518
545
}
519
546
}
@@ -528,7 +555,9 @@ double calculate_traffic_flow_aggregate_bandwidth_cost(const std::vector<NocLink
528
555
return (traffic_flow_info.traffic_flow_priority * traffic_flow_info.traffic_flow_bandwidth * num_of_links_in_traffic_flow);
529
556
}
530
557
531
- double calculate_traffic_flow_latency_cost (const std::vector<NocLinkId>& traffic_flow_route, const NocStorage& noc_model, const t_noc_traffic_flow& traffic_flow_info, const t_noc_opts& noc_opts) {
558
+ std::pair<double , double > calculate_traffic_flow_latency_cost (const std::vector<NocLinkId>& traffic_flow_route,
559
+ const NocStorage& noc_model,
560
+ const t_noc_traffic_flow& traffic_flow_info) {
532
561
// there will always be one more router than links in a traffic flow
533
562
int num_of_links_in_traffic_flow = traffic_flow_route.size ();
534
563
int num_of_routers_in_traffic_flow = num_of_links_in_traffic_flow + 1 ;
@@ -538,26 +567,28 @@ double calculate_traffic_flow_latency_cost(const std::vector<NocLinkId>& traffic
538
567
double noc_link_latency = noc_model.get_noc_link_latency ();
539
568
double noc_router_latency = noc_model.get_noc_router_latency ();
540
569
541
- // calculate the traffic flow_latency
570
+ // calculate the traffic flow latency
542
571
double latency = (noc_link_latency * num_of_links_in_traffic_flow) + (noc_router_latency * num_of_routers_in_traffic_flow);
543
572
544
- // calculate the cost
545
- double single_traffic_flow_latency_cost = (noc_opts. noc_latency_constraints_weighting * std::max (0 ., latency - max_latency)) + (noc_opts. noc_latency_weighting * latency );
573
+ // calculate the traffic flow latency overrun
574
+ double latency_overrun = std::max (latency - max_latency, 0 . );
546
575
547
576
// scale the latency cost by its priority to indicate its importance
548
- return (single_traffic_flow_latency_cost * traffic_flow_info.traffic_flow_priority ) ;
549
- }
577
+ latency *= traffic_flow_info.traffic_flow_priority ;
578
+ latency_overrun *= traffic_flow_info. traffic_flow_priority ;
550
579
551
- double calculate_link_congestion_cost ( const NocLink& link, const t_noc_opts& noc_opts) {
552
- double congested_bw_ratio, congestion_cost;
580
+ return {latency, latency_overrun};
581
+ }
553
582
554
- congested_bw_ratio = link. get_congested_bandwidth_ratio ();
555
- congestion_cost = noc_opts. noc_congestion_weighting * congested_bw_ratio ;
583
+ double calculate_link_congestion_cost ( const NocLink& link) {
584
+ double congested_bw_ratio = link. get_congested_bandwidth_ratio () ;
556
585
557
- return congestion_cost ;
586
+ return congested_bw_ratio ;
558
587
}
559
588
560
- double calculate_noc_cost (const NocCostTerms& cost_terms, const t_placer_costs& norm_factors, const t_noc_opts& noc_opts) {
589
+ double calculate_noc_cost (const NocCostTerms& cost_terms,
590
+ const NocCostTerms& norm_factors,
591
+ const t_noc_opts& noc_opts) {
561
592
double cost = 0.0 ;
562
593
563
594
/* NoC's contribution to the placement cost is a weighted sum over:
@@ -566,9 +597,10 @@ double calculate_noc_cost(const NocCostTerms& cost_terms, const t_placer_costs&
566
597
* 3) Link congestion costs
567
598
*/
568
599
cost = noc_opts.noc_placement_weighting * (
569
- cost_terms.latency * norm_factors.noc_latency_cost_norm +
570
- cost_terms.aggregate_bandwidth * norm_factors.noc_aggregate_bandwidth_cost_norm +
571
- cost_terms.congestion * norm_factors.noc_congestion_cost_norm );
600
+ cost_terms.aggregate_bandwidth * norm_factors.aggregate_bandwidth +
601
+ cost_terms.latency * norm_factors.latency * noc_opts.noc_latency_constraints_weighting +
602
+ cost_terms.latency_overrun * norm_factors.latency_overrun * noc_opts.noc_latency_constraints_weighting +
603
+ cost_terms.congestion * norm_factors.congestion * noc_opts.noc_congestion_weighting );
572
604
573
605
return cost;
574
606
}
@@ -692,7 +724,7 @@ bool check_for_router_swap(int user_supplied_noc_router_swap_percentage) {
692
724
* we now only swap router blocks for the percentage of time the user
693
725
* supplied.
694
726
* */
695
- return (vtr::irand (99 ) < user_supplied_noc_router_swap_percentage) ? true : false ;
727
+ return (vtr::irand (99 ) < user_supplied_noc_router_swap_percentage);
696
728
}
697
729
698
730
static bool select_random_router_cluster (ClusterBlockId& b_from, t_pl_loc& from, t_logical_block_type_ptr& cluster_from_type) {
0 commit comments