@@ -56,6 +56,8 @@ using std::min;
56
56
* cost computation. 0.01 means that there is a 1% error tolerance. */
57
57
#define ERROR_TOL .01
58
58
59
/* Final value of the range limit (rlim). It anchors the crit_exponent
 * interpolation in update_state(): when rlim equals FINAL_RLIM the exponent
 * evaluates to td_place_exp_last. It is also subtracted from the initial rlim
 * to form inverse_delta_rlim in init_annealing_state(). */
+ #define FINAL_RLIM 1
60
+
59
61
/* This defines the maximum number of swap attempts before invoking the *
60
62
* once-in-a-while placement legality check as well as floating point *
61
63
* variables round-offs check. */
@@ -100,8 +102,10 @@ struct t_placer_prev_inverse_costs {
100
102
/* Mutable state of the annealer that is carried across outer-loop iterations.
 * It is filled in by init_annealing_state() and advanced by update_state().
 * Every member has a zero default so that an instance which has not been
 * initialized yet never holds indeterminate values. */
struct t_annealing_state {
    float t = 0.f;                  /* Current temperature. */
    float rlim = 0.f;               /* Current range limit; adjusted through update_rlim(). */
    float inverse_delta_rlim = 0.f; /* Reciprocal of (initial rlim - FINAL_RLIM); scales the
                                     * crit_exponent interpolation in update_state(). */
    float alpha = 0.f;              /* Factor the temperature is multiplied by (t *= alpha). */
    float restart_t = 0.f;          /* Temperature used as the base when DUSTY_SCHED restarts. */
    float crit_exponent = 0.f;      /* Criticality exponent handed to the criticality
                                     * recomputation and to the inner placement loop. */
    int move_lim_max = 0;           /* Upper bound for move_lim. */
    int move_lim = 0;               /* Move limit passed to the inner placement loop. */
};
@@ -325,7 +329,11 @@ static float starting_t(t_placer_costs* costs,
325
329
t_pl_blocks_to_be_moved& blocks_affected,
326
330
const t_placer_opts& placer_opts);
327
331
328
- static bool update_state (t_annealing_state* state, float success_rat, const t_placer_costs& costs, const t_annealing_sched& annealing_sched);
332
+ static bool update_state (t_annealing_state* state,
333
+ float success_rat,
334
+ const t_placer_costs& costs,
335
+ const t_placer_opts& placer_opts,
336
+ const t_annealing_sched& annealing_sched);
329
337
330
338
static void update_rlim (float * rlim, float success_rat, const DeviceGrid& grid);
331
339
@@ -424,7 +432,7 @@ static void print_place_status(const float t,
424
432
size_t tot_moves);
425
433
static void print_resources_utilization ();
426
434
427
- static void init_annealing_state (t_annealing_state* state, const t_annealing_sched& annealing_sched, float t, float rlim, int move_lim_max);
435
+ static void init_annealing_state (t_annealing_state* state, const t_annealing_sched& annealing_sched, float t, float rlim, int move_lim_max, float crit_exponent );
428
436
429
437
/* ****************************************************************************/
430
438
void try_place (const t_placer_opts& placer_opts,
@@ -443,8 +451,7 @@ void try_place(const t_placer_opts& placer_opts,
443
451
444
452
int tot_iter, moves_since_cost_recompute, width_fac, num_connections,
445
453
outer_crit_iter_count, inner_recompute_limit;
446
- float success_rat, crit_exponent,
447
- first_rlim, final_rlim, inverse_delta_rlim;
454
+ float success_rat, first_crit_exponent, first_rlim;
448
455
449
456
t_placer_costs costs;
450
457
t_placer_prev_inverse_costs prev_inverse_costs;
@@ -511,7 +518,7 @@ void try_place(const t_placer_opts& placer_opts,
511
518
if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE || placer_opts.enable_timing_computations ) {
512
519
costs.bb_cost = comp_bb_cost (NORMAL);
513
520
514
- crit_exponent = placer_opts.td_place_exp_first ; /* this will be modified when rlim starts to change */
521
+ first_crit_exponent = placer_opts.td_place_exp_first ; /* this will be modified when rlim starts to change */
515
522
516
523
num_connections = count_connections ();
517
524
VTR_LOG (" \n " );
@@ -534,7 +541,7 @@ void try_place(const t_placer_opts& placer_opts,
534
541
timing_info->set_warn_unconstrained (false ); // Don't warn again about unconstrained nodes again during placement
535
542
536
543
// Initial slack estimates
537
- load_criticalities (*timing_info, crit_exponent , netlist_pin_lookup);
544
+ load_criticalities (*timing_info, first_crit_exponent , netlist_pin_lookup);
538
545
539
546
critical_path = timing_info->least_slack_critical_path ();
540
547
@@ -565,7 +572,7 @@ void try_place(const t_placer_opts& placer_opts,
565
572
costs.timing_cost = 0 ;
566
573
outer_crit_iter_count = 0 ;
567
574
num_connections = 0 ;
568
- crit_exponent = 0 ;
575
+ first_crit_exponent = 0 ;
569
576
570
577
prev_inverse_costs.timing_cost = 0 ; /* inverses not used */
571
578
prev_inverse_costs.bb_cost = 0 ;
@@ -643,8 +650,6 @@ void try_place(const t_placer_opts& placer_opts,
643
650
}
644
651
645
652
first_rlim = (float )max (device_ctx.grid .width () - 1 , device_ctx.grid .height () - 1 );
646
- final_rlim = 1 ;
647
- inverse_delta_rlim = 1 / (first_rlim - final_rlim);
648
653
649
654
float first_t = starting_t (&costs, &prev_inverse_costs,
650
655
annealing_sched, move_lim, first_rlim,
@@ -654,7 +659,7 @@ void try_place(const t_placer_opts& placer_opts,
654
659
placer_opts);
655
660
656
661
t_annealing_state state;
657
- init_annealing_state (&state, annealing_sched, first_t , first_rlim, move_lim);
662
+ init_annealing_state (&state, annealing_sched, first_t , first_rlim, move_lim, first_crit_exponent );
658
663
659
664
if (!placer_opts.move_stats_file .empty ()) {
660
665
f_move_stats_file = std::unique_ptr<FILE, decltype (&vtr::fclose)>(vtr::fopen (placer_opts.move_stats_file .c_str (), " w" ), vtr::fclose);
@@ -676,14 +681,14 @@ void try_place(const t_placer_opts& placer_opts,
676
681
677
682
outer_loop_recompute_criticalities (placer_opts, &costs, &prev_inverse_costs,
678
683
num_connections,
679
- crit_exponent,
684
+ state. crit_exponent ,
680
685
&outer_crit_iter_count,
681
686
netlist_pin_lookup,
682
687
place_delay_model.get (),
683
688
*timing_info);
684
689
685
690
placement_inner_loop (state.t , num_temps, state.rlim , placer_opts,
686
- state.move_lim , crit_exponent, inner_recompute_limit, &stats,
691
+ state.move_lim , state. crit_exponent , inner_recompute_limit, &stats,
687
692
&costs,
688
693
&prev_inverse_costs,
689
694
&moves_since_cost_recompute,
@@ -708,31 +713,24 @@ void try_place(const t_placer_opts& placer_opts,
708
713
print_place_status (state.t , state.alpha ,
709
714
stats,
710
715
critical_path.delay (), sTNS , sWNS ,
711
- success_rat, std_dev, state.rlim , crit_exponent, tot_iter);
716
+ success_rat, std_dev, state.rlim , state. crit_exponent , tot_iter);
712
717
713
718
sprintf (msg, " Cost: %g BB Cost %g TD Cost %g Temperature: %g" ,
714
719
costs.cost , costs.bb_cost , costs.timing_cost , state.t );
715
720
update_screen (ScreenUpdatePriority::MINOR, msg, PLACEMENT, timing_info);
716
- update_rlim (&state.rlim , success_rat, device_ctx.grid );
717
-
718
- if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
719
- crit_exponent = (1 - (state.rlim - final_rlim) * inverse_delta_rlim)
720
- * (placer_opts.td_place_exp_last - placer_opts.td_place_exp_first )
721
- + placer_opts.td_place_exp_first ;
722
- }
723
721
724
722
#ifdef VERBOSE
725
723
if (getEchoEnabled ()) {
726
724
print_clb_placement (" first_iteration_clb_placement.echo" );
727
725
}
728
726
#endif
729
- } while (update_state (&state, success_rat, costs, annealing_sched));
727
+ } while (update_state (&state, success_rat, costs, placer_opts, annealing_sched));
730
728
/* Outer loop of the simulated annealing ends */
731
729
732
730
outer_loop_recompute_criticalities (placer_opts, &costs,
733
731
&prev_inverse_costs,
734
732
num_connections,
735
- crit_exponent,
733
+ state. crit_exponent ,
736
734
&outer_crit_iter_count,
737
735
netlist_pin_lookup,
738
736
place_delay_model.get (),
@@ -743,7 +741,7 @@ void try_place(const t_placer_opts& placer_opts,
743
741
/* Run inner loop again with temperature = 0 so as to accept only swaps
744
742
* which reduce the cost of the placement */
745
743
placement_inner_loop (state.t , num_temps, state.rlim , placer_opts,
746
- state.move_lim , crit_exponent, inner_recompute_limit, &stats,
744
+ state.move_lim , state. crit_exponent , inner_recompute_limit, &stats,
747
745
&costs,
748
746
&prev_inverse_costs,
749
747
&moves_since_cost_recompute,
@@ -766,7 +764,7 @@ void try_place(const t_placer_opts& placer_opts,
766
764
767
765
print_place_status (state.t , state.alpha , stats,
768
766
critical_path.delay (), sTNS , sWNS ,
769
- success_rat, std_dev, state.rlim , crit_exponent, tot_iter);
767
+ success_rat, std_dev, state.rlim , state. crit_exponent , tot_iter);
770
768
771
769
if (placer_opts.placement_saves_per_temperature >= 1 ) {
772
770
std::string filename = vtr::string_fmt (" placement_%03d_%03d.place" , num_temps + 1 , 0 );
@@ -1092,21 +1090,25 @@ static void update_rlim(float* rlim, float success_rat, const DeviceGrid& grid)
1092
1090
}
1093
1091
1094
1092
/* Update the temperature according to the annealing schedule selected. */
1095
- static bool update_state (t_annealing_state* state, float success_rat, const t_placer_costs& costs, const t_annealing_sched& annealing_sched) {
1093
+ static bool update_state (t_annealing_state* state,
1094
+ float success_rat,
1095
+ const t_placer_costs& costs,
1096
+ const t_placer_opts& placer_opts,
1097
+ const t_annealing_sched& annealing_sched) {
1096
1098
/* Return `false` when the exit criterion is met. */
1097
1099
if (annealing_sched.type == USER_SCHED) {
1098
1100
state->t *= annealing_sched.alpha_t ;
1099
1101
return state->t >= annealing_sched.exit_t ;
1100
1102
}
1101
1103
1104
+ auto & device_ctx = g_vpr_ctx.device ();
1102
1105
auto & cluster_ctx = g_vpr_ctx.clustering ();
1103
1106
1104
1107
/* Automatic annealing schedule */
1105
1108
float t_exit = 0.005 * costs.cost / cluster_ctx.clb_nlist .nets ().size ();
1106
1109
1107
- bool restart_temp = state->t < t_exit || std::isnan (t_exit); // May get nan if there are no nets
1108
-
1109
1110
if (annealing_sched.type == DUSTY_SCHED) {
1111
+ bool restart_temp = state->t < t_exit || std::isnan (t_exit); // May get nan if there are no nets
1110
1112
if (success_rat < annealing_sched.success_min || restart_temp) {
1111
1113
if (state->alpha > annealing_sched.alpha_max ) return false ;
1112
1114
state->t = state->restart_t / sqrt (state->alpha ); // Take a half step from the restart temperature.
@@ -1118,7 +1120,6 @@ static bool update_state(t_annealing_state* state, float success_rat, const t_pl
1118
1120
state->t *= state->alpha ;
1119
1121
}
1120
1122
state->move_lim = std::max (1 , std::min (state->move_lim_max , (int )(state->move_lim_max * (annealing_sched.success_target / success_rat))));
1121
- return true ;
1122
1123
} else { /* annealing_sched.type == AUTO_SCHED */
1123
1124
if (success_rat > 0.96 ) {
1124
1125
state->alpha = 0.5 ;
@@ -1131,8 +1132,19 @@ static bool update_state(t_annealing_state* state, float success_rat, const t_pl
1131
1132
}
1132
1133
state->t *= state->alpha ;
1133
1134
1134
- return !restart_temp;
1135
+ // Must be duplicated to retain previous behavior
1136
+ if (state->t < t_exit || std::isnan (t_exit)) return false ;
1135
1137
}
1138
+
1139
+ update_rlim (&state->rlim , success_rat, device_ctx.grid );
1140
+
1141
+ if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
1142
+ state->crit_exponent = (1 - (state->rlim - FINAL_RLIM) * state->inverse_delta_rlim )
1143
+ * (placer_opts.td_place_exp_last - placer_opts.td_place_exp_first )
1144
+ + placer_opts.td_place_exp_first ;
1145
+ }
1146
+
1147
+ return true ;
1136
1148
}
1137
1149
1138
1150
static float starting_t (t_placer_costs* costs,
@@ -2662,17 +2674,24 @@ static void print_resources_utilization() {
2662
2674
VTR_LOG (" \n " );
2663
2675
}
2664
2676
2665
- static void init_annealing_state (t_annealing_state* state, const t_annealing_sched& annealing_sched, float t, float rlim, int move_lim_max) {
2677
+ static void init_annealing_state (t_annealing_state* state,
2678
+ const t_annealing_sched& annealing_sched,
2679
+ float t,
2680
+ float rlim,
2681
+ int move_lim_max,
2682
+ float crit_exponent) {
2666
2683
state->alpha = annealing_sched.alpha_min ;
2667
2684
state->t = t;
2668
2685
state->restart_t = t;
2669
2686
state->rlim = rlim;
2687
+ state->inverse_delta_rlim = 1 / (rlim - FINAL_RLIM);
2670
2688
state->move_lim_max = std::max (1 , move_lim_max);
2671
2689
if (annealing_sched.type == DUSTY_SCHED) {
2672
2690
state->move_lim = std::max (1 , (int )(state->move_lim_max * annealing_sched.success_target ));
2673
2691
} else {
2674
2692
state->move_lim = state->move_lim_max ;
2675
2693
}
2694
+ state->crit_exponent = crit_exponent;
2676
2695
}
2677
2696
2678
2697
bool placer_needs_lookahead (const t_vpr_setup& vpr_setup) {
0 commit comments