@@ -58,6 +58,8 @@ using std::min;
58
58
* cost computation. 0.01 means that there is a 1% error tolerance. */
59
59
#define ERROR_TOL .01
60
60
61
/* Range limit at which the interpolated crit_exponent equals td_place_exp_last; *
 * the annealing state's inverse_delta_rlim is computed relative to this value.  */
+ #define FINAL_RLIM 1
62
+
61
63
/* This defines the maximum number of swap attempts before invoking the *
62
64
* once-in-a-while placement legality check as well as floating point *
63
65
* variables round-offs check. */
@@ -106,8 +108,10 @@ struct t_placer_prev_inverse_costs {
106
108
/* Mutable state of the annealing schedule, carried across outer-loop        *
 * iterations of the placer. Every member has a default initializer so that  *
 * a default-constructed object never holds indeterminate values, even       *
 * before init_annealing_state() has filled it in.                           */
struct t_annealing_state {
    float t = 0.f;                  /* Current temperature. */
    float rlim = 0.f;               /* Current range limit handed to the inner loop. */
    float inverse_delta_rlim = 0.f; /* Cached 1 / (initial rlim - FINAL_RLIM), used to interpolate crit_exponent. */
    float alpha = 0.f;              /* Factor the temperature is multiplied by on each update. */
    float restart_t = 0.f;          /* Temperature the DUSTY_SCHED schedule restarts from. */
    float crit_exponent = 0.f;      /* Exponent passed to the criticality recomputation. */
    int move_lim_max = 0;           /* Upper bound for move_lim. */
    int move_lim = 0;               /* Move limit handed to the inner loop for the current temperature. */
};
@@ -353,7 +357,11 @@ static float starting_t(t_placer_costs* costs,
353
357
t_pl_blocks_to_be_moved& blocks_affected,
354
358
const t_placer_opts& placer_opts);
355
359
356
- static bool update_state (t_annealing_state* state, float success_rat, const t_placer_costs& costs, const t_annealing_sched& annealing_sched);
360
+ static bool update_state (t_annealing_state* state,
361
+ float success_rat,
362
+ const t_placer_costs& costs,
363
+ const t_placer_opts& placer_opts,
364
+ const t_annealing_sched& annealing_sched);
357
365
358
366
static void update_rlim (float * rlim, float success_rat, const DeviceGrid& grid);
359
367
@@ -484,7 +492,7 @@ static void print_place_status(const size_t num_temps,
484
492
size_t tot_moves);
485
493
static void print_resources_utilization ();
486
494
487
- static void init_annealing_state (t_annealing_state* state, const t_annealing_sched& annealing_sched, float t, float rlim, int move_lim_max);
495
+ static void init_annealing_state (t_annealing_state* state, const t_annealing_sched& annealing_sched, float t, float rlim, int move_lim_max, float crit_exponent );
488
496
489
497
/* ****************************************************************************/
490
498
void try_place (const t_placer_opts& placer_opts,
@@ -509,8 +517,7 @@ void try_place(const t_placer_opts& placer_opts,
509
517
510
518
int tot_iter, moves_since_cost_recompute, width_fac, num_connections,
511
519
outer_crit_iter_count, inner_recompute_limit;
512
- float success_rat, crit_exponent,
513
- first_rlim, final_rlim, inverse_delta_rlim;
520
+ float success_rat, first_crit_exponent, first_rlim;
514
521
515
522
t_placer_costs costs;
516
523
t_placer_prev_inverse_costs prev_inverse_costs;
@@ -575,7 +582,7 @@ void try_place(const t_placer_opts& placer_opts,
575
582
if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
576
583
costs.bb_cost = comp_bb_cost (NORMAL);
577
584
578
- crit_exponent = placer_opts.td_place_exp_first ; /* this will be modified when rlim starts to change */
585
+ first_crit_exponent = placer_opts.td_place_exp_first ; /* this will be modified when rlim starts to change */
579
586
580
587
num_connections = count_connections ();
581
588
VTR_LOG (" \n " );
@@ -602,7 +609,7 @@ void try_place(const t_placer_opts& placer_opts,
602
609
atom_ctx.lookup ,
603
610
*timing_info->timing_graph ());
604
611
// Update timing and costs
605
- recompute_criticalities (crit_exponent ,
612
+ recompute_criticalities (first_crit_exponent ,
606
613
place_delay_model.get (),
607
614
placer_criticalities.get (),
608
615
pin_timing_invalidator.get (),
@@ -635,7 +642,7 @@ void try_place(const t_placer_opts& placer_opts,
635
642
costs.timing_cost = 0 ;
636
643
outer_crit_iter_count = 0 ;
637
644
num_connections = 0 ;
638
- crit_exponent = 0 ;
645
+ first_crit_exponent = 0 ;
639
646
640
647
prev_inverse_costs.timing_cost = 0 ; /* inverses not used */
641
648
prev_inverse_costs.bb_cost = 0 ;
@@ -721,8 +728,6 @@ void try_place(const t_placer_opts& placer_opts,
721
728
}
722
729
723
730
first_rlim = (float )max (device_ctx.grid .width () - 1 , device_ctx.grid .height () - 1 );
724
- final_rlim = 1 ;
725
- inverse_delta_rlim = 1 / (first_rlim - final_rlim);
726
731
727
732
float first_t = starting_t (&costs, &prev_inverse_costs,
728
733
annealing_sched, move_lim, first_rlim,
@@ -735,7 +740,7 @@ void try_place(const t_placer_opts& placer_opts,
735
740
placer_opts);
736
741
737
742
t_annealing_state state;
738
- init_annealing_state (&state, annealing_sched, first_t , first_rlim, move_lim);
743
+ init_annealing_state (&state, annealing_sched, first_t , first_rlim, move_lim, first_crit_exponent );
739
744
740
745
if (!placer_opts.move_stats_file .empty ()) {
741
746
f_move_stats_file = std::unique_ptr<FILE, decltype (&vtr::fclose)>(vtr::fopen (placer_opts.move_stats_file .c_str (), " w" ), vtr::fclose);
@@ -759,15 +764,15 @@ void try_place(const t_placer_opts& placer_opts,
759
764
760
765
outer_loop_recompute_criticalities (placer_opts, &costs, &prev_inverse_costs,
761
766
num_connections,
762
- crit_exponent,
767
+ state. crit_exponent ,
763
768
&outer_crit_iter_count,
764
769
place_delay_model.get (),
765
770
placer_criticalities.get (),
766
771
pin_timing_invalidator.get (),
767
772
timing_info.get ());
768
773
769
774
placement_inner_loop (state.t , num_temps, state.rlim , placer_opts,
770
- state.move_lim , crit_exponent, inner_recompute_limit, &stats,
775
+ state.move_lim , state. crit_exponent , inner_recompute_limit, &stats,
771
776
&costs,
772
777
&prev_inverse_costs,
773
778
&moves_since_cost_recompute,
@@ -795,25 +800,18 @@ void try_place(const t_placer_opts& placer_opts,
795
800
state.t , state.alpha ,
796
801
stats,
797
802
critical_path.delay (), sTNS , sWNS ,
798
- success_rat, std_dev, state.rlim , crit_exponent, tot_iter);
803
+ success_rat, std_dev, state.rlim , state. crit_exponent , tot_iter);
799
804
800
805
sprintf (msg, " Cost: %g BB Cost %g TD Cost %g Temperature: %g" ,
801
806
costs.cost , costs.bb_cost , costs.timing_cost , state.t );
802
807
update_screen (ScreenUpdatePriority::MINOR, msg, PLACEMENT, timing_info);
803
- update_rlim (&state.rlim , success_rat, device_ctx.grid );
804
-
805
- if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
806
- crit_exponent = (1 - (state.rlim - final_rlim) * inverse_delta_rlim)
807
- * (placer_opts.td_place_exp_last - placer_opts.td_place_exp_first )
808
- + placer_opts.td_place_exp_first ;
809
- }
810
808
811
809
#ifdef VERBOSE
812
810
if (getEchoEnabled ()) {
813
811
print_clb_placement (" first_iteration_clb_placement.echo" );
814
812
}
815
813
#endif
816
- } while (update_state (&state, success_rat, costs, annealing_sched));
814
+ } while (update_state (&state, success_rat, costs, placer_opts, annealing_sched));
817
815
/* Outer loop of the simulated annealing ends */
818
816
819
817
auto pre_quench_timing_stats = timing_ctx.stats ;
@@ -823,7 +821,7 @@ void try_place(const t_placer_opts& placer_opts,
823
821
outer_loop_recompute_criticalities (placer_opts, &costs,
824
822
&prev_inverse_costs,
825
823
num_connections,
826
- crit_exponent,
824
+ state. crit_exponent ,
827
825
&outer_crit_iter_count,
828
826
place_delay_model.get (),
829
827
placer_criticalities.get (),
@@ -835,7 +833,7 @@ void try_place(const t_placer_opts& placer_opts,
835
833
/* Run inner loop again with temperature = 0 so as to accept only swaps
836
834
* which reduce the cost of the placement */
837
835
placement_inner_loop (state.t , num_temps, state.rlim , placer_opts,
838
- move_lim, crit_exponent, quench_recompute_limit, &stats,
836
+ move_lim, state. crit_exponent , quench_recompute_limit, &stats,
839
837
&costs,
840
838
&prev_inverse_costs,
841
839
&moves_since_cost_recompute,
@@ -862,7 +860,7 @@ void try_place(const t_placer_opts& placer_opts,
862
860
quench_elapsed_sec,
863
861
state.t , state.alpha , stats,
864
862
critical_path.delay (), sTNS , sWNS ,
865
- success_rat, std_dev, state.rlim , crit_exponent, tot_iter);
863
+ success_rat, std_dev, state.rlim , state. crit_exponent , tot_iter);
866
864
}
867
865
auto post_quench_timing_stats = timing_ctx.stats ;
868
866
@@ -893,7 +891,7 @@ void try_place(const t_placer_opts& placer_opts,
893
891
VTR_ASSERT (timing_info);
894
892
895
893
// Update timing and costs
896
- recompute_criticalities (crit_exponent,
894
+ recompute_criticalities (state. crit_exponent ,
897
895
place_delay_model.get (),
898
896
placer_criticalities.get (),
899
897
pin_timing_invalidator.get (),
@@ -1206,21 +1204,25 @@ static void update_rlim(float* rlim, float success_rat, const DeviceGrid& grid)
1206
1204
}
1207
1205
1208
1206
/* Update the temperature according to the annealing schedule selected. */
1209
- static bool update_state (t_annealing_state* state, float success_rat, const t_placer_costs& costs, const t_annealing_sched& annealing_sched) {
1207
+ static bool update_state (t_annealing_state* state,
1208
+ float success_rat,
1209
+ const t_placer_costs& costs,
1210
+ const t_placer_opts& placer_opts,
1211
+ const t_annealing_sched& annealing_sched) {
1210
1212
/* Return `false` when the exit criterion is met. */
1211
1213
if (annealing_sched.type == USER_SCHED) {
1212
1214
state->t *= annealing_sched.alpha_t ;
1213
1215
return state->t >= annealing_sched.exit_t ;
1214
1216
}
1215
1217
1218
+ auto & device_ctx = g_vpr_ctx.device ();
1216
1219
auto & cluster_ctx = g_vpr_ctx.clustering ();
1217
1220
1218
1221
/* Automatic annealing schedule */
1219
1222
float t_exit = 0.005 * costs.cost / cluster_ctx.clb_nlist .nets ().size ();
1220
1223
1221
- bool restart_temp = state->t < t_exit || std::isnan (t_exit); // May get nan if there are no nets
1222
-
1223
1224
if (annealing_sched.type == DUSTY_SCHED) {
1225
+ bool restart_temp = state->t < t_exit || std::isnan (t_exit); // May get nan if there are no nets
1224
1226
if (success_rat < annealing_sched.success_min || restart_temp) {
1225
1227
if (state->alpha > annealing_sched.alpha_max ) return false ;
1226
1228
state->t = state->restart_t / sqrt (state->alpha ); // Take a half step from the restart temperature.
@@ -1232,7 +1234,6 @@ static bool update_state(t_annealing_state* state, float success_rat, const t_pl
1232
1234
state->t *= state->alpha ;
1233
1235
}
1234
1236
state->move_lim = std::max (1 , std::min (state->move_lim_max , (int )(state->move_lim_max * (annealing_sched.success_target / success_rat))));
1235
- return true ;
1236
1237
} else { /* annealing_sched.type == AUTO_SCHED */
1237
1238
if (success_rat > 0.96 ) {
1238
1239
state->alpha = 0.5 ;
@@ -1245,8 +1246,19 @@ static bool update_state(t_annealing_state* state, float success_rat, const t_pl
1245
1246
}
1246
1247
state->t *= state->alpha ;
1247
1248
1248
- return !restart_temp;
1249
+ // Must be duplicated to retain previous behavior
1250
+ if (state->t < t_exit || std::isnan (t_exit)) return false ;
1249
1251
}
1252
+
1253
+ update_rlim (&state->rlim , success_rat, device_ctx.grid );
1254
+
1255
+ if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
1256
+ state->crit_exponent = (1 - (state->rlim - FINAL_RLIM) * state->inverse_delta_rlim )
1257
+ * (placer_opts.td_place_exp_last - placer_opts.td_place_exp_first )
1258
+ + placer_opts.td_place_exp_first ;
1259
+ }
1260
+
1261
+ return true ;
1250
1262
}
1251
1263
1252
1264
static float starting_t (t_placer_costs* costs,
@@ -2958,17 +2970,24 @@ static void print_resources_utilization() {
2958
2970
VTR_LOG (" \n " );
2959
2971
}
2960
2972
2961
- static void init_annealing_state (t_annealing_state* state, const t_annealing_sched& annealing_sched, float t, float rlim, int move_lim_max) {
2973
+ static void init_annealing_state (t_annealing_state* state,
2974
+ const t_annealing_sched& annealing_sched,
2975
+ float t,
2976
+ float rlim,
2977
+ int move_lim_max,
2978
+ float crit_exponent) {
2962
2979
state->alpha = annealing_sched.alpha_min ;
2963
2980
state->t = t;
2964
2981
state->restart_t = t;
2965
2982
state->rlim = rlim;
2983
+ state->inverse_delta_rlim = 1 / (rlim - FINAL_RLIM);
2966
2984
state->move_lim_max = std::max (1 , move_lim_max);
2967
2985
if (annealing_sched.type == DUSTY_SCHED) {
2968
2986
state->move_lim = std::max (1 , (int )(state->move_lim_max * annealing_sched.success_target ));
2969
2987
} else {
2970
2988
state->move_lim = state->move_lim_max ;
2971
2989
}
2990
+ state->crit_exponent = crit_exponent;
2972
2991
}
2973
2992
2974
2993
bool placer_needs_lookahead (const t_vpr_setup& vpr_setup) {
0 commit comments