diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 9991523ca79..82eceb426dd 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -63,10 +63,6 @@ using std::min;
  * cost computation. 0.01 means that there is a 1% error tolerance.       */
 #define ERROR_TOL .01
 
-/* The final rlim (range limit) is 1, which is the smallest value that can *
- * still make progress, since an rlim of 0 wouldn't allow any swaps.       */
-#define FINAL_RLIM 1
-
 /* This defines the maximum number of swap attempts before invoking the   *
  * once-in-a-while placement legality check as well as floating point     *
  * variables round-offs check.                                            */
@@ -89,19 +85,8 @@ enum e_cost_methods {
     CHECK
 };
 
-struct t_placer_statistics {
-    double av_cost, av_bb_cost, av_timing_cost,
-        sum_of_squares;
-    int success_sum;
-};
-
 constexpr float INVALID_DELAY = std::numeric_limits<float>::quiet_NaN();
-
-constexpr double MAX_INV_TIMING_COST = 1.e9;
-/* Stops inverse timing cost from going to infinity with very lax timing constraints,
- * which avoids multiplying by a gigantic prev_inverse.timing_cost when auto-normalizing.
- * The exact value of this cost has relatively little impact, but should not be
- * large enough to be on the order of timing costs for normal constraints. */
+constexpr double INVALID_COST = std::numeric_limits<double>::quiet_NaN(); ///< NaN sentinel for unused (double) cost terms
 
 /********************** Variables local to place.c ***************************/
 
@@ -262,11 +247,8 @@ static double comp_bb_cost(e_cost_methods method);
 static void update_move_nets(int num_nets_affected);
 static void reset_move_nets(int num_nets_affected);
 
-static e_move_result try_swap(float t,
-                              float crit_exponent,
+static e_move_result try_swap(const t_annealing_state* state,
                               t_placer_costs* costs,
-                              t_placer_prev_inverse_costs* prev_inverse_costs,
-                              float rlim,
                               MoveGenerator& move_generator,
                               SetupTimingInfo* timing_info,
                               ClusteredPinTimingInvalidator* pin_timing_invalidator,
@@ -291,12 +273,9 @@ static int check_placement_consistency();
 static int check_block_placement_consistency();
 static int check_macro_placement_consistency();
 
-static float starting_t(float crit_exponent,
+static float starting_t(const t_annealing_state* state,
                         t_placer_costs* costs,
-                        t_placer_prev_inverse_costs* prev_inverse_costs,
                         t_annealing_sched annealing_sched,
-                        int max_moves,
-                        float rlim,
                         const PlaceDelayModel* delay_model,
                         PlacerCriticalities* criticalities,
                         PlacerSetupSlacks* setup_slacks,
@@ -306,18 +285,8 @@ static float starting_t(float crit_exponent,
                         t_pl_blocks_to_be_moved& blocks_affected,
                         const t_placer_opts& placer_opts);
 
-static bool update_annealing_state(t_annealing_state* state,
-                                   float success_rat,
-                                   const t_placer_costs& costs,
-                                   const t_placer_opts& placer_opts,
-                                   const t_annealing_sched& annealing_sched);
-
-static void update_rlim(float* rlim, float success_rat, const DeviceGrid& grid);
-
 static int count_connections();
 
-static double get_std_dev(int n, double sum_x_squared, double av_x);
-
 static double recompute_bb_cost();
 
 static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected);
@@ -369,7 +338,6 @@ static void free_try_swap_arrays();
 
 static void outer_loop_update_timing_info(const t_placer_opts& placer_opts,
                                           t_placer_costs* costs,
-                                          t_placer_prev_inverse_costs* prev_inverse_costs,
                                           int num_connections,
                                           float crit_exponent,
                                           int* outer_crit_iter_count,
@@ -379,16 +347,11 @@ static void outer_loop_update_timing_info(const t_placer_opts& placer_opts,
                                           ClusteredPinTimingInvalidator* pin_timing_invalidator,
                                           SetupTimingInfo* timing_info);
 
-static void placement_inner_loop(float t,
-                                 int temp_num,
-                                 float rlim,
+static void placement_inner_loop(const t_annealing_state* state,
                                  const t_placer_opts& placer_opts,
-                                 int move_lim,
-                                 float crit_exponent,
                                  int inner_recompute_limit,
                                  t_placer_statistics* stats,
                                  t_placer_costs* costs,
-                                 t_placer_prev_inverse_costs* prev_inverse_costs,
                                  int* moves_since_cost_recompute,
                                  ClusteredPinTimingInvalidator* pin_timing_invalidator,
                                  const PlaceDelayModel* delay_model,
@@ -404,32 +367,21 @@ static void recompute_costs_from_scratch(const t_placer_opts& placer_opts,
                                          const PlacerCriticalities* criticalities,
                                          t_placer_costs* costs);
 
-static void calc_placer_stats(t_placer_statistics& stats, float& success_rat, double& std_dev, const t_placer_costs& costs, const int move_lim);
-
 static void generate_post_place_timing_reports(const t_placer_opts& placer_opts,
                                                const t_analysis_opts& analysis_opts,
                                                const SetupTimingInfo& timing_info,
                                                const PlacementDelayCalculator& delay_calc);
 
 static void print_place_status_header();
-static void print_place_status(const size_t num_temps,
-                               const float elapsed_sec,
-                               const float t,
-                               const float alpha,
+static void print_place_status(const t_annealing_state& state,
                                const t_placer_statistics& stats,
-                               const float cpd,
-                               const float sTNS,
-                               const float sWNS,
-                               const float acc_rate,
-                               const float std_dev,
-                               const float rlim,
-                               const float crit_exponent,
+                               float elapsed_sec,
+                               float cpd,
+                               float sTNS,
+                               float sWNS,
                                size_t tot_moves);
 static void print_resources_utilization();
 
-void transform_blocks_affected(t_pl_blocks_to_be_moved blocksAffected);
-static void init_annealing_state(t_annealing_state* state, const t_annealing_sched& annealing_sched, float t, float rlim, int move_lim_max, float crit_exponent);
-
 /*****************************************************************************/
 void try_place(const t_placer_opts& placer_opts,
                t_annealing_sched annealing_sched,
@@ -456,16 +408,15 @@ void try_place(const t_placer_opts& placer_opts,
 
     int tot_iter, moves_since_cost_recompute, width_fac, num_connections,
         outer_crit_iter_count, inner_recompute_limit;
-    float success_rat, first_crit_exponent, first_rlim;
+    float first_crit_exponent, first_rlim, first_t;
+    int first_move_lim;
 
-    t_placer_costs costs;
-    t_placer_prev_inverse_costs prev_inverse_costs;
+    t_placer_costs costs(placer_opts.place_algorithm);
 
     tatum::TimingPathInfo critical_path;
     float sTNS = NAN;
     float sWNS = NAN;
 
-    double std_dev;
     char msg[vtr::bufsize];
     t_placer_statistics stats;
 
@@ -573,7 +524,7 @@ void try_place(const t_placer_opts& placer_opts,
 
         critical_path = timing_info->least_slack_critical_path();
 
-        //Write out the initial timing echo file
+        /* Write out the initial timing echo file */
         if (isEchoFileEnabled(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH)) {
             tatum::write_echo(getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH),
                               *timing_ctx.graph, *timing_ctx.constraints, *placement_delay_calc, timing_info->analyzer());
@@ -585,20 +536,27 @@ void try_place(const t_placer_opts& placer_opts,
 
         outer_crit_iter_count = 1;
 
-        prev_inverse_costs.timing_cost = 1 / costs.timing_cost;
-        prev_inverse_costs.bb_cost = 1 / costs.bb_cost;
-        costs.cost = 1; /*our new cost function uses normalized values of           */
-        /*bb_cost and timing_cost, the value of cost will be reset  */
-        /*to 1 at each temperature when *_TIMING_DRIVEN_PLACE is true */
-    } else { /*BOUNDING_BOX_PLACE */
-        costs.cost = costs.bb_cost = comp_bb_cost(NORMAL);
-        costs.timing_cost = 0;
+        /* Initialize the normalization factors. Calling costs.update_norm_factors() *
+         * here would fail the golden results of strong_sdc benchmark                */
+        costs.timing_cost_norm = 1 / costs.timing_cost;
+        costs.bb_cost_norm = 1 / costs.bb_cost;
+        costs.cost = 1;
+    } else {
+        VTR_ASSERT(placer_opts.place_algorithm == BOUNDING_BOX_PLACE);
+
+        /* Total cost is the same as wirelength cost */
+        costs.bb_cost = comp_bb_cost(NORMAL);
+        costs.cost = costs.bb_cost;
+
+        /* Timing cost and normalization factors are not used */
+        costs.timing_cost = INVALID_COST;
+        costs.timing_cost_norm = INVALID_COST;
+        costs.bb_cost_norm = INVALID_COST;
+
+        /* Other initializations */
         outer_crit_iter_count = 0;
         num_connections = 0;
         first_crit_exponent = 0;
-
-        prev_inverse_costs.timing_cost = 0; /*inverses not used */
-        prev_inverse_costs.bb_cost = 0;
     }
 
     //Sanity check that initial placement is legal
@@ -637,65 +595,44 @@ void try_place(const t_placer_opts& placer_opts,
         print_place(nullptr, nullptr, filename.c_str());
     }
 
-    int move_lim = 1;
-    if (placer_opts.effort_scaling == e_place_effort_scaling::CIRCUIT) {
-        //This scales the move limit proportional to num_blocks ^ (4/3)
-        move_lim = (int)(annealing_sched.inner_num * pow(cluster_ctx.clb_nlist.blocks().size(), 1.3333));
-    } else if (placer_opts.effort_scaling == e_place_effort_scaling::DEVICE_CIRCUIT) {
-        //This scales the move limit proportional to device_size ^ (2/3) * num_blocks ^ (2/3)
-        //
-        //For highly utilized devices (device_size ~ num_blocks) this is the same as
-        //num_blocks ^ (4/3).
-        //
-        //For low utilization devices (device_size >> num_blocks) this performs more
-        //moves (device_size ^ (2/3)) to ensure better optimization. In this case,
-        //more moves than num_blocks ^ (4/3) may be required, since the search space
-        //is larger.
-        float device_size = device_ctx.grid.width() * device_ctx.grid.height();
-        move_lim = (int)(annealing_sched.inner_num * pow(device_size, 2. / 3.) * pow(cluster_ctx.clb_nlist.blocks().size(), 2. / 3.));
-    } else {
-        VPR_ERROR(VPR_ERROR_PLACE, "Unrecognized placer effort scaling");
-    }
-    VTR_LOG("Moves per temperature: %d\n", move_lim);
-
-    /* Sometimes I want to run the router with a random placement.  Avoid *
-     * using 0 moves to stop division by 0 and 0 length vector problems,  *
-     * by setting move_lim to 1 (which is still too small to do any       *
-     * significant optimization).                                         */
-    if (move_lim <= 0)
-        move_lim = 1;
+    first_move_lim = get_initial_move_lim(placer_opts, annealing_sched);
 
     if (placer_opts.inner_loop_recompute_divider != 0) {
-        inner_recompute_limit = (int)(0.5 + (float)move_lim / (float)placer_opts.inner_loop_recompute_divider);
+        inner_recompute_limit = (int)(0.5 + (float)first_move_lim / (float)placer_opts.inner_loop_recompute_divider);
     } else {
         /*don't do an inner recompute */
-        inner_recompute_limit = move_lim + 1;
+        inner_recompute_limit = first_move_lim + 1;
     }
 
     int quench_recompute_limit;
     if (placer_opts.quench_recompute_divider != 0) {
-        quench_recompute_limit = (int)(0.5 + (float)move_lim / (float)placer_opts.quench_recompute_divider);
+        quench_recompute_limit = (int)(0.5 + (float)first_move_lim / (float)placer_opts.quench_recompute_divider);
     } else {
         /*don't do an quench recompute */
-        quench_recompute_limit = move_lim + 1;
+        quench_recompute_limit = first_move_lim + 1;
     }
 
+    /* Get the first range limiter */
     first_rlim = (float)max(device_ctx.grid.width() - 1, device_ctx.grid.height() - 1);
 
-    float first_t = starting_t(first_crit_exponent,
-                               &costs, &prev_inverse_costs,
-                               annealing_sched, move_lim, first_rlim,
-                               place_delay_model.get(),
-                               placer_criticalities.get(),
-                               placer_setup_slacks.get(),
-                               timing_info.get(),
-                               *move_generator,
-                               pin_timing_invalidator.get(),
-                               blocks_affected,
-                               placer_opts);
-
-    t_annealing_state state;
-    init_annealing_state(&state, annealing_sched, first_t, first_rlim, move_lim, first_crit_exponent);
+    /* Set the temperature high so essentially all swaps will be accepted   */
+    /* when trying to determine the starting temp for placement inner loop. */
+    first_t = HUGE_POSITIVE_FLOAT;
+
+    t_annealing_state state(annealing_sched, first_t, first_rlim, first_move_lim, first_crit_exponent);
+
+    /* Update the starting temperature for placement annealing to a more appropriate value */
+    state.t = starting_t(&state,
+                         &costs,
+                         annealing_sched,
+                         place_delay_model.get(),
+                         placer_criticalities.get(),
+                         placer_setup_slacks.get(),
+                         timing_info.get(),
+                         *move_generator,
+                         pin_timing_invalidator.get(),
+                         blocks_affected,
+                         placer_opts);
 
     if (!placer_opts.move_stats_file.empty()) {
         f_move_stats_file = std::unique_ptr<FILE, decltype(&vtr::fclose)>(vtr::fopen(placer_opts.move_stats_file.c_str(), "w"), vtr::fclose);
@@ -704,7 +641,6 @@ void try_place(const t_placer_opts& placer_opts,
 
     tot_iter = 0;
     moves_since_cost_recompute = 0;
-    int num_temps = 0;
 
 #ifdef ENABLE_ANALYTIC_PLACE
     // Analytic placer: When enabled, skip most of the annealing and go straight to quench
@@ -720,12 +656,9 @@ void try_place(const t_placer_opts& placer_opts,
     /* Outer loop of the simulated annealing begins */
     do {
         vtr::Timer temperature_timer;
-        if (placer_opts.place_algorithm.is_timing_driven()) {
-            costs.cost = 1;
-        }
 
         outer_loop_update_timing_info(placer_opts,
-                                      &costs, &prev_inverse_costs,
+                                      &costs,
                                       num_connections,
                                       state.crit_exponent,
                                       &outer_crit_iter_count,
@@ -735,9 +668,9 @@ void try_place(const t_placer_opts& placer_opts,
                                       pin_timing_invalidator.get(),
                                       timing_info.get());
 
-        placement_inner_loop(state.t, num_temps, state.rlim, placer_opts,
-                             state.move_lim, state.crit_exponent, inner_recompute_limit, &stats,
-                             &costs, &prev_inverse_costs,
+        placement_inner_loop(&state, placer_opts,
+                             inner_recompute_limit, &stats,
+                             &costs,
                              &moves_since_cost_recompute,
                              pin_timing_invalidator.get(),
                              place_delay_model.get(),
@@ -749,10 +682,7 @@ void try_place(const t_placer_opts& placer_opts,
                              placer_opts.place_algorithm);
 
         tot_iter += state.move_lim;
-
-        calc_placer_stats(stats, success_rat, std_dev, costs, state.move_lim);
-
-        ++num_temps;
+        ++state.num_temps;
 
         if (placer_opts.place_algorithm.is_timing_driven()) {
             critical_path = timing_info->least_slack_critical_path();
@@ -760,12 +690,7 @@ void try_place(const t_placer_opts& placer_opts,
             sWNS = timing_info->setup_worst_negative_slack();
         }
 
-        print_place_status(num_temps,
-                           temperature_timer.elapsed_sec(),
-                           state.t, state.alpha,
-                           stats,
-                           critical_path.delay(), sTNS, sWNS,
-                           success_rat, std_dev, state.rlim, state.crit_exponent, tot_iter);
+        print_place_status(state, stats, temperature_timer.elapsed_sec(), critical_path.delay(), sTNS, sWNS, tot_iter);
 
         sprintf(msg, "Cost: %g  BB Cost %g  TD Cost %g  Temperature: %g",
                 costs.cost, costs.bb_cost, costs.timing_cost, state.t);
@@ -776,7 +701,7 @@ void try_place(const t_placer_opts& placer_opts,
             print_clb_placement("first_iteration_clb_placement.echo");
         }
 #endif
-    } while (update_annealing_state(&state, success_rat, costs, placer_opts, annealing_sched));
+    } while (state.outer_loop_update(stats.success_rate, costs, placer_opts, annealing_sched));
     /* Outer loop of the simmulated annealing ends */
 
 #ifdef ENABLE_ANALYTIC_PLACE
@@ -784,12 +709,16 @@ void try_place(const t_placer_opts& placer_opts,
 quench:
 #endif /* ENABLE_ANALYTIC_PLACE */
 
+    /* Start Quench */
+    state.t = 0;                         //Freeze out: only accept solutions that improve placement.
+    state.move_lim = state.move_lim_max; //Revert the move limit to initial value.
+
     auto pre_quench_timing_stats = timing_ctx.stats;
     { /* Quench */
         vtr::ScopedFinishTimer temperature_timer("Placement Quench");
 
         outer_loop_update_timing_info(placer_opts,
-                                      &costs, &prev_inverse_costs,
+                                      &costs,
                                       num_connections,
                                       state.crit_exponent,
                                       &outer_crit_iter_count,
@@ -799,13 +728,11 @@ void try_place(const t_placer_opts& placer_opts,
                                       pin_timing_invalidator.get(),
                                       timing_info.get());
 
-        state.t = 0; /* freeze out */
-
         /* Run inner loop again with temperature = 0 so as to accept only swaps
          * which reduce the cost of the placement */
-        placement_inner_loop(state.t, num_temps, state.rlim, placer_opts,
-                             move_lim, state.crit_exponent, quench_recompute_limit, &stats,
-                             &costs, &prev_inverse_costs,
+        placement_inner_loop(&state, placer_opts,
+                             quench_recompute_limit, &stats,
+                             &costs,
                              &moves_since_cost_recompute,
                              pin_timing_invalidator.get(),
                              place_delay_model.get(),
@@ -816,10 +743,8 @@ void try_place(const t_placer_opts& placer_opts,
                              timing_info.get(),
                              placer_opts.place_quench_algorithm);
 
-        tot_iter += move_lim;
-        ++num_temps;
-
-        calc_placer_stats(stats, success_rat, std_dev, costs, move_lim);
+        tot_iter += state.move_lim;
+        ++state.num_temps;
 
         if (placer_opts.place_quench_algorithm.is_timing_driven()) {
             critical_path = timing_info->least_slack_critical_path();
@@ -827,17 +752,12 @@ void try_place(const t_placer_opts& placer_opts,
             sWNS = timing_info->setup_worst_negative_slack();
         }
 
-        float quench_elapsed_sec = temperature_timer.elapsed_sec();
-        print_place_status(num_temps,
-                           quench_elapsed_sec,
-                           state.t, state.alpha, stats,
-                           critical_path.delay(), sTNS, sWNS,
-                           success_rat, std_dev, state.rlim, state.crit_exponent, tot_iter);
+        print_place_status(state, stats, temperature_timer.elapsed_sec(), critical_path.delay(), sTNS, sWNS, tot_iter);
     }
     auto post_quench_timing_stats = timing_ctx.stats;
 
     if (placer_opts.placement_saves_per_temperature >= 1) {
-        std::string filename = vtr::string_fmt("placement_%03d_%03d.place", num_temps + 1, 0);
+        std::string filename = vtr::string_fmt("placement_%03d_%03d.place", state.num_temps + 1, 0);
         VTR_LOG("Saving final placement to file: %s\n", filename.c_str());
         print_place(nullptr, nullptr, filename.c_str());
     }
@@ -905,7 +825,7 @@ void try_place(const t_placer_opts& placer_opts,
     float reject_rate = (float)num_swap_rejected / total_swap_attempts;
     float accept_rate = (float)num_swap_accepted / total_swap_attempts;
     float abort_rate = (float)num_swap_aborted / total_swap_attempts;
-    VTR_LOG("Placement number of temperatures: %d\n", num_temps);
+    VTR_LOG("Placement number of temperatures: %d\n", state.num_temps);
     VTR_LOG("Placement total # of swap attempts: %*d\n", num_swap_print_digits, total_swap_attempts);
     VTR_LOG("\tSwaps accepted: %*d (%4.1f %%)\n", num_swap_print_digits, num_swap_accepted, 100 * accept_rate);
     VTR_LOG("\tSwaps rejected: %*d (%4.1f %%)\n", num_swap_print_digits, num_swap_rejected, 100 * reject_rate);
@@ -928,7 +848,6 @@ void try_place(const t_placer_opts& placer_opts,
 /* Function to update the setup slacks and criticalities before the inner loop of the annealing/quench */
 static void outer_loop_update_timing_info(const t_placer_opts& placer_opts,
                                           t_placer_costs* costs,
-                                          t_placer_prev_inverse_costs* prev_inverse_costs,
                                           int num_connections,
                                           float crit_exponent,
                                           int* outer_crit_iter_count,
@@ -964,24 +883,16 @@ static void outer_loop_update_timing_info(const t_placer_opts& placer_opts,
     }
     (*outer_crit_iter_count)++;
 
-    /*at each temperature change we update these values to be used     */
-    /*for normalizing the tradeoff between timing and wirelength (bb)  */
-    prev_inverse_costs->bb_cost = 1 / costs->bb_cost;
-    /*Prevent inverse timing cost from going to infinity */
-    prev_inverse_costs->timing_cost = min(1 / costs->timing_cost, MAX_INV_TIMING_COST);
+    /* Update the cost normalization factors */
+    costs->update_norm_factors();
 }
 
 /* Function which contains the inner loop of the simulated annealing */
-static void placement_inner_loop(float t,
-                                 int temp_num,
-                                 float rlim,
+static void placement_inner_loop(const t_annealing_state* state,
                                  const t_placer_opts& placer_opts,
-                                 int move_lim,
-                                 float crit_exponent,
                                  int inner_recompute_limit,
                                  t_placer_statistics* stats,
                                  t_placer_costs* costs,
-                                 t_placer_prev_inverse_costs* prev_inverse_costs,
                                  int* moves_since_cost_recompute,
                                  ClusteredPinTimingInvalidator* pin_timing_invalidator,
                                  const PlaceDelayModel* delay_model,
@@ -995,21 +906,14 @@ static void placement_inner_loop(float t,
 
     int inner_placement_save_count = 0; //How many times have we dumped placement to a file this temperature?
 
-    stats->av_cost = 0.;
-    stats->av_bb_cost = 0.;
-    stats->av_timing_cost = 0.;
-    stats->sum_of_squares = 0.;
-    stats->success_sum = 0;
+    stats->reset();
 
     inner_crit_iter_count = 1;
 
     /* Inner loop begins */
-    for (inner_iter = 0; inner_iter < move_lim; inner_iter++) {
-        e_move_result swap_result = try_swap(t,
-                                             crit_exponent,
+    for (inner_iter = 0; inner_iter < state->move_lim; inner_iter++) {
+        e_move_result swap_result = try_swap(state,
                                              costs,
-                                             prev_inverse_costs,
-                                             rlim,
                                              move_generator,
                                              timing_info,
                                              pin_timing_invalidator,
@@ -1023,11 +927,7 @@ static void placement_inner_loop(float t,
 
         if (swap_result == ACCEPTED) {
             /* Move was accepted.  Update statistics that are useful for the annealing schedule. */
-            stats->success_sum++;
-            stats->av_cost += costs->cost;
-            stats->av_bb_cost += costs->bb_cost;
-            stats->av_timing_cost += costs->timing_cost;
-            stats->sum_of_squares += (costs->cost) * (costs->cost);
+            stats->single_swap_update(*costs);
             num_swap_accepted++;
         } else if (swap_result == ABORTED) {
             num_swap_aborted++;
@@ -1040,14 +940,14 @@ static void placement_inner_loop(float t,
              * We do this only once in a while, since it is expensive.
              */
             if (inner_crit_iter_count >= inner_recompute_limit
-                && inner_iter != move_lim - 1) { /*on last iteration don't recompute */
+                && inner_iter != state->move_lim - 1) { /*on last iteration don't recompute */
 
                 inner_crit_iter_count = 0;
 #ifdef VERBOSE
                 VTR_LOG("Inner loop recompute criticalities\n");
 #endif
                 //Update all timing related classes
-                perform_full_timing_update(crit_exponent,
+                perform_full_timing_update(state->crit_exponent,
                                            delay_model,
                                            criticalities,
                                            setup_slacks,
@@ -1078,14 +978,16 @@ static void placement_inner_loop(float t,
 
         if (placer_opts.placement_saves_per_temperature >= 1
             && inner_iter > 0
-            && (inner_iter + 1) % (move_lim / placer_opts.placement_saves_per_temperature) == 0) {
-            std::string filename = vtr::string_fmt("placement_%03d_%03d.place", temp_num + 1, inner_placement_save_count);
-            VTR_LOG("Saving placement to file at temperature move %d / %d: %s\n", inner_iter, move_lim, filename.c_str());
+            && (inner_iter + 1) % std::max<int>(1, state->move_lim / placer_opts.placement_saves_per_temperature) == 0) { //Clamp: quotient is 0 when move_lim < saves per temperature
+            std::string filename = vtr::string_fmt("placement_%03d_%03d.place", state->num_temps + 1, inner_placement_save_count);
+            VTR_LOG("Saving placement to file at temperature move %d / %d: %s\n", inner_iter, state->move_lim, filename.c_str());
             print_place(nullptr, nullptr, filename.c_str());
             ++inner_placement_save_count;
         }
     }
-    /* Inner loop ends */
+
+    /* Calculate the success_rate and std_dev of the costs. */
+    stats->calc_iteration_stats(*costs, state->move_lim);
 }
 
 static void recompute_costs_from_scratch(const t_placer_opts& placer_opts,
@@ -1131,118 +1033,10 @@ static int count_connections() {
     return (count);
 }
 
-static double get_std_dev(int n, double sum_x_squared, double av_x) {
-    /* Returns the standard deviation of data set x.  There are n sample points, *
-     * sum_x_squared is the summation over n of x^2 and av_x is the average x.   *
-     * All operations are done in double precision, since round off error can be *
-     * a problem in the initial temp. std_dev calculation for big circuits.      */
-
-    double std_dev;
-
-    if (n <= 1)
-        std_dev = 0.;
-    else
-        std_dev = (sum_x_squared - n * av_x * av_x) / (double)(n - 1);
-
-    if (std_dev > 0.) /* Very small variances sometimes round negative */
-        std_dev = sqrt(std_dev);
-    else
-        std_dev = 0.;
-
-    return (std_dev);
-}
-
-static void update_rlim(float* rlim, float success_rat, const DeviceGrid& grid) {
-    /* Update the range limited to keep acceptance prob. near 0.44.  Use *
-     * a floating point rlim to allow gradual transitions at low temps.  */
-
-    float upper_lim;
-
-    *rlim = (*rlim) * (1. - 0.44 + success_rat);
-    upper_lim = max(grid.width() - 1, grid.height() - 1);
-    *rlim = min(*rlim, upper_lim);
-    *rlim = max(*rlim, (float)1.);
-}
-
-/* Update the annealing state according to the annealing schedule selected.
- *   USER_SCHED:  A manual fixed schedule with fixed alpha and exit criteria.
- *   AUTO_SCHED:  A more sophisticated schedule where alpha varies based on success ratio.
- *   DUSTY_SCHED: This schedule jumps backward and slows down in response to success ratio.
- *                See doc/src/vpr/dusty_sa.rst for more details.
- *
- * Returns true until the schedule is finished. */
-static bool update_annealing_state(t_annealing_state* state,
-                                   float success_rat,
-                                   const t_placer_costs& costs,
-                                   const t_placer_opts& placer_opts,
-                                   const t_annealing_sched& annealing_sched) {
-#ifndef NO_GRAPHICS
-    t_draw_state* draw_state = get_draw_state_vars();
-    if (draw_state->list_of_breakpoints.size() != 0)
-        //update temperature in the current information variable
-        get_bp_state_globals()->get_glob_breakpoint_state()->temp_count++;
-#endif
-
-    /* Return `false` when the exit criterion is met. */
-    if (annealing_sched.type == USER_SCHED) {
-        state->t *= annealing_sched.alpha_t;
-        return state->t >= annealing_sched.exit_t;
-    }
-
-    auto& device_ctx = g_vpr_ctx.device();
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-
-    /* Automatic annealing schedule */
-    float t_exit = 0.005 * costs.cost / cluster_ctx.clb_nlist.nets().size();
-
-    if (annealing_sched.type == DUSTY_SCHED) {
-        bool restart_temp = state->t < t_exit || std::isnan(t_exit); //May get nan if there are no nets
-        if (success_rat < annealing_sched.success_min || restart_temp) {
-            if (state->alpha > annealing_sched.alpha_max) return false;
-            state->t = state->restart_t / sqrt(state->alpha); // Take a half step from the restart temperature.
-            state->alpha = 1.0 - ((1.0 - state->alpha) * annealing_sched.alpha_decay);
-        } else {
-            if (success_rat > annealing_sched.success_target) {
-                state->restart_t = state->t;
-            }
-            state->t *= state->alpha;
-        }
-        state->move_lim = std::max(1, std::min(state->move_lim_max, (int)(state->move_lim_max * (annealing_sched.success_target / success_rat))));
-    } else { /* annealing_sched.type == AUTO_SCHED */
-        if (success_rat > 0.96) {
-            state->alpha = 0.5;
-        } else if (success_rat > 0.8) {
-            state->alpha = 0.9;
-        } else if (success_rat > 0.15 || state->rlim > 1.) {
-            state->alpha = 0.95;
-        } else {
-            state->alpha = 0.8;
-        }
-        state->t *= state->alpha;
-
-        // Must be duplicated to retain previous behavior
-        if (state->t < t_exit || std::isnan(t_exit)) return false;
-    }
-
-    // Gradually changes from the initial crit_exponent to the final crit_exponent based on how much the range limit has shrunk.
-    // The idea is that as the range limit shrinks (indicating we are fine-tuning a more optimized placement) we can focus more on a smaller number of critical connections, which a higher crit_exponent achieves.
-    update_rlim(&state->rlim, success_rat, device_ctx.grid);
-
-    if (placer_opts.place_algorithm.is_timing_driven()) {
-        state->crit_exponent = (1 - (state->rlim - FINAL_RLIM) * state->inverse_delta_rlim)
-                                   * (placer_opts.td_place_exp_last - placer_opts.td_place_exp_first)
-                               + placer_opts.td_place_exp_first;
-    }
-
-    return true;
-}
-
-static float starting_t(float crit_exponent,
+///@brief Find the starting temperature for the annealing loop.
+static float starting_t(const t_annealing_state* state,
                         t_placer_costs* costs,
-                        t_placer_prev_inverse_costs* prev_inverse_costs,
                         t_annealing_sched annealing_sched,
-                        int max_moves,
-                        float rlim,
                         const PlaceDelayModel* delay_model,
                         PlacerCriticalities* criticalities,
                         PlacerSetupSlacks* setup_slacks,
@@ -1251,32 +1045,26 @@ static float starting_t(float crit_exponent,
                         ClusteredPinTimingInvalidator* pin_timing_invalidator,
                         t_pl_blocks_to_be_moved& blocks_affected,
                         const t_placer_opts& placer_opts) {
-    /* Finds the starting temperature (hot condition).              */
-
-    int i, num_accepted, move_lim;
-    double std_dev, av, sum_of_squares; /* Double important to avoid round off */
-
-    if (annealing_sched.type == USER_SCHED)
+    /* Use user-specified value for the initial temperature. */
+    if (annealing_sched.type == USER_SCHED) {
         return (annealing_sched.init_t);
+    }
 
     auto& cluster_ctx = g_vpr_ctx.clustering();
 
-    move_lim = min(max_moves, (int)cluster_ctx.clb_nlist.blocks().size());
+    /* Number of accepted swaps; used to average the cost over accepted swaps. */
+    int num_accepted = 0;
 
-    num_accepted = 0;
-    av = 0.;
-    sum_of_squares = 0.;
+    /* Use double types to avoid round off. */
+    double av = 0., sum_of_squares = 0.;
 
-    /* Try one move per block. Set the temperature high so essentially all accepted. */
-    float t = HUGE_POSITIVE_FLOAT;
+    /* Determines the block swap loop count. */
+    int move_lim = std::min(state->move_lim_max, (int)cluster_ctx.clb_nlist.blocks().size());
 
-    for (i = 0; i < move_lim; i++) {
+    for (int i = 0; i < move_lim; i++) {
         //Will not deploy setup slack analysis, so omit crit_exponenet and setup_slack
-        e_move_result swap_result = try_swap(t,
-                                             crit_exponent,
+        e_move_result swap_result = try_swap(state,
                                              costs,
-                                             prev_inverse_costs,
-                                             rlim,
                                              move_generator,
                                              timing_info,
                                              pin_timing_invalidator,
@@ -1300,18 +1088,19 @@ static float starting_t(float crit_exponent,
         }
     }
 
-    if (num_accepted != 0)
-        av /= num_accepted;
-    else
-        av = 0.;
+    /* Take the average of the accepted swaps' cost values. */
+    av = num_accepted > 0 ? (av / num_accepted) : 0.;
 
-    std_dev = get_std_dev(num_accepted, sum_of_squares, av);
+    /* Get the standard deviation. */
+    double std_dev = get_std_dev(num_accepted, sum_of_squares, av);
 
+    /* Print warning if not all swaps are accepted. */
     if (num_accepted != move_lim) {
         VTR_LOG_WARN("Starting t: %d of %d configurations accepted.\n", num_accepted, move_lim);
     }
 
 #ifdef VERBOSE
+    /* Print stats related to finding the initial temp. */
     VTR_LOG("std_dev: %g, average cost: %g, starting temp: %g\n", std_dev, av, 20. * std_dev);
 #endif
 
@@ -1347,11 +1136,24 @@ static void reset_move_nets(int num_nets_affected) {
     }
 }
 
-static e_move_result try_swap(float t,
-                              float crit_exponent,
+/**
+ * @brief Pick some block and move it to another spot.
+ *
+ * If the new location is empty, directly move the block. If the new location
+ * is occupied, switch the blocks. Due to the different sizes of the blocks,
+ * this block switching may occur multiple times. It might also cause the
+ * current swap attempt to abort due to inability to find suitable locations
+ * for moved blocks.
+ *
+ * The move generator will record all the switched blocks in the variable
+ * `blocks_affected`. Afterwards, the move will be assessed by the chosen
+ * cost formulation. Currently, there are three ways to assess move cost,
+ * which are stored in the enum type `e_place_algorithm`.
+ *
+ * @return Whether the block swap is accepted, rejected or aborted.
+ */
+static e_move_result try_swap(const t_annealing_state* state,
                               t_placer_costs* costs,
-                              t_placer_prev_inverse_costs* prev_inverse_costs,
-                              float rlim,
                               MoveGenerator& move_generator,
                               SetupTimingInfo* timing_info,
                               ClusteredPinTimingInvalidator* pin_timing_invalidator,
@@ -1362,30 +1164,27 @@ static e_move_result try_swap(float t,
                               float rlim_escape_fraction,
                               const t_place_algorithm& place_algorithm,
                               float timing_tradeoff) {
-    /* Picks some block and moves it to another spot.  If this spot is   *
-     * occupied, switch the blocks.  Assess the change in cost function. *
-     * rlim is the range limiter.                                        *
-     * Returns whether the swap is accepted, rejected or aborted.        *
-     * Passes back the new value of the cost functions.                  */
-
     num_ts_called++;
 
     MoveOutcomeStats move_outcome_stats;
 
-    /* I'm using negative values of proposed_net_cost as a flag, so DO NOT   *
-     * use cost functions that can go negative.                          */
+    /* I'm using negative values of proposed_net_cost as a flag, *
+     * so DO NOT use cost functions that can go negative.        */
 
-    double delta_c = 0; /* Change in cost due to this swap. */
-    double bb_delta_c = 0;
-    double timing_delta_c = 0;
+    double delta_c = 0;        //Change in cost due to this swap.
+    double bb_delta_c = 0;     //Change in the bounding box (wiring) cost.
+    double timing_delta_c = 0; //Change in the timing cost (delay * criticality).
 
-    //Allow some fraction of moves to not be restricted by rlim,
-    //in the hopes of better escaping local minima
+    /* Allow some fraction of moves to not be restricted by rlim, */
+    /* in the hopes of better escaping local minima.              */
+    float rlim;
     if (rlim_escape_fraction > 0. && vtr::frand() < rlim_escape_fraction) {
         rlim = std::numeric_limits<float>::infinity();
+    } else {
+        rlim = state->rlim;
     }
 
-    //Generate a new move (perturbation) used to explore the space of possible placements
+    /* Generate a new move (perturbation) used to explore the space of possible placements. */
     e_create_move create_move_outcome = move_generator.propose_move(blocks_affected, rlim);
 
     LOG_MOVE_STATS_PROPOSED(t, blocks_affected);
@@ -1393,9 +1192,6 @@ static e_move_result try_swap(float t,
     e_move_result move_outcome = ABORTED;
 
     if (create_move_outcome == e_create_move::ABORT) {
-        //Proposed move is not legal -- give up on this move
-        clear_move_blocks(blocks_affected);
-
         LOG_MOVE_STATS_OUTCOME(std::numeric_limits<float>::quiet_NaN(),
                                std::numeric_limits<float>::quiet_NaN(),
                                std::numeric_limits<float>::quiet_NaN(),
@@ -1407,25 +1203,24 @@ static e_move_result try_swap(float t,
 
         /*
          * To make evaluating the move simpler (e.g. calculating changed bounding box),
-         * we first move the blocks to thier new locations (apply the move to
-         * place_ctx.block_locs) and then computed the change in cost. If the move is
-         * accepted, the inverse look-up in place_ctx.grid_blocks is updated (committing
-         * the move). If the move is rejected the blocks are returned to their original
-         * positions (reverting place_ctx.block_locs to its original state).
+         * we first move the blocks to their new locations (apply the move to
+         * place_ctx.block_locs) and then compute the change in cost. If the move
+         * is accepted, the inverse look-up in place_ctx.grid_blocks is updated
+         * (committing the move). If the move is rejected, the blocks are returned to
+         * their original positions (reverting place_ctx.block_locs to its original state).
          *
-         * Note that the inverse look-up place_ctx.grid_blocks is only updated
-         * after move acceptance is determined, and so should not be used when
-         * evaluating a move.
+         * Note that the inverse look-up place_ctx.grid_blocks is only updated after
+         * move acceptance is determined, so it should not be used when evaluating a move.
          */
 
-        //Update the block positions
+        /* Update the block positions */
         apply_move_blocks(blocks_affected);
 
-        //Find all the nets affected by this swap and update their costs
-        //This routine calculates new connection delays and timing costs
-        //and store them in proposed_* data structures
-        //This routine also calculates the wiring cost, which doesn't
-        //depend on the timing driven data
+        //Find all the nets affected by this swap and update the wiring costs.
+        //This cost value doesn't depend on the timing info.
+        //
+        //Also find all the pins affected by the swap, calculate the new connection
+        //delays and timing costs, and store them in proposed_* data structures.
         int num_nets_affected = find_affected_nets_and_update_costs(place_algorithm,
                                                                     delay_model,
                                                                     criticalities,
@@ -1433,32 +1228,31 @@ static e_move_result try_swap(float t,
                                                                     bb_delta_c,
                                                                     timing_delta_c);
 
-        //For setup slack analysis, we first do a timing analysis to get the newest slack values
-        //resulted from the proposed block moves. If the move turns out to be accepted, we keep
-        //the updated slack values and commit the block moves. If rejected, we reject the proposed
-        //block moves and revert this timing analysis.
+        //For setup slack analysis, we first do a timing analysis to get the newest
+        //slack values resulting from the proposed block moves. If the move turns out
+        //to be accepted, we keep the updated slack values and commit the block moves.
+        //If rejected, we reject the proposed block moves and revert this timing analysis.
         if (place_algorithm == SLACK_TIMING_PLACE) {
-            //Gather all the connections with modified delays for incremental timing updates.
-            //This routine relies on comparing proposed_connection_delay and connection_delay.
+            /* Invalidates timing of modified connections for incremental timing updates. */
             invalidate_affected_connections(blocks_affected,
                                             pin_timing_invalidator,
                                             timing_info);
 
-            //Update the connection_timing_cost and connection_delay
-            //values from the temporary values.
+            /* Update the connection_timing_cost and connection_delay *
+             * values from the temporary values.                      */
             commit_td_cost(blocks_affected);
 
-            //Update timing information. Since we are analyzing setup slacks,
-            //we only update those values and keep the criticalities stale
-            //so as not to interfere with the original timing driven algorithm.
-            //
-            //Note: the timing info must be updated after applying block moves
-            //and committing the timing driven delays and costs.
-            //If we wish to revert this timing update due to move rejection,
-            //we need to revert block moves and restore the timing values.
+            /* Update timing information. Since we are analyzing setup slacks,   *
+             * we only update those values and keep the criticalities stale      *
+             * so as not to interfere with the original timing driven algorithm. *
+             *
+             * Note: the timing info must be updated after applying block moves  *
+             * and committing the timing driven delays and costs.                *
+             * If we wish to revert this timing update due to move rejection,    *
+             * we need to revert block moves and restore the timing values.      */
             criticalities->disable_update();
             setup_slacks->enable_update();
-            update_timing_classes(crit_exponent,
+            update_timing_classes(state->crit_exponent,
                                   timing_info,
                                   criticalities,
                                   setup_slacks,
@@ -1467,21 +1261,18 @@ static e_move_result try_swap(float t,
             /* Get the setup slack analysis cost */
             //TODO: calculate a weighted average of the slack cost and wiring cost
             delta_c = analyze_setup_slack_cost(setup_slacks);
-
         } else if (place_algorithm == CRITICALITY_TIMING_PLACE) {
-            /*in this case we redefine delta_c as a combination of timing and bb.  *
-             *additionally, we normalize all values, therefore delta_c is in       *
-             *relation to 1*/
-            delta_c = (1 - timing_tradeoff) * bb_delta_c * prev_inverse_costs->bb_cost
-                      + timing_tradeoff * timing_delta_c * prev_inverse_costs->timing_cost;
-
+            /* Take delta_c as a combination of timing and wiring cost. In
+             * addition to `timing_tradeoff`, we normalize the cost values */
+            delta_c = (1 - timing_tradeoff) * bb_delta_c * costs->bb_cost_norm
+                      + timing_tradeoff * timing_delta_c * costs->timing_cost_norm;
         } else {
-            VTR_ASSERT(place_algorithm == BOUNDING_BOX_PLACE);
+            VTR_ASSERT_SAFE(place_algorithm == BOUNDING_BOX_PLACE);
             delta_c = bb_delta_c;
         }
 
         /* 1 -> move accepted, 0 -> rejected. */
-        move_outcome = assess_swap(delta_c, t);
+        move_outcome = assess_swap(delta_c, state->t);
 
         if (move_outcome == ACCEPTED) {
             costs->cost += delta_c;
@@ -1499,26 +1290,26 @@ static e_move_result try_swap(float t,
             if (place_algorithm == CRITICALITY_TIMING_PLACE) {
                 costs->timing_cost += timing_delta_c;
 
-                //Invalidates timing of modified connections for incremental timing updates
-                //This routine relies on comparing proposed_connection_delay and connection_delay
-                //If the setup slack analysis was not performed, the
-                //sink pins are yet to be invalidated.
+                /* Invalidates timing of modified connections for incremental *
+                 * timing updates. These invalidations are accumulated for a  *
+                 * big timing update in the outer loop.                       */
                 invalidate_affected_connections(blocks_affected,
                                                 pin_timing_invalidator,
                                                 timing_info);
 
-                //update the connection_timing_cost and connection_delay
-                //values from the temporary values
+                /* Update the connection_timing_cost and connection_delay *
+                 * values from the temporary values.                      */
                 commit_td_cost(blocks_affected);
             }
 
-            /* update net cost functions and reset flags. */
+            /* Update net cost functions and reset flags. */
             update_move_nets(num_nets_affected);
 
             /* Update clb data structures since we kept the move. */
             commit_move_blocks(blocks_affected);
 
-        } else { //move_outcome == REJECTED
+        } else {
+            VTR_ASSERT_SAFE(move_outcome == REJECTED);
 
             /* Reset the net cost function flags first. */
             reset_move_nets(num_nets_affected);
@@ -1527,21 +1318,21 @@ static e_move_result try_swap(float t,
             revert_move_blocks(blocks_affected);
 
             if (place_algorithm == SLACK_TIMING_PLACE) {
-                //Revert the timing delays and costs to pre-update values
-                //These routines must be called after reverting the block moves
+                /* Revert the timing delays and costs to pre-update values.       */
+                /* These routines must be called after reverting the block moves. */
                 //TODO: make this process incremental
                 comp_td_connection_delays(delay_model);
                 comp_td_costs(delay_model, *criticalities, &costs->timing_cost);
 
-                //Re-invalidate the affected sink pins since the proposed move is
-                //rejected, and the same blocks are reverted to their original
-                //positions. The affected sink pins should stay the same.
+                /* Re-invalidate the affected sink pins since the proposed *
+                 * move is rejected, and the same blocks are reverted to   *
+                 * their original positions.                               */
                 invalidate_affected_connections(blocks_affected,
                                                 pin_timing_invalidator,
                                                 timing_info);
 
                 /* Revert the timing update */
-                update_timing_classes(crit_exponent,
+                update_timing_classes(state->crit_exponent,
                                       timing_info,
                                       criticalities,
                                       setup_slacks,
@@ -1559,8 +1350,8 @@ static e_move_result try_swap(float t,
         }
 
         move_outcome_stats.delta_cost_norm = delta_c;
-        move_outcome_stats.delta_bb_cost_norm = bb_delta_c * prev_inverse_costs->bb_cost;
-        move_outcome_stats.delta_timing_cost_norm = timing_delta_c * prev_inverse_costs->timing_cost;
+        move_outcome_stats.delta_bb_cost_norm = bb_delta_c * costs->bb_cost_norm;
+        move_outcome_stats.delta_timing_cost_norm = timing_delta_c * costs->timing_cost_norm;
 
         move_outcome_stats.delta_bb_cost_abs = bb_delta_c;
         move_outcome_stats.delta_timing_cost_abs = timing_delta_c;
@@ -1578,6 +1369,8 @@ static e_move_result try_swap(float t,
     stop_placement_and_check_breakopints(blocks_affected, move_outcome, delta_c, bb_delta_c, timing_delta_c);
 #    endif
 #endif
+
+    /* Clear the data structure containing block move info */
     clear_move_blocks(blocks_affected);
 
     //VTR_ASSERT(check_macro_placement_consistency() == 0);
@@ -1589,10 +1382,28 @@ static e_move_result try_swap(float t,
     return move_outcome;
 }
 
-//Puts all the nets changed by the current swap into nets_to_update,
-//and updates their bounding box.
-//
-//Returns the number of affected nets.
+/**
+ * @brief Find all the nets and pins affected by this swap and update costs.
+ *
+ * Find all the nets affected by this swap and update the bounding box (wiring)
+ * costs. This cost function doesn't depend on the timing info.
+ *
+ * Find all the connections affected by this swap and update the timing cost.
+ * For a connection to be affected, it not only needs to be on or driven by
+ * a block, but it also needs to have its delay changed. Otherwise, it will
+ * not be added to the affected_pins structure.
+ *
+ * For more, see update_td_delta_costs().
+ *
+ * The timing costs are calculated by getting the new connection delays,
+ * multiplied by the connection criticalities returned by the timing
+ * analyzer. These timing costs are stored in the proposed_* data structures.
+ *
+ * The change in the bounding box cost is stored in `bb_delta_c`.
+ * The change in the timing cost is stored in `timing_delta_c`.
+ *
+ * @return The number of affected nets.
+ */
 static int find_affected_nets_and_update_costs(const t_place_algorithm& place_algorithm,
                                                const PlaceDelayModel* delay_model,
                                                const PlacerCriticalities* criticalities,
@@ -1605,37 +1416,35 @@ static int find_affected_nets_and_update_costs(const t_place_algorithm& place_al
 
     int num_affected_nets = 0;
 
-    //Go through all the blocks moved
+    /* Go through all the blocks moved. */
     for (int iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) {
         ClusterBlockId blk = blocks_affected.moved_blocks[iblk].block_num;
 
-        //Go through all the pins in the moved block
+        /* Go through all the pins in the moved block. */
         for (ClusterPinId blk_pin : cluster_ctx.clb_nlist.block_pins(blk)) {
             ClusterNetId net_id = cluster_ctx.clb_nlist.pin_net(blk_pin);
             VTR_ASSERT_SAFE_MSG(net_id, "Only valid nets should be found in compressed netlist block pins");
 
             if (cluster_ctx.clb_nlist.net_is_ignored(net_id))
-                continue; //TODO: do we require anyting special here for global nets. "Global nets are assumed to span the whole chip, and do not effect costs"
+                //TODO: Do we require anything special here for global nets?
+                //"Global nets are assumed to span the whole chip, and do not affect costs."
+                continue;
 
-            //Record effected nets
+            /* Record affected nets */
             record_affected_net(net_id, num_affected_nets);
 
-            //Update the net bounding boxes
-            //
-            //Do not update the net cost here since it should only be updated
-            //once per net, not once per pin.
+            /* Update the net bounding boxes. */
             update_net_bb(net_id, blocks_affected, iblk, blk, blk_pin);
 
             if (place_algorithm.is_timing_driven()) {
-                /* Determine the change in connection delay and timing cost */
+                /* Determine the change in connection delay and timing cost. */
                 update_td_delta_costs(delay_model, *criticalities, net_id, blk_pin, blocks_affected, timing_delta_c);
             }
         }
     }
 
-    /* Now update the bounding box costs (since the net bounding boxes are up-to-date).
-     * The cost is only updated once per net.
-     */
+    /* Now update the bounding box costs (since the net bounding     *
+     * boxes are up-to-date). The cost is only updated once per net. */
     for (int inet_affected = 0; inet_affected < num_affected_nets; inet_affected++) {
         ClusterNetId net_id = ts_nets_to_update[inet_affected];
 
@@ -1646,18 +1455,25 @@ static int find_affected_nets_and_update_costs(const t_place_algorithm& place_al
     return num_affected_nets;
 }
 
+///@brief Record affected nets.
 static void record_affected_net(const ClusterNetId net, int& num_affected_nets) {
-    //Record effected nets
+    /* Record affected nets. */
     if (proposed_net_cost[net] < 0.) {
-        //Net not marked yet.
+        /* Net not marked yet. */
         ts_nets_to_update[num_affected_nets] = net;
         num_affected_nets++;
 
-        //Flag to say we've marked this net.
+        /* Flag to say we've marked this net. */
         proposed_net_cost[net] = 1.;
     }
 }
 
+/**
+ * @brief Update the net bounding boxes.
+ *
+ * Do not update the net cost here since it should only
+ * be updated once per net, not once per pin.
+ */
 static void update_net_bb(const ClusterNetId net,
                           const t_pl_blocks_to_be_moved& blocks_affected,
                           int iblk,
@@ -2800,21 +2616,6 @@ static void free_try_swap_arrays() {
     g_vpr_ctx.mutable_placement().compressed_block_grids.clear();
 }
 
-static void calc_placer_stats(t_placer_statistics& stats, float& success_rat, double& std_dev, const t_placer_costs& costs, const int move_lim) {
-    success_rat = ((float)stats.success_sum) / move_lim;
-    if (stats.success_sum == 0) {
-        stats.av_cost = costs.cost;
-        stats.av_bb_cost = costs.bb_cost;
-        stats.av_timing_cost = costs.timing_cost;
-    } else {
-        stats.av_cost /= stats.success_sum;
-        stats.av_bb_cost /= stats.success_sum;
-        stats.av_timing_cost /= stats.success_sum;
-    }
-
-    std_dev = get_std_dev(stats.success_sum, stats.sum_of_squares, stats.av_cost);
-}
-
 static void generate_post_place_timing_reports(const t_placer_opts& placer_opts,
                                                const t_analysis_opts& analysis_opts,
                                                const SetupTimingInfo& timing_info,
@@ -2848,18 +2649,12 @@ static void print_place_status_header() {
     VTR_LOG("---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------\n");
 }
 
-static void print_place_status(const size_t num_temps,
-                               const float elapsed_sec,
-                               const float t,
-                               const float alpha,
+static void print_place_status(const t_annealing_state& state,
                                const t_placer_statistics& stats,
-                               const float cpd,
-                               const float sTNS,
-                               const float sWNS,
-                               const float acc_rate,
-                               const float std_dev,
-                               const float rlim,
-                               const float crit_exponent,
+                               float elapsed_sec,
+                               float cpd,
+                               float sTNS,
+                               float sWNS,
                                size_t tot_moves) {
     VTR_LOG(
         "%4zu "
@@ -2868,16 +2663,16 @@ static void print_place_status(const size_t num_temps,
         "%7.3f %10.2f %-10.5g "
         "%7.3f % 10.3g % 8.3f "
         "%7.3f %7.4f %6.1f %8.2f",
-        num_temps,
+        state.num_temps,
         elapsed_sec,
-        t,
+        state.t,
         stats.av_cost, stats.av_bb_cost, stats.av_timing_cost,
         1e9 * cpd, 1e9 * sTNS, 1e9 * sWNS,
-        acc_rate, std_dev, rlim, crit_exponent);
+        stats.success_rate, stats.std_dev, state.rlim, state.crit_exponent);
 
     pretty_print_uint(" ", tot_moves, 9, 3);
 
-    VTR_LOG(" %6.3f\n", alpha);
+    VTR_LOG(" %6.3f\n", state.alpha);
     fflush(stdout);
 }
 
@@ -2916,26 +2711,6 @@ static void print_resources_utilization() {
     VTR_LOG("\n");
 }
 
-static void init_annealing_state(t_annealing_state* state,
-                                 const t_annealing_sched& annealing_sched,
-                                 float t,
-                                 float rlim,
-                                 int move_lim_max,
-                                 float crit_exponent) {
-    state->alpha = annealing_sched.alpha_min;
-    state->t = t;
-    state->restart_t = t;
-    state->rlim = rlim;
-    state->inverse_delta_rlim = 1 / (rlim - FINAL_RLIM);
-    state->move_lim_max = std::max(1, move_lim_max);
-    if (annealing_sched.type == DUSTY_SCHED) {
-        state->move_lim = std::max(1, (int)(state->move_lim_max * annealing_sched.success_target));
-    } else {
-        state->move_lim = state->move_lim_max;
-    }
-    state->crit_exponent = crit_exponent;
-}
-
 bool placer_needs_lookahead(const t_vpr_setup& vpr_setup) {
     return (vpr_setup.PlacerOpts.place_algorithm.is_timing_driven());
 }
diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp
index 4090156474d..80695ff5686 100644
--- a/vpr/src/place/place_util.cpp
+++ b/vpr/src/place/place_util.cpp
@@ -1,36 +1,330 @@
 /**
  * @file place_util.cpp
- * @brief Definitions of structure routines declared in place_util.h.
+ * @brief Definitions of structure methods and routines declared in place_util.h.
+ *        These are mainly utility functions used by the placer.
  */
 
 #include "place_util.h"
 #include "globals.h"
+#include "draw_global.h"
 
+/* File-scope routines */
 static vtr::Matrix<t_grid_blocks> init_grid_blocks();
 
+/**
+ * @brief Initialize the placer's block-grid dual direction mapping.
+ *
+ * Forward direction - block to grid: place_ctx.block_locs.
+ * Reverse direction - grid to block: place_ctx.grid_blocks.
+ *
+ * Both lookups are rebuilt from scratch; any previous contents are discarded.
+ */
 void init_placement_context() {
     auto& place_ctx = g_vpr_ctx.mutable_placement();
     auto& cluster_ctx = g_vpr_ctx.clustering();
 
+    /* Initialize the forward lookup of CLB block positions: one fresh entry per clustered block */
     place_ctx.block_locs.clear();
     place_ctx.block_locs.resize(cluster_ctx.clb_nlist.blocks().size());
 
+    /* Initialize the reverse lookup of CLB block positions (grid location -> blocks) */
     place_ctx.grid_blocks = init_grid_blocks();
 }
 
+/**
+ * @brief Build and return `grid_blocks`, the inverse structure of `block_locs`.
+ *
+ * The container at each grid location is sized to the capacity of the tile type
+ * found at that location, and every subtile slot starts out as EMPTY_BLOCK_ID.
+ */
 static vtr::Matrix<t_grid_blocks> init_grid_blocks() {
     auto& device_ctx = g_vpr_ctx.device();
 
+    /* The matrix has the same dimensions as the device grid (width x height). */
     auto grid_blocks = vtr::Matrix<t_grid_blocks>({device_ctx.grid.width(), device_ctx.grid.height()});
+
     for (size_t x = 0; x < device_ctx.grid.width(); ++x) {
         for (size_t y = 0; y < device_ctx.grid.height(); ++y) {
             auto type = device_ctx.grid[x][y].type;
+            grid_blocks[x][y].blocks.resize(type->capacity, EMPTY_BLOCK_ID); //one empty slot per subtile
+        }
+    }
+    return grid_blocks;
+}
 
-            int capacity = type->capacity;
+/**
+ * @brief Mutator: updates the norm factors in the outer loop iteration.
+ *
+ * Timing-driven: stores the reciprocals of bb_cost and timing_cost and resets cost to 1.
+ * Otherwise: only cost is touched (set to bb_cost); the norm factors are left as they were.
+ */
+void t_placer_costs::update_norm_factors() {
+    if (place_algorithm.is_timing_driven()) {
+        bb_cost_norm = 1 / bb_cost; //NOTE(review): not capped like the timing norm; inf if bb_cost is 0 - confirm
+        //Prevent the norm factor from going to infinity
+        timing_cost_norm = std::min(1 / timing_cost, MAX_INV_TIMING_COST);
+        cost = 1; //The value of cost will be reset to 1 if timing driven
+    } else {
+        VTR_ASSERT_SAFE(place_algorithm == BOUNDING_BOX_PLACE);
+        cost = bb_cost; //The cost value should be identical to the wirelength cost
+    }
+}
+
+///@brief Constructor: Initialize all annealing state variables and the private rlim bounds.
+t_annealing_state::t_annealing_state(const t_annealing_sched& annealing_sched,
+                                     float first_t,
+                                     float first_rlim,
+                                     int first_move_lim,
+                                     float first_crit_exponent) {
+    num_temps = 0;
+    alpha = annealing_sched.alpha_min;
+    t = first_t;
+    restart_t = first_t; //restarts begin from the initial temperature until outer_loop_update() moves it
+    rlim = first_rlim;
+    move_lim_max = first_move_lim; //NOTE(review): stored as-is, no clamping to >= 1 here - confirm callers pass >= 1
+    crit_exponent = first_crit_exponent;
+
+    /* DUSTY_SCHED starts with a reduced move_lim (scaled by success_target, at least 1); others use the max. */
+    if (annealing_sched.type == DUSTY_SCHED) {
+        move_lim = std::max(1, (int)(move_lim_max * annealing_sched.success_target));
+    } else {
+        move_lim = move_lim_max;
+    }
+
+    /* Store this inverse value for speed when updating crit_exponent. */
+    INVERSE_DELTA_RLIM = 1 / (first_rlim - FINAL_RLIM); //NOTE(review): inf if first_rlim == FINAL_RLIM - confirm it cannot
+
+    /* The range limit cannot exceed the largest grid size. */
+    auto& grid = g_vpr_ctx.device().grid;
+    UPPER_RLIM = std::max(grid.width() - 1, grid.height() - 1);
+}
+
+/**
+ * @brief Compute, log and return the initial inner loop block move attempt limit (always >= 1).
+ *
+ * There are two ways to scale the move limit.
+ * e_place_effort_scaling::CIRCUIT
+ *      scales the move limit proportional to num_blocks ^ (4/3)
+ * e_place_effort_scaling::DEVICE_CIRCUIT
+ *      scales the move limit proportional to device_size ^ (2/3) * num_blocks ^ (2/3)
+ *
+ * The second method is almost identical to the first one when the device
+ * is highly utilized (device_size ~ num_blocks). For low utilization devices
+ * (device_size >> num_blocks), the search space is larger, so the second method
+ * performs more moves to ensure better optimization.
+ */
+int get_initial_move_lim(const t_placer_opts& placer_opts, const t_annealing_sched& annealing_sched) {
+    const auto& device_ctx = g_vpr_ctx.device();
+    const auto& cluster_ctx = g_vpr_ctx.clustering();
+
+    float device_size = device_ctx.grid.width() * device_ctx.grid.height();
+    size_t num_blocks = cluster_ctx.clb_nlist.blocks().size();
+
+    int move_lim;
+    if (placer_opts.effort_scaling == e_place_effort_scaling::CIRCUIT) {
+        move_lim = int(annealing_sched.inner_num * pow(num_blocks, 1.3333)); //1.3333 approximates the 4/3 exponent
+    } else {
+        VTR_ASSERT(placer_opts.effort_scaling == e_place_effort_scaling::DEVICE_CIRCUIT);
+        move_lim = int(annealing_sched.inner_num * pow(device_size, 2. / 3.) * pow(num_blocks, 2. / 3.));
+    }
+
+    /* Avoid having a non-positive move_lim (e.g. when there are no blocks, the product above is 0) */
+    move_lim = std::max(move_lim, 1);
+
+    VTR_LOG("Moves per temperature: %d\n", move_lim);
+
+    return move_lim;
+}
+
+/**
+ * @brief Update the annealing state according to the annealing schedule selected.
+ *
+ *   USER_SCHED:  A manual fixed schedule with fixed alpha and exit criteria.
+ *   AUTO_SCHED:  A more sophisticated schedule where alpha varies based on success ratio.
+ *   DUSTY_SCHED: This schedule jumps backward and slows down in response to success ratio.
+ *                See doc/src/vpr/dusty_sa.rst for more details.
+ *
+ * @return True->continues the annealing. False->exits the annealing.
+ */
+bool t_annealing_state::outer_loop_update(float success_rate,
+                                          const t_placer_costs& costs,
+                                          const t_placer_opts& placer_opts,
+                                          const t_annealing_sched& annealing_sched) {
+#ifndef NO_GRAPHICS
+    t_draw_state* draw_state = get_draw_state_vars();
+    if (draw_state->list_of_breakpoints.size() != 0) {
+        /* Bump the temperature counter kept in the breakpoint state (only when breakpoints exist). */
+        get_bp_state_globals()->get_glob_breakpoint_state()->temp_count++;
+    }
+#endif
+
+    if (annealing_sched.type == USER_SCHED) {
+        /* Update t with user specified alpha. */
+        t *= annealing_sched.alpha_t;
+
+        /* NOTE: despite its name, exit_anneal is true while t >= exit_t, i.e. when annealing CONTINUES. */
+        bool exit_anneal = t >= annealing_sched.exit_t;
+
+        return exit_anneal;
+    }
 
-            grid_blocks[x][y].blocks.resize(capacity, EMPTY_BLOCK_ID);
+    /* Automatically determine exit temperature. */
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+    float t_exit = 0.005 * costs.cost / cluster_ctx.clb_nlist.nets().size();
+
+    if (annealing_sched.type == DUSTY_SCHED) {
+        /* May get nan if there are no nets */
+        bool restart_temp = t < t_exit || std::isnan(t_exit);
+
+        /* If the success rate or the temperature is *
+         * too low, reset the temperature and alpha. */
+        if (success_rate < annealing_sched.success_min || restart_temp) {
+            /* Only exit anneal when alpha gets too large. */
+            if (alpha > annealing_sched.alpha_max) {
+                return false;
+            }
+            /* Take a half step from the restart temperature. */
+            t = restart_t / sqrt(alpha);
+            /* Update alpha. */
+            alpha = 1.0 - ((1.0 - alpha) * annealing_sched.alpha_decay);
+        } else {
+            /* If the success rate is promising, next time   *
+             * reset t to the current annealing temperature. */
+            if (success_rate > annealing_sched.success_target) {
+                restart_t = t;
+            }
+            /* Update t. */
+            t *= alpha;
+        }
+
+        /* Update move lim. */
+        update_move_lim(annealing_sched.success_target, success_rate);
+    } else {
+        VTR_ASSERT_SAFE(annealing_sched.type == AUTO_SCHED);
+        /* Automatically adjust alpha according to success rate. */
+        if (success_rate > 0.96) {
+            alpha = 0.5;
+        } else if (success_rate > 0.8) {
+            alpha = 0.9;
+        } else if (success_rate > 0.15 || rlim > 1.) {
+            alpha = 0.95;
+        } else {
+            alpha = 0.8;
+        }
+        /* Update temp. */
+        t *= alpha;
+        /* Must be duplicated to retain previous behavior. */
+        if (t < t_exit || std::isnan(t_exit)) {
+            return false;
         }
     }
 
-    return grid_blocks;
+    /* Update the range limiter (reached only by DUSTY_SCHED / AUTO_SCHED when annealing continues). */
+    update_rlim(success_rate);
+
+    /* If using timing driven algorithm, update the crit_exponent. */
+    if (placer_opts.place_algorithm.is_timing_driven()) {
+        update_crit_exponent(placer_opts);
+    }
+
+    /* Continues the annealing. */
+    return true;
+}
+
+/**
+ * @brief Update the range limiter to keep acceptance prob. near 0.44.
+ *
+ * Use a floating point rlim to allow gradual transitions at low temps.
+ * The result is clamped to [FINAL_RLIM, UPPER_RLIM], i.e. between 1 and the grid size.
+ */
+void t_annealing_state::update_rlim(float success_rate) {
+    rlim *= (1. - 0.44 + success_rate); //grows rlim when success_rate > 0.44, shrinks it when below
+    rlim = std::min(rlim, UPPER_RLIM);
+    rlim = std::max(rlim, FINAL_RLIM);
+}
+
+/**
+ * @brief Update the criticality exponent.
+ *
+ * When rlim shrinks towards the FINAL_RLIM value (indicating
+ * that we are fine-tuning a more optimized placement), we can
+ * focus more on a smaller number of critical connections.
+ * To achieve this, we make the crit_exponent sharper, so that
+ * critical connections would become more critical than before.
+ *
+ * We calculate how close rlim is to its final value comparing
+ * to its initial value. Then, we apply the same scaling factor
+ * on the crit_exponent so that it lands on the suitable value
+ * between td_place_exp_first and td_place_exp_last. The scaling
+ * factor is calculated and applied linearly.
+ */
+void t_annealing_state::update_crit_exponent(const t_placer_opts& placer_opts) {
+    /* scale == 1 when rlim == FINAL_RLIM (exp_last); scale == 0 when rlim is at its initial value (exp_first). */
+    float scale = 1 - (rlim - FINAL_RLIM) * INVERSE_DELTA_RLIM;
+
+    /* Apply the scaling factor on crit_exponent. */
+    crit_exponent = scale * (placer_opts.td_place_exp_last - placer_opts.td_place_exp_first)
+                    + placer_opts.td_place_exp_first;
+}
+
+/**
+ * @brief Update the move limit based on the success rate; the result is bounded between 1 and move_lim_max.
+ * The bounds are applied BEFORE the float->int conversion: a zero success_rate makes the ratio inf (or NaN),
+ * and converting an out-of-range float to int is undefined behavior. Both cases now yield move_lim_max.
+ */
+void t_annealing_state::update_move_lim(float success_target, float success_rate) {
+    const float scaled = move_lim_max * (success_target / success_rate);
+    const bool below_max = scaled < static_cast<double>(move_lim_max); //false for inf and NaN
+    move_lim = !below_max ? move_lim_max : ((scaled > 1.f) ? static_cast<int>(scaled) : 1);
+}
+
+void t_placer_statistics::reset() { //Zero every running sum and every derived statistic
+    av_cost = 0.;
+    av_bb_cost = 0.;
+    av_timing_cost = 0.;
+    sum_of_squares = 0.;
+    success_sum = 0;
+    success_rate = 0.;
+    std_dev = 0.;
+}
+
+void t_placer_statistics::single_swap_update(const t_placer_costs& costs) { //Fold one more cost sample in
+    success_sum++;
+    av_cost += costs.cost; //av_* hold plain sums here; calc_iteration_stats() turns them into averages
+    av_bb_cost += costs.bb_cost;
+    av_timing_cost += costs.timing_cost;
+    sum_of_squares += (costs.cost) * (costs.cost); //feeds the std_dev computation
+}
+
+void t_placer_statistics::calc_iteration_stats(const t_placer_costs& costs, int move_lim) {
+    if (success_sum == 0) { //No samples were accumulated: report the current costs instead of dividing by 0
+        av_cost = costs.cost;
+        av_bb_cost = costs.bb_cost;
+        av_timing_cost = costs.timing_cost;
+    } else { //Convert the accumulated sums into averages
+        av_cost /= success_sum;
+        av_bb_cost /= success_sum;
+        av_timing_cost /= success_sum;
+    }
+    success_rate = success_sum / float(move_lim); //float() forces a floating point division
+    std_dev = get_std_dev(success_sum, sum_of_squares, av_cost); //0 when success_sum <= 1
+}
+
+/**
+ * @brief Returns the standard deviation of data set x (0 when n <= 1 or the variance is not positive).
+ *
+ * There are n sample points, sum_x_squared is the summation over n of x^2 and av_x
+ * is the average x. All operations are done in double precision, since round off
+ * error can be a problem in the initial temp. std_dev calculation for big circuits.
+ */
+double get_std_dev(int n, double sum_x_squared, double av_x) {
+    double std_dev; //holds the variance until the final sqrt
+    if (n <= 1) {
+        std_dev = 0.;
+    } else {
+        std_dev = (sum_x_squared - n * av_x * av_x) / (double)(n - 1); //variance with an (n - 1) denominator
+    }
+
+    /* Very small variances sometimes round negative. */
+    return (std_dev > 0.) ? sqrt(std_dev) : 0.;
 }
diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h
index 0534ba662a4..818df5d6b4e 100644
--- a/vpr/src/place/place_util.h
+++ b/vpr/src/place/place_util.h
@@ -1,37 +1,206 @@
 /**
  * @file place_util.h
  * @brief Utility structures representing various states of the
- *        placement. Also contains declarations of related routines.
+ *        placement and utility functions used by the placer.
  */
 
 #pragma once
+#include "vpr_types.h"
 
-struct t_placer_costs {
-    //Although we do nost cost calculations with float's we
-    //use doubles for the accumulated costs to avoid round-off,
-    //particularly on large designs where the magnitude of a single
-    //move's delta cost is small compared to the overall cost.
+/**
+ * @brief Data structure that stores different cost values in the placer.
+ *
+ * Although we do cost calculations with float values, we use doubles
+ * for the accumulated costs to avoid round-off, particularly on large
+ * designs where the magnitude of a single move's delta cost is small
+ * compared to the overall cost.
+ *
+ * To balance the trade-off between timing and wirelength (bb) cost, the
+ * change in costs produced by block swaps is divided by the final cost
+ * values of the previous iteration. However, the divisions are expensive,
+ * so we store their multiplicative inverses when they are updated in
+ * the outer loop routines to speed up the normalization process.
+ *
+ *   @param cost The weighted average of the wiring cost and the timing cost.
+ *   @param bb_cost The bounding box cost, aka the wiring cost.
+ *   @param timing_cost The timing cost, which is connection delay * criticality.
+ *
+ *   @param bb_cost_norm The normalization factor for the wiring cost.
+ *   @param timing_cost_norm The normalization factor for the timing cost, which
+ *              is upper-bounded by the value of MAX_INV_TIMING_COST.
+ *
+ *   @param MAX_INV_TIMING_COST Stops inverse timing cost from going to infinity
+ *              with very lax timing constraints, which avoids multiplying by a
+ *              gigantic timing_cost_norm when auto-normalizing. The exact value
+ *              of this cost has relatively little impact, but should not be large
+ *              enough to be on the order of timing costs for normal constraints.
+ *
+ *   @param place_algorithm Determines how the member values are updated upon
+ *              each temperature change during the placer annealing process.
+ */
+class t_placer_costs {
+  public: //members
     double cost;
     double bb_cost;
     double timing_cost;
+    double bb_cost_norm; //set by update_norm_factors() in timing-driven mode
+    double timing_cost_norm; //set by update_norm_factors() in timing-driven mode
+
+  public: //Constructor
+    t_placer_costs(t_place_algorithm algo) //only place_algorithm is initialized; cost members start unset
+        : place_algorithm(algo) {}
+
+  public: //Mutator
+    void update_norm_factors();
+
+  private:
+    double MAX_INV_TIMING_COST = 1.e9;
+    t_place_algorithm place_algorithm;
 };
 
-struct t_placer_prev_inverse_costs {
-    double bb_cost;
-    double timing_cost;
+/**
+ * @brief Stores variables that are used by the annealing process.
+ *
+ * This structure is updated by outer_loop_update() on each outer
+ * loop iteration. It stores various important variables that need to
+ * be accessed during the placement inner loop.
+ *
+ * Private variables are not given accessor functions. They serve as
+ * macros originally defined in place.cpp as global scope variables.
+ *
+ * Public members:
+ *   @param t
+ *              Temperature for simulated annealing.
+ *   @param restart_t
+ *              Temperature used after restart due to minimum success ratio.
+ *              Currently only used and updated by DUSTY_SCHED.
+ *   @param alpha
+ *              Temperature decay factor (multiplied each outer loop iteration).
+ *   @param num_temps
+ *              The count of how many temperature iterations have passed.
+ *
+ *   @param rlim
+ *              Range limit for block swaps.
+ *              Currently only updated by DUSTY_SCHED and AUTO_SCHED.
+ *   @param crit_exponent
+ *              Used by timing-driven placement to "sharpen" the timing criticality.
+ *              Depends on rlim. Currently only updated by DUSTY_SCHED and AUTO_SCHED.
+ *   @param move_lim
+ *              Current block move limit.
+ *              Currently only updated by DUSTY_SCHED.
+ *   @param move_lim_max
+ *              Maximum block move limit.
+ *
+ * Private members:
+ *   @param UPPER_RLIM
+ *              The upper limit for the range limiter value.
+ *   @param FINAL_RLIM
+ *              The final rlim (range limit) is 1, which is the smallest value that
+ *              can still make progress, since an rlim of 0 wouldn't allow any swaps.
+ *   @param INVERSE_DELTA_RLIM
+ *              Used to update crit_exponent. See update_crit_exponent() for more.
+ *
+ * Mutators:
+ *   @param outer_loop_update()
+ *              Update the annealing state variables in the placement outer loop.
+ *   @param update_rlim(), update_crit_exponent(), update_move_lim()
+ *              Private subroutines used by the main routine outer_loop_update().
+ */
+class t_annealing_state {
+  public:
+    float t;
+    float restart_t;
+    float alpha;
+    int num_temps;
+
+    float rlim;
+    float crit_exponent;
+    int move_lim;
+    int move_lim_max;
+
+  private:
+    float UPPER_RLIM;
+    float FINAL_RLIM = 1.;
+    float INVERSE_DELTA_RLIM;
+
+  public: //Constructor
+    t_annealing_state(const t_annealing_sched& annealing_sched,
+                      float first_t,
+                      float first_rlim,
+                      int first_move_lim,
+                      float first_crit_exponent);
+
+  public: //Mutator
+    bool outer_loop_update(float success_rate,
+                           const t_placer_costs& costs,
+                           const t_placer_opts& placer_opts,
+                           const t_annealing_sched& annealing_sched);
+
+  private: //Mutator (declared inline but not defined in this header - NOTE(review): only usable from the defining .cpp)
+    inline void update_rlim(float success_rate);
+    inline void update_crit_exponent(const t_placer_opts& placer_opts);
+    inline void update_move_lim(float success_target, float success_rate);
 };
 
-// Used by update_annealing_state()
-struct t_annealing_state {
-    float t;                  // Temperature
-    float rlim;               // Range limit for swaps
-    float inverse_delta_rlim; // used to calculate crit_exponent
-    float alpha;              // Temperature decays by this factor each outer iteration
-    float restart_t;          // Temperature used after restart due to minimum success ratio
-    float crit_exponent;      // Used by timing-driven placement to "sharpen" timing criticality
-    int move_lim_max;         // Maximum move limit
-    int move_lim;             // Current move limit
+/**
+ * @brief Stores statistics produced by a single annealing iteration.
+ *
+ * This structure is refreshed at the beginning of every annealing loop
+ * by calling reset(). Whenever a block swap move is accepted, this
+ * structure calls single_swap_update() to update its variables. At the
+ * end of the current iteration, it calls calc_iteration_stats() to
+ * summarize the results (success_rate & std_dev of the total costs).
+ *
+ * In terms of calculating statistics for total cost, we mean that we
+ * operate upon the set of placer cost values gathered after every
+ * accepted block move.
+ *
+ *   @param av_cost
+ *              Average total cost. Cost formulation depends on
+ *              the place algorithm currently being used.
+ *   @param av_bb_cost
+ *              Average bounding box (wiring) cost.
+ *   @param av_timing_cost
+ *              Average timing cost (delay * criticality).
+ *   @param sum_of_squares
+ *              Sum of squares of the total cost.
+ *   @param success_sum
+ *              Number of accepted block swaps for the current iteration.
+ *   @param success_rate
+ *              num_accepted / total_trials for the current iteration.
+ *   @param std_dev
+ *              Standard deviation of the total cost.
+ *
+ */
+class t_placer_statistics {
+  public:
+    double av_cost;
+    double av_bb_cost;
+    double av_timing_cost;
+    double sum_of_squares;
+    int success_sum;
+    float success_rate;
+    double std_dev;
+
+  public: //Constructor
+    t_placer_statistics() { reset(); }
+
+  public: //Mutator
+    ///@brief Clear all data fields.
+    void reset();
+
+    ///@brief Calculate placer success rate and cost std_dev for this iteration.
+    void calc_iteration_stats(const t_placer_costs& costs, int move_lim);
+
+    ///@brief Update stats when a single swap move has been accepted.
+    void single_swap_update(const t_placer_costs& costs);
 };
 
-//Initialize the placement context
+///@brief Initialize the placer's block-grid dual direction mapping.
 void init_placement_context();
+
+///@brief Get the initial limit for inner loop block move attempt limit.
+int get_initial_move_lim(const t_placer_opts& placer_opts, const t_annealing_sched& annealing_sched);
+
+///@brief Returns the standard deviation of data set x.
+double get_std_dev(int n, double sum_x_squared, double av_x);