diff --git a/doc/src/vpr/command_line_usage.rst b/doc/src/vpr/command_line_usage.rst index aa6e97d3e16..f3f9f5caa0f 100644 --- a/doc/src/vpr/command_line_usage.rst +++ b/doc/src/vpr/command_line_usage.rst @@ -701,16 +701,24 @@ If any of init_t, exit_t or alpha_t is specified, the user schedule, with a fixe **Default:** ````. -.. option:: --place_algorithm {bounding_box | path_timing_driven} +.. option:: --place_algorithm {bounding_box | criticality_timing | slack_timing} Controls the algorithm used by the placer. - ``bounding_box`` focuses purely on minimizing the bounding box wirelength of the circuit. + ``bounding_box`` Focuses purely on minimizing the bounding box wirelength of the circuit. Turns off timing analysis if specified. - ``path_timing_driven`` focuses on minimizing both wirelength and the critical path delay. + ``criticality_timing`` Focuses on minimizing both the wirelength and the connection timing costs (criticality * delay). + ``slack_timing`` Focuses on improving the circuit slack values to reduce critical path delay. - **Default:** ``path_timing_driven`` + **Default:** ``criticality_timing`` + +.. option:: --place_quench_algorithm {bounding_box | criticality_timing | slack_timing} + + Controls the algorithm used by the placer during placement quench. + The algorithm options have identical functionality as the ones used by the option ``--place_algorithm``. If specified, it overrides the option ``--place_algorithm`` during placement quench. + + **Default:** ``criticality_timing`` .. option:: --place_chan_width diff --git a/vpr/src/base/CheckSetup.cpp b/vpr/src/base/CheckSetup.cpp index c3ee3ca59b2..cd914374764 100644 --- a/vpr/src/base/CheckSetup.cpp +++ b/vpr/src/base/CheckSetup.cpp @@ -23,7 +23,7 @@ void CheckSetup(const t_packer_opts& PackerOpts, } if ((GLOBAL == RouterOpts.route_type) - && (BOUNDING_BOX_PLACE != PlacerOpts.place_algorithm)) { + && (PlacerOpts.place_algorithm.is_timing_driven())) { /* Works, but very weird. Can't optimize timing well, since you're * not doing proper architecture delay modelling. 
*/ VTR_LOG_WARN( @@ -32,7 +32,7 @@ void CheckSetup(const t_packer_opts& PackerOpts, } if ((false == Timing.timing_analysis_enabled) - && (PlacerOpts.place_algorithm == PATH_TIMING_DRIVEN_PLACE)) { + && (PlacerOpts.place_algorithm.is_timing_driven())) { /* May work, not tested */ VPR_FATAL_ERROR(VPR_ERROR_OTHER, "Timing analysis must be enabled for timing-driven placement.\n"); diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index b318c13e4cb..5eeb45d61a7 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -534,6 +534,7 @@ static void SetupPlacerOpts(const t_options& Options, t_placer_opts* PlacerOpts) PlacerOpts->td_place_exp_last = Options.place_exp_last; PlacerOpts->place_algorithm = Options.PlaceAlgorithm; + PlacerOpts->place_quench_algorithm = Options.PlaceQuenchAlgorithm; PlacerOpts->constraints_file = Options.constraints_file; diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp index a2a17b56f66..e7127fc3ff9 100644 --- a/vpr/src/base/ShowSetup.cpp +++ b/vpr/src/base/ShowSetup.cpp @@ -499,12 +499,15 @@ static void ShowPlacerOpts(const t_placer_opts& PlacerOpts, if ((PLACE_ONCE == PlacerOpts.place_freq) || (PLACE_ALWAYS == PlacerOpts.place_freq)) { VTR_LOG("PlacerOpts.place_algorithm: "); - switch (PlacerOpts.place_algorithm) { + switch (PlacerOpts.place_algorithm.get()) { case BOUNDING_BOX_PLACE: VTR_LOG("BOUNDING_BOX_PLACE\n"); break; - case PATH_TIMING_DRIVEN_PLACE: - VTR_LOG("PATH_TIMING_DRIVEN_PLACE\n"); + case CRITICALITY_TIMING_PLACE: + VTR_LOG("CRITICALITY_TIMING_PLACE\n"); + break; + case SLACK_TIMING_PLACE: + VTR_LOG("SLACK_TIMING_PLACE\n"); break; default: VTR_LOG_ERROR("Unknown placement algorithm\n"); @@ -533,7 +536,7 @@ static void ShowPlacerOpts(const t_placer_opts& PlacerOpts, VTR_LOG("PlacerOpts.place_chan_width: %d\n", PlacerOpts.place_chan_width); - if (PATH_TIMING_DRIVEN_PLACE == PlacerOpts.place_algorithm) { + if (PlacerOpts.place_algorithm.is_timing_driven()) { VTR_LOG("PlacerOpts.inner_loop_recompute_divider: %d\n", PlacerOpts.inner_loop_recompute_divider); VTR_LOG("PlacerOpts.recompute_crit_iter: %d\n", PlacerOpts.recompute_crit_iter); VTR_LOG("PlacerOpts.timing_tradeoff: %f\n", PlacerOpts.timing_tradeoff); diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 2a8ea5a230e..bb9907448ab 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -350,13 +350,22 @@ struct ParsePlaceDeltaDelayAlgorithm { struct ParsePlaceAlgorithm { ConvertedValue from_str(std::string str) { ConvertedValue conv_value; - if (str == "bounding_box") + if (str == "bounding_box") { conv_value.set_value(BOUNDING_BOX_PLACE); - else if (str == "path_timing_driven") - conv_value.set_value(PATH_TIMING_DRIVEN_PLACE); - else { + } else if (str == "criticality_timing") { + conv_value.set_value(CRITICALITY_TIMING_PLACE); + } else if (str == "slack_timing") { + conv_value.set_value(SLACK_TIMING_PLACE); + } else { std::stringstream msg; - msg << "Invalid conversion from '" << str << "' to e_router_algorithm (expected one of: " << argparse::join(default_choices(), ", ") << ")"; + msg << "Invalid conversion from '" << str << "' to e_place_algorithm (expected one of: " << argparse::join(default_choices(), ", ") << ")"; + + //Deprecated option: "path_timing_driven" -> PATH_DRIVEN_TIMING_PLACE + //New option: "criticality_timing" -> CRITICALITY_TIMING_PLACE + if (str == "path_timing_driven") { + msg << "\nDeprecated option: 'path_timing_driven'. 
It has been renamed to 'criticality_timing'"; + } + conv_value.set_error(msg.str()); } return conv_value; @@ -364,17 +373,19 @@ struct ParsePlaceAlgorithm { ConvertedValue to_str(e_place_algorithm val) { ConvertedValue conv_value; - if (val == BOUNDING_BOX_PLACE) + if (val == BOUNDING_BOX_PLACE) { conv_value.set_value("bounding_box"); - else { - VTR_ASSERT(val == PATH_TIMING_DRIVEN_PLACE); - conv_value.set_value("path_timing_driven"); + } else if (val == CRITICALITY_TIMING_PLACE) { + conv_value.set_value("criticality_timing"); + } else { + VTR_ASSERT(val == SLACK_TIMING_PLACE); + conv_value.set_value("slack_timing"); } return conv_value; } std::vector default_choices() { - return {"bounding_box", "path_timing_driven"}; + return {"bounding_box", "criticality_timing", "slack_timing"}; } }; @@ -1679,9 +1690,25 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg .show_in(argparse::ShowIn::HELP_ONLY); place_grp.add_argument(args.PlaceAlgorithm, "--place_algorithm") - .help("Controls which placement algorithm is used") - .default_value("path_timing_driven") - .choices({"bounding_box", "path_timing_driven"}) + .help( + "Controls which placement algorithm is used. Valid options:\n" + " * bounding_box: Focuses purely on minimizing the bounding box wirelength of the circuit. Turns off timing analysis if specified.\n" + " * criticality_timing: Focuses on minimizing both the wirelength and the connection timing costs (criticality * delay).\n" + " * slack_timing: Focuses on improving the circuit slack values to reduce critical path delay.\n") + .default_value("criticality_timing") + .choices({"bounding_box", "criticality_timing", "slack_timing"}) + .show_in(argparse::ShowIn::HELP_ONLY); + + place_grp.add_argument(args.PlaceQuenchAlgorithm, "--place_quench_algorithm") + .help( + "Controls which placement algorithm is used during placement quench.\n" + "If specified, it overrides the option --place_algorithm during placement quench.\n" + "Valid options:\n" + " * bounding_box: Focuses purely on minimizing the bounding box wirelength of the circuit. Turns off timing analysis if specified.\n" + " * criticality_timing: Focuses on minimizing both the wirelength and the connection timing costs (criticality * delay).\n" + " * slack_timing: Focuses on improving the circuit slack values to reduce critical path delay.\n") + .default_value("criticality_timing") + .choices({"bounding_box", "criticality_timing", "slack_timing"}) .show_in(argparse::ShowIn::HELP_ONLY); place_grp.add_argument(args.PlaceChanWidth, "--place_chan_width") @@ -2314,12 +2341,17 @@ void set_conditional_defaults(t_options& args) { //Which placement algorithm to use? if (args.PlaceAlgorithm.provenance() != Provenance::SPECIFIED) { if (args.timing_analysis) { - args.PlaceAlgorithm.set(PATH_TIMING_DRIVEN_PLACE, Provenance::INFERRED); + args.PlaceAlgorithm.set(CRITICALITY_TIMING_PLACE, Provenance::INFERRED); } else { args.PlaceAlgorithm.set(BOUNDING_BOX_PLACE, Provenance::INFERRED); } } + //Which placement algorithm to use during placement quench? 
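+    //If not specified on the command line, the quench algorithm simply inherits whatever --place_algorithm resolved to above (whether specified or inferred).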
+ if (args.PlaceQuenchAlgorithm.provenance() != Provenance::SPECIFIED) { + args.PlaceQuenchAlgorithm.set(args.PlaceAlgorithm, Provenance::INFERRED); + } + //Place chan width follows Route chan width if unspecified if (args.PlaceChanWidth.provenance() != Provenance::SPECIFIED && args.RouteChanWidth.provenance() == Provenance::SPECIFIED) { args.PlaceChanWidth.set(args.RouteChanWidth.value(), Provenance::INFERRED); diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index 3ac214d20a2..e377c70bd7f 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -104,6 +104,7 @@ struct t_options { argparse::ArgValue PlaceSuccessTarget; argparse::ArgValue anneal_sched_type; argparse::ArgValue PlaceAlgorithm; + argparse::ArgValue PlaceQuenchAlgorithm; argparse::ArgValue pad_loc_type; argparse::ArgValue PlaceChanWidth; argparse::ArgValue place_rlim_escape_fraction; diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 91174023c63..105a2eb84d3 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -829,29 +829,82 @@ struct t_annealing_sched { float success_target; }; -/* Various options for the placer. * - * place_algorithm: BOUNDING_BOX_PLACE or PATH_TIMING_DRIVEN_PLACE * - * timing_tradeoff: When TIMING_DRIVEN_PLACE mode, what is the tradeoff * - * timing driven and BOUNDING_BOX_PLACE. * - * place_cost_exp: Power to which denominator is raised for linear_cong. * - * place_chan_width: The channel width assumed if only one placement is * - * performed. * - * pad_loc_type: Are pins free to move during placement or fixed randomly. * - * constraints_file: File used to lock block locations during placement. * - * place_freq: Should the placement be skipped, done once, or done for each * - * channel width in the binary search. * - * recompute_crit_iter: how many temperature stages pass before we recompute * - * criticalities based on average point to point delay * - * inner_loop_crit_divider: (move_lim/inner_loop_crit_divider) determines how* - * many inner_loop iterations pass before a recompute of * - * criticalities is done. * - * td_place_exp_first: exponent that is used on the timing_driven criticlity * - * it is the value that the exponent starts at. * - * td_place_exp_last: value that the criticality exponent will be at the end * - * doPlacement: true if placement is supposed to be done in the CAD flow, false otherwise */ +/****************************************************************** + * Placer data types + *******************************************************************/ + +/** + * @brief Types of placement algorithms used in the placer. + * + * @param BOUNDING_BOX_PLACE + * Focuses purely on minimizing the bounding + * box wirelength of the circuit. + * @param CRITICALITY_TIMING_PLACE + * Focuses on minimizing both the wirelength and the + * connection timing costs (criticality * delay). + * @param SLACK_TIMING_PLACE + * Focuses on improving the circuit slack values + * to reduce critical path delay. + * + * The default is to use CRITICALITY_TIMING_PLACE. BOUNDING_BOX_PLACE + * is used when there is no timing information available (wiring only). + * SLACK_TIMING_PLACE is mainly feasible during placement quench. + */ enum e_place_algorithm { BOUNDING_BOX_PLACE, - PATH_TIMING_DRIVEN_PLACE + CRITICALITY_TIMING_PLACE, + SLACK_TIMING_PLACE +}; + +/** + * @brief Provides a wrapper around enum e_place_algorithm. 
+ * + * Supports the method is_timing_driven(), which allows flexible updates + * to the placer algorithms if more timing driven placement strategies + * are added in the future. This method is used across various placement + * setup files, and it can be useful for major placer routines as well. + * + * More methods can be added to this class if the placement strategies + * are further divided into more categories in the future. + * + * Also supports assignments and comparisons between t_place_algorithm + * and e_place_algorithm so as not to break existing code. + */ +class t_place_algorithm { + public: + //Constructors + t_place_algorithm() = default; + t_place_algorithm(e_place_algorithm _algo) + : algo(_algo) {} + ~t_place_algorithm() = default; + + //Assignment operators + t_place_algorithm& operator=(const t_place_algorithm& rhs) { + algo = rhs.algo; + return *this; + } + t_place_algorithm& operator=(e_place_algorithm rhs) { + algo = rhs; + return *this; + } + + //Equality operators + bool operator==(const t_place_algorithm& rhs) const { return algo == rhs.algo; } + bool operator==(e_place_algorithm rhs) const { return algo == rhs; } + bool operator!=(const t_place_algorithm& rhs) const { return algo != rhs.algo; } + bool operator!=(e_place_algorithm rhs) const { return algo != rhs; } + + ///@brief Check if the algorithm belongs to the timing driven category. + inline bool is_timing_driven() const { + return algo == CRITICALITY_TIMING_PLACE || algo == SLACK_TIMING_PLACE; + } + + ///@brief Accessor: returns the underlying e_place_algorithm enum value. + e_place_algorithm get() const { return algo; } + + private: + ///@brief The underlying algorithm. Default set to CRITICALITY_TIMING_PLACE. + e_place_algorithm algo = e_place_algorithm::CRITICALITY_TIMING_PLACE; }; enum e_pad_loc_type { @@ -859,6 +912,7 @@ enum e_pad_loc_type { RANDOM }; +///@brief Used to calculate the inner placer loop's block swapping limit move_lim. enum e_place_effort_scaling { CIRCUIT, /// bb_updated_before; * Net connection delays based on the placement. * Index ranges: [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1] */ -static ClbNetPinsMatrix connection_delay; //Delays based on commited block positions +static ClbNetPinsMatrix connection_delay; //Delays based on committed block positions static ClbNetPinsMatrix proposed_connection_delay; //Delays for proposed block positions (only - // for connections effected by move, otherwise - // INVALID_DELAY) +// for connections affected by move, otherwise +// INVALID_DELAY) + +static ClbNetPinsMatrix connection_setup_slack; //Setup slacks based on most recently updated timing graph /* * Timing cost of connections (i.e. criticality * delay). * Index ranges: [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1] */ -static PlacerTimingCosts connection_timing_cost; //Costs of commited block positions +static PlacerTimingCosts connection_timing_cost; //Costs of committed block positions static ClbNetPinsMatrix proposed_connection_timing_cost; //Costs for proposed block positions - // (only for connectsion effected by - // move, otherwise INVALID_DELAY) +// (only for connections affected by +// move, otherwise INVALID_DELAY) /* * Timing cost of nets (i.e. sum of criticality * delay for each net sink/connection). * Index ranges: [0..cluster_ctx.clb_nlist.nets().size()-1] */ static vtr::vector net_timing_cost; //Like connection_timing_cost, but summed - // accross net pins.
Used to allow more - // efficient recalculation of timing cost - // if only a sub-set of nets are changed - // while maintaining numeric stability. +// accross net pins. Used to allow more +// efficient recalculation of timing cost +// if only a sub-set of nets are changed +// while maintaining numeric stability. /* [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the bounding box coordinates and the number of * * blocks on each of a net's bounding box (to allow efficient updates), * @@ -332,40 +334,44 @@ static void update_move_nets(int num_nets_affected); static void reset_move_nets(int num_nets_affected); static e_move_result try_swap(float t, + float crit_exponent, t_placer_costs* costs, t_placer_prev_inverse_costs* prev_inverse_costs, float rlim, MoveGenerator& move_generator, - TimingInfo* timing_info, + SetupTimingInfo* timing_info, ClusteredPinTimingInvalidator* pin_timing_invalidator, t_pl_blocks_to_be_moved& blocks_affected, const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, float rlim_escape_fraction, - enum e_place_algorithm place_algorithm, + const t_place_algorithm& place_algorithm, float timing_tradeoff); static void check_place(const t_placer_costs& costs, const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, - enum e_place_algorithm place_algorithm); + const t_place_algorithm& place_algorithm); static int check_placement_costs(const t_placer_costs& costs, const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, - enum e_place_algorithm place_algorithm); + const t_place_algorithm& place_algorithm); static int check_placement_consistency(); static int check_block_placement_consistency(); static int check_macro_placement_consistency(); -static float starting_t(t_placer_costs* costs, +static float starting_t(float crit_exponent, + t_placer_costs* costs, t_placer_prev_inverse_costs* prev_inverse_costs, t_annealing_sched annealing_sched, int max_moves, float rlim, const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, - TimingInfo* timing_info, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + SetupTimingInfo* timing_info, MoveGenerator& move_generator, ClusteredPinTimingInvalidator* pin_timing_invalidator, t_pl_blocks_to_be_moved& blocks_affected, @@ -389,13 +395,17 @@ static float comp_td_connection_delay(const PlaceDelayModel* delay_model, Cluste static void comp_td_connection_delays(const PlaceDelayModel* delay_model); +static void commit_setup_slacks(const PlacerSetupSlacks* setup_slacks); + +static bool verify_connection_setup_slacks(const PlacerSetupSlacks* setup_slacks); + static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected); static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected); -static void invalidate_affected_connection_delays(const t_pl_blocks_to_be_moved& blocks_affected, - ClusteredPinTimingInvalidator* pin_tedges_invalidator, - TimingInfo* timing_info); +static void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected, + ClusteredPinTimingInvalidator* pin_tedges_invalidator, + TimingInfo* timing_info); static bool driven_by_moved_block(const ClusterNetId net, const t_pl_blocks_to_be_moved& blocks_affected); @@ -407,13 +417,15 @@ static double comp_td_connection_cost(const PlaceDelayModel* delay_mode, const P static double sum_td_net_cost(ClusterNetId net); static double sum_td_costs(); +static float 
analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks); + static e_move_result assess_swap(double delta_c, double t); static void get_non_updateable_bb(ClusterNetId net_id, t_bb* bb_coord_new); static void update_bb(ClusterNetId net_id, t_bb* bb_coord_new, t_bb* bb_edge_new, int xold, int yold, int xnew, int ynew); -static int find_affected_nets_and_update_costs(e_place_algorithm place_algorithm, +static int find_affected_nets_and_update_costs(const t_place_algorithm& place_algorithm, const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, t_pl_blocks_to_be_moved& blocks_affected, @@ -442,23 +454,43 @@ static double get_net_wirelength_estimate(ClusterNetId net_id, t_bb* bbptr); static void free_try_swap_arrays(); -static void outer_loop_recompute_criticalities(const t_placer_opts& placer_opts, - t_placer_costs* costs, - t_placer_prev_inverse_costs* prev_inverse_costs, - int num_connections, - float crit_exponent, - int* outer_crit_iter_count, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - ClusteredPinTimingInvalidator* pin_timing_invalidator, - SetupTimingInfo* timing_info); - -static void recompute_criticalities(float crit_exponent, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - ClusteredPinTimingInvalidator* pin_timing_invalidator, - SetupTimingInfo* timing_info, - t_placer_costs* costs); +static void outer_loop_update_timing_info(const t_placer_opts& placer_opts, + t_placer_costs* costs, + t_placer_prev_inverse_costs* prev_inverse_costs, + int num_connections, + float crit_exponent, + int* outer_crit_iter_count, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + ClusteredPinTimingInvalidator* pin_timing_invalidator, + SetupTimingInfo* timing_info); + +static void initialize_timing_info(float crit_exponent, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + ClusteredPinTimingInvalidator* pin_timing_invalidator, + SetupTimingInfo* timing_info, + t_placer_costs* costs); + +static void update_timing_classes(float crit_exponent, + SetupTimingInfo* timing_info, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + ClusteredPinTimingInvalidator* pin_timing_invalidator); + +static void update_timing_cost(const PlaceDelayModel* delay_model, + const PlacerCriticalities* criticalities, + double* timing_cost); + +static void perform_full_timing_update(float crit_exponent, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + ClusteredPinTimingInvalidator* pin_timing_invalidator, + SetupTimingInfo* timing_info, + t_placer_costs* costs); static void placement_inner_loop(float t, int temp_num, @@ -474,9 +506,11 @@ static void placement_inner_loop(float t, ClusteredPinTimingInvalidator* pin_timing_invalidator, const PlaceDelayModel* delay_model, PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, MoveGenerator& move_generator, t_pl_blocks_to_be_moved& blocks_affected, - SetupTimingInfo* timing_info); + SetupTimingInfo* timing_info, + const t_place_algorithm& place_algorithm); static void recompute_costs_from_scratch(const t_placer_opts& placer_opts, const PlaceDelayModel* delay_model, @@ -550,6 +584,7 @@ void try_place(const t_placer_opts& placer_opts, std::shared_ptr placement_delay_calc; std::unique_ptr place_delay_model; std::unique_ptr move_generator; + std::unique_ptr 
placer_setup_slacks; std::unique_ptr placer_criticalities; std::unique_ptr pin_timing_invalidator; @@ -564,7 +599,7 @@ void try_place(const t_placer_opts& placer_opts, num_swap_aborted = 0; num_ts_called = 0; - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (placer_opts.place_algorithm.is_timing_driven()) { /*do this before the initial placement to avoid messing up the initial placement */ place_delay_model = alloc_lookups_and_criticalities(chan_width_dist, placer_opts, router_opts, det_routing_arch, segment_inf, directs, num_directs); @@ -595,7 +630,7 @@ void try_place(const t_placer_opts& placer_opts, /* Gets initial cost and loads bounding boxes. */ - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (placer_opts.place_algorithm.is_timing_driven()) { costs.bb_cost = comp_bb_cost(NORMAL); first_crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */ @@ -617,6 +652,8 @@ void try_place(const t_placer_opts& placer_opts, timing_info = make_setup_timing_info(placement_delay_calc, placer_opts.timing_update_type); + placer_setup_slacks = std::make_unique(cluster_ctx.clb_nlist, netlist_pin_lookup); + placer_criticalities = std::make_unique(cluster_ctx.clb_nlist, netlist_pin_lookup); pin_timing_invalidator = std::make_unique(cluster_ctx.clb_nlist, @@ -624,15 +661,14 @@ void try_place(const t_placer_opts& placer_opts, atom_ctx.nlist, atom_ctx.lookup, *timing_info->timing_graph()); - //Update timing and costs - recompute_criticalities(first_crit_exponent, - place_delay_model.get(), - placer_criticalities.get(), - pin_timing_invalidator.get(), - timing_info.get(), - &costs); - - timing_info->set_warn_unconstrained(false); //Don't warn again about unconstrained nodes again during placement + //First time compute timing and costs, compute from scratch + initialize_timing_info(first_crit_exponent, + place_delay_model.get(), + placer_criticalities.get(), + placer_setup_slacks.get(), + pin_timing_invalidator.get(), + timing_info.get(), + &costs); critical_path = timing_info->least_slack_critical_path(); @@ -651,9 +687,9 @@ void try_place(const t_placer_opts& placer_opts, prev_inverse_costs.timing_cost = 1 / costs.timing_cost; prev_inverse_costs.bb_cost = 1 / costs.bb_cost; costs.cost = 1; /*our new cost function uses normalized values of */ - /*bb_cost and timing_cost, the value of cost will be reset */ - /*to 1 at each temperature when *_TIMING_DRIVEN_PLACE is true */ - } else { /*BOUNDING_BOX_PLACE */ + /*bb_cost and timing_cost, the value of cost will be reset */ + /*to 1 at each temperature when *_TIMING_DRIVEN_PLACE is true */ + } else { /*BOUNDING_BOX_PLACE */ costs.cost = costs.bb_cost = comp_bb_cost(NORMAL); costs.timing_cost = 0; outer_crit_iter_count = 0; @@ -670,7 +706,7 @@ void try_place(const t_placer_opts& placer_opts, //Initial pacement statistics VTR_LOG("Initial placement cost: %g bb_cost: %g td_cost: %g\n", costs.cost, costs.bb_cost, costs.timing_cost); - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (placer_opts.place_algorithm.is_timing_driven()) { VTR_LOG("Initial placement estimated Critical Path Delay (CPD): %g ns\n", 1e9 * critical_path.delay()); VTR_LOG("Initial placement estimated setup Total Negative Slack (sTNS): %g ns\n", @@ -745,10 +781,12 @@ void try_place(const t_placer_opts& placer_opts, first_rlim = (float)max(device_ctx.grid.width() - 1, device_ctx.grid.height() - 1); - float first_t = starting_t(&costs, &prev_inverse_costs, + float first_t = 
starting_t(first_crit_exponent, + &costs, &prev_inverse_costs, annealing_sched, move_lim, first_rlim, place_delay_model.get(), placer_criticalities.get(), + placer_setup_slacks.get(), timing_info.get(), *move_generator, pin_timing_invalidator.get(), @@ -774,30 +812,33 @@ void try_place(const t_placer_opts& placer_opts, /* Outer loop of the simulated annealing begins */ do { vtr::Timer temperature_timer; - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (placer_opts.place_algorithm.is_timing_driven()) { costs.cost = 1; } - outer_loop_recompute_criticalities(placer_opts, &costs, &prev_inverse_costs, - num_connections, - state.crit_exponent, - &outer_crit_iter_count, - place_delay_model.get(), - placer_criticalities.get(), - pin_timing_invalidator.get(), - timing_info.get()); + outer_loop_update_timing_info(placer_opts, + &costs, &prev_inverse_costs, + num_connections, + state.crit_exponent, + &outer_crit_iter_count, + place_delay_model.get(), + placer_criticalities.get(), + placer_setup_slacks.get(), + pin_timing_invalidator.get(), + timing_info.get()); placement_inner_loop(state.t, num_temps, state.rlim, placer_opts, state.move_lim, state.crit_exponent, inner_recompute_limit, &stats, - &costs, - &prev_inverse_costs, + &costs, &prev_inverse_costs, &moves_since_cost_recompute, pin_timing_invalidator.get(), place_delay_model.get(), placer_criticalities.get(), + placer_setup_slacks.get(), *move_generator, blocks_affected, - timing_info.get()); + timing_info.get(), + placer_opts.place_algorithm); tot_iter += state.move_lim; @@ -805,7 +846,7 @@ void try_place(const t_placer_opts& placer_opts, ++num_temps; - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (placer_opts.place_algorithm.is_timing_driven()) { critical_path = timing_info->least_slack_critical_path(); sTNS = timing_info->setup_total_negative_slack(); sWNS = timing_info->setup_worst_negative_slack(); @@ -834,15 +875,16 @@ void try_place(const t_placer_opts& placer_opts, { /* Quench */ vtr::ScopedFinishTimer temperature_timer("Placement Quench"); - outer_loop_recompute_criticalities(placer_opts, &costs, - &prev_inverse_costs, - num_connections, - state.crit_exponent, - &outer_crit_iter_count, - place_delay_model.get(), - placer_criticalities.get(), - pin_timing_invalidator.get(), - timing_info.get()); + outer_loop_update_timing_info(placer_opts, + &costs, &prev_inverse_costs, + num_connections, + state.crit_exponent, + &outer_crit_iter_count, + place_delay_model.get(), + placer_criticalities.get(), + placer_setup_slacks.get(), + pin_timing_invalidator.get(), + timing_info.get()); state.t = 0; /* freeze out */ @@ -850,22 +892,23 @@ void try_place(const t_placer_opts& placer_opts, * which reduce the cost of the placement */ placement_inner_loop(state.t, num_temps, state.rlim, placer_opts, move_lim, state.crit_exponent, quench_recompute_limit, &stats, - &costs, - &prev_inverse_costs, + &costs, &prev_inverse_costs, &moves_since_cost_recompute, pin_timing_invalidator.get(), place_delay_model.get(), placer_criticalities.get(), + placer_setup_slacks.get(), *move_generator, blocks_affected, - timing_info.get()); + timing_info.get(), + placer_opts.place_quench_algorithm); tot_iter += move_lim; ++num_temps; calc_placer_stats(stats, success_rat, std_dev, costs, move_lim); - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (placer_opts.place_quench_algorithm.is_timing_driven()) { critical_path = timing_info->least_slack_critical_path(); sTNS = timing_info->setup_total_negative_slack(); sWNS = 
timing_info->setup_worst_negative_slack(); @@ -902,17 +945,16 @@ void try_place(const t_placer_opts& placer_opts, VTR_LOG("Swaps called: %d\n", num_ts_called); report_aborted_moves(); - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (placer_opts.place_algorithm.is_timing_driven()) { //Final timing estimate VTR_ASSERT(timing_info); - - //Update timing and costs - recompute_criticalities(state.crit_exponent, - place_delay_model.get(), - placer_criticalities.get(), - pin_timing_invalidator.get(), - timing_info.get(), - &costs); + perform_full_timing_update(state.crit_exponent, + place_delay_model.get(), + placer_criticalities.get(), + placer_setup_slacks.get(), + pin_timing_invalidator.get(), + timing_info.get(), + &costs); critical_path = timing_info->least_slack_critical_path(); @@ -965,19 +1007,21 @@ void try_place(const t_placer_opts& placer_opts, VTR_LOG("update_td_costs: connections %g nets %g sum_nets %g total %g\n", f_update_td_costs_connections_elapsed_sec, f_update_td_costs_nets_elapsed_sec, f_update_td_costs_sum_nets_elapsed_sec, f_update_td_costs_total_elapsed_sec); } -/* Function to recompute the criticalities before the inner loop of the annealing */ -static void outer_loop_recompute_criticalities(const t_placer_opts& placer_opts, - t_placer_costs* costs, - t_placer_prev_inverse_costs* prev_inverse_costs, - int num_connections, - float crit_exponent, - int* outer_crit_iter_count, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - ClusteredPinTimingInvalidator* pin_timing_invalidator, - SetupTimingInfo* timing_info) { - if (placer_opts.place_algorithm != PATH_TIMING_DRIVEN_PLACE) +/* Function to update the setup slacks and criticalities before the inner loop of the annealing/quench */ +static void outer_loop_update_timing_info(const t_placer_opts& placer_opts, + t_placer_costs* costs, + t_placer_prev_inverse_costs* prev_inverse_costs, + int num_connections, + float crit_exponent, + int* outer_crit_iter_count, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + ClusteredPinTimingInvalidator* pin_timing_invalidator, + SetupTimingInfo* timing_info) { + if (!placer_opts.place_algorithm.is_timing_driven()) { return; + } /*at each temperature change we update these values to be used */ /*for normalizing the tradeoff between timing and wirelength (bb) */ @@ -989,13 +1033,15 @@ static void outer_loop_recompute_criticalities(const t_placer_opts& placer_opts, num_connections = std::max(num_connections, 1); //Avoid division by zero VTR_ASSERT(num_connections > 0); - //Update timing information - recompute_criticalities(crit_exponent, - delay_model, - criticalities, - pin_timing_invalidator, - timing_info, - costs); + //Update all timing related classes + perform_full_timing_update(crit_exponent, + delay_model, + criticalities, + setup_slacks, + pin_timing_invalidator, + timing_info, + costs); + *outer_crit_iter_count = 0; } (*outer_crit_iter_count)++; @@ -1007,29 +1053,135 @@ static void outer_loop_recompute_criticalities(const t_placer_opts& placer_opts, prev_inverse_costs->timing_cost = min(1 / costs->timing_cost, MAX_INV_TIMING_COST); } -//Update timing information based on current placement by running STA to get new slacks, -//and calculate updated criticalities and timing costs -static void recompute_criticalities(float crit_exponent, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - ClusteredPinTimingInvalidator* pin_timing_invalidator, - 
SetupTimingInfo* timing_info, - t_placer_costs* costs) { - //Run STA to update slacks and adjusted/relaxed criticalities +static void initialize_timing_info(float crit_exponent, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + ClusteredPinTimingInvalidator* pin_timing_invalidator, + SetupTimingInfo* timing_info, + t_placer_costs* costs) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& clb_nlist = cluster_ctx.clb_nlist; + + //As a safety measure, for the first time update, + //invalidate all timing edges via the pin invalidator + //by passing in all the clb sink pins + for (ClusterNetId net_id : clb_nlist.nets()) { + for (ClusterPinId pin_id : clb_nlist.net_sinks(net_id)) { + pin_timing_invalidator->invalidate_connection(pin_id, timing_info); + } + } + + //Perform first time update for all timing related classes + perform_full_timing_update(crit_exponent, + delay_model, + criticalities, + setup_slacks, + pin_timing_invalidator, + timing_info, + costs); + + //Don't warn again about unconstrained nodes again during placement + timing_info->set_warn_unconstrained(false); +} + +/** + * @brief Update timing information based on the current block positions. + * + * Run STA to update the timing info class. + * + * Update the values stored in PlacerCriticalities and PlacerSetupSlacks + * if they are enabled to update. To enable updating, call their respective + * enable_update() method. See their documentation for more detailed info. + * + * If criticalities are updated, the timing driven costs should be updated + * as well by calling update_timing_cost(). Calling this routine to update + * timing_cost will produce round-off error in the long run due to its + * incremental nature, so the timing cost value will be recomputed once in + * a while, via other timing driven routines. + * + * If setup slacks are updated, then normally they should be committed to + * `connection_setup_slack` via commit_setup_slacks() routine. However, + * sometimes new setup slack values are not committed immediately if we + * expect to revert the current timing update in the near future, or if + * we wish to compare the new slack values to the original ones. + * + * All the pins with changed connection delays have already been added into + * the ClusteredPinTimingInvalidator to allow incremental STA update. These + * changed connection delays are a direct result of moved blocks in try_swap(). + */ +static void update_timing_classes(float crit_exponent, + SetupTimingInfo* timing_info, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + ClusteredPinTimingInvalidator* pin_timing_invalidator) { + /* Run STA to update slacks and adjusted/relaxed criticalities. */ timing_info->update(); - //Update placer'criticalities (e.g. sharpen with crit_exponent) + /* Update the placer's criticalities (e.g. sharpen with crit_exponent). */ criticalities->update_criticalities(timing_info, crit_exponent); - //Update connection, net and total timing costs based on new criticalities + /* Update the placer's raw setup slacks. */ + setup_slacks->update_setup_slacks(timing_info); + + /* Clear invalidation state. */ + pin_timing_invalidator->reset(); +} + +/** + * @brief Update the timing driven (td) costs. + * + * This routine either uses incremental update_td_costs(), or updates + * from scratch using comp_td_costs(). 
By default, it is incremental + * by iterating over the set of clustered netlist connections/pins + * returned by PlacerCriticalities::pins_with_modified_criticality(). + * + * Hence, this routine should always be called when PlacerCriticalities + * is enabled for update in update_timing_classes(). Otherwise, the + * incremental method will no longer be correct. + */ +static void update_timing_cost(const PlaceDelayModel* delay_model, + const PlacerCriticalities* criticalities, + double* timing_cost) { #ifdef INCR_COMP_TD_COSTS - update_td_costs(delay_model, *criticalities, &costs->timing_cost); + update_td_costs(delay_model, *criticalities, timing_cost); #else - comp_td_costs(delay_model, *criticalities, &costs->timing_cost); + comp_td_costs(delay_model, *criticalities, timing_cost); #endif +} - //Clear invalidation state - pin_timing_invalidator->reset(); +/** + * @brief Update all timing related classes, variables, and structures. + * + * This routine exists to reduce code duplication, as the placer routines + * often need to update all of the timing related state. + * + * Updates: SetupTimingInfo, PlacerCriticalities, PlacerSetupSlacks, + * timing_cost, connection_setup_slack. + */ +static void perform_full_timing_update(float crit_exponent, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + ClusteredPinTimingInvalidator* pin_timing_invalidator, + SetupTimingInfo* timing_info, + t_placer_costs* costs) { + /* Update all timing related classes. */ + criticalities->enable_update(); + setup_slacks->enable_update(); + update_timing_classes(crit_exponent, + timing_info, + criticalities, + setup_slacks, + pin_timing_invalidator); + + /* Update the timing cost with new connection criticalities. */ + update_timing_cost(delay_model, + criticalities, + &costs->timing_cost); + + /* Commit the setup slacks since they are updated. */ + commit_setup_slacks(setup_slacks); } /* Function which contains the inner loop of the simulated annealing */ @@ -1047,9 +1199,11 @@ static void placement_inner_loop(float t, ClusteredPinTimingInvalidator* pin_timing_invalidator, const PlaceDelayModel* delay_model, PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, MoveGenerator& move_generator, t_pl_blocks_to_be_moved& blocks_affected, - SetupTimingInfo* timing_info) { + SetupTimingInfo* timing_info, + const t_place_algorithm& place_algorithm) { int inner_crit_iter_count, inner_iter; int inner_placement_save_count = 0; //How many times have we dumped placement to a file this temperature? @@ -1064,15 +1218,20 @@ static void placement_inner_loop(float t, /* Inner loop begins */ for (inner_iter = 0; inner_iter < move_lim; inner_iter++) { - e_move_result swap_result = try_swap(t, costs, prev_inverse_costs, rlim, + e_move_result swap_result = try_swap(t, + crit_exponent, + costs, + prev_inverse_costs, + rlim, move_generator, timing_info, pin_timing_invalidator, blocks_affected, delay_model, criticalities, + setup_slacks, placer_opts.rlim_escape_fraction, - placer_opts.place_algorithm, + place_algorithm, placer_opts.timing_tradeoff); if (swap_result == ACCEPTED) { @@ -1089,7 +1248,7 @@ static void placement_inner_loop(float t, num_swap_rejected++; } - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (place_algorithm.is_timing_driven()) { /* Do we want to re-timing analyze the circuit to get updated slack and criticality values? * We do this only once in a while, since it is expensive.
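* (How often this happens is governed by inner_recompute_limit, tracked with inner_crit_iter_count.)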
*/ @@ -1100,15 +1259,14 @@ static void placement_inner_loop(float t, #ifdef VERBOSE VTR_LOG("Inner loop recompute criticalities\n"); #endif - /* Using the delays in connection_delay, do a timing analysis to update slacks and - * criticalities and update the timing cost since it will change. - */ - recompute_criticalities(crit_exponent, - delay_model, - criticalities, - pin_timing_invalidator, - timing_info, - costs); + //Update all timing related classes + perform_full_timing_update(crit_exponent, + delay_model, + criticalities, + setup_slacks, + pin_timing_invalidator, + timing_info, + costs); } inner_crit_iter_count++; } @@ -1122,7 +1280,7 @@ static void placement_inner_loop(float t, /* Lines below prevent too much round-off error from accumulating * in the cost over many iterations (due to incremental updates). - * This round-off can lead to error checks failing because the cost + * This round-off can lead to error checks failing because the cost * is different from what you get when you recompute from scratch. */ ++(*moves_since_cost_recompute); @@ -1155,7 +1313,7 @@ static void recompute_costs_from_scratch(const t_placer_opts& placer_opts, } costs->bb_cost = new_bb_cost; - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (placer_opts.place_algorithm.is_timing_driven()) { double new_timing_cost = 0.; comp_td_costs(delay_model, *criticalities, &new_timing_cost); if (fabs(new_timing_cost - costs->timing_cost) > costs->timing_cost * ERROR_TOL) { @@ -1281,7 +1439,7 @@ static bool update_annealing_state(t_annealing_state* state, // The idea is that as the range limit shrinks (indicating we are fine-tuning a more optimized placement) we can focus more on a smaller number of critical connections, which a higher crit_exponent achieves. update_rlim(&state->rlim, success_rat, device_ctx.grid); - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (placer_opts.place_algorithm.is_timing_driven()) { state->crit_exponent = (1 - (state->rlim - FINAL_RLIM) * state->inverse_delta_rlim) * (placer_opts.td_place_exp_last - placer_opts.td_place_exp_first) + placer_opts.td_place_exp_first; @@ -1290,14 +1448,16 @@ static bool update_annealing_state(t_annealing_state* state, return true; } -static float starting_t(t_placer_costs* costs, +static float starting_t(float crit_exponent, + t_placer_costs* costs, t_placer_prev_inverse_costs* prev_inverse_costs, t_annealing_sched annealing_sched, int max_moves, float rlim, const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, - TimingInfo* timing_info, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + SetupTimingInfo* timing_info, MoveGenerator& move_generator, ClusteredPinTimingInvalidator* pin_timing_invalidator, t_pl_blocks_to_be_moved& blocks_affected, @@ -1318,16 +1478,23 @@ static float starting_t(t_placer_costs* costs, av = 0.; sum_of_squares = 0.; - /* Try one move per block. Set t high so essentially all accepted. */ + /* Try one move per block. Set the temperature high so essentially all accepted. 
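+     * The cost statistics gathered over these trial moves (av, sum_of_squares) are then used to choose the initial annealing temperature.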
*/ + float t = HUGE_POSITIVE_FLOAT; for (i = 0; i < move_lim; i++) { - e_move_result swap_result = try_swap(HUGE_POSITIVE_FLOAT, costs, prev_inverse_costs, rlim, + //Will not deploy setup slack analysis, so omit crit_exponenet and setup_slack + e_move_result swap_result = try_swap(t, + crit_exponent, + costs, + prev_inverse_costs, + rlim, move_generator, timing_info, pin_timing_invalidator, blocks_affected, delay_model, criticalities, + setup_slacks, placer_opts.rlim_escape_fraction, placer_opts.place_algorithm, placer_opts.timing_tradeoff); @@ -1392,17 +1559,19 @@ static void reset_move_nets(int num_nets_affected) { } static e_move_result try_swap(float t, + float crit_exponent, t_placer_costs* costs, t_placer_prev_inverse_costs* prev_inverse_costs, float rlim, MoveGenerator& move_generator, - TimingInfo* timing_info, + SetupTimingInfo* timing_info, ClusteredPinTimingInvalidator* pin_timing_invalidator, t_pl_blocks_to_be_moved& blocks_affected, const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, float rlim_escape_fraction, - enum e_place_algorithm place_algorithm, + const t_place_algorithm& place_algorithm, float timing_tradeoff) { /* Picks some block and moves it to another spot. If this spot is * * occupied, switch the blocks. Assess the change in cost function. * @@ -1463,21 +1632,62 @@ static e_move_result try_swap(float t, //Update the block positions apply_move_blocks(blocks_affected); - // Find all the nets affected by this swap and update their costs + //Find all the nets affected by this swap and update their costs + //This routine calculates new connection delays and timing costs + //and store them in proposed_* data structures + //This routine also calculates the wiring cost, which doesn't + //depend on the timing driven data int num_nets_affected = find_affected_nets_and_update_costs(place_algorithm, delay_model, criticalities, blocks_affected, bb_delta_c, timing_delta_c); - if (place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + + //For setup slack analysis, we first do a timing analysis to get the newest slack values + //resulted from the proposed block moves. If the move turns out to be accepted, we keep + //the updated slack values and commit the block moves. If rejected, we reject the proposed + //block moves and revert this timing analysis. + if (place_algorithm == SLACK_TIMING_PLACE) { + //Gather all the connections with modified delays for incremental timing updates. + //This routine relies on comparing proposed_connection_delay and connection_delay. + invalidate_affected_connections(blocks_affected, + pin_timing_invalidator, + timing_info); + + //Update the connection_timing_cost and connection_delay + //values from the temporary values. + commit_td_cost(blocks_affected); + + //Update timing information. Since we are analyzing setup slacks, + //we only update those values and keep the criticalities stale + //so as not to interfere with the original timing driven algorithm. + // + //Note: the timing info must be updated after applying block moves + //and committing the timing driven delays and costs. + //If we wish to revert this timing update due to move rejection, + //we need to revert block moves and restore the timing values. 
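+            //Only the setup slacks are refreshed by this update; criticality updates stay disabled, so the criticality * delay costs are left untouched by the slack analysis.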
+ criticalities->disable_update(); + setup_slacks->enable_update(); + update_timing_classes(crit_exponent, + timing_info, + criticalities, + setup_slacks, + pin_timing_invalidator); + + /* Get the setup slack analysis cost */ + //TODO: calculate a weighted average of the slack cost and wiring cost + delta_c = analyze_setup_slack_cost(setup_slacks); + + } else if (place_algorithm == CRITICALITY_TIMING_PLACE) { /*in this case we redefine delta_c as a combination of timing and bb. * *additionally, we normalize all values, therefore delta_c is in * *relation to 1*/ - delta_c = (1 - timing_tradeoff) * bb_delta_c * prev_inverse_costs->bb_cost + timing_tradeoff * timing_delta_c * prev_inverse_costs->timing_cost; + } else { + VTR_ASSERT(place_algorithm == BOUNDING_BOX_PLACE); delta_c = bb_delta_c; } @@ -1488,18 +1698,28 @@ static e_move_result try_swap(float t, costs->cost += delta_c; costs->bb_cost += bb_delta_c; - if (place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (place_algorithm == SLACK_TIMING_PLACE) { + /* Update the timing driven cost as usual */ + costs->timing_cost += timing_delta_c; + + //Commit the setup slack information + //The timing delay and cost values should be committed already + commit_setup_slacks(setup_slacks); + } + + if (place_algorithm == CRITICALITY_TIMING_PLACE) { costs->timing_cost += timing_delta_c; //Invalidates timing of modified connections for incremental timing updates - //Must be called before commit_td_cost since it relies on comparing - //proposed_connection_delay and connection_delay - invalidate_affected_connection_delays(blocks_affected, - pin_timing_invalidator, - timing_info); - - /*update the connection_timing_cost and connection_delay - * values from the temporary values */ + //This routine relies on comparing proposed_connection_delay and connection_delay + //If the setup slack analysis was not performed, the + //sink pins are yet to be invalidated. + invalidate_affected_connections(blocks_affected, + pin_timing_invalidator, + timing_info); + + //update the connection_timing_cost and connection_delay + //values from the temporary values commit_td_cost(blocks_affected); } @@ -1509,14 +1729,42 @@ static e_move_result try_swap(float t, /* Update clb data structures since we kept the move. */ commit_move_blocks(blocks_affected); - } else { /* Move was rejected. */ - /* Reset the net cost function flags first. */ + } else { //move_outcome == REJECTED + + /* Reset the net cost function flags first. */ reset_move_nets(num_nets_affected); /* Restore the place_ctx.block_locs data structures to their state before the move. */ revert_move_blocks(blocks_affected); - if (place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (place_algorithm == SLACK_TIMING_PLACE) { + //Revert the timing delays and costs to pre-update values + //These routines must be called after reverting the block moves + //TODO: make this process incremental + comp_td_connection_delays(delay_model); + comp_td_costs(delay_model, *criticalities, &costs->timing_cost); + + //Re-invalidate the affected sink pins since the proposed move is + //rejected, and the same blocks are reverted to their original + //positions. The affected sink pins should stay the same. 
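+            //Re-running update_timing_classes() on the restored delays below brings PlacerSetupSlacks back in sync with connection_setup_slack, which the assertion afterwards double-checks.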
+ invalidate_affected_connections(blocks_affected, + pin_timing_invalidator, + timing_info); + + /* Revert the timing update */ + update_timing_classes(crit_exponent, + timing_info, + criticalities, + setup_slacks, + pin_timing_invalidator); + + VTR_ASSERT_SAFE_MSG( + verify_connection_setup_slacks(setup_slacks), + "The current setup slacks should be identical to the values before the try swap timing info update."); + } + + if (place_algorithm == CRITICALITY_TIMING_PLACE) { + /* Unstage the values stored in proposed_* data structures */ revert_td_cost(blocks_affected); } } @@ -1547,14 +1795,14 @@ static e_move_result try_swap(float t, check_place(*costs, delay_model, place_algorithm); #endif - return (move_outcome); + return move_outcome; } //Puts all the nets changed by the current swap into nets_to_update, //and updates their bounding box. // //Returns the number of affected nets. -static int find_affected_nets_and_update_costs(e_place_algorithm place_algorithm, +static int find_affected_nets_and_update_costs(const t_place_algorithm& place_algorithm, const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, t_pl_blocks_to_be_moved& blocks_affected, @@ -1587,8 +1835,8 @@ static int find_affected_nets_and_update_costs(e_place_algorithm place_algorithm //once per net, not once per pin. update_net_bb(net_id, blocks_affected, iblk, blk, blk_pin); - if (place_algorithm == PATH_TIMING_DRIVEN_PLACE) { - //Determine the change in timing costs if required + if (place_algorithm.is_timing_driven()) { + /* Determine the change in connection delay and timing cost */ update_td_delta_costs(delay_model, *criticalities, net_id, blk_pin, blocks_affected, timing_delta_c); } } @@ -1650,6 +1898,35 @@ static void update_net_bb(const ClusterNetId net, } } +/** + * @brief Calculate the new connection delay and timing cost of all the + * sink pins affected by moving a specific pin to a new location. + * Also calculates the total change in the timing cost. + * + * Assumes that the blocks have been moved to the proposed new locations. + * Otherwise, the routine comp_td_connection_delay() will not be able to + * calculate the most up to date connection delay estimation value. + * + * If the moved pin is a driver pin, then all the sink connections that are + * driven by this driver pin are considered. + * + * If the moved pin is a sink pin, then it is the only pin considered. But + * in some cases, the sink is already accounted for if it is also driven + * by a driver pin located on a moved block. Computing it again would double + * count its affect on the total timing cost change (delta_timing_cost). + * + * It is possible for some connections to have unchanged delays. For instance, + * if we are using a dx/dy delay model, this could occur if a sink pin moved + * to a new position with the same dx/dy from its net's driver pin. + * + * We skip these connections with unchanged delay values as their delay need + * not be updated. Their timing costs also do not require any update, since + * the criticalities values are always kept stale/unchanged during an block + * swap attempt. (Unchanged Delay * Unchanged Criticality = Unchanged Cost) + * + * This is also done to minimize the number of timing node/edge invalidations + * for incremental static timing analysis (incremental STA). 
+ */ static void update_td_delta_costs(const PlaceDelayModel* delay_model, const PlacerCriticalities& criticalities, const ClusterNetId net, @@ -1659,42 +1936,103 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, auto& cluster_ctx = g_vpr_ctx.clustering(); if (cluster_ctx.clb_nlist.pin_type(pin) == PinType::DRIVER) { - //This pin is a net driver on a moved block. - //Re-compute all point to point connections for this net. + /* This pin is a net driver on a moved block. */ + /* Recompute all point to point connection delays for the net sinks. */ for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net).size(); ipin++) { float temp_delay = comp_td_connection_delay(delay_model, net, ipin); - proposed_connection_delay[net][ipin] = temp_delay; + /* If the delay hasn't changed, do not mark this pin as affected */ + if (temp_delay == connection_delay[net][ipin]) { + continue; + } + /* Calculate proposed delay and cost values */ + proposed_connection_delay[net][ipin] = temp_delay; proposed_connection_timing_cost[net][ipin] = criticalities.criticality(net, ipin) * temp_delay; delta_timing_cost += proposed_connection_timing_cost[net][ipin] - connection_timing_cost[net][ipin]; + /* Record this connection in blocks_affected.affected_pins */ ClusterPinId sink_pin = cluster_ctx.clb_nlist.net_pin(net, ipin); blocks_affected.affected_pins.push_back(sink_pin); } } else { - //This pin is a net sink on a moved block + /* This pin is a net sink on a moved block */ VTR_ASSERT_SAFE(cluster_ctx.clb_nlist.pin_type(pin) == PinType::SINK); - //If this net is being driven by a moved block, we do not - //need to compute the change in the timing cost (here) since it will - //be computed by the net's driver pin (since the driver block moved). - // - //Computing it here would double count the change, and mess up the - //delta_timing_cost value. + /* Check if this sink's net is driven by a moved block */ if (!driven_by_moved_block(net, blocks_affected)) { - int net_pin = cluster_ctx.clb_nlist.pin_net_index(pin); + /* Get the sink pin index in the net */ + int ipin = cluster_ctx.clb_nlist.pin_net_index(pin); - float temp_delay = comp_td_connection_delay(delay_model, net, net_pin); - proposed_connection_delay[net][net_pin] = temp_delay; + float temp_delay = comp_td_connection_delay(delay_model, net, ipin); + /* If the delay hasn't changed, do not mark this pin as affected */ + if (temp_delay == connection_delay[net][ipin]) { + return; + } - proposed_connection_timing_cost[net][net_pin] = criticalities.criticality(net, net_pin) * temp_delay; - delta_timing_cost += proposed_connection_timing_cost[net][net_pin] - connection_timing_cost[net][net_pin]; + /* Calculate proposed delay and cost values */ + proposed_connection_delay[net][ipin] = temp_delay; + proposed_connection_timing_cost[net][ipin] = criticalities.criticality(net, ipin) * temp_delay; + delta_timing_cost += proposed_connection_timing_cost[net][ipin] - connection_timing_cost[net][ipin]; + /* Record this connection in blocks_affected.affected_pins */ blocks_affected.affected_pins.push_back(pin); } } } +/** + * @brief Check if the setup slack has gotten better or worse due to a block swap. + * + * Get all the modified slack values via the PlacerSetupSlacks class, and compare + * them with the original values at these connections. Sort them and compare them + * one by one, and return the difference of the first different pair.
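+ * + * A small hypothetical example: if the modified connections had original slacks {-1.2, 0.3} and proposed slacks {-0.9, 0.1}, both lists are sorted and the first differing pair is -1.2 vs -0.9, so the routine returns -1.2 - (-0.9) = -0.3. A negative return value means the worst affected slack has improved, so the move will be accepted.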
+ * + * If the new slack value is larger (better), return a negative value so that + * the move will be accepted. If the new slack value is smaller (worse), return a + * positive value so that the move will be rejected. + * + * If no slack values have changed, return an arbitrary positive number. A + * move that does not change any slack values brings no benefit, so it is rejected. + * + * The sorting is needed to handle the unlikely circumstance in which a bad slack + * value suddenly gets much better due to the block move, while a good slack value + * gets much worse, perhaps even worse than the original worst slack value. + */ +static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& clb_nlist = cluster_ctx.clb_nlist; + + //Find the original/proposed setup slacks of pins with modified values + std::vector original_setup_slacks, proposed_setup_slacks; + + auto clb_pins_modified = setup_slacks->pins_with_modified_setup_slack(); + for (ClusterPinId clb_pin : clb_pins_modified) { + ClusterNetId net_id = clb_nlist.pin_net(clb_pin); + size_t ipin = clb_nlist.pin_net_index(clb_pin); + + original_setup_slacks.push_back(connection_setup_slack[net_id][ipin]); + proposed_setup_slacks.push_back(setup_slacks->setup_slack(net_id, ipin)); + } + + //Sort in ascending order, from the worst slack value to the best + std::sort(original_setup_slacks.begin(), original_setup_slacks.end()); + std::sort(proposed_setup_slacks.begin(), proposed_setup_slacks.end()); + + //Check the first pair of slack values that are different + //If found, return their difference + for (size_t idiff = 0; idiff < original_setup_slacks.size(); ++idiff) { + float slack_diff = original_setup_slacks[idiff] - proposed_setup_slacks[idiff]; + + if (slack_diff != 0) { + return slack_diff; + } + } + + //If all slack values are identical (or no modified slack values), + //reject this move by returning an arbitrary positive number as cost. + return 1; +} + static e_move_result assess_swap(double delta_c, double t) { /* Returns: 1 -> move accepted, 0 -> rejected. */ if (delta_c <= 0) { @@ -1787,7 +2125,7 @@ static float comp_td_connection_delay(const PlaceDelayModel* delay_model, Cluste //Recompute all point to point delays, updating connection_delay static void comp_td_connection_delays(const PlaceDelayModel* delay_model) { - auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); for (auto net_id : cluster_ctx.clb_nlist.nets()) { for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ++ipin) { @@ -1796,45 +2134,83 @@ static void comp_td_connection_delays(const PlaceDelayModel* delay_model) { } } -/* Update the connection_timing_cost values from the temporary * - * values for all connections that have changed. */ -static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected) { - auto& cluster_ctx = g_vpr_ctx.clustering(); +/** + * @brief Commit all the setup slack values from the PlacerSetupSlacks + * class to `connection_setup_slack`. + * + * This routine is incremental since it relies on the pins_with_modified_setup_slack() + * to detect which pins need to be updated and which pins do not. + * + * Therefore, it is assumed that this routine is always called immediately after + * each time update_timing_classes() is called with setup slack update enabled.
+ * Otherwise, pins_with_modified_setup_slack() cannot accurately account for all + * the pins that have their setup slacks changed, making this routine incorrect. + * + * Currently, the only exception to the rule above is when setup slack analysis is used + * during the placement quench. The new setup slacks might be either accepted or + * rejected, so for efficiency reasons, this routine is not called if the slacks are + * rejected in the end. For more detailed info, see the try_swap() routine. + */ +static void commit_setup_slacks(const PlacerSetupSlacks* setup_slacks) { + const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; - /* Go through all the blocks moved. */ - for (int iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) { - ClusterBlockId bnum = blocks_affected.moved_blocks[iblk].block_num; - for (ClusterPinId pin_id : cluster_ctx.clb_nlist.block_pins(bnum)) { - ClusterNetId net_id = cluster_ctx.clb_nlist.pin_net(pin_id); + //Incremental: only go through sink pins with modified setup slack + auto clb_pins_modified = setup_slacks->pins_with_modified_setup_slack(); + for (ClusterPinId pin_id : clb_pins_modified) { + ClusterNetId net_id = clb_nlist.pin_net(pin_id); + size_t pin_index_in_net = clb_nlist.pin_net_index(pin_id); - if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) - continue; + connection_setup_slack[net_id][pin_index_in_net] = setup_slacks->setup_slack(net_id, pin_index_in_net); + } +} - if (cluster_ctx.clb_nlist.pin_type(pin_id) == PinType::DRIVER) { - //This net is being driven by a moved block, recompute - //all point to point connections on this net. - for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ipin++) { - connection_delay[net_id][ipin] = proposed_connection_delay[net_id][ipin]; - proposed_connection_delay[net_id][ipin] = INVALID_DELAY; - connection_timing_cost[net_id][ipin] = proposed_connection_timing_cost[net_id][ipin]; - proposed_connection_timing_cost[net_id][ipin] = INVALID_DELAY; - } - } else { - //This pin is a net sink on a moved block - VTR_ASSERT_SAFE(cluster_ctx.clb_nlist.pin_type(pin_id) == PinType::SINK); +/** + * @brief Verify that the values in `connection_setup_slack` matches PlacerSetupSlacks. + * + * Return true if all connection values are identical. Otherwise, return false. + * + * Currently, this routine is called to check if the timing update has been successfully + * reverted after a proposed move is rejected when applying setup slack analysis during + * the placement quench. If successful, the setup slacks in PlacerSetupSlacks should be + * the same as the values in `connection_setup_slack` without running commit_setup_slacks(). + * For more detailed info, see the try_swap() routine. + */ +static bool verify_connection_setup_slacks(const PlacerSetupSlacks* setup_slacks) { + const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; + + //Go through every single sink pin to check that the slack values are the same + for (ClusterNetId net_id : clb_nlist.nets()) { + for (size_t ipin = 1; ipin < clb_nlist.net_pins(net_id).size(); ++ipin) { + if (connection_setup_slack[net_id][ipin] != setup_slacks->setup_slack(net_id, ipin)) { + return false; + } + } + } + return true; +} - /* The following "if" prevents the value from being updated twice. */ - if (!driven_by_moved_block(net_id, blocks_affected)) { - int net_pin = cluster_ctx.clb_nlist.pin_net_index(pin_id); +/** + * @brief Update the connection_timing_cost values from the temporary + * values for all connections that have/haven't changed. 
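The relationship between commit_setup_slacks() and verify_connection_setup_slacks() can be captured in a small toy model; this is not VPR code, only an illustration of the contract: on an accepted move the analyzer's new slacks are committed, while on a rejected move the commit is skipped and the reverted timing update must leave the analyzer matching the committed values again.

// Toy model of the commit/verify contract (stand-in types, not VPR code).
#include <cassert>
#include <map>
#include <utility>

int main() {
    using Conn = std::pair<int, int>; // (net, ipin) stand-in
    std::map<Conn, float> committed{{{0, 1}, -0.5f}}; // plays the role of connection_setup_slack
    std::map<Conn, float> analyzer = committed;       // plays the role of PlacerSetupSlacks

    analyzer[{0, 1}] = 0.2f; // a proposed move changes a slack in the analyzer only

    bool accepted = true; // outcome of assess_swap() in the real flow
    if (accepted) {
        committed[{0, 1}] = analyzer[{0, 1}]; // commit_setup_slacks() analogue
    } else {
        // Rejected: no commit; instead the move and timing update are undone,
        // which must bring the analyzer back in line with the committed values.
        analyzer[{0, 1}] = -0.5f; // stand-in for the reverted timing update
    }
    assert(committed == analyzer); // verify_connection_setup_slacks() analogue
    return 0;
}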
+ * + * All the connections have already been gathered by blocks_affected.affected_pins + * after running the routine find_affected_nets_and_update_costs() in try_swap(). + */ +static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected) { + auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& clb_nlist = cluster_ctx.clb_nlist; - connection_delay[net_id][net_pin] = proposed_connection_delay[net_id][net_pin]; - proposed_connection_delay[net_id][net_pin] = INVALID_DELAY; - connection_timing_cost[net_id][net_pin] = proposed_connection_timing_cost[net_id][net_pin]; - proposed_connection_timing_cost[net_id][net_pin] = INVALID_DELAY; - } - } - } /* Finished going through all the pins in the moved block */ - } /* Finished going through all the blocks moved */ + //Go through all the sink pins affected + for (ClusterPinId pin_id : blocks_affected.affected_pins) { + ClusterNetId net_id = clb_nlist.pin_net(pin_id); + int ipin = clb_nlist.pin_net_index(pin_id); + + //Commit the timing delay and cost values + connection_delay[net_id][ipin] = proposed_connection_delay[net_id][ipin]; + proposed_connection_delay[net_id][ipin] = INVALID_DELAY; + connection_timing_cost[net_id][ipin] = proposed_connection_timing_cost[net_id][ipin]; + proposed_connection_timing_cost[net_id][ipin] = INVALID_DELAY; + } } //Reverts modifications to proposed_connection_delay and proposed_connection_timing_cost based on @@ -1857,35 +2233,24 @@ static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected) { #endif } -//Invalidates the delays of connections effected by the specified move -// -//Relies on proposed_connection_delay and connection_delay to detect -//which connections have actually had their delay changed. -static void invalidate_affected_connection_delays(const t_pl_blocks_to_be_moved& blocks_affected, - ClusteredPinTimingInvalidator* pin_tedges_invalidator, - TimingInfo* timing_info) { +/** + * @brief Invalidates the connections affected by the specified block moves. + * + * All the connections recorded in blocks_affected.affected_pins have different + * values for `proposed_connection_delay` and `connection_delay`. + * + * Invalidate all the timing graph edges associated with these connections via + * the ClusteredPinTimingInvalidator class. + */ +static void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected, + ClusteredPinTimingInvalidator* pin_tedges_invalidator, + TimingInfo* timing_info) { VTR_ASSERT_SAFE(timing_info); VTR_ASSERT_SAFE(pin_tedges_invalidator); - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& clb_nlist = cluster_ctx.clb_nlist; - - //Inalidate timing graph edges affected by the move + /* Invalidate timing graph edges affected by the move */ for (ClusterPinId pin : blocks_affected.affected_pins) { - //It is possible that some connections may not have changed delay.(e.g. - //For instance, if using a dx/dy delay model, this could occur if a sink - //moved to a new position with the same dx/dy from it's driver. - // - //To minimze work during the incremental STA update we do not invalidate - //such unchanged connections. 
- - ClusterNetId net = clb_nlist.pin_net(pin); - int ipin = clb_nlist.pin_net_index(pin); - - if (proposed_connection_delay[net][ipin] != connection_delay[net][ipin]) { - //Delay changed, must invalidate - pin_tedges_invalidator->invalidate_connection(pin, timing_info); - } + pin_tedges_invalidator->invalidate_connection(pin, timing_info); } } @@ -1942,7 +2307,7 @@ static void update_td_costs(const PlaceDelayModel* delay_model, const PlacerCrit if (cluster_ctx.clb_nlist.net_is_ignored(clb_net)) continue; int ipin = clb_nlist.pin_net_index(clb_pin); - VTR_ASSERT_SAFE(ipin >= 0 && ipin < int(clb_nlist.net_pins(clb_net).size())); + VTR_ASSERT_SAFE(ipin >= 1 && ipin < int(clb_nlist.net_pins(clb_net).size())); double new_timing_cost = comp_td_connection_cost(delay_model, place_crit, clb_net, ipin); @@ -2111,12 +2476,14 @@ static void alloc_and_load_placement_structs(float place_cost_exp, max_pins_per_clb = max(max_pins_per_clb, type.num_pins); } - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (placer_opts.place_algorithm.is_timing_driven()) { /* Allocate structures associated with timing driven placement */ /* [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1] */ connection_delay = make_net_pins_matrix(cluster_ctx.clb_nlist, 0.f); proposed_connection_delay = make_net_pins_matrix(cluster_ctx.clb_nlist, 0.f); + connection_setup_slack = make_net_pins_matrix(cluster_ctx.clb_nlist, std::numeric_limits::infinity()); + connection_timing_cost = PlacerTimingCosts(cluster_ctx.clb_nlist); proposed_connection_timing_cost = make_net_pins_matrix(cluster_ctx.clb_nlist, 0.); net_timing_cost.resize(num_nets, 0.); @@ -2155,9 +2522,10 @@ static void alloc_and_load_placement_structs(float place_cost_exp, /* Frees the major structures needed by the placer (and not needed * * elsewhere). */ static void free_placement_structs(const t_placer_opts& placer_opts) { - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (placer_opts.place_algorithm.is_timing_driven()) { vtr::release_memory(connection_timing_cost); vtr::release_memory(connection_delay); + vtr::release_memory(connection_setup_slack); vtr::release_memory(proposed_connection_timing_cost); vtr::release_memory(proposed_connection_delay); @@ -2714,7 +3082,7 @@ static void alloc_and_load_for_fast_cost_update(float place_cost_exp) { static void check_place(const t_placer_costs& costs, const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, - enum e_place_algorithm place_algorithm) { + const t_place_algorithm& place_algorithm) { /* Checks that the placement has not confused our data structures. * * i.e. the clb and block structures agree about the locations of * * every block, blocks are in legal spots, etc. 
Also recomputes * @@ -2741,7 +3109,7 @@ static void check_place(const t_placer_costs& costs, static int check_placement_costs(const t_placer_costs& costs, const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, - enum e_place_algorithm place_algorithm) { + const t_place_algorithm& place_algorithm) { int error = 0; double bb_cost_check; double timing_cost_check; @@ -2753,7 +3121,7 @@ static int check_placement_costs(const t_placer_costs& costs, error++; } - if (place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (place_algorithm.is_timing_driven()) { comp_td_costs(delay_model, *criticalities, &timing_cost_check); //VTR_LOG("timing_cost recomputed from scratch: %g\n", timing_cost_check); if (fabs(timing_cost_check - costs.timing_cost) > costs.timing_cost * ERROR_TOL) { @@ -3023,7 +3391,7 @@ static void init_annealing_state(t_annealing_state* state, } bool placer_needs_lookahead(const t_vpr_setup& vpr_setup) { - return (vpr_setup.PlacerOpts.place_algorithm == PATH_TIMING_DRIVEN_PLACE); + return (vpr_setup.PlacerOpts.place_algorithm.is_timing_driven()); } //transforms the vector moved_blocks to a vector of ints and adds it in glob_breakpoint_state diff --git a/vpr/src/place/timing_place.cpp b/vpr/src/place/timing_place.cpp index e62eab6c894..d4dfbcc6f52 100644 --- a/vpr/src/place/timing_place.cpp +++ b/vpr/src/place/timing_place.cpp @@ -1,3 +1,8 @@ +/** + * @file timing_place.cpp + * @brief Stores the method definitions of classes defined in timing_place.h. + */ + #include #include @@ -14,71 +19,46 @@ #include "timing_info.h" -//Use an incremental approach to updaing criticalities? -constexpr bool INCR_UPDATE_CRITICALITIES = true; - -/**************************************/ - -/* Allocates space for the timing_place_crit_ data structure * - * I chunk the data to save space on large problems. */ +///@brief Allocates space for the timing_place_crit_ data structure. PlacerCriticalities::PlacerCriticalities(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup) : clb_nlist_(clb_nlist) , pin_lookup_(netlist_pin_lookup) , timing_place_crit_(make_net_pins_matrix(clb_nlist_, std::numeric_limits::quiet_NaN())) { } -/**************************************/ +/** + * @brief Updated the criticalities in the timing_place_crit_ data structure. + * + * If the criticalities are not updated immediately after each time we call + * timing_info->update(), then timing_info->pins_with_modified_setup_criticality() + * cannot accurately account for all the pins that need to be updated. In this case, + * `recompute_required` would be true, and we update all criticalities from scratch. + * + * If the criticality exponent has changed, we also need to update from scratch. + */ void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_info, float crit_exponent) { - /* Performs a 1-to-1 mapping from criticality to timing_place_crit_. - * For every pin on every net (or, equivalently, for every tedge ending - * in that pin), timing_place_crit_ = criticality^(criticality exponent) */ + /* If update is not enabled, exit the routine. 
*/ + if (!update_enabled) { + /* re-computation is required on the next iteration */ + recompute_required = true; + return; + } - //Determine what pins need updating - if (INCR_UPDATE_CRITICALITIES) { - cluster_pins_with_modified_criticality_.clear(); - if (crit_exponent != last_crit_exponent_) { - //Criticality exponent changed, must re-calculate criticalities for *all* sink pins - for (ClusterNetId net_id : clb_nlist_.nets()) { - for (ClusterPinId pin_id : clb_nlist_.net_sinks(net_id)) { - cluster_pins_with_modified_criticality_.insert(pin_id); - } - } - - //Record new criticality exponent - last_crit_exponent_ = crit_exponent; - } else { - //Criticality exponent unchanged - // - //Collect the cluster pins which need to be updated based on the latest timing - //analysis - // - //Note we use the set of pins reported by the *timing_info* as having modified - //criticality, rather than those marked as modified by the timing analyzer. - //Since timing_info uses shifted/relaxed criticality (which depends on max - //required time and worst case slacks), additional nodes may be modified - //when updating the atom pin criticalities. - - for (AtomPinId atom_pin : timing_info->pins_with_modified_setup_criticality()) { - ClusterPinId clb_pin = pin_lookup_.connected_clb_pin(atom_pin); - - //Some atom pins correspond to connections which are completely - //contained within a cluster, and hence have no corresponding - //clustered pin. - if (!clb_pin) continue; - - cluster_pins_with_modified_criticality_.insert(clb_pin); - } - } + /* Determine what pins need updating */ + if (!recompute_required && crit_exponent == last_crit_exponent_) { + incr_update_criticalities(timing_info); } else { - //Non-incremental: all pins and nets need updating - for (ClusterNetId net_id : clb_nlist_.nets()) { - for (ClusterPinId pin_id : clb_nlist_.net_sinks(net_id)) { - cluster_pins_with_modified_criticality_.insert(pin_id); - } - } + recompute_criticalities(); + + /* Record new criticality exponent */ + last_crit_exponent_ = crit_exponent; } - //Update the effected pins + /* Performs a 1-to-1 mapping from criticality to timing_place_crit_. + * For every pin on every net (or, equivalently, for every tedge ending + * in that pin), timing_place_crit_ = criticality^(criticality exponent) */ + + /* Update the affected pins */ for (ClusterPinId clb_pin : cluster_pins_with_modified_criticality_) { ClusterNetId clb_net = clb_nlist_.pin_net(clb_pin); int pin_index_in_net = clb_nlist_.pin_net_index(clb_pin); @@ -90,16 +70,176 @@ void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_inf * criticality by taking it to some power, crit_exponent (between 1 and 8 by default). */ timing_place_crit_[clb_net][pin_index_in_net] = pow(clb_pin_crit, crit_exponent); } + + /* Criticalities updated. In sync with timing info. */ + /* Can be incrementally updated on the next iteration */ + recompute_required = false; +} + +/** + * @brief Collect the cluster pins which need to be updated based on the latest timing + * analysis so that incremental updates to criticalities can be performed. + * + * Note we use the set of pins reported by the *timing_info* as having modified + * criticality, rather than those marked as modified by the timing analyzer. + * + * Since timing_info uses shifted/relaxed criticality (which depends on max required + * time and worst case slacks), additional nodes may be modified when updating the + * atom pin criticalities. 
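The effect of the criticality exponent, and why a change in the exponent forces a from-scratch update, can be seen numerically in a short sketch (the raw criticalities below are made up; the 1-to-8 exponent range follows the comment in update_criticalities() above):

// Numeric sketch of criticality sharpening: crit^exponent (made-up values).
#include <cmath>
#include <cstdio>

int main() {
    const float raw_crits[] = {0.2f, 0.6f, 0.9f, 1.0f};
    const float exponents[] = {1.0f, 8.0f};
    for (float exponent : exponents) {
        std::printf("exponent = %.0f:", exponent);
        for (float crit : raw_crits) {
            // A larger exponent suppresses moderately critical connections while
            // near-critical ones stay close to 1. Since every stored value depends
            // on the exponent, changing it invalidates *all* criticalities at once.
            std::printf("  %.3f", std::pow(crit, exponent));
        }
        std::printf("\n");
    }
    return 0;
}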
+ */ + +void PlacerCriticalities::incr_update_criticalities(const SetupTimingInfo* timing_info) { + cluster_pins_with_modified_criticality_.clear(); + + for (AtomPinId atom_pin : timing_info->pins_with_modified_setup_criticality()) { + ClusterPinId clb_pin = pin_lookup_.connected_clb_pin(atom_pin); + + //Some atom pins correspond to connections which are completely + //contained within a cluster, and hence have no corresponding + //clustered pin. + if (!clb_pin) continue; + + cluster_pins_with_modified_criticality_.insert(clb_pin); + } +} + +/** + * @brief Collect all the sink pins in the netlist and prepare them for update. + * + * For the incremental version, see PlacerCriticalities::incr_update_criticalities(). + */ +void PlacerCriticalities::recompute_criticalities() { + cluster_pins_with_modified_criticality_.clear(); + + /* Non-incremental: all sink pins need updating */ + for (ClusterNetId net_id : clb_nlist_.nets()) { + for (ClusterPinId pin_id : clb_nlist_.net_sinks(net_id)) { + cluster_pins_with_modified_criticality_.insert(pin_id); + } + } } -void PlacerCriticalities::set_criticality(ClusterNetId net_id, int ipin, float val) { - timing_place_crit_[net_id][ipin] = val; +///@brief Override the criticality of a particular connection. +void PlacerCriticalities::set_criticality(ClusterNetId net_id, int ipin, float crit_val) { + VTR_ASSERT_SAFE_MSG(ipin > 0, "The pin should not be a driver pin (ipin != 0)"); + VTR_ASSERT_SAFE_MSG(ipin < int(clb_nlist_.net_pins(net_id).size()), "The pin index in net should be smaller than fanout"); + + timing_place_crit_[net_id][ipin] = crit_val; } +/** + * @brief Returns the range of clustered netlist pins (i.e. ClusterPinIds) which + * were modified by the last call to PlacerCriticalities::update_criticalities(). + */ PlacerCriticalities::pin_range PlacerCriticalities::pins_with_modified_criticality() const { return vtr::make_range(cluster_pins_with_modified_criticality_); } +/**************************************/ + +///@brief Allocates space for the timing_place_setup_slacks_ data structure. +PlacerSetupSlacks::PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup) + : clb_nlist_(clb_nlist) + , pin_lookup_(netlist_pin_lookup) + , timing_place_setup_slacks_(make_net_pins_matrix<float>(clb_nlist_, std::numeric_limits<float>::quiet_NaN())) { +} + +/** + * @brief Updates the setup slacks in the timing_place_setup_slacks_ data structure. + * + * If the setup slacks are not updated immediately after each time we call + * timing_info->update(), then timing_info->pins_with_modified_setup_slack() + * cannot accurately account for all the pins that need to be updated. + * + * In this case, `recompute_required` would be true, and we update all setup slacks + * from scratch. + */ +void PlacerSetupSlacks::update_setup_slacks(const SetupTimingInfo* timing_info) { + /* If update is not enabled, exit the routine.
*/ + if (!update_enabled) { + /* re-computation is required on the next iteration */ + recompute_required = true; + return; + } + + /* Determine what pins need updating */ + if (!recompute_required) { + incr_update_setup_slacks(timing_info); + } else { + recompute_setup_slacks(); + } + + /* Update the affected pins */ + for (ClusterPinId clb_pin : cluster_pins_with_modified_setup_slack_) { + ClusterNetId clb_net = clb_nlist_.pin_net(clb_pin); + int pin_index_in_net = clb_nlist_.pin_net_index(clb_pin); + + float clb_pin_setup_slack = calculate_clb_net_pin_setup_slack(*timing_info, pin_lookup_, clb_pin); + + timing_place_setup_slacks_[clb_net][pin_index_in_net] = clb_pin_setup_slack; + } + + /* Setup slacks updated. In sync with timing info. */ + /* Can be incrementally updated on the next iteration. */ + recompute_required = false; +} + +/** + * @brief Collect the cluster pins which need to be updated based on the latest timing + * analysis so that incremental updates to setup slacks can be performed. + * + * Note we use the set of pins reported by the *timing_info* as having modified + * setup slacks, rather than those marked as modified by the timing analyzer. + */ +void PlacerSetupSlacks::incr_update_setup_slacks(const SetupTimingInfo* timing_info) { + cluster_pins_with_modified_setup_slack_.clear(); + + for (AtomPinId atom_pin : timing_info->pins_with_modified_setup_slack()) { + ClusterPinId clb_pin = pin_lookup_.connected_clb_pin(atom_pin); + + //Some atom pins correspond to connections which are completely + //contained within a cluster, and hence have no corresponding + //clustered pin. + if (!clb_pin) continue; + + cluster_pins_with_modified_setup_slack_.insert(clb_pin); + } +} + +/** + * @brief Collect all the sink pins in the netlist and prepare them update. + * + * For the incremental version, see PlacerSetupSlacks::incr_update_setup_slacks(). + */ +void PlacerSetupSlacks::recompute_setup_slacks() { + cluster_pins_with_modified_setup_slack_.clear(); + + /* Non-incremental: all sink pins need updating */ + for (ClusterNetId net_id : clb_nlist_.nets()) { + for (ClusterPinId pin_id : clb_nlist_.net_sinks(net_id)) { + cluster_pins_with_modified_setup_slack_.insert(pin_id); + } + } +} + +///@brief Override the setup slack of a particular connection. +void PlacerSetupSlacks::set_setup_slack(ClusterNetId net_id, int ipin, float slack_val) { + VTR_ASSERT_SAFE_MSG(ipin > 0, "The pin should not be a driver pin (ipin != 0)"); + VTR_ASSERT_SAFE_MSG(ipin < int(clb_nlist_.net_pins(net_id).size()), "The pin index in net should be smaller than fanout"); + + timing_place_setup_slacks_[net_id][ipin] = slack_val; +} + +/** + * @brief Returns the range of clustered netlist pins (i.e. ClusterPinIds) + * which were modified by the last call to PlacerSetupSlacks::update_setup_slacks(). + */ +PlacerSetupSlacks::pin_range PlacerSetupSlacks::pins_with_modified_setup_slack() const { + return vtr::make_range(cluster_pins_with_modified_setup_slack_); +} + +/**************************************/ + std::unique_ptr alloc_lookups_and_criticalities(t_chan_width_dist chan_width_dist, const t_placer_opts& placer_opts, const t_router_opts& router_opts, diff --git a/vpr/src/place/timing_place.h b/vpr/src/place/timing_place.h index c3d8a41c3a1..74996de4a5a 100644 --- a/vpr/src/place/timing_place.h +++ b/vpr/src/place/timing_place.h @@ -1,3 +1,39 @@ +/** + * @file timing_place.h + * @brief Interface used by the VPR placer to query information + * from the Tatum timing analyzer. 
+ * + * @class PlacerSetupSlacks + * Queries connection **RAW** setup slacks, which can + * range from negative to positive values. Also maps + * atom pin setup slacks to clb pin setup slacks. + * @class PlacerCriticalities + * Query connection criticalities, which are calculuated + * based on the raw setup slacks and ranges from 0 to 1. + * Also maps atom pin crit. to clb pin crit. + * @class PlacerTimingCosts + * Hierarchical structure used by update_td_costs() to + * maintain the order of addition operation of float values + * (to avoid round-offs) while doing incremental updates. + * + * Calculating criticalities: + * All the raw setup slack values across a single clock domain are gathered + * and rated from the best to the worst in terms of criticalities. In order + * to calculate criticalities, all the slack values need to be non-negative. + * Hence, if the worst slack is negative, all the slack values are shifted + * by the value of the worst slack so that the value is at least 0. If the + * worst slack is positive, then no shift happens. + * + * The best (shifted) slack (the most positive one) will have a criticality of 0. + * The worst (shifted) slack value will have a criticality of 1. + * + * Criticalities are used to calculated timing costs for each connection. + * The formula is cost = delay * criticality. + * + * For a more detailed description on how criticalities are calculated, see + * calc_relaxed_criticality() in `timing_util.cpp`. + */ + #ifndef TIMING_PLACE #define TIMING_PLACE @@ -14,32 +50,46 @@ std::unique_ptr alloc_lookups_and_criticalities(t_chan_width_di std::vector& segment_inf, const t_direct_inf* directs, const int num_directs); -/* Usage + +/** + * @brief PlacerCriticalities returns the clustered netlist connection criticalities + * used by the placer ('sharpened' by a criticality exponent). + * + * Usage * ===== - * PlacerCriticalities returns the clustered netlist connection criticalities used by - * the placer ('sharpened' by a criticality exponent). This also serves to map atom - * netlist level criticalites (i.e. on AtomPinIds) to the clustered netlist (i.e. - * ClusterPinIds) used during placement. + * This class also serves to map atom netlist level criticalites (i.e. on AtomPinIds) + * to the clustered netlist (i.e. ClusterPinIds) used during placement. * - * Criticalities are calculated by calling update_criticalities(), which will - * update criticalities based on the atom netlist connection criticalities provided by - * the passed in SetupTimingInfo. This is done incrementally, based on the modified - * connections/AtomPinIds returned by SetupTimingInfo. + * Criticalities are updated by update_criticalities(), given that `update_enabled` is + * set to true. It will update criticalities based on the atom netlist connection + * criticalities provided by the passed in SetupTimingInfo. * - * The criticalities of individual connections can then be queried by calling the - * criticality() member function. + * This process can be done incrementally, based on the modified connections/AtomPinIds + * returned by SetupTimingInfo. However, the set returned only reflects the connections + * changed by the last call to the timing info update. * - * It also supports iterating via pins_with_modified_criticalities() through the - * clustered netlist pins/connections which have had their criticality modified by - * the last call to update_criticalities(), which is useful for incrementally - * re-calculating timing costs. 
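For intuition only, here is a simplified sketch of the slack-shifting scheme described in the header comment above; it is not the exact VPR formula (the authoritative computation, including multiple clock domains and relaxed required times, is calc_relaxed_criticality() in timing_util.cpp):

// Intuition-only sketch: shift raw slacks so the worst is at least 0, then map
// the worst shifted slack to criticality 1 and the best to criticality 0.
#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
    std::vector<float> slacks = {-2.0f, -0.5f, 1.0f, 3.0f}; // made-up raw setup slacks (ns)

    float worst = *std::min_element(slacks.begin(), slacks.end());
    float shift = worst < 0 ? -worst : 0.0f; // only shift when the worst slack is negative

    std::vector<float> shifted;
    for (float s : slacks) shifted.push_back(s + shift);
    float best = *std::max_element(shifted.begin(), shifted.end());

    for (size_t i = 0; i < slacks.size(); ++i) {
        float crit = 1.0f - shifted[i] / best; // worst -> 1.0, best -> 0.0
        std::printf("raw slack %5.1f ns  ->  criticality %.2f\n", slacks[i], crit);
    }
    return 0;
}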
+ * Therefore, if SetupTimingInfo is updated twice in succession without criticalities + * getting updated (update_enabled = false), the returned set cannot account for all + * the connections that have been modified. In this case, we flag `recompute_required` + * as true, and we recompute the criticalities for every connection to ensure that + * they are all up to date. Hence, each time update_setup_slacks_and_criticalities() + * is called, we assign `recompute_required` the opposite value of `update_enabled`. + * + * This class also maps/transforms the modified atom connections/pins returned by the + * timing info into modified clustered netlist connections/pins after calling + * update_criticalities(). The interface then enables users to iterate over this range + * via pins_with_modified_criticalities(). This is useful for incrementally re-calculating + * the timing costs. + * + * The criticalities of individual connections can then be queried by calling the + * criticality() member function. * * Implementation * ============== - * To support incremental re-calculation the class saves the last criticality exponent - * passed to update_criticalites(). If the next update uses the same exponent criticalities - * can be incrementally updated. Otherwise they must be re-calculated from scratch, since - * a change in exponent changes *all* criticalities. + * To support incremental re-calculation, the class saves the last criticality exponent + * passed to PlacerCriticalities::update_criticalities(). If the next update uses the same + * exponent, criticalities can be incrementally updated. Otherwise, they must be re-calculated + * from scratch, since a change in exponent changes *all* criticalities. */ class PlacerCriticalities { public: //Types @@ -55,40 +105,175 @@ class PlacerCriticalities { PlacerCriticalities& operator=(const PlacerCriticalities& clb_nlist) = delete; public: //Accessors - //Returns the criticality of the specified connection + ///@brief Returns the criticality of the specified connection. float criticality(ClusterNetId net, int ipin) const { return timing_place_crit_[net][ipin]; } - //Returns the range of clustered netlist pins (i.e. ClusterPinIds) which were modified - //by the last call to update_criticalities() + /** + * @brief Returns the range of clustered netlist pins (i.e. ClusterPinIds) which + * were modified by the last call to PlacerCriticalities::update_criticalities(). + */ pin_range pins_with_modified_criticality() const; public: //Modifiers - //Incrementally updates criticalities based on the atom netlist criticalitites provied by - //timing_info and the provided criticality_exponent. + /** + * @brief Updates criticalities based on the atom netlist criticalities + * provided by timing_info and the provided criticality_exponent. + * + * Should consistently call this method after the most recent timing analysis to + * keep the criticalities stored in this class in sync with the timing analyzer. + * If out of sync, then the criticalities cannot be incrementally updated + * during the next timing analysis iteration. + */ void update_criticalities(const SetupTimingInfo* timing_info, float criticality_exponent); - //Override the criticality of a particular connection - void set_criticality(ClusterNetId net, int ipin, float val); + ///@brief Override the criticality of a particular connection. + void set_criticality(ClusterNetId net, int ipin, float crit_val); + + ///@brief Set `update_enabled` to true.
+ void enable_update() { update_enabled = true; } + + ///@brief Set `update_enabled` to false. + void disable_update() { update_enabled = false; } private: //Data + ///@brief The clb netlist in the placement context. const ClusteredNetlist& clb_nlist_; - const ClusteredPinAtomPinsLookup& pin_lookup_; - ClbNetPinsMatrix<float> timing_place_crit_; /* [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1] */ + ///@brief The lookup table that maps atom pins to clb pins. + const ClusteredPinAtomPinsLookup& pin_lookup_; - //The criticality exponent when update_criticalites() was last called (used to detect if incremental update can be used) + /** + * @brief The matrix that stores the criticality value for each connection. + * + * Index range: [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1] + */ + ClbNetPinsMatrix<float> timing_place_crit_; + + /** + * The criticality exponent when update_criticalities() was last called + * (used to detect if incremental update can be used). + */ float last_crit_exponent_ = std::numeric_limits<float>::quiet_NaN(); - //Set of pins with criticaltites modified by last call to update_criticalities() + ///@brief Set of pins with criticalities modified by last call to update_criticalities(). vtr::vec_id_set<ClusterPinId> cluster_pins_with_modified_criticality_; + + ///@brief Incremental update. See timing_place.cpp for more. + void incr_update_criticalities(const SetupTimingInfo* timing_info); + + ///@brief From scratch update. See timing_place.cpp for more. + void recompute_criticalities(); + + ///@brief Flag that turns on/off the update_criticalities() routine. + bool update_enabled = true; + + /** + * @brief Flag that checks if criticalities need to be recomputed for all connections. + * + * Used by the method update_criticalities(). An incremental update is not possible + * if this method wasn't called after the previous timing info update. + */ + bool recompute_required = true; }; -/* Usage +/** + * @brief PlacerSetupSlacks returns the RAW setup slacks of clustered netlist connections. + * + * Usage * ===== - * PlacerTimingCosts mimics a 2D array of connection timing costs running from: - * [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1] + * This class mirrors PlacerCriticalities in both its methods and its members. The only + * difference is that this class deals with RAW setup slacks returned by SetupTimingInfo + * rather than criticalities. See the documentation on PlacerCriticalities for more. * - * So it can be used similar to: + * RAW setup slacks are unlike criticalities: their values are not confined between + * 0 and 1, and they can be either positive or negative. + * + * This class also provides iteration over the clustered netlist connections/pins whose + * setup slacks were modified by the last call to update_setup_slacks(). However, this + * utility is mainly used for incrementally committing the setup slack values into the + * structure `connection_setup_slack` used by many placer routines.
+ */ +class PlacerSetupSlacks { + public: //Types + typedef vtr::vec_id_set<ClusterPinId>::iterator pin_iterator; + typedef vtr::vec_id_set<ClusterNetId>::iterator net_iterator; + + typedef vtr::Range<pin_iterator> pin_range; + typedef vtr::Range<net_iterator> net_range; + + public: //Lifetime + PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup); + PlacerSetupSlacks(const PlacerSetupSlacks& clb_nlist) = delete; + PlacerSetupSlacks& operator=(const PlacerSetupSlacks& clb_nlist) = delete; + + public: //Accessors + ///@brief Returns the setup slack of the specified connection. + float setup_slack(ClusterNetId net, int ipin) const { return timing_place_setup_slacks_[net][ipin]; } + + /** + * @brief Returns the range of clustered netlist pins (i.e. ClusterPinIds) + * which were modified by the last call to PlacerSetupSlacks::update_setup_slacks(). + */ + pin_range pins_with_modified_setup_slack() const; + + public: //Modifiers + /** + * @brief Updates setup slacks based on the atom netlist setup slacks provided + * by timing_info. + * + * Should consistently call this method after the most recent timing analysis to + * keep the setup slacks stored in this class in sync with the timing analyzer. + * If out of sync, then the setup slacks cannot be incrementally updated + * during the next timing analysis iteration. + */ + void update_setup_slacks(const SetupTimingInfo* timing_info); + + ///@brief Override the setup slack of a particular connection. + void set_setup_slack(ClusterNetId net, int ipin, float slack_val); + + ///@brief Set `update_enabled` to true. + void enable_update() { update_enabled = true; } + + ///@brief Set `update_enabled` to false. + void disable_update() { update_enabled = false; } + + private: //Data + const ClusteredNetlist& clb_nlist_; + const ClusteredPinAtomPinsLookup& pin_lookup_; + + /** + * @brief The matrix that stores raw setup slack values for each connection. + * + * Index range: [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1] + */ + ClbNetPinsMatrix<float> timing_place_setup_slacks_; + + ///@brief Set of pins with raw setup slacks modified by last call to update_setup_slacks() + vtr::vec_id_set<ClusterPinId> cluster_pins_with_modified_setup_slack_; + + ///@brief Incremental update. See timing_place.cpp for more. + void incr_update_setup_slacks(const SetupTimingInfo* timing_info); + + ///@brief From scratch update. See timing_place.cpp for more. + void recompute_setup_slacks(); + + ///@brief Flag that turns on/off the update_setup_slacks() routine. + bool update_enabled = true; + + /** + * @brief Flag that checks if setup slacks need to be recomputed for all connections. + * + * Used by the method update_setup_slacks(). An incremental update is not possible + * if this method wasn't called after the previous timing info update. + */ + bool recompute_required = true; +}; + +/** + * @brief PlacerTimingCosts mimics a 2D array of connection timing costs running from: + * [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1]. + * + * It can be used similarly to: * * PlacerTimingCosts connection_timing_costs(cluster_ctx.clb_nlist); //Construct * * @@ -99,53 +284,53 @@ class PlacerCriticalities { * * //Potentially other modifications...
* - * //Calculate the updated timing cost, of all connections, incrementally based - * //on modifications + * //Calculate the updated timing cost, of all connections, + * //incrementally based on modifications * float total_timing_cost = connection_timing_costs.total_cost(); - * + * * However behind the scenes PlacerTimingCosts tracks when connection costs are modified, * and efficiently re-calculates the total timing cost incrementally based on the connections * which have had their cost modified. * - * Implementaion - * ============= - * Internally, PlacerTimingCosts stores all connection costs in a flat array in the last part + * Implementation + * ============== + * Internally, PlacerTimingCosts stores all connection costs in a flat array in the last part * of connection_costs_. To mimic 2d-array like access PlacerTimingCosts also uses two proxy * classes which allow indexing in the net and pin dimensions (NetProxy and ConnectionProxy * respectively). * * The first part of connection_costs_ stores intermediate sums of the connection costs for - * efficient incremental re-calculation. More concretely, connection_costs_ stores a binary + * efficient incremental re-calculation. More concretely, connection_costs_ stores a binary * tree, where leaves correspond to individual connection costs and intermediate nodes the - * partial sums of the connection costs. (The binary tree is stored implicitly in the - * connection_costs_ vector, using Eytzinger's/BFS layout.) By summing the entire binary + * partial sums of the connection costs. (The binary tree is stored implicitly in the + * connection_costs_ vector, using Eytzinger's/BFS layout.) By summing the entire binary * tree we calculate the total timing cost over all connections. * * Using a binary tree allows us to efficiently re-calculate the timing costs when only a subset * of connections are changed. This is done by 'invalidating' intermediate nodes (from leaves up - * to the root) which have ancestors (leaves) with modified connection costs. When the + * to the root) which have ancestors (leaves) with modified connection costs. When the * total_cost() method is called, it recursively walks the binary tree to re-calculate the cost. - * Only invalidated nodes are traversed, with valid nodes just returning their previously + * Only invalidated nodes are traversed, with valid nodes just returning their previously * calculated (and unchanged) value. * - * For a circuit with 'K' connections, of which 'k' have changed (typically k << K), this can + * For a circuit with 'K' connections, of which 'k' have changed (typically k << K), this can * be done in O(k log K) time. * - * It is important to note that due to limited floating point precision, floating point + * It is important to note that due to limited floating point precision, floating point * arithmetic has an order dependence (due to round-off). Using a binary tree to total - * the timing connection costs allows us to incrementally update the total timign cost while - * maintianing the *same order of operations* as if it was re-computed from scratch. This + * the timing connection costs allows us to incrementally update the total timing cost while + * maintianing the *same order of operations* as if it was re-computed from scratch. This * ensures we *always* get consistent results regardless of what/when connections are changed. 
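A much-simplified analogue of this structure is sketched below; it is not the actual PlacerTimingCosts layout (which sizes its tree from the netlist and stores connections net by net), but it demonstrates the two mechanisms described above: marking ancestor sums NaN when a leaf cost changes, and re-summing only the invalidated subtrees in a fixed order.

// Simplified implicit-binary-tree sum with NaN invalidation (not VPR code).
// Assumes the number of leaves is a power of two; root at index 1, leaves at [n, 2n).
#include <cmath>
#include <cstdio>
#include <limits>
#include <vector>

class IncrementalSum {
  public:
    explicit IncrementalSum(size_t num_leaves)
        : n_(num_leaves), tree_(2 * num_leaves, 0.0) {}

    void set(size_t leaf, double value) {
        size_t i = n_ + leaf;
        if (tree_[i] == value) return; // unchanged costs invalidate nothing
        tree_[i] = value;
        // Walk up, marking cached partial sums stale; stop at the first
        // already-invalidated ancestor (its ancestors are stale too).
        for (i /= 2; i >= 1 && !std::isnan(tree_[i]); i /= 2) {
            tree_[i] = std::numeric_limits<double>::quiet_NaN();
        }
    }

    double total() { return total_recurr(1); } // ~O(k log K) for k modified leaves

  private:
    double total_recurr(size_t i) {
        if (i >= n_) return tree_[i];               // leaf: actual connection cost
        if (!std::isnan(tree_[i])) return tree_[i]; // still-valid partial sum
        tree_[i] = total_recurr(2 * i) + total_recurr(2 * i + 1);
        return tree_[i];
    }

    size_t n_;
    std::vector<double> tree_;
};

int main() {
    IncrementalSum costs(4);
    for (size_t i = 0; i < 4; ++i) costs.set(i, 1.0);
    std::printf("total = %.1f\n", costs.total()); // 4.0
    costs.set(2, 5.0);                            // one leaf changes
    std::printf("total = %.1f\n", costs.total()); // 8.0; only two partial sums recomputed
    return 0;
}

Because the additions always happen in the same tree order, the incremental total matches a from-scratch recomputation exactly, which is the round-off consistency property the comment above emphasizes.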
* * Proxy Classes - * ------------- + * ============= * NetProxy is returned by PlacerTimingCost's operator[], and stores a pointer to the start of * internal storage of that net's connection costs. * - * ConnectionProxy is returnd by NetProxy's operator[], and holds a reference to a particular - * element of the internal storage pertaining to a specific connection's cost. ConnectionProxy - * supports assignment, allowing clients to modify the connection cost. It also detects if the - * assigned value differs from the previous value and if so, calls PlacerTimingCosts's + * ConnectionProxy is returned by NetProxy's operator[], and holds a reference to a particular + * element of the internal storage pertaining to a specific connection's cost. ConnectionProxy + * supports assignment, allowing clients to modify the connection cost. It also detects if the + * assigned value differs from the previous value and if so, calls PlacerTimingCosts's * invalidate() method on that connection cost. * * PlacerTimingCosts's invalidate() method marks the cost element's ancestors as invalid (NaN) @@ -193,7 +378,9 @@ class PlacerTimingCosts { size_t num_level_before_leaves = num_nodes_in_level(ilevel - 1); VTR_ASSERT_MSG(num_leaves >= num_connections, "Need at least as many leaves as connections"); - VTR_ASSERT_MSG(num_connections == 0 || num_level_before_leaves < num_connections, "Level before should have fewer nodes than connections (to ensure using the smallest binary tree)"); + VTR_ASSERT_MSG( + num_connections == 0 || num_level_before_leaves < num_connections, + "Level before should have fewer nodes than connections (to ensure using the smallest binary tree)"); //We don't need to store all possible leaves if we have fewer connections //(i.e. bottom-right of tree is empty) @@ -213,16 +400,19 @@ class PlacerTimingCosts { } } - //Proxy class representing a connection cost - // Supports modification of connection cost while detecting changes and - // reporting them up to PlacerTimingCosts + /** + * @brief Proxy class representing a connection cost. + * + * Supports modification of connection cost while detecting + * changes and reporting them up to PlacerTimingCosts. + */ class ConnectionProxy { public: ConnectionProxy(PlacerTimingCosts* timing_costs, double& connection_cost) : timing_costs_(timing_costs) , connection_cost_(connection_cost) {} - //Allow clients to modify the connection cost via assignment + ///@brief Allow clients to modify the connection cost via assignment. ConnectionProxy& operator=(double new_cost) { if (new_cost != connection_cost_) { //If connection cost changed, update it, and mark it @@ -233,9 +423,11 @@ class PlacerTimingCosts { return *this; } - //Support getting the current connection cost as a double - // Useful for client code operating on the cost values (e.g. - // difference between costs) + /** + * @brief Support getting the current connection cost as a double. + * + * Useful for client code operating on the cost values (e.g. difference between costs). + */ operator double() { return connection_cost_; } @@ -245,15 +437,18 @@ class PlacerTimingCosts { double& connection_cost_; }; - //Proxy class representing the connection costs of a net - // Supports indexing by pin index to retrieve the ConnectionProxy for that pin/connection + /** + * @brief Proxy class representing the connection costs of a net. + * + * Supports indexing by pin index to retrieve the ConnectionProxy for that pin/connection. 
+ */ class NetProxy { public: NetProxy(PlacerTimingCosts* timing_costs, double* net_sink_costs) : timing_costs_(timing_costs) , net_sink_costs_(net_sink_costs) {} - //Indexes into the specific net pin/connection + ///@brief Indexes into the specific net pin/connection. ConnectionProxy operator[](size_t ipin) { return ConnectionProxy(timing_costs_, net_sink_costs_[ipin]); } @@ -263,7 +458,7 @@ class PlacerTimingCosts { double* net_sink_costs_; }; - //Indexes into the specific net + ///@brief Indexes into the specific net. NetProxy operator[](ClusterNetId net_id) { VTR_ASSERT_SAFE(net_start_indicies_[net_id] >= 0); @@ -282,8 +477,10 @@ class PlacerTimingCosts { std::swap(num_levels_, other.num_levels_); } - //Calculates the total cost of all connections efficiently - //in the face of modified connection costs + /** + * @brief Calculates the total cost of all connections efficiently + * in the face of modified connection costs. + */ double total_cost() { float cost = total_cost_recurr(0); //Root @@ -294,7 +491,7 @@ class PlacerTimingCosts { } private: - //Recursively calculate and update the timing cost rooted at inode + ///@brief Recursively calculate and update the timing cost rooted at inode. double total_cost_recurr(size_t inode) { //Prune out-of-tree if (inode > connection_costs_.size() - 1) { @@ -329,12 +526,18 @@ class PlacerTimingCosts { return node_cost; } - friend ConnectionProxy; //So it can call invalidate() + ///@brief Friend-ed so it can call invalidate(). + friend ConnectionProxy; void invalidate(double* invalidated_cost) { //Check pointer within range of internal storage - VTR_ASSERT_SAFE_MSG(invalidated_cost >= &connection_costs_[0], "Connection cost pointer should be after start of internal storage"); - VTR_ASSERT_SAFE_MSG(invalidated_cost <= &connection_costs_[connection_costs_.size() - 1], "Connection cost pointer should be before end of internal storage"); + VTR_ASSERT_SAFE_MSG( + invalidated_cost >= &connection_costs_[0], + "Connection cost pointer should be after start of internal storage"); + + VTR_ASSERT_SAFE_MSG( + invalidated_cost <= &connection_costs_[connection_costs_.size() - 1], + "Connection cost pointer should be before end of internal storage"); size_t icost = invalidated_cost - &connection_costs_[0]; @@ -343,7 +546,7 @@ class PlacerTimingCosts { //Invalidate parent intermediate costs up to root or first //already-invalidated parent size_t iparent = parent(icost); - ; + while (!std::isnan(connection_costs_[iparent])) { //Invalidate connection_costs_[iparent] = std::numeric_limits::quiet_NaN(); @@ -371,33 +574,41 @@ class PlacerTimingCosts { return (i - 1) / 2; } - //Returns the number of nodes in ilevel'th level - //If ilevel is negative, return 0, since the root shouldn't be counted - //as a leaf node candidate + /** + * @brief Returns the number of nodes in ilevel'th level. + * + * If ilevel is negative, return 0, since the root shouldn't + * be counted as a leaf node candidate. + */ size_t num_nodes_in_level(int ilevel) const { return ilevel < 0 ? 0 : (2 << (ilevel)); } - //Returns the total number of nodes in levels [0..ilevel] (inclusive) + ///@brief Returns the total number of nodes in levels [0..ilevel] (inclusive). size_t num_nodes_up_to_level(int ilevel) const { return (2 << (ilevel + 1)) - 1; } private: - //Vector storing the implicit binary tree of connection costs - // The actual connections are stored at the end of the vector - // (last level of the binary tree). The earlier portions of - // the tree are the intermediate nodes. 
- // - // The methods left_child()/right_child()/parent() can be used - // to traverse the tree by indicies into this vector + /** + * @brief Vector storing the implicit binary tree of connection costs. + * + * The actual connections are stored at the end of the vector + * (last level of the binary tree). The earlier portions of + * the tree are the intermediate nodes. + * + * The methods left_child()/right_child()/parent() can be used + * to traverse the tree by indicies into this vector. + */ std::vector connection_costs_; - //Vector storing the indicies of the first connection for - //each net in the netlist, used for indexing by net. + /** + * @brief Vector storing the indicies of the first connection + * for each net in the netlist, used for indexing by net. + */ vtr::vector net_start_indicies_; - //Number of levels in the binary tree + ///@brief Number of levels in the binary tree. size_t num_levels_ = 0; }; diff --git a/vpr/src/timing/timing_util.cpp b/vpr/src/timing/timing_util.cpp index 6dd2c06d249..d1da2fbc164 100644 --- a/vpr/src/timing/timing_util.cpp +++ b/vpr/src/timing/timing_util.cpp @@ -564,10 +564,13 @@ std::map count_clock_fanouts(const tatum::TimingGraph& } /* - * Slack and criticality calculation utilities + * Criticalities and setup slacks calculation utilities */ -//Return the criticality of a net's pin in the CLB netlist +/** + * @brief Returns the criticality of a net's pin in the CLB netlist. + * Assumes that the timing graph is correct and up to date. + */ float calculate_clb_net_pin_criticality(const SetupTimingInfo& timing_info, const ClusteredPinAtomPinsLookup& pin_lookup, ClusterPinId clb_pin) { //There may be multiple atom netlist pins connected to this CLB pin float clb_pin_crit = 0.; @@ -579,6 +582,21 @@ float calculate_clb_net_pin_criticality(const SetupTimingInfo& timing_info, cons return clb_pin_crit; } +/** + * @brief Returns the raw setup slack of a net's pin in the CLB netlist. + * Assumes that the timing graph is correct and up to date. + */ +float calculate_clb_net_pin_setup_slack(const SetupTimingInfo& timing_info, const ClusteredPinAtomPinsLookup& pin_lookup, ClusterPinId clb_pin) { + //There may be multiple atom netlist pins connected to this CLB pin + float clb_pin_setup_slack = std::numeric_limits::infinity(); + for (const auto atom_pin : pin_lookup.connected_atom_pins(clb_pin)) { + //Take the worst/minimum of the atom pin slack as the CLB pin slack + clb_pin_setup_slack = std::min(clb_pin_setup_slack, timing_info.setup_pin_slack(atom_pin)); + } + + return clb_pin_setup_slack; +} + //Returns the worst (maximum) criticality of the set of slack tags specified. Requires the maximum //required time and worst slack for all domain pairs represent by the slack tags // diff --git a/vpr/src/timing/timing_util.h b/vpr/src/timing/timing_util.h index 87f6b86787b..682771e9763 100644 --- a/vpr/src/timing/timing_util.h +++ b/vpr/src/timing/timing_util.h @@ -183,6 +183,9 @@ class ClusteredPinTimingInvalidator { //Return the criticality of a net's pin in the CLB netlist float calculate_clb_net_pin_criticality(const SetupTimingInfo& timing_info, const ClusteredPinAtomPinsLookup& pin_lookup, ClusterPinId clb_pin); +//Return the setup slack of a net's pin in the CLB netlist +float calculate_clb_net_pin_setup_slack(const SetupTimingInfo& timing_info, const ClusteredPinAtomPinsLookup& pin_lookup, ClusterPinId clb_pin); + //Returns the worst (maximum) criticality of the set of slack tags specified. 
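As a small illustration of the "worst of the connected atom pins" rule used by calculate_clb_net_pin_setup_slack() above (the atom pin slack values here are hypothetical):

// The CLB pin inherits the minimum (most pessimistic) setup slack of its atom pins.
#include <algorithm>
#include <cstdio>
#include <limits>
#include <vector>

int main() {
    std::vector<float> atom_pin_slacks = {0.8f, -0.3f, 0.1f}; // hypothetical values
    float clb_pin_slack = std::numeric_limits<float>::infinity();
    for (float s : atom_pin_slacks) {
        clb_pin_slack = std::min(clb_pin_slack, s);
    }
    std::printf("clb pin setup slack = %.1f\n", clb_pin_slack); // prints -0.3
    return 0;
}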
Requires the maximum //required time and worst slack for all domain pairs represent by the slack tags // diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_quench_slack/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_quench_slack/config/config.txt new file mode 100644 index 00000000000..c61444daf19 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_quench_slack/config/config.txt @@ -0,0 +1,27 @@ +############################################## +# Configuration file for running experiments +############################################## + +# Path to directory of circuits to use +circuits_dir=benchmarks/verilog + +# Path to directory of architectures to use +archs_dir=arch/timing + +# Add circuits to list to sweep +circuit_list_add=stereovision3.v + +# Add architectures to list to sweep +arch_list_add=k6_N10_mem32K_40nm.xml + +# Parse info and how to parse +parse_file=vpr_standard.txt + +# How to parse QoR info +qor_parse_file=qor_standard.txt + +# Pass requirements +pass_requirements_file=pass_requirements.txt + +# Script parameters +script_params = --place_quench_algorithm slack_timing diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_quench_slack/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_quench_slack/config/golden_results.txt new file mode 100644 index 00000000000..5053a6f6894 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_quench_slack/config/golden_results.txt @@ -0,0 +1,2 @@ +arch circuit script_params vtr_flow_elapsed_time error odin_synth_time max_odin_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_time placed_wirelength_est place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time +k6_N10_mem32K_40nm.xml stereovision3.v common 2.19 0.07 9296 4 0.16 -1 -1 32824 -1 -1 19 11 0 0 success v8.0.0-2579-g270d1efd9-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 7.5.0 on Linux-4.15.0-60-generic x86_64 2020-09-04T06:15:46 betzgrp-wintermute.eecg.utoronto.ca /home/hubingra/master/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_quench_slack/run003/k6_N10_mem32K_40nm.xml/stereovision3.v/common 
28964 11 30 262 292 2 104 60 7 7 49 clb auto 0.05 453 0.24 0.13 2.18141 -165.789 -2.18141 2.0954 0.12497 0.10019 0.156789 0.124805 26 608 25 1.07788e+06 1.02399e+06 65453.8 1335.79 0.27 0.252669 0.202403 608 25 973 2367 87670 24993 2.53264 2.50992 -189.166 -2.53264 0 0 80140.9 1635.53 0.03 0.0187426 0.0157532 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/task_list.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/task_list.txt index 5cf098b2f77..e59cabff1c1 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/task_list.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/task_list.txt @@ -70,3 +70,4 @@ regression_tests/vtr_reg_strong/strong_timing_update_type regression_tests/vtr_reg_strong/strong_timing_update_diff regression_tests/vtr_reg_strong/strong_blocks_with_no_inputs regression_tests/vtr_reg_strong/strong_fix_clusters +regression_tests/vtr_reg_strong/strong_place_quench_slack