diff --git a/vpr/src/analytical_place/full_legalizer.cpp b/vpr/src/analytical_place/full_legalizer.cpp
index 78150b70305..6981908c479 100644
--- a/vpr/src/analytical_place/full_legalizer.cpp
+++ b/vpr/src/analytical_place/full_legalizer.cpp
@@ -58,7 +58,7 @@ std::unique_ptr<FullLegalizer> make_full_legalizer(e_ap_full_legalizer full_lega
                                                    const APNetlist& ap_netlist,
                                                    const AtomNetlist& atom_netlist,
                                                    const Prepacker& prepacker,
-                                                   t_vpr_setup& vpr_setup,
+                                                   const t_vpr_setup& vpr_setup,
                                                    const t_arch& arch,
                                                    const DeviceGrid& device_grid) {
     switch (full_legalizer_type) {
@@ -513,8 +513,8 @@ void APPack::legalize(const PartialPlacement& p_placement) {
     }
 
     // Run the Packer stage with the flat placement as a hint.
-    try_pack(&vpr_setup_.PackerOpts,
-             &vpr_setup_.AnalysisOpts,
+    try_pack(vpr_setup_.PackerOpts,
+             vpr_setup_.AnalysisOpts,
              arch_,
              vpr_setup_.RoutingArch,
              vpr_setup_.PackerRRGraph,
diff --git a/vpr/src/analytical_place/full_legalizer.h b/vpr/src/analytical_place/full_legalizer.h
index 62c42d1b722..3532022760d 100644
--- a/vpr/src/analytical_place/full_legalizer.h
+++ b/vpr/src/analytical_place/full_legalizer.h
@@ -37,7 +37,7 @@ class FullLegalizer {
     FullLegalizer(const APNetlist& ap_netlist,
                   const AtomNetlist& atom_netlist,
                   const Prepacker& prepacker,
-                  t_vpr_setup& vpr_setup,
+                  const t_vpr_setup& vpr_setup,
                   const t_arch& arch,
                   const DeviceGrid& device_grid)
         : ap_netlist_(ap_netlist)
@@ -68,7 +68,7 @@ class FullLegalizer {
 
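-    /// @brief The VPR setup options passed into the VPR flow. This must be
-    ///        mutable since some parts of packing modify the options.
-    t_vpr_setup& vpr_setup_;
+    /// @brief The VPR setup options passed into the VPR flow. Held by const
+    ///        reference: the options are only read here, never modified.
+    const t_vpr_setup& vpr_setup_;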
 
     /// @brief Information on the architecture of the FPGA.
     const t_arch& arch_;
@@ -84,7 +84,7 @@ std::unique_ptr<FullLegalizer> make_full_legalizer(e_ap_full_legalizer full_lega
                                                    const APNetlist& ap_netlist,
                                                    const AtomNetlist& atom_netlist,
                                                    const Prepacker& prepacker,
-                                                   t_vpr_setup& vpr_setup,
+                                                   const t_vpr_setup& vpr_setup,
                                                    const t_arch& arch,
                                                    const DeviceGrid& device_grid);
 
diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp
index eb9af5943ad..fe046fd932e 100644
--- a/vpr/src/base/SetupVPR.cpp
+++ b/vpr/src/base/SetupVPR.cpp
@@ -591,10 +591,6 @@ void SetupPackerOpts(const t_options& Options,
     PackerOpts->feasible_block_array_size = Options.pack_feasible_block_array_size;
     PackerOpts->use_attraction_groups = Options.use_attraction_groups;
 
-    //TODO: document?
-    PackerOpts->inter_cluster_net_delay = 1.0; /* DEFAULT */
-    PackerOpts->auto_compute_inter_cluster_net_delay = true;
-
     PackerOpts->device_layout = Options.device_layout;
 
     PackerOpts->timing_update_type = Options.timing_update_type;
diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp
index b1de3da9729..712fa5619c1 100644
--- a/vpr/src/base/ShowSetup.cpp
+++ b/vpr/src/base/ShowSetup.cpp
@@ -757,7 +757,6 @@ static void ShowPackerOpts(const t_packer_opts& PackerOpts) {
     }
     VTR_LOG("PackerOpts.connection_driven: %s", (PackerOpts.connection_driven ? "true\n" : "false\n"));
     VTR_LOG("PackerOpts.global_clocks: %s", (PackerOpts.global_clocks ? "true\n" : "false\n"));
-    VTR_LOG("PackerOpts.inter_cluster_net_delay: %f\n", PackerOpts.inter_cluster_net_delay);
     VTR_LOG("PackerOpts.timing_driven: %s", (PackerOpts.timing_driven ? "true\n" : "false\n"));
     VTR_LOG("PackerOpts.target_external_pin_util: %s", vtr::join(PackerOpts.target_external_pin_util, " ").c_str());
     VTR_LOG("\n");
diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp
index afd4c211160..3e2c0fd4f48 100644
--- a/vpr/src/base/vpr_api.cpp
+++ b/vpr/src/base/vpr_api.cpp
@@ -620,7 +620,7 @@ bool vpr_pack(t_vpr_setup& vpr_setup, const t_arch& arch) {
     const Prepacker prepacker(g_vpr_ctx.atom().netlist(),
                               g_vpr_ctx.device().logical_block_types);
 
-    return try_pack(&vpr_setup.PackerOpts, &vpr_setup.AnalysisOpts,
+    return try_pack(vpr_setup.PackerOpts, vpr_setup.AnalysisOpts,
                     arch, vpr_setup.RoutingArch,
                     vpr_setup.PackerRRGraph,
                     prepacker,
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 56e68526277..78f28407612 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -717,9 +717,7 @@ struct t_packer_opts {
     enum e_cluster_seed cluster_seed_type;
     float alpha;
     float beta;
-    float inter_cluster_net_delay;
     float target_device_utilization;
-    bool auto_compute_inter_cluster_net_delay;
     e_unrelated_clustering allow_unrelated_clustering;
     bool connection_driven;
     int pack_verbosity;
diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp
index 0a5dcd88577..dd307168a36 100644
--- a/vpr/src/pack/cluster_util.cpp
+++ b/vpr/src/pack/cluster_util.cpp
@@ -2,17 +2,13 @@
 #include <algorithm>
 #include <unordered_set>
 
-#include "PreClusterTimingGraphResolver.h"
-#include "PreClusterDelayCalculator.h"
 #include "atom_netlist.h"
 #include "attraction_groups.h"
 #include "cluster_legalizer.h"
 #include "clustered_netlist.h"
-#include "concrete_timing_info.h"
+#include "globals.h"
 #include "output_clustering.h"
 #include "prepack.h"
-#include "tatum/TimingReporter.hpp"
-#include "tatum/echo_writer.hpp"
 #include "vpr_context.h"
 
 /*Print the contents of each cluster to an echo file*/
@@ -67,58 +63,6 @@ static void echo_clusters(char* filename, const ClusterLegalizer& cluster_legali
     fclose(fp);
 }
 
-void calc_init_packing_timing(const t_packer_opts& packer_opts,
-                              const t_analysis_opts& analysis_opts,
-                              const Prepacker& prepacker,
-                              std::shared_ptr<PreClusterDelayCalculator>& clustering_delay_calc,
-                              std::shared_ptr<SetupTimingInfo>& timing_info,
-                              vtr::vector<AtomBlockId, float>& atom_criticality) {
-    const AtomContext& atom_ctx = g_vpr_ctx.atom();
-
-    /*
-     * Initialize the timing analyzer
-     */
-    clustering_delay_calc = std::make_shared<PreClusterDelayCalculator>(atom_ctx.netlist(), atom_ctx.lookup(), packer_opts.inter_cluster_net_delay, prepacker);
-    timing_info = make_setup_timing_info(clustering_delay_calc, packer_opts.timing_update_type);
-
-    //Calculate the initial timing
-    timing_info->update();
-
-    if (isEchoFileEnabled(E_ECHO_PRE_PACKING_TIMING_GRAPH)) {
-        auto& timing_ctx = g_vpr_ctx.timing();
-        tatum::write_echo(getEchoFileName(E_ECHO_PRE_PACKING_TIMING_GRAPH),
-                          *timing_ctx.graph, *timing_ctx.constraints, *clustering_delay_calc, timing_info->analyzer());
-
-        tatum::NodeId debug_tnode = id_or_pin_name_to_tnode(analysis_opts.echo_dot_timing_graph_node);
-        write_setup_timing_graph_dot(getEchoFileName(E_ECHO_PRE_PACKING_TIMING_GRAPH) + std::string(".dot"),
-                                     *timing_info, debug_tnode);
-    }
-
-    {
-        auto& timing_ctx = g_vpr_ctx.timing();
-        PreClusterTimingGraphResolver resolver(atom_ctx.netlist(),
-                                               atom_ctx.lookup(), *timing_ctx.graph, *clustering_delay_calc);
-        resolver.set_detail_level(analysis_opts.timing_report_detail);
-
-        tatum::TimingReporter timing_reporter(resolver, *timing_ctx.graph,
-                                              *timing_ctx.constraints);
-
-        timing_reporter.report_timing_setup(
-            "pre_pack.report_timing.setup.rpt",
-            *timing_info->setup_analyzer(),
-            analysis_opts.timing_report_npaths);
-    }
-
-    //Calculate true criticalities of each block
-    for (AtomBlockId blk : atom_ctx.netlist().blocks()) {
-        for (AtomPinId in_pin : atom_ctx.netlist().block_input_pins(blk)) {
-            //Max criticality over incoming nets
-            float crit = timing_info->setup_pin_criticality(in_pin);
-            atom_criticality[blk] = std::max(atom_criticality[blk], crit);
-        }
-    }
-}
-
 void check_and_output_clustering(ClusterLegalizer& cluster_legalizer,
                                  const t_packer_opts& packer_opts,
                                  const std::unordered_set<AtomNetId>& is_clock,
diff --git a/vpr/src/pack/cluster_util.h b/vpr/src/pack/cluster_util.h
index 8f74ed9c91a..4f4c2b5bec8 100644
--- a/vpr/src/pack/cluster_util.h
+++ b/vpr/src/pack/cluster_util.h
@@ -11,10 +11,6 @@ class AttractionInfo;
 class ClusterBlockId;
 class ClusterLegalizer;
 class ClusteredNetlist;
-class PreClusterDelayCalculator;
-class Prepacker;
-class SetupTimingInfo;
-class t_pack_molecule;
 struct AtomContext;
 
 /**
@@ -26,16 +22,6 @@ struct AtomContext;
 /*   Clustering helper functions   */
 /***********************************/
 
-/*
- * @brief Calculate the initial timing at the start of packing stage.
- */
-void calc_init_packing_timing(const t_packer_opts& packer_opts,
-                              const t_analysis_opts& analysis_opts,
-                              const Prepacker& prepacker,
-                              std::shared_ptr<PreClusterDelayCalculator>& clustering_delay_calc,
-                              std::shared_ptr<SetupTimingInfo>& timing_info,
-                              vtr::vector<AtomBlockId, float>& atom_criticality);
-
 /*
  * @brief Check clustering legality and output it.
  */
diff --git a/vpr/src/pack/greedy_candidate_selector.cpp b/vpr/src/pack/greedy_candidate_selector.cpp
index 26a0f7f2ec3..b202035ec59 100644
--- a/vpr/src/pack/greedy_candidate_selector.cpp
+++ b/vpr/src/pack/greedy_candidate_selector.cpp
@@ -10,6 +10,7 @@
 #include <cmath>
 #include <queue>
 #include <vector>
+#include "PreClusterTimingManager.h"
 #include "appack_context.h"
 #include "flat_placement_types.h"
 #include "flat_placement_utils.h"
@@ -90,7 +91,7 @@ GreedyCandidateSelector::GreedyCandidateSelector(
     const std::unordered_set<AtomNetId>& is_clock,
     const std::unordered_set<AtomNetId>& is_global,
     const std::unordered_set<AtomNetId>& net_output_feeds_driving_block_input,
-    const SetupTimingInfo& timing_info,
+    const PreClusterTimingManager& pre_cluster_timing_manager,
     const APPackContext& appack_ctx,
     int log_verbosity)
     : atom_netlist_(atom_netlist)
@@ -103,7 +104,7 @@ GreedyCandidateSelector::GreedyCandidateSelector(
     , is_clock_(is_clock)
     , is_global_(is_global)
     , net_output_feeds_driving_block_input_(net_output_feeds_driving_block_input)
-    , timing_info_(timing_info)
+    , pre_cluster_timing_manager_(pre_cluster_timing_manager)
     , appack_ctx_(appack_ctx)
     , rng_(0) {
 
@@ -544,12 +545,15 @@ void GreedyCandidateSelector::update_timing_gain_values(
     if (net_output_feeds_driving_block_input_.count(net_id) != 0)
         pins = atom_netlist_.net_sinks(net_id);
 
+    // Get the setup timing info used to compute timing gain terms.
+    const SetupTimingInfo& timing_info = pre_cluster_timing_manager_.get_timing_info();
+
     if (net_relation_to_clustered_block == e_net_relation_to_clustered_block::OUTPUT
         && !is_global_.count(net_id)) {
         for (AtomPinId pin_id : pins) {
             AtomBlockId blk_id = atom_netlist_.pin_block(pin_id);
             if (!cluster_legalizer.is_atom_clustered(blk_id)) {
-                double timing_gain = timing_info_.setup_pin_criticality(pin_id);
+                double timing_gain = timing_info.setup_pin_criticality(pin_id);
 
                 if (cluster_gain_stats.timing_gain.count(blk_id) == 0) {
                     cluster_gain_stats.timing_gain[blk_id] = 0;
@@ -569,7 +573,7 @@ void GreedyCandidateSelector::update_timing_gain_values(
 
         if (!cluster_legalizer.is_atom_clustered(new_blk_id)) {
             for (AtomPinId pin_id : atom_netlist_.net_sinks(net_id)) {
-                double timing_gain = timing_info_.setup_pin_criticality(pin_id);
+                double timing_gain = timing_info.setup_pin_criticality(pin_id);
 
                 if (cluster_gain_stats.timing_gain.count(new_blk_id) == 0) {
                     cluster_gain_stats.timing_gain[new_blk_id] = 0;
diff --git a/vpr/src/pack/greedy_candidate_selector.h b/vpr/src/pack/greedy_candidate_selector.h
index 89931662a54..2b3eb23a1f5 100644
--- a/vpr/src/pack/greedy_candidate_selector.h
+++ b/vpr/src/pack/greedy_candidate_selector.h
@@ -26,8 +26,8 @@
 class AtomNetlist;
 class AttractionInfo;
 class FlatPlacementInfo;
+class PreClusterTimingManager;
 class Prepacker;
-class SetupTimingInfo;
 class t_pack_high_fanout_thresholds;
 struct t_model;
 struct t_molecule_stats;
@@ -225,9 +225,10 @@ class GreedyCandidateSelector {
      *              The set of nets whose output feeds the block that drives
      *              itself. This may cause double-counting in the gain
      *              calculations and needs special handling.
-     *  @param timing_info
-     *              Setup timing info for this Atom Netlist. Used to incorporate
-     *              timing / criticality into the gain calculation.
+     *  @param pre_cluster_timing_manager
+     *              Timing manager that holds the information on timing of
+     *              different connections in the circuit. Used for computing
+     *              the timing gain terms.
      *  @param appack_ctx
      *              The APPack context which contains options for the flat
      *              placement guided packing.
@@ -244,7 +245,7 @@ class GreedyCandidateSelector {
                             const std::unordered_set<AtomNetId>& is_clock,
                             const std::unordered_set<AtomNetId>& is_global,
                             const std::unordered_set<AtomNetId>& net_output_feeds_driving_block_input,
-                            const SetupTimingInfo& timing_info,
+                            const PreClusterTimingManager& pre_cluster_timing_manager,
                             const APPackContext& appack_ctx,
                             int log_verbosity);
 
@@ -565,8 +566,9 @@ class GreedyCandidateSelector {
     ///        drive them.
     const std::unordered_set<AtomNetId>& net_output_feeds_driving_block_input_;
 
-    /// @brief Setup timing info used to help select critical candidates to pack.
-    const SetupTimingInfo& timing_info_;
+    /// @brief The pre-clustering timing manager which holds the timing information
+    ///        of the primitive netlist.
+    const PreClusterTimingManager& pre_cluster_timing_manager_;
 
     /// @brief Inter-block nets within a finalized cluster. Used for finding
     ///        transitive candidates.
diff --git a/vpr/src/pack/greedy_clusterer.cpp b/vpr/src/pack/greedy_clusterer.cpp
index 6312c8be79c..7673005af93 100644
--- a/vpr/src/pack/greedy_clusterer.cpp
+++ b/vpr/src/pack/greedy_clusterer.cpp
@@ -79,6 +79,7 @@ GreedyClusterer::GreedyClusterer(const t_packer_opts& packer_opts,
                                  const t_pack_high_fanout_thresholds& high_fanout_thresholds,
                                  const std::unordered_set<AtomNetId>& is_clock,
                                  const std::unordered_set<AtomNetId>& is_global,
+                                 const PreClusterTimingManager& pre_cluster_timing_manager,
                                  const APPackContext& appack_ctx)
     : packer_opts_(packer_opts)
     , analysis_opts_(analysis_opts)
@@ -87,6 +88,7 @@ GreedyClusterer::GreedyClusterer(const t_packer_opts& packer_opts,
     , high_fanout_thresholds_(high_fanout_thresholds)
     , is_clock_(is_clock)
     , is_global_(is_global)
+    , pre_cluster_timing_manager_(pre_cluster_timing_manager)
     , appack_ctx_(appack_ctx)
     , primitive_candidate_block_types_(identify_primitive_candidate_block_types())
     , log_verbosity_(packer_opts.pack_verbosity)
@@ -113,18 +115,6 @@ GreedyClusterer::do_clustering(ClusterLegalizer& cluster_legalizer,
     t_cluster_progress_stats clustering_stats;
     clustering_stats.num_molecules = prepacker.molecules().size();
 
-    // TODO: Create a ClusteringTimingManager class.
-    //       This code relies on the prepacker, once the prepacker is moved to
-    //       the constructor, this code can also move to the constructor.
-    std::shared_ptr<PreClusterDelayCalculator> clustering_delay_calc;
-    std::shared_ptr<SetupTimingInfo> timing_info;
-    // Default criticalities set to zero (e.g. if not timing driven)
-    vtr::vector<AtomBlockId, float> atom_criticality(atom_netlist_.blocks().size(), 0.f);
-    if (packer_opts_.timing_driven) {
-        calc_init_packing_timing(packer_opts_, analysis_opts_, prepacker,
-                                 clustering_delay_calc, timing_info, atom_criticality);
-    }
-
     // Calculate the max molecule stats, which is used for gain calculation.
     const t_molecule_stats max_molecule_stats = prepacker.calc_max_molecule_stats(atom_netlist_);
 
@@ -140,7 +130,7 @@ GreedyClusterer::do_clustering(ClusterLegalizer& cluster_legalizer,
                                                is_clock_,
                                                is_global_,
                                                net_output_feeds_driving_block_input_,
-                                               *timing_info,
+                                               pre_cluster_timing_manager_,
                                                appack_ctx_,
                                                log_verbosity_);
 
@@ -149,7 +139,7 @@ GreedyClusterer::do_clustering(ClusterLegalizer& cluster_legalizer,
                                      prepacker,
                                      packer_opts_.cluster_seed_type,
                                      max_molecule_stats,
-                                     atom_criticality);
+                                     pre_cluster_timing_manager_);
 
     // Pick the first seed molecule.
     PackMoleculeId seed_mol_id = seed_selector.get_next_seed(prepacker,
diff --git a/vpr/src/pack/greedy_clusterer.h b/vpr/src/pack/greedy_clusterer.h
index e246d9c679d..4c805ffa594 100644
--- a/vpr/src/pack/greedy_clusterer.h
+++ b/vpr/src/pack/greedy_clusterer.h
@@ -22,7 +22,7 @@ class AtomNetlist;
 class AttractionInfo;
 class DeviceContext;
 class GreedyCandidateSelector;
-class SetupTimingInfo;
+class PreClusterTimingManager;
 class t_pack_high_fanout_thresholds;
 struct t_analysis_opts;
 struct t_clustering_data;
@@ -76,6 +76,11 @@ class GreedyClusterer {
      *              The set of global nets in the Atom Netlist. These will be
      *              routed on special dedicated networks, and hence are less
-     *              relavent to locality / attraction.
+     *              relevant to locality / attraction.
+     *  @param pre_cluster_timing_manager
+     *              Timing manager class which holds the timing information of
+     *              the primitive netlist. Used by the seed selector to select
+     *              critical seeds and the candidate selector to select
+     *              timing critical candidates.
      *  @param appack_ctx
      *              The APPack state. This contains the options used to
      *              configure APPack and the flat placement.
@@ -87,6 +92,7 @@ class GreedyClusterer {
                     const t_pack_high_fanout_thresholds& high_fanout_thresholds,
                     const std::unordered_set<AtomNetId>& is_clock,
                     const std::unordered_set<AtomNetId>& is_global,
+                    const PreClusterTimingManager& pre_cluster_timing_manager,
                     const APPackContext& appack_ctx);
 
     /**
@@ -233,6 +239,9 @@ class GreedyClusterer {
     /// @brief A set of atom nets which are considered as global nets.
     const std::unordered_set<AtomNetId>& is_global_;
 
+    /// @brief Timing manager class which holds the primitive-level timing information.
+    const PreClusterTimingManager& pre_cluster_timing_manager_;
+
     /// @brief The APPack state. This is used by the candidate selector to try
     ///        and propose better candidates based on a flat placement.
     const APPackContext& appack_ctx_;
diff --git a/vpr/src/pack/greedy_seed_selector.cpp b/vpr/src/pack/greedy_seed_selector.cpp
index 592ddced59d..c9d1b9397c5 100644
--- a/vpr/src/pack/greedy_seed_selector.cpp
+++ b/vpr/src/pack/greedy_seed_selector.cpp
@@ -9,6 +9,7 @@
 
 #include <algorithm>
 #include <cmath>
+#include "PreClusterTimingManager.h"
 #include "flat_placement_types.h"
 #include "atom_netlist.h"
 #include "cluster_legalizer.h"
@@ -167,10 +168,21 @@ GreedySeedSelector::GreedySeedSelector(const AtomNetlist& atom_netlist,
                                        const Prepacker& prepacker,
                                        const e_cluster_seed seed_type,
                                        const t_molecule_stats& max_molecule_stats,
-                                       const vtr::vector<AtomBlockId, float>& atom_criticality)
+                                       const PreClusterTimingManager& pre_cluster_timing_manager)
     : seed_atoms_(atom_netlist.blocks().begin(), atom_netlist.blocks().end()) {
     // Seed atoms list is initialized with all atoms in the atom netlist.
 
+    // Pre-compute the criticality of each atom
+    // Default criticalities set to zero (e.g. if not timing driven)
+    vtr::vector<AtomBlockId, float> atom_criticality(atom_netlist.blocks().size(), 0.0f);
+    if (pre_cluster_timing_manager.is_valid()) {
+        // If the timing manager is valid (meaning the packing is timing driven)
+        // compute the criticality of each atom.
+        for (AtomBlockId atom_blk_id : atom_netlist.blocks()) {
+            atom_criticality[atom_blk_id] = pre_cluster_timing_manager.calc_atom_setup_criticality(atom_blk_id, atom_netlist);
+        }
+    }
+
     // Maintain a lookup table of the seed gain for each atom. This will be
     // used to sort the seed atoms.
     // Initially all gains are zero.
diff --git a/vpr/src/pack/greedy_seed_selector.h b/vpr/src/pack/greedy_seed_selector.h
index 16bbbc7cf19..5f152f65236 100644
--- a/vpr/src/pack/greedy_seed_selector.h
+++ b/vpr/src/pack/greedy_seed_selector.h
@@ -14,6 +14,7 @@
 // Forward declarations
 class AtomNetlist;
 class ClusterLegalizer;
+class PreClusterTimingManager;
 struct t_molecule_stats;
 
 /**
@@ -44,14 +45,15 @@ class GreedySeedSelector {
      *  @param max_molecule_stats
      *              The maximum stats over all molecules. Used for normalizing
      *              terms in the gain.
-     *  @param atom_criticality
-     *              The timing criticality of each atom.
+     *  @param pre_cluster_timing_manager
+     *              Timing manager class for the primitive netlist. Used to
+     *              compute the criticalities of seeds.
      */
     GreedySeedSelector(const AtomNetlist& atom_netlist,
                        const Prepacker& prepacker,
                        const e_cluster_seed seed_type,
                        const t_molecule_stats& max_molecule_stats,
-                       const vtr::vector<AtomBlockId, float>& atom_criticality);
+                       const PreClusterTimingManager& pre_cluster_timing_manager);
 
     /**
      * @brief Propose a new seed molecule to start a new cluster with. If no
diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp
index 5f4f2849b52..ae1cde8244d 100644
--- a/vpr/src/pack/pack.cpp
+++ b/vpr/src/pack/pack.cpp
@@ -2,23 +2,23 @@
 #include "pack.h"
 
 #include <unordered_set>
-#include "appack_context.h"
-#include "flat_placement_types.h"
+#include "PreClusterTimingManager.h"
 #include "SetupGrid.h"
+#include "appack_context.h"
 #include "attraction_groups.h"
 #include "cluster_legalizer.h"
 #include "cluster_util.h"
 #include "constraints_report.h"
+#include "flat_placement_types.h"
 #include "globals.h"
 #include "greedy_clusterer.h"
 #include "partition_region.h"
-#include "physical_types_util.h"
 #include "prepack.h"
+#include "stats.h"
 #include "verify_flat_placement.h"
 #include "vpr_context.h"
 #include "vpr_error.h"
 #include "vpr_types.h"
-#include "stats.h"
 #include "vtr_assert.h"
 #include "vtr_log.h"
 
@@ -27,32 +27,8 @@ static bool try_size_device_grid(const t_arch& arch,
                                  float target_device_utilization,
                                  const std::string& device_layout_name);
 
-/**
- * Since the parameters of a switch may change as a function of its fanin,
- * to get an estimation of inter-cluster delays we need a reasonable estimation
- * of the fan-ins of switches that connect clusters together. These switches are
- * 1) opin to wire switch
- * 2) wire to wire switch
- * 3) wire to ipin switch
- * We can estimate the fan-in of these switches based on the Fc_in/Fc_out of
- * a logic block, and the switch block Fs value
- */
-static void get_intercluster_switch_fanin_estimates(const t_arch& arch,
-                                                    const t_det_routing_arch& routing_arch,
-                                                    const std::string& device_layout,
-                                                    const int wire_segment_length,
-                                                    int* opin_switch_fanin,
-                                                    int* wire_switch_fanin,
-                                                    int* ipin_switch_fanin);
-
-static float get_arch_switch_info(short switch_index, int switch_fanin, float& Tdel_switch, float& R_switch, float& Cout_switch);
-
-static float approximate_inter_cluster_delay(const t_arch& arch,
-                                             const t_det_routing_arch& routing_arch,
-                                             const std::string& device_layout);
-
-bool try_pack(t_packer_opts* packer_opts,
-              const t_analysis_opts* analysis_opts,
+bool try_pack(const t_packer_opts& packer_opts,
+              const t_analysis_opts& analysis_opts,
               const t_arch& arch,
               const t_det_routing_arch& routing_arch,
               std::vector<t_lb_type_rr_node>* lb_type_rr_graphs,
@@ -65,7 +41,7 @@ bool try_pack(t_packer_opts* packer_opts,
     DeviceContext& mutable_device_ctx = g_vpr_ctx.mutable_device();
 
     std::unordered_set<AtomNetId> is_clock, is_global;
-    VTR_LOG("Begin packing '%s'.\n", packer_opts->circuit_file_name.c_str());
+    VTR_LOG("Begin packing '%s'.\n", packer_opts.circuit_file_name.c_str());
 
     is_clock = alloc_and_load_is_clock();
     is_global.insert(is_clock.begin(), is_clock.end());
@@ -91,6 +67,17 @@ bool try_pack(t_packer_opts* packer_opts,
      */
     AttractionInfo attraction_groups(false);
 
+    // Setup pre-clustering timing analysis
+    PreClusterTimingManager pre_cluster_timing_manager(packer_opts.timing_driven,
+                                                       atom_ctx.netlist(),
+                                                       atom_ctx.lookup(),
+                                                       prepacker,
+                                                       packer_opts.timing_update_type,
+                                                       arch,
+                                                       routing_arch,
+                                                       packer_opts.device_layout,
+                                                       analysis_opts);
+
     // We keep track of the overfilled partition regions from all pack iterations in
     // this vector. This is so that if the first iteration fails due to overfilled
     // partition regions, and it fails again, we can carry over the previous failed
@@ -115,34 +102,23 @@ bool try_pack(t_packer_opts* packer_opts,
         }
     }
 
-    if (packer_opts->auto_compute_inter_cluster_net_delay) {
-        float interc_delay = UNDEFINED;
-        if (packer_opts->timing_driven) {
-            interc_delay = approximate_inter_cluster_delay(arch,
-                                                           routing_arch,
-                                                           packer_opts->device_layout);
-        }
-        packer_opts->inter_cluster_net_delay = interc_delay;
-        VTR_LOG("Using inter-cluster delay: %g\n", packer_opts->inter_cluster_net_delay);
-    }
-
     // During clustering, a block is related to un-clustered primitives with nets.
     // This relation has three types: low fanout, high fanout, and transitive
     // high_fanout_thresholds stores the threshold for nets to a block type to
     // be considered high fanout.
-    t_pack_high_fanout_thresholds high_fanout_thresholds(packer_opts->high_fanout_threshold);
+    t_pack_high_fanout_thresholds high_fanout_thresholds(packer_opts.high_fanout_threshold);
 
     bool allow_unrelated_clustering = false;
-    if (packer_opts->allow_unrelated_clustering == e_unrelated_clustering::ON) {
+    if (packer_opts.allow_unrelated_clustering == e_unrelated_clustering::ON) {
         allow_unrelated_clustering = true;
-    } else if (packer_opts->allow_unrelated_clustering == e_unrelated_clustering::OFF) {
+    } else if (packer_opts.allow_unrelated_clustering == e_unrelated_clustering::OFF) {
         allow_unrelated_clustering = false;
     }
 
     bool balance_block_type_util = false;
-    if (packer_opts->balance_block_type_utilization == e_balance_block_type_util::ON) {
+    if (packer_opts.balance_block_type_utilization == e_balance_block_type_util::ON) {
         balance_block_type_util = true;
-    } else if (packer_opts->balance_block_type_utilization == e_balance_block_type_util::OFF) {
+    } else if (packer_opts.balance_block_type_utilization == e_balance_block_type_util::OFF) {
         balance_block_type_util = false;
     }
 
@@ -151,11 +127,11 @@ bool try_pack(t_packer_opts* packer_opts,
     ClusterLegalizer cluster_legalizer(atom_ctx.netlist(),
                                        prepacker,
                                        lb_type_rr_graphs,
-                                       packer_opts->target_external_pin_util,
+                                       packer_opts.target_external_pin_util,
                                        high_fanout_thresholds,
                                        ClusterLegalizationStrategy::SKIP_INTRA_LB_ROUTE,
-                                       packer_opts->enable_pin_feasibility_filter,
-                                       packer_opts->pack_verbosity);
+                                       packer_opts.enable_pin_feasibility_filter,
+                                       packer_opts.pack_verbosity);
     VTR_LOG("Packing with pin utilization targets: %s\n", cluster_legalizer.get_target_external_pin_util().to_string().c_str());
     VTR_LOG("Packing with high fanout thresholds: %s\n", high_fanout_thresholds.to_string().c_str());
 
@@ -163,13 +139,14 @@ bool try_pack(t_packer_opts* packer_opts,
     APPackContext appack_ctx(flat_placement_info, device_ctx.grid);
 
     // Initialize the greedy clusterer.
-    GreedyClusterer clusterer(*packer_opts,
-                              *analysis_opts,
+    GreedyClusterer clusterer(packer_opts,
+                              analysis_opts,
                               atom_ctx.netlist(),
                               arch,
                               high_fanout_thresholds,
                               is_clock,
                               is_global,
+                              pre_cluster_timing_manager,
                               appack_ctx);
 
     g_vpr_ctx.mutable_atom().mutable_lookup().set_atom_pb_bimap_lock(true);
@@ -187,7 +164,7 @@ bool try_pack(t_packer_opts* packer_opts,
                                                           mutable_device_ctx);
 
         //Try to size/find a device
-        bool fits_on_device = try_size_device_grid(arch, num_used_type_instances, packer_opts->target_device_utilization, packer_opts->device_layout);
+        bool fits_on_device = try_size_device_grid(arch, num_used_type_instances, packer_opts.target_device_utilization, packer_opts.device_layout);
 
         /* We use this bool to determine the cause for the clustering not being dense enough. If the clustering
          * is not dense enough and there are floorplan constraints, it is presumed that the constraints are the cause
@@ -205,11 +182,11 @@ bool try_pack(t_packer_opts* packer_opts,
             //1st pack attempt was unsuccessful (i.e. not dense enough) and we have control of unrelated clustering
             //
             //Turn it on to increase packing density
-            if (packer_opts->allow_unrelated_clustering == e_unrelated_clustering::AUTO) {
+            if (packer_opts.allow_unrelated_clustering == e_unrelated_clustering::AUTO) {
                 VTR_ASSERT(allow_unrelated_clustering == false);
                 allow_unrelated_clustering = true;
             }
-            if (packer_opts->balance_block_type_utilization == e_balance_block_type_util::AUTO) {
+            if (packer_opts.balance_block_type_utilization == e_balance_block_type_util::AUTO) {
                 VTR_ASSERT(balance_block_type_util == false);
                 balance_block_type_util = true;
             }
@@ -313,7 +290,7 @@ bool try_pack(t_packer_opts* packer_opts,
     g_vpr_ctx.mutable_atom().mutable_lookup().set_atom_pb_bimap_lock(false);
     g_vpr_ctx.mutable_atom().mutable_lookup().set_atom_to_pb_bimap(cluster_legalizer.atom_pb_lookup());
     //check clustering and output it
-    check_and_output_clustering(cluster_legalizer, *packer_opts, is_clock, &arch);
+    check_and_output_clustering(cluster_legalizer, packer_opts, is_clock, &arch);
 
     VTR_LOG("\n");
     VTR_LOG("Netlist conversion complete.\n");
@@ -322,24 +299,6 @@ bool try_pack(t_packer_opts* packer_opts,
     return true;
 }
 
-static float get_arch_switch_info(short switch_index, int switch_fanin, float& Tdel_switch, float& R_switch, float& Cout_switch) {
-    /* Fetches delay, resistance and output capacitance of the architecture switch at switch_index.
-     * Returns the total delay through the switch. Used to calculate inter-cluster net delay. */
-
-    /* The intrinsic delay may depend on fanin to the switch. If the delay map of a
-     * switch from the architecture file has multiple (#inputs, delay) entries, we
-     * interpolate/extrapolate to get the delay at 'switch_fanin'. */
-    auto& device_ctx = g_vpr_ctx.device();
-
-    Tdel_switch = device_ctx.arch_switch_inf[switch_index].Tdel(switch_fanin);
-    R_switch = device_ctx.arch_switch_inf[switch_index].R;
-    Cout_switch = device_ctx.arch_switch_inf[switch_index].Cout;
-
-    /* The delay through a loaded switch is its intrinsic (unloaded)
-     * delay plus the product of its resistance and output capacitance. */
-    return Tdel_switch + R_switch * Cout_switch;
-}
-
 std::unordered_set<AtomNetId> alloc_and_load_is_clock() {
     /* Looks through all the atom blocks to find and mark all the clocks, by setting
      * the corresponding entry by adding the clock to is_clock.
@@ -409,132 +368,3 @@ static bool try_size_device_grid(const t_arch& arch,
 
     return fits_on_device;
 }
-
-static void get_intercluster_switch_fanin_estimates(const t_arch& arch,
-                                                    const t_det_routing_arch& routing_arch,
-                                                    const std::string& device_layout,
-                                                    const int wire_segment_length,
-                                                    int* opin_switch_fanin,
-                                                    int* wire_switch_fanin,
-                                                    int* ipin_switch_fanin) {
-    // W is unknown pre-packing, so *if* we need W here, we will assume a value of 100
-    constexpr int W = 100;
-
-    //Build a dummy 10x10 device to determine the 'best' block type to use
-    auto grid = create_device_grid(device_layout, arch.grid_layouts, 10, 10);
-
-    auto type = find_most_common_tile_type(grid);
-    /* get Fc_in/out for most common block (e.g. logic blocks) */
-    VTR_ASSERT(!type->fc_specs.empty());
-
-    //Estimate the maximum Fc_in/Fc_out
-    float Fc_in = 0.f;
-    float Fc_out = 0.f;
-    for (const t_fc_specification& fc_spec : type->fc_specs) {
-        float Fc = fc_spec.fc_value;
-
-        if (fc_spec.fc_value_type == e_fc_value_type::ABSOLUTE) {
-            //Convert to estimated fractional
-            Fc /= W;
-        }
-        VTR_ASSERT_MSG(Fc >= 0 && Fc <= 1., "Fc should be fractional");
-
-        for (int ipin : fc_spec.pins) {
-            e_pin_type pin_type = get_pin_type_from_pin_physical_num(type, ipin);
-
-            if (pin_type == DRIVER) {
-                Fc_out = std::max(Fc, Fc_out);
-            } else {
-                VTR_ASSERT(pin_type == RECEIVER);
-                Fc_in = std::max(Fc, Fc_in);
-            }
-        }
-    }
-
-    /* Estimates of switch fan-in are done as follows:
-     * 1) opin to wire switch:
-     * 2 CLBs connect to a channel, each with #opins/4 pins. Each pin has Fc_out*W
-     * switches, and then we assume the switches are distributed evenly over the W wires.
-     * In the unidirectional case, all these switches are then crammed down to W/wire_segment_length wires.
-     *
-     * Unidirectional: 2 * #opins_per_side * Fc_out * wire_segment_length
-     * Bidirectional:  2 * #opins_per_side * Fc_out
-     *
-     * 2) wire to wire switch
-     * A wire segment in a switchblock connects to Fs other wires. Assuming these connections are evenly
-     * distributed, each target wire receives Fs connections as well. In the unidirectional case,
-     * source wires can only connect to W/wire_segment_length wires.
-     *
-     * Unidirectional: Fs * wire_segment_length
-     * Bidirectional:  Fs
-     *
-     * 3) wire to ipin switch
-     * An input pin of a CLB simply receives Fc_in connections.
-     *
-     * Unidirectional: Fc_in
-     * Bidirectional:  Fc_in
-     */
-
-    /* Fan-in to opin/ipin/wire switches depends on whether the architecture is unidirectional/bidirectional */
-    (*opin_switch_fanin) = 2.f * type->num_drivers / 4.f * Fc_out;
-    (*wire_switch_fanin) = routing_arch.Fs;
-    (*ipin_switch_fanin) = Fc_in;
-    if (routing_arch.directionality == UNI_DIRECTIONAL) {
-        /* adjustments to opin-to-wire and wire-to-wire switch fan-ins */
-        (*opin_switch_fanin) *= wire_segment_length;
-        (*wire_switch_fanin) *= wire_segment_length;
-    } else if (routing_arch.directionality == BI_DIRECTIONAL) {
-        /* no adjustments need to be made here */
-    } else {
-        VPR_FATAL_ERROR(VPR_ERROR_PACK, "Unrecognized directionality: %d\n",
-                        (int)routing_arch.directionality);
-    }
-}
-
-static float approximate_inter_cluster_delay(const t_arch& arch,
-                                             const t_det_routing_arch& routing_arch,
-                                             const std::string& device_layout) {
-
-    /* If needed, estimate inter-cluster delay. Assume the average routing hop goes out of
-     * a block through an opin switch to a length-4 wire, then through a wire switch to another
-     * length-4 wire, then through a wire-to-ipin-switch into another block. */
-    constexpr int wire_segment_length = 4;
-
-    /* We want to determine a reasonable fan-in to the opin, wire, and ipin switches, based
-     * on which the intercluster delays can be estimated. The fan-in of a switch influences its
-     * delay.
-     *
-     * The fan-in of the switch depends on the architecture (unidirectional/bidirectional), as
-     * well as Fc_in/out and Fs */
-    int opin_switch_fanin, wire_switch_fanin, ipin_switch_fanin;
-    get_intercluster_switch_fanin_estimates(arch, routing_arch, device_layout, wire_segment_length, &opin_switch_fanin,
-                                            &wire_switch_fanin, &ipin_switch_fanin);
-
-    float Tdel_opin_switch, R_opin_switch, Cout_opin_switch;
-    float opin_switch_del = get_arch_switch_info(arch.Segments[0].arch_opin_switch, opin_switch_fanin,
-                                                 Tdel_opin_switch, R_opin_switch, Cout_opin_switch);
-
-    float Tdel_wire_switch, R_wire_switch, Cout_wire_switch;
-    float wire_switch_del = get_arch_switch_info(arch.Segments[0].arch_wire_switch, wire_switch_fanin,
-                                                 Tdel_wire_switch, R_wire_switch, Cout_wire_switch);
-
-    float Tdel_wtoi_switch, R_wtoi_switch, Cout_wtoi_switch;
-    float wtoi_switch_del = get_arch_switch_info(routing_arch.wire_to_arch_ipin_switch, ipin_switch_fanin,
-                                                 Tdel_wtoi_switch, R_wtoi_switch, Cout_wtoi_switch);
-
-    float Rmetal = arch.Segments[0].Rmetal;
-    float Cmetal = arch.Segments[0].Cmetal;
-
-    /* The delay of a wire with its driving switch is the switch delay plus the
-     * product of the equivalent resistance and capacitance experienced by the wire. */
-
-    float first_wire_seg_delay = opin_switch_del
-                                 + (R_opin_switch + Rmetal * (float)wire_segment_length / 2)
-                                       * (Cout_opin_switch + Cmetal * (float)wire_segment_length);
-    float second_wire_seg_delay = wire_switch_del
-                                  + (R_wire_switch + Rmetal * (float)wire_segment_length / 2)
-                                        * (Cout_wire_switch + Cmetal * (float)wire_segment_length);
-
-    /* multiply by 4 to get a more conservative estimate */
-    return 4 * (first_wire_seg_delay + second_wire_seg_delay + wtoi_switch_del);
-}
diff --git a/vpr/src/pack/pack.h b/vpr/src/pack/pack.h
index 2d22a8dc230..c0cb1a4581f 100644
--- a/vpr/src/pack/pack.h
+++ b/vpr/src/pack/pack.h
@@ -31,8 +31,8 @@ struct t_packer_opts;
  *              provided by the user as a hint for packing. Will be invalid if
  *              there is no flat placement information provided.
  */
-bool try_pack(t_packer_opts* packer_opts,
-              const t_analysis_opts* analysis_opts,
+bool try_pack(const t_packer_opts& packer_opts,
+              const t_analysis_opts& analysis_opts,
               const t_arch& arch,
               const t_det_routing_arch& routing_arch,
               std::vector<t_lb_type_rr_node>* lb_type_rr_graphs,
diff --git a/vpr/src/timing/PreClusterTimingManager.cpp b/vpr/src/timing/PreClusterTimingManager.cpp
new file mode 100644
index 00000000000..ec3b6a44958
--- /dev/null
+++ b/vpr/src/timing/PreClusterTimingManager.cpp
@@ -0,0 +1,276 @@
+/**
+ * @file
+ * @author  Alex Singer
+ * @date    April 2025
+ * @brief   Implementation of the pre-cluster timing manager class.
+ */
+
+#include "PreClusterTimingManager.h"
+#include <algorithm>
+#include <memory>
+#include "PreClusterDelayCalculator.h"
+#include "PreClusterTimingGraphResolver.h"
+#include "SetupGrid.h"
+#include "atom_lookup.h"
+#include "atom_netlist.h"
+#include "atom_netlist_fwd.h"
+#include "concrete_timing_info.h"
+#include "physical_types_util.h"
+#include "prepack.h"
+#include "tatum/TimingReporter.hpp"
+#include "tatum/echo_writer.hpp"
+#include "vpr_types.h"
+#include "vtr_assert.h"
+
+/**
+ * Since the parameters of a switch may change as a function of its fanin,
+ * to get an estimation of inter-cluster delays we need a reasonable estimation
+ * of the fan-ins of switches that connect clusters together. These switches are
+ * 1) opin to wire switch
+ * 2) wire to wire switch
+ * 3) wire to ipin switch
+ * We can estimate the fan-in of these switches based on the Fc_in/Fc_out of
+ * a logic block, and the switch block Fs value
+ */
+static void get_intercluster_switch_fanin_estimates(const t_arch& arch,
+                                                    const t_det_routing_arch& routing_arch,
+                                                    const std::string& device_layout,
+                                                    const int wire_segment_length,
+                                                    int* opin_switch_fanin,
+                                                    int* wire_switch_fanin,
+                                                    int* ipin_switch_fanin);
+
+static float get_arch_switch_info(short switch_index, int switch_fanin, float& Tdel_switch, float& R_switch, float& Cout_switch);
+
+static float approximate_inter_cluster_delay(const t_arch& arch,
+                                             const t_det_routing_arch& routing_arch,
+                                             const std::string& device_layout);
+
+PreClusterTimingManager::PreClusterTimingManager(bool timing_driven,
+                                                 const AtomNetlist& atom_netlist,
+                                                 const AtomLookup& atom_lookup,
+                                                 const Prepacker& prepacker,
+                                                 e_timing_update_type timing_update_type,
+                                                 const t_arch& arch,
+                                                 const t_det_routing_arch& routing_arch,
+                                                 const std::string& device_layout,
+                                                 const t_analysis_opts& analysis_opts) {
+    // When timing driven: builds the delay calculator and setup timing info, runs
+    // an initial timing update and writes the echo files / pre-pack timing report.
+    // Otherwise none of the timing objects are initialized and the valid flag is
+    // set to false, so the object can still be passed through the VPR flow.
+    if (!timing_driven) {
+        is_valid_ = false;
+        return;
+    }
+    is_valid_ = true;
+
+    // Approximate the inter-cluster delay from the architecture description.
+    // FIXME: This can probably be simplified. It can also be improved using
+    //        AP information.
+    float inter_cluster_net_delay = approximate_inter_cluster_delay(arch, routing_arch, device_layout);
+    VTR_LOG("Using inter-cluster delay: %g\n", inter_cluster_net_delay);
+
+    // Initialize the delay calculator and the setup timing info built on it.
+    clustering_delay_calc_ = std::make_shared<PreClusterDelayCalculator>(atom_netlist,
+                                                                         atom_lookup,
+                                                                         inter_cluster_net_delay,
+                                                                         prepacker);
+    timing_info_ = make_setup_timing_info(clustering_delay_calc_, timing_update_type);
+
+    // Calculate the initial timing.
+    timing_info_->update();
+
+    // If requested, write the timing graph echo file and its ".dot" counterpart.
+    if (isEchoFileEnabled(E_ECHO_PRE_PACKING_TIMING_GRAPH)) {
+        auto& timing_ctx = g_vpr_ctx.timing();
+        tatum::write_echo(getEchoFileName(E_ECHO_PRE_PACKING_TIMING_GRAPH),
+                          *timing_ctx.graph, *timing_ctx.constraints, *clustering_delay_calc_, timing_info_->analyzer());
+
+        tatum::NodeId debug_tnode = id_or_pin_name_to_tnode(analysis_opts.echo_dot_timing_graph_node);
+        write_setup_timing_graph_dot(getEchoFileName(E_ECHO_PRE_PACKING_TIMING_GRAPH) + std::string(".dot"),
+                                     *timing_info_, debug_tnode);
+    }
+
+    // Write the setup timing report to "pre_pack.report_timing.setup.rpt".
+    {
+        auto& timing_ctx = g_vpr_ctx.timing();
+        PreClusterTimingGraphResolver resolver(atom_netlist,
+                                               atom_lookup,
+                                               *timing_ctx.graph,
+                                               *clustering_delay_calc_);
+        resolver.set_detail_level(analysis_opts.timing_report_detail);
+
+        tatum::TimingReporter timing_reporter(resolver, *timing_ctx.graph,
+                                              *timing_ctx.constraints);
+
+        timing_reporter.report_timing_setup(
+            "pre_pack.report_timing.setup.rpt",
+            *timing_info_->setup_analyzer(),
+            analysis_opts.timing_report_npaths);
+    }
+}
+
+static float approximate_inter_cluster_delay(const t_arch& arch,
+                                             const t_det_routing_arch& routing_arch,
+                                             const std::string& device_layout) {
+
+    /* Model of an average inter-cluster routing hop: the signal leaves a block
+     * through an opin switch onto a length-4 wire, passes through a wire switch
+     * onto a second length-4 wire, then enters a block through a wire-to-ipin switch. */
+    constexpr int seg_length = 4;
+
+    /* Switch delay is a function of switch fan-in, so reasonable fan-ins for the
+     * opin, wire and ipin switches are estimated first; the inter-cluster delay
+     * is then estimated from them.
+     *
+     * These fan-ins depend on the architecture (unidirectional/bidirectional), as
+     * well as Fc_in/out and Fs */
+    int opin_fanin, wire_fanin, ipin_fanin;
+    get_intercluster_switch_fanin_estimates(arch, routing_arch, device_layout, seg_length,
+                                            &opin_fanin, &wire_fanin, &ipin_fanin);
+
+    float opin_Tdel, opin_R, opin_Cout;
+    float opin_delay = get_arch_switch_info(arch.Segments[0].arch_opin_switch, opin_fanin,
+                                            opin_Tdel, opin_R, opin_Cout);
+
+    float wire_Tdel, wire_R, wire_Cout;
+    float wire_delay = get_arch_switch_info(arch.Segments[0].arch_wire_switch, wire_fanin,
+                                            wire_Tdel, wire_R, wire_Cout);
+
+    float ipin_Tdel, ipin_R, ipin_Cout;
+    float ipin_delay = get_arch_switch_info(routing_arch.wire_to_arch_ipin_switch, ipin_fanin,
+                                            ipin_Tdel, ipin_R, ipin_Cout);
+
+    const float metal_R = arch.Segments[0].Rmetal;
+    const float metal_C = arch.Segments[0].Cmetal;
+
+    /* A wire plus its driving switch is charged the switch delay plus the product
+     * of the equivalent resistance and capacitance experienced by the wire. */
+
+    float first_hop_delay = opin_delay
+                            + (opin_R + metal_R * static_cast<float>(seg_length) / 2)
+                                  * (opin_Cout + metal_C * static_cast<float>(seg_length));
+    float second_hop_delay = wire_delay
+                             + (wire_R + metal_R * static_cast<float>(seg_length) / 2)
+                                   * (wire_Cout + metal_C * static_cast<float>(seg_length));
+
+    /* multiply by 4 to get a more conservative estimate */
+    return 4 * (first_hop_delay + second_hop_delay + ipin_delay);
+}
+
+static float get_arch_switch_info(short switch_index, int switch_fanin, float& Tdel_switch, float& R_switch, float& Cout_switch) {
+    /* Looks up the architecture switch at switch_index and writes its delay,
+     * resistance and output capacitance into the three output arguments.
+     * Returns the total delay through the switch (used to calculate the
+     * inter-cluster net delay).
+     *
+     * The intrinsic delay is queried at 'switch_fanin' since it may depend on fan-in. */
+    const auto& arch_switch = g_vpr_ctx.device().arch_switch_inf[switch_index];
+
+    Tdel_switch = arch_switch.Tdel(switch_fanin);
+    R_switch = arch_switch.R;
+    Cout_switch = arch_switch.Cout;
+
+    /* Loaded switch delay = intrinsic (unloaded) delay + the product of the
+     * switch resistance and its output capacitance (R * C). */
+    return Tdel_switch + R_switch * Cout_switch;
+}
+
+static void get_intercluster_switch_fanin_estimates(const t_arch& arch,
+                                                    const t_det_routing_arch& routing_arch,
+                                                    const std::string& device_layout,
+                                                    const int wire_segment_length,
+                                                    int* opin_switch_fanin,
+                                                    int* wire_switch_fanin,
+                                                    int* ipin_switch_fanin) {
+    // The channel width is not known before packing; whenever one is needed
+    // below, a value of 100 is assumed.
+    constexpr int assumed_W = 100;
+
+    // A dummy 10x10 device is built only to pick the most common tile type
+    // (e.g. logic blocks), whose Fc_in/Fc_out are used for the estimates.
+    auto dummy_grid = create_device_grid(device_layout, arch.grid_layouts, 10, 10);
+    auto type = find_most_common_tile_type(dummy_grid);
+    VTR_ASSERT(!type->fc_specs.empty());
+
+    // Track the largest fractional Fc seen over the receiver and driver pins.
+    float max_fc_in = 0.f;
+    float max_fc_out = 0.f;
+    for (const t_fc_specification& spec : type->fc_specs) {
+        // Absolute Fc values are converted to an estimated fraction of W.
+        float Fc = spec.fc_value;
+        if (spec.fc_value_type == e_fc_value_type::ABSOLUTE) {
+            Fc /= assumed_W;
+        }
+        VTR_ASSERT_MSG(Fc >= 0 && Fc <= 1., "Fc should be fractional");
+
+        for (int pin : spec.pins) {
+            e_pin_type pin_type = get_pin_type_from_pin_physical_num(type, pin);
+
+            if (pin_type != DRIVER) {
+                VTR_ASSERT(pin_type == RECEIVER);
+                max_fc_in = std::max(Fc, max_fc_in);
+            } else {
+                max_fc_out = std::max(Fc, max_fc_out);
+            }
+        }
+    }
+
+    /* The switch fan-ins are estimated as follows:
+     * 1) opin to wire switch:
+     * Two CLBs drive each channel, each contributing #opins/4 pins. Every pin has
+     * Fc_out*W switches, which are assumed to be spread evenly over the W wires.
+     * In the unidirectional case, these switches all land on only W/wire_segment_length wires.
+     *
+     * Unidirectional: 2 * #opins_per_side * Fc_out * wire_segment_length
+     * Bidirectional:  2 * #opins_per_side * Fc_out
+     *
+     * 2) wire to wire switch:
+     * In a switchblock a wire segment connects to Fs other wires. If these connections
+     * are spread evenly, each target wire also receives Fs connections. In the
+     * unidirectional case, source wires can only connect to W/wire_segment_length wires.
+     *
+     * Unidirectional: Fs * wire_segment_length
+     * Bidirectional:  Fs
+     *
+     * 3) wire to ipin switch:
+     * A CLB input pin simply receives Fc_in connections.
+     *
+     * Unidirectional: Fc_in
+     * Bidirectional:  Fc_in
+     */
+
+    /* Base (bidirectional) estimates. NOTE(review): the float expressions are truncated
+     * on assignment to int, so a fractional Fc_in stores 0 here -- confirm this is intended. */
+    (*opin_switch_fanin) = 2.f * type->num_drivers / 4.f * max_fc_out;
+    (*wire_switch_fanin) = routing_arch.Fs;
+    (*ipin_switch_fanin) = max_fc_in;
+    const auto directionality = routing_arch.directionality;
+    if (directionality != UNI_DIRECTIONAL && directionality != BI_DIRECTIONAL) {
+        VPR_FATAL_ERROR(VPR_ERROR_PACK, "Unrecognized directionality: %d\n",
+                        static_cast<int>(directionality));
+    }
+    if (directionality == UNI_DIRECTIONAL) {
+        /* only the opin-to-wire and wire-to-wire fan-ins scale with the segment length */
+        (*opin_switch_fanin) *= wire_segment_length;
+        (*wire_switch_fanin) *= wire_segment_length;
+    }
+}
+
+float PreClusterTimingManager::calc_atom_setup_criticality(AtomBlockId blk_id,
+                                                           const AtomNetlist& atom_netlist) const {
+    VTR_ASSERT_SAFE_MSG(is_valid_,
+                        "PreClusterTimingManager has not been initialized");
+    VTR_ASSERT_SAFE_MSG(blk_id.is_valid(),
+                        "Invalid block ID");
+
+    // A block is as critical as its most critical input pin (0 if it has none).
+    float max_input_crit = 0.0f;
+    for (AtomPinId input_pin : atom_netlist.block_input_pins(blk_id)) {
+        const float input_crit = timing_info_->setup_pin_criticality(input_pin);
+        max_input_crit = std::max(max_input_crit, input_crit);
+    }
+
+    return max_input_crit;
+}
diff --git a/vpr/src/timing/PreClusterTimingManager.h b/vpr/src/timing/PreClusterTimingManager.h
new file mode 100644
index 00000000000..f76489b8ee9
--- /dev/null
+++ b/vpr/src/timing/PreClusterTimingManager.h
@@ -0,0 +1,108 @@
+/**
+ * @file
+ * @author  Alex Singer
+ * @date    April 2025
+ * @brief   Manager class for pre-cluster (primitive-level) timing analysis.
+ */
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include "vpr_types.h"
+#include "vtr_assert.h"
+
+// Forward declarations.
+class AtomLookup;
+class AtomNetlist;
+class PreClusterDelayCalculator;
+class Prepacker;
+class SetupTimingInfo;
+
+/**
+ * @brief Pre-cluster timing manager class.
+ *
+ * This class encapsulates the timing computations used prior to clustering.
+ * This maintains all of the state necessary to perform these timing computations.
+ */
+class PreClusterTimingManager {
+  public:
+    /**
+     * @brief Constructor for the manager class.
+     *
+     * If timing_driven is set to true, this constructor will perform a setup
+     * timing analysis with a pre-clustered delay model. The delay model uses
+     * the primitive delays specified in the architecture file and a simple
+     * estimate of routing (a typical routing delay based on the wire delays
+     * found in the architecture, and more specific delays for direct connections
+     * like carry chains whose use we already know from the pre-packing).
+     *
+     *  @param timing_driven
+     *          Whether timing information should be computed. The object
+     *          must exist even when timing is turned off; in that case
+     *          nothing is initialized and the valid flag is set to false.
+     *  @param atom_netlist
+     *          The primitive netlist to perform timing analysis over.
+     *  @param atom_lookup
+     *          A lookup between the primitives and their timing nodes.
+     *  @param prepacker
+     *          The prepacker object used to prepack primitives into molecules.
+     *  @param timing_update_type
+     *          The type of timing update this class should perform.
+     *  @param arch
+     *          The architecture.
+     *  @param routing_arch
+     *          The routing architecture.
+     *  @param device_layout
+     *          The device layout used to build the dummy grid for delay estimation.
+     *  @param analysis_opts
+     *          Options for the timing analysis in VPR.
+     */
+    PreClusterTimingManager(bool timing_driven,
+                            const AtomNetlist& atom_netlist,
+                            const AtomLookup& atom_lookup,
+                            const Prepacker& prepacker,
+                            e_timing_update_type timing_update_type,
+                            const t_arch& arch,
+                            const t_det_routing_arch& routing_arch,
+                            const std::string& device_layout,
+                            const t_analysis_opts& analysis_opts);
+
+    /**
+     * @brief Calculates the setup criticality of the given primitive block.
+     *
+     * Currently defined as the maximum criticality over the block inputs.
+     */
+    float calc_atom_setup_criticality(AtomBlockId blk_id,
+                                      const AtomNetlist& atom_netlist) const;
+
+    /**
+     * @brief Returns whether or not the pre-cluster timing manager was
+     *        initialized (i.e. timing information can be computed).
+     */
+    bool is_valid() const {
+        return is_valid_;
+    }
+
+    /**
+     * @brief Get a reference to the setup timing info (requires is_valid()).
+     */
+    const SetupTimingInfo& get_timing_info() const {
+        VTR_ASSERT_SAFE_MSG(is_valid_,
+                            "Timing manager has not been initialized");
+        return *timing_info_;
+    }
+
+  private:
+    /// @brief A valid flag used to signify if the pre-cluster timing manager
+    ///        class has been initialized or not. If the flow is not
+    ///        timing-driven, this object is a shell with no timing information.
+    bool is_valid_;
+
+    /// @brief The delay calculator used for computing timing.
+    std::shared_ptr<PreClusterDelayCalculator> clustering_delay_calc_;
+
+    /// @brief The setup timing info used for getting the timing of edges
+    ///        in the timing graph.
+    std::shared_ptr<SetupTimingInfo> timing_info_;
+};