diff --git a/.gitignore b/.gitignore index 3582f2ee54d..ff6abf7f9a8 100644 --- a/.gitignore +++ b/.gitignore @@ -155,3 +155,8 @@ tags cmake-build-debug cmake-build-release /.metadata/ + +# +# Clangd +# +compile_commands.json diff --git a/doc/src/api/vpr/route_tree.rst b/doc/src/api/vpr/route_tree.rst index 7be12dda86a..8515381bac7 100644 --- a/doc/src/api/vpr/route_tree.rst +++ b/doc/src/api/vpr/route_tree.rst @@ -20,3 +20,10 @@ RouteTreeNode .. doxygenclass:: RouteTreeNode :project: vpr :members: + +RTExploredNode +------------- + +.. doxygenclass:: RTExploredNode + :project: vpr + :members: diff --git a/doc/src/api/vprinternals/router_heap.rst b/doc/src/api/vprinternals/router_heap.rst index cb652811e6c..1d213379a89 100644 --- a/doc/src/api/vprinternals/router_heap.rst +++ b/doc/src/api/vprinternals/router_heap.rst @@ -2,30 +2,13 @@ Router Heap ============== -t_heap ----------- -.. doxygenstruct:: t_heap - :project: vpr - :members: - HeapInterface ---------- .. doxygenclass:: HeapInterface :project: vpr :members: -HeapStorage ----------- -.. doxygenclass:: HeapStorage - :project: vpr - :members: - -KAryHeap +DAryHeap ---------- -.. doxygenclass:: KAryHeap +.. doxygenclass:: DAryHeap :project: vpr - -FourAryHeap ----------- -.. doxygenclass:: FourAryHeap - :project: vpr \ No newline at end of file diff --git a/doc/src/vpr/command_line_usage.rst b/doc/src/vpr/command_line_usage.rst index be5724431f4..16d02691001 100644 --- a/doc/src/vpr/command_line_usage.rst +++ b/doc/src/vpr/command_line_usage.rst @@ -1396,7 +1396,7 @@ The following options are only valid when the router is in timing-driven mode (t **Default:** ``safe`` -.. option:: --routing_budgets_algorithm { disable | minimax | scale_delay } +.. option:: --routing_budgets_algorithm { disable | minimax | yoyo | scale_delay } .. warning:: Experimental @@ -1404,7 +1404,9 @@ The following options are only valid when the router is in timing-driven mode (t ``disable`` is used to disable the budget feature. 
This uses the default VPR and ignores hold time constraints. - ``minimax`` sets the minimum and maximum budgets by distributing the long path and short path slacks depending on the the current delay values. This uses the routing cost valleys and Minimax-PERT algorithm :cite:`minimax_pert,RCV_algorithm`. + ``minimax`` sets the minimum and maximum budgets by distributing the long path and short path slacks depending on the current delay values. This uses the Minimax-PERT algorithm :cite:`minimax_pert`. + + ``yoyo`` allocates budgets using the minimax algorithm (as above), and enables hold slack resolution in the router using the Routing Cost Valleys (RCV) algorithm :cite:`RCV_algorithm`. ``scale_delay`` has the minimum budgets set to 0 and the maximum budgets is set to the delay of a net scaled by the pin criticality (net delay/pin criticality). diff --git a/utils/route_diag/src/main.cpp b/utils/route_diag/src/main.cpp index 7b9c170fbe7..d0a6ff6034e 100644 --- a/utils/route_diag/src/main.cpp +++ b/utils/route_diag/src/main.cpp @@ -114,7 +114,7 @@ static void do_one_route(const Netlist<>& net_list, is_flat); enable_router_debug(router_opts, ParentNetId(), sink_node, 1, &router); bool found_path; - t_heap cheapest; + RTExploredNode cheapest; ConnectionParameters conn_params(ParentNetId::INVALID(), -1, false, diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp index 3af8faa8713..f7af0074b55 100644 --- a/vpr/src/base/ShowSetup.cpp +++ b/vpr/src/base/ShowSetup.cpp @@ -468,9 +468,6 @@ static void ShowRouterOpts(const t_router_opts& RouterOpts) { case e_heap_type::FOUR_ARY_HEAP: VTR_LOG("FOUR_ARY_HEAP\n"); break; - case e_heap_type::BUCKET_HEAP_APPROXIMATION: - VTR_LOG("BUCKET_HEAP_APPROXIMATION\n"); - break; default: VPR_FATAL_ERROR(VPR_ERROR_UNKNOWN, "Unknown router_heap\n"); } diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 97b3c9babaa..7ddaa08feff 100644 --- a/vpr/src/base/read_options.cpp +++ 
b/vpr/src/base/read_options.cpp @@ -272,7 +272,7 @@ struct RouteBudgetsAlgorithm { } std::vector default_choices() { - return {"minimax", "scale_delay", "disable"}; + return {"minimax", "yoyo", "scale_delay", "disable"}; } }; @@ -1063,8 +1063,6 @@ struct ParseRouterHeap { conv_value.set_value(e_heap_type::BINARY_HEAP); else if (str == "four_ary") conv_value.set_value(e_heap_type::FOUR_ARY_HEAP); - else if (str == "bucket") - conv_value.set_value(e_heap_type::BUCKET_HEAP_APPROXIMATION); else { std::stringstream msg; msg << "Invalid conversion from '" << str << "' to e_heap_type (expected one of: " << argparse::join(default_choices(), ", ") << ")"; @@ -1077,11 +1075,9 @@ struct ParseRouterHeap { ConvertedValue conv_value; if (val == e_heap_type::BINARY_HEAP) conv_value.set_value("binary"); - else if (val == e_heap_type::FOUR_ARY_HEAP) - conv_value.set_value("four_ary"); else { - VTR_ASSERT(val == e_heap_type::BUCKET_HEAP_APPROXIMATION); - conv_value.set_value("bucket"); + VTR_ASSERT(val == e_heap_type::FOUR_ARY_HEAP); + conv_value.set_value("four_ary"); } return conv_value; } diff --git a/vpr/src/base/read_route.cpp b/vpr/src/base/read_route.cpp index b137c007327..d2d3bc14d54 100644 --- a/vpr/src/base/read_route.cpp +++ b/vpr/src/base/read_route.cpp @@ -20,7 +20,6 @@ #include #include #include -#include #include "atom_netlist.h" #include "atom_netlist_utils.h" @@ -46,7 +45,6 @@ #include "route_common.h" #include "route_tree.h" #include "read_route.h" -#include "four_ary_heap.h" #include "old_traceback.h" diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index d2bc5f03da7..73e52554e90 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -1607,7 +1607,10 @@ constexpr bool is_src_sink(e_rr_type type) { return (type == SOURCE || type == S * is being used. * @param backward_path_cost Total cost of the path up to and including this * node. 
- * @param occ The current occupancy of the associated rr node + * @param R_upstream Upstream resistance to ground from this node in the current + * path search (connection routing), including the resistance + * of the node itself (device_ctx.rr_nodes[index].R). + * @param occ The current occupancy of the associated rr node. */ struct t_rr_node_route_inf { RREdgeId prev_edge; @@ -1615,6 +1618,7 @@ struct t_rr_node_route_inf { float acc_cost; float path_cost; float backward_path_cost; + float R_upstream; public: //Accessors short occ() const { return occ_; } diff --git a/vpr/src/route/binary_heap.cpp b/vpr/src/route/binary_heap.cpp deleted file mode 100644 index 8053960d955..00000000000 --- a/vpr/src/route/binary_heap.cpp +++ /dev/null @@ -1,77 +0,0 @@ -#include "binary_heap.h" -#include "vtr_log.h" - -// child indices of a heap -static inline size_t left(size_t i) { return i << 1; } -static inline size_t right(size_t i) { return (i << 1) + 1; } - -inline size_t BinaryHeap::parent(size_t i) const { return i >> 1; } - -bool BinaryHeap::is_valid() const { - if (heap_.empty()) { - return false; - } - - for (size_t i = 1; i <= heap_tail_ >> 1; ++i) { - if (left(i) < heap_tail_ && heap_[left(i)].cost < heap_[i].cost) return false; - if (right(i) < heap_tail_ && heap_[right(i)].cost < heap_[i].cost) return false; - } - - return true; -} - -t_heap* BinaryHeap::get_heap_head() { - /* Returns a pointer to the smallest element on the heap, or NULL if the * - * heap is empty. Invalid (index == OPEN) entries on the heap are never * - * returned -- they are just skipped over. */ - - t_heap* cheapest; - size_t hole, child; - - do { - if (heap_tail_ == 1) { /* Empty heap. 
*/ - VTR_LOG_WARN("Empty heap occurred in get_heap_head.\n"); - return (nullptr); - } - - cheapest = heap_[1].elem_ptr; - - hole = 1; - child = 2; - - --heap_tail_; - - while (child < heap_tail_) { - if (heap_[child + 1].cost < heap_[child].cost) - ++child; // become right child - - heap_[hole] = heap_[child]; - hole = child; - child = left(child); - } - - sift_up(hole, heap_[heap_tail_]); - } while (!cheapest->index.is_valid()); /* Get another one if invalid entry. */ - - return (cheapest); -} - -// make a heap rooted at index hole by **sifting down** in O(lgn) time -void BinaryHeap::sift_down(size_t hole) { - heap_elem head{heap_[hole]}; - size_t child{left(hole)}; - - while (child < heap_tail_) { - if (child + 1 < heap_tail_ && heap_[child + 1].cost < heap_[child].cost) - ++child; - - if (heap_[child].cost < head.cost) { - heap_[hole] = heap_[child]; - hole = child; - child = left(child); - } else - break; - } - - heap_[hole] = head; -} \ No newline at end of file diff --git a/vpr/src/route/binary_heap.h b/vpr/src/route/binary_heap.h deleted file mode 100644 index 2857200c0a3..00000000000 --- a/vpr/src/route/binary_heap.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef VTR_BINARY_HEAP_H -#define VTR_BINARY_HEAP_H - -#include "k_ary_heap.h" -#include - -class BinaryHeap : public KAryHeap { - public: - bool is_valid() const final; - t_heap* get_heap_head() final; - - private: - void sift_down(size_t hole) final; - size_t parent(size_t i) const final; -}; - -#endif //VTR_BINARY_HEAP_H diff --git a/vpr/src/route/bucket.cpp b/vpr/src/route/bucket.cpp deleted file mode 100644 index 1804a74e4ca..00000000000 --- a/vpr/src/route/bucket.cpp +++ /dev/null @@ -1,550 +0,0 @@ -#include "bucket.h" - -#include -#include "rr_graph_fwd.h" -#include "vtr_log.h" -#include "vpr_error.h" - -/* Bucket spacing algorithm: - * - * The size in cost each bucket consumes is a fixed width determined by - * conv_factor_. 
The bucket index equation is simply: - * - * bucket index = cost * conv_factor_ - * - * The default conv_factor_ is 1e12, e.g. each bucket is 1 picosecond wide. - * - * There two reasons to change conv_factor_: - * - The maximum cost item in the bucket would require too many buckets in - * heap_, and would cause memory usage to climb higher than desired. - * - The front bucket contains too many items, making the pop operation too - * cost insenstive. - * - * The other consideration is to avoid rescaling the buckets too often, as - * that operation consumes time without delivering useful work. - * - * To prevent rescaling constantly, the bucket heap determines if a rescaling - * is needed based on two conditions: - * - * - The maximum item cost (max_cost_) would require a bucket index that is - * greater than max_buckets_. When this occurs, a rescaling is done to - * make the width of the buckets larger so that the cost index for the - * max_cost_ item fits within max_buckets_. - * - * - A larger max_buckets_ results in more memory consumption, but - * accomidates a wider range of items without needing to rescale. - * - * - The number of items in the first bucket exceeds kIncreaseFocusLimit. In - * this case, the bucket heap will shrink the width of the buckets so that - * the number of entries in the first bucket drops below - * kIncreaseFocusLimit. - * - * In both of the above cases, rescaling is determined by the following - * (simplified) equation: - * - * conv_factor_ = division_scaling_ / max_cost_ - * - * The default division_scaling_ is kInitialDivisionScaling (50k). This - * can be read as using 50k buckets to evenly divided based on the - * maximum cost. For example, if max cost = 100 ns, then each bucket would - * be 2 picosecond wide. - * - * When the number of elements in the first bucket exceeds - * kIncreaseFocusLimit, division_scaling_ is multiplied by two, effectively - * halving the bucket size for a given max_cost_. 
In addition max_buckets_ - * is also multiplied by two to result in a similiar rescaling rate as - * max_cost_ increases. - * - * This multiply by two logic could result in an unbounded memory consumption - * in the number of buckets. To limit this, the 2x scaling for - * division_scaling_ and max_buckets_ is limited such that max_buckets_ never - * exceeds kMaxMaxBuckets - * - */ - -// Initial bucket scaling. A larger division scaling results in smaller cost -// range per bucket. -static constexpr float kInitialDivisionScaling = 50000.f; -// Initial maximum number of buckets before bucket rescaling. -static constexpr ssize_t kInitialMaxBuckets = 1000000; -// If the division scaling results in more than kIncreaseFocusLimit elements -// in the first bucket, than division scaling is increased by 2x to try to -// lower the size of the first bucket. -// -// This is an attempt to dynamically scale the bucket widths to prevent the -// bucket heap from being too cost insenstive / imprecise. -// -// When the division scaling is increased by 2x, the maximum number of buckets -// also is increased by 2x to prevent excessive rescaling during runtime. -static constexpr size_t kIncreaseFocusLimit = 2048; -// To prevent unbounded division scaling, the 2x when the first bucket is too -// large is limited by kMaxMaxBuckets. If increasing the division scaling -// will result in max_buckets_ exceeding kMaxMaxBuckets, then division scaling -// will not be increased again. 
-static constexpr ssize_t kMaxMaxBuckets = 16000000; - -BucketItems::BucketItems() noexcept - : alloced_items_(0) - , num_heap_allocated_(0) - , heap_free_head_(nullptr) {} - -Bucket::Bucket() noexcept - : outstanding_items_(0) - , seed_(1231) - , heap_(nullptr) - , heap_size_(0) - , heap_head_(std::numeric_limits::max()) - , heap_tail_(0) - , conv_factor_(0.f) - , division_scaling_(kInitialDivisionScaling) - , max_buckets_(kInitialMaxBuckets) - , min_cost_(0.f) - , max_cost_(0.f) - , num_items_(0) - , max_index_(std::numeric_limits::max()) - , prune_limit_(std::numeric_limits::max()) - , prune_count_(0) - , front_head_(std::numeric_limits::max()) {} - -Bucket::~Bucket() { - free_all_memory(); -} - -void Bucket::init_heap(const DeviceGrid& grid) { - delete[] heap_; - heap_ = nullptr; - - heap_size_ = (grid.width() - 1) * (grid.height() - 1); - - heap_ = new BucketItem*[heap_size_]; - for (size_t i = 0; i < (size_t)heap_size_; i++) - heap_[i] = 0; - - heap_head_ = std::numeric_limits::max(); - front_head_ = std::numeric_limits::max(); - heap_tail_ = 0; - num_items_ = 0; - prune_count_ = 0; - - conv_factor_ = kDefaultConvFactor; - division_scaling_ = kInitialDivisionScaling; - max_buckets_ = kInitialMaxBuckets; - - min_cost_ = std::numeric_limits::max(); - max_cost_ = std::numeric_limits::min(); -} - -void Bucket::free_all_memory() { - delete[] heap_; - heap_ = nullptr; - - items_.free(); -} - -void Bucket::expand(size_t required_number_of_buckets) { - auto old_size = heap_size_; - heap_size_ = required_number_of_buckets * 2; - size_t i; - - std::vector temp(heap_, heap_ + old_size); - delete[] heap_; - heap_ = new BucketItem*[heap_size_]; - for (i = 0; i < old_size; i++) - heap_[i] = temp[i]; - for (i = temp.size(); i < heap_size_; i++) - heap_[i] = nullptr; -} - -void Bucket::verify() { - for (size_t bucket = heap_head_; bucket <= heap_tail_; ++bucket) { - for (BucketItem* data = heap_[bucket]; data != nullptr; - data = data->next_bucket) { - 
VTR_ASSERT(data->item.cost >= 0); - int bucket_idx = cost_to_int(data->item.cost); - if (bucket_idx != static_cast(bucket)) { - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, - "Wrong bucket for cost %g bucket_idx %d bucket %zu conv_factor %g", - data->item.cost, bucket_idx, bucket, conv_factor_); - } - } - } -} - -void Bucket::empty_heap() { - VTR_ASSERT(outstanding_items_ == 0); - - if (heap_head_ != std::numeric_limits::max()) { - std::fill(heap_ + heap_head_, heap_ + heap_tail_ + 1, nullptr); - } - heap_head_ = std::numeric_limits::max(); - front_head_ = std::numeric_limits::max(); - heap_tail_ = 0; - num_items_ = 0; - prune_count_ = 0; - min_push_cost_.clear(); - - // Quickly reset all items to being free'd - items_.clear(); - - conv_factor_ = kDefaultConvFactor; - division_scaling_ = kInitialDivisionScaling; - max_buckets_ = kInitialMaxBuckets; - - min_cost_ = std::numeric_limits::max(); - max_cost_ = std::numeric_limits::min(); -} - -float Bucket::rescale_func() const { - // Choose a scaling factor that accomidates division_scaling_ buckets - // between min_cost_ and max_cost_. - // - // If min and max are close to each other, assume 3 orders of - // magnitude between min and max. The goal is to rescale less often - // when the larger costs haven't been seen yet. - // - // If min and max are at least 3 orders of magnitude apart, scale - // soley based on max cost. The goal at this point is to keep the - // number of buckets between division_scaling_ and division_scaling_*2. - return division_scaling_ / max_cost_ / std::max(1.f, 1000.f / (max_cost_ / min_cost_)); -} - -void Bucket::check_conv_factor() const { - VTR_ASSERT(cost_to_int(min_cost_) >= 0); - VTR_ASSERT(cost_to_int(max_cost_) >= 0); - VTR_ASSERT(cost_to_int(max_cost_) < max_buckets_); -} - -// Checks if the scaling factor for cost results in a reasonable -// number of buckets based on the maximum cost value seen. -// -// Target number of buckets is between 50k and 100k buckets. 
-// Default scaling is each bucket is around ~1 ps wide. -// -// Designs with scaled costs less than 100000 (e.g. 100 ns) shouldn't require -// a bucket resize. -void Bucket::check_scaling() { - float min_cost = min_cost_; - float max_cost = max_cost_; - VTR_ASSERT(max_cost != std::numeric_limits::min()); - if (min_cost == std::numeric_limits::max()) { - min_cost = max_cost; - } - auto min_bucket = cost_to_int(min_cost); - auto max_bucket = cost_to_int(max_cost); - - // If scaling is invalid or more than 100k buckets are needed, rescale. - if (min_bucket < 0 || max_bucket < 0 || max_bucket > max_buckets_) { - rescale(); - } -} - -void Bucket::rescale() { - conv_factor_ = rescale_func(); - check_conv_factor(); - front_head_ = std::numeric_limits::max(); - - // Reheap after adjusting scaling. - if (heap_head_ != std::numeric_limits::max()) { - std::vector reheap; - for (size_t bucket = heap_head_; bucket <= heap_tail_; ++bucket) { - for (BucketItem* item = heap_[bucket]; item != nullptr; item = item->next_bucket) { - reheap.push_back(item); - } - } - - std::fill(heap_ + heap_head_, heap_ + heap_tail_ + 1, nullptr); - heap_head_ = std::numeric_limits::max(); - heap_tail_ = 0; - - for (BucketItem* item : reheap) { - outstanding_items_ += 1; - push_back(&item->item); - } - } -} - -void Bucket::push_back(t_heap* hptr) { - VTR_ASSERT(outstanding_items_ > 0); - outstanding_items_ -= 1; - - float cost = hptr->cost; - if (!std::isfinite(cost)) { - BucketItem* item = reinterpret_cast(hptr); - items_.free_item(item); - return; - } - - if (!min_push_cost_.empty()) { - if (hptr->cost > min_push_cost_[size_t(hptr->index)]) { - BucketItem* item = reinterpret_cast(hptr); - items_.free_item(item); - return; - } - - min_push_cost_[size_t(hptr->index)] = hptr->cost; - } - - // Check to see if the range of costs observed by the heap has changed. - bool check_scale = false; - - // Exclude 0 cost from min_cost to provide useful scaling factor. 
- if (cost < min_cost_ && cost > 0) { - min_cost_ = cost; - check_scale = true; - } - if (cost > max_cost_) { - max_cost_ = cost; - check_scale = true; - } - - // Rescale the number and size of buckets if needed based on the new - // cost range. - if (check_scale) { - check_scaling(); - } - - // Which bucket should this go into? - auto int_cost = cost_to_int(cost); - - if (int_cost < 0) { - VTR_LOG_WARN("Cost is negative? cost = %g, bucket = %d\n", cost, int_cost); - int_cost = 0; - } - - size_t uint_cost = int_cost; - - // Is that bucket allocated? - if (uint_cost >= heap_size_) { - // Not enough buckets! - expand(uint_cost); - } - - // Insert into bucket - auto* prev = heap_[uint_cost]; - - // Static assert ensures that BucketItem::item is at offset 0, - // so this cast is safe. - BucketItem* item = reinterpret_cast(hptr); - - if (front_head_ == uint_cost) { - VTR_ASSERT(prev != nullptr); - front_list_.back()->next_bucket = item; - item->next_bucket = nullptr; - front_list_.push_back(item); - } else { - // Otherwise just add to front list. - item->next_bucket = prev; - heap_[uint_cost] = item; - } - - if (uint_cost < heap_head_) { - heap_head_ = uint_cost; - } - if (uint_cost > heap_tail_) { - heap_tail_ = uint_cost; - } - - num_items_ += 1; - if (num_items_ > prune_limit_) { - prune_heap(); - } -} - -t_heap* Bucket::get_heap_head() { - auto heap_head = heap_head_; - auto heap_tail = heap_tail_; - BucketItem** heap = heap_; - - // Check empty - if (heap_head == std::numeric_limits::max()) { - return nullptr; - } - - if (front_head_ != heap_head) { - front_list_.clear(); - for (BucketItem* item = heap[heap_head]; item != nullptr; item = item->next_bucket) { - front_list_.push_back(item); - VTR_ASSERT(front_list_.size() <= num_items_); - } - - // If the front bucket is more than kIncreaseFocusLimit, then change - // the division scaling to attempt to shrink the front bucket size. - // - // kMaxMaxBuckets prevents this scaling from continuing without limit. 
- if (front_list_.size() > kIncreaseFocusLimit && max_buckets_ < kMaxMaxBuckets) { - division_scaling_ *= 2; - max_buckets_ *= 2; - rescale(); - return get_heap_head(); - } - VTR_ASSERT(!front_list_.empty()); - front_head_ = heap_head; - VTR_ASSERT_DEBUG(check_front_list()); - } - - // Find first non-empty bucket - - // Randomly remove element - size_t count = fast_rand() % front_list_.size(); - BucketItem* item = front_list_[count]; - - // If the element is the back of the list, just remove it. - if (count + 1 == front_list_.size()) { - if (front_list_.size() > 1) { - // Stitch into list. - front_list_[count - 1]->next_bucket = nullptr; - } else { - // List is now empty. - heap[heap_head] = nullptr; - } - } else { - // This is not the back element, so swap the element we are popping - // with the back element, then remove it. - BucketItem* swap = front_list_.back(); - if (front_list_.size() > 2) { - front_list_[front_list_.size() - 2]->next_bucket = nullptr; - } - - // Update the front_list_ - front_list_[count] = swap; - - if (count == 0) { - // Swap this element to the front of the list. 
- heap[heap_head] = swap; - } else { - // Stitch this element back into the list - front_list_[count - 1]->next_bucket = swap; - } - - swap->next_bucket = item->next_bucket; - } - - front_list_.pop_back(); - - VTR_ASSERT_DEBUG(check_front_list()); - - // Update first non-empty bucket if bucket is now empty - if (heap[heap_head] == nullptr) { - heap_head += 1; - while (heap_head <= heap_tail && heap[heap_head] == nullptr) { - heap_head += 1; - } - - if (heap_head > heap_tail) { - heap_head = std::numeric_limits::max(); - } - - heap_head_ = heap_head; - front_head_ = std::numeric_limits::max(); - } - - outstanding_items_ += 1; - num_items_ -= 1; - return &item->item; -} - -void Bucket::print() { - for (size_t i = heap_head_; i < heap_tail_; ++i) { - if (heap_[heap_head_] != nullptr) { - VTR_LOG("B:%d ", i); - for (auto* item = heap_[i]; item != nullptr; item = item->next_bucket) { - VTR_LOG(" %e", item->item.cost); - } - } - } - VTR_LOG("\n"); -} - -void Bucket::set_prune_limit(size_t max_index, size_t prune_limit) { - if (prune_limit != std::numeric_limits::max()) { - VTR_ASSERT(max_index < prune_limit); - } - max_index_ = max_index; - prune_limit_ = prune_limit; -} - -void Bucket::prune_heap() { - std::vector best_heap_item(max_index_, nullptr); - - for (size_t bucket = heap_head_; bucket <= heap_tail_; ++bucket) { - for (BucketItem* item = heap_[bucket]; item != nullptr; item = item->next_bucket) { - auto idx = size_t(item->item.index); - VTR_ASSERT(idx < max_index_); - if (best_heap_item[idx] == nullptr - || best_heap_item[idx]->item.cost > item->item.cost) { - best_heap_item[idx] = item; - } - } - } - - min_cost_ = std::numeric_limits::max(); - max_cost_ = std::numeric_limits::min(); - for (size_t bucket = heap_head_; bucket <= heap_tail_; ++bucket) { - BucketItem* item = heap_[bucket]; - while (item != nullptr) { - BucketItem* next_item = item->next_bucket; - auto idx = size_t(item->item.index); - - if (best_heap_item[idx] != item) { - // This item isn't the 
cheapest, return it to the free list. - items_.free_item(item); - } else { - // Update min_cost_ and max_cost_ - if (min_cost_ > item->item.cost) { - min_cost_ = item->item.cost; - } - if (max_cost_ < item->item.cost) { - max_cost_ = item->item.cost; - } - } - - item = next_item; - } - } - - // Rescale heap after pruning. - conv_factor_ = rescale_func(); - check_conv_factor(); - - std::fill(heap_, heap_ + heap_size_, nullptr); - heap_head_ = std::numeric_limits::max(); - front_head_ = std::numeric_limits::max(); - front_list_.clear(); - heap_tail_ = 0; - num_items_ = 0; - prune_count_ += 1; - - // Re-heap the pruned elements. - for (BucketItem* item : best_heap_item) { - if (item == nullptr) { - continue; - } - - outstanding_items_ += 1; - push_back(&item->item); - } - - verify(); - - if (prune_count_ >= 1) { - // If pruning is happening repeatedly, start pruning at entry. - min_push_cost_.resize(max_index_, std::numeric_limits::infinity()); - } -} - -bool Bucket::check_front_list() const { - VTR_ASSERT(heap_head_ == front_head_); - size_t i = 0; - BucketItem* item = heap_[heap_head_]; - while (item != nullptr) { - if (front_list_.at(i) != item) { - VTR_LOG( - "front_list_ (%p size %zu) [%zu] %p != item %p\n", - front_list_.data(), front_list_.size(), i, front_list_[i], item); - VTR_ASSERT(front_list_[i] == item); - } - i += 1; - item = item->next_bucket; - } - return false; -} diff --git a/vpr/src/route/bucket.h b/vpr/src/route/bucket.h deleted file mode 100644 index b712d54eb7b..00000000000 --- a/vpr/src/route/bucket.h +++ /dev/null @@ -1,307 +0,0 @@ -#ifndef _BUCKET_H -#define _BUCKET_H - -#include - -#include "heap_type.h" -#include "vtr_log.h" - -struct BucketItem { - t_heap item; - BucketItem* next_bucket; -}; - -// Allocator for t_heap items. -// -// This allocator supports fast clearing by maintaining an explicit object -// pool and a free list. -// -// The object pool maintained in heap_items_. 
Whenever a new object is -// created from the chunk allocator heap_ch_ it is added to heap_items_. -// -// When a client of BucketItems requests an objet, BucketItems first checks -// if there are any objects in the object pool that have not been allocated -// to the client (alloced_items_ < heap_items_.size()). If there are objects -// in the object pool that have not been alloced, these are use first. -// -// Once all objects from the object pool have been released, future allocations -// come from the free list (maintained in heap_free_head_). When the free list -// is empty, only then is a new item allocated from the chunk allocator. -// -// BucketItems::clear provides a fast way to reset the object pool under the -// assumption that no live references exists. It does this by mark the free -// list as empty and the object pool as being fully returned to BucketItems. -// This operation is extremely fast compared with putting all elements back -// onto the free list, as it only involves setting 3 values. -// -// This faster clear **requires** that all previous references to t_heap objects -// are dropped prior to calling clear, otherwise a silent use-after-free issue -// may occur. However because BucketItems is used in conjunction with Bucket, -// and the typical use case is for the heap to be fully emptied between -// routing, this optimization is safe. -// -class BucketItems { - public: - BucketItems() noexcept; - - // Returns all allocated items to be available for allocation. - // - // This operation is only safe if all outstanding references are discarded. - // This is true when the router is starting on a new net, as all outstanding - // items should in the bucket will be cleared at the start of routing. - void clear() { - heap_free_head_ = nullptr; - num_heap_allocated_ = 0; - alloced_items_ = 0; - } - - // Iterators over all items ever allocated. This is not the list of alive - // items, but can be used for fast invalidation if needed. 
- std::vector::iterator begin() { - return heap_items_.begin(); - } - std::vector::iterator end() { - return heap_items_.end(); - } - - // Deallocate all items. Outstanding references to items will become - // invalid. - void free() { - // Free each individual heap item. - for (auto* item : heap_items_) { - vtr::chunk_delete(item, &heap_ch_); - } - heap_items_.clear(); - - /*free the memory chunks that were used by heap and linked f pointer */ - free_chunk_memory(&heap_ch_); - } - - // Allocate an item. This may cause a dynamic allocation if no previously - // allocated items are available. - BucketItem* alloc_item() { - BucketItem* temp_ptr; - if (alloced_items_ < heap_items_.size()) { - // Return an unused object from the object pool. - temp_ptr = heap_items_[alloced_items_++]; - } else { - if (heap_free_head_ == nullptr) { /* No elements on the free list */ - heap_free_head_ = vtr::chunk_new(&heap_ch_); - heap_free_head_->next_bucket = nullptr; - heap_items_.push_back(heap_free_head_); - alloced_items_ += 1; - } - - temp_ptr = heap_free_head_; - heap_free_head_ = heap_free_head_->next_bucket; - } - - num_heap_allocated_++; - - return temp_ptr; - } - - // Return a free'd item to be reallocated. - void free_item(BucketItem* hptr) { - hptr->next_bucket = heap_free_head_; - heap_free_head_ = hptr; - num_heap_allocated_--; - } - - // Number of outstanding allocations. - int num_heap_allocated() { - return num_heap_allocated_; - } - - private: - /* Vector of all items ever allocated. Used for full item iteration and - * for reuse after a `clear` invocation. */ - std::vector heap_items_; - - /* Tracks how many items from heap_items_ are in use. */ - size_t alloced_items_; - - /* Number of outstanding allocated items. */ - int num_heap_allocated_; - - /* For managing my own list of currently free heap data structures. 
*/ - BucketItem* heap_free_head_; - - /* For keeping track of the sudo malloc memory for the heap*/ - vtr::t_chunk heap_ch_; -}; - -// Prority queue approximation using cost buckets and randomization. -// -// The cost buckets are each a linked lists for costs at kDefaultConvFactor -// intervals. Given that cost is approximately delay, each bucket contains ~1 -// picosecond (1e12) worth of items. -// -// Items are pushed into the linked list that matches their cost [0, 1) -// picosecond. When popping the Bucket, a random item in the cheapest bucket -// with items is returned. This randomization exists to prevent the router -// from following identical paths when operating with identical costs. -// Consider two parallel paths to a node. -// -// To ensure that number of buckets do not get too large, whenever is element -// is added to the heap, the number of buckets required is checked. If more -// than 100k buckets are required, then the width of the buckets (conv_factor_) -// are rescaled such that ~50k buckets are required. -// -// Important node: This approximation makes some assumptions about the -// structure of costs. -// -// Assumptions: -// 1. 0 is the minimum cost -// 2. Costs that are different by 0.1 % of the maximum cost are effectively -// equivilant -// 3. The cost function is roughly linear. -// -class Bucket : public HeapInterface { - public: - Bucket() noexcept; - ~Bucket(); - - t_heap* alloc() final { - outstanding_items_ += 1; - t_heap* hptr = &items_.alloc_item()->item; - return hptr; - } - void free(t_heap* hptr) final { - // Static assert ensures that BucketItem::item is at offset 0, - // so this cast is safe. - outstanding_items_ -= 1; - items_.free_item(reinterpret_cast(hptr)); - } - - // Allocate initial buckets for items. - void init_heap(const DeviceGrid& grid) final; - - // Deallocate memory for buckets. - void free_all_memory() final; - - // Empties all buckets of items. 
- // - // This does NOT call BucketItems::free_item on contained items. The - // assumption is that when Bucket::clear is called, BucketItems::clear - // is also called. - void empty_heap() final; - - // Push an item onto a bucket. - void push_back(t_heap* hptr) final; - - void add_to_heap(t_heap* hptr) final { - push_back(hptr); - } - - void build_heap() final { - } - - void set_prune_limit(size_t max_index, size_t prune_limit) final; - - // Pop an item from the cheapest non-empty bucket. - // - // Returns nullptr if empty. - t_heap* get_heap_head() final; - - // True if all buckets are empty. - bool is_empty_heap() const final { - return heap_head_ == std::numeric_limits::max(); - } - - bool is_valid() const final { - return true; - } - - // Sanity check state of buckets (e.g. all items within each bucket have - // a cost that matches their bucket index. - void verify(); - - // Print items contained in buckets. - void print(); - - private: - // Factor used to convert cost from float to int. Should be scaled to - // enable sufficent precision in bucketting. - static constexpr float kDefaultConvFactor = 1e12; - - // Convert cost from float to integer bucket id. - int cost_to_int(float cost) const { - return (int)(cost * conv_factor_); - } - - // Simple fast random function used for randomizing item selection on pop. - size_t fast_rand() { - seed_ = (0x234ab32a1 * seed_) ^ (0x12acbade); - return seed_; - } - - void check_scaling(); - void rescale(); - float rescale_func() const; - void check_conv_factor() const; - bool check_front_list() const; - - // Expand the number of buckets. - // - // Only call if insufficient buckets exist. - void expand(size_t required_number_of_buckets); - - void prune_heap(); - - BucketItems items_; /* Item storage */ - - /* Number of t_heap objects alloc'd but not returned to Bucket. - * Used to verify that clearing is safe. 
*/ - ssize_t outstanding_items_; - - size_t seed_; /* Seed for fast_rand, should be non-zero */ - - BucketItem** heap_; /* Buckets for linked lists*/ - size_t heap_size_; /* Number of buckets */ - size_t heap_head_; /* First non-empty bucket */ - size_t heap_tail_; /* Last non-empty bucket */ - float conv_factor_; /* Cost bucket scaling factor. - * - * Larger conv_factor_ means each bucket is - * smaller. - * - * bucket index = cost * conv_factor_ - * - */ - float division_scaling_; /* Scaling factor used during rescaling. - * Larger division scaling results in larger - * conversion factor. - */ - ssize_t max_buckets_; /* Maximum number of buckets to control when to - * rescale. - */ - - float min_cost_; /* Smallest cost seen */ - float max_cost_; /* Largest cost seen */ - - size_t num_items_; /* Number of items in the bucket heap. */ - size_t max_index_; /* Maximum value for index. */ - size_t prune_limit_; /* Maximum number of elements this bucket heap should - * have before the heap self compacts. - */ - size_t prune_count_; /* The number of times the bucket heap has self - * compacted. - */ - std::vector min_push_cost_; /* Lowest push cost for each index. - * Only used if the bucket has - * self-pruned. - */ - - /* In order to quickly randomly pop an element from the front bucket, - * a list of items is made. - * - * front_head_ points to the heap_ index this array was constructed from. - * If front_head_ is size_t::max or doesn't equal heap_head_, front_list_ - * needs to be re-computed. 
- * */ - size_t front_head_; - std::vector front_list_; -}; - -#endif /* _BUCKET_H */ diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp index 210783648ad..23fedf6c262 100644 --- a/vpr/src/route/connection_router.cpp +++ b/vpr/src/route/connection_router.cpp @@ -2,9 +2,6 @@ #include #include "rr_graph.h" -#include "binary_heap.h" -#include "four_ary_heap.h" -#include "bucket.h" #include "rr_graph_fwd.h" static bool relevant_node_to_target(const RRGraphView* rr_graph, @@ -25,7 +22,7 @@ static void update_router_stats(RouterStats* router_stats, /** return tuple */ template -std::tuple ConnectionRouter::timing_driven_route_connection_from_route_tree( +std::tuple ConnectionRouter::timing_driven_route_connection_from_route_tree( const RouteTreeNode& rt_root, RRNodeId sink_node, const t_conn_cost_params& cost_params, @@ -36,28 +33,33 @@ std::tuple ConnectionRouter::timing_driven_route_conne conn_params_ = &conn_params; bool retry = false; - t_heap* cheapest; - std::tie(retry, cheapest) = timing_driven_route_connection_common_setup(rt_root, sink_node, cost_params, bounding_box); - - if (cheapest != nullptr) { - rcv_path_manager.update_route_tree_set(cheapest->path_data); - update_cheapest(cheapest); - t_heap out = *cheapest; - heap_.free(cheapest); + retry = timing_driven_route_connection_common_setup(rt_root, sink_node, cost_params, bounding_box); + + if (!std::isinf(rr_node_route_inf_[sink_node].path_cost)) { + // Only the `index`, `prev_edge`, and `rcv_path_backward_delay` fields of `out` + // are used after this function returns. 
+ RTExploredNode out; + out.index = sink_node; + out.prev_edge = rr_node_route_inf_[sink_node].prev_edge; + if (rcv_path_manager.is_enabled()) { + out.rcv_path_backward_delay = rcv_path_data[sink_node]->backward_delay; + rcv_path_manager.update_route_tree_set(rcv_path_data[sink_node]); + rcv_path_manager.empty_heap(); + } heap_.empty_heap(); - rcv_path_manager.empty_heap(); return std::make_tuple(true, /*retry=*/false, out); } else { reset_path_costs(); - modified_rr_node_inf_.clear(); + clear_modified_rr_node_info(); heap_.empty_heap(); - return std::make_tuple(false, retry, t_heap()); + rcv_path_manager.empty_heap(); + return std::make_tuple(false, retry, RTExploredNode()); } } -/** Return */ +/** Return whether to retry with full bb */ template -std::tuple ConnectionRouter::timing_driven_route_connection_common_setup( +bool ConnectionRouter::timing_driven_route_connection_common_setup( const RouteTreeNode& rt_root, RRNodeId sink_node, const t_conn_cost_params& cost_params, @@ -72,18 +74,18 @@ std::tuple ConnectionRouter::timing_driven_route_connection if (heap_.is_empty_heap()) { VTR_LOG("No source in route tree: %s\n", describe_unrouteable_connection(source_node, sink_node, is_flat_).c_str()); - return std::make_tuple(false, nullptr); + return false; } VTR_LOGV_DEBUG(router_debug_, " Routing to %d as normal net (BB: %d,%d,%d x %d,%d,%d)\n", sink_node, bounding_box.layer_min, bounding_box.xmin, bounding_box.ymin, bounding_box.layer_max, bounding_box.xmax, bounding_box.ymax); - t_heap* cheapest = timing_driven_route_connection_from_heap(sink_node, - cost_params, - bounding_box); + timing_driven_route_connection_from_heap(sink_node, + cost_params, + bounding_box); - if (cheapest == nullptr) { + if (std::isinf(rr_node_route_inf_[sink_node].path_cost)) { // No path found within the current bounding box. 
// // If the bounding box is already max size, just fail @@ -94,15 +96,15 @@ std::tuple ConnectionRouter::timing_driven_route_connection && bounding_box.layer_min == 0 && bounding_box.layer_max == (int)(grid_.get_num_layers() - 1)) { VTR_LOG("%s\n", describe_unrouteable_connection(source_node, sink_node, is_flat_).c_str()); - return std::make_tuple(false, nullptr); + return false; } // Otherwise, leave unrouted and bubble up a signal to retry this net with a full-device bounding box VTR_LOG_WARN("No routing path for connection to sink_rr %d, leaving unrouted to retry later\n", sink_node); - return std::make_tuple(true, nullptr); + return true; } - return std::make_tuple(false, cheapest); + return false; } // Finds a path from the route tree rooted at rt_root to sink_node for a high fanout net. @@ -111,7 +113,7 @@ std::tuple ConnectionRouter::timing_driven_route_connection // which is spatially close to the sink is added to the heap. // Returns a tuple of */ template -std::tuple ConnectionRouter::timing_driven_route_connection_from_route_tree_high_fanout( +std::tuple ConnectionRouter::timing_driven_route_connection_from_route_tree_high_fanout( const RouteTreeNode& rt_root, RRNodeId sink_node, const t_conn_cost_params& cost_params, @@ -131,7 +133,7 @@ std::tuple ConnectionRouter::timing_driven_route_conne if (heap_.is_empty_heap()) { VTR_LOG("No source in route tree: %s\n", describe_unrouteable_connection(source_node, sink_node, is_flat_).c_str()); - return std::make_tuple(false, false, t_heap()); + return std::make_tuple(false, false, RTExploredNode()); } VTR_LOGV_DEBUG(router_debug_, " Routing to %d as high fanout net (BB: %d,%d,%d x %d,%d,%d)\n", sink_node, @@ -139,12 +141,11 @@ std::tuple ConnectionRouter::timing_driven_route_conne high_fanout_bb.layer_max, high_fanout_bb.xmax, high_fanout_bb.ymax); bool retry_with_full_bb = false; - t_heap* cheapest; - cheapest = timing_driven_route_connection_from_heap(sink_node, - cost_params, - high_fanout_bb); + 
timing_driven_route_connection_from_heap(sink_node, + cost_params, + high_fanout_bb); - if (cheapest == nullptr) { + if (std::isinf(rr_node_route_inf_[sink_node].path_cost)) { //Found no path, that may be due to an unlucky choice of existing route tree sub-set, //try again with the full route tree to be sure this is not an artifact of high-fanout routing VTR_LOG_WARN("No routing path found in high-fanout mode for net %zu connection (to sink_rr %d), retrying with full route tree\n", size_t(conn_params.net_id_), sink_node); @@ -152,42 +153,41 @@ std::tuple ConnectionRouter::timing_driven_route_conne //Reset any previously recorded node costs so timing_driven_route_connection() //starts over from scratch. reset_path_costs(); - modified_rr_node_inf_.clear(); + clear_modified_rr_node_info(); - std::tie(retry_with_full_bb, cheapest) = timing_driven_route_connection_common_setup(rt_root, - sink_node, - cost_params, - net_bounding_box); + retry_with_full_bb = timing_driven_route_connection_common_setup(rt_root, + sink_node, + cost_params, + net_bounding_box); } - if (cheapest == nullptr) { + if (std::isinf(rr_node_route_inf_[sink_node].path_cost)) { VTR_LOG("%s\n", describe_unrouteable_connection(source_node, sink_node, is_flat_).c_str()); heap_.empty_heap(); rcv_path_manager.empty_heap(); - return std::make_tuple(false, retry_with_full_bb, t_heap()); + return std::make_tuple(false, retry_with_full_bb, RTExploredNode()); } - rcv_path_manager.update_route_tree_set(cheapest->path_data); - update_cheapest(cheapest); - - t_heap out = *cheapest; - heap_.free(cheapest); + RTExploredNode out; + out.index = sink_node; + out.prev_edge = rr_node_route_inf_[sink_node].prev_edge; + if (rcv_path_manager.is_enabled()) { + out.rcv_path_backward_delay = rcv_path_data[sink_node]->backward_delay; + rcv_path_manager.update_route_tree_set(rcv_path_data[sink_node]); + rcv_path_manager.empty_heap(); + } heap_.empty_heap(); - rcv_path_manager.empty_heap(); return std::make_tuple(true, 
retry_with_full_bb, out); } -//Finds a path to sink_node, starting from the elements currently in the heap. -// +// Finds a path to sink_node, starting from the elements currently in the heap. // This is the core maze routing routine. -// -// Returns either the last element of the path, or nullptr if no path is found template -t_heap* ConnectionRouter::timing_driven_route_connection_from_heap(RRNodeId sink_node, - const t_conn_cost_params& cost_params, - const t_bb& bounding_box) { +void ConnectionRouter::timing_driven_route_connection_from_heap(RRNodeId sink_node, + const t_conn_cost_params& cost_params, + const t_bb& bounding_box) { VTR_ASSERT_SAFE(heap_.is_valid()); if (heap_.is_empty_heap()) { //No source @@ -220,18 +220,20 @@ t_heap* ConnectionRouter::timing_driven_route_connection_from_heap(RRNodeI target_bb.layer_min = rr_graph_->node_layer(RRNodeId(sink_node)); target_bb.layer_max = rr_graph_->node_layer(RRNodeId(sink_node)); - t_heap* cheapest = nullptr; - while (!heap_.is_empty_heap()) { - // cheapest t_heap in current route tree to be expanded on - cheapest = heap_.get_heap_head(); + // Start measuring path search time + std::chrono::steady_clock::time_point begin_time = std::chrono::steady_clock::now(); + + HeapNode cheapest; + while (heap_.try_pop(cheapest)) { + // inode with cheapest total cost in current route tree to be expanded on + const auto& [ new_total_cost, inode ] = cheapest; update_router_stats(router_stats_, false, - cheapest->index, + inode, rr_graph_); - RRNodeId inode = cheapest->index; VTR_LOGV_DEBUG(router_debug_, " Popping node %d (cost: %g)\n", - inode, cheapest->cost); + inode, new_total_cost); // Have we found the target? 
if (inode == sink_node) { @@ -239,40 +241,32 @@ t_heap* ConnectionRouter::timing_driven_route_connection_from_heap(RRNodeI // This is then placed into the traceback so that the correct path is returned // TODO: This can be eliminated by modifying the actual traceback function in route_timing if (rcv_path_manager.is_enabled()) { - rcv_path_manager.insert_backwards_path_into_traceback(cheapest->path_data, cheapest->cost, cheapest->backward_path_cost, route_ctx); + rcv_path_manager.insert_backwards_path_into_traceback(rcv_path_data[inode], + rr_node_route_inf_[inode].path_cost, + rr_node_route_inf_[inode].backward_path_cost, + route_ctx); } VTR_LOGV_DEBUG(router_debug_, " Found target %8d (%s)\n", inode, describe_rr_node(device_ctx.rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, inode, is_flat_).c_str()); break; } // If not, keep searching - timing_driven_expand_cheapest(cheapest, + timing_driven_expand_cheapest(inode, + new_total_cost, sink_node, cost_params, bounding_box, target_bb); - - rcv_path_manager.free_path_struct(cheapest->path_data); - heap_.free(cheapest); - cheapest = nullptr; } - if (router_debug_) { - //Update known path costs for nodes pushed but not popped, useful for debugging - empty_heap_annotating_node_route_inf(); - } - - if (cheapest == nullptr) { /* Impossible routing. No path for net. 
*/ - VTR_LOGV_DEBUG(router_debug_, " Empty heap (no path found)\n"); - return nullptr; - } - - return cheapest; + // Stop measuring path search time + std::chrono::steady_clock::time_point end_time = std::chrono::steady_clock::now(); + path_search_cumulative_time += std::chrono::duration_cast(end_time - begin_time); } // Find shortest paths from specified route tree to all nodes in the RR graph template -vtr::vector ConnectionRouter::timing_driven_find_all_shortest_paths_from_route_tree( +vtr::vector ConnectionRouter::timing_driven_find_all_shortest_paths_from_route_tree( const RouteTreeNode& rt_root, const t_conn_cost_params& cost_params, const t_bb& bounding_box, @@ -296,14 +290,11 @@ vtr::vector ConnectionRouter::timing_driven_find_all_sho // // Since there is no single *target* node this uses Dijkstra's algorithm // with a modified exit condition (runs until heap is empty). -// -// Note that to re-use code used for the regular A*-based router we use a -// no-operation lookahead which always returns zero. 
template -vtr::vector ConnectionRouter::timing_driven_find_all_shortest_paths_from_heap( +vtr::vector ConnectionRouter::timing_driven_find_all_shortest_paths_from_heap( const t_conn_cost_params& cost_params, const t_bb& bounding_box) { - vtr::vector cheapest_paths(rr_nodes_.size()); + vtr::vector cheapest_paths(rr_nodes_.size()); VTR_ASSERT_SAFE(heap_.is_valid()); @@ -311,17 +302,20 @@ vtr::vector ConnectionRouter::timing_driven_find_all_sho VTR_LOGV_DEBUG(router_debug_, " Initial heap empty (no source)\n"); } - while (!heap_.is_empty_heap()) { - // cheapest t_heap in current route tree to be expanded on - t_heap* cheapest = heap_.get_heap_head(); + // Start measuring path search time + std::chrono::steady_clock::time_point begin_time = std::chrono::steady_clock::now(); + + HeapNode cheapest; + while (heap_.try_pop(cheapest)) { + // inode with cheapest total cost in current route tree to be expanded on + const auto& [ new_total_cost, inode ] = cheapest; update_router_stats(router_stats_, false, - cheapest->index, + inode, rr_graph_); - RRNodeId inode = cheapest->index; VTR_LOGV_DEBUG(router_debug_, " Popping node %d (cost: %g)\n", - inode, cheapest->cost); + inode, new_total_cost); // Since we want to find shortest paths to all nodes in the graph // we do not specify a target node. 
@@ -330,78 +324,80 @@ vtr::vector ConnectionRouter::timing_driven_find_all_sho // lookahead we can re-use the node exploration code from the regular router RRNodeId target_node = RRNodeId::INVALID(); - timing_driven_expand_cheapest(cheapest, + timing_driven_expand_cheapest(inode, + new_total_cost, target_node, cost_params, bounding_box, t_bb()); - if (cheapest_paths[inode].index == RRNodeId::INVALID() || cheapest_paths[inode].cost >= cheapest->cost) { - VTR_LOGV_DEBUG(router_debug_, " Better cost to node %d: %g (was %g)\n", inode, cheapest->cost, cheapest_paths[inode].cost); - cheapest_paths[inode] = *cheapest; + if (cheapest_paths[inode].index == RRNodeId::INVALID() || cheapest_paths[inode].total_cost >= new_total_cost) { + VTR_LOGV_DEBUG(router_debug_, " Better cost to node %d: %g (was %g)\n", inode, new_total_cost, cheapest_paths[inode].total_cost); + // Only the `index` and `prev_edge` fields of `cheapest_paths[inode]` are used after this function returns + cheapest_paths[inode].index = inode; + cheapest_paths[inode].prev_edge = rr_node_route_inf_[inode].prev_edge; } else { - VTR_LOGV_DEBUG(router_debug_, " Worse cost to node %d: %g (better %g)\n", inode, cheapest->cost, cheapest_paths[inode].cost); + VTR_LOGV_DEBUG(router_debug_, " Worse cost to node %d: %g (better %g)\n", inode, new_total_cost, cheapest_paths[inode].total_cost); } - - rcv_path_manager.free_path_struct(cheapest->path_data); - heap_.free(cheapest); } + // Stop measuring path search time + std::chrono::steady_clock::time_point end_time = std::chrono::steady_clock::now(); + path_search_cumulative_time += std::chrono::duration_cast(end_time - begin_time); + return cheapest_paths; } template -void ConnectionRouter::timing_driven_expand_cheapest(t_heap* cheapest, +void ConnectionRouter::timing_driven_expand_cheapest(RRNodeId from_node, + float new_total_cost, RRNodeId target_node, const t_conn_cost_params& cost_params, const t_bb& bounding_box, const t_bb& target_bb) { - RRNodeId inode = 
cheapest->index; - - t_rr_node_route_inf* route_inf = &rr_node_route_inf_[inode]; - float best_total_cost = route_inf->path_cost; - float best_back_cost = route_inf->backward_path_cost; - - float new_total_cost = cheapest->cost; - float new_back_cost = cheapest->backward_path_cost; - - /* I only re-expand a node if both the "known" backward cost is lower * - * in the new expansion (this is necessary to prevent loops from * - * forming in the routing and causing havoc) *and* the expected total * - * cost to the sink is lower than the old value. Different R_upstream * - * values could make a path with lower back_path_cost less desirable * - * than one with higher cost. Test whether or not I should disallow * - * re-expansion based on a higher total cost. */ - - if (best_total_cost > new_total_cost && ((rcv_path_manager.is_enabled()) || best_back_cost > new_back_cost)) { - // Explore from this node, since the current/new partial path has the best cost - // found so far - VTR_LOGV_DEBUG(router_debug_, " Better cost to %d\n", inode); + float best_total_cost = rr_node_route_inf_[from_node].path_cost; + if (best_total_cost == new_total_cost) { + // Explore from this node, since its total cost is exactly the same as + // the best total cost ever seen for this node. Otherwise, prune this node + // to reduce redundant work (i.e., unnecessary neighbor exploration). + // `new_total_cost` is used here as an identifier to detect if the pair + // (from_node or inode, new_total_cost) was the most recently pushed + // element for the corresponding node. + // + // Note: For RCV, it often isn't searching for a shortest path; it is + // searching for a path in the target delay range. So it might find a + // path to node n that has a higher `backward_path_cost` but the `total_cost` + // (including expected delay to sink, going through a cost function that + // checks that against the target delay) might be lower than the previously + // stored value. 
In that case we want to re-expand the node so long as + // it doesn't create a loop. That `rcv_path_manager` should store enough + // info for us to avoid loops. + RTExploredNode current; + current.index = from_node; + current.backward_path_cost = rr_node_route_inf_[from_node].backward_path_cost; + current.prev_edge = rr_node_route_inf_[from_node].prev_edge; + current.R_upstream = rr_node_route_inf_[from_node].R_upstream; + + VTR_LOGV_DEBUG(router_debug_, " Better cost to %d\n", from_node); VTR_LOGV_DEBUG(router_debug_, " New total cost: %g\n", new_total_cost); - VTR_LOGV_DEBUG(router_debug_, " New back cost: %g\n", new_back_cost); - VTR_LOGV_DEBUG(router_debug_ && (cheapest->prev_edge() != RREdgeId::INVALID()), - " Setting path costs for associated node %d (from %d edge %zu)\n", - cheapest->index, - static_cast(rr_graph_->edge_src_node(cheapest->prev_edge())), - static_cast(cheapest->prev_edge())); + VTR_LOGV_DEBUG(router_debug_ && (current.prev_edge != RREdgeId::INVALID()), + " Setting path costs for associated node %d (from %d edge %zu)\n", + from_node, + static_cast(rr_graph_->edge_src_node(current.prev_edge)), + static_cast(current.prev_edge)); - update_cheapest(cheapest, route_inf); - - timing_driven_expand_neighbours(cheapest, cost_params, bounding_box, - target_node, target_bb); + timing_driven_expand_neighbours(current, cost_params, bounding_box, target_node, target_bb); } else { // Post-heap prune, do not re-explore from the current/new partial path as it // has worse cost than the best partial path to this node found so far - VTR_LOGV_DEBUG(router_debug_, " Worse cost to %d\n", inode); + VTR_LOGV_DEBUG(router_debug_, " Worse cost to %d\n", from_node); VTR_LOGV_DEBUG(router_debug_, " Old total cost: %g\n", best_total_cost); - VTR_LOGV_DEBUG(router_debug_, " Old back cost: %g\n", best_back_cost); VTR_LOGV_DEBUG(router_debug_, " New total cost: %g\n", new_total_cost); - VTR_LOGV_DEBUG(router_debug_, " New back cost: %g\n", new_back_cost); } } template -void 
ConnectionRouter::timing_driven_expand_neighbours(t_heap* current, +void ConnectionRouter::timing_driven_expand_neighbours(const RTExploredNode& current, const t_conn_cost_params& cost_params, const t_bb& bounding_box, RRNodeId target_node, @@ -409,8 +405,7 @@ void ConnectionRouter::timing_driven_expand_neighbours(t_heap* current, /* Puts all the rr_nodes adjacent to current on the heap. */ // For each node associated with the current heap element, expand all of it's neighbors - RRNodeId from_node = current->index; - auto edges = rr_nodes_.edge_range(from_node); + auto edges = rr_nodes_.edge_range(current.index); // This is a simple prefetch that prefetches: // - RR node data reachable from this node @@ -440,7 +435,6 @@ void ConnectionRouter::timing_driven_expand_neighbours(t_heap* current, for (RREdgeId from_edge : edges) { RRNodeId to_node = rr_nodes_.edge_sink_node(from_edge); timing_driven_expand_neighbour(current, - from_node, from_edge, to_node, cost_params, @@ -454,8 +448,7 @@ void ConnectionRouter::timing_driven_expand_neighbours(t_heap* current, // RR nodes outside the expanded bounding box specified in bounding_box are not added // to the heap. 
template -void ConnectionRouter::timing_driven_expand_neighbour(t_heap* current, - RRNodeId from_node, +void ConnectionRouter::timing_driven_expand_neighbour(const RTExploredNode& current, RREdgeId from_edge, RRNodeId to_node, const t_conn_cost_params& cost_params, @@ -464,6 +457,8 @@ void ConnectionRouter::timing_driven_expand_neighbour(t_heap* current, const t_bb& target_bb) { VTR_ASSERT(bounding_box.layer_max < g_vpr_ctx.device().grid.get_num_layers()); + const RRNodeId& from_node = current.index; + // BB-pruning // Disable BB-pruning if RCV is enabled, as this can make it harder for circuits with high negative hold slack to resolve this // TODO: Only disable pruning if the net has negative hold slack, maybe go off budgets @@ -522,14 +517,13 @@ void ConnectionRouter::timing_driven_expand_neighbour(t_heap* current, // Other pruning methods have been disabled when RCV is on, so this method is required to prevent "loops" from being created bool node_exists = false; if (rcv_path_manager.is_enabled()) { - node_exists = rcv_path_manager.node_exists_in_tree(current->path_data, + node_exists = rcv_path_manager.node_exists_in_tree(rcv_path_data[from_node], to_node); } if (!node_exists || !rcv_path_manager.is_enabled()) { timing_driven_add_to_heap(cost_params, current, - from_node, to_node, from_edge, target_node); @@ -539,43 +533,53 @@ void ConnectionRouter::timing_driven_expand_neighbour(t_heap* current, // Add to_node to the heap, and also add any nodes which are connected by non-configurable edges template void ConnectionRouter::timing_driven_add_to_heap(const t_conn_cost_params& cost_params, - const t_heap* current, - RRNodeId from_node, + const RTExploredNode& current, RRNodeId to_node, const RREdgeId from_edge, RRNodeId target_node) { const auto& device_ctx = g_vpr_ctx.device(); - t_heap next; + const RRNodeId& from_node = current.index; + + // Initialized to current + RTExploredNode next; + next.R_upstream = current.R_upstream; + next.index = to_node; + 
next.prev_edge = from_edge; + next.total_cost = std::numeric_limits::infinity(); // Not used directly + next.backward_path_cost = current.backward_path_cost; // Initalize RCV data struct if needed, otherwise it's set to nullptr rcv_path_manager.alloc_path_struct(next.path_data); - - // Costs initialized to current - next.cost = std::numeric_limits::infinity(); //Not used directly - next.backward_path_cost = current->backward_path_cost; - // path_data variables are initialized to current values - if (rcv_path_manager.is_enabled() && current->path_data) { - next.path_data->backward_cong = current->path_data->backward_cong; - next.path_data->backward_delay = current->path_data->backward_delay; + if (rcv_path_manager.is_enabled() && rcv_path_data[from_node]) { + next.path_data->backward_cong = rcv_path_data[from_node]->backward_cong; + next.path_data->backward_delay = rcv_path_data[from_node]->backward_delay; } - next.R_upstream = current->R_upstream; - evaluate_timing_driven_node_costs(&next, cost_params, from_node, - to_node, - from_edge, target_node); float best_total_cost = rr_node_route_inf_[to_node].path_cost; float best_back_cost = rr_node_route_inf_[to_node].backward_path_cost; - float new_total_cost = next.cost; + float new_total_cost = next.total_cost; float new_back_cost = next.backward_path_cost; - if (new_total_cost < best_total_cost && ((rcv_path_manager.is_enabled()) || (new_back_cost < best_back_cost))) { + // We need to only expand this node if it is a better path. And we need to + // update its `rr_node_route_inf` data as we put it into the heap; there may + // be other (previously explored) paths to this node in the heap already, + // but they will be pruned when we pop those heap nodes later as we'll see + // they have inferior costs to what is in the `rr_node_route_inf` data for + // this node. 
+ // FIXME: Adding a link to the FPT paper when it is public + // + // When RCV is enabled, prune based on the RCV-specific total path cost (see + // in `compute_node_cost_using_rcv` in `evaluate_timing_driven_node_costs`) + // to allow detours to get better QoR. + if ((!rcv_path_manager.is_enabled() && best_back_cost > new_back_cost) || + (rcv_path_manager.is_enabled() && best_total_cost > new_total_cost)) { VTR_LOGV_DEBUG(router_debug_, " Expanding to node %d (%s)\n", to_node, describe_rr_node(device_ctx.rr_graph, device_ctx.grid, @@ -589,26 +593,10 @@ void ConnectionRouter::timing_driven_add_to_heap(const t_conn_cost_params& // //Pre-heap prune to keep the heap small, by not putting paths which are known to be //sub-optimal (at this point in time) into the heap. - t_heap* next_ptr = heap_.alloc(); - - // Use the already created next path structure pointer when RCV is enabled - if (rcv_path_manager.is_enabled()) rcv_path_manager.move(next_ptr->path_data, next.path_data); - - //Record how we reached this node - next_ptr->cost = next.cost; - next_ptr->R_upstream = next.R_upstream; - next_ptr->backward_path_cost = next.backward_path_cost; - next_ptr->index = to_node; - next_ptr->set_prev_edge(from_edge); - - if (rcv_path_manager.is_enabled() && current->path_data) { - next_ptr->path_data->path_rr = current->path_data->path_rr; - next_ptr->path_data->edge = current->path_data->edge; - next_ptr->path_data->path_rr.emplace_back(from_node); - next_ptr->path_data->edge.emplace_back(from_edge); - } - heap_.add_to_heap(next_ptr); + update_cheapest(next, from_node); + + heap_.add_to_heap({new_total_cost, to_node}); update_router_stats(router_stats_, true, to_node, @@ -694,15 +682,16 @@ void ConnectionRouter::empty_rcv_route_tree_set() { template void ConnectionRouter::set_rcv_enabled(bool enable) { rcv_path_manager.set_enabled(enable); + if (enable) { + rcv_path_data.resize(rr_node_route_inf_.size()); + } } -//Calculates the cost of reaching to_node +//Calculates the cost 
of reaching to_node (i.e., to->index) template -void ConnectionRouter::evaluate_timing_driven_node_costs(t_heap* to, +void ConnectionRouter::evaluate_timing_driven_node_costs(RTExploredNode* to, const t_conn_cost_params& cost_params, RRNodeId from_node, - RRNodeId to_node, - RREdgeId from_edge, RRNodeId target_node) { /* new_costs.backward_cost: is the "known" part of the cost to this node -- the * congestion cost of all the routing resources back to the existing route @@ -713,8 +702,8 @@ void ConnectionRouter::evaluate_timing_driven_node_costs(t_heap* to, * new_costs.R_upstream: is the upstream resistance at the end of this node */ - //Info for the switch connecting from_node to_node - int iswitch = rr_nodes_.edge_switch(from_edge); + //Info for the switch connecting from_node to_node (i.e., to->index) + int iswitch = rr_nodes_.edge_switch(to->prev_edge); bool switch_buffered = rr_switch_inf_[iswitch].buffered(); bool reached_configurably = rr_switch_inf_[iswitch].configurable(); float switch_R = rr_switch_inf_[iswitch].R; @@ -722,7 +711,7 @@ void ConnectionRouter::evaluate_timing_driven_node_costs(t_heap* to, float switch_Cinternal = rr_switch_inf_[iswitch].Cinternal; //To node info - auto rc_index = rr_graph_->node_rc_index(to_node); + auto rc_index = rr_graph_->node_rc_index(to->index); float node_C = rr_rc_data_[rc_index].C; float node_R = rr_rc_data_[rc_index].R; @@ -761,12 +750,12 @@ void ConnectionRouter::evaluate_timing_driven_node_costs(t_heap* to, float cong_cost = 0.; if (reached_configurably) { - cong_cost = get_rr_cong_cost(to_node, cost_params.pres_fac); + cong_cost = get_rr_cong_cost(to->index, cost_params.pres_fac); } else { //Reached by a non-configurable edge. //Therefore the from_node and to_node are part of the same non-configurable node set. 
#ifdef VTR_ASSERT_SAFE_ENABLED - VTR_ASSERT_SAFE_MSG(same_non_config_node_set(from_node, to_node), + VTR_ASSERT_SAFE_MSG(same_non_config_node_set(from_node, to->index), "Non-configurably connected edges should be part of the same node set"); #endif @@ -775,8 +764,8 @@ void ConnectionRouter::evaluate_timing_driven_node_costs(t_heap* to, //cost. cong_cost = 0.; } - if (conn_params_->router_opt_choke_points_ && is_flat_ && rr_graph_->node_type(to_node) == IPIN) { - auto find_res = conn_params_->connection_choking_spots_.find(to_node); + if (conn_params_->router_opt_choke_points_ && is_flat_ && rr_graph_->node_type(to->index) == IPIN) { + auto find_res = conn_params_->connection_choking_spots_.find(to->index); if (find_res != conn_params_->connection_choking_spots_.end()) { cong_cost = cong_cost / pow(2, (float)find_res->second); } @@ -788,7 +777,7 @@ void ConnectionRouter::evaluate_timing_driven_node_costs(t_heap* to, if (cost_params.bend_cost != 0.) { t_rr_type from_type = rr_graph_->node_type(from_node); - t_rr_type to_type = rr_graph_->node_type(to_node); + t_rr_type to_type = rr_graph_->node_type(to->index); if ((from_type == CHANX && to_type == CHANY) || (from_type == CHANY && to_type == CHANX)) { to->backward_path_cost += cost_params.bend_cost; //Bend cost } @@ -798,46 +787,23 @@ void ConnectionRouter::evaluate_timing_driven_node_costs(t_heap* to, if (rcv_path_manager.is_enabled() && to->path_data != nullptr) { to->path_data->backward_delay += cost_params.criticality * Tdel; - to->path_data->backward_cong += (1. - cost_params.criticality) * get_rr_cong_cost(to_node, cost_params.pres_fac); + to->path_data->backward_cong += (1. 
- cost_params.criticality) * get_rr_cong_cost(to->index, cost_params.pres_fac); - total_cost = compute_node_cost_using_rcv(cost_params, to_node, target_node, to->path_data->backward_delay, to->path_data->backward_cong, to->R_upstream); + total_cost = compute_node_cost_using_rcv(cost_params, to->index, target_node, to->path_data->backward_delay, to->path_data->backward_cong, to->R_upstream); } else { const auto& device_ctx = g_vpr_ctx.device(); //Update total cost - float expected_cost = router_lookahead_.get_expected_cost(to_node, - target_node, - cost_params, - to->R_upstream); + float expected_cost = router_lookahead_.get_expected_cost(to->index, target_node, cost_params, to->R_upstream); VTR_LOGV_DEBUG(router_debug_ && !std::isfinite(expected_cost), - " Lookahead from %s (%s) to %s (%s) is non-finite, expected_cost = %f, to->R_upstream = %f\n", - rr_node_arch_name(to_node, is_flat_).c_str(), - describe_rr_node(device_ctx.rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, to_node, is_flat_).c_str(), - rr_node_arch_name(target_node, is_flat_).c_str(), - describe_rr_node(device_ctx.rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, target_node, is_flat_).c_str(), - expected_cost, to->R_upstream); + " Lookahead from %s (%s) to %s (%s) is non-finite, expected_cost = %f, to->R_upstream = %f\n", + rr_node_arch_name(to->index, is_flat_).c_str(), + describe_rr_node(device_ctx.rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, to->index, is_flat_).c_str(), + rr_node_arch_name(target_node, is_flat_).c_str(), + describe_rr_node(device_ctx.rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, target_node, is_flat_).c_str(), + expected_cost, to->R_upstream); total_cost += to->backward_path_cost + cost_params.astar_fac * std::max(0.f, expected_cost - cost_params.astar_offset); } - to->cost = total_cost; -} - -template -void ConnectionRouter::empty_heap_annotating_node_route_inf() { - //Pop any remaining nodes in the heap and annotate their costs - // - //Useful 
for visualizing router expansion in graphics, as it shows - //the cost of all nodes considered by the router (e.g. nodes never - //expanded, such as parts of the initial route tree far from the - //target). - while (!heap_.is_empty_heap()) { - t_heap* tmp = heap_.get_heap_head(); - - rr_node_route_inf_[tmp->index].path_cost = tmp->cost; - rr_node_route_inf_[tmp->index].backward_path_cost = tmp->backward_path_cost; - modified_rr_node_inf_.push_back(tmp->index); - - rcv_path_manager.free_path_struct(tmp->path_data); - heap_.free(tmp); - } + to->total_cost = total_cost; } //Adds the route tree rooted at rt_node to the heap, preparing it to be @@ -914,14 +880,35 @@ void ConnectionRouter::add_route_tree_node_to_heap( tot_cost, describe_rr_node(device_ctx.rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, inode, is_flat_).c_str()); - push_back_node(&heap_, rr_node_route_inf_, - inode, tot_cost, RREdgeId::INVALID(), - backward_path_cost, R_upstream); + if (tot_cost > rr_node_route_inf_[inode].path_cost) { + return ; + } + add_to_mod_list(inode); + rr_node_route_inf_[inode].path_cost = tot_cost; + rr_node_route_inf_[inode].prev_edge = RREdgeId::INVALID(); + rr_node_route_inf_[inode].backward_path_cost = backward_path_cost; + rr_node_route_inf_[inode].R_upstream = R_upstream; + heap_.push_back({tot_cost, inode}); + + // push_back_node(&heap_, rr_node_route_inf_, + // inode, tot_cost, RREdgeId::INVALID(), + // backward_path_cost, R_upstream); } else { float expected_total_cost = compute_node_cost_using_rcv(cost_params, inode, target_node, rt_node.Tdel, 0, R_upstream); - push_back_node_with_info(&heap_, inode, expected_total_cost, - backward_path_cost, R_upstream, rt_node.Tdel, &rcv_path_manager); + add_to_mod_list(inode); + rr_node_route_inf_[inode].path_cost = expected_total_cost; + rr_node_route_inf_[inode].prev_edge = RREdgeId::INVALID(); + rr_node_route_inf_[inode].backward_path_cost = backward_path_cost; + rr_node_route_inf_[inode].R_upstream = R_upstream; + + 
rcv_path_manager.alloc_path_struct(rcv_path_data[inode]); + rcv_path_data[inode]->backward_delay = rt_node.Tdel; + + heap_.push_back({expected_total_cost, inode}); + + // push_back_node_with_info(&heap_, inode, expected_total_cost, + // backward_path_cost, R_upstream, rt_node.Tdel, &rcv_path_manager); } update_router_stats(router_stats_, @@ -1144,16 +1131,6 @@ std::unique_ptr make_connection_router(e_heap_type he rr_switch_inf, rr_node_route_inf, is_flat); - case e_heap_type::BUCKET_HEAP_APPROXIMATION: - return std::make_unique>( - grid, - router_lookahead, - rr_nodes, - rr_graph, - rr_rc_data, - rr_switch_inf, - rr_node_route_inf, - is_flat); default: VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Unknown heap_type %d", heap_type); diff --git a/vpr/src/route/connection_router.h b/vpr/src/route/connection_router.h index 4118c5a7c7f..cee93384974 100644 --- a/vpr/src/route/connection_router.h +++ b/vpr/src/route/connection_router.h @@ -10,10 +10,9 @@ #include "router_stats.h" #include "spatial_route_tree_lookup.h" -// Prune the heap when it contains 4x the number of nodes in the RR graph. -constexpr size_t kHeapPruneFactor = 4; +#include "d_ary_heap.h" -// This class encapsolates the timing driven connection router. This class +// This class encapsulates the timing driven connection router. This class // routes from some initial set of sources (via the input rt tree) to a // particular sink. // @@ -44,12 +43,17 @@ class ConnectionRouter : public ConnectionRouterInterface { , rr_node_route_inf_(rr_node_route_inf) , is_flat_(is_flat) , router_stats_(nullptr) - , router_debug_(false) { + , router_debug_(false) + , path_search_cumulative_time(0) { heap_.init_heap(grid); - heap_.set_prune_limit(rr_nodes_.size(), kHeapPruneFactor * rr_nodes_.size()); only_opin_inter_layer = (grid.get_num_layers() > 1) && inter_layer_connections_limited_to_opin(*rr_graph); } + ~ConnectionRouter() { + VTR_LOG("Serial Connection Router is being destroyed. 
Time spent on path search: %.3f seconds.\n", + std::chrono::duration(path_search_cumulative_time).count()); + } + // Clear's the modified list. Should be called after reset_path_costs // have been called. void clear_modified_rr_node_info() final { @@ -58,7 +62,14 @@ class ConnectionRouter : public ConnectionRouterInterface { // Reset modified data in rr_node_route_inf based on modified_rr_node_inf. void reset_path_costs() final { + // Reset the node info stored in rr_node_route_inf variable ::reset_path_costs(modified_rr_node_inf_); + // Reset the node info stored inside the connection router + if (rcv_path_manager.is_enabled()) { + for (const auto& node : modified_rr_node_inf_) { + rcv_path_data[node] = nullptr; + } + } } /** Finds a path from the route tree rooted at rt_root to sink_node. @@ -68,8 +79,8 @@ class ConnectionRouter : public ConnectionRouterInterface { * Returns a tuple of: * bool: path exists? (hard failure, rr graph disconnected) * bool: should retry with full bounding box? (only used in parallel routing) - * t_heap: heap element of cheapest path */ - std::tuple timing_driven_route_connection_from_route_tree( + * RTExploredNode: the explored sink node, from which the cheapest path can be found via back-tracing */ + std::tuple timing_driven_route_connection_from_route_tree( const RouteTreeNode& rt_root, RRNodeId sink_node, const t_conn_cost_params& cost_params, @@ -86,8 +97,8 @@ class ConnectionRouter : public ConnectionRouterInterface { * Returns a tuple of: * bool: path exists? (hard failure, rr graph disconnected) * bool: should retry with full bounding box? 
(only used in parallel routing) - * t_heap: heap element of cheapest path */ - std::tuple timing_driven_route_connection_from_route_tree_high_fanout( + * RTExploredNode: the explored sink node, from which the cheapest path can be found via back-tracing */ + std::tuple timing_driven_route_connection_from_route_tree_high_fanout( const RouteTreeNode& rt_root, RRNodeId sink_node, const t_conn_cost_params& cost_params, @@ -105,7 +116,10 @@ class ConnectionRouter : public ConnectionRouterInterface { // Dijkstra's algorithm with a modified exit condition (runs until heap is // empty). When using cost_params.astar_fac = 0, for efficiency the // RouterLookahead used should be the NoOpLookahead. - vtr::vector timing_driven_find_all_shortest_paths_from_route_tree( + // + // Note: This routine is currently used only to generate information that + // may be helpful in debugging an architecture. + vtr::vector timing_driven_find_all_shortest_paths_from_route_tree( const RouteTreeNode& rt_root, const t_conn_cost_params& cost_params, const t_bb& bounding_box, @@ -136,18 +150,24 @@ class ConnectionRouter : public ConnectionRouterInterface { } } - // Update the route path to the node pointed to by cheapest. - inline void update_cheapest(t_heap* cheapest) { - update_cheapest(cheapest, &rr_node_route_inf_[cheapest->index]); - } - - inline void update_cheapest(t_heap* cheapest, t_rr_node_route_inf* route_inf) { - //Record final link to target - add_to_mod_list(cheapest->index); - - route_inf->prev_edge = cheapest->prev_edge(); - route_inf->path_cost = cheapest->cost; - route_inf->backward_path_cost = cheapest->backward_path_cost; + // Update the route path to the node `cheapest.index` via the path from + // `from_node` via `cheapest.prev_edge`. 
+ inline void update_cheapest(RTExploredNode& cheapest, const RRNodeId& from_node) { + const RRNodeId& inode = cheapest.index; + add_to_mod_list(inode); + rr_node_route_inf_[inode].prev_edge = cheapest.prev_edge; + rr_node_route_inf_[inode].path_cost = cheapest.total_cost; + rr_node_route_inf_[inode].backward_path_cost = cheapest.backward_path_cost; + + // Use the already created next path structure pointer when RCV is enabled + if (rcv_path_manager.is_enabled()) { + rcv_path_manager.move(rcv_path_data[inode], cheapest.path_data); + + rcv_path_data[inode]->path_rr = rcv_path_data[from_node]->path_rr; + rcv_path_data[inode]->edge = rcv_path_data[from_node]->edge; + rcv_path_data[inode]->path_rr.push_back(from_node); + rcv_path_data[inode]->edge.push_back(cheapest.prev_edge); + } } /** Common logic from timing_driven_route_connection_from_route_tree and @@ -157,9 +177,8 @@ class ConnectionRouter : public ConnectionRouterInterface { * @param[in] sink_node Sink node ID to route to * @param[in] cost_params * @param[in] bounding_box Keep search confined to this bounding box - * @return bool Signal to retry this connection with a full-device bounding box, - * @return t_heap* Heap element describing the path found. */ - std::tuple timing_driven_route_connection_common_setup( + * @return bool Signal to retry this connection with a full-device bounding box */ + bool timing_driven_route_connection_common_setup( const RouteTreeNode& rt_root, RRNodeId sink_node, const t_conn_cost_params& cost_params, @@ -168,20 +187,23 @@ class ConnectionRouter : public ConnectionRouterInterface { // Finds a path to sink_node, starting from the elements currently in the // heap. // + // If the path is not found, which means that the path_cost of sink_node in + // RR node route info has never been updated, `rr_node_route_inf_[sink_node] + // .path_cost` will be the initial value (i.e., float infinity). This case + // can be detected by `std::isinf(rr_node_route_inf_[sink_node].path_cost)`. 
+ // // This is the core maze routing routine. // // Note: For understanding the connection router, start here. - // - // Returns either the last element of the path, or nullptr if no path is - // found - t_heap* timing_driven_route_connection_from_heap( + void timing_driven_route_connection_from_heap( RRNodeId sink_node, const t_conn_cost_params& cost_params, const t_bb& bounding_box); // Expand this current node if it is a cheaper path. void timing_driven_expand_cheapest( - t_heap* cheapest, + RRNodeId from_node, + float new_total_cost, RRNodeId target_node, const t_conn_cost_params& cost_params, const t_bb& bounding_box, @@ -189,20 +211,19 @@ class ConnectionRouter : public ConnectionRouterInterface { // Expand each neighbor of the current node. void timing_driven_expand_neighbours( - t_heap* current, + const RTExploredNode& current, const t_conn_cost_params& cost_params, const t_bb& bounding_box, RRNodeId target_node, const t_bb& target_bb); - // Conditionally adds to_node to the router heap (via path from from_node + // Conditionally adds to_node to the router heap (via path from current.index // via from_edge). // // RR nodes outside bounding box specified in bounding_box are not added // to the heap. 
void timing_driven_expand_neighbour( - t_heap* current, - RRNodeId from_node, + const RTExploredNode& current, RREdgeId from_edge, RRNodeId to_node, const t_conn_cost_params& cost_params, @@ -214,28 +235,23 @@ class ConnectionRouter : public ConnectionRouterInterface { // non-configurable edges void timing_driven_add_to_heap( const t_conn_cost_params& cost_params, - const t_heap* current, - RRNodeId from_node, + const RTExploredNode& current, RRNodeId to_node, RREdgeId from_edge, RRNodeId target_node); // Calculates the cost of reaching to_node void evaluate_timing_driven_node_costs( - t_heap* to, + RTExploredNode* to, const t_conn_cost_params& cost_params, RRNodeId from_node, - RRNodeId to_node, - RREdgeId from_edge, RRNodeId target_node); // Find paths from current heap to all nodes in the RR graph - vtr::vector timing_driven_find_all_shortest_paths_from_heap( + vtr::vector timing_driven_find_all_shortest_paths_from_heap( const t_conn_cost_params& cost_params, const t_bb& bounding_box); - void empty_heap_annotating_node_route_inf(); - //Adds the route tree rooted at rt_node to the heap, preparing it to be //used as branch-points for further routing. void add_route_tree_to_heap(const RouteTreeNode& rt_node, @@ -286,8 +302,13 @@ class ConnectionRouter : public ConnectionRouterInterface { bool only_opin_inter_layer; - // The path manager for RCV, keeps track of the route tree as a set, also manages the allocation of the heap types + // Cumulative time spent in the path search part of the connection router. + std::chrono::microseconds path_search_cumulative_time; + + // The path manager for RCV, keeps track of the route tree as a set, also + // manages the allocation of `rcv_path_data`. PathManager rcv_path_manager; + vtr::vector rcv_path_data; }; /** Construct a connection router that uses the specified heap type. 
diff --git a/vpr/src/route/connection_router_interface.h b/vpr/src/route/connection_router_interface.h index b732e8f839e..62111edc285 100644 --- a/vpr/src/route/connection_router_interface.h +++ b/vpr/src/route/connection_router_interface.h @@ -52,8 +52,8 @@ class ConnectionRouterInterface { * Returns a tuple of: * bool: path exists? (hard failure, rr graph disconnected) * bool: should retry with full bounding box? - * t_heap: heap element of cheapest path */ - virtual std::tuple timing_driven_route_connection_from_route_tree( + * RTExploredNode: the explored sink node, from which the cheapest path can be found via back-tracing */ + virtual std::tuple timing_driven_route_connection_from_route_tree( const RouteTreeNode& rt_root, RRNodeId sink_node, const t_conn_cost_params& cost_params, @@ -71,8 +71,8 @@ class ConnectionRouterInterface { * Returns a tuple of: * bool: path exists? (hard failure, rr graph disconnected) * bool: should retry with full bounding box? - * t_heap: heap element of cheapest path */ - virtual std::tuple timing_driven_route_connection_from_route_tree_high_fanout( + * RTExploredNode: the explored sink node, from which the cheapest path can be found via back-tracing */ + virtual std::tuple timing_driven_route_connection_from_route_tree_high_fanout( const RouteTreeNode& rt_root, RRNodeId sink_node, const t_conn_cost_params& cost_params, @@ -91,7 +91,10 @@ class ConnectionRouterInterface { // Dijkstra's algorithm with a modified exit condition (runs until heap is // empty). When using cost_params.astar_fac = 0, for efficiency the // RouterLookahead used should be the NoOpLookahead. - virtual vtr::vector timing_driven_find_all_shortest_paths_from_route_tree( + // + // Note: This routine is currently used only to generate information that + // may be helpful in debugging an architecture. 
+ virtual vtr::vector timing_driven_find_all_shortest_paths_from_route_tree( const RouteTreeNode& rt_root, const t_conn_cost_params& cost_params, const t_bb& bounding_box, diff --git a/vpr/src/route/d_ary_heap.h b/vpr/src/route/d_ary_heap.h new file mode 100644 index 00000000000..5ac59f1eef2 --- /dev/null +++ b/vpr/src/route/d_ary_heap.h @@ -0,0 +1,74 @@ +#ifndef _VTR_D_ARY_HEAP_H +#define _VTR_D_ARY_HEAP_H + +#include + +#include "device_grid.h" +#include "heap_type.h" +#include "d_ary_heap.tpp" + +/** + * @brief Min-heap with D child nodes per parent. + * + * @note + * Currently, DAryHeap only has two children, BinaryHeap and FourAryHeap. On small circuits, + * these heaps have negligible differences in runtime, but on larger heaps, runtime is lower when + * using FourAryHeap. On Koios large benchmarks, the runtime is ~5% better on FourAryHeap compared + * to BinaryHeap. This is likely because FourAryHeap has lower tree height, and as we can fit 8 + * heap node (each is 8 bytes) on a cache line (commonly 64 bytes on modern architectures), each + * heap operation (the comparison among sibling nodes) tends to benefit from the caches. 
+*/ +template +class DAryHeap : public HeapInterface { + public: + using priority_queue = customized_d_ary_priority_queue, HeapNodeComparator>; + + DAryHeap() {} + + void init_heap(const DeviceGrid& grid) { + size_t target_heap_size = (grid.width() - 1) * (grid.height() - 1); + pq_.reserve(target_heap_size); // reserve the memory for the heap structure + } + + bool try_pop(HeapNode& heap_node) { + if (pq_.empty()) { + return false; + } else { + heap_node = pq_.top(); + pq_.pop(); + return true; + } + } + + void add_to_heap(const HeapNode& heap_node) { + pq_.push(heap_node); + } + + void push_back(const HeapNode& heap_node) { + pq_.push(heap_node); // FIXME: add to heap without maintaining the heap property + } + + void build_heap() { + // FIXME: restore the heap property after pushing back nodes + } + + bool is_valid() const { + return true; // FIXME: checking if the heap property is maintained or not + } + + void empty_heap() { + pq_.clear(); + } + + bool is_empty_heap() const { + return (bool)(pq_.empty()); + } + + private: + priority_queue pq_; +}; + +using BinaryHeap = DAryHeap<2>; +using FourAryHeap = DAryHeap<4>; + +#endif /* _VTR_D_ARY_HEAP_H */ diff --git a/vpr/src/route/d_ary_heap.tpp b/vpr/src/route/d_ary_heap.tpp new file mode 100644 index 00000000000..565b8bac72b --- /dev/null +++ b/vpr/src/route/d_ary_heap.tpp @@ -0,0 +1,162 @@ +#pragma once + +#include +#include + +template, class Compare = std::less> +class customized_d_ary_priority_queue { + static_assert(D == 2 || D == 4, "Only support binary or 4-ary priority queue"); + + public: + typedef Container container_type; + typedef typename Container::value_type value_type; + typedef typename Container::size_type size_type; + typedef typename Container::reference reference; + typedef typename Container::const_reference const_reference; + + Compare comp_; + /** + * @details + * heap_ is indexed from [1..heap_size]; the 0th element is unused. 
This simplifies arithmetic + * in first_child_index() and parent_index() functions. + * + * @todo + * If an 8-ary heap is implemented, experiment with starting at index 0 + */ + Container heap_; + + private: + inline size_t parent_index(const size_t i) { + if constexpr (D == 2) { + return i >> 1; + } else { + return (i + 2) >> 2; + } + } + + inline size_t first_child_index(const size_t i) { + if constexpr (D == 2) { + return i << 1; + } else { + return (i << 2) - 2; + } + } + + inline size_t largest_child_index(const size_t first_child) { + if constexpr (D == 2) { + return first_child + !!comp_(heap_[first_child], heap_[first_child + 1]); + } else { + const size_t child_1 = first_child; + const size_t child_2 = child_1 + 1; + const size_t child_3 = child_1 + 2; + const size_t child_4 = child_1 + 3; + const size_t first_half_largest = child_1 + !!comp_(heap_[child_1], heap_[child_2]); + const size_t second_half_largest = child_3 + !!comp_(heap_[child_3], heap_[child_4]); + return comp_(heap_[first_half_largest], heap_[second_half_largest]) ? second_half_largest : first_half_largest; + } + } + + inline size_t largest_child_index_partial(const size_t first_child, const size_t num_children /*must < `D`*/) { + if constexpr (D == 2) { + (void) num_children; + return first_child; + } else { + switch (num_children) { + case 3: { + const size_t child_1 = first_child; + const size_t child_2 = child_1 + 1; + const size_t child_3 = child_1 + 2; + const size_t first_two_children_largest = child_1 + !!comp_(heap_[child_1], heap_[child_2]); + return comp_(heap_[first_two_children_largest], heap_[child_3]) ? 
child_3 : first_two_children_largest; + } + case 2: { + return first_child + !!comp_(heap_[first_child], heap_[first_child + 1]); + } + default: { + return first_child; + } + } + } + } + + inline void pop_customized_heap() { + size_t length = heap_.size() - 1; + auto end = heap_.end(); + auto value = std::move(end[-1]); + end[-1] = std::move(heap_[1]); + size_t index = 1; + for (;;) { + size_t first_child = first_child_index(index); + size_t last_child = first_child + (D - 1); + if (last_child < length) { + size_t largest_child = largest_child_index(first_child); + if (!comp_(value, heap_[largest_child])) { + break; + } + heap_[index] = std::move(heap_[largest_child]); + index = largest_child; + } else if (first_child < length) { + size_t largest_child = largest_child_index_partial(first_child, length - first_child); + if (comp_(value, heap_[largest_child])) { + heap_[index] = std::move(heap_[largest_child]); + index = largest_child; + } + break; + } else { + break; + } + } + heap_[index] = std::move(value); + } + + inline void push_customized_heap() { + auto value = std::move(heap_.back()); + size_t index = heap_.size() - 1; + while (index > 1) { + size_t parent = parent_index(index); + if (!comp_(heap_[parent], value)) { + break; + } + heap_[index] = std::move(heap_[parent]); + index = parent; + } + heap_[index] = std::move(value); + } + + public: + explicit customized_d_ary_priority_queue(const Compare& compare = Compare(), + const Container& cont = Container()) + : comp_(compare) + , heap_(cont) { + heap_.resize(1); // FIXME: currently do not support `make_heap` from cont (heap_) + } + + inline bool empty() const { + return heap_.size() == 1; // heap_[0] is invalid, heap is indexed from 1 + } + + inline size_type size() const { + return heap_.size() - 1; // heap_[0] is invalid, heap is indexed from 1 + } + + inline const_reference top() const { return heap_[1]; } + + inline void pop() { + pop_customized_heap(); + heap_.pop_back(); + } + + inline void push(const 
value_type& value) { + heap_.push_back(value); + push_customized_heap(); + } + + inline void push(value_type&& value) { + heap_.push_back(std::move(value)); + push_customized_heap(); + } + + inline void clear() { heap_.resize(1); } + + inline void reserve(size_type new_cap) { heap_.reserve(new_cap + 1); } +}; diff --git a/vpr/src/route/four_ary_heap.cpp b/vpr/src/route/four_ary_heap.cpp deleted file mode 100644 index e70ed389e9a..00000000000 --- a/vpr/src/route/four_ary_heap.cpp +++ /dev/null @@ -1,107 +0,0 @@ -#include "four_ary_heap.h" -#include "vtr_log.h" - -// The leftmost/smallest-index child of node i -static inline size_t first_child(size_t i) { return (i << 2) - 2; } - -inline size_t FourAryHeap::parent(size_t i) const { return (i + 2) >> 2; } - -inline size_t FourAryHeap::smallest_child(size_t i) const { - // This function could be a simple loop to find the min cost child. However, - // using switch-case is 3% faster, which is worthwhile as this function is - // called very frequently. - - const size_t child_1 = first_child(i); - const size_t child_2 = child_1 + 1; - const size_t child_3 = child_1 + 2; - const size_t child_4 = child_1 + 3; - - size_t num_children = std::max(std::min(4, (int)heap_tail_ - (int)child_1), 0); - - switch (num_children) { - case 4: { - size_t minA = (heap_[child_1].cost < heap_[child_2].cost) ? child_1 : child_2; - size_t minB = (heap_[child_3].cost < heap_[child_4].cost) ? child_3 : child_4; - return (heap_[minA].cost < heap_[minB].cost) ? minA : minB; - } - case 3: { - size_t minA = (heap_[child_1].cost < heap_[child_2].cost) ? child_1 : child_2; - return (heap_[minA].cost < heap_[child_3].cost) ? minA : child_3; - } - case 2: - return (heap_[child_1].cost < heap_[child_2].cost) ? 
child_1 : child_2; - default: - return child_1; - } -} - -bool FourAryHeap::is_valid() const { - if (heap_.empty()) { - return false; - } - - for (size_t i = 1; i <= parent(heap_tail_); ++i) { - size_t leftmost_child = first_child(i); - - for (size_t j = 0; j < 4; ++j) { - if (leftmost_child + j >= heap_tail_) - break; - else if (heap_[leftmost_child + j].cost < heap_[i].cost) - return false; - } - } - - return true; -} - -t_heap* FourAryHeap::get_heap_head() { - /* Returns a pointer to the smallest element on the heap, or NULL if the * - * heap is empty. Invalid (index == OPEN) entries on the heap are never * - * returned -- they are just skipped over. */ - - t_heap* cheapest; - size_t hole, child; - - do { - if (heap_tail_ == 1) { /* Empty heap. */ - VTR_LOG_WARN("Empty heap occurred in get_heap_head.\n"); - return (nullptr); - } - - cheapest = heap_[1].elem_ptr; - - hole = 1; - child = smallest_child(hole); - - --heap_tail_; - - while (child < heap_tail_) { - child = smallest_child(hole); - - heap_[hole] = heap_[child]; - hole = child; - child = first_child(hole); - } - - sift_up(hole, heap_[heap_tail_]); - } while (!cheapest->index.is_valid()); /* Get another one if invalid entry. */ - - return (cheapest); -} - -// make a heap rooted at index hole by **sifting down** in O(lgn) time -void FourAryHeap::sift_down(size_t hole) { - heap_elem head{heap_[hole]}; - size_t child{smallest_child(hole)}; - - while (child < heap_tail_) { - if (heap_[child].cost < head.cost) { - heap_[hole] = heap_[child]; - hole = child; - child = smallest_child(hole); - } else - break; - } - - heap_[hole] = head; -} \ No newline at end of file diff --git a/vpr/src/route/four_ary_heap.h b/vpr/src/route/four_ary_heap.h deleted file mode 100644 index 8dcb1d01b7d..00000000000 --- a/vpr/src/route/four_ary_heap.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef VTR_FOUR_ARY_HEAP_H -#define VTR_FOUR_ARY_HEAP_H - -#include "k_ary_heap.h" -#include - -/** - * @brief Minheap with 4 child nodes per parent. 
- * - * @note - * Currently, KAryHeap's two children are BinaryHeap and FourAryHeap. On small circuits, these - * heaps have negligible differences in runtime, but on larger heaps, runtime is lower when - * using FourAryHeap. On titan benchmarks, the runtime is ~1.8% better on FourAryHeap compared - * to BinaryHeap. This is likely because FourAryHeap is more cache friendly, as we can fit 5 - * heap_elem on a cache line. -*/ -class FourAryHeap : public KAryHeap { - public: - bool is_valid() const final; - t_heap* get_heap_head() final; - - private: - void sift_down(size_t hole) final; - size_t parent(size_t i) const final; - - /** - * @param i The parent node. - * - * @return The child node of i with the smallest cost. Returns the first (smallest index) child of i - * if i has no children. - */ - size_t smallest_child(size_t i) const; -}; - -#endif //VTR_FOUR_ARY_HEAP_H diff --git a/vpr/src/route/heap_type.cpp b/vpr/src/route/heap_type.cpp index f9ee97dd657..d3cfec9c495 100644 --- a/vpr/src/route/heap_type.cpp +++ b/vpr/src/route/heap_type.cpp @@ -1,63 +1,7 @@ #include "heap_type.h" -#include "binary_heap.h" -#include "four_ary_heap.h" -#include "bucket.h" -#include "rr_graph_fwd.h" #include "vpr_error.h" -#include "vpr_types.h" - -HeapStorage::HeapStorage() - : heap_free_head_(nullptr) - , num_heap_allocated_(0) {} - -t_heap* -HeapStorage::alloc() { - if (heap_free_head_ == nullptr) { /* No elements on the free list */ - heap_free_head_ = vtr::chunk_new(&heap_ch_); - } - - //Extract the head - t_heap* temp_ptr = heap_free_head_; - heap_free_head_ = heap_free_head_->next_heap_item(); - - num_heap_allocated_++; - - //Reset - temp_ptr->set_next_heap_item(nullptr); - temp_ptr->cost = 0.; - temp_ptr->backward_path_cost = 0.; - temp_ptr->R_upstream = 0.; - temp_ptr->index = RRNodeId::INVALID(); - temp_ptr->path_data = nullptr; - temp_ptr->set_prev_edge(RREdgeId::INVALID()); - return (temp_ptr); -} - -void HeapStorage::free(t_heap* hptr) { - 
hptr->set_next_heap_item(heap_free_head_); - heap_free_head_ = hptr; - num_heap_allocated_--; -} - -void HeapStorage::free_all_memory() { - VTR_ASSERT(num_heap_allocated_ == 0); - - if (heap_free_head_ != nullptr) { - t_heap* curr = heap_free_head_; - while (curr) { - t_heap* tmp = curr; - curr = curr->next_heap_item(); - - vtr::chunk_delete(tmp, &heap_ch_); - } - - heap_free_head_ = nullptr; - } - - /*free the memory chunks that were used by heap and linked f pointer */ - free_chunk_memory(&heap_ch_); -} +#include "d_ary_heap.h" std::unique_ptr make_heap(e_heap_type heap_type) { switch (heap_type) { @@ -65,8 +9,6 @@ std::unique_ptr make_heap(e_heap_type heap_type) { return std::make_unique(); case e_heap_type::FOUR_ARY_HEAP: return std::make_unique(); - case e_heap_type::BUCKET_HEAP_APPROXIMATION: - return std::make_unique(); default: VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Unknown heap_type %d", heap_type); } diff --git a/vpr/src/route/heap_type.h b/vpr/src/route/heap_type.h index e3dcb071c7d..dd722928bcc 100644 --- a/vpr/src/route/heap_type.h +++ b/vpr/src/route/heap_type.h @@ -1,6 +1,7 @@ #ifndef _HEAP_TYPE_H #define _HEAP_TYPE_H +#include #include "physical_types.h" #include "device_grid.h" #include "vtr_memory.h" @@ -8,186 +9,81 @@ #include "rr_graph_fwd.h" #include "route_path_manager.h" +using HeapNodePriority = float; +using HeapNodeId = RRNodeId; +// Ensure that the heap node structure occupies only 64 bits to make the heap cache-friendly +// and achieve high performance. +static_assert(sizeof(RRNodeId) == sizeof(uint32_t)); + /** * @brief Used by the heap as its fundamental data structure. Each heap - * element represents a partial route. + * node contains only the heap priority value (i.e., the cost of the RR node) + * and the index of the RR node. The size of each heap node is minimized to + * ensure that the heap is cache-friendly and to make the initialization and + * copying of heap nodes efficient. 
*/ -struct t_heap { - ///@brief The cost used to sort heap. For the timing-driven router this is the backward_path_cost + expected cost to the target. - float cost = 0.; - ///@brief The "known" cost of the path up to and including this node. Used only by the timing-driven router. In this case, the - ///.cost member contains not only the known backward cost but also an expected cost to the target. - float backward_path_cost = 0.; - ///@brief Used only by the timing-driven router. Stores the upstream resistance to ground from this node, including the resistance - /// of the node itself (device_ctx.rr_nodes[index].R). - float R_upstream = 0.; - ///@brief The RR node index associated with the costs/R_upstream values. - RRNodeId index = RRNodeId::INVALID(); - ///@brief Structure to handle extra RCV structures. Managed by PathManager class. - t_heap_path* path_data; - - /** - * @brief Get the next t_heap item in the linked list. - */ - t_heap* next_heap_item() const { - return u.next; - } - - /** - * @brief Set the next t_heap item in the linked list. - */ - void set_next_heap_item(t_heap* next) { - u.next = next; - } - - /** - * @brief Get the edge from the previous node used to reach the current node. - * - * @note - * Be careful: will return 0 (a valid id!) if uninitialized. - */ - constexpr RREdgeId prev_edge() const { - static_assert(sizeof(uint32_t) == sizeof(RREdgeId)); - return RREdgeId(u.prev_edge); - } - - /** - * @brief Set the edge from the previous node used to reach the current node.. - */ - inline void set_prev_edge(RREdgeId edge) { - static_assert(sizeof(uint32_t) == sizeof(RREdgeId)); - u.prev_edge = size_t(edge); - } - - private: - union { - ///@brief Pointer to the next t_heap structure in the free linked list. - t_heap* next = nullptr; - - /** - * @brief The edge from the previous node used to reach the current. Not used when on the heap. 
- * - * @note - * The previous edge is not a StrongId for performance & brevity - * reasons: StrongIds can't be trivially placed into an anonymous - * union. - */ - uint32_t prev_edge; - } u; +struct HeapNode { + ///@brief The priority value or cost used to sort heap. For the timing-driven router + /// this is the total_cost (i.e., backward_path_cost + expected cost to the target). + HeapNodePriority prio; + ///@brief The RR node index associated with the cost. + HeapNodeId node; }; /** - * @brief t_heap object pool, useful for implementing heaps that conform to - * HeapInterface. + * @brief The comparison function object used to sort heap, following the STL style. */ -class HeapStorage { - public: - HeapStorage(); - - /** - * @brief Allocate a heap item. - * - * @return The allocated item. - */ - t_heap* alloc(); - - /** - * @brief Free a heap item. - */ - void free(t_heap* hptr); - - /** - * @brief Free all heap items. - */ - void free_all_memory(); - - private: - /* For keeping track of the sudo malloc memory for the heap*/ - vtr::t_chunk heap_ch_; - - t_heap* heap_free_head_; - size_t num_heap_allocated_; +struct HeapNodeComparator { + bool operator()(const HeapNode& u, const HeapNode& v) { + return u.prio > v.prio; + } }; /** * @brief Interface to heap used for router optimization. - * - * @note - * Objects used in instances of HeapInterface must always be allocated - * and free'd using the HeapInterface::alloc and HeapInterface::free methods - * of that instance. Object pools are likely in use. - * - * @details - * As a general rule, any t_heap objects returned from this interface, - * **must** be HeapInterface::free'd before destroying the HeapInterface - * instance. This ensure that no leaks are present in the users of the heap. - * Violating this assumption may result in an assertion violation. */ class HeapInterface { public: virtual ~HeapInterface() {} - /** - * @brief Allocate a heap item. 
- * - * @details - * This transfers ownership of the t_heap object from HeapInterface to the - * caller. - */ - virtual t_heap* alloc() = 0; - - /** - * @brief Free a heap item. - * - * @details - * HeapInterface::free can be called on objects returned from either - * HeapInterface::alloc or HeapInterface::get_heap_head. - * - * @param hptr The element to free. - */ - virtual void free(t_heap* hptr) = 0; - /** * @brief Initializes heap storage based on the size of the device. * * @note * This method **must** be invoked at least once prior to the * following methods being called:
+ * - try_pop
* - add_to_heap
* - push_back
- * - get_heap_head
- * - is_empty_heap
- * - empty_heap
* - build_heap
+ * - empty_heap
+ * - is_empty_heap
* * @param grid The FPGA device grid */ virtual void init_heap(const DeviceGrid& grid) = 0; /** - * @brief Add t_heap to heap, preserving heap property. - * - * @details - * This transfers ownership of the t_heap object to HeapInterface from the - * called. + * @brief Pop the head (smallest element) of the heap. Return true if the pop + * succeeds; otherwise (if the heap is empty), return false. * - * @param hptr The element to add. + * @param heap_node The reference to a location to store the popped heap node. */ - virtual void add_to_heap(t_heap* hptr) = 0; + virtual bool try_pop(HeapNode& heap_node) = 0; /** - * @brief Add t_heap to heap, however does not preserve heap property. + * @brief Add HeapNode to heap, preserving heap property. * - * @details - * This is useful if multiple t_heap's are being added in bulk. Once - * all t_heap's have been added, HeapInterface::build_heap can be invoked - * to restore the heap property in an efficient way.

- * This transfers ownership of the t_heap object to HeapInterface from the - * called. + * @param heap_node The element to add. + */ + virtual void add_to_heap(const HeapNode& heap_node) = 0; + + /** + * @brief Add HeapNode to heap, however does not preserve heap property. * * @param hptr The element to insert. */ - virtual void push_back(t_heap* const hptr) = 0; + virtual void push_back(const HeapNode& heap_node) = 0; /** * @brief Restore the heap property. @@ -198,20 +94,6 @@ class HeapInterface { */ virtual void build_heap() = 0; - /** - * @brief Pop the head (smallest element) of the heap, and return it. - * - * @details - * This transfers ownership of the t_heap object from HeapInterface to the - * caller. - */ - virtual t_heap* get_heap_head() = 0; - - /** - * @brief Is the heap empty? - */ - virtual bool is_empty_heap() const = 0; - /** * @brief Is the heap valid? */ @@ -223,50 +105,15 @@ class HeapInterface { virtual void empty_heap() = 0; /** - * @brief Free all storage used by the heap. - * - * @details - * This returns all memory allocated by the HeapInterface instance. Only - * call this if the heap is no longer being used. - * - * @note - * Only invoke this method if all objects returned from this - * HeapInterface instance have been free'd. - */ - virtual void free_all_memory() = 0; - - /** - * @brief Set maximum number of elements that the heap should contain - * (the prune_limit). If the prune limit is hit, then the heap should - * kick out duplicate index entries. - * - * @details - * The prune limit exists to provide a maximum bound on memory usage in - * the heap. In some pathological cases, the router may explore - * incrementally better paths, resulting in many duplicate entries for - * RR nodes. To handle this edge case, if the number of heap items - * exceeds the prune_limit, then the heap will compacts itself.

- * The heap compaction process simply means taking the lowest cost entry - * for each index (e.g. RR node). All nodes with higher costs can safely - * be dropped.

- * The pruning process is intended to bound the memory usage the heap can - * consume based on the prune_limit, which is expected to be a function of - * the graph size. - * - * @param max_index The highest index possible in the heap. - * @param prune_limit The maximum number of heap entries before pruning should - * take place. This should always be higher than max_index, likely by a - * significant amount. The pruning process has some overhead, so prune_limit - * should be ~2-4x the max_index to prevent excess pruning when not required. + * @brief Is the heap empty? */ - virtual void set_prune_limit(size_t max_index, size_t prune_limit) = 0; + virtual bool is_empty_heap() const = 0; }; enum class e_heap_type { INVALID_HEAP = 0, BINARY_HEAP, FOUR_ARY_HEAP, - BUCKET_HEAP_APPROXIMATION, }; /** diff --git a/vpr/src/route/k_ary_heap.cpp b/vpr/src/route/k_ary_heap.cpp deleted file mode 100644 index f7dc7b8093c..00000000000 --- a/vpr/src/route/k_ary_heap.cpp +++ /dev/null @@ -1,173 +0,0 @@ -#include "k_ary_heap.h" -#include "rr_graph_fwd.h" -#include "vtr_log.h" - -KAryHeap::KAryHeap() - : heap_() - , heap_size_(0) - , heap_tail_(0) - , max_index_(std::numeric_limits::max()) - , prune_limit_(std::numeric_limits::max()) {} - -KAryHeap::~KAryHeap() { - free_all_memory(); -} - -t_heap* KAryHeap::alloc() { - return storage_.alloc(); -} -void KAryHeap::free(t_heap* hptr) { - storage_.free(hptr); -} - -void KAryHeap::init_heap(const DeviceGrid& grid) { - size_t target_heap_size = (grid.width() - 1) * (grid.height() - 1); - if (heap_.empty() || heap_size_ < target_heap_size) { - if (!heap_.empty()) { - // coverity[offset_free : Intentional] - heap_.clear(); - } - heap_size_ = (grid.width() - 1) * (grid.height() - 1); - heap_.resize(heap_size_ + 1); /* heap_size_ + 1 because heap stores from [1..heap_size] */ - } - heap_tail_ = 1; -} - -void KAryHeap::add_to_heap(t_heap* hptr) { - expand_heap_if_full(); - // start with undefined hole - ++heap_tail_; - heap_elem new_elem = 
{hptr, hptr->cost}; - sift_up(heap_tail_ - 1, new_elem); - - // If we have pruned, rebuild the heap now. - if (check_prune_limit()) { - build_heap(); - } -} - -bool KAryHeap::is_empty_heap() const { - return (bool)(heap_tail_ == 1); -} - -void KAryHeap::empty_heap() { - for (size_t i = 1; i < heap_tail_; i++) - free(heap_[i].elem_ptr); - - heap_tail_ = 1; -} - -size_t KAryHeap::size() const { return heap_tail_ - 1; } // heap[0] is not valid element - -// runs in O(n) time by sifting down; the least work is done on the most elements: 1 swap for bottom layer, 2 swap for 2nd, ... lgn swap for top -// 1*(n/k^1) + 2*(n/k^2) + 3*(n/k^3) + ... + lgn*1 = k*n (sum of i/k^i) -void KAryHeap::build_heap() { - for (size_t i = parent(heap_tail_); i != 0; --i) - sift_down(i); -} - -void KAryHeap::set_prune_limit(size_t max_index, size_t prune_limit) { - if (prune_limit != std::numeric_limits::max()) { - VTR_ASSERT(max_index < prune_limit); - } - max_index_ = max_index; - prune_limit_ = prune_limit; -} - -void KAryHeap::sift_up(size_t leaf, heap_elem const& node) { - while ((leaf > 1) && (node.cost < heap_[parent(leaf)].cost)) { - // sift hole up - heap_[leaf] = heap_[parent(leaf)]; - leaf = parent(leaf); - } - - heap_[leaf] = node; -} - -void KAryHeap::expand_heap_if_full() { - if (heap_tail_ >= heap_size_) { /* Heap is full */ - heap_size_ *= 2; - heap_.resize(heap_size_ + 1); - } -} - -// adds an element to the back of heap and expand if necessary, but does not maintain heap property -void KAryHeap::push_back(t_heap* const hptr) { - expand_heap_if_full(); - - heap_elem new_elem = {hptr, hptr->cost}; - heap_[heap_tail_] = new_elem; - ++heap_tail_; - - check_prune_limit(); -} - -void KAryHeap::free_all_memory() { - if (!heap_.empty()) { - empty_heap(); - // coverity[offset_free : Intentional] - heap_.clear(); - } - - // heap_ = nullptr; /* Defensive coding: crash hard if I use these. 
*/ - storage_.free_all_memory(); -} - -bool KAryHeap::check_prune_limit() { - if (heap_tail_ > prune_limit_) { - prune_heap(); - return true; - } - - return false; -} - -void KAryHeap::prune_heap() { - VTR_ASSERT(max_index_ < prune_limit_); - - heap_elem blank_elem = {nullptr, 0.0}; - std::vector best_heap_item(max_index_, blank_elem); - - // Find the cheapest instance of each index and store it. - for (size_t i = 1; i < heap_tail_; i++) { - if (heap_[i].elem_ptr == nullptr) { - continue; - } - - if (!heap_[i].elem_ptr->index.is_valid()) { - free(heap_[i].elem_ptr); - heap_[i].elem_ptr = nullptr; - continue; - } - - auto idx = size_t(heap_[i].elem_ptr->index); - - VTR_ASSERT(idx < max_index_); - - if (best_heap_item[idx].elem_ptr == nullptr || best_heap_item[idx].cost > heap_[i].cost) { - best_heap_item[idx] = heap_[i]; - } - } - - // Free unused nodes. - for (size_t i = 1; i < heap_tail_; i++) { - if (heap_[i].elem_ptr == nullptr) { - continue; - } - - auto idx = size_t(heap_[i].elem_ptr->index); - - if (best_heap_item[idx].elem_ptr != heap_[i].elem_ptr) { - free(heap_[i].elem_ptr); - heap_[i].elem_ptr = nullptr; - } - } - - heap_tail_ = 1; - - for (size_t i = 0; i < max_index_; ++i) { - if (best_heap_item[i].elem_ptr != nullptr) { - heap_[heap_tail_++] = best_heap_item[i]; - } - } -} diff --git a/vpr/src/route/k_ary_heap.h b/vpr/src/route/k_ary_heap.h deleted file mode 100644 index fb0e8763fdf..00000000000 --- a/vpr/src/route/k_ary_heap.h +++ /dev/null @@ -1,125 +0,0 @@ -#ifndef VTR_K_ARY_HEAP_H -#define VTR_K_ARY_HEAP_H - -#include "heap_type.h" -#include - -/** - * @brief Abstract class whose children are HeapInterface implementations of a k-ary minheap. 
- */ -class KAryHeap : public HeapInterface { - public: - KAryHeap(); - ~KAryHeap(); - - t_heap* alloc() final; - void free(t_heap* hptr) final; - - void init_heap(const DeviceGrid& grid) final; - void add_to_heap(t_heap* hptr) final; - void push_back(t_heap* const hptr) final; - bool is_empty_heap() const final; - void empty_heap() final; - void build_heap() final; - void set_prune_limit(size_t max_index, size_t prune_limit) final; - void free_all_memory() final; - - virtual bool is_valid() const = 0; - virtual t_heap* get_heap_head() = 0; - - protected: - /** - * @brief The struct which the heap_ vector contains. - * - * @details - * Previously, heap_ was made of only t_heap pointers. This meant that - * all comparisons required dereferencing to attain the element's cost. - * Now, the cost is attained by dereferencing only once in add_to_heap(). - * This resulted in a slightly larger memory footprint but a ~1.4% runtime - * improvement. - * - * @param elem_ptr A pointer to the t_heap struct which contains all - * the node's information. - * @param cost The cost of the node. - * - * @todo - * We are currently storing the node cost in two places (in elem_ptr->cost and cost). This might be fixed in two ways:
- * 1. Don't store the cost in t_heap.
- * 2. Instead of using pointers, use a 32-bit ID. If we do this, we can create a new 8-ary heap, which is likely to be even - * faster as we can fit more heap_elem on one cache line (currently, we can fit 5 as heap_elem is 12 bytes), even with more - * comparisons. - */ - struct heap_elem { - t_heap* elem_ptr; - float cost; - }; - - /** - * @return The number of elements in the heap. - */ - size_t size() const; - - /** - * @brief Sift node up until it satisfies minheap property. - * - * @details - * O(lgn) sifting up to maintain heap property after insertion (should sift - * own when building heap) - * - * @param leaf The heap leaf where node currently resides. - * @param node The node to be sifted up. - */ - void sift_up(size_t leaf, heap_elem const& node); - - /** - * @brief Expands heap by 2 times if it is full. - */ - void expand_heap_if_full(); - - /** - * @brief If the size of the heap is greater than the prune limit, prune the heap. - * - * @return Whether the heap was pruned. - */ - bool check_prune_limit(); - - /** - * @brief Prune the heap. - */ - void prune_heap(); - - /** - * @brief Make a heap rooted at index hole by **sifting down** in O(lgn) time - * - * @param hole - */ - virtual void sift_down(size_t hole) = 0; - - /** - * @param i Heap child node. - * - * @return Heap parent node. - */ - virtual size_t parent(size_t i) const = 0; - - HeapStorage storage_; - - /** - * @details - * heap_ is indexed from [1..heap_size]; the 0th element is unused. For BinaryHeap, this simplifies - * arithmetic in left() and parent() functions. Using a heap beginning at index 0 would simplify - * first_child() and parent() functions in FourAryHeap, but this does not improve runtime. 
- * - * @todo - * If an 8-ary heap is implemented, experiment with starting at index 0 - */ - std::vector heap_; - - size_t heap_size_; /* Number of slots in the heap array */ - size_t heap_tail_; /* Index of first unused slot in the heap array */ - - size_t max_index_; - size_t prune_limit_; -}; - -#endif // VTR_K_ARY_HEAP_H diff --git a/vpr/src/route/netlist_routers.h b/vpr/src/route/netlist_routers.h index 448e4f7f76c..d5f5354a392 100644 --- a/vpr/src/route/netlist_routers.h +++ b/vpr/src/route/netlist_routers.h @@ -16,9 +16,6 @@ * of this interface. */ #include "NetPinTimingInvalidator.h" -#include "binary_heap.h" -#include "four_ary_heap.h" -#include "bucket.h" #include "clustered_netlist_utils.h" #include "connection_based_routing_fwd.h" #include "connection_router.h" @@ -182,20 +179,6 @@ inline std::unique_ptr make_netlist_router( routing_predictor, choking_spots, is_flat); - } else if (router_opts.router_heap == e_heap_type::BUCKET_HEAP_APPROXIMATION) { - return make_netlist_router_with_heap( - net_list, - router_lookahead, - router_opts, - connections_inf, - net_delay, - netlist_pin_lookup, - timing_info, - pin_timing_invalidator, - budgeting_inf, - routing_predictor, - choking_spots, - is_flat); } else { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Unknown heap type %d", router_opts.router_heap); } diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp index fe9db221ae9..7fd9720e450 100644 --- a/vpr/src/route/route_common.cpp +++ b/vpr/src/route/route_common.cpp @@ -291,6 +291,9 @@ void reset_path_costs(const std::vector& visited_rr_nodes) { route_ctx.rr_node_route_inf[node].path_cost = std::numeric_limits::infinity(); route_ctx.rr_node_route_inf[node].backward_path_cost = std::numeric_limits::infinity(); route_ctx.rr_node_route_inf[node].prev_edge = RREdgeId::INVALID(); + // Note: R_upstream of each node is intentionally not reset here. 
+ // For the reasons and details, please refer to the `Update R_upstream` + // in `evaluate_timing_driven_node_costs` in `connection_router.cpp`. } } @@ -781,7 +784,7 @@ void reserve_locally_used_opins(HeapInterface* heap, float pres_fac, float acc_f int num_local_opin, iconn, num_edges; int iclass, ipin; float cost; - t_heap* heap_head_ptr; + HeapNode heap_head_node; t_physical_tile_type_ptr type; auto& cluster_ctx = g_vpr_ctx.clustering(); @@ -839,22 +842,21 @@ void reserve_locally_used_opins(HeapInterface* heap, float pres_fac, float acc_f //Add the OPIN to the heap according to it's congestion cost cost = get_rr_cong_cost(to_node, pres_fac); - add_node_to_heap(heap, route_ctx.rr_node_route_inf, - to_node, cost, RREdgeId::INVALID(), - 0., 0.); + if (cost < route_ctx.rr_node_route_inf[to_node].path_cost) { + heap->add_to_heap({cost, to_node}); + } } for (ipin = 0; ipin < num_local_opin; ipin++) { //Pop the nodes off the heap. We get them from the heap so we //reserve those pins with lowest congestion cost first. - heap_head_ptr = heap->get_heap_head(); - RRNodeId inode(heap_head_ptr->index); + VTR_ASSERT(heap->try_pop(heap_head_node)); + const RRNodeId& inode = heap_head_node.node; VTR_ASSERT(rr_graph.node_type(inode) == OPIN); adjust_one_rr_occ_and_acc_cost(inode, 1, acc_fac); route_ctx.clb_opins_used_locally[blk_id][iclass][ipin] = inode; - heap->free(heap_head_ptr); } heap->empty_heap(); diff --git a/vpr/src/route/route_common.h b/vpr/src/route/route_common.h index a6f18f3af38..1d6bfb58082 100644 --- a/vpr/src/route/route_common.h +++ b/vpr/src/route/route_common.h @@ -144,98 +144,3 @@ float get_cost_from_lookahead(const RouterLookahead& router_lookahead, float R_upstream, const t_conn_cost_params cost_params, bool is_flat); - -/* Creates a new t_heap object to be placed on the heap, if the new cost * - * given is lower than the current path_cost to this channel segment. The * - * index of its predecessor is stored to make traceback easy. 
The index of * - * the edge used to get from its predecessor to it is also stored to make * - * timing analysis, etc. * - * * - * Returns t_heap suitable for adding to heap or nullptr if node is more * - * expensive than previously explored path. */ -template -t_heap* prepare_to_add_node_to_heap( - T* heap, - const RouteInf& rr_node_route_inf, - RRNodeId inode, - float total_cost, - RREdgeId prev_edge, - float backward_path_cost, - float R_upstream) { - if (total_cost >= rr_node_route_inf[inode].path_cost) - return nullptr; - - t_heap* hptr = heap->alloc(); - - hptr->index = inode; - hptr->cost = total_cost; - hptr->set_prev_edge(prev_edge); - hptr->backward_path_cost = backward_path_cost; - hptr->R_upstream = R_upstream; - return hptr; -} - -/* Puts an rr_node on the heap if it is the cheapest path. */ -template -void add_node_to_heap( - T* heap, - const RouteInf& rr_node_route_inf, - RRNodeId inode, - float total_cost, - RREdgeId prev_edge, - float backward_path_cost, - float R_upstream) { - t_heap* hptr = prepare_to_add_node_to_heap( - heap, - rr_node_route_inf, inode, total_cost, - prev_edge, backward_path_cost, R_upstream); - if (hptr) { - heap->add_to_heap(hptr); - } -} - -/* Puts an rr_node on the heap with the same condition as add_node_to_heap, - * but do not fix heap property yet as that is more efficiently done from - * bottom up with build_heap */ -template -void push_back_node( - T* heap, - const RouteInf& rr_node_route_inf, - RRNodeId inode, - float total_cost, - RREdgeId prev_edge, - float backward_path_cost, - float R_upstream) { - t_heap* hptr = prepare_to_add_node_to_heap( - heap, - rr_node_route_inf, inode, total_cost, prev_edge, - backward_path_cost, R_upstream); - if (hptr) { - heap->push_back(hptr); - } -} - -/* Puts an rr_node on the heap with the same condition as node_to_heap, - * but do not fix heap property yet as that is more efficiently done from - * bottom up with build_heap. 
Certain information is also added */ -template -void push_back_node_with_info( - T* heap, - RRNodeId inode, - float total_cost, - float backward_path_cost, - float R_upstream, - float backward_path_delay, - PathManager* rcv_path_manager) { - t_heap* hptr = heap->alloc(); - rcv_path_manager->alloc_path_struct(hptr->path_data); - - hptr->index = inode; - hptr->cost = total_cost; - hptr->backward_path_cost = backward_path_cost; - hptr->R_upstream = R_upstream; - - hptr->path_data->backward_delay = backward_path_delay; - - heap->push_back(hptr); -} diff --git a/vpr/src/route/route_net.tpp b/vpr/src/route/route_net.tpp index 7004dbb4d5f..0e8c4c268a5 100644 --- a/vpr/src/route/route_net.tpp +++ b/vpr/src/route/route_net.tpp @@ -313,7 +313,7 @@ inline NetResultFlags pre_route_to_clock_root(ConnectionRouter& router, router.clear_modified_rr_node_info(); bool found_path, retry_with_full_bb; - t_heap cheapest; + RTExploredNode cheapest; ConnectionParameters conn_params(net_id, -1, false, @@ -428,7 +428,7 @@ inline NetResultFlags route_sink(ConnectionRouter& router, router.clear_modified_rr_node_info(); bool found_path; - t_heap cheapest; + RTExploredNode cheapest; bool net_is_global = net_list.net_is_global(net_id); bool high_fanout = is_high_fanout(net_list.net_sinks(net_id).size(), router_opts.high_fanout_threshold); @@ -487,8 +487,8 @@ inline NetResultFlags route_sink(ConnectionRouter& router, update_screen(ScreenUpdatePriority::MAJOR, msg.c_str(), ROUTING, nullptr); } - if (budgeting_inf.if_set() && cheapest.path_data != nullptr && cost_params.delay_budget) { - if (cheapest.path_data->backward_delay < cost_params.delay_budget->min_delay) { + if (budgeting_inf.if_set() && cheapest.rcv_path_backward_delay != std::numeric_limits::infinity() && cost_params.delay_budget) { + if (cheapest.rcv_path_backward_delay < cost_params.delay_budget->min_delay) { budgeting_inf.set_should_reroute(net_id, true); } } diff --git a/vpr/src/route/route_path_manager.h 
b/vpr/src/route/route_path_manager.h index c3f69980b67..f1673772193 100644 --- a/vpr/src/route/route_path_manager.h +++ b/vpr/src/route/route_path_manager.h @@ -6,19 +6,19 @@ #include #ifndef _PATH_MANAGER_H -# define _PATH_MANAGER_H +#define _PATH_MANAGER_H -/* Extra path data needed by RCV, seperated from t_heap struct for performance reasons +/* Extra path data needed by RCV, separated from RTExploredNode struct for performance reasons * Can be accessed by a pointer, won't be initialized unless by RCV * Use PathManager class to handle this structure's allocation and deallocation * * path_rr: The entire partial path up until the route tree with the first node being the SOURCE, - * or a part of the route tree that already exists for this net - * + * or a part of the route tree that already exists for this net + * * edge: A list of edges from each node in the partial path to reach the next node - * + * * backward_delay: The delay of the partial path plus the path from route tree to source - * + * * backward_cong: The congestion estimate of the partial path plus the path from route tree to source */ struct t_heap_path { std::vector path_rr; @@ -33,24 +33,24 @@ struct RoutingContext; /* A class to manage the extra data required for RCV * It manages a set containing all the nodes that currently exist in the route tree * This class also manages the extra memory allocation required for the t_heap_path structure - * + * * When RCV is enabled, the router will not always be looking for minimal cost routing * This means nodes that already exist in the current path, or current route tree could be expanded twice. 
* This would result in electrically illegal loops (example below) - * + * * OPIN--|----| |-----------Sink 1 * | |--------X----| <--- The branch intersects with a previous routing * | | * |-------------| Sink 2 - * + * * To stop this, we keep track of the route tree (route_tree_nodes_), and each node keeps track of it's current partial routing up to the route tree * Before expanding a node, we check to see if it exists in either the route tree, or the current partial path to eliminate these scenarios - * - * - * The t_heap_path structure was created to isolate the RCV specific data from the t_heap struct - * Having these in t_heap creates significant performance issues when RCV is disabled - * A t_heap_path pointer is instead stored in t_heap, which is selectively allocated only when RCV is enabled - * + * + * + * The t_heap_path structure was created to isolate the RCV specific data from the RTExploredNode struct + * Having these in RTExploredNode creates significant performance issues when RCV is disabled + * A t_heap_path pointer is instead stored in RTExploredNode, which is selectively allocated only when RCV is enabled + * * If the _is_enabled flag is true, alloc_path_struct allocates t_heap_path structures, otherwise will be a NOOP */ class PathManager { public: @@ -92,7 +92,6 @@ class PathManager { // Put all currently allocated structures into the free_nodes list // This currently does NOT invalidate them - // Ideally used before a t_heap empty_heap() call void empty_heap(); // Clear the route tree nodes set, before moving onto the next net diff --git a/vpr/src/route/route_tree.cpp b/vpr/src/route/route_tree.cpp index daf21bd1eb8..799fa185fbd 100644 --- a/vpr/src/route/route_tree.cpp +++ b/vpr/src/route/route_tree.cpp @@ -478,15 +478,15 @@ void RouteTree::print(void) const { /** Add the most recently finished wire segment to the routing tree, and * update the Tdel, etc. numbers for the rest of the routing tree. 
hptr - * is the heap pointer of the SINK that was reached, and target_net_pin_index + * is the pointer of the SINK that was reached/explored, and target_net_pin_index * is the net pin index corresponding to the SINK that was reached. Usually target_net_pin_index * is a non-negative integer indicating the netlist connection being routed, but it can be OPEN (-1) - * to indicate this is a routing path to a virtual sink which we use when routing to the source of - * dedicated clock networks. + * to indicate this is a routing path to a virtual sink which we use when routing to the source of + * dedicated clock networks. * This routine returns a tuple: RouteTreeNode of the branch it adds to the route tree and * RouteTreeNode of the SINK it adds to the routing. */ std::tuple, vtr::optional> -RouteTree::update_from_heap(t_heap* hptr, int target_net_pin_index, SpatialRouteTreeLookup* spatial_rt_lookup, bool is_flat) { +RouteTree::update_from_heap(RTExploredNode* hptr, int target_net_pin_index, SpatialRouteTreeLookup* spatial_rt_lookup, bool is_flat) { /* Lock the route tree for writing. At least on Linux this shouldn't have an impact on single-threaded code */ std::unique_lock write_lock(_write_mutex); @@ -515,7 +515,7 @@ RouteTree::update_from_heap(t_heap* hptr, int target_net_pin_index, SpatialRoute * to the SINK indicated by hptr. Returns the first (most upstream) new rt_node, * and the rt_node of the new SINK. Traverses up from SINK */ std::tuple, vtr::optional> -RouteTree::add_subtree_from_heap(t_heap* hptr, int target_net_pin_index, bool is_flat) { +RouteTree::add_subtree_from_heap(RTExploredNode* hptr, int target_net_pin_index, bool is_flat) { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; auto& route_ctx = g_vpr_ctx.routing(); @@ -534,7 +534,7 @@ RouteTree::add_subtree_from_heap(t_heap* hptr, int target_net_pin_index, bool is * Here we create two vectors: * new_branch_inodes: [sink, nodeN-1, nodeN-2, ... 
node 1] of length N * and new_branch_iswitches: [N-1->sink, N-2->N-1, ... 2->1, 1->found_node] of length N */ - RREdgeId edge = hptr->prev_edge(); + RREdgeId edge = hptr->prev_edge; RRNodeId new_inode = rr_graph.edge_src_node(edge); RRSwitchId new_iswitch = RRSwitchId(rr_graph.rr_nodes().edge_switch(edge)); diff --git a/vpr/src/route/route_tree.h b/vpr/src/route/route_tree.h index 4991d57f301..37e89db16ae 100644 --- a/vpr/src/route/route_tree.h +++ b/vpr/src/route/route_tree.h @@ -323,6 +323,44 @@ class RouteTreeNode { /** fwd definition for compatibility class in old_traceback.h */ class TracebackCompat; +/** + * @brief Each RTExploredNode element stores the node states for the connection router and represents a partial route. + * + * @note Only `index`, `prev_edge`, and `rcv_path_backward_delay` fields are used as the return value outside the connection router. + */ +class RTExploredNode { + public: + /* Used inside the connection router */ + + ///@brief The cost used to sort heap. For the timing-driven router this is the backward_path_cost + /// plus the expected cost to the target. + float total_cost = std::numeric_limits::infinity(); + ///@brief The "known" cost of the path up to and including this node. + float backward_path_cost = std::numeric_limits::infinity(); + ///@brief Stores the upstream resistance to ground from this node in the path search (connection + /// routing), including the resistance of the node itself (device_ctx.rr_nodes[index].R). + float R_upstream = std::numeric_limits::infinity(); + ///@brief Structure to handle extra RCV structures. Managed by PathManager class. + t_heap_path* path_data = nullptr; + + /* Used outside the connection router as the return values (`index` and `prev_edge` are also used inside the router). */ + + ///@brief The RR node index associated with the costs/R_upstream values. Outside the + /// connection router, this field is mainly used in `RouteTree::update_from_heap` and + /// `RouteTree::add_subtree_from_heap`. 
Inside the connection router, this is used as + /// part of the node info passed as a parameter of some member functions. + RRNodeId index = RRNodeId::INVALID(); + ///@brief The edge from the previous node used to reach the current. Same usage as the + /// `index` field described above. + RREdgeId prev_edge = RREdgeId::INVALID(); + ///@brief The delay of the partial path plus the path from route tree to source. + /// Needed by RCV. Set to infinity if RCV is disabled. This field is used as part + /// of the return value of the route routine, derived from the `path_data` pointer + /// (but not using `path_data` for returning to avoid issues with dynamic memory + /// management). + float rcv_path_backward_delay = std::numeric_limits::infinity(); +}; + /** * @brief Top level route tree used in timing analysis and keeping routing state. * @@ -357,7 +395,7 @@ class RouteTree { * RouteTreeNode of the SINK it adds to the routing. * Locking operation: only one thread can update_from_heap() a RouteTree at a time. */ std::tuple, vtr::optional> - update_from_heap(t_heap* hptr, int target_net_pin_index, SpatialRouteTreeLookup* spatial_rt_lookup, bool is_flat); + update_from_heap(RTExploredNode* hptr, int target_net_pin_index, SpatialRouteTreeLookup* spatial_rt_lookup, bool is_flat); /** Reload timing values (R_upstream, C_downstream, Tdel). * Can take a RouteTreeNode& to do an incremental update. 
@@ -491,7 +529,7 @@ class RouteTree { private: std::tuple, vtr::optional> - add_subtree_from_heap(t_heap* hptr, int target_net_pin_index, bool is_flat); + add_subtree_from_heap(RTExploredNode* hptr, int target_net_pin_index, bool is_flat); void add_non_configurable_nodes(RouteTreeNode* rt_node, bool reached_by_non_configurable_edge, diff --git a/vpr/src/route/route_tree_fwd.h b/vpr/src/route/route_tree_fwd.h index 61b61ae739d..6f48247ef30 100644 --- a/vpr/src/route/route_tree_fwd.h +++ b/vpr/src/route/route_tree_fwd.h @@ -4,3 +4,4 @@ class RouteTree; class RouteTreeNode; +class RTExploredNode; diff --git a/vpr/src/route/router_delay_profiling.cpp b/vpr/src/route/router_delay_profiling.cpp index e37744ab70a..ae25d5cdf78 100644 --- a/vpr/src/route/router_delay_profiling.cpp +++ b/vpr/src/route/router_delay_profiling.cpp @@ -97,7 +97,7 @@ bool RouterDelayProfiler::calculate_delay(RRNodeId source_node, RouterStats router_stats; bool found_path; - t_heap cheapest; + RTExploredNode cheapest; ConnectionParameters conn_params(ParentNetId::INVALID(), -1, false, @@ -186,7 +186,7 @@ vtr::vector calculate_all_path_delays_from_rr_node(RRNodeId src is_flat); RouterStats router_stats; ConnectionParameters conn_params(ParentNetId::INVALID(), OPEN, false, std::unordered_map()); - vtr::vector shortest_paths = router.timing_driven_find_all_shortest_paths_from_route_tree(tree.root(), + vtr::vector shortest_paths = router.timing_driven_find_all_shortest_paths_from_route_tree(tree.root(), cost_params, bounding_box, router_stats, diff --git a/vpr/src/route/router_delay_profiling.h b/vpr/src/route/router_delay_profiling.h index bda721e1a24..1d5ae1b21c1 100644 --- a/vpr/src/route/router_delay_profiling.h +++ b/vpr/src/route/router_delay_profiling.h @@ -2,8 +2,6 @@ #define ROUTER_DELAY_PROFILING_H_ #include "vpr_types.h" -#include "binary_heap.h" -#include "four_ary_heap.h" #include "connection_router.h" #include diff --git a/vpr/test/test_connection_router.cpp 
b/vpr/test/test_connection_router.cpp index c2ac5329a26..568b2b175f7 100644 --- a/vpr/test/test_connection_router.cpp +++ b/vpr/test/test_connection_router.cpp @@ -67,7 +67,7 @@ static float do_one_route(RRNodeId source_node, // Find the cheapest route if possible. bool found_path; - t_heap cheapest; + RTExploredNode cheapest; ConnectionParameters conn_params(ParentNetId::INVALID(), -1, false, diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_delay_calc_method/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_delay_calc_method/config/golden_results.txt index 66168627a8f..fcf92ec7e8f 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_delay_calc_method/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_delay_calc_method/config/golden_results.txt @@ -1,5 +1,5 @@ -arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used 
min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time -stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta_--place_delta_delay_matrix_calculation_method_astar 34.84 vpr 975.36 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success v8.0.0-11333-g6a44da44e release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-09-18T20:37:10 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 998768 10 10 168 178 1 68 30 11 8 88 io auto 952.5 MiB 0.50 358 812 97 660 55 975.4 MiB 0.07 0.00 6.44563 -69.2664 -6.44563 6.44563 3.31 0.000633306 0.000584828 0.014981 0.013961 26 784 31 0 0 125464. 1425.72 1.77 0.217747 0.184211 11500 28430 -1 625 17 282 1013 95514 35394 6.59221 6.59221 -74.0805 -6.59221 0 0 163463. 
1857.53 0.03 0.07 0.09 -1 -1 0.03 0.0275927 0.0245705 -stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta_override_--place_delta_delay_matrix_calculation_method_astar 34.42 vpr 975.53 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success v8.0.0-11333-g6a44da44e release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-09-18T20:37:10 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 998944 10 10 168 178 1 68 30 11 8 88 io auto 952.6 MiB 0.50 365 812 101 651 60 975.5 MiB 0.10 0.00 6.37156 -69.5088 -6.37156 6.37156 3.32 0.000634379 0.000586337 0.015972 0.0149437 24 851 26 0 0 114778. 1304.29 1.37 0.179349 0.152804 11416 27150 -1 691 14 354 1388 135595 52969 6.82221 6.82221 -75.6812 -6.82221 0 0 153433. 1743.56 0.03 0.07 0.09 -1 -1 0.03 0.024931 0.0223273 -stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta_--place_delta_delay_matrix_calculation_method_dijkstra 35.84 vpr 975.38 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success v8.0.0-11333-g6a44da44e release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-09-18T20:37:10 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 998788 10 10 168 178 1 68 30 11 8 88 io auto 952.4 MiB 0.50 367 812 86 668 58 975.4 MiB 0.15 0.00 6.39336 -69.4912 -6.39336 6.39336 4.34 0.000639177 0.000587378 0.017224 0.0162017 22 875 22 0 0 110609. 1256.92 1.66 0.199683 0.169442 11258 24748 -1 730 18 335 1182 109582 46429 6.92426 6.92426 -76.9247 -6.92426 0 0 134428. 
1527.59 0.02 0.07 0.09 -1 -1 0.02 0.0283942 0.0252052 -stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta_override_--place_delta_delay_matrix_calculation_method_dijkstra 35.35 vpr 975.52 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success v8.0.0-11333-g6a44da44e release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-09-18T20:37:10 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 998932 10 10 168 178 1 68 30 11 8 88 io auto 952.8 MiB 0.50 368 812 78 675 59 975.5 MiB 0.07 0.00 6.26392 -68.4373 -6.26392 6.26392 4.33 0.000637702 0.000588521 0.0149562 0.0139792 28 776 45 0 0 134428. 1527.59 1.48 0.227998 0.19302 11590 29630 -1 595 13 254 987 91515 32222 6.61176 6.61176 -72.652 -6.61176 0 0 173354. 1969.93 0.03 0.07 0.10 -1 -1 0.03 0.0241301 0.021664 + arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total 
logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time + stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta_--place_delta_delay_matrix_calculation_method_astar 27.50 vpr 977.58 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success 0f69adb Release IPO VTR_ASSERT_LEVEL=3 GNU 11.4.0 on Linux-6.5.0-1025-azure x86_64 2024-10-15T16:01:56 fv-az837-567 /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing 1001044 10 10 168 178 1 68 30 11 8 88 io auto 956.2 MiB 0.45 370 858 95 697 66 977.6 MiB 0.04 0.00 6.45248 -69.1493 -6.45248 6.45248 2.68 0.000346945 0.000301901 0.0109124 0.00985616 -1 -1 -1 -1 32 693 33 0 0 153433. 1743.56 1.19 0.127615 0.111696 11830 34246 -1 570 10 235 725 56242 26416 6.94346 6.94346 -73.9579 -6.94346 0 0 205860. 
2339.32 0.06 0.04 0.08 -1 -1 0.06 0.0194505 0.0184001 + stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta_override_--place_delta_delay_matrix_calculation_method_astar 27.82 vpr 977.35 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success 0f69adb Release IPO VTR_ASSERT_LEVEL=3 GNU 11.4.0 on Linux-6.5.0-1025-azure x86_64 2024-10-15T16:01:56 fv-az837-567 /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing 1000804 10 10 168 178 1 68 30 11 8 88 io auto 954.9 MiB 0.45 369 812 82 656 74 977.3 MiB 0.04 0.00 6.45248 -69.2479 -6.45248 6.45248 2.74 0.00035978 0.000313724 0.0101986 0.00925468 -1 -1 -1 -1 32 691 29 0 0 153433. 1743.56 1.24 0.130899 0.114171 11830 34246 -1 553 12 224 697 51846 24062 6.94346 6.94346 -73.4811 -6.94346 0 0 205860. 2339.32 0.06 0.04 0.08 -1 -1 0.06 0.0206713 0.0194697 + stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta_--place_delta_delay_matrix_calculation_method_dijkstra 28.08 vpr 977.66 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success 0f69adb Release IPO VTR_ASSERT_LEVEL=3 GNU 11.4.0 on Linux-6.5.0-1025-azure x86_64 2024-10-15T16:01:56 fv-az837-567 /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing 1001124 10 10 168 178 1 68 30 11 8 88 io auto 955.1 MiB 0.47 370 812 89 663 60 977.7 MiB 0.04 0.00 6.52191 -68.7563 -6.52191 6.52191 3.40 0.000347877 0.0002958 0.010332 0.00933957 -1 -1 -1 -1 22 809 21 0 0 110609. 1256.92 0.45 0.066663 0.0592234 11258 24748 -1 663 14 329 1173 67735 35710 7.04515 7.04515 -76.4932 -7.04515 0 0 134428. 
1527.59 0.04 0.05 0.07 -1 -1 0.04 0.0237505 0.0223282 + stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta_override_--place_delta_delay_matrix_calculation_method_dijkstra 28.29 vpr 977.61 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success 0f69adb Release IPO VTR_ASSERT_LEVEL=3 GNU 11.4.0 on Linux-6.5.0-1025-azure x86_64 2024-10-15T16:01:56 fv-az837-567 /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing 1001072 10 10 168 178 1 68 30 11 8 88 io auto 955.2 MiB 0.45 368 812 95 656 61 977.6 MiB 0.04 0.00 6.34478 -68.8031 -6.34478 6.34478 3.48 0.000358527 0.000311549 0.0101593 0.00922939 -1 -1 -1 -1 28 753 22 0 0 134428. 1527.59 0.44 0.0663655 0.0590372 11590 29630 -1 624 15 260 959 55378 26467 6.64742 6.64742 -72.827 -6.64742 0 0 173354. 1969.93 0.05 0.04 0.07 -1 -1 0.05 0.0225106 0.0210004 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_delay_model/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_delay_model/config/golden_results.txt index cc882260f8f..10c4b944169 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_delay_model/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_delay_model/config/golden_results.txt @@ -1,3 +1,3 @@ -arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem 
place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time -stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta 35.93 vpr 975.56 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success v8.0.0-11333-g1d3eb07f5 release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-09-18T23:07:52 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 998976 10 10 168 178 1 68 30 11 8 88 io auto 952.8 MiB 0.54 420 582 82 470 30 975.6 MiB 0.07 0.00 6.38568 -70.463 -6.38568 6.38568 3.45 0.000645075 0.000592785 0.0119866 0.0112148 20 909 46 0 0 100248. 1139.18 0.84 0.12912 0.111352 11180 23751 -1 803 20 495 1987 182273 69910 6.92851 6.92851 -75.9518 -6.92851 0 0 125464. 
1425.72 0.02 0.10 0.09 -1 -1 0.02 0.0328754 0.0291737 -stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta_override 37.04 vpr 975.73 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success v8.0.0-11333-g1d3eb07f5 release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-09-18T23:07:52 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 999148 10 10 168 178 1 68 30 11 8 88 io auto 952.6 MiB 0.60 395 582 95 453 34 975.7 MiB 0.07 0.00 6.37094 -69.85 -6.37094 6.37094 3.47 0.000638173 0.000588606 0.012516 0.0117144 30 698 21 0 0 144567. 1642.81 1.56 0.195052 0.165386 11730 32605 -1 613 13 256 907 102553 34444 6.74537 6.74537 -72.8995 -6.74537 0 0 194014. 2204.70 0.03 0.07 0.11 -1 -1 0.03 0.0256888 0.0231304 + arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total 
min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time + stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta 28.29 vpr 977.73 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success 0f69adb Release IPO VTR_ASSERT_LEVEL=3 GNU 11.4.0 on Linux-6.5.0-1025-azure x86_64 2024-10-15T16:01:56 fv-az837-567 /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing 1001196 10 10 168 178 1 68 30 11 8 88 io auto 955.4 MiB 0.43 393 628 105 491 32 977.7 MiB 0.03 0.00 6.51193 -69.1178 -6.51193 6.51193 2.64 0.000368496 0.000316279 0.00897708 0.00821508 -1 -1 -1 -1 20 893 28 0 0 100248. 1139.18 1.58 0.129641 0.112291 11180 23751 -1 831 19 496 1987 121384 60113 6.91414 6.91414 -78.1319 -6.91414 0 0 125464. 
1425.72 0.04 0.06 0.07 -1 -1 0.04 0.0265283 0.0245474 + stratixiv_arch.timing.xml styr.blif common_--place_delay_model_delta_override 28.12 vpr 977.50 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 10 -1 -1 success 0f69adb Release IPO VTR_ASSERT_LEVEL=3 GNU 11.4.0 on Linux-6.5.0-1025-azure x86_64 2024-10-15T16:01:56 fv-az837-567 /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing 1000956 10 10 168 178 1 68 30 11 8 88 io auto 955.9 MiB 0.54 380 628 91 496 41 977.5 MiB 0.05 0.00 6.52338 -69.1003 -6.52338 6.52338 2.70 0.000355671 0.000305949 0.00939391 0.00863885 -1 -1 -1 -1 30 673 12 0 0 144567. 1642.81 1.15 0.113164 0.0991248 11730 32605 -1 585 9 216 698 45031 21119 6.8993 6.8993 -73.7008 -6.8993 0 0 194014. 2204.70 0.08 0.05 0.08 -1 -1 0.08 0.0197747 0.0187602