diff --git a/libs/librrgraph/src/base/rr_graph_view.h b/libs/librrgraph/src/base/rr_graph_view.h
index 9940ef85ee3..9685c20fe3e 100644
--- a/libs/librrgraph/src/base/rr_graph_view.h
+++ b/libs/librrgraph/src/base/rr_graph_view.h
@@ -242,10 +242,13 @@ class RRGraphView {
         std::string start_y;                                           //start y-coordinate
         std::string end_x;                                             //end x-coordinate
         std::string end_y;                                             //end y-coordinate
-        std::string layer_num_str;                                     //layer number
+        std::string start_layer_str;                                     //layer number written with the start coordinate
+        std::string end_layer_str;                                     //layer number written with the end coordinate
         std::string arrow;                                             //direction arrow
         std::string coordinate_string = node_type_string(node);        //write the component's type as a routing resource node
         coordinate_string += ":" + std::to_string(size_t(node)) + " "; //add the index of the routing resource node
+
+        int node_layer_num = node_layer(node);
         if (node_type(node) == OPIN || node_type(node) == IPIN) {
             coordinate_string += "side: ("; //add the side of the routing resource node
             for (const e_side& node_side : SIDES) {
@@ -259,12 +262,12 @@ class RRGraphView {
             // and the end to the lower coordinate
             start_x =  " (" + std::to_string(node_xhigh(node)) + ","; //start and end coordinates are the same for OPINs and IPINs
             start_y = std::to_string(node_yhigh(node)) + ",";
-            layer_num_str = std::to_string(node_layer(node)) + ")";
+            start_layer_str = std::to_string(node_layer_num) + ")";
         } else if (node_type(node) == SOURCE || node_type(node) == SINK) {
             // For SOURCE and SINK the starting and ending coordinate are identical, so just use start
             start_x = " (" + std::to_string(node_xhigh(node)) + ",";
             start_y = std::to_string(node_yhigh(node)) + ",";
-            layer_num_str = std::to_string(node_layer(node)) + ")";
+            start_layer_str = std::to_string(node_layer_num) + ")";
         } else if (node_type(node) == CHANX || node_type(node) == CHANY) { //for channels, we would like to describe the component with segment specific information
             RRIndexedDataId cost_index = node_cost_index(node);
             int seg_index = rr_indexed_data_[cost_index].seg_index;
@@ -278,26 +281,28 @@ class RRGraphView {
 
                 start_x = " (" + std::to_string(node_xhigh(node)) + ","; //start coordinates have large value
                 start_y = std::to_string(node_yhigh(node)) + ",";
+                start_layer_str = std::to_string(node_layer_num) + ")"; //close the start tuple so it prints as (x,y,layer)
                 end_x = " (" + std::to_string(node_xlow(node)) + ","; //end coordinates have smaller value
                 end_y = std::to_string(node_ylow(node)) + ",";
-                layer_num_str = std::to_string(node_layer(node)) + ")";
+                end_layer_str = std::to_string(node_layer_num) + ")";
             }
 
             else {                                                      // signal travels in increasing direction, stays at same point, or can travel both directions
                 start_x = " (" + std::to_string(node_xlow(node)) + ","; //start coordinates have smaller value
                 start_y = std::to_string(node_ylow(node)) + ",";
+                start_layer_str = std::to_string(node_layer_num) + ")"; //close the start tuple so it prints as (x,y,layer)
                 end_x = " (" + std::to_string(node_xhigh(node)) + ","; //end coordinates have larger value
                 end_y = std::to_string(node_yhigh(node)) + ",";
-                layer_num_str = std::to_string(node_layer(node)) + ")"; //layer number
+                end_layer_str = std::to_string(node_layer_num) + ")"; //layer number
                 if (node_direction(node) == Direction::BIDIR) {
                     arrow = "<->"; //indicate that signal can travel both direction
                 }
             }
         }
 
-        coordinate_string +=  start_x + start_y + layer_num_str; //Write the starting coordinates
+        coordinate_string +=  start_x + start_y + start_layer_str; //Write the starting coordinates
         coordinate_string += arrow;             //Indicate the direction
-        coordinate_string += end_x + end_y + layer_num_str;     //Write the end coordinates
+        coordinate_string += end_x + end_y + end_layer_str;     //Write the end coordinates
         return coordinate_string;
     }
 
diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp
index a883d611493..829ef935f0d 100644
--- a/vpr/src/route/connection_router.cpp
+++ b/vpr/src/route/connection_router.cpp
@@ -203,8 +203,6 @@ t_heap* ConnectionRouter<Heap>::timing_driven_route_connection_from_heap(RRNodeI
                                                                          const t_conn_cost_params cost_params,
                                                                          t_bb bounding_box) {
     VTR_ASSERT_SAFE(heap_.is_valid());
-    //std::cout << "using this: " << (void *)this << "\n";
-    //std::cout << "using heap: " << heap_.get_ptr() << "\n";
 
     if (heap_.is_empty_heap()) { //No source
         VTR_LOGV_DEBUG(router_debug_, "  Initial heap empty (no source)\n");
diff --git a/vpr/src/route/router_lookahead_extended_map.cpp b/vpr/src/route/router_lookahead_extended_map.cpp
index 102a176f92e..db71adc6a8d 100644
--- a/vpr/src/route/router_lookahead_extended_map.cpp
+++ b/vpr/src/route/router_lookahead_extended_map.cpp
@@ -65,7 +65,7 @@ static std::pair<float, int> run_dijkstra(RRNodeId start_node,
                                           std::vector<util::Search_Path>* paths,
                                           util::RoutingCosts* routing_costs);
 
-std::pair<float, float> ExtendedMapLookahead::get_src_opin_cost(RRNodeId from_node, int delta_x, int delta_y, const t_conn_cost_params& params) const {
+std::pair<float, float> ExtendedMapLookahead::get_src_opin_cost(RRNodeId from_node, int delta_x, int delta_y, int to_layer_num, const t_conn_cost_params& params) const {
     auto& device_ctx = g_vpr_ctx.device();
     auto& rr_graph = device_ctx.rr_graph;
 
@@ -109,7 +109,7 @@ std::pair<float, float> ExtendedMapLookahead::get_src_opin_cost(RRNodeId from_no
         float expected_delay_cost = std::numeric_limits<float>::infinity();
         float expected_cong_cost = std::numeric_limits<float>::infinity();
 
-        for (const auto& kv : this->src_opin_delays[from_layer_num][tile_index][from_ptc]) {
+        for (const auto& kv : this->src_opin_delays[from_layer_num][tile_index][from_ptc][to_layer_num]) {
             const util::t_reachable_wire_inf& reachable_wire_inf = kv.second;
 
             util::Cost_Entry cost_entry;
@@ -195,13 +195,15 @@ std::pair<float, float> ExtendedMapLookahead::get_expected_delay_and_cong(RRNode
     int to_x = rr_graph.node_xlow(to_node);
     int to_y = rr_graph.node_ylow(to_node);
 
+    int to_layer_num = rr_graph.node_layer(to_node);
+
     int dx, dy;
     dx = to_x - from_x;
     dy = to_y - from_y;
 
     e_rr_type from_type = rr_graph.node_type(from_node);
     if (from_type == SOURCE || from_type == OPIN) {
-        return this->get_src_opin_cost(from_node, dx, dy, params);
+        return this->get_src_opin_cost(from_node, dx, dy, to_layer_num, params);
     } else if (from_type == IPIN) {
         return std::make_pair(0., 0.);
     }
@@ -420,7 +422,7 @@ std::pair<float, int> ExtendedMapLookahead::run_dijkstra(RRNodeId start_node,
 
 // compute the cost maps for lookahead
 void ExtendedMapLookahead::compute(const std::vector<t_segment_inf>& segment_inf) {
-    std::tie(this->src_opin_delays, this->src_opin_inter_layer_delays) = util::compute_router_src_opin_lookahead(is_flat_);
+    this->src_opin_delays = util::compute_router_src_opin_lookahead(is_flat_);
 
     this->chan_ipins_delays = util::compute_router_chan_ipin_lookahead();
 
@@ -616,7 +618,7 @@ void ExtendedMapLookahead::write(const std::string& file) const {
 void ExtendedMapLookahead::read(const std::string& file) {
     cost_map_.read(file);
 
-    std::tie(this->src_opin_delays, this->src_opin_inter_layer_delays) = util::compute_router_src_opin_lookahead(is_flat_);
+    this->src_opin_delays = util::compute_router_src_opin_lookahead(is_flat_); // recomputed here; only cost_map_ is loaded from `file`
 
     this->chan_ipins_delays = util::compute_router_chan_ipin_lookahead();
 }
diff --git a/vpr/src/route/router_lookahead_extended_map.h b/vpr/src/route/router_lookahead_extended_map.h
index ccd3faaa2ad..45d877871cf 100644
--- a/vpr/src/route/router_lookahead_extended_map.h
+++ b/vpr/src/route/router_lookahead_extended_map.h
@@ -19,9 +19,6 @@ class ExtendedMapLookahead : public RouterLookahead {
     ///<Look-up table from SOURCE/OPIN to CHANX/CHANY of various types
     util::t_src_opin_delays src_opin_delays;
 
-    ///< Lookup table from SOURCE/OPIN to CHANX/CHANY of the another layer
-    util::t_src_opin_inter_layer_delays src_opin_inter_layer_delays;
-
     ///<Look-up table from CHANX/CHANY to SINK/IPIN of various types
     util::t_chan_ipins_delays chan_ipins_delays;
 
@@ -33,7 +30,7 @@ class ExtendedMapLookahead : public RouterLookahead {
      * @param criticality_fac criticality of the current connection between 0 (all congestion) and 1 (all timing)
      * @return expected cost to get to the destination
      */
-    std::pair<float, float> get_src_opin_cost(RRNodeId from_node, int delta_x, int delta_y, const t_conn_cost_params& params) const;
+    std::pair<float, float> get_src_opin_cost(RRNodeId from_node, int delta_x, int delta_y, int to_layer_num, const t_conn_cost_params& params) const;
 
     /**
      * @brief Returns the CHAN -> IPIN delay that gets added to the final expected delay
diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp
index 0eea1a6c913..6b69d95695e 100644
--- a/vpr/src/route/router_lookahead_map.cpp
+++ b/vpr/src/route/router_lookahead_map.cpp
@@ -187,7 +187,7 @@ class PQ_Entry {
 
 /* used during Dijkstra expansion to store delay/congestion info lists for each relative coordinate for a given segment and channel type.
  * the list at each coordinate is later boiled down to a single representative cost entry to be stored in the final cost map */
-typedef vtr::Matrix<Expansion_Cost_Entry> t_routing_cost_map; //[0..device_ctx.grid.width()-1][0..device_ctx.grid.height()-1]
+typedef vtr::NdMatrix<Expansion_Cost_Entry, 3> t_routing_cost_map; //[0..num_layers-1][0..device_ctx.grid.width()-1][0..device_ctx.grid.height()-1]
 
 struct t_dijkstra_data {
     /* a list of boolean flags (one for each rr node) to figure out if a certain node has already been expanded */
@@ -212,9 +212,11 @@ t_wire_cost_map f_wire_cost_map;
  */
 Cost_Entry get_wire_cost_entry(e_rr_type rr_type,
                                int seg_index,
-                               int layer_num,
+                               int from_layer_num,
                                int delta_x,
-                               int delta_y);
+                               int delta_y,
+                               int to_layer_num);
+
 static void compute_router_wire_lookahead(const std::vector<t_segment_inf>& segment_inf);
 /***
  * @brief Compute the cost from pin to sinks of tiles - Compute the minimum cost to get to each tile sink from pins on the cluster
@@ -264,9 +266,9 @@ static void min_global_cost_map(vtr::NdMatrix<util::Cost_Entry, 3>& internal_opi
  * @return (delay, congestion)
  */
 static std::pair<float, float> get_cost_from_src_opin(const std::map<int, util::t_reachable_wire_inf>& src_opin_delay_map,
-                                                      int layer_num,
                                                       int delta_x,
-                                                      int delta_y);
+                                                      int delta_y,
+                                                      int to_layer_num);
 
 // Read the file and fill inter_tile_pin_primitive_pin_delay and tile_min_cost
 static void read_intra_cluster_router_lookahead(std::unordered_map<int, util::t_ipin_primitive_sink_delays>& inter_tile_pin_primitive_pin_delay,
@@ -281,7 +283,6 @@ static RRNodeId get_start_node(int layer, int start_x, int start_y, int target_x
 /* runs Dijkstra's algorithm from specified node until all nodes have been visited. Each time a pin is visited, the delay/congestion information
  * to that pin is stored is added to an entry in the routing_cost_map */
 static void run_dijkstra(RRNodeId start_node,
-                         int sample_layer_num,
                          int start_x,
                          int start_y,
                          t_routing_cost_map& routing_cost_map,
@@ -292,11 +293,11 @@ static void expand_dijkstra_neighbours(PQ_Entry parent_entry,
                                        vtr::vector<RRNodeId, bool>& node_expanded,
                                        std::priority_queue<PQ_Entry>& pq);
 /* sets the lookahead cost map entries based on representative cost entries from routing_cost_map */
-static void set_lookahead_map_costs(int layer_num, int segment_index, e_rr_type chan_type, t_routing_cost_map& routing_cost_map);
+static void set_lookahead_map_costs(int from_layer_num, int segment_index, e_rr_type chan_type, t_routing_cost_map& routing_cost_map);
 /* fills in missing lookahead map entries by copying the cost of the closest valid entry */
 static void fill_in_missing_lookahead_entries(int segment_index, e_rr_type chan_type);
 /* returns a cost entry in the f_wire_cost_map that is near the specified coordinates (and preferably towards (0,0)) */
-static Cost_Entry get_nearby_cost_entry(int layer_num, int x, int y, int segment_index, int chan_index);
+static Cost_Entry get_nearby_cost_entry(int from_layer_num, int x, int y, int to_layer_num, int segment_index, int chan_index);
 /* returns the absolute delta_x and delta_y offset required to reach to_node from from_node */
 static void get_xy_deltas(const RRNodeId from_node, const RRNodeId to_node, int* delta_x, int* delta_y);
 static void adjust_rr_position(const RRNodeId rr, int& x, int& y);
@@ -470,19 +471,10 @@ std::pair<float, float> MapLookahead::get_expected_delay_and_cong(RRNodeId from_
 
         auto from_ptc = rr_graph.node_ptc_num(from_node);
 
-        // Currently, we assume inter-layer connections are only from a block output pin to another layer. Thus, if the from and to layers are different,
-        // We use src_opin_inter_layer_delays.
-        if (from_layer_num == to_layer_num) {
-            std::tie(expected_delay_cost, expected_cong_cost) = get_cost_from_src_opin(src_opin_delays[from_layer_num][from_tile_index][from_ptc],
-                                                                                       from_layer_num,
-                                                                                       delta_x,
-                                                                                       delta_y);
-        } else if (from_layer_num != to_layer_num) {
-            std::tie(expected_delay_cost, expected_cong_cost) = get_cost_from_src_opin(src_opin_inter_layer_delays[from_layer_num][from_tile_index][from_ptc][to_layer_num],
-                                                                                       to_layer_num,
-                                                                                       delta_x,
-                                                                                       delta_y);
-        }
+        std::tie(expected_delay_cost, expected_cong_cost) = get_cost_from_src_opin(src_opin_delays[from_layer_num][from_tile_index][from_ptc][to_layer_num],
+                                                                                   delta_x,
+                                                                                   delta_y,
+                                                                                   to_layer_num);
 
         expected_delay_cost *= params.criticality;
         expected_cong_cost *= (1 - params.criticality);
@@ -506,34 +498,28 @@ std::pair<float, float> MapLookahead::get_expected_delay_and_cong(RRNodeId from_
 
         VTR_ASSERT(from_seg_index >= 0);
 
-        // Since we assume that inter-layer connections are only from a block output pin to another layer, if the from node
-        // is of type CHANX/CHANY, and the sink node is on the other layer, there will no path from that node to the sink
-        if (from_layer_num != to_layer_num) {
-            expected_delay_cost = std::numeric_limits<float>::max() / 1e12;
-            expected_cong_cost = std::numeric_limits<float>::max() / 1e12;
-        } else {
-            /* now get the expected cost from our lookahead map */
-            Cost_Entry cost_entry = get_wire_cost_entry(from_type,
-                                                        from_seg_index,
-                                                        from_layer_num,
-                                                        delta_x,
-                                                        delta_y);
-            expected_delay_cost = cost_entry.delay;
-            expected_cong_cost = cost_entry.congestion;
-
-            VTR_ASSERT_SAFE_MSG(std::isfinite(expected_delay_cost),
-                                vtr::string_fmt("Lookahead failed to estimate cost from %s: %s",
-                                                rr_node_arch_name(from_node, is_flat_).c_str(),
-                                                describe_rr_node(rr_graph,
-                                                                 device_ctx.grid,
-                                                                 device_ctx.rr_indexed_data,
-                                                                 from_node,
-                                                                 is_flat_)
-                                                    .c_str())
-                                    .c_str());
-        }
-        expected_delay_cost *= params.criticality;
-        expected_cong_cost *= (1 - params.criticality);
+        /* now get the expected cost from our lookahead map */
+        Cost_Entry cost_entry = get_wire_cost_entry(from_type,
+                                                    from_seg_index,
+                                                    from_layer_num,
+                                                    delta_x,
+                                                    delta_y,
+                                                    to_layer_num);
+        expected_delay_cost = cost_entry.delay;
+        expected_cong_cost = cost_entry.congestion;
+
+        VTR_ASSERT_SAFE_MSG(std::isfinite(expected_delay_cost),
+                            vtr::string_fmt("Lookahead failed to estimate cost from %s: %s",
+                                            rr_node_arch_name(from_node, is_flat_).c_str(),
+                                            describe_rr_node(rr_graph,
+                                                             device_ctx.grid,
+                                                             device_ctx.rr_indexed_data,
+                                                             from_node,
+                                                             is_flat_)
+                                                .c_str())
+                                .c_str());
+        expected_delay_cost = cost_entry.delay * params.criticality;
+        expected_cong_cost = cost_entry.congestion * (1 - params.criticality);
     } else if (from_type == IPIN) { /* Change if you're allowing route-throughs */
         return std::make_pair(0., device_ctx.rr_indexed_data[RRIndexedDataId(SINK_COST_INDEX)].base_cost);
     } else { /* Change this if you want to investigate route-throughs */
@@ -552,7 +538,7 @@ void MapLookahead::compute(const std::vector<t_segment_inf>& segment_inf) {
 
     //Next, compute which wire types are accessible (and the cost to reach them)
     //from the different physical tile type's SOURCEs & OPINs
-    std::tie(this->src_opin_delays, this->src_opin_inter_layer_delays) = util::compute_router_src_opin_lookahead(is_flat_);
+    this->src_opin_delays = util::compute_router_src_opin_lookahead(is_flat_);
 }
 
 void MapLookahead::compute_intra_tile() {
@@ -575,7 +561,7 @@ void MapLookahead::read(const std::string& file) {
 
     //Next, compute which wire types are accessible (and the cost to reach them)
     //from the different physical tile type's SOURCEs & OPINs
-    std::tie(this->src_opin_delays, this->src_opin_inter_layer_delays) = util::compute_router_src_opin_lookahead(is_flat_);
+    this->src_opin_delays = util::compute_router_src_opin_lookahead(is_flat_);
 }
 
 void MapLookahead::read_intra_cluster(const std::string& file) {
@@ -611,7 +597,7 @@ void MapLookahead::write_intra_cluster(const std::string& file) const {
 
 /******** Function Definitions ********/
 
-Cost_Entry get_wire_cost_entry(e_rr_type rr_type, int seg_index, int layer_num, int delta_x, int delta_y) {
+Cost_Entry get_wire_cost_entry(e_rr_type rr_type, int seg_index, int from_layer_num, int delta_x, int delta_y, int to_layer_num) {
     VTR_ASSERT_SAFE(rr_type == CHANX || rr_type == CHANY);
 
     int chan_index = 0;
@@ -619,11 +605,12 @@ Cost_Entry get_wire_cost_entry(e_rr_type rr_type, int seg_index, int layer_num,
         chan_index = 1;
     }
 
-    VTR_ASSERT_SAFE(layer_num < (int)f_wire_cost_map.dim_size(0));
-    VTR_ASSERT_SAFE(delta_x < (int)f_wire_cost_map.dim_size(3));
-    VTR_ASSERT_SAFE(delta_y < (int)f_wire_cost_map.dim_size(4));
+    VTR_ASSERT_SAFE(from_layer_num < (int)f_wire_cost_map.dim_size(0));
+    VTR_ASSERT_SAFE(to_layer_num < (int)f_wire_cost_map.dim_size(3));
+    VTR_ASSERT_SAFE(delta_x < (int)f_wire_cost_map.dim_size(4));
+    VTR_ASSERT_SAFE(delta_y < (int)f_wire_cost_map.dim_size(5));
 
-    return f_wire_cost_map[layer_num][chan_index][seg_index][delta_x][delta_y];
+    return f_wire_cost_map[from_layer_num][chan_index][seg_index][to_layer_num][delta_x][delta_y];
 }
 
 static void compute_router_wire_lookahead(const std::vector<t_segment_inf>& segment_inf) {
@@ -637,6 +624,7 @@ static void compute_router_wire_lookahead(const std::vector<t_segment_inf>& segm
     f_wire_cost_map = t_wire_cost_map({static_cast<unsigned long>(grid.get_num_layers()),
                                        2,
                                        segment_inf.size(),
+                                       static_cast<unsigned long>(grid.get_num_layers()),
                                        device_ctx.grid.width(),
                                        device_ctx.grid.height()});
 
@@ -665,11 +653,11 @@ static void compute_router_wire_lookahead(const std::vector<t_segment_inf>& segm
     int target_y = device_ctx.grid.height() - 2;
 
     //Profile each wire segment type
-    for (int layer_num = 0; layer_num < grid.get_num_layers(); layer_num++) {
+    for (int from_layer_num = 0; from_layer_num < grid.get_num_layers(); from_layer_num++) {
         //if arch file specifies die_number="layer_num" doesn't require inter-cluster
         //programmable routing resources, then we shouldn't profile wire segment types in
         //the current layer
-        if (!device_ctx.inter_cluster_prog_routing_resources[layer_num]) {
+        if (!device_ctx.inter_cluster_prog_routing_resources[from_layer_num]) {
             continue;
         }
         for (int iseg = 0; iseg < int(segment_inf.size()); iseg++) {
@@ -693,7 +681,7 @@ static void compute_router_wire_lookahead(const std::vector<t_segment_inf>& segm
 
                     for (int track_offset = 0; track_offset < MAX_TRACK_OFFSET; track_offset += 2) {
                         /* get the rr node index from which to start routing */
-                        RRNodeId start_node = get_start_node(layer_num, sample_x, sample_y,
+                        RRNodeId start_node = get_start_node(from_layer_num, sample_x, sample_y,
                                                              target_x, target_y, //non-corner upper right
                                                              chan_type, iseg, track_offset);
 
@@ -701,7 +689,7 @@ static void compute_router_wire_lookahead(const std::vector<t_segment_inf>& segm
                             continue;
                         }
                         // TODO: Temporary - After testing benchmarks this can be deleted
-                        VTR_ASSERT(rr_graph.node_layer(start_node) == layer_num);
+                        VTR_ASSERT(rr_graph.node_layer(start_node) == from_layer_num);
 
                         sample_nodes[chan_type].push_back(RRNodeId(start_node));
                     }
@@ -719,7 +707,7 @@ static void compute_router_wire_lookahead(const std::vector<t_segment_inf>& segm
                 for (RRNodeId rr_node : rr_graph.nodes()) {
                     auto rr_type = rr_graph.node_type(rr_node);
                     if (rr_type != chan_type) continue;
-                    if (rr_graph.node_layer(rr_node) != layer_num) continue;
+                    if (rr_graph.node_layer(rr_node) != from_layer_num) continue;
 
                     auto cost_index = rr_graph.node_cost_index(rr_node);
                     VTR_ASSERT(cost_index != RRIndexedDataId(OPEN));
@@ -740,7 +728,7 @@ static void compute_router_wire_lookahead(const std::vector<t_segment_inf>& segm
             //each sample location to profile the routing network from this type
 
             t_dijkstra_data dijkstra_data;
-            t_routing_cost_map routing_cost_map({device_ctx.grid.width(), device_ctx.grid.height()});
+            t_routing_cost_map routing_cost_map({static_cast<unsigned long>(device_ctx.grid.get_num_layers()), device_ctx.grid.width(), device_ctx.grid.height()});
 
             for (e_rr_type chan_type : chan_types) {
                 if (sample_nodes[chan_type].empty()) {
@@ -762,7 +750,6 @@ static void compute_router_wire_lookahead(const std::vector<t_segment_inf>& segm
                         }
 
                         run_dijkstra(sample_node,
-                                     layer_num,
                                      sample_x,
                                      sample_y,
                                      routing_cost_map,
@@ -773,7 +760,7 @@ static void compute_router_wire_lookahead(const std::vector<t_segment_inf>& segm
 
                     /* boil down the cost list in routing_cost_map at each coordinate to a representative cost entry and store it in the lookahead
                      * cost map */
-                    set_lookahead_map_costs(layer_num, iseg, chan_type, routing_cost_map);
+                    set_lookahead_map_costs(from_layer_num, iseg, chan_type, routing_cost_map);
 
                     /* fill in missing entries in the lookahead cost map by copying the closest cost entries (cost map was computed based on
                      * a reference coordinate > (0,0) so some entries that represent a cross-chip distance have not been computed) */
@@ -781,7 +768,7 @@ static void compute_router_wire_lookahead(const std::vector<t_segment_inf>& segm
                 }
             }
         }
-        if (false) print_wire_cost_map(layer_num, segment_inf);
+        if (false) print_wire_cost_map(from_layer_num, segment_inf);
     }
 }
 
@@ -830,7 +817,6 @@ static RRNodeId get_start_node(int layer, int start_x, int start_y, int target_x
 /* runs Dijkstra's algorithm from specified node until all nodes have been visited. Each time a pin is visited, the delay/congestion information
  * to that pin is stored is added to an entry in the routing_cost_map */
 static void run_dijkstra(RRNodeId start_node,
-                         int sample_layer_num,
                          int start_x,
                          int start_y,
                          t_routing_cost_map& routing_cost_map,
@@ -871,16 +857,13 @@ static void run_dijkstra(RRNodeId start_node,
             continue;
         }
 
-        if (rr_graph.node_layer(curr_node) != sample_layer_num) {
-            continue;
-        }
-
         //VTR_LOG("Expanding with delay=%10.3g cong=%10.3g (%s)\n", current.delay, current.congestion_upstream, describe_rr_node(rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, curr_node).c_str());
 
         /* if this node is an ipin record its congestion/delay in the routing_cost_map */
         if (rr_graph.node_type(curr_node) == IPIN) {
             int ipin_x = rr_graph.node_xlow(curr_node);
             int ipin_y = rr_graph.node_ylow(curr_node);
+            int ipin_layer = rr_graph.node_layer(curr_node);
 
             if (ipin_x >= start_x && ipin_y >= start_y) {
                 int delta_x, delta_y;
@@ -888,7 +871,7 @@ static void run_dijkstra(RRNodeId start_node,
                 delta_x = std::abs(delta_x);
                 delta_y = std::abs(delta_y);
 
-                routing_cost_map[delta_x][delta_y].add_cost_entry(current.delay, current.congestion_upstream);
+                routing_cost_map[ipin_layer][delta_x][delta_y].add_cost_entry(current.delay, current.congestion_upstream);
             }
         }
 
@@ -945,18 +928,20 @@ static void expand_dijkstra_neighbours(PQ_Entry parent_entry,
 }
 
 /* sets the lookahead cost map entries based on representative cost entries from routing_cost_map */
-static void set_lookahead_map_costs(int layer_num, int segment_index, e_rr_type chan_type, t_routing_cost_map& routing_cost_map) {
+static void set_lookahead_map_costs(int from_layer_num, int segment_index, e_rr_type chan_type, t_routing_cost_map& routing_cost_map) {
     int chan_index = 0;
     if (chan_type == CHANY) {
         chan_index = 1;
     }
 
     /* set the lookahead cost map entries with a representative cost entry from routing_cost_map */
-    for (unsigned ix = 0; ix < routing_cost_map.dim_size(0); ix++) {
-        for (unsigned iy = 0; iy < routing_cost_map.dim_size(1); iy++) {
-            Expansion_Cost_Entry& expansion_cost_entry = routing_cost_map[ix][iy];
+    for (unsigned to_layer = 0; to_layer < routing_cost_map.dim_size(0); to_layer++) { // dim 0: destination layer
+        for (unsigned ix = 0; ix < routing_cost_map.dim_size(1); ix++) { // dim 1: x offset
+            for (unsigned iy = 0; iy < routing_cost_map.dim_size(2); iy++) { // dim 2: y offset
+                Expansion_Cost_Entry& expansion_cost_entry = routing_cost_map[to_layer][ix][iy];
 
-            f_wire_cost_map[layer_num][chan_index][segment_index][ix][iy] = expansion_cost_entry.get_representative_cost_entry(REPRESENTATIVE_ENTRY_METHOD);
+                f_wire_cost_map[from_layer_num][chan_index][segment_index][to_layer][ix][iy] = expansion_cost_entry.get_representative_cost_entry(REPRESENTATIVE_ENTRY_METHOD);
+            }
         }
     }
 }
@@ -971,14 +956,16 @@ static void fill_in_missing_lookahead_entries(int segment_index, e_rr_type chan_
     auto& device_ctx = g_vpr_ctx.device();
 
     /* find missing cost entries and fill them in by copying a nearby cost entry */
-    for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); ++layer_num) {
-        for (unsigned ix = 0; ix < device_ctx.grid.width(); ix++) {
-            for (unsigned iy = 0; iy < device_ctx.grid.height(); iy++) {
-                Cost_Entry cost_entry = f_wire_cost_map[layer_num][chan_index][segment_index][ix][iy];
-
-                if (std::isnan(cost_entry.delay) && std::isnan(cost_entry.congestion)) {
-                    Cost_Entry copied_entry = get_nearby_cost_entry(layer_num, ix, iy, segment_index, chan_index);
-                    f_wire_cost_map[layer_num][chan_index][segment_index][ix][iy] = copied_entry;
+    for (int from_layer_num = 0; from_layer_num < device_ctx.grid.get_num_layers(); from_layer_num++) {
+        for (int to_layer_num = 0; to_layer_num < device_ctx.grid.get_num_layers(); ++to_layer_num) {
+            for (unsigned ix = 0; ix < device_ctx.grid.width(); ix++) {
+                for (unsigned iy = 0; iy < device_ctx.grid.height(); iy++) {
+                    Cost_Entry cost_entry = f_wire_cost_map[from_layer_num][chan_index][segment_index][to_layer_num][ix][iy];
+
+                    if (std::isnan(cost_entry.delay) && std::isnan(cost_entry.congestion)) {
+                        Cost_Entry copied_entry = get_nearby_cost_entry(from_layer_num, ix, iy, to_layer_num, segment_index, chan_index);
+                        f_wire_cost_map[from_layer_num][chan_index][segment_index][to_layer_num][ix][iy] = copied_entry;
+                    }
                 }
             }
         }
@@ -986,7 +973,7 @@ static void fill_in_missing_lookahead_entries(int segment_index, e_rr_type chan_
 }
 
 /* returns a cost entry in the f_wire_cost_map that is near the specified coordinates (and preferably towards (0,0)) */
-static Cost_Entry get_nearby_cost_entry(int layer_num, int x, int y, int segment_index, int chan_index) {
+static Cost_Entry get_nearby_cost_entry(int from_layer_num, int x, int y, int to_layer_num, int segment_index, int chan_index) {
     /* compute the slope from x,y to 0,0 and then move towards 0,0 by one unit to get the coordinates
      * of the cost entry to be copied */
 
@@ -1013,14 +1000,21 @@ static Cost_Entry get_nearby_cost_entry(int layer_num, int x, int y, int segment
     copy_y = std::max(copy_y, 0); //Clip to zero
     copy_x = std::max(copy_x, 0); //Clip to zero
 
-    Cost_Entry copy_entry = f_wire_cost_map[layer_num][chan_index][segment_index][copy_x][copy_y];
+    Cost_Entry copy_entry = f_wire_cost_map[from_layer_num][chan_index][segment_index][to_layer_num][copy_x][copy_y];
 
     /* if the entry to be copied is also empty, recurse */
     if (std::isnan(copy_entry.delay) && std::isnan(copy_entry.congestion)) {
         if (copy_x == 0 && copy_y == 0) {
             copy_entry = Cost_Entry(0., 0.); //(0, 0) entry is invalid so set zero to terminate recursion
+            // set zero if the source and sink nodes are on the same layer. If they are not, it means that there is no connection from the source node to
+            // the other layer. This means that the connection should be set to a very large number
+            if (from_layer_num == to_layer_num) {
+                copy_entry = Cost_Entry(0., 0.);
+            } else {
+                copy_entry = Cost_Entry(std::numeric_limits<float>::max() / 1e12, std::numeric_limits<float>::max() / 1e12);
+            }
         } else {
-            copy_entry = get_nearby_cost_entry(layer_num, copy_x, copy_y, segment_index, chan_index);
+            copy_entry = get_nearby_cost_entry(from_layer_num, copy_x, copy_y, to_layer_num, segment_index, chan_index);
         }
     }
 
@@ -1351,7 +1345,9 @@ static void print_wire_cost_map(int layer_num, const std::vector<t_segment_inf>&
                         chan_index);
             for (size_t iy = 0; iy < device_ctx.grid.height(); iy++) {
                 for (size_t ix = 0; ix < device_ctx.grid.width(); ix++) {
-                    vtr::printf("%2d,%2d: %10.3g\t", ix, iy, f_wire_cost_map[layer_num][chan_index][iseg][ix][iy].delay);
+                    for (int to_layer_num = 0; to_layer_num < device_ctx.grid.get_num_layers(); ++to_layer_num) {
+                        vtr::printf("%2d,%2d,%2d: %10.3g\t", ix, iy, to_layer_num, f_wire_cost_map[layer_num][chan_index][iseg][to_layer_num][ix][iy].delay);
+                    }
                 }
                 vtr::printf("\n");
             }
@@ -1362,13 +1358,15 @@ static void print_wire_cost_map(int layer_num, const std::vector<t_segment_inf>&
 
 static void print_router_cost_map(const t_routing_cost_map& router_cost_map) {
     VTR_LOG("Djikstra Flood Costs:\n");
-    for (size_t x = 0; x < router_cost_map.dim_size(0); x++) {
-        for (size_t y = 0; y < router_cost_map.dim_size(1); y++) {
-            VTR_LOG("(%zu,%zu):\n", x, y);
-
-            for (size_t i = 0; i < router_cost_map[x][y].cost_vector.size(); ++i) {
-                Cost_Entry entry = router_cost_map[x][y].cost_vector[i];
-                VTR_LOG("  %d: delay=%10.3g cong=%10.3g\n", i, entry.delay, entry.congestion);
+    for (size_t to_layer_num = 0; to_layer_num < router_cost_map.dim_size(0); to_layer_num++) {
+        for (size_t x = 0; x < router_cost_map.dim_size(1); x++) {
+            for (size_t y = 0; y < router_cost_map.dim_size(2); y++) {
+                VTR_LOG("(%zu,%zu,%zu):\n", x, y, to_layer_num);
+
+                for (size_t i = 0; i < router_cost_map[to_layer_num][x][y].cost_vector.size(); ++i) {
+                    Cost_Entry entry = router_cost_map[to_layer_num][x][y].cost_vector[i];
+                    VTR_LOG("  %d: delay=%10.3g cong=%10.3g\n", i, entry.delay, entry.congestion);
+                }
             }
         }
     }
@@ -1470,30 +1468,32 @@ static void min_global_cost_map(vtr::NdMatrix<util::Cost_Entry, 3>& internal_opi
                                           static_cast<unsigned long>(width),
                                           static_cast<unsigned long>(height)});
 
-    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+    for (int from_layer_num = 0; from_layer_num < num_layers; from_layer_num++) {
         for (int dx = 0; dx < width; dx++) {
             for (int dy = 0; dy < height; dy++) {
                 util::Cost_Entry min_cost(std::numeric_limits<float>::max(), std::numeric_limits<float>::max());
-                for (int chan_idx = 0; chan_idx < (int)f_wire_cost_map.dim_size(1); chan_idx++) {
-                    for (int seg_idx = 0; seg_idx < (int)f_wire_cost_map.dim_size(2); seg_idx++) {
-                        auto cost = util::Cost_Entry(f_wire_cost_map[layer_num][chan_idx][seg_idx][dx][dy].delay,
-                                                     f_wire_cost_map[layer_num][chan_idx][seg_idx][dx][dy].congestion);
-                        if (cost.delay < min_cost.delay) {
-                            min_cost.delay = cost.delay;
-                            min_cost.congestion = cost.congestion;
+                for (int to_layer_num = 0; to_layer_num < num_layers; to_layer_num++) {
+                    for (int chan_idx = 0; chan_idx < (int)f_wire_cost_map.dim_size(1); chan_idx++) {
+                        for (int seg_idx = 0; seg_idx < (int)f_wire_cost_map.dim_size(2); seg_idx++) {
+                            auto cost = util::Cost_Entry(f_wire_cost_map[from_layer_num][chan_idx][seg_idx][to_layer_num][dx][dy].delay,
+                                                         f_wire_cost_map[from_layer_num][chan_idx][seg_idx][to_layer_num][dx][dy].congestion);
+                            if (cost.delay < min_cost.delay) {
+                                min_cost.delay = cost.delay;
+                                min_cost.congestion = cost.congestion;
+                            }
                         }
                     }
                 }
-                internal_opin_global_cost_map[layer_num][dx][dy] = min_cost;
+                internal_opin_global_cost_map[from_layer_num][dx][dy] = min_cost;
             }
         }
     }
 }
 
 static std::pair<float, float> get_cost_from_src_opin(const std::map<int, util::t_reachable_wire_inf>& src_opin_delay_map,
-                                                      int layer_num,
                                                       int delta_x,
-                                                      int delta_y) {
+                                                      int delta_y,
+                                                      int to_layer_num) {
     float expected_delay_cost = std::numeric_limits<float>::infinity();
     float expected_cong_cost = std::numeric_limits<float>::infinity();
     if (src_opin_delay_map.empty()) {
@@ -1535,9 +1535,10 @@ static std::pair<float, float> get_cost_from_src_opin(const std::map<int, util::
                 //delay and congestion cost estimates
                 wire_cost_entry = get_wire_cost_entry(reachable_wire_inf.wire_rr_type,
                                                       reachable_wire_inf.wire_seg_index,
-                                                      layer_num,
+                                                      reachable_wire_inf.layer_number,
                                                       delta_x,
-                                                      delta_y);
+                                                      delta_y,
+                                                      to_layer_num);
             }
 
             float this_delay_cost = reachable_wire_inf.delay + wire_cost_entry.delay;
@@ -1760,7 +1761,7 @@ void read_router_lookahead(const std::string& file) {
 
     auto map = reader.getRoot<VprMapLookahead>();
 
-    ToNdMatrix<5, VprMapCostEntry, Cost_Entry>(&f_wire_cost_map, map.getCostMap(), ToCostEntry);
+    ToNdMatrix<6, VprMapCostEntry, Cost_Entry>(&f_wire_cost_map, map.getCostMap(), ToCostEntry);
 }
 
 void write_router_lookahead(const std::string& file) {
@@ -1769,7 +1770,7 @@ void write_router_lookahead(const std::string& file) {
     auto map = builder.initRoot<VprMapLookahead>();
 
     auto cost_map = map.initCostMap();
-    FromNdMatrix<5, VprMapCostEntry, Cost_Entry>(&cost_map, f_wire_cost_map, FromCostEntry);
+    FromNdMatrix<6, VprMapCostEntry, Cost_Entry>(&cost_map, f_wire_cost_map, FromCostEntry);
 
     writeMessageToFile(file, &builder);
 }
diff --git a/vpr/src/route/router_lookahead_map.h b/vpr/src/route/router_lookahead_map.h
index d6340acac85..12d6eaa9ad9 100644
--- a/vpr/src/route/router_lookahead_map.h
+++ b/vpr/src/route/router_lookahead_map.h
@@ -19,8 +19,6 @@ class MapLookahead : public RouterLookahead {
     std::unordered_map<int, std::unordered_map<int, util::Cost_Entry>> tile_min_cost; // [physical_tile_type][sink_physical_num] -> cost
     // Lookup table to store the minimum cost for each dx and dy
     vtr::NdMatrix<util::Cost_Entry, 3> distance_based_min_cost; // [layer_num][dx][dy] -> cost
-    // [tile_index][from_layer_num][to_layer_num] -> pair<seg_index, t_reachable_wire_inf>
-    util::t_src_opin_inter_layer_delays src_opin_inter_layer_delays;
 
     const t_det_routing_arch& det_routing_arch_;
     bool is_flat_;
@@ -57,8 +55,14 @@ class Cost_Entry {
 
 /* provides delay/congestion estimates to travel specified distances
  * in the x/y direction */
-typedef vtr::NdMatrix<Cost_Entry, 5> t_wire_cost_map; //[0..num_layers][0..1][[0..num_seg_types-1]0..device_ctx.grid.width()-1][0..device_ctx.grid.height()-1]
+// This is a 6D array storing the cost to travel from a node of type CHANX/CHANY to a point that is dx, dy further, and is on the "layer_num" layer.
+// To store this information, the first index is the layer number that the node under consideration is on, the second index represents the type of channel (X/Y)
+// that the node under consideration belongs to, the third is the segment type (specified in the architecture file under the "segmentlist" tag), the fourth is the
+// target "layer_num" mentioned above, the fifth is dx, and the last one is dy.
+typedef vtr::NdMatrix<Cost_Entry, 6> t_wire_cost_map; //[0..num_layers-1][0..1][0..num_seg_types-1][0..num_layers-1][0..device_ctx.grid.width()-1][0..device_ctx.grid.height()-1]
                                                       //[0..1] entry distinguish between CHANX/CHANY start nodes respectively
+                                                      // The first index is the layer number that the node under consideration is on, and the fourth index
+                                                      // is the layer number that the target node is on.
 
 void read_router_lookahead(const std::string& file);
 void write_router_lookahead(const std::string& file);
diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp
index c9c7017f83c..9ffe2433c29 100644
--- a/vpr/src/route/router_lookahead_map_utils.cpp
+++ b/vpr/src/route/router_lookahead_map_utils.cpp
@@ -17,10 +17,16 @@
 #include "route_common.h"
 #include "route_debug.h"
 
-static void dijkstra_flood_to_wires(int itile, RRNodeId inode, util::t_src_opin_delays& src_opin_delays, util::t_src_opin_inter_layer_delays& src_opin_inter_layer_delays, bool is_multi_layer);
+static void dijkstra_flood_to_wires(int itile, RRNodeId inode, util::t_src_opin_delays& src_opin_delays);
 
 static void dijkstra_flood_to_ipins(RRNodeId node, util::t_chan_ipins_delays& chan_ipins_delays);
 
+/**
+ * @param itile
+ * @return Return the maximum ptc number of the SOURCE/OPINs of a tile type
+ */
+static int get_tile_src_opin_max_ptc_from_rr_graph(int itile);
+
 static t_physical_tile_loc pick_sample_tile(int layer_num, t_physical_tile_type_ptr tile_type, t_physical_tile_loc prev);
 
 static void run_intra_tile_dijkstra(const RRGraphView& rr_graph,
@@ -304,34 +310,38 @@ template void expand_dijkstra_neighbours(const RRGraphView& rr_graph,
                                                              std::vector<PQ_Entry_Base_Cost>,
                                                              std::greater<PQ_Entry_Base_Cost>>* pq);
 
-std::pair<t_src_opin_delays, t_src_opin_inter_layer_delays> compute_router_src_opin_lookahead(bool is_flat) {
+t_src_opin_delays compute_router_src_opin_lookahead(bool is_flat) {
     vtr::ScopedStartFinishTimer timer("Computing src/opin lookahead");
     auto& device_ctx = g_vpr_ctx.device();
     auto& rr_graph = device_ctx.rr_graph;
 
     int num_layers = device_ctx.grid.get_num_layers();
-    bool is_multi_layer = (num_layers > 1);
 
     t_src_opin_delays src_opin_delays;
     src_opin_delays.resize(num_layers);
-    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
-        src_opin_delays[layer_num].resize(device_ctx.physical_tile_types.size());
+    std::vector<int> tile_max_ptc(device_ctx.physical_tile_types.size(), OPEN);
+
+    // Get the maximum OPIN ptc for each tile type to reserve src_opin_delays
+    for (int itile = 0; itile < (int)device_ctx.physical_tile_types.size(); itile++) {
+        tile_max_ptc[itile] = get_tile_src_opin_max_ptc_from_rr_graph(itile);
     }
 
-    t_src_opin_inter_layer_delays src_opin_inter_layer_delays;
-    if (is_multi_layer) {
-        src_opin_inter_layer_delays.resize(num_layers);
-        for (int layer_num = 0; layer_num < num_layers; layer_num++) {
-            int num_physical_tiles = (int)device_ctx.physical_tile_types.size();
-            src_opin_inter_layer_delays[layer_num].resize(num_physical_tiles);
+    // Resize src_opin_delays to accommodate enough ptc and layer
+    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+        src_opin_delays[layer_num].resize(device_ctx.physical_tile_types.size());
+        for (int itile = 0; itile < (int)device_ctx.physical_tile_types.size(); itile++) {
+            src_opin_delays[layer_num][itile].resize(tile_max_ptc[itile] + 1);
+            for (int ptc_num = 0; ptc_num <= tile_max_ptc[itile]; ptc_num++) {
+                src_opin_delays[layer_num][itile][ptc_num].resize(num_layers);
+            }
         }
     }
 
     //We assume that the routing connectivity of each instance of a physical tile is the same,
     //and so only measure one instance of each type
-    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+    for (int from_layer_num = 0; from_layer_num < num_layers; from_layer_num++) {
         for (size_t itile = 0; itile < device_ctx.physical_tile_types.size(); ++itile) {
-            if (device_ctx.grid.num_instances(&device_ctx.physical_tile_types[itile], layer_num) == 0) {
+            if (device_ctx.grid.num_instances(&device_ctx.physical_tile_types[itile], from_layer_num) == 0) {
                 continue;
             }
             for (e_rr_type rr_type : {SOURCE, OPIN}) {
@@ -342,7 +352,9 @@ std::pair<t_src_opin_delays, t_src_opin_inter_layer_delays> compute_router_src_o
                 while (ptcs_with_no_delays) { //Haven't found wire connected to ptc
                     ptcs_with_no_delays = false;
 
-                    sample_loc = pick_sample_tile(layer_num, &device_ctx.physical_tile_types[itile], sample_loc);
+                    sample_loc = pick_sample_tile(from_layer_num,
+                                                  &device_ctx.physical_tile_types[itile],
+                                                  sample_loc);
 
                     if (sample_loc.x == OPEN && sample_loc.y == OPEN && sample_loc.layer_num == OPEN) {
                         //No untried instances of the current tile type left
@@ -364,31 +376,28 @@ std::pair<t_src_opin_delays, t_src_opin_inter_layer_delays> compute_router_src_o
                             continue;
                         }
 
-                        if (ptc >= int(src_opin_delays[layer_num][itile].size())) {
-                            src_opin_delays[layer_num][itile].resize(ptc + 1); //Inefficient but functional...
-                            if (is_multi_layer) {
-                                size_t old_size = src_opin_inter_layer_delays[layer_num][itile].size();
-                                src_opin_inter_layer_delays[layer_num][itile].resize(ptc + 1);
-                                for (size_t i = old_size; i < src_opin_inter_layer_delays[layer_num][itile].size(); ++i) {
-                                    src_opin_inter_layer_delays[layer_num][itile][i].resize(num_layers);
-                                }
-                            }
-                        }
+                        VTR_ASSERT(ptc < int(src_opin_delays[from_layer_num][itile].size()));
 
                         //Find the wire types which are reachable from inode and record them and
                         //the cost to reach them
                         dijkstra_flood_to_wires(itile,
                                                 node_id,
-                                                src_opin_delays,
-                                                src_opin_inter_layer_delays,
-                                                is_multi_layer);
+                                                src_opin_delays);
 
-                        if (src_opin_delays[layer_num][itile][ptc].empty()) {
-                            VTR_LOGV_DEBUG(f_router_debug, "Found no reachable wires from %s (%s) at (%d,%d)\n",
+                        bool reachable_wire_found = false;
+                        for (int to_layer_num = 0; to_layer_num < num_layers; to_layer_num++) {
+                            if (!src_opin_delays[from_layer_num][itile][ptc][to_layer_num].empty()) {
+                                reachable_wire_found = true;
+                                break;
+                            }
+                        }
+                        if (!reachable_wire_found) { // nothing reachable on any layer -> retry with another sample tile
+                            VTR_LOGV_DEBUG(f_router_debug, "Found no reachable wires from %s (%s) at (%d,%d,%d)\n",
                                            rr_node_typename[rr_type],
                                            rr_node_arch_name(node_id, is_flat).c_str(),
                                            sample_loc.x,
                                            sample_loc.y,
+                                           sample_loc.layer_num,
                                            is_flat);
 
                             ptcs_with_no_delays = true;
@@ -404,7 +413,7 @@ std::pair<t_src_opin_delays, t_src_opin_inter_layer_delays> compute_router_src_o
         }
     }
 
-    return std::make_pair(src_opin_delays, src_opin_inter_layer_delays);
+    return src_opin_delays;
 }
 
 t_chan_ipins_delays compute_router_chan_ipin_lookahead() {
@@ -489,9 +498,7 @@ t_ipin_primitive_sink_delays compute_intra_tile_dijkstra(const RRGraphView& rr_g
 
 static void dijkstra_flood_to_wires(int itile,
                                     RRNodeId node,
-                                    util::t_src_opin_delays& src_opin_delays,
-                                    util::t_src_opin_inter_layer_delays& src_opin_inter_layer_delays,
-                                    bool is_multi_layer) {
+                                    util::t_src_opin_delays& src_opin_delays) {
     auto& device_ctx = g_vpr_ctx.device();
     const auto& rr_graph = device_ctx.rr_graph;
 
@@ -513,7 +520,7 @@ static void dijkstra_flood_to_wires(int itile,
     root.node = node;
 
     int ptc = rr_graph.node_ptc_num(node);
-    int node_layer_num = rr_graph.node_layer(node);
+    int root_layer_num = rr_graph.node_layer(node);
 
     /*
      * Perform Djikstra from the SOURCE/OPIN of interest, stopping at the the first
@@ -561,20 +568,13 @@ static void dijkstra_flood_to_wires(int itile,
             }
 
             //Keep costs of the best path to reach each wire type
-            if ((!src_opin_delays[node_layer_num][itile][ptc].count(seg_index)
-                 || curr.delay < src_opin_delays[node_layer_num][itile][ptc][seg_index].delay)
-                && curr_layer_num == node_layer_num) {
-                src_opin_delays[node_layer_num][itile][ptc][seg_index].wire_rr_type = curr_rr_type;
-                src_opin_delays[node_layer_num][itile][ptc][seg_index].wire_seg_index = seg_index;
-                src_opin_delays[node_layer_num][itile][ptc][seg_index].delay = curr.delay;
-                src_opin_delays[node_layer_num][itile][ptc][seg_index].congestion = curr.congestion;
-            } else if (is_multi_layer && (!src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num].count(seg_index) || curr.delay < src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].delay)
-                       && curr_layer_num != node_layer_num) {
-                // Store a CHANX/Y node or a SINK node on another layer that is reachable by the current node.
-                src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].wire_rr_type = curr_rr_type;
-                src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].wire_seg_index = seg_index;
-                src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].delay = curr.delay;
-                src_opin_inter_layer_delays[node_layer_num][itile][ptc][curr_layer_num][seg_index].congestion = curr.congestion;
+            if (!src_opin_delays[root_layer_num][itile][ptc][curr_layer_num].count(seg_index)
+                || curr.delay < src_opin_delays[root_layer_num][itile][ptc][curr_layer_num][seg_index].delay) {
+                src_opin_delays[root_layer_num][itile][ptc][curr_layer_num][seg_index].wire_rr_type = curr_rr_type;
+                src_opin_delays[root_layer_num][itile][ptc][curr_layer_num][seg_index].wire_seg_index = seg_index;
+                src_opin_delays[root_layer_num][itile][ptc][curr_layer_num][seg_index].layer_number = curr_layer_num;
+                src_opin_delays[root_layer_num][itile][ptc][curr_layer_num][seg_index].delay = curr.delay;
+                src_opin_delays[root_layer_num][itile][ptc][curr_layer_num][seg_index].congestion = curr.congestion;
             }
 
         } else if (curr_rr_type == SOURCE || curr_rr_type == OPIN || curr_rr_type == IPIN) {
@@ -714,6 +714,62 @@ static void dijkstra_flood_to_ipins(RRNodeId node, util::t_chan_ipins_delays& ch
     }
 }
 
+static int get_tile_src_opin_max_ptc_from_rr_graph(int itile) {
+    const auto& device_ctx = g_vpr_ctx.device();
+    const auto& physical_tile = device_ctx.physical_tile_types[itile];
+    const auto& rr_graph = device_ctx.rr_graph;
+    const int num_layers = device_ctx.grid.get_num_layers();
+    int max_ptc = OPEN; // stays OPEN if no inter-cluster SOURCE/OPIN node is found
+
+    // Find a layer that has instances of the tile type
+    int tile_layer_num = OPEN;
+    for (int layer_num = 0; layer_num < num_layers; layer_num++) {
+        if (device_ctx.grid.num_instances(&physical_tile, layer_num) > 0) {
+            tile_layer_num = layer_num;
+            break;
+        }
+    }
+
+    if (tile_layer_num == OPEN) {
+        VTR_LOG_WARN("Found no sample locations for %s\n",
+                     physical_tile.name);
+        max_ptc = OPEN; // no-op: max_ptc is still OPEN at this point
+    } else {
+        for (e_rr_type rr_type : {SOURCE, OPIN}) {
+            t_physical_tile_loc sample_loc(OPEN, OPEN, OPEN);
+            sample_loc = pick_sample_tile(tile_layer_num, &physical_tile, sample_loc); // called with the same arguments for both rr_types
+
+            if (sample_loc.x == OPEN && sample_loc.y == OPEN && sample_loc.layer_num == OPEN) {
+                //No untried instances of the current tile type left
+                VTR_LOG_WARN("Found no sample locations for %s in %s\n",
+                             rr_node_typename[rr_type],
+                             physical_tile.name);
+                return OPEN; // discards any maximum recorded so far
+            }
+
+            const std::vector<RRNodeId>& rr_nodes_at_loc = device_ctx.rr_graph.node_lookup().find_grid_nodes_at_all_sides(sample_loc.layer_num,
+                                                                                                                          sample_loc.x,
+                                                                                                                          sample_loc.y,
+                                                                                                                          rr_type);
+            for (RRNodeId node_id : rr_nodes_at_loc) {
+                int ptc = rr_graph.node_ptc_num(node_id);
+                // For the time being, we decide to not let the lookahead explore the node inside the clusters
+                if (!is_inter_cluster_node(&physical_tile,
+                                           rr_type,
+                                           ptc)) {
+                    continue;
+                }
+
+                if (ptc >= max_ptc) { // running maximum over both SOURCE and OPIN nodes
+                    max_ptc = ptc;
+                }
+            }
+        }
+    }
+
+    return max_ptc;
+}
+
 static t_physical_tile_loc pick_sample_tile(int layer_num, t_physical_tile_type_ptr tile_type, t_physical_tile_loc prev) {
     //Very simple for now, just pick the fist matching tile found
     t_physical_tile_loc loc(OPEN, OPEN, OPEN);
diff --git a/vpr/src/route/router_lookahead_map_utils.h b/vpr/src/route/router_lookahead_map_utils.h
index 0245208fdf7..7f1f83c1848 100644
--- a/vpr/src/route/router_lookahead_map_utils.h
+++ b/vpr/src/route/router_lookahead_map_utils.h
@@ -254,6 +254,7 @@ void expand_dijkstra_neighbours(const RRGraphView& rr_graph,
 struct t_reachable_wire_inf {
     e_rr_type wire_rr_type;
     int wire_seg_index;
+    int layer_number;
 
     //Costs to reach the wire type from the current node
     float congestion;
@@ -271,11 +272,8 @@ struct t_reachable_wire_inf {
 // SOURCE/OPIN of a given tile type.
 //
 // When querying this data structure, the minimum cost is computed for each delay/congestion pair, and returned
-// as the lookahead expected cost. [opin/src layer_num][tile_index][opin/src ptc_number] -> pair<seg_index, t_reachable_wire_inf>
-typedef std::vector<std::vector<std::vector<std::map<int, t_reachable_wire_inf>>>> t_src_opin_delays;
-// Store the wire segments on to_layer_num reachable from a given SOURCE/OPIN
-// [from_layer_num][tile_index][from opin/src ptc num][to_layer_num] -> pair<seg_index, t_reachable_wire_inf>
-typedef std::vector<std::vector<std::vector<std::vector<std::map<int, util::t_reachable_wire_inf>>>>> t_src_opin_inter_layer_delays;
+// as the lookahead expected cost. [opin/src layer_num][tile_index][opin/src ptc_number][to_layer_num] -> pair<seg_index, t_reachable_wire_inf>
+typedef std::vector<std::vector<std::vector<std::vector<std::map<int, t_reachable_wire_inf>>>>> t_src_opin_delays;
 
 //[from pin ptc num][target src ptc num]->cost
 typedef std::vector<std::unordered_map<int, Cost_Entry>> t_ipin_primitive_sink_delays;
@@ -294,9 +292,9 @@ typedef std::vector<std::vector<std::vector<t_reachable_wire_inf>>> t_chan_ipins
 /**
  * @brief For each tile, iterate over its OPINs and store which segment types are accessible from each OPIN
  * @param is_flat
- * @return (segments accessible on the same type, segments accessible on other layer)
+ * @return Lookup indexed as [opin/src layer_num][tile_index][opin/src ptc_number][to_layer_num] -> map from seg_index to t_reachable_wire_inf
  */
-std::pair<t_src_opin_delays, t_src_opin_inter_layer_delays> compute_router_src_opin_lookahead(bool is_flat);
+t_src_opin_delays compute_router_src_opin_lookahead(bool is_flat);
 
 t_chan_ipins_delays compute_router_chan_ipin_lookahead();
 
diff --git a/vpr/test/test_map_lookahead_serdes.cpp b/vpr/test/test_map_lookahead_serdes.cpp
index 9beb03b3601..a9095377df5 100644
--- a/vpr/test/test_map_lookahead_serdes.cpp
+++ b/vpr/test/test_map_lookahead_serdes.cpp
@@ -10,16 +10,19 @@ namespace {
 static constexpr const char kMapLookaheadBin[] = "test_map_lookahead.bin";
 
 TEST_CASE("round_trip_map_lookahead", "[vpr]") {
-    constexpr std::array<size_t, 5> kDim({1, 10, 12, 15, 16});
+    constexpr size_t num_layers = 1;
+    constexpr std::array<size_t, 6> kDim({num_layers, 10, 12, num_layers, 15, 16});
 
     f_wire_cost_map.resize(kDim);
-    for (size_t layer = 0; layer < kDim[0]; layer++) {
+    for (size_t from_layer = 0; from_layer < kDim[0]; from_layer++) {
         for (size_t x = 0; x < kDim[1]; ++x) {
             for (size_t y = 0; y < kDim[2]; ++y) {
-                for (size_t z = 0; z < kDim[3]; ++z) {
-                    for (size_t w = 0; w < kDim[4]; ++w) {
-                        f_wire_cost_map[layer][x][y][z][w].delay = (x + 1) * (y + 1) * (z + 1) * (w + 1);
-                        f_wire_cost_map[layer][x][y][z][w].congestion = 2 * (x + 1) * (y + 1) * (z + 1) * (w + 1);
+                for (size_t to_layer = 0; to_layer < kDim[3]; to_layer++) {
+                    for (size_t z = 0; z < kDim[4]; ++z) {
+                        for (size_t w = 0; w < kDim[5]; ++w) {
+                            f_wire_cost_map[from_layer][x][y][to_layer][z][w].delay = (x + 1) * (y + 1) * (z + 1) * (w + 1);
+                            f_wire_cost_map[from_layer][x][y][to_layer][z][w].congestion = 2 * (x + 1) * (y + 1) * (z + 1) * (w + 1);
+                        }
                     }
                 }
             }
@@ -28,20 +31,22 @@ TEST_CASE("round_trip_map_lookahead", "[vpr]") {
 
     write_router_lookahead(kMapLookaheadBin);
 
-    for (size_t layer = 0; layer < kDim[0]; layer++) {
+    for (size_t from_layer = 0; from_layer < kDim[0]; from_layer++) {
         for (size_t x = 0; x < kDim[1]; ++x) {
             for (size_t y = 0; y < kDim[2]; ++y) {
-                for (size_t z = 0; z < kDim[3]; ++z) {
-                    for (size_t w = 0; w < kDim[4]; ++w) {
-                        f_wire_cost_map[layer][x][y][z][w].delay = 0.f;
-                        f_wire_cost_map[layer][x][y][z][w].congestion = 0.f;
+                for (size_t to_layer = 0; to_layer < kDim[3]; to_layer++) {
+                    for (size_t z = 0; z < kDim[4]; ++z) {
+                        for (size_t w = 0; w < kDim[5]; ++w) {
+                            f_wire_cost_map[from_layer][x][y][to_layer][z][w].delay = 0.f;
+                            f_wire_cost_map[from_layer][x][y][to_layer][z][w].congestion = 0.f;
+                        }
                     }
                 }
             }
         }
     }
 
-    f_wire_cost_map.resize({0, 0, 0, 0, 0});
+    f_wire_cost_map.resize({0, 0, 0, 0, 0, 0});
 
     read_router_lookahead(kMapLookaheadBin);
 
@@ -49,13 +54,15 @@ TEST_CASE("round_trip_map_lookahead", "[vpr]") {
         REQUIRE(f_wire_cost_map.dim_size(i) == kDim[i]);
     }
 
-    for (size_t layer = 0; layer < kDim[0]; layer++) {
+    for (size_t from_layer = 0; from_layer < kDim[0]; from_layer++) {
         for (size_t x = 0; x < kDim[1]; ++x) {
             for (size_t y = 0; y < kDim[2]; ++y) {
-                for (size_t z = 0; z < kDim[3]; ++z) {
-                    for (size_t w = 0; w < kDim[4]; ++w) {
-                        REQUIRE(f_wire_cost_map[layer][x][y][z][w].delay == (x + 1) * (y + 1) * (z + 1) * (w + 1));
-                        REQUIRE(f_wire_cost_map[layer][x][y][z][w].congestion == 2 * (x + 1) * (y + 1) * (z + 1) * (w + 1));
+                for (size_t to_layer = 0; to_layer < kDim[3]; to_layer++) {
+                    for (size_t z = 0; z < kDim[4]; ++z) {
+                        for (size_t w = 0; w < kDim[5]; ++w) {
+                            REQUIRE(f_wire_cost_map[from_layer][x][y][to_layer][z][w].delay == (x + 1) * (y + 1) * (z + 1) * (w + 1));
+                            REQUIRE(f_wire_cost_map[from_layer][x][y][to_layer][z][w].congestion == 2 * (x + 1) * (y + 1) * (z + 1) * (w + 1));
+                        }
                     }
                 }
             }