diff --git a/libs/libarchfpga/src/device_grid.h b/libs/libarchfpga/src/device_grid.h index b4a7b7c031e..2077f2b9419 100644 --- a/libs/libarchfpga/src/device_grid.h +++ b/libs/libarchfpga/src/device_grid.h @@ -50,7 +50,7 @@ class DeviceGrid { * @brief Return the number of instances of the specified tile type on the specified layer. If the layer_num is -1, return the total number of instances of the specified tile type on all layers. * @note This function should be used if count_instances() is called in the constructor. */ - size_t num_instances(t_physical_tile_type_ptr type, int layer_num = 0) const; + size_t num_instances(t_physical_tile_type_ptr type, int layer_num) const; /** * @brief Returns the block types which limits the device size (may be empty if @@ -59,23 +59,23 @@ class DeviceGrid { std::vector limiting_resources() const { return limiting_resources_; } ///@brief Return the t_physical_tile_type_ptr at the specified location - inline t_physical_tile_type_ptr get_physical_type(size_t x, size_t y, int layer_num = 0) const { - return grid_[layer_num][x][y].type; + inline t_physical_tile_type_ptr get_physical_type(const t_physical_tile_loc& tile_loc) const { + return grid_[tile_loc.layer_num][tile_loc.x][tile_loc.y].type; } ///@brief Return the width offset of the tile at the specified location. The root location of the tile is where width_offset and height_offset are 0. - inline int get_width_offset(size_t x, size_t y, int layer_num = 0) const { - return grid_[layer_num][x][y].width_offset; + inline int get_width_offset(const t_physical_tile_loc& tile_loc) const { + return grid_[tile_loc.layer_num][tile_loc.x][tile_loc.y].width_offset; } ///@brief Return the height offset of the tile at the specified location. 
The root location of the tile is where width_offset and height_offset are 0 - inline int get_height_offset(size_t x, size_t y, int layer_num = 0) const { - return grid_[layer_num][x][y].height_offset; + inline int get_height_offset(const t_physical_tile_loc& tile_loc) const { + return grid_[tile_loc.layer_num][tile_loc.x][tile_loc.y].height_offset; } ///@brief Return the metadata of the tile at the specified location - inline const t_metadata_dict* get_metadata(size_t x, size_t y, int layer_num = 0) const { - return grid_[layer_num][x][y].meta; + inline const t_metadata_dict* get_metadata(const t_physical_tile_loc& tile_loc) const { + return grid_[tile_loc.layer_num][tile_loc.x][tile_loc.y].meta; } ///@brief Given t_grid_tile, return the x coordinate of the tile on the given layer - Used by serializer functions @@ -94,6 +94,12 @@ class DeviceGrid { return diff % grid_.dim_size(2); } + ///@brief Given t_grid_tile, return the layer number of the tile - Used by serializer functions + inline int get_grid_loc_layer(const t_grid_tile*& grid_loc) const { + int layer_num = std::floor(static_cast(grid_loc - &grid_.get(0)) / (width() * height())); + return layer_num; + } + ///@brief Return the nth t_grid_tile on the given layer of the flattened grid - Used by serializer functions inline const t_grid_tile* get_grid_locs_grid_loc(int n) const { return &grid_.get(n); diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h index a4699e2ccd8..dde02162379 100644 --- a/libs/libarchfpga/src/physical_types.h +++ b/libs/libarchfpga/src/physical_types.h @@ -814,6 +814,31 @@ struct t_physical_pin { } }; +/** + * @brief Describes The location of a physical tile + * @param layer_num The die number of the physical tile. If the FPGA only has one die, or the physical tile is located + * on the base die, layer_num is equal to zero. If it is one the die above base die, it is one, etc. 
+ * @param x The x location of the physical tile on the given die + * @param y The y location of the physical tile on the given die + */ +struct t_physical_tile_loc { + int x = OPEN; + int y = OPEN; + int layer_num = OPEN; + + t_physical_tile_loc() = default; + + t_physical_tile_loc(int x_val, int y_val, int layer_num_val) + : x(x_val) + , y(y_val) + , layer_num(layer_num_val) {} + + // Returns true if this type location layer_num/x/y is not equal to OPEN + operator bool() const { + return !(x == OPEN || y == OPEN || layer_num == OPEN); + } +}; + /** Describes I/O and clock ports of a physical tile type * * It corresponds to tags in the FPGA architecture description diff --git a/libs/librrgraph/CMakeLists.txt b/libs/librrgraph/CMakeLists.txt index 37a4a275cc2..372e8a6f33c 100644 --- a/libs/librrgraph/CMakeLists.txt +++ b/libs/librrgraph/CMakeLists.txt @@ -48,14 +48,14 @@ add_custom_target( COMMAND ${CMAKE_COMMAND} -E make_directory rr_graph_generate COMMAND ${CMAKE_COMMAND} -E chdir rr_graph_generate git clone https://github.com/duck2/uxsdcxx COMMAND python3 -mpip install --user -r rr_graph_generate/uxsdcxx/requirements.txt - COMMAND ${CMAKE_COMMAND} -E chdir rr_graph_generate python3 uxsdcxx/uxsdcxx.py ${CMAKE_CURRENT_SOURCE_DIR}/src/base/rr_graph.xsd - COMMAND ${CMAKE_COMMAND} -E chdir rr_graph_generate python3 uxsdcxx/uxsdcap.py ${CMAKE_CURRENT_SOURCE_DIR}/src/base/rr_graph.xsd + COMMAND ${CMAKE_COMMAND} -E chdir rr_graph_generate python3 uxsdcxx/uxsdcxx.py ${CMAKE_CURRENT_SOURCE_DIR}/src/io/rr_graph.xsd + COMMAND ${CMAKE_COMMAND} -E chdir rr_graph_generate python3 uxsdcxx/uxsdcap.py ${CMAKE_CURRENT_SOURCE_DIR}/src/io/rr_graph.xsd COMMAND ${CMAKE_COMMAND} -E copy rr_graph_generate/rr_graph_uxsdcxx.h rr_graph_generate/rr_graph_uxsdcxx_capnp.h rr_graph_generate/rr_graph_uxsdcxx_interface.h - ${CMAKE_CURRENT_SOURCE_DIR}/src/base/gen + ${CMAKE_CURRENT_SOURCE_DIR}/src/io/gen COMMAND ${CMAKE_COMMAND} -E copy rr_graph_generate/rr_graph_uxsdcxx.capnp 
${CMAKE_CURRENT_SOURCE_DIR}/../libvtrcapnproto/gen - DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src/base/rr_graph.xsd + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src/io/rr_graph.xsd WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) \ No newline at end of file diff --git a/libs/librrgraph/src/base/check_rr_graph.cpp b/libs/librrgraph/src/base/check_rr_graph.cpp index 524f6a56b93..8ed76ed2f58 100644 --- a/libs/librrgraph/src/base/check_rr_graph.cpp +++ b/libs/librrgraph/src/base/check_rr_graph.cpp @@ -234,9 +234,11 @@ void check_rr_graph(const RRGraphView& rr_graph, size_t inode = (size_t)rr_node; t_rr_type rr_type = rr_graph.node_type(rr_node); int ptc_num = rr_graph.node_ptc_num(rr_node); + int layer_num = rr_graph.node_layer(rr_node); int xlow = rr_graph.node_xlow(rr_node); int ylow = rr_graph.node_ylow(rr_node); - t_physical_tile_type_ptr type = grid.get_physical_type(xlow, ylow); + + t_physical_tile_type_ptr type = grid.get_physical_type({xlow, ylow, layer_num}); if (rr_type == IPIN || rr_type == OPIN) { // #TODO: No edges are added for internal pins. However, they need to be checked somehow! 
@@ -273,7 +275,9 @@ void check_rr_graph(const RRGraphView& rr_graph, if (!is_chain && !is_fringe && !is_wire) { if (rr_graph.node_type(rr_node) == IPIN || rr_graph.node_type(rr_node) == OPIN) { if (has_adjacent_channel(rr_graph, grid, node)) { - auto block_type = grid.get_physical_type(rr_graph.node_xlow(rr_node), rr_graph.node_ylow(rr_node)); + auto block_type = grid.get_physical_type({rr_graph.node_xlow(rr_node), + rr_graph.node_ylow(rr_node), + rr_graph.node_layer(rr_node)}); std::string pin_name = block_type_pin_index_to_name(block_type, rr_graph.node_pin_num(rr_node), is_flat); /* Print error messages for all the sides that a node may appear */ for (const e_side& node_side : SIDES) { @@ -312,7 +316,9 @@ static bool rr_node_is_global_clb_ipin(const RRGraphView& rr_graph, const Device int ipin; t_physical_tile_type_ptr type; - type = grid.get_physical_type(rr_graph.node_xlow(inode), rr_graph.node_ylow(inode)); + type = grid.get_physical_type({rr_graph.node_xlow(inode), + rr_graph.node_ylow(inode), + rr_graph.node_layer(inode)}); if (rr_graph.node_type(inode) != IPIN) return (false); @@ -335,7 +341,7 @@ void check_rr_node(const RRGraphView& rr_graph, //Make sure over-flow doesn't happen VTR_ASSERT(inode >= 0); - int xlow, ylow, xhigh, yhigh, ptc_num, capacity; + int xlow, ylow, xhigh, yhigh, layer_num, ptc_num, capacity; t_rr_type rr_type; t_physical_tile_type_ptr type; int nodes_per_chan, tracks_per_node; @@ -348,6 +354,7 @@ void check_rr_node(const RRGraphView& rr_graph, xhigh = rr_graph.node_xhigh(rr_node); ylow = rr_graph.node_ylow(rr_node); yhigh = rr_graph.node_yhigh(rr_node); + layer_num = rr_graph.node_layer(rr_node); ptc_num = rr_graph.node_ptc_num(rr_node); capacity = rr_graph.node_capacity(rr_node); cost_index = rr_graph.node_cost_index(rr_node); @@ -363,6 +370,11 @@ void check_rr_node(const RRGraphView& rr_graph, "in check_rr_node: rr endpoints (%d,%d) and (%d,%d) are out of range.\n", xlow, ylow, xhigh, yhigh); } + if (layer_num < 0 || layer_num > 
int(grid.get_num_layers()) - 1) { + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, + "in check_rr_node: rr endpoints layer_num (%d) is out of range.\n", layer_num); + } + if (ptc_num < 0) { VPR_ERROR(VPR_ERROR_ROUTE, "in check_rr_node: inode %d (type %d) had a ptc_num of %d.\n", inode, rr_type, ptc_num); @@ -374,7 +386,7 @@ void check_rr_node(const RRGraphView& rr_graph, } /* Check that the segment is within the array and such. */ - type = grid.get_physical_type(xlow, ylow); + type = grid.get_physical_type({xlow, ylow, layer_num}); switch (rr_type) { case SOURCE: diff --git a/libs/librrgraph/src/base/rr_graph_builder.cpp b/libs/librrgraph/src/base/rr_graph_builder.cpp index 535e027ca9f..072b47804ab 100644 --- a/libs/librrgraph/src/base/rr_graph_builder.cpp +++ b/libs/librrgraph/src/base/rr_graph_builder.cpp @@ -28,26 +28,27 @@ MetadataStorage>& RRGraphBuilder::rr_edge_metadata() void RRGraphBuilder::add_node_to_all_locs(RRNodeId node) { t_rr_type node_type = node_storage_.node_type(node); short node_ptc_num = node_storage_.node_ptc_num(node); + short node_layer = node_storage_.node_layer(node); for (int ix = node_storage_.node_xlow(node); ix <= node_storage_.node_xhigh(node); ix++) { for (int iy = node_storage_.node_ylow(node); iy <= node_storage_.node_yhigh(node); iy++) { switch (node_type) { case SOURCE: case SINK: case CHANY: - node_lookup_.add_node(node, ix, iy, node_type, node_ptc_num, SIDES[0]); + node_lookup_.add_node(node,node_layer, ix, iy, node_type, node_ptc_num, SIDES[0]); break; case CHANX: /* Currently need to swap x and y for CHANX because of chan, seg convention * TODO: Once the builders is reworked for use consistent (x, y) convention, * the following swapping can be removed */ - node_lookup_.add_node(node, iy, ix, node_type, node_ptc_num, SIDES[0]); + node_lookup_.add_node(node,node_layer, iy, ix, node_type, node_ptc_num, SIDES[0]); break; case OPIN: case IPIN: for (const e_side& side : SIDES) { if (node_storage_.is_node_on_specific_side(node, side)) { - 
node_lookup_.add_node(node, ix, iy, node_type, node_ptc_num, side); + node_lookup_.add_node(node,node_layer, ix, iy, node_type, node_ptc_num, side); } } break; diff --git a/libs/librrgraph/src/base/rr_graph_builder.h b/libs/librrgraph/src/base/rr_graph_builder.h index f1777355f07..5c00e1d2c18 100644 --- a/libs/librrgraph/src/base/rr_graph_builder.h +++ b/libs/librrgraph/src/base/rr_graph_builder.h @@ -165,6 +165,11 @@ class RRGraphBuilder { node_storage_.set_node_coordinates(id, x1, y1, x2, y2); } + /** @brief Set the node layer (specifies which die the node is located at) */ + inline void set_node_layer(RRNodeId id, short layer){ + node_storage_.set_node_layer(id,layer); + } + /** @brief The ptc_num carries different meanings for different node types * (true in VPR RRG that is currently supported, may not be true in customized RRG) * CHANX or CHANY: the track id in routing channels @@ -179,6 +184,11 @@ class RRGraphBuilder { node_storage_.set_node_ptc_num(id, new_ptc_num); } + /** @brief set the layer number at which RRNodeId is located at */ + inline void set_node_layer(RRNodeId id, int layer){ + node_storage_.set_node_layer(id, layer); + } + /** @brief set_node_pin_num() is designed for logic blocks, which are IPIN and OPIN nodes */ inline void set_node_pin_num(RRNodeId id, int new_pin_num) { node_storage_.set_node_pin_num(id, new_pin_num); diff --git a/libs/librrgraph/src/base/rr_graph_storage.cpp b/libs/librrgraph/src/base/rr_graph_storage.cpp index 94ca29b7636..9934752dce0 100644 --- a/libs/librrgraph/src/base/rr_graph_storage.cpp +++ b/libs/librrgraph/src/base/rr_graph_storage.cpp @@ -624,6 +624,10 @@ const char* t_rr_graph_storage::node_side_string(RRNodeId id) const { return SIDE_STRING[NUM_SIDES]; } +void t_rr_graph_storage::set_node_layer(RRNodeId id, short layer) { + node_layer_[id] = layer; +} + void t_rr_graph_storage::set_node_ptc_num(RRNodeId id, int new_ptc_num) { node_ptc_[id].ptc_.pin_num = new_ptc_num; //TODO: eventually remove } @@ -777,6 
+781,7 @@ int t_rr_graph_view::node_class_num(RRNodeId id) const { return get_node_class_num(node_storage_, node_ptc_, id); } + t_rr_graph_view t_rr_graph_storage::view() const { VTR_ASSERT(partitioned_); VTR_ASSERT(node_storage_.size() == node_fan_in_.size()); @@ -785,6 +790,7 @@ t_rr_graph_view t_rr_graph_storage::view() const { vtr::make_const_array_view_id(node_ptc_), vtr::make_const_array_view_id(node_first_edge_), vtr::make_const_array_view_id(node_fan_in_), + vtr::make_const_array_view_id(node_layer_), vtr::make_const_array_view_id(edge_src_node_), vtr::make_const_array_view_id(edge_dest_node_), vtr::make_const_array_view_id(edge_switch_)); diff --git a/libs/librrgraph/src/base/rr_graph_storage.h b/libs/librrgraph/src/base/rr_graph_storage.h index 6d150c02641..b6c85caa22e 100644 --- a/libs/librrgraph/src/base/rr_graph_storage.h +++ b/libs/librrgraph/src/base/rr_graph_storage.h @@ -77,6 +77,7 @@ struct alignas(16) t_rr_node_data { } dir_side_; uint16_t capacity_ = 0; + }; // t_rr_node_data is a key data structure, so fail at compile time if the @@ -226,6 +227,14 @@ class t_rr_graph_storage { return node_fan_in_[id]; } + /* Find the layer number that RRNodeId is located at. + * it is zero if the FPGA only has one die. + * The layer number start from the base die (base die: 0, the die above it: 1, etc.) + * */ + short node_layer(RRNodeId id) const{ + return node_layer_[id]; + } + // This prefetechs hot RR node data required for optimization. // // Note: This is optional, but may lower time spent on memory stalls in @@ -393,6 +402,7 @@ class t_rr_graph_storage { make_room_in_vector(&node_storage_, size_t(elem_position)); node_ptc_.reserve(node_storage_.capacity()); node_ptc_.resize(node_storage_.size()); + node_layer_.resize(node_storage_.size()); } // Reserve storage for RR nodes. 
@@ -401,6 +411,7 @@ class t_rr_graph_storage { VTR_ASSERT(!edges_read_); node_storage_.reserve(size); node_ptc_.reserve(size); + node_layer_.reserve(size); } // Resize node storage to accomidate size RR nodes. @@ -409,6 +420,7 @@ class t_rr_graph_storage { VTR_ASSERT(!edges_read_); node_storage_.resize(size); node_ptc_.resize(size); + node_layer_.resize(size); } // Number of RR nodes that can be accessed. @@ -429,6 +441,7 @@ class t_rr_graph_storage { node_ptc_.clear(); node_first_edge_.clear(); node_fan_in_.clear(); + node_layer_.clear(); seen_edge_.clear(); edge_src_node_.clear(); edge_dest_node_.clear(); @@ -448,6 +461,7 @@ class t_rr_graph_storage { node_ptc_.shrink_to_fit(); node_first_edge_.shrink_to_fit(); node_fan_in_.shrink_to_fit(); + node_layer_.shrink_to_fit(); seen_edge_.shrink_to_fit(); edge_src_node_.shrink_to_fit(); edge_dest_node_.shrink_to_fit(); @@ -461,6 +475,7 @@ class t_rr_graph_storage { VTR_ASSERT(!edges_read_); node_storage_.emplace_back(); node_ptc_.emplace_back(); + node_layer_.emplace_back(); } // Given `order`, a vector mapping each RRNodeId to a new one (old -> new), @@ -479,6 +494,7 @@ class t_rr_graph_storage { void set_node_type(RRNodeId id, t_rr_type new_type); void set_node_coordinates(RRNodeId id, short x1, short y1, short x2, short y2); + void set_node_layer(RRNodeId id, short layer); void set_node_cost_index(RRNodeId, RRIndexedDataId new_cost_index); void set_node_rc_index(RRNodeId, NodeRCIndex new_rc_index); void set_node_capacity(RRNodeId, short new_capacity); @@ -670,6 +686,12 @@ class t_rr_graph_storage { // Fan in counts for each RR node. vtr::vector node_fan_in_; + // Layer number that each RR node is located at + // Layer number refers to the die that the node belongs to. The layer number of base die is zero and die above it one, etc. 
+ // This data is also considered as a hot data since it is used in inner loop of router, but since it didn't fit nicely into t_rr_node_data due to alignment issues, we had to store it + // in a separate vector. + vtr::vector node_layer_; + // Edge storage. vtr::vector edge_src_node_; vtr::vector edge_dest_node_; @@ -721,6 +743,7 @@ class t_rr_graph_view { const vtr::array_view_id node_ptc, const vtr::array_view_id node_first_edge, const vtr::array_view_id node_fan_in, + const vtr::array_view_id node_layer, const vtr::array_view_id edge_src_node, const vtr::array_view_id edge_dest_node, const vtr::array_view_id edge_switch) @@ -728,6 +751,7 @@ class t_rr_graph_view { , node_ptc_(node_ptc) , node_first_edge_(node_first_edge) , node_fan_in_(node_fan_in) + , node_layer_(node_layer) , edge_src_node_(edge_src_node) , edge_dest_node_(edge_dest_node) , edge_switch_(edge_switch) {} @@ -784,6 +808,11 @@ class t_rr_graph_view { return node_fan_in_[id]; } + /* Retrieve layer(die) number that RRNodeId is located at */ + short node_layer(RRNodeId id) const{ + return node_layer_[id]; + } + // This prefetechs hot RR node data required for optimization. // // Note: This is optional, but may lower time spent on memory stalls in @@ -824,6 +853,7 @@ class t_rr_graph_view { vtr::array_view_id node_ptc_; vtr::array_view_id node_first_edge_; vtr::array_view_id node_fan_in_; + vtr::array_view_id node_layer_; vtr::array_view_id edge_src_node_; vtr::array_view_id edge_dest_node_; vtr::array_view_id edge_switch_; diff --git a/libs/librrgraph/src/base/rr_graph_view.h b/libs/librrgraph/src/base/rr_graph_view.h index cdae9ebe5de..3d808b23c71 100644 --- a/libs/librrgraph/src/base/rr_graph_view.h +++ b/libs/librrgraph/src/base/rr_graph_view.h @@ -160,6 +160,11 @@ class RRGraphView { return node_storage_.node_yhigh(node); } + /** @brief Get the layer num of a routing resource node. This function is inlined for runtime optimization. 
*/ + inline short node_layer(RRNodeId node) const { + return node_storage_.node_layer(node); + } + /** @brief Get the first out coming edge of resource node. This function is inlined for runtime optimization. */ inline RREdgeId node_first_edge(RRNodeId node) const { return node_storage_.first_edge(node); diff --git a/libs/librrgraph/src/base/rr_node_types.h b/libs/librrgraph/src/base/rr_node_types.h index ed0da0b37fe..56c2b97c3e6 100644 --- a/libs/librrgraph/src/base/rr_node_types.h +++ b/libs/librrgraph/src/base/rr_node_types.h @@ -112,8 +112,8 @@ struct t_rr_rc_data { float C; }; -// This is the data type of fast lookups of an rr-node given an (rr_type, x, y, and the side) -//[0..num_rr_types-1][0..grid_width-1][0..grid_height-1][0..NUM_SIDES-1][0..max_ptc-1] -typedef std::array, 3>, NUM_RR_TYPES> t_rr_node_indices; +// This is the data type of fast lookups of an rr-node given an (rr_type, layer, x, y, and the side) +//[0..num_rr_types-1][0..num_layer-1][0..grid_width-1][0..grid_height-1][0..NUM_SIDES-1][0..max_ptc-1] +typedef std::array, 4>, NUM_RR_TYPES> t_rr_node_indices; #endif diff --git a/libs/librrgraph/src/base/rr_spatial_lookup.cpp b/libs/librrgraph/src/base/rr_spatial_lookup.cpp index 6f6bae475d3..5b76b3418af 100644 --- a/libs/librrgraph/src/base/rr_spatial_lookup.cpp +++ b/libs/librrgraph/src/base/rr_spatial_lookup.cpp @@ -4,7 +4,8 @@ RRSpatialLookup::RRSpatialLookup() { } -RRNodeId RRSpatialLookup::find_node(int x, +RRNodeId RRSpatialLookup::find_node(int layer, + int x, int y, t_rr_type type, int ptc, @@ -27,8 +28,8 @@ RRNodeId RRSpatialLookup::find_node(int x, node_side = SIDES[0]; } - /* Pre-check: the x, y, side and ptc should be non negative numbers! Otherwise, return an invalid id */ - if ((x < 0) || (y < 0) || (node_side == NUM_SIDES) || (ptc < 0)) { + /* Pre-check: the layer, x, y, side and ptc should be non-negative numbers! 
Otherwise, return an invalid id */ + if ((layer < 0) || (x < 0) || (y < 0) || (node_side == NUM_SIDES) || (ptc < 0)) { return RRNodeId::INVALID(); } @@ -44,9 +45,9 @@ RRNodeId RRSpatialLookup::find_node(int x, std::swap(node_x, node_y); } - VTR_ASSERT_SAFE(3 == rr_node_indices_[type].ndims()); + VTR_ASSERT_SAFE(4 == rr_node_indices_[type].ndims()); - /* Sanity check to ensure the x, y, side and ptc are in range + /* Sanity check to ensure the layer, x, y, side and ptc are in range * - Return an valid id by searching in look-up when all the parameters are in range * - Return an invalid id if any out-of-range is detected */ @@ -54,26 +55,31 @@ RRNodeId RRSpatialLookup::find_node(int x, return RRNodeId::INVALID(); } - if (node_x >= rr_node_indices_[type].dim_size(0)) { + if (size_t(layer) >= rr_node_indices_[type].dim_size(0)) { return RRNodeId::INVALID(); } - if (node_y >= rr_node_indices_[type].dim_size(1)) { + if (node_x >= rr_node_indices_[type].dim_size(1)) { return RRNodeId::INVALID(); } - if (node_side >= rr_node_indices_[type].dim_size(2)) { + if(node_y >= rr_node_indices_[type].dim_size(2)){ return RRNodeId::INVALID(); } - if (size_t(ptc) >= rr_node_indices_[type][node_x][node_y][node_side].size()) { + if (node_side >= rr_node_indices_[type].dim_size(3)) { return RRNodeId::INVALID(); } - return RRNodeId(rr_node_indices_[type][node_x][node_y][node_side][ptc]); + if (size_t(ptc) >= rr_node_indices_[type][layer][node_x][node_y][node_side].size()) { + return RRNodeId::INVALID(); + } + + return RRNodeId(rr_node_indices_[type][layer][node_x][node_y][node_side][ptc]); } -std::vector RRSpatialLookup::find_nodes(int x, +std::vector RRSpatialLookup::find_nodes(int layer, + int x, int y, t_rr_type type, e_side side) const { @@ -82,8 +88,8 @@ std::vector RRSpatialLookup::find_nodes(int x, */ std::vector nodes; - /* Pre-check: the x, y, type are valid! Otherwise, return an empty vector */ - if (x < 0 || y < 0) { + /* Pre-check: the layer, x, y are valid! 
Otherwise, return an empty vector */ + if (layer < 0 || x < 0 || y < 0) { return nodes; } @@ -99,7 +105,7 @@ std::vector RRSpatialLookup::find_nodes(int x, std::swap(node_x, node_y); } - VTR_ASSERT_SAFE(3 == rr_node_indices_[type].ndims()); + VTR_ASSERT_SAFE(4 == rr_node_indices_[type].ndims()); /* Sanity check to ensure the x, y, side are in range * - Return a list of valid ids by searching in look-up when all the parameters are in range @@ -109,28 +115,32 @@ std::vector RRSpatialLookup::find_nodes(int x, return nodes; } - if (node_x >= rr_node_indices_[type].dim_size(0)) { + if (size_t(layer) >= rr_node_indices_[type].dim_size(0)) { return nodes; } - if (node_y >= rr_node_indices_[type].dim_size(1)) { + if (node_x >= rr_node_indices_[type].dim_size(1)) { return nodes; } - if (side >= rr_node_indices_[type].dim_size(2)) { + if(node_y >= rr_node_indices_[type].dim_size(2)){ + return nodes; + } + + if (side >= rr_node_indices_[type].dim_size(3)) { return nodes; } /* Reserve space to avoid memory fragmentation */ size_t num_nodes = 0; - for (const auto& node : rr_node_indices_[type][node_x][node_y][side]) { + for (const auto& node : rr_node_indices_[type][layer][node_x][node_y][side]) { if (RRNodeId(node)) { num_nodes++; } } nodes.reserve(num_nodes); - for (const auto& node : rr_node_indices_[type][node_x][node_y][side]) { + for (const auto& node : rr_node_indices_[type][layer][node_x][node_y][side]) { if (RRNodeId(node)) { nodes.push_back(RRNodeId(node)); } @@ -139,7 +149,8 @@ std::vector RRSpatialLookup::find_nodes(int x, return nodes; } -std::vector RRSpatialLookup::find_channel_nodes(int x, +std::vector RRSpatialLookup::find_channel_nodes(int layer, + int x, int y, t_rr_type type) const { /* Pre-check: node type should be routing tracks! 
*/ @@ -147,10 +158,11 @@ std::vector RRSpatialLookup::find_channel_nodes(int x, return std::vector(); } - return find_nodes(x, y, type); + return find_nodes(layer, x, y, type); } -std::vector RRSpatialLookup::find_nodes_at_all_sides(int x, +std::vector RRSpatialLookup::find_nodes_at_all_sides(int layer, + int x, int y, t_rr_type rr_type, int ptc) const { @@ -159,17 +171,17 @@ std::vector RRSpatialLookup::find_nodes_at_all_sides(int x, /* TODO: Consider to access the raw data like find_node() rather than calling find_node() many times, which hurts runtime */ if (rr_type == IPIN || rr_type == OPIN) { indices.reserve(NUM_SIDES); - //For pins we need to look at all the sides of the current grid tile + //For pins, we need to look at all the sides of the current grid tile for (e_side side : SIDES) { - RRNodeId rr_node_index = find_node(x, y, rr_type, ptc, side); + RRNodeId rr_node_index = find_node(layer, x, y, rr_type, ptc, side); if (rr_node_index) { indices.push_back(rr_node_index); } } indices.shrink_to_fit(); } else { - //Sides do not effect non-pins so there should only be one per ptc - RRNodeId rr_node_index = find_node(x, y, rr_type, ptc); + //Sides do not affect non-pins so there should only be one per ptc + RRNodeId rr_node_index = find_node(layer, x, y, rr_type, ptc); if (rr_node_index) { indices.push_back(rr_node_index); } @@ -178,81 +190,86 @@ std::vector RRSpatialLookup::find_nodes_at_all_sides(int x, return indices; } -std::vector RRSpatialLookup::find_grid_nodes_at_all_sides(int x, +std::vector RRSpatialLookup::find_grid_nodes_at_all_sides(int layer, + int x, int y, t_rr_type rr_type) const { VTR_ASSERT(rr_type == SOURCE || rr_type == OPIN || rr_type == IPIN || rr_type == SINK); if (rr_type == SOURCE || rr_type == SINK) { - return find_nodes(x, y, rr_type); + return find_nodes(layer,x, y, rr_type); } std::vector nodes; /* Reserve space to avoid memory fragmentation */ size_t num_nodes = 0; for (e_side node_side : SIDES) { - num_nodes += find_nodes(x, y, 
rr_type, node_side).size(); + num_nodes += find_nodes(layer,x, y, rr_type, node_side).size(); } nodes.reserve(num_nodes); for (e_side node_side : SIDES) { - std::vector temp_nodes = find_nodes(x, y, rr_type, node_side); + std::vector temp_nodes = find_nodes(layer,x, y, rr_type, node_side); nodes.insert(nodes.end(), temp_nodes.begin(), temp_nodes.end()); } return nodes; } -void RRSpatialLookup::reserve_nodes(int x, +void RRSpatialLookup::reserve_nodes(int layer, + int x, int y, t_rr_type type, int num_nodes, e_side side) { - VTR_ASSERT_SAFE(3 == rr_node_indices_[type].ndims()); + VTR_ASSERT_SAFE(4 == rr_node_indices_[type].ndims()); /* For non-IPIN/OPIN nodes, the side should always be the TOP side which follows the convention in find_node() API! */ if (type != IPIN && type != OPIN) { VTR_ASSERT(side == SIDES[0]); } - resize_nodes(x, y, type, side); + resize_nodes(layer, x, y, type, side); - rr_node_indices_[type][x][y][side].reserve(num_nodes); + rr_node_indices_[type][layer][x][y][side].reserve(num_nodes); } void RRSpatialLookup::add_node(RRNodeId node, + int layer, int x, int y, t_rr_type type, int ptc, e_side side) { VTR_ASSERT(node); /* Must have a valid node id to be added */ - VTR_ASSERT_SAFE(3 == rr_node_indices_[type].ndims()); + VTR_ASSERT_SAFE(4 == rr_node_indices_[type].ndims()); /* For non-IPIN/OPIN nodes, the side should always be the TOP side which follows the convention in find_node() API! 
*/ if (type != IPIN && type != OPIN) { VTR_ASSERT(side == SIDES[0]); } - resize_nodes(x, y, type, side); + resize_nodes(layer, x, y, type, side); - if (size_t(ptc) >= rr_node_indices_[type][x][y][side].size()) { + if (size_t(ptc) >= rr_node_indices_[type][layer][x][y][side].size()) { /* Deposit invalid ids to newly allocated elements while original elements are untouched */ - rr_node_indices_[type][x][y][side].resize(ptc + 1, int(size_t(RRNodeId::INVALID()))); + rr_node_indices_[type][layer][x][y][side].resize(ptc + 1, int(size_t(RRNodeId::INVALID()))); } /* Resize on demand finished; Register the node */ - rr_node_indices_[type][x][y][side][ptc] = int(size_t(node)); + rr_node_indices_[type][layer][x][y][side][ptc] = int(size_t(node)); } -void RRSpatialLookup::mirror_nodes(const vtr::Point& src_coord, +void RRSpatialLookup::mirror_nodes(const int layer, + const vtr::Point& src_coord, const vtr::Point& des_coord, t_rr_type type, e_side side) { VTR_ASSERT(SOURCE == type || SINK == type); - resize_nodes(des_coord.x(), des_coord.y(), type, side); - rr_node_indices_[type][des_coord.x()][des_coord.y()][side] = rr_node_indices_[type][src_coord.x()][src_coord.y()][side]; + resize_nodes(layer, des_coord.x(), des_coord.y(), type, side); + rr_node_indices_[type][layer][des_coord.x()][des_coord.y()][side] = rr_node_indices_[type][layer][src_coord.x()][src_coord.y()][side]; } -void RRSpatialLookup::resize_nodes(int x, +void RRSpatialLookup::resize_nodes(int layer, + int x, int y, t_rr_type type, e_side side) { @@ -263,25 +280,30 @@ void RRSpatialLookup::resize_nodes(int x, VTR_ASSERT(type < rr_node_indices_.size()); VTR_ASSERT(x >= 0); VTR_ASSERT(y >= 0); - - if ((x >= int(rr_node_indices_[type].dim_size(0))) - || (y >= int(rr_node_indices_[type].dim_size(1))) - || (size_t(side) >= rr_node_indices_[type].dim_size(2))) { - rr_node_indices_[type].resize({std::max(rr_node_indices_[type].dim_size(0), size_t(x) + 1), - std::max(rr_node_indices_[type].dim_size(1), size_t(y) + 1), - 
std::max(rr_node_indices_[type].dim_size(2), size_t(side) + 1)}); + VTR_ASSERT(layer >= 0); + + if ((layer >= int(rr_node_indices_[type].dim_size(0))) + || (x >= int(rr_node_indices_[type].dim_size(1))) + || (y >= int(rr_node_indices_[type].dim_size(2))) + || (size_t(side) >= rr_node_indices_[type].dim_size(3))) { + rr_node_indices_[type].resize({std::max(rr_node_indices_[type].dim_size(0),size_t(layer)+1), + std::max(rr_node_indices_[type].dim_size(1), size_t(x) + 1), + std::max(rr_node_indices_[type].dim_size(2), size_t(y) + 1), + std::max(rr_node_indices_[type].dim_size(3), size_t(side) + 1)}); } } void RRSpatialLookup::reorder(const vtr::vector dest_order) { // update rr_node_indices, a map to optimize rr_index lookups for (auto& grid : rr_node_indices_) { - for (size_t x = 0; x < grid.dim_size(0); x++) { - for (size_t y = 0; y < grid.dim_size(1); y++) { - for (size_t s = 0; s < grid.dim_size(2); s++) { - for (auto& node : grid[x][y][s]) { - if (node != OPEN) { - node = size_t(dest_order[RRNodeId(node)]); + for(size_t l = 0; l < grid.dim_size(0); l++) { + for (size_t x = 0; x < grid.dim_size(1); x++) { + for (size_t y = 0; y < grid.dim_size(2); y++) { + for (size_t s = 0; s < grid.dim_size(3); s++) { + for (auto &node: grid[l][x][y][s]) { + if (node != OPEN) { + node = size_t(dest_order[RRNodeId(node)]); + } } } } diff --git a/libs/librrgraph/src/base/rr_spatial_lookup.h b/libs/librrgraph/src/base/rr_spatial_lookup.h index adffd0445fc..ccfe73a7633 100644 --- a/libs/librrgraph/src/base/rr_spatial_lookup.h +++ b/libs/librrgraph/src/base/rr_spatial_lookup.h @@ -41,24 +41,25 @@ class RRSpatialLookup { /** * @brief Returns the index of the specified routing resource node. * + * @param layer specified which FPGA die the node is located at (e.g. multi-die(3D) FPGA) * @param (x, y) are the grid location within the FPGA * @param rr_type specifies the type of resource, - * @param ptc gives a unique number of resources of that type (e.g. CHANX) at that (x,y). 
+ * @param ptc gives a unique number of resources of that type (e.g. CHANX) at that (layer,x,y). * * @note All ptcs start at 0 and are positive. * Depending on what type of resource this is, ptc can be * - the class number of a common SINK/SOURCE node of grid, * starting at 0 and go up to logical_class_inf size - 1 of SOURCEs + SINKs in a grid * - pin number of an input/output pin of a grid. They would normally start at 0 - * and go to the number of pins on a block at that (x, y) location + * and go to the number of pins on a block at that (layer,x,y) location * - track number of a routing wire in a channel. They would normally go from 0 - * to channel_width - 1 at that (x,y) + * to channel_width - 1 at that (layer,x,y) * * @note An invalid id will be returned if the node does not exist * * @note For segments (CHANX and CHANY) of length > 1, the segment is - * given an rr_index based on the (x,y) location at which it starts (i.e. - * lowest (x,y) location at which this segment exists). + * given an rr_index based on the (layer,x,y) location at which it starts (i.e. + * lowest (layer,x,y) location at which this segment exists). * * @note The 'side' argument only applies to IPIN/OPIN types, and specifies which * side of the grid tile the node should be located on. The value is ignored @@ -67,7 +68,8 @@ class RRSpatialLookup { * This routine also performs error checking to make sure the node in * question exists. */ - RRNodeId find_node(int x, + RRNodeId find_node(int layer, + int x, int y, t_rr_type type, int ptc, @@ -76,18 +78,20 @@ class RRSpatialLookup { /** * @brief Returns the indices of the specified routing resource nodes, representing routing tracks in a channel. * + * @param layer specified which FPGA die the node is located at (e.g. 
multi-die(3D) FPGA) * @param (x, y) are the coordinate of the routing channel within the FPGA * @param rr_type specifies the type of routing channel, either x-direction or y-direction * * @note - * - Return an empty list if there are no routing channel at the given (x, y) location + * - Return an empty list if there are no routing channel at the given (layer,x,y) location * - The node list returned only contain valid ids - * For example, if the 2nd routing track does not exist in a routing channel at (x, y) location, - * while the 3rd routing track does exist in a routing channel at (x, y) location, + * For example, if the 2nd routing track does not exist in a routing channel at (layer,x,y) location, + * while the 3rd routing track does exist in a routing channel at (layer,x, y) location, * the node list will not contain the node for the 2nd routing track, but the 2nd element in the list * will be the node for the 3rd routing track */ - std::vector find_channel_nodes(int x, + std::vector find_channel_nodes(int layer, + int x, int y, t_rr_type type) const; @@ -95,26 +99,29 @@ class RRSpatialLookup { * @brief Like find_node() but returns all matching nodes on all the sides. * * This is particularly useful for getting all instances - * of a specific IPIN/OPIN at a specific grid tile (x,y) location. + * of a specific IPIN/OPIN at a specific grid tile (layer,x,y). */ - std::vector find_nodes_at_all_sides(int x, + std::vector find_nodes_at_all_sides(int layer, + int x, int y, t_rr_type rr_type, int ptc) const; /** - * @brief Returns all matching nodes on all the sides at a specific grid tile (x,y) location. + * @brief Returns all matching nodes on all the sides at a specific grid tile (layer,x,y) location. 
* * As this is applicable to grid pins, the type of nodes are limited to SOURCE/SINK/IPIN/OPIN */ - std::vector find_grid_nodes_at_all_sides(int x, + std::vector find_grid_nodes_at_all_sides(int layer, + int x, int y, t_rr_type rr_type) const; /* -- Mutators -- */ public: - /** @brief Reserve the memory for a list of nodes at (x, y) location with given type and side */ - void reserve_nodes(int x, + /** @brief Reserve the memory for a list of nodes at (layer, x, y) location with given type and side */ + void reserve_nodes(int layer, + int x, int y, t_rr_type type, int num_nodes, @@ -125,6 +132,7 @@ class RRSpatialLookup { * * @note You must have a valid node id to register the node in the lookup * + * @param layer specified which FPGA die the node is located at (e.g. multi-die(3D) FPGA) * @param (x, y) are the coordinate of the node to be indexable in the fast look-up * @param type is the type of a node * @param ptc is a feature number of a node, which can be @@ -144,6 +152,7 @@ class RRSpatialLookup { * As such, multiple node addition could be efficiently implemented */ void add_node(RRNodeId node, + int layer, int x, int y, t_rr_type type, @@ -155,7 +164,7 @@ class RRSpatialLookup { * a destination coordinate. * * This function is mostly need by SOURCE and SINK nodes which are indexable in multiple locations. - * Considering a bounding box (x, y)->(x + width, y + height) of a multi-height and multi-width grid, + * Considering a bounding box (layer, x, y)->(layer, x + width, y + height) of a multi-height and multi-width grid, * SOURCE and SINK nodes are indexable in any location inside the boundry. * * An example of usage: @@ -189,13 +198,14 @@ class RRSpatialLookup { * corner when dealing with large blocks. 
But this may require the data structure to be dependent * on DeviceGrid information (it needs to identify if a grid has height > 1 as well as width > 1) */ - void mirror_nodes(const vtr::Point& src_coord, + void mirror_nodes(const int layer, + const vtr::Point& src_coord, const vtr::Point& des_coord, t_rr_type type, e_side side); /** - * @brief Resize the given 3 dimensions (x, y, side) of the RRSpatialLookup data structure for the given type + * @brief Resize the given 4 dimensions (layer, x, y, side) of the RRSpatialLookup data structure for the given type * * This function will keep any existing data * @@ -205,7 +215,8 @@ class RRSpatialLookup { * TODO: should have a reserve function but vtd::ndmatrix does not have such API * as a result, resize can be an internal one while reserve function is a public mutator */ - void resize_nodes(int x, + void resize_nodes(int layer, + int x, int y, t_rr_type type, e_side side); @@ -220,17 +231,18 @@ class RRSpatialLookup { private: /* An internal API to find all the nodes in a specific location with a given type * For OPIN/IPIN nodes that may exist on multiple sides, a specific side must be provided - * This API is NOT public because its too powerful for developers with very limited sanity checks + * This API is NOT public because it is too powerful for developers with very limited sanity checks * But it is used to build the public APIs find_channel_nodes() etc., where sufficient sanity checks are applied */ - std::vector find_nodes(int x, + std::vector find_nodes(int layer, + int x, int y, t_rr_type type, e_side side = SIDES[0]) const; /* -- Internal data storage -- */ private: - /* Fast look-up: TODO: Should rework the data type. Currently it is based on a 3-dimensional arrqay mater where some dimensions must always be accessed with a specific index. Such limitation should be overcome */ + /* Fast look-up: TODO: Should rework the data type. 
Currently it is based on a 3-dimensional array mater where some dimensions must always be accessed with a specific index. Such limitation should be overcome */ t_rr_node_indices rr_node_indices_; }; diff --git a/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx.h b/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx.h index 6f4a1eaf05f..843aa582f12 100644 --- a/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx.h +++ b/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx.h @@ -4,9 +4,9 @@ * https://github.com/duck2/uxsdcxx * Modify only if your build process doesn't involve regenerating this file. * - * Cmdline: uxsdcxx/uxsdcxx.py /home/oscar/Desktop/vtr-new/libs/librrgraph/src/base/rr_graph.xsd - * Input file: /home/oscar/Desktop/vtr-new/libs/librrgraph/src/base/rr_graph.xsd - * md5sum of input file: 41df83ecf127a53590711ddec605742a + * Cmdline: uxsdcxx/uxsdcxx.py /home/amin/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd + * Input file: /home/amin/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd + * md5sum of input file: 8672cb3951993f7e0ea3433a02507672 */ #include @@ -82,12 +82,12 @@ template inline void load_block_types(const pugi::xml_node &root, T &out, Context &context, const std::function *report_error, ptrdiff_t *offset_debug); template inline void load_grid_loc(const pugi::xml_node &root, T &out, Context &context, const std::function *report_error, ptrdiff_t *offset_debug); -inline void load_grid_loc_required_attributes(const pugi::xml_node &root, int * block_type_id, int * height_offset, int * width_offset, int * x, int * y, const std::function * report_error); +inline void load_grid_loc_required_attributes(const pugi::xml_node &root, int * block_type_id, int * height_offset, int * width_offset, int * x, int * y, int* layer, const std::function * report_error); template inline void load_grid_locs(const pugi::xml_node &root, T &out, Context &context, const std::function *report_error, ptrdiff_t *offset_debug); template inline void load_node_loc(const 
pugi::xml_node &root, T &out, Context &context, const std::function *report_error, ptrdiff_t *offset_debug); -inline void load_node_loc_required_attributes(const pugi::xml_node &root, int * ptc, int * xhigh, int * xlow, int * yhigh, int * ylow, const std::function * report_error); +inline void load_node_loc_required_attributes(const pugi::xml_node &root, int * layer, int * ptc, int * xhigh, int * xlow, int * yhigh, int * ylow, const std::function * report_error); template inline void load_node_timing(const pugi::xml_node &root, T &out, Context &context, const std::function *report_error, ptrdiff_t *offset_debug); inline void load_node_timing_required_attributes(const pugi::xml_node &root, float * C, float * R, const std::function * report_error); @@ -269,14 +269,14 @@ constexpr const char *atok_lookup_t_block_type[] = {"height", "id", "name", "wid enum class gtok_t_block_types {BLOCK_TYPE}; constexpr const char *gtok_lookup_t_block_types[] = {"block_type"}; -enum class atok_t_grid_loc {BLOCK_TYPE_ID, HEIGHT_OFFSET, WIDTH_OFFSET, X, Y}; -constexpr const char *atok_lookup_t_grid_loc[] = {"block_type_id", "height_offset", "width_offset", "x", "y"}; +enum class atok_t_grid_loc {BLOCK_TYPE_ID, HEIGHT_OFFSET, WIDTH_OFFSET, X, Y, LAYER}; +constexpr const char *atok_lookup_t_grid_loc[] = {"block_type_id", "height_offset", "width_offset", "x", "y", "layer"}; enum class gtok_t_grid_locs {GRID_LOC}; constexpr const char *gtok_lookup_t_grid_locs[] = {"grid_loc"}; -enum class atok_t_node_loc {PTC, SIDE, XHIGH, XLOW, YHIGH, YLOW}; -constexpr const char *atok_lookup_t_node_loc[] = {"ptc", "side", "xhigh", "xlow", "yhigh", "ylow"}; +enum class atok_t_node_loc {LAYER, PTC, SIDE, XHIGH, XLOW, YHIGH, YLOW}; +constexpr const char *atok_lookup_t_node_loc[] = {"layer", "ptc", "side", "xhigh", "xlow", "yhigh", "ylow"}; enum class atok_t_node_timing {C, R}; @@ -1015,6 +1015,21 @@ inline atok_t_grid_loc lex_attr_t_grid_loc(const char *in, const std::function * report_error){ int out; - // 
global variable, must set to 0 before using it to avoid changed by other errors - errno = 0; out = std::strtol(in, NULL, 10); if(errno != 0) noreturn_report(report_error, ("Invalid value `" + std::string(in) + "` when loading into a int.").c_str()); @@ -2082,8 +2103,6 @@ inline int load_int(const char *in, const std::function * re inline unsigned int load_unsigned_int(const char *in, const std::function * report_error){ unsigned int out; - // global variable, must set to 0 before using it to avoid changed by other errors - errno = 0; out = std::strtoul(in, NULL, 10); if(errno != 0) noreturn_report(report_error, ("Invalid value `" + std::string(in) + "` when loading into a unsigned int.").c_str()); @@ -2092,8 +2111,6 @@ inline unsigned int load_unsigned_int(const char *in, const std::function * report_error){ float out; - // global variable, must set to 0 before using it to avoid changed by other errors - errno = 0; out = std::strtof(in, NULL); if(errno != 0) noreturn_report(report_error, ("Invalid value `" + std::string(in) + "` when loading into a float.").c_str()); @@ -2291,14 +2308,14 @@ inline void load_block_type_required_attributes(const pugi::xml_node &root, int if(!test_astate.all()) attr_error(test_astate, atok_lookup_t_block_type, report_error); } -inline void load_grid_loc_required_attributes(const pugi::xml_node &root, int * block_type_id, int * height_offset, int * width_offset, int * x, int * y, const std::function * report_error){ - std::bitset<5> astate = 0; +inline void load_grid_loc_required_attributes(const pugi::xml_node &root, int * block_type_id, int * height_offset, int * width_offset, int * x, int * y, int* layer, const std::function * report_error){ + std::bitset<6> astate = 0; for(pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()){ atok_t_grid_loc in = lex_attr_t_grid_loc(attr.name(), report_error); if(astate[(int)in] == 0) astate[(int)in] = 1; else noreturn_report(report_error, ("Duplicate attribute " + 
std::string(attr.name()) + " in .").c_str()); switch(in){ - case atok_t_grid_loc::BLOCK_TYPE_ID: + case atok_t_grid_loc::BLOCK_TYPE_ID: *block_type_id = load_int(attr.value(), report_error); break; case atok_t_grid_loc::HEIGHT_OFFSET: @@ -2313,20 +2330,25 @@ inline void load_grid_loc_required_attributes(const pugi::xml_node &root, int * case atok_t_grid_loc::Y: *y = load_int(attr.value(), report_error); break; + case atok_t_grid_loc::LAYER: + *layer=load_int(attr.value(), report_error); default: break; /* Not possible. */ } } - std::bitset<5> test_astate = astate | std::bitset<5>(0b00000); + std::bitset<6> test_astate = astate | std::bitset<6>(0b000000); if(!test_astate.all()) attr_error(test_astate, atok_lookup_t_grid_loc, report_error); } -inline void load_node_loc_required_attributes(const pugi::xml_node &root, int * ptc, int * xhigh, int * xlow, int * yhigh, int * ylow, const std::function * report_error){ - std::bitset<6> astate = 0; +inline void load_node_loc_required_attributes(const pugi::xml_node &root, int * layer, int * ptc, int * xhigh, int * xlow, int * yhigh, int * ylow, const std::function * report_error){ + std::bitset<7> astate = 0; for(pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()){ atok_t_node_loc in = lex_attr_t_node_loc(attr.name(), report_error); if(astate[(int)in] == 0) astate[(int)in] = 1; else noreturn_report(report_error, ("Duplicate attribute " + std::string(attr.name()) + " in .").c_str()); switch(in){ + case atok_t_node_loc::LAYER: + *layer = load_int(attr.value(), report_error); + break; case atok_t_node_loc::PTC: *ptc = load_int(attr.value(), report_error); break; @@ -2348,7 +2370,7 @@ inline void load_node_loc_required_attributes(const pugi::xml_node &root, int * default: break; /* Not possible. 
*/ } } - std::bitset<6> test_astate = astate | std::bitset<6>(0b000010); + std::bitset<7> test_astate = astate | std::bitset<7>(0b0000100); if(!test_astate.all()) attr_error(test_astate, atok_lookup_t_node_loc, report_error); } @@ -3202,8 +3224,10 @@ inline void load_grid_locs(const pugi::xml_node &root, T &out, Context &context, memset(&grid_loc_x, 0, sizeof(grid_loc_x)); int grid_loc_y; memset(&grid_loc_y, 0, sizeof(grid_loc_y)); - load_grid_loc_required_attributes(node, &grid_loc_block_type_id, &grid_loc_height_offset, &grid_loc_width_offset, &grid_loc_x, &grid_loc_y, report_error); - auto child_context = out.add_grid_locs_grid_loc(context, grid_loc_block_type_id, grid_loc_height_offset, grid_loc_width_offset, grid_loc_x, grid_loc_y); + int grid_loc_layer; + memset(&grid_loc_layer,0,sizeof(grid_loc_layer)); + load_grid_loc_required_attributes(node, &grid_loc_block_type_id, &grid_loc_height_offset, &grid_loc_width_offset, &grid_loc_x, &grid_loc_y, &grid_loc_layer, report_error); + auto child_context = out.add_grid_locs_grid_loc(context, grid_loc_block_type_id, grid_loc_height_offset, grid_loc_width_offset, grid_loc_x, grid_loc_y, grid_loc_layer); load_grid_loc(node, out, child_context, report_error, offset_debug); out.finish_grid_locs_grid_loc(child_context); } @@ -3227,6 +3251,9 @@ inline void load_node_loc(const pugi::xml_node &root, T &out, Context &context, for(pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()){ atok_t_node_loc in = lex_attr_t_node_loc(attr.name(), report_error); switch(in){ + case atok_t_node_loc::LAYER: + /* Attribute layer is already set */ + break; case atok_t_node_loc::PTC: /* Attribute ptc is already set */ break; @@ -3408,6 +3435,8 @@ inline void load_node(const pugi::xml_node &root, T &out, Context &context, cons switch(in){ case gtok_t_node::LOC: { + int node_loc_layer; + memset(&node_loc_layer, 0, sizeof(node_loc_layer)); int node_loc_ptc; memset(&node_loc_ptc, 0, sizeof(node_loc_ptc)); int 
node_loc_xhigh; @@ -3418,8 +3447,8 @@ inline void load_node(const pugi::xml_node &root, T &out, Context &context, cons memset(&node_loc_yhigh, 0, sizeof(node_loc_yhigh)); int node_loc_ylow; memset(&node_loc_ylow, 0, sizeof(node_loc_ylow)); - load_node_loc_required_attributes(node, &node_loc_ptc, &node_loc_xhigh, &node_loc_xlow, &node_loc_yhigh, &node_loc_ylow, report_error); - auto child_context = out.init_node_loc(context, node_loc_ptc, node_loc_xhigh, node_loc_xlow, node_loc_yhigh, node_loc_ylow); + load_node_loc_required_attributes(node, &node_loc_layer, &node_loc_ptc, &node_loc_xhigh, &node_loc_xlow, &node_loc_yhigh, &node_loc_ylow, report_error); + auto child_context = out.init_node_loc(context, node_loc_layer, node_loc_ptc, node_loc_xhigh, node_loc_xlow, node_loc_yhigh, node_loc_ylow); load_node_loc(node, out, child_context, report_error, offset_debug); out.finish_node_loc(child_context); } @@ -3920,6 +3949,8 @@ inline void write_grid_locs(T &in, std::ostream &os, Context &context){ os << " width_offset=\"" << in.get_grid_loc_width_offset(child_context) << "\""; os << " x=\"" << in.get_grid_loc_x(child_context) << "\""; os << " y=\"" << in.get_grid_loc_y(child_context) << "\""; + os << " layer=\"" << in.get_grid_loc_layer(child_context) << "\""; + os << "/>\n"; } } @@ -3958,6 +3989,7 @@ inline void write_node(T &in, std::ostream &os, Context &context){ { auto child_context = in.get_node_loc(context); os << " @@ -687,7 +687,7 @@ inline void load_grid_locs_capnp_type(const ucap::GridLocs::Reader &root, T &out auto data = root.getGridLocs(); out.preallocate_grid_locs_grid_loc(context, data.size()); for(const auto & el : data) { - auto child_context = out.add_grid_locs_grid_loc(context, el.getBlockTypeId(), el.getHeightOffset(), el.getWidthOffset(), el.getX(), el.getY()); + auto child_context = out.add_grid_locs_grid_loc(context, el.getBlockTypeId(), el.getHeightOffset(), el.getWidthOffset(), el.getX(), el.getY(), el.getLayer()); load_grid_loc_capnp_type(el, out, 
child_context, report_error, stack); out.finish_grid_locs_grid_loc(child_context); stack->back().second += 1; @@ -775,7 +775,7 @@ inline void load_node_capnp_type(const ucap::Node::Reader &root, T &out, Context stack->push_back(std::make_pair("getLoc", 0)); if (root.hasLoc()) { auto child_el = root.getLoc(); - auto child_context = out.init_node_loc(context, child_el.getPtc(), child_el.getXhigh(), child_el.getXlow(), child_el.getYhigh(), child_el.getYlow()); + auto child_context = out.init_node_loc(context, child_el.getLayer(), child_el.getPtc(), child_el.getXhigh(), child_el.getXlow(), child_el.getYhigh(), child_el.getYlow()); load_node_loc_capnp_type(child_el, out, child_context, report_error, stack); out.finish_node_loc(child_context); } @@ -1153,6 +1153,7 @@ inline void write_node_capnp_type(T &in, ucap::Node::Builder &root, Context &con { auto child_context = in.get_node_loc(context); auto node_loc = root.initLoc(); + node_loc.setLayer(in.get_node_loc_layer(child_context)); node_loc.setPtc(in.get_node_loc_ptc(child_context)); if((bool)in.get_node_loc_side(child_context)) node_loc.setSide(conv_to_enum_loc_side(in.get_node_loc_side(child_context))); diff --git a/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx_interface.h b/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx_interface.h index 69795c800ca..9a61c8cbe12 100644 --- a/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx_interface.h +++ b/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx_interface.h @@ -4,9 +4,9 @@ * https://github.com/duck2/uxsdcxx * Modify only if your build process doesn't involve regenerating this file. 
* - * Cmdline: uxsdcxx/uxsdcxx.py /home/oscar/Desktop/vtr-new/libs/librrgraph/src/base/rr_graph.xsd - * Input file: /home/oscar/Desktop/vtr-new/libs/librrgraph/src/base/rr_graph.xsd - * md5sum of input file: 41df83ecf127a53590711ddec605742a + * Cmdline: uxsdcxx/uxsdcxx.py /home/amin/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd + * Input file: /home/amin/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd + * md5sum of input file: 8672cb3951993f7e0ea3433a02507672 */ #include @@ -332,6 +332,7 @@ class RrGraphBase { * * * + * * * * @@ -342,6 +343,7 @@ class RrGraphBase { virtual inline int get_grid_loc_width_offset(typename ContextTypes::GridLocReadContext &ctx) = 0; virtual inline int get_grid_loc_x(typename ContextTypes::GridLocReadContext &ctx) = 0; virtual inline int get_grid_loc_y(typename ContextTypes::GridLocReadContext &ctx) = 0; + virtual inline int get_grid_loc_layer(typename ContextTypes::GridLocReadContext &ctx) =0; /** Generated for complex type "grid_locs": * @@ -351,13 +353,14 @@ class RrGraphBase { * */ virtual inline void preallocate_grid_locs_grid_loc(typename ContextTypes::GridLocsWriteContext &ctx, size_t size) = 0; - virtual inline typename ContextTypes::GridLocWriteContext add_grid_locs_grid_loc(typename ContextTypes::GridLocsWriteContext &ctx, int block_type_id, int height_offset, int width_offset, int x, int y) = 0; + virtual inline typename ContextTypes::GridLocWriteContext add_grid_locs_grid_loc(typename ContextTypes::GridLocsWriteContext &ctx, int block_type_id, int height_offset, int width_offset, int x, int y, int layer) = 0; virtual inline void finish_grid_locs_grid_loc(typename ContextTypes::GridLocWriteContext &ctx) = 0; virtual inline size_t num_grid_locs_grid_loc(typename ContextTypes::GridLocsReadContext &ctx) = 0; virtual inline typename ContextTypes::GridLocReadContext get_grid_locs_grid_loc(int n, typename ContextTypes::GridLocsReadContext &ctx) = 0; /** Generated for complex type "node_loc": * + * * * * @@ 
-366,6 +369,7 @@ class RrGraphBase { * * */ + virtual inline int get_node_loc_layer(typename ContextTypes::NodeLocReadContext &ctx) = 0; virtual inline int get_node_loc_ptc(typename ContextTypes::NodeLocReadContext &ctx) = 0; virtual inline enum_loc_side get_node_loc_side(typename ContextTypes::NodeLocReadContext &ctx) = 0; virtual inline void set_node_loc_side(enum_loc_side side, typename ContextTypes::NodeLocWriteContext &ctx) = 0; @@ -436,7 +440,7 @@ class RrGraphBase { virtual inline void set_node_direction(enum_node_direction direction, typename ContextTypes::NodeWriteContext &ctx) = 0; virtual inline unsigned int get_node_id(typename ContextTypes::NodeReadContext &ctx) = 0; virtual inline enum_node_type get_node_type(typename ContextTypes::NodeReadContext &ctx) = 0; - virtual inline typename ContextTypes::NodeLocWriteContext init_node_loc(typename ContextTypes::NodeWriteContext &ctx, int ptc, int xhigh, int xlow, int yhigh, int ylow) = 0; + virtual inline typename ContextTypes::NodeLocWriteContext init_node_loc(typename ContextTypes::NodeWriteContext &ctx, int layer, int ptc, int xhigh, int xlow, int yhigh, int ylow) = 0; virtual inline void finish_node_loc(typename ContextTypes::NodeLocWriteContext &ctx) = 0; virtual inline typename ContextTypes::NodeLocReadContext get_node_loc(typename ContextTypes::NodeReadContext &ctx) = 0; virtual inline typename ContextTypes::NodeTimingWriteContext init_node_timing(typename ContextTypes::NodeWriteContext &ctx, float C, float R) = 0; diff --git a/libs/librrgraph/src/io/rr_graph.xsd b/libs/librrgraph/src/io/rr_graph.xsd index cdc60f654e4..4c05adfe5d5 100644 --- a/libs/librrgraph/src/io/rr_graph.xsd +++ b/libs/librrgraph/src/io/rr_graph.xsd @@ -208,6 +208,7 @@ + @@ -258,6 +259,7 @@ + diff --git a/libs/librrgraph/src/io/rr_graph_uxsdcxx_serializer.h b/libs/librrgraph/src/io/rr_graph_uxsdcxx_serializer.h index 0f0cb893a4e..2f0017be2ac 100644 --- a/libs/librrgraph/src/io/rr_graph_uxsdcxx_serializer.h +++ 
b/libs/librrgraph/src/io/rr_graph_uxsdcxx_serializer.h @@ -627,11 +627,12 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { * */ - inline int init_node_loc(int& inode, int ptc, int xhigh, int xlow, int yhigh, int ylow) final { + inline int init_node_loc(int& inode, int layer, int ptc, int xhigh, int xlow, int yhigh, int ylow) final { auto node = (*rr_nodes_)[inode]; RRNodeId node_id = node.id(); rr_graph_builder_->set_node_coordinates(node_id, xlow, ylow, xhigh, yhigh); + rr_graph_builder_->set_node_layer(node_id, layer); rr_graph_builder_->set_node_ptc_num(node_id, ptc); return inode; } @@ -643,6 +644,9 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { inline int get_node_loc_ptc(const t_rr_node& node) final { return rr_graph_->node_ptc_num(node.id()); } + inline int get_node_loc_layer(const t_rr_node& node) final { + return rr_graph_->node_layer(node.id()); + } inline int get_node_loc_xhigh(const t_rr_node& node) final { return rr_graph_->node_xhigh(node.id()); } @@ -1453,10 +1457,10 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { grid_.grid_size(), size); } } - inline void* add_grid_locs_grid_loc(void*& /*ctx*/, int block_type_id, int height_offset, int width_offset, int x, int y) final { - const auto& type = grid_.get_physical_type(x, y); - int grid_width_offset = grid_.get_width_offset(x, y); - int grid_height_offset = grid_.get_height_offset(x, y); + inline void* add_grid_locs_grid_loc(void*& /*ctx*/, int block_type_id, int height_offset, int width_offset, int x, int y, int layer) final { + const auto& type = grid_.get_physical_type({x, y, layer}); + int grid_width_offset = grid_.get_width_offset({x, y, layer}); + int grid_height_offset = grid_.get_height_offset({x, y, layer}); if (type->index != block_type_id) { report_error( @@ -1497,6 +1501,11 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { inline int get_grid_loc_y(const t_grid_tile*& grid_loc) final { return grid_.get_grid_loc_y(grid_loc); } + + inline 
int get_grid_loc_layer(const t_grid_tile*& grid_loc) final{ + return grid_.get_grid_loc_layer(grid_loc); + } + inline size_t num_grid_locs_grid_loc(void*& /*iter*/) final { return grid_.grid_size(); } @@ -1504,6 +1513,7 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { return grid_.get_grid_locs_grid_loc(n); } + /** Generated for complex type "rr_graph": * * @@ -1627,16 +1637,16 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { private: /*Allocates and load the rr_node look up table. SINK and SOURCE, IPIN and OPIN - *share the same look up table. CHANX and CHANY have individual look ups */ + *share the same look-up table. CHANX and CHANY have individual look-ups */ void process_rr_node_indices() { auto& rr_graph_builder = (*rr_graph_builder_); /* Alloc the lookup table */ for (t_rr_type rr_type : RR_TYPES) { if (rr_type == CHANX) { - rr_graph_builder.node_lookup().resize_nodes(grid_.height(), grid_.width(), rr_type, NUM_SIDES); + rr_graph_builder.node_lookup().resize_nodes(grid_.get_num_layers(),grid_.height(), grid_.width(), rr_type, NUM_SIDES); } else { - rr_graph_builder.node_lookup().resize_nodes(grid_.width(), grid_.height(), rr_type, NUM_SIDES); + rr_graph_builder.node_lookup().resize_nodes(grid_.get_num_layers(),grid_.width(), grid_.height(), rr_type, NUM_SIDES); } } diff --git a/libs/librrgraph/src/utils/describe_rr_node.cpp b/libs/librrgraph/src/utils/describe_rr_node.cpp index 484f0d47e3e..ee74b482686 100644 --- a/libs/librrgraph/src/utils/describe_rr_node.cpp +++ b/libs/librrgraph/src/utils/describe_rr_node.cpp @@ -29,8 +29,9 @@ std::string describe_rr_node(const RRGraphView& rr_graph, seg_index); } } else if (rr_graph.node_type(RRNodeId(inode)) == IPIN || rr_graph.node_type(RRNodeId(inode)) == OPIN) { - auto type = grid.get_physical_type(rr_graph.node_xlow(RRNodeId(inode)), - rr_graph.node_ylow(RRNodeId(inode))); + auto type = grid.get_physical_type({rr_graph.node_xlow(RRNodeId(inode)), + rr_graph.node_ylow(RRNodeId(inode)), + 
rr_graph.node_layer(RRNodeId(inode))}); std::string pin_name = block_type_pin_index_to_name(type, rr_graph.node_pin_num(RRNodeId(inode)), is_flat); diff --git a/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp b/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp index 7013bcf8ad2..db77f7bc999 100644 --- a/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp +++ b/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp @@ -2,11 +2,11 @@ # https://github.com/duck2/uxsdcxx # Modify only if your build process doesn't involve regenerating this file. # -# Cmdline: uxsdcxx/uxsdcap.py /research/ece/lnis/USERS/tang/github/vtr-verilog-to-routing/vpr/src/route/rr_graph.xsd -# Input file: /research/ece/lnis/USERS/tang/github/vtr-verilog-to-routing/vpr/src/route/rr_graph.xsd -# md5sum of input file: cd57d47fc9dfa62c7030397ca759217e +# Cmdline: uxsdcxx/uxsdcap.py /home/amin/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd +# Input file: /home/amin/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd +# md5sum of input file: 8672cb3951993f7e0ea3433a02507672 -@0xe4650d345d47589d; +@0xe9a519eb0e454dd4; using Cxx = import "/capnp/c++.capnp"; $Cxx.namespace("ucap"); @@ -154,6 +154,7 @@ struct GridLoc { widthOffset @2 :Int32; x @3 :Int32; y @4 :Int32; + layer @5 : Int32; } struct GridLocs { @@ -161,12 +162,13 @@ struct GridLocs { } struct NodeLoc { - ptc @0 :Int32; - side @1 :LocSide; - xhigh @2 :Int32; - xlow @3 :Int32; - yhigh @4 :Int32; - ylow @5 :Int32; + layer @0 :Int32; + ptc @1 :Int32; + side @2 :LocSide; + xhigh @3 :Int32; + xlow @4 :Int32; + yhigh @5 :Int32; + ylow @6 :Int32; } struct NodeTiming { diff --git a/utils/fasm/src/fasm.cpp b/utils/fasm/src/fasm.cpp index 925799f22a4..90d4b6671ee 100644 --- a/utils/fasm/src/fasm.cpp +++ b/utils/fasm/src/fasm.cpp @@ -57,10 +57,11 @@ void FasmWriterVisitor::visit_clb_impl(ClusterBlockId blk_id, const t_pb* clb) { int x = place_ctx.block_locs[blk_id].loc.x; int y = place_ctx.block_locs[blk_id].loc.y; + int layer_num = 
place_ctx.block_locs[blk_id].loc.layer; int sub_tile = place_ctx.block_locs[blk_id].loc.sub_tile; - physical_tile_ = device_ctx.grid.get_physical_type(x, y); + physical_tile_ = device_ctx.grid.get_physical_type({x, y, layer_num}); logical_block_ = cluster_ctx.clb_nlist.block_type(blk_id); - const auto& grid_meta = device_ctx.grid.get_metadata(x, y); + const auto& grid_meta = device_ctx.grid.get_metadata({x, y, layer_num}); blk_prefix_ = ""; clb_prefix_ = ""; diff --git a/utils/fasm/test/test_fasm.cpp b/utils/fasm/test/test_fasm.cpp index 3632f8fae81..ef55f4604f5 100644 --- a/utils/fasm/test/test_fasm.cpp +++ b/utils/fasm/test/test_fasm.cpp @@ -192,7 +192,8 @@ static std::string get_pin_feature (size_t inode) { // Get tile physical tile and the pin number int ilow = rr_graph.node_xlow(RRNodeId(inode)); int jlow = rr_graph.node_ylow(RRNodeId(inode)); - auto physical_tile = device_ctx.grid.get_physical_type(ilow, jlow); + int layer_num = rr_graph.node_layer(RRNodeId(inode)); + auto physical_tile = device_ctx.grid.get_physical_type({ilow, jlow, layer_num}); int pin_num = rr_graph.node_pin_num(RRNodeId(inode)); // Get the sub tile (type, not instance) and index of its pin that matches diff --git a/utils/route_diag/src/main.cpp b/utils/route_diag/src/main.cpp index 7e8756d6430..892674cc43b 100644 --- a/utils/route_diag/src/main.cpp +++ b/utils/route_diag/src/main.cpp @@ -155,6 +155,9 @@ static void profile_source(const Netlist<>& net_list, vtr::ScopedStartFinishTimer timer("Profiling source"); const auto& device_ctx = g_vpr_ctx.device(); const auto& grid = device_ctx.grid; + // TODO: We assume if this function is called, the grid has a 2D structure - It assumes everything is on layer number 0, so it won't work yet for multi-layer FPGAs + VTR_ASSERT(grid.get_num_layers() == 1); + int layer_num = 0; auto router_lookahead = make_router_lookahead(det_routing_arch, router_opts.lookahead_type, @@ -175,17 +178,17 @@ static void profile_source(const Netlist<>& net_list, for (int 
sink_x = start_x; sink_x <= end_x; sink_x++) { for (int sink_y = start_y; sink_y <= end_y; sink_y++) { - if(device_ctx.grid.get_physical_type(sink_x, sink_y) == device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { + if(device_ctx.grid.get_physical_type({sink_x, sink_y, layer_num}) == device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { continue; } auto best_sink_ptcs = get_best_classes(RECEIVER, - device_ctx.grid.get_physical_type(sink_x, sink_y)); + device_ctx.grid.get_physical_type({sink_x, sink_y, layer_num})); bool successfully_routed; for (int sink_ptc : best_sink_ptcs) { VTR_ASSERT(sink_ptc != OPEN); - - int sink_rr_node = size_t(device_ctx.rr_graph.node_lookup().find_node(sink_x, sink_y, SINK, sink_ptc)); + //TODO: should pass layer_num instead of 0 to node_lookup once the multi-die FPGAs support is completed + int sink_rr_node = size_t(device_ctx.rr_graph.node_lookup().find_node(0,sink_x, sink_y, SINK, sink_ptc)); if (directconnect_exists(source_rr_node, sink_rr_node)) { //Skip if we shouldn't measure direct connects and a direct connect exists diff --git a/vpr/src/base/SetupGrid.cpp b/vpr/src/base/SetupGrid.cpp index b15b3e25469..3569f5bff1f 100644 --- a/vpr/src/base/SetupGrid.cpp +++ b/vpr/src/base/SetupGrid.cpp @@ -156,7 +156,6 @@ static DeviceGrid auto_size_device_grid(const std::vector& grid_layo auto auto_layout_itr = std::find_if(grid_layouts.begin(), grid_layouts.end(), is_auto_grid_def); if (auto_layout_itr != grid_layouts.end()) { //Automatic grid layout, find the smallest height/width - VTR_ASSERT_SAFE_MSG(std::find_if(auto_layout_itr + 1, grid_layouts.end(), is_auto_grid_def) == grid_layouts.end(), "Only one "); //Determine maximum device size to try before concluding that the circuit cannot fit on any device @@ -175,7 +174,7 @@ static DeviceGrid auto_size_device_grid(const std::vector& grid_layo const auto& grid_def = *auto_layout_itr; VTR_ASSERT(grid_def.aspect_ratio >= 0.); - //Initial size is 3x3, the smallest possible while avoiding + //Initial size is num_layers x 3 
x 3, the smallest possible while avoiding //start before end location issues with location //specifications size_t width = 3; @@ -277,7 +276,7 @@ static std::vector grid_overused_resources(const Devic //Initialize available tile counts std::unordered_map avail_tiles; for (auto& tile_type : device_ctx.physical_tile_types) { - avail_tiles[&tile_type] = grid.num_instances(&tile_type); + avail_tiles[&tile_type] = grid.num_instances(&tile_type, -1); } //Sort so we allocate logical blocks with the fewest equivalent sites first (least flexible) @@ -691,54 +690,61 @@ static void set_grid_block_type(int priority, ///@brief Check grid is valid static void CheckGrid(const DeviceGrid& grid) { - for (int layer = 0; layer < grid.get_num_layers(); layer++) { //Check each die individually - for (size_t i = 0; i < grid.width(); ++i) { - for (size_t j = 0; j < grid.height(); ++j) { - auto type = grid.get_physical_type(i, j); + for (int layer_num = 0; layer_num < grid.get_num_layers(); layer_num++) { //Check each die individually + for (int i = 0; i < (int)grid.width(); ++i) { + for (int j = 0; j < (int)grid.height(); ++j) { + const t_physical_tile_loc tile_loc(i, j, layer_num); + const auto& type = grid.get_physical_type(tile_loc); + int width_offset = grid.get_width_offset(tile_loc); + int height_offset = grid.get_height_offset(tile_loc); if (nullptr == type) { - VPR_FATAL_ERROR(VPR_ERROR_OTHER, "Grid Location (%d,%d,%d) has no type.\n", i, j); + VPR_FATAL_ERROR(VPR_ERROR_OTHER, "Grid Location (%d,%d,%d) has no type.\n", i, j, layer_num); } - int width_offset = grid.get_width_offset(i, j); - int height_offset = grid.get_height_offset(i, j); - if ((width_offset < 0) - || (width_offset >= type->width)) { - VPR_FATAL_ERROR(VPR_ERROR_OTHER, "Grid Location (%d,%d,%d) has invalid width offset (%d).\n", i, j, layer, + if ((grid.get_width_offset(tile_loc) < 0) + || (grid.get_width_offset(tile_loc) >= type->width)) { + VPR_FATAL_ERROR(VPR_ERROR_OTHER, "Grid Location (%d,%d,%d) has invalid 
width offset (%d).\n", + i, + j, + layer_num, width_offset); } - if ((height_offset < 0) - || (height_offset >= type->height)) { - VPR_FATAL_ERROR(VPR_ERROR_OTHER, "Grid Location (%d,%d,%d) has invalid height offset (%d).\n", i, j, layer, + if ((grid.get_height_offset(tile_loc) < 0) + || (grid.get_height_offset(tile_loc) >= type->height)) { + VPR_FATAL_ERROR(VPR_ERROR_OTHER, "Grid Location (%d,%d,%d) has invalid height offset (%d).\n", + i, + j, + layer_num, height_offset); } //Verify that type and width/height offsets are correct (e.g. for dimension > 1 blocks) - if (width_offset == 0 && height_offset == 0) { + if (grid.get_width_offset(tile_loc) == 0 && grid.get_height_offset(tile_loc) == 0) { //From the root block check that all other blocks are correct - for (size_t x = i; x < i + type->width; ++x) { + for (int x = i; x < i + type->width; ++x) { int x_offset = x - i; - for (size_t y = j; y < j + type->height; ++y) { + for (int y = j; y < j + type->height; ++y) { int y_offset = y - j; - - const auto& tile_type = grid.get_physical_type(x, y); - int tile_width_offset = grid.get_width_offset(x, y); - int tile_height_offset = grid.get_height_offset(x, y); + const t_physical_tile_loc tile_loc_offset(x, y, layer_num); + const auto& tile_type = grid.get_physical_type(tile_loc_offset); + int tile_width_offset = grid.get_width_offset(tile_loc_offset); + int tile_height_offset = grid.get_height_offset(tile_loc_offset); if (tile_type != type) { VPR_FATAL_ERROR(VPR_ERROR_OTHER, "Grid Location (%d,%d,%d) should have type '%s' (based on root location) but has type '%s'\n", - i, j, layer, type->name, tile_type->name); + i, j, layer_num, type->name, tile_type->name); } if (tile_width_offset != x_offset) { VPR_FATAL_ERROR(VPR_ERROR_OTHER, "Grid Location (%d,%d,%d) of type '%s' should have width offset '%d' (based on root location) but has '%d'\n", - i, j, layer, type->name, x_offset, tile_width_offset); + i, j, layer_num, type->name, x_offset, tile_width_offset); } if
(tile_height_offset != y_offset) { VPR_FATAL_ERROR(VPR_ERROR_OTHER, "Grid Location (%d,%d,%d) of type '%s' should have height offset '%d' (based on root location) but has '%d'\n", - i, j, layer, type->name, y_offset, tile_height_offset); + i, j, layer_num, type->name, y_offset, tile_height_offset); } } } @@ -751,13 +757,15 @@ static void CheckGrid(const DeviceGrid& grid) { float calculate_device_utilization(const DeviceGrid& grid, std::map instance_counts) { //Record the resources of the grid std::map grid_resources; - for (size_t x = 0; x < grid.width(); ++x) { - for (size_t y = 0; y < grid.height(); ++y) { - int width_offset = grid.get_width_offset(x, y); - int height_offset = grid.get_height_offset(x, y); - if (width_offset == 0 && height_offset == 0) { - const auto& type = grid.get_physical_type(x, y); - ++grid_resources[type]; + for (int layer_num = 0; layer_num < grid.get_num_layers(); ++layer_num) { + for (int x = 0; x < (int)grid.width(); ++x) { + for (int y = 0; y < (int)grid.height(); ++y) { + int width_offset = grid.get_width_offset({x, y, layer_num}); + int height_offset = grid.get_height_offset({x, y, layer_num}); + if (width_offset == 0 && height_offset == 0) { + const auto& type = grid.get_physical_type({x, y, layer_num}); + ++grid_resources[type]; + } } } } diff --git a/vpr/src/base/gen/vpr_constraints_uxsdcxx_interface.h b/vpr/src/base/gen/vpr_constraints_uxsdcxx_interface.h index 6da8558b84d..8939778861e 100644 --- a/vpr/src/base/gen/vpr_constraints_uxsdcxx_interface.h +++ b/vpr/src/base/gen/vpr_constraints_uxsdcxx_interface.h @@ -58,6 +58,7 @@ class VprConstraintsBase { * * */ + virtual inline int get_add_region_layer_num(typename ContextTypes::AddRegionReadContext& ctx) = 0; virtual inline int get_add_region_subtile(typename ContextTypes::AddRegionReadContext& ctx) = 0; virtual inline void set_add_region_subtile(int subtile, typename ContextTypes::AddRegionWriteContext& ctx) = 0; virtual inline int get_add_region_x_high(typename 
ContextTypes::AddRegionReadContext& ctx) = 0; diff --git a/vpr/src/base/read_place.cpp b/vpr/src/base/read_place.cpp index 68b51ef5b02..17c40e4781e 100644 --- a/vpr/src/base/read_place.cpp +++ b/vpr/src/base/read_place.cpp @@ -200,16 +200,36 @@ void read_place_body(std::ifstream& placement_file, } else if (tokens[0][0] == '#') { continue; //Skip commented lines - } else if (tokens.size() == 4 || (tokens.size() > 4 && tokens[4][0] == '#')) { + } else if ((tokens.size() == 4 || (tokens.size() > 4 && tokens[4][0] == '#')) || (tokens.size() == 5 || (tokens.size() > 5 && tokens[5][0] == '#'))) { //Load the block location // - //We should have 4 tokens of actual data, with an optional 5th (commented) token indicating VPR's + // If the place file corresponds to a 3D architecture, it should contain 5 tokens of actual data, with an optional 6th (commented) token indicating VPR's internal block number. + // If it belongs to a 2D architecture file, supported for backward compatibility, we should have 4 tokens of actual data, with an optional 5th (commented) token indicating VPR's //internal block number + int block_name_index = 0; + int block_x_index = 1; + int block_y_index = 2; + int sub_tile_index_index = 3; + int block_layer_index; + if (tokens.size() == 4 || (tokens.size() > 4 && tokens[4][0] == '#')) { + //2D architecture + block_layer_index = -1; + + } else { + // 3D architecture + block_layer_index = 4; + } - std::string block_name = tokens[0]; - int block_x = vtr::atoi(tokens[1]); - int block_y = vtr::atoi(tokens[2]); - int sub_tile_index = vtr::atoi(tokens[3]); + std::string block_name = tokens[block_name_index]; + int block_x = vtr::atoi(tokens[block_x_index]); + int block_y = vtr::atoi(tokens[block_y_index]); + int sub_tile_index = vtr::atoi(tokens[sub_tile_index_index]); + int block_layer; + if (block_layer_index != -1) { + block_layer = vtr::atoi(tokens[block_layer_index]); + } else { + block_layer = 0; + } //c-style block name needed for printing block name in
error messages char const* c_block_name = block_name.c_str(); @@ -230,7 +250,7 @@ void read_place_body(std::ifstream& placement_file, //Check if block is listed multiple times with conflicting locations in constraints file if (seen_blocks[blk_id] > 0) { - if (block_x != place_ctx.block_locs[blk_id].loc.x || block_y != place_ctx.block_locs[blk_id].loc.y || sub_tile_index != place_ctx.block_locs[blk_id].loc.sub_tile) { + if (block_x != place_ctx.block_locs[blk_id].loc.x || block_y != place_ctx.block_locs[blk_id].loc.y || sub_tile_index != place_ctx.block_locs[blk_id].loc.sub_tile || block_layer != place_ctx.block_locs[blk_id].loc.layer) { std::string cluster_name = cluster_ctx.clb_nlist.block_name(blk_id); VPR_THROW(VPR_ERROR_PLACE, "The location of cluster %s (#%d) is specified %d times in the constraints file with conflicting locations. \n" @@ -243,6 +263,7 @@ void read_place_body(std::ifstream& placement_file, loc.x = block_x; loc.y = block_y; loc.sub_tile = sub_tile_index; + loc.layer = block_layer; if (seen_blocks[blk_id] == 0) { set_block_location(blk_id, loc); @@ -301,8 +322,8 @@ void print_place(const char* net_file, net_file, net_id); fprintf(fp, "Array size: %zu x %zu logic blocks\n\n", device_ctx.grid.width(), device_ctx.grid.height()); - fprintf(fp, "#block name\tx\ty\tsubblk\tblock number\n"); - fprintf(fp, "#----------\t--\t--\t------\t------------\n"); + fprintf(fp, "#block name\tx\ty\tsubblk\tlayer\tblock number\n"); + fprintf(fp, "#----------\t--\t--\t------\t-----\t------------\n"); if (!place_ctx.block_locs.empty()) { //Only if placement exists for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { @@ -310,7 +331,11 @@ void print_place(const char* net_file, if (strlen(cluster_ctx.clb_nlist.block_name(blk_id).c_str()) < 8) fprintf(fp, "\t"); - fprintf(fp, "%d\t%d\t%d", place_ctx.block_locs[blk_id].loc.x, place_ctx.block_locs[blk_id].loc.y, place_ctx.block_locs[blk_id].loc.sub_tile); + fprintf(fp, "%d\t%d\t%d\t%d", + place_ctx.block_locs[blk_id].loc.x, 
+ place_ctx.block_locs[blk_id].loc.y, + place_ctx.block_locs[blk_id].loc.sub_tile, + place_ctx.block_locs[blk_id].loc.layer); fprintf(fp, "\t#%zu\n", size_t(blk_id)); } } diff --git a/vpr/src/base/read_route.cpp b/vpr/src/base/read_route.cpp index 348beea3033..e5d59d6245b 100644 --- a/vpr/src/base/read_route.cpp +++ b/vpr/src/base/read_route.cpp @@ -55,7 +55,7 @@ static void process_route(std::ifstream& fp, const char* filename, int& lineno); static void process_nodes(std::ifstream& fp, ClusterNetId inet, const char* filename, int& lineno); static void process_nets(std::ifstream& fp, ClusterNetId inet, std::string name, std::vector input_tokens, const char* filename, int& lineno); static void process_global_blocks(std::ifstream& fp, ClusterNetId inet, const char* filename, int& lineno); -static void format_coordinates(int& x, int& y, std::string coord, ClusterNetId net, const char* filename, const int lineno); +static void format_coordinates(int& layer_num, int& x, int& y, std::string coord, ClusterNetId net, const char* filename, const int lineno); static void format_pin_info(std::string& pb_name, std::string& port_name, int& pb_pin_num, std::string input); static std::string format_name(std::string name); static bool check_rr_graph_connectivity(RRNodeId prev_node, RRNodeId node); @@ -233,7 +233,7 @@ static void process_nodes(std::ifstream& fp, ClusterNetId inet, const char* file /*remember the position of the last line in order to go back*/ std::streampos oldpos = fp.tellg(); - int inode, x, y, x2, y2, ptc, switch_id, net_pin_index, offset; + int inode, layer_num, x, y, layer_num2, x2, y2, ptc, switch_id, net_pin_index, offset; std::string prev_type; int node_count = 0; std::string input; @@ -278,11 +278,11 @@ static void process_nodes(std::ifstream& fp, ClusterNetId inet, const char* file "Node %d has a type that does not match the RR graph", inode); } - format_coordinates(x, y, tokens[3], inet, filename, lineno); + format_coordinates(layer_num, x, y, tokens[3], 
inet, filename, lineno); auto rr_node = RRNodeId(inode); if (tokens[4] == "to") { - format_coordinates(x2, y2, tokens[5], inet, filename, lineno); + format_coordinates(layer_num2, x2, y2, tokens[5], inet, filename, lineno); if (rr_graph.node_xlow(rr_node) != x || rr_graph.node_xhigh(rr_node) != x2 || rr_graph.node_yhigh(rr_node) != y2 || rr_graph.node_ylow(rr_node) != y) { vpr_throw(VPR_ERROR_ROUTE, filename, lineno, "The coordinates of node %d does not match the rr graph", inode); @@ -312,7 +312,7 @@ static void process_nodes(std::ifstream& fp, ClusterNetId inet, const char* file /* Verify types and ptc*/ if (tokens[2] == "SOURCE" || tokens[2] == "SINK" || tokens[2] == "OPIN" || tokens[2] == "IPIN") { - const auto& type = device_ctx.grid.get_physical_type(x, y); + const auto& type = device_ctx.grid.get_physical_type({x, y, layer_num}); if (tokens[4 + offset] == "Pad:" && !is_io_type(type)) { vpr_throw(VPR_ERROR_ROUTE, filename, lineno, "Node %d is of the wrong type", inode); @@ -333,16 +333,17 @@ static void process_nodes(std::ifstream& fp, ClusterNetId inet, const char* file /*Process switches and pb pin info if it is ipin or opin type*/ if (tokens[6 + offset] != "Switch:") { /*This is an opin or ipin, process its pin nums*/ - auto type = device_ctx.grid.get_physical_type(x, y); + auto type = device_ctx.grid.get_physical_type({x, y, layer_num}); if (!is_io_type(type) && (tokens[2] == "IPIN" || tokens[2] == "OPIN")) { int pin_num = rr_graph.node_pin_num(RRNodeId(inode)); - int height_offset = device_ctx.grid.get_height_offset(x, y); + int height_offset = device_ctx.grid.get_height_offset({x, y, layer_num}); int capacity, relative_pin; std::tie(capacity, relative_pin) = get_capacity_location_from_physical_pin(type, pin_num); - ClusterBlockId iblock = place_ctx.grid_blocks[x][y - height_offset].blocks[capacity]; + ClusterBlockId iblock = place_ctx.grid_blocks.block_at_location({x, y - height_offset, capacity, layer_num}); + t_pb_graph_pin* pb_pin; pb_pin = 
get_pb_graph_node_pin_from_block_pin(iblock, pin_num); @@ -419,7 +420,7 @@ static void process_global_blocks(std::ifstream& fp, ClusterNetId inet, const ch auto& place_ctx = g_vpr_ctx.placement(); std::string block, bnum_str; - int x, y; + int layer_num, x, y; std::vector tokens; int pin_counter = 0; @@ -439,7 +440,7 @@ static void process_global_blocks(std::ifstream& fp, ClusterNetId inet, const ch fp.seekg(oldpos); return; } else { - format_coordinates(x, y, tokens[4], inet, filename, lineno); + format_coordinates(layer_num, x, y, tokens[4], inet, filename, lineno); /*remove ()*/ bnum_str = format_name(tokens[2]); @@ -472,17 +473,30 @@ static void process_global_blocks(std::ifstream& fp, ClusterNetId inet, const ch } ///@brief Parse coordinates in the form of (x,y) into correct x and y values -static void format_coordinates(int& x, int& y, std::string coord, ClusterNetId net, const char* filename, const int lineno) { +static void format_coordinates(int& layer_num, int& x, int& y, std::string coord, ClusterNetId net, const char* filename, const int lineno) { coord = format_name(coord); + std::stringstream coord_stream(coord); - if (!(coord_stream >> x)) { - vpr_throw(VPR_ERROR_ROUTE, filename, lineno, - "Net %lu has coordinates that is not in the form (x,y)", size_t(net)); + std::vector coords; + int tmp_coord; + while (coord_stream >> tmp_coord) { + coords.push_back(tmp_coord); + coord_stream.ignore(1, ','); } - coord_stream.ignore(1, ' '); - if (!(coord_stream >> y)) { + if (coords.size() != 2 && coords.size() != 3) { vpr_throw(VPR_ERROR_ROUTE, filename, lineno, - "Net %lu has coordinates that is not in the form (x,y)", size_t(net)); + "Net %lu has coordinates that is not in the form (layer_num,x,y)", size_t(net)); + } + + if (coords.size() == 2) { + layer_num = 0; + x = coords[0]; + y = coords[1]; + } else { + VTR_ASSERT(coords.size() == 3); + layer_num = coords[0]; + x = coords[1]; + y = coords[2]; } } @@ -578,9 +592,10 @@ void print_route(const Netlist<>& 
net_list, t_rr_type rr_type = rr_graph.node_type(inode); int ilow = rr_graph.node_xlow(inode); int jlow = rr_graph.node_ylow(inode); + int layer_num = rr_graph.node_layer(inode); - fprintf(fp, "Node:\t%zu\t%6s (%d,%d) ", size_t(inode), - rr_graph.node_type_string(inode), ilow, jlow); + fprintf(fp, "Node:\t%zu\t%6s (%d,%d,%d) ", size_t(inode), + rr_graph.node_type_string(inode), layer_num, ilow, jlow); if ((ilow != rr_graph.node_xhigh(inode)) || (jlow != rr_graph.node_yhigh(inode))) @@ -590,7 +605,7 @@ void print_route(const Netlist<>& net_list, switch (rr_type) { case IPIN: case OPIN: - if (is_io_type(device_ctx.grid.get_physical_type(ilow, jlow))) { + if (is_io_type(device_ctx.grid.get_physical_type({ilow, jlow, layer_num}))) { fprintf(fp, " Pad: "); } else { /* IO Pad. */ fprintf(fp, " Pin: "); @@ -604,7 +619,7 @@ void print_route(const Netlist<>& net_list, case SOURCE: case SINK: - if (is_io_type(device_ctx.grid.get_physical_type(ilow, jlow))) { + if (is_io_type(device_ctx.grid.get_physical_type({ilow, jlow, layer_num}))) { fprintf(fp, " Pad: "); } else { /* IO Pad. 
*/ fprintf(fp, " Class: "); @@ -620,17 +635,18 @@ void print_route(const Netlist<>& net_list, fprintf(fp, "%d ", rr_graph.node_ptc_num(inode)); - auto physical_tile = device_ctx.grid.get_physical_type(ilow, jlow); + auto physical_tile = device_ctx.grid.get_physical_type({ilow, jlow, layer_num}); if (!is_io_type(physical_tile) && (rr_type == IPIN || rr_type == OPIN)) { int pin_num = rr_graph.node_pin_num(inode); - int xoffset = device_ctx.grid.get_width_offset(ilow, jlow); - int yoffset = device_ctx.grid.get_height_offset(ilow, jlow); + int xoffset = device_ctx.grid.get_width_offset({ilow, jlow, layer_num}); + int yoffset = device_ctx.grid.get_height_offset({ilow, jlow, layer_num}); const t_sub_tile* sub_tile; int sub_tile_rel_cap; std::tie(sub_tile, sub_tile_rel_cap) = get_sub_tile_from_pin_physical_num(physical_tile, pin_num); int sub_tile_offset = sub_tile->capacity.low + sub_tile_rel_cap; - ClusterBlockId iblock = place_ctx.grid_blocks[ilow - xoffset][jlow - yoffset].blocks[sub_tile_offset]; + ClusterBlockId iblock = place_ctx.grid_blocks.block_at_location({ilow - xoffset, jlow - yoffset, + sub_tile_offset, layer_num}); VTR_ASSERT(iblock); const t_pb_graph_pin* pb_pin; if (is_pin_on_tile(physical_tile, pin_num)) { diff --git a/vpr/src/base/region.cpp b/vpr/src/base/region.cpp index 594ec76564e..5c38f9ace86 100644 --- a/vpr/src/base/region.cpp +++ b/vpr/src/base/region.cpp @@ -9,17 +9,23 @@ Region::Region() { region_bounds.set_ymin(999); region_bounds.set_xmax(-1); region_bounds.set_ymax(-1); + layer_num = -1; } -vtr::Rect Region::get_region_rect() const { - return region_bounds; +RegionRectCoord Region::get_region_rect() const { + return RegionRectCoord(region_bounds, layer_num); } -void Region::set_region_rect(int _xmin, int _ymin, int _xmax, int _ymax) { - region_bounds.set_xmin(_xmin); - region_bounds.set_xmax(_xmax); - region_bounds.set_ymin(_ymin); - region_bounds.set_ymax(_ymax); +void Region::set_region_rect(const RegionRectCoord& rect_coord) { + 
region_bounds.set_xmin(rect_coord.xmin); + region_bounds.set_xmax(rect_coord.xmax); + region_bounds.set_ymin(rect_coord.ymin); + region_bounds.set_ymax(rect_coord.ymax); + layer_num = rect_coord.layer_num; +} + +int Region::get_layer_num() const { + return layer_num; } int Region::get_sub_tile() const { @@ -31,11 +37,18 @@ void Region::set_sub_tile(int _sub_tile) { } bool Region::empty() { - return (region_bounds.xmax() < region_bounds.xmin() || region_bounds.ymax() < region_bounds.ymin()); + return (region_bounds.xmax() < region_bounds.xmin() + || region_bounds.ymax() < region_bounds.ymin() + || layer_num < 0); } bool Region::is_loc_in_reg(t_pl_loc loc) { bool is_loc_in_reg = false; + int loc_layer_num = loc.layer; + + if (layer_num != loc_layer_num) { + return is_loc_in_reg; + } vtr::Point loc_coord(loc.x, loc.y); @@ -58,10 +71,21 @@ bool Region::is_loc_in_reg(t_pl_loc loc) { bool do_regions_intersect(Region r1, Region r2) { bool intersect = true; - vtr::Rect r1_rect = r1.get_region_rect(); - vtr::Rect r2_rect = r2.get_region_rect(); + const auto r1_reg_coord = r1.get_region_rect(); + const auto r2_reg_coord = r2.get_region_rect(); + + vtr::Rect r1_rect(r1_reg_coord.xmin, r1_reg_coord.ymin, r1_reg_coord.xmax, r1_reg_coord.ymax); + vtr::Rect r2_rect(r2_reg_coord.xmin, r2_reg_coord.ymin, r2_reg_coord.xmax, r2_reg_coord.ymax); + + int r1_layer_num = r1_reg_coord.layer_num; + int r2_layer_num = r2_reg_coord.layer_num; + vtr::Rect intersect_rect; + if (r1_layer_num != r2_layer_num) { + return false; + } + intersect_rect = intersection(r1_rect, r2_rect); /** @@ -77,10 +101,22 @@ bool do_regions_intersect(Region r1, Region r2) { Region intersection(const Region& r1, const Region& r2) { Region intersect; - vtr::Rect r1_rect = r1.get_region_rect(); - vtr::Rect r2_rect = r2.get_region_rect(); + + const auto r1_reg_coord = r1.get_region_rect(); + const auto r2_reg_coord = r2.get_region_rect(); + + vtr::Rect r1_rect(r1_reg_coord.xmin, r1_reg_coord.ymin,
r1_reg_coord.xmax, r1_reg_coord.ymax); + vtr::Rect r2_rect(r2_reg_coord.xmin, r2_reg_coord.ymin, r2_reg_coord.xmax, r2_reg_coord.ymax); + + int r1_layer_num = r1_reg_coord.layer_num; + int r2_layer_num = r2_reg_coord.layer_num; + vtr::Rect intersect_rect; + if (r1_layer_num != r2_layer_num) { + return intersect; + } + /* * If the subtiles of two regions match (i.e. they both have no subtile specified, or the same subtile specified), * the regions are intersected. The resulting intersection region will have a rectangle that reflects their overlap, @@ -97,24 +133,27 @@ Region intersection(const Region& r1, const Region& r2) { if (r1.get_sub_tile() == r2.get_sub_tile()) { intersect.set_sub_tile(r1.get_sub_tile()); intersect_rect = intersection(r1_rect, r2_rect); - intersect.set_region_rect(intersect_rect.xmin(), intersect_rect.ymin(), intersect_rect.xmax(), intersect_rect.ymax()); + intersect.set_region_rect({intersect_rect, r1_layer_num}); } else if (r1.get_sub_tile() == NO_SUBTILE && r2.get_sub_tile() != NO_SUBTILE) { intersect.set_sub_tile(r2.get_sub_tile()); intersect_rect = intersection(r1_rect, r2_rect); - intersect.set_region_rect(intersect_rect.xmin(), intersect_rect.ymin(), intersect_rect.xmax(), intersect_rect.ymax()); + intersect.set_region_rect({intersect_rect, r1_layer_num}); } else if (r1.get_sub_tile() != NO_SUBTILE && r2.get_sub_tile() == NO_SUBTILE) { intersect.set_sub_tile(r1.get_sub_tile()); intersect_rect = intersection(r1_rect, r2_rect); - intersect.set_region_rect(intersect_rect.xmin(), intersect_rect.ymin(), intersect_rect.xmax(), intersect_rect.ymax()); + intersect.set_region_rect({intersect_rect, r1_layer_num}); } return intersect; } void print_region(FILE* fp, Region region) { + const auto region_coord = region.get_region_rect(); + const auto region_rect = vtr::Rect(region_coord.xmin, region_coord.ymin, region_coord.xmax, region_coord.ymax); fprintf(fp, "\tRegion: \n"); - print_rect(fp, region.get_region_rect()); + fprintf(fp, "\tlayer: 
%d\n", region.get_layer_num()); + print_rect(fp, region_rect); fprintf(fp, "\tsubtile: %d\n\n", region.get_sub_tile()); } diff --git a/vpr/src/base/region.h b/vpr/src/base/region.h index 75a25f5071d..7b1ceec6dda 100644 --- a/vpr/src/base/region.h +++ b/vpr/src/base/region.h @@ -4,6 +4,50 @@ #include #include "vpr_types.h" +/** + * @brief This class stores the data for each constraint region on a layer + * @param xmin The minimum x coordinate of the region + * @param ymin The minimum y coordinate of the region + * @param xmax The maximum x coordinate of the region + * @param ymax The maximum y coordinate of the region + * @param layer_num The layer number of the region + */ +struct RegionRectCoord { + RegionRectCoord() = default; + RegionRectCoord(int _xmin, int _ymin, int _xmax, int _ymax, int _layer_num) + : xmin(_xmin) + , ymin(_ymin) + , xmax(_xmax) + , ymax(_ymax) + , layer_num(_layer_num) {} + + RegionRectCoord(const vtr::Rect& rect, int _layer_num) + : xmin(rect.xmin()) + , ymin(rect.ymin()) + , xmax(rect.xmax()) + , ymax(rect.ymax()) + , layer_num(_layer_num) {} + + int xmin; + int ymin; + int xmax; + int ymax; + int layer_num; + + /// @brief Convert to a vtr::Rect + vtr::Rect get_rect() const { + return vtr::Rect(xmin, ymin, xmax, ymax); + } + + /// @brief Equality operator + bool operator==(const RegionRectCoord& rhs) const { + vtr::Rect lhs_rect(xmin, ymin, xmax, ymax); + vtr::Rect rhs_rect(rhs.xmin, rhs.ymin, rhs.xmax, rhs.ymax); + return lhs_rect == rhs_rect + && layer_num == rhs.layer_num; + } +}; + /** * @file * @brief This file defines the Region class. The Region class stores the data for each constraint region. 
@@ -26,12 +70,17 @@ class Region { /** * @brief Accessor for the region's rectangle */ - vtr::Rect get_region_rect() const; + RegionRectCoord get_region_rect() const; /** * @brief Mutator for the region's rectangle */ - void set_region_rect(int _xmin, int _ymin, int _xmax, int _ymax); + void set_region_rect(const RegionRectCoord& rect_coord); + + /** + * @brief Accessor for the region's layer number + */ + int get_layer_num() const; /** * @brief Accessor for the region's subtile @@ -59,12 +108,15 @@ class Region { bool is_loc_in_reg(t_pl_loc loc); bool operator==(const Region& reg) const { - return (reg.get_region_rect() == this->get_region_rect() && reg.get_sub_tile() == this->get_sub_tile()); + return (reg.get_region_rect() == this->get_region_rect() + && reg.get_sub_tile() == this->get_sub_tile() + && reg.layer_num == this->layer_num); } private: //may need to include zmin, zmax for future use in 3D FPGA designs vtr::Rect region_bounds; ///< xmin, ymin, xmax, ymax inclusive + int layer_num; ///< layer number of the region int sub_tile; ///< users will optionally select a subtile }; @@ -96,11 +148,12 @@ namespace std { template<> struct hash { std::size_t operator()(const Region& reg) const noexcept { - vtr::Rect rect = reg.get_region_rect(); - std::size_t seed = std::hash{}(rect.xmin()); - vtr::hash_combine(seed, rect.ymin()); - vtr::hash_combine(seed, rect.xmax()); - vtr::hash_combine(seed, rect.ymax()); + const auto region_coord = reg.get_region_rect(); + std::size_t seed = std::hash{}(region_coord.xmin); + vtr::hash_combine(seed, region_coord.ymin); + vtr::hash_combine(seed, region_coord.xmax); + vtr::hash_combine(seed, region_coord.ymax); + vtr::hash_combine(seed, region_coord.layer_num); vtr::hash_combine(seed, reg.get_sub_tile()); return seed; } diff --git a/vpr/src/base/setup_noc.cpp b/vpr/src/base/setup_noc.cpp index e836dd808c4..ad59fb21f10 100644 --- a/vpr/src/base/setup_noc.cpp +++ b/vpr/src/base/setup_noc.cpp @@ -40,7 +40,7 @@ void setup_noc(const 
t_arch& arch) { // store the reference to device grid with // need to set this first before adding routers to the model - noc_ctx.noc_model.set_device_grid_width((int)device_ctx.grid.width()); + noc_ctx.noc_model.set_device_grid_spec((int)device_ctx.grid.width(), (int)device_ctx.grid.height()); // generate noc model generate_noc(arch, noc_ctx, noc_router_tiles); @@ -59,9 +59,7 @@ void setup_noc(const t_arch& arch) { } void identify_and_store_noc_router_tile_positions(const DeviceGrid& device_grid, std::vector& noc_router_tiles, std::string noc_router_tile_name) { - int grid_width = device_grid.width(); - int grid_height = device_grid.height(); - + const int num_layers = device_grid.get_num_layers(); int curr_tile_width; int curr_tile_height; int curr_tile_width_offset; @@ -72,35 +70,37 @@ void identify_and_store_noc_router_tile_positions(const DeviceGrid& device_grid, double curr_tile_centroid_y; // go through the device - for (int i = 0; i < grid_width; i++) { - for (int j = 0; j < grid_height; j++) { - // get some information from the current tile - const auto& type = device_grid.get_physical_type(i, j); - int width_offset = device_grid.get_width_offset(i, j); - int height_offset = device_grid.get_height_offset(i, j); - - curr_tile_name.assign(type->name); - curr_tile_width_offset = width_offset; - curr_tile_height_offset = height_offset; - - curr_tile_height = type->height; - curr_tile_width = type->width; - - /* - * Only store the tile position if it is a noc router. - * Additionally, since a router tile can span multiple grid locations, we only add the tile if the height and width offset are zero (this prevents the router from being added multiple times for each grid location it spans). 
- */ - if (!(noc_router_tile_name.compare(curr_tile_name)) && !curr_tile_width_offset && !curr_tile_height_offset) { - // calculating the centroid position of the current tile - curr_tile_centroid_x = (curr_tile_width - 1) / (double)2 + i; - curr_tile_centroid_y = (curr_tile_height - 1) / (double)2 + j; - - noc_router_tiles.push_back({i, j, curr_tile_centroid_x, curr_tile_centroid_y}); + for (int layer_num = 0; layer_num < num_layers; layer_num++) { + int grid_width = (int)device_grid.width(); + int grid_height = (int)device_grid.height(); + for (int i = 0; i < grid_width; i++) { + for (int j = 0; j < grid_height; j++) { + // get some information from the current tile + const auto& type = device_grid.get_physical_type({i, j, layer_num}); + int width_offset = device_grid.get_width_offset({i, j, layer_num}); + int height_offset = device_grid.get_height_offset({i, j, layer_num}); + + curr_tile_name.assign(type->name); + curr_tile_width_offset = width_offset; + curr_tile_height_offset = height_offset; + + curr_tile_height = type->height; + curr_tile_width = type->width; + + /* + * Only store the tile position if it is a noc router. + * Additionally, since a router tile can span multiple grid locations, we only add the tile if the height and width offset are zero (this prevents the router from being added multiple times for each grid location it spans). 
+ */ + if (!(noc_router_tile_name.compare(curr_tile_name)) && !curr_tile_width_offset && !curr_tile_height_offset) { + // calculating the centroid position of the current tile + curr_tile_centroid_x = (curr_tile_width - 1) / (double)2 + i; + curr_tile_centroid_y = (curr_tile_height - 1) / (double)2 + j; + + noc_router_tiles.emplace_back(i, j, layer_num, curr_tile_centroid_x, curr_tile_centroid_y); + } } } } - - return; } void generate_noc(const t_arch& arch, NocContext& noc_ctx, std::vector& noc_router_tiles) { @@ -215,8 +215,10 @@ void create_noc_routers(const t_noc_inf& noc_info, NocStorage* noc_model, std::v // at this point, the closest user described router to the current physical router was found // so add the router to the NoC - noc_model->add_router(logical_router->id, noc_router_tiles[closest_physical_router].grid_width_position, - noc_router_tiles[closest_physical_router].grid_height_position); + noc_model->add_router(logical_router->id, + noc_router_tiles[closest_physical_router].grid_width_position, + noc_router_tiles[closest_physical_router].grid_height_position, + noc_router_tiles[closest_physical_router].layer_position); // add the new assignment to the tracker router_assignments[closest_physical_router] = logical_router->id; diff --git a/vpr/src/base/setup_noc.h b/vpr/src/base/setup_noc.h index 2f96268f787..23737d1c5b1 100644 --- a/vpr/src/base/setup_noc.h +++ b/vpr/src/base/setup_noc.h @@ -49,8 +49,16 @@ // a data structure to store the position information of a noc router in the FPGA device struct t_noc_router_tile_position { + t_noc_router_tile_position(int x, int y, int layer_num, double centroid_x, double centroid_y) + : grid_width_position(x) + , grid_height_position(y) + , layer_position(layer_num) + , tile_centroid_x(centroid_x) + , tile_centroid_y(centroid_y) {} + int grid_width_position; int grid_height_position; + int layer_position; double tile_centroid_x; double tile_centroid_y; diff --git a/vpr/src/base/stats.cpp 
b/vpr/src/base/stats.cpp index 337a1964d6b..bc09e68418d 100644 --- a/vpr/src/base/stats.cpp +++ b/vpr/src/base/stats.cpp @@ -70,19 +70,21 @@ void routing_stats(const Netlist<>& net_list, VTR_LOG("Logic area (in minimum width transistor areas, excludes I/Os and empty grid tiles)...\n"); area = 0; - for (size_t i = 0; i < device_ctx.grid.width(); i++) { - for (size_t j = 0; j < device_ctx.grid.height(); j++) { - auto type = device_ctx.grid.get_physical_type(i, j); - int width_offset = device_ctx.grid.get_width_offset(i, j); - int height_offset = device_ctx.grid.get_height_offset(i, j); - if (width_offset == 0 - && height_offset == 0 - && !is_io_type(type) - && type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { - if (type->area == UNDEFINED) { - area += grid_logic_tile_area * type->width * type->height; - } else { - area += type->area; + for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { + for (int i = 0; i < (int)device_ctx.grid.width(); i++) { + for (int j = 0; j < (int)device_ctx.grid.height(); j++) { + auto type = device_ctx.grid.get_physical_type({i, j, layer_num}); + int width_offset = device_ctx.grid.get_width_offset({i, j, layer_num}); + int height_offset = device_ctx.grid.get_height_offset({i, j, layer_num}); + if (width_offset == 0 + && height_offset == 0 + && !is_io_type(type) + && type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { + if (type->area == UNDEFINED) { + area += grid_logic_tile_area * type->width * type->height; + } else { + area += type->area; + } } } } diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index 23353e22d15..c75645fd5d8 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -478,14 +478,14 @@ void vpr_create_device_grid(const t_vpr_setup& vpr_setup, const t_arch& Arch) { continue; } - if (device_ctx.grid.num_instances(&type) != 0) { + if (device_ctx.grid.num_instances(&type, -1) != 0) { VTR_LOG("\tPhysical Tile %s:\n", type.name); auto equivalent_sites = 
get_equivalent_sites_set(&type); for (auto logical_block : equivalent_sites) { float util = 0.; - size_t num_inst = device_ctx.grid.num_instances(&type); + size_t num_inst = device_ctx.grid.num_instances(&type, -1); if (num_inst != 0) { util = float(num_type_instances[logical_block]) / num_inst; } diff --git a/vpr/src/base/vpr_constraints_serializer.h b/vpr/src/base/vpr_constraints_serializer.h index 4007b7c5c3b..5405eb0e21a 100644 --- a/vpr/src/base/vpr_constraints_serializer.h +++ b/vpr/src/base/vpr_constraints_serializer.h @@ -163,6 +163,11 @@ class VprConstraintsSerializer final : public uxsd::VprConstraintsBase * */ + + virtual inline int get_add_region_layer_num(Region& r) final { + return r.get_layer_num(); + } + virtual inline int get_add_region_subtile(Region& r) final { return r.get_sub_tile(); } @@ -172,23 +177,23 @@ class VprConstraintsSerializer final : public uxsd::VprConstraintsBase rect = r.get_region_rect(); - return rect.xmax(); + const auto reg_coord = r.get_region_rect(); + return reg_coord.xmax; } virtual inline int get_add_region_x_low(Region& r) final { - vtr::Rect rect = r.get_region_rect(); - return rect.xmin(); + const auto reg_coord = r.get_region_rect(); + return reg_coord.xmin; } virtual inline int get_add_region_y_high(Region& r) final { - vtr::Rect rect = r.get_region_rect(); - return rect.ymax(); + const auto reg_coord = r.get_region_rect(); + return reg_coord.ymax; } virtual inline int get_add_region_y_low(Region& r) final { - vtr::Rect rect = r.get_region_rect(); - return rect.ymin(); + const auto reg_coord = r.get_region_rect(); + return reg_coord.ymin; } /** Generated for complex type "partition": @@ -232,7 +237,7 @@ class VprConstraintsSerializer final : public uxsd::VprConstraintsBase vertical_cuts; + // This function has not been tested for multi-layer grids + VTR_ASSERT(device_ctx.grid.get_num_layers() == 1); int horizontal_interval = device_ctx.grid.width() / horizontal_cutpoints; VTR_LOG("Device grid width is %d, horizontal 
interval is %d\n", device_ctx.grid.width(), horizontal_interval); @@ -138,7 +144,8 @@ void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, int int ymax = vertical_cuts[j + 1] - 1; Region reg; - reg.set_region_rect(xmin, ymin, xmax, ymax); + // This function has not been tested for multi-layer grids. An assertion is used earlier to make sure that the grid has only one layer + reg.set_region_rect({xmin, ymin, xmax, ymax, 0}); std::vector atoms; region_atoms.insert({reg, atoms}); @@ -176,7 +183,8 @@ void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, int } Region current_reg; - current_reg.set_region_rect(xminimum, yminimum, xmaximum, ymaximum); + // This function has not been tested for multi-layer grids. An assertion is used earlier to make sure that the grid has only one layer + current_reg.set_region_rect({xminimum, yminimum, xmaximum, ymaximum, 0}); auto got = region_atoms.find(current_reg); @@ -192,8 +200,9 @@ void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, int Partition part; PartitionId partid(num_partitions); std::string part_name = "Part" + std::to_string(num_partitions); - vtr::Rect rect = region.first.get_region_rect(); - create_partition(part, part_name, rect.xmin(), rect.ymin(), rect.xmax(), rect.ymax()); + const auto reg_coord = region.first.get_region_rect(); + create_partition(part, part_name, + {reg_coord.xmin, reg_coord.ymin, reg_coord.xmax, reg_coord.ymax, reg_coord.layer_num}); constraints.add_partition(part); for (unsigned int k = 0; k < region.second.size(); k++) { @@ -204,11 +213,11 @@ void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, int } } -void create_partition(Partition& part, std::string part_name, int xmin, int ymin, int xmax, int ymax) { +void create_partition(Partition& part, std::string part_name, const RegionRectCoord& region_cord) { part.set_name(part_name); PartitionRegion part_pr; Region part_region; - 
part_region.set_region_rect(xmin, ymin, xmax, ymax); + part_region.set_region_rect(region_cord); std::vector part_regions; part_regions.push_back(part_region); part_pr.set_partition_region(part_regions); diff --git a/vpr/src/base/vpr_constraints_writer.h b/vpr/src/base/vpr_constraints_writer.h index 756f8c17c29..955542be637 100644 --- a/vpr/src/base/vpr_constraints_writer.h +++ b/vpr/src/base/vpr_constraints_writer.h @@ -45,6 +45,6 @@ void setup_vpr_floorplan_constraints_one_loc(VprConstraints& constraints, int ex */ void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, int horizontal_cutpoints, int vertical_cutpoints); -void create_partition(Partition& part, std::string part_name, int xmin, int ymin, int xmax, int ymax); +void create_partition(Partition& part, std::string part_name, const RegionRectCoord& region_cord); #endif /* VPR_SRC_BASE_VPR_CONSTRAINTS_WRITER_H_ */ diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 7d615e36949..7c83980fcc4 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -369,7 +369,7 @@ struct PlacementContext : public Context { vtr::vector_map physical_pins; ///@brief Clustered block associated with each grid location (i.e. inverse of block_locs) - vtr::Matrix grid_blocks; //[0..device_ctx.grid.width()-1][0..device_ctx.grid.width()-1] + GridBlock grid_blocks; ///@brief The pl_macros array stores all the placement macros (usually carry chains). 
std::vector pl_macros; @@ -379,6 +379,7 @@ struct PlacementContext : public Context { * * Used to efficiently find logically 'adjacent' blocks of the same * block type even though the may be physically far apart + * Indexed with logical block type index: [0...num_logical_block_types-1] -> logical block compressed grid */ t_compressed_block_grids compressed_block_grids; diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index b793b361b3d..5db0c4b82be 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -575,10 +575,19 @@ struct t_net_power { * the region: (1..device_ctx.grid.width()-2, 1..device_ctx.grid.height()-1) */ struct t_bb { - int xmin = 0; - int xmax = 0; - int ymin = 0; - int ymax = 0; + t_bb() = default; + t_bb(int xmin_, int xmax_, int ymin_, int ymax_) + : xmin(xmin_) + , xmax(xmax_) + , ymin(ymin_) + , ymax(ymax_) { + VTR_ASSERT(xmax_ >= xmin_); + VTR_ASSERT(ymax_ >= ymin_); + } + int xmin = OPEN; + int xmax = OPEN; + int ymin = OPEN; + int ymax = OPEN; }; /** @@ -660,22 +669,26 @@ struct hash { * * x: x-coordinate * y: y-coordinate - * z: z-coordinate (capacity postion) + * sub_tile: sub-tile number (capacity position) + * layer: layer (die) number * * @note t_pl_offset should be used to represent an offset between t_pl_loc. 
*/ struct t_pl_loc { t_pl_loc() = default; - t_pl_loc(int xloc, int yloc, int sub_tile_loc) + t_pl_loc(int xloc, int yloc, int sub_tile_loc, int layer_num) : x(xloc) , y(yloc) - , sub_tile(sub_tile_loc) {} + , sub_tile(sub_tile_loc) + , layer(layer_num) {} int x = OPEN; int y = OPEN; int sub_tile = OPEN; + int layer = OPEN; t_pl_loc& operator+=(const t_pl_offset& rhs) { + VTR_ASSERT(this->layer != OPEN); x += rhs.x; y += rhs.y; sub_tile += rhs.sub_tile; @@ -683,6 +696,7 @@ struct t_pl_loc { } t_pl_loc& operator-=(const t_pl_offset& rhs) { + VTR_ASSERT(this->layer != OPEN); x -= rhs.x; y -= rhs.y; sub_tile -= rhs.sub_tile; @@ -706,15 +720,17 @@ struct t_pl_loc { } friend t_pl_offset operator-(const t_pl_loc& lhs, const t_pl_loc& rhs) { - return t_pl_offset(lhs.x - rhs.x, lhs.y - rhs.y, lhs.sub_tile - rhs.sub_tile); + VTR_ASSERT(lhs.layer == rhs.layer); + return {lhs.x - rhs.x, lhs.y - rhs.y, lhs.sub_tile - rhs.sub_tile}; } friend bool operator<(const t_pl_loc& lhs, const t_pl_loc& rhs) { + VTR_ASSERT(lhs.layer == rhs.layer); return std::tie(lhs.x, lhs.y, lhs.sub_tile) < std::tie(rhs.x, rhs.y, rhs.sub_tile); } friend bool operator==(const t_pl_loc& lhs, const t_pl_loc& rhs) { - return std::tie(lhs.x, lhs.y, lhs.sub_tile) == std::tie(rhs.x, rhs.y, rhs.sub_tile); + return std::tie(lhs.layer, lhs.x, lhs.y, lhs.sub_tile) == std::tie(rhs.layer, rhs.x, rhs.y, rhs.sub_tile); } friend bool operator!=(const t_pl_loc& lhs, const t_pl_loc& rhs) { @@ -778,6 +794,50 @@ struct t_grid_blocks { } }; +class GridBlock { + public: + GridBlock() = default; + + GridBlock(size_t width, size_t height, size_t layers) { + grid_blocks_.resize({layers, width, height}); + } + + inline void initialized_grid_block_at_location(const t_physical_tile_loc& loc, int num_sub_tiles) { + grid_blocks_[loc.layer_num][loc.x][loc.y].blocks.resize(num_sub_tiles, EMPTY_BLOCK_ID); + } + + inline void set_block_at_location(const t_pl_loc& loc, ClusterBlockId blk_id) { + 
grid_blocks_[loc.layer][loc.x][loc.y].blocks[loc.sub_tile] = blk_id; + } + + inline ClusterBlockId block_at_location(const t_pl_loc& loc) const { + return grid_blocks_[loc.layer][loc.x][loc.y].blocks[loc.sub_tile]; + } + + inline size_t num_blocks_at_location(const t_physical_tile_loc& loc) const { + return grid_blocks_[loc.layer_num][loc.x][loc.y].blocks.size(); + } + + inline int set_usage(const t_physical_tile_loc loc, int usage) { + return grid_blocks_[loc.layer_num][loc.x][loc.y].usage = usage; + } + + inline int get_usage(const t_physical_tile_loc loc) const { + return grid_blocks_[loc.layer_num][loc.x][loc.y].usage; + } + + inline bool is_sub_tile_empty(const t_physical_tile_loc loc, int sub_tile) const { + return grid_blocks_[loc.layer_num][loc.x][loc.y].subtile_empty(sub_tile); + } + + inline void clear() { + grid_blocks_.clear(); + } + + private: + vtr::NdMatrix grid_blocks_; +}; + ///@brief Names of various files struct t_file_name_opts { std::string ArchFile; diff --git a/vpr/src/draw/draw.cpp b/vpr/src/draw/draw.cpp index ca89925be94..e8568639986 100644 --- a/vpr/src/draw/draw.cpp +++ b/vpr/src/draw/draw.cpp @@ -985,20 +985,24 @@ static void highlight_blocks(double x, double y) { /// determine block /// ezgl::rectangle clb_bbox; + //TODO: Change when graphics supports 3D FPGAs + VTR_ASSERT(device_ctx.grid.get_num_layers() == 1); + int layer_num = 0; // iterate over grid x - for (size_t i = 0; i < device_ctx.grid.width(); ++i) { + for (int i = 0; i < (int)device_ctx.grid.width(); ++i) { if (draw_coords->tile_x[i] > x) { break; // we've gone to far in the x direction } // iterate over grid y - for (size_t j = 0; j < device_ctx.grid.height(); ++j) { + for (int j = 0; j < (int)device_ctx.grid.height(); ++j) { if (draw_coords->tile_y[j] > y) { break; // we've gone to far in the y direction } // iterate over sub_blocks - const auto& type = device_ctx.grid.get_physical_type(i, j); + const auto& type = device_ctx.grid.get_physical_type({i, j, layer_num}); for 
(int k = 0; k < type->capacity; ++k) { - clb_index = place_ctx.grid_blocks[i][j].blocks[k]; + // TODO: Change when graphics supports 3D + clb_index = place_ctx.grid_blocks.block_at_location({i, j, k, layer_num}); if (clb_index != EMPTY_BLOCK_ID) { clb_bbox = draw_coords->get_absolute_clb_bbox(clb_index, cluster_ctx.clb_nlist.block_type(clb_index)); @@ -1371,6 +1375,8 @@ bool highlight_loc_with_specific_color(int x, int y, ezgl::color& loc_color) { t_pl_loc curr_loc; curr_loc.x = x; curr_loc.y = y; + //TODO: Graphic currently doesn't support 3D FPGAs + curr_loc.layer = 0; //search for the current location in the vector of colored locations auto it = std::find_if(draw_state->colored_locations.begin(), diff --git a/vpr/src/draw/draw_basic.cpp b/vpr/src/draw/draw_basic.cpp index 7beb012d3d2..75802edf8c5 100644 --- a/vpr/src/draw/draw_basic.cpp +++ b/vpr/src/draw/draw_basic.cpp @@ -108,13 +108,17 @@ void drawplace(ezgl::renderer* g) { ClusterBlockId bnum; int num_sub_tiles; + //TODO: Change when graphics supports 3D FPGAs + VTR_ASSERT(device_ctx.grid.get_num_layers() == 1); + int layer_num = 0; + g->set_line_width(0); - for (size_t i = 0; i < device_ctx.grid.width(); i++) { - for (size_t j = 0; j < device_ctx.grid.height(); j++) { + for (int i = 0; i < (int)device_ctx.grid.width(); i++) { + for (int j = 0; j < (int)device_ctx.grid.height(); j++) { /* Only the first block of a group should control drawing */ - const auto& type = device_ctx.grid.get_physical_type(i, j); - int width_offset = device_ctx.grid.get_width_offset(i, j); - int height_offset = device_ctx.grid.get_height_offset(i, j); + const auto& type = device_ctx.grid.get_physical_type({i, j, layer_num}); + int width_offset = device_ctx.grid.get_width_offset({i, j, layer_num}); + int height_offset = device_ctx.grid.get_height_offset({i, j, layer_num}); if (width_offset > 0 || height_offset > 0) @@ -128,7 +132,8 @@ void drawplace(ezgl::renderer* g) { for (int k = 0; k < num_sub_tiles; ++k) { /* Look at the tile 
at start of large block */ - bnum = place_ctx.grid_blocks[i][j].blocks[k]; + //TODO: Change when graphics supports 3D + bnum = place_ctx.grid_blocks.block_at_location({i, j, k, 0}); /* Fill background for the clb. Do not fill if "show_blk_internal" * is toggled. */ @@ -161,7 +166,10 @@ void drawplace(ezgl::renderer* g) { g->set_color(block_color); /* Get coords of current sub_tile */ - ezgl::rectangle abs_clb_bbox = draw_coords->get_absolute_clb_bbox(i, j, k, + ezgl::rectangle abs_clb_bbox = draw_coords->get_absolute_clb_bbox(layer_num, + i, + j, + k, logical_block_type); ezgl::point2d center = abs_clb_bbox.center(); diff --git a/vpr/src/draw/draw_floorplanning.cpp b/vpr/src/draw/draw_floorplanning.cpp index 24f1f94679b..126bbd63212 100644 --- a/vpr/src/draw/draw_floorplanning.cpp +++ b/vpr/src/draw/draw_floorplanning.cpp @@ -100,12 +100,17 @@ static void highlight_partition(ezgl::renderer* g, int partitionID, int alpha) { // the on screen units for ezgl to use. for (int region = 0; (size_t)region < regions.size(); region++) { - auto tile_rect = regions[region].get_region_rect(); - - ezgl::rectangle top_right = draw_coords->get_absolute_clb_bbox(tile_rect.xmax(), - tile_rect.ymax(), 0); - ezgl::rectangle bottom_left = draw_coords->get_absolute_clb_bbox(tile_rect.xmin(), - tile_rect.ymin(), 0); + const auto reg_coord = regions[region].get_region_rect(); + + //TODO: 0 should be replaced with the actual z value of the region when graph is 3D + ezgl::rectangle top_right = draw_coords->get_absolute_clb_bbox(reg_coord.layer_num, + reg_coord.xmax, + reg_coord.ymax, + 0); + ezgl::rectangle bottom_left = draw_coords->get_absolute_clb_bbox(reg_coord.layer_num, + reg_coord.xmin, + reg_coord.ymin, + 0); ezgl::rectangle on_screen_rect(bottom_left.bottom_left(), top_right.top_right()); diff --git a/vpr/src/draw/draw_noc.cpp b/vpr/src/draw/draw_noc.cpp index 700f82a132c..771be541034 100644 --- a/vpr/src/draw/draw_noc.cpp +++ b/vpr/src/draw/draw_noc.cpp @@ -37,8 +37,9 @@ void 
draw_noc(ezgl::renderer* g) { // check that the NoC tile has a capacity greater than 0 (can we assume it always will?) and if not then we cant draw anythign as the NoC tile wont be drawn /* since the vector of routers all have a reference positions on the grid to the corresponding physical tile, just use the first router in the vector and get its position, then use this to get the capcity of a noc router tile */ - const auto& type = device_ctx.grid.get_physical_type(router_list.begin()->get_router_grid_position_x(), - router_list.begin()->get_router_grid_position_y()); + const auto& type = device_ctx.grid.get_physical_type({router_list.begin()->get_router_grid_position_x(), + router_list.begin()->get_router_grid_position_y(), + router_list.begin()->get_router_layer_position()}); int num_subtiles = type->capacity; if (num_subtiles == 0) { @@ -230,10 +231,12 @@ void draw_noc_links(ezgl::renderer* g, t_logical_block_type_ptr noc_router_logic NocRouterId sink_router; // source router grid coordinates + int source_router_layer_position = 0; int source_router_x_position = 0; int source_router_y_position = 0; // sink router grid coordinates + int sink_router_layer_position = 0; int sink_router_x_position = 0; int sink_router_y_position = 0; @@ -262,16 +265,18 @@ void draw_noc_links(ezgl::renderer* g, t_logical_block_type_ptr noc_router_logic sink_router = noc_link_list[link_id].get_sink_router(); // calculate the grid positions of the source and sink routers + source_router_layer_position = router_list[source_router].get_router_layer_position(); source_router_x_position = router_list[source_router].get_router_grid_position_x(); source_router_y_position = router_list[source_router].get_router_grid_position_y(); + sink_router_layer_position = router_list[sink_router].get_router_layer_position(); sink_router_x_position = router_list[sink_router].get_router_grid_position_x(); sink_router_y_position = router_list[sink_router].get_router_grid_position_y(); // get the initial 
drawing coordinates of the noc link // it will be drawn from the center of two routers it connects - link_coords.start = draw_coords->get_absolute_clb_bbox(source_router_x_position, source_router_y_position, 0, noc_router_logical_block_type).center(); - link_coords.end = draw_coords->get_absolute_clb_bbox(sink_router_x_position, sink_router_y_position, 0, noc_router_logical_block_type).center(); + link_coords.start = draw_coords->get_absolute_clb_bbox(source_router_layer_position, source_router_x_position, source_router_y_position, 0, noc_router_logical_block_type).center(); + link_coords.end = draw_coords->get_absolute_clb_bbox(sink_router_layer_position, sink_router_x_position, sink_router_y_position, 0, noc_router_logical_block_type).center(); // determine the current noc link type link_type = determine_noc_link_type(link_coords.start, link_coords.end); diff --git a/vpr/src/draw/draw_rr.cpp b/vpr/src/draw/draw_rr.cpp index 0e634421568..c4a4cde6278 100644 --- a/vpr/src/draw/draw_rr.cpp +++ b/vpr/src/draw/draw_rr.cpp @@ -592,7 +592,9 @@ void draw_get_rr_src_sink_coords(const t_rr_node& node, float* xcen, float* ycen auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; RRNodeId rr_node = node.id(); - t_physical_tile_type_ptr tile_type = device_ctx.grid.get_physical_type(rr_graph.node_xlow(rr_node), rr_graph.node_ylow(rr_node)); + t_physical_tile_type_ptr tile_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(rr_node), + rr_graph.node_ylow(rr_node), + rr_graph.node_layer(rr_node)}); //Number of classes (i.e. 
src/sinks) we need to draw float num_class = tile_type->class_inf.size(); @@ -678,6 +680,10 @@ int draw_check_rr_node_hit(float click_x, float click_y) { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; + //TODO: Change when graphics supports 3D FPGAs + VTR_ASSERT(device_ctx.grid.get_num_layers() == 1); + int layer_num = 0; + for (const RRNodeId& rr_id : device_ctx.rr_graph.nodes()) { size_t inode = (size_t)rr_id; switch (rr_graph.node_type(rr_id)) { @@ -685,9 +691,9 @@ int draw_check_rr_node_hit(float click_x, float click_y) { case OPIN: { int i = rr_graph.node_xlow(rr_id); int j = rr_graph.node_ylow(rr_id); - t_physical_tile_type_ptr type = device_ctx.grid.get_physical_type(i, j); - int width_offset = device_ctx.grid.get_width_offset(i, j); - int height_offset = device_ctx.grid.get_height_offset(i, j); + t_physical_tile_type_ptr type = device_ctx.grid.get_physical_type({i, j, layer_num}); + int width_offset = device_ctx.grid.get_width_offset({i, j, layer_num}); + int height_offset = device_ctx.grid.get_height_offset({i, j, layer_num}); int ipin = rr_graph.node_pin_num(rr_id); float xcen, ycen; for (const e_side& iside : SIDES) { @@ -852,12 +858,13 @@ void draw_get_rr_pin_coords(const t_rr_node& node, float* xcen, float* ycen, con i = rr_graph.node_xlow(rr_node); j = rr_graph.node_ylow(rr_node); + int layer_num = rr_graph.node_layer(rr_node); xc = draw_coords->tile_x[i]; yc = draw_coords->tile_y[j]; ipin = rr_graph.node_pin_num(rr_node); - type = device_ctx.grid.get_physical_type(i, j); + type = device_ctx.grid.get_physical_type({i, j, layer_num}); pins_per_sub_tile = type->num_pins / type->capacity; k = ipin / pins_per_sub_tile; diff --git a/vpr/src/draw/draw_rr_edges.cpp b/vpr/src/draw/draw_rr_edges.cpp index 6c1b5bfc4c4..6ff00263676 100644 --- a/vpr/src/draw/draw_rr_edges.cpp +++ b/vpr/src/draw/draw_rr_edges.cpp @@ -412,10 +412,15 @@ void draw_pin_to_chan_edge(int pin_node, int chan_node, ezgl::renderer* g) { auto pin_rr = 
RRNodeId(pin_node); auto chan_rr = RRNodeId(chan_node); - const auto& grid_type = device_ctx.grid.get_physical_type(rr_graph.node_xlow(pin_rr), rr_graph.node_ylow(pin_rr)); - int width_offset = device_ctx.grid.get_width_offset(rr_graph.node_xlow(pin_rr), rr_graph.node_ylow(pin_rr)); - int height_offset = device_ctx.grid.get_height_offset(rr_graph.node_xlow(pin_rr), rr_graph.node_ylow(pin_rr)); - ; + const auto& grid_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(pin_rr), + rr_graph.node_ylow(pin_rr), + rr_graph.node_layer(pin_rr)}); + int width_offset = device_ctx.grid.get_width_offset({rr_graph.node_xlow(pin_rr), + rr_graph.node_ylow(pin_rr), + rr_graph.node_layer(pin_rr)}); + int height_offset = device_ctx.grid.get_height_offset({rr_graph.node_xlow(pin_rr), + rr_graph.node_ylow(pin_rr), + rr_graph.node_layer(pin_rr)}); float x1 = 0, y1 = 0; /* If there is only one side, no need for the following inference!!! diff --git a/vpr/src/draw/draw_types.cpp b/vpr/src/draw/draw_types.cpp index 428dc4f9c5e..d1532564938 100644 --- a/vpr/src/draw/draw_types.cpp +++ b/vpr/src/draw/draw_types.cpp @@ -79,10 +79,15 @@ float t_draw_coords::get_tile_height() { ezgl::rectangle t_draw_coords::get_pb_bbox(ClusterBlockId clb_index, const t_pb_graph_node& pb_gnode) { auto& place_ctx = g_vpr_ctx.placement(); auto& cluster_ctx = g_vpr_ctx.clustering(); - return get_pb_bbox(place_ctx.block_locs[clb_index].loc.x, place_ctx.block_locs[clb_index].loc.y, place_ctx.block_locs[clb_index].loc.sub_tile, cluster_ctx.clb_nlist.block_type(clb_index), pb_gnode); + return get_pb_bbox(place_ctx.block_locs[clb_index].loc.layer, + place_ctx.block_locs[clb_index].loc.x, + place_ctx.block_locs[clb_index].loc.y, + place_ctx.block_locs[clb_index].loc.sub_tile, + cluster_ctx.clb_nlist.block_type(clb_index), + pb_gnode); } -ezgl::rectangle t_draw_coords::get_pb_bbox(int grid_x, int grid_y, int sub_block_index, const t_logical_block_type_ptr logical_block_type, const t_pb_graph_node& pb_gnode) { 
+ezgl::rectangle t_draw_coords::get_pb_bbox(int grid_layer, int grid_x, int grid_y, int sub_block_index, const t_logical_block_type_ptr logical_block_type, const t_pb_graph_node& pb_gnode) { auto& device_ctx = g_vpr_ctx.device(); t_draw_pb_type_info& blk_type_info = this->blk_info.at(logical_block_type->index); @@ -90,7 +95,7 @@ ezgl::rectangle t_draw_coords::get_pb_bbox(int grid_x, int grid_y, int sub_block // if getting clb bbox, apply location info. if (pb_gnode.is_root()) { - const auto& type = device_ctx.grid.get_physical_type(grid_x, grid_y); + const auto& type = device_ctx.grid.get_physical_type({grid_x, grid_y, grid_layer}); float sub_blk_offset = this->tile_width * (sub_block_index / (float)type->capacity); result += ezgl::point2d(this->tile_x[grid_x], this->tile_y[grid_y]); @@ -101,7 +106,7 @@ ezgl::rectangle t_draw_coords::get_pb_bbox(int grid_x, int grid_y, int sub_block return result; } -ezgl::rectangle t_draw_coords::get_pb_bbox(int grid_x, int grid_y, int sub_block_index, const t_logical_block_type_ptr logical_block_type) { +ezgl::rectangle t_draw_coords::get_pb_bbox(int grid_layer, int grid_x, int grid_y, int sub_block_index, const t_logical_block_type_ptr logical_block_type) { auto& device_ctx = g_vpr_ctx.device(); t_draw_pb_type_info& blk_type_info = this->blk_info.at(logical_block_type->index); @@ -110,7 +115,7 @@ ezgl::rectangle t_draw_coords::get_pb_bbox(int grid_x, int grid_y, int sub_block // if getting clb bbox, apply location info. 
if (pb_gnode.is_root()) { - const auto& type = device_ctx.grid.get_physical_type(grid_x, grid_y); + const auto& type = device_ctx.grid.get_physical_type({grid_x, grid_y, grid_layer}); float sub_blk_offset = this->tile_width * (sub_block_index / (float)type->capacity); result += ezgl::point2d(this->tile_x[grid_x], this->tile_y[grid_y]); @@ -139,17 +144,17 @@ ezgl::rectangle t_draw_coords::get_absolute_clb_bbox(const ClusterBlockId clb_in auto& place_ctx = g_vpr_ctx.placement(); t_pl_loc loc = place_ctx.block_locs[clb_index].loc; - return get_pb_bbox(loc.x, loc.y, loc.sub_tile, block_type); + return get_pb_bbox(loc.layer, loc.x, loc.y, loc.sub_tile, block_type); } -ezgl::rectangle t_draw_coords::get_absolute_clb_bbox(int grid_x, int grid_y, int sub_block_index) { +ezgl::rectangle t_draw_coords::get_absolute_clb_bbox(int grid_layer, int grid_x, int grid_y, int sub_block_index) { auto& device_ctx = g_vpr_ctx.device(); - const auto& type = device_ctx.grid.get_physical_type(grid_x, grid_y); - return get_pb_bbox(grid_x, grid_y, sub_block_index, pick_logical_type(type)); + const auto& type = device_ctx.grid.get_physical_type({grid_x, grid_y, grid_layer}); + return get_pb_bbox(grid_layer, grid_x, grid_y, sub_block_index, pick_logical_type(type)); } -ezgl::rectangle t_draw_coords::get_absolute_clb_bbox(int grid_x, int grid_y, int sub_block_index, const t_logical_block_type_ptr logical_block_type) { - return get_pb_bbox(grid_x, grid_y, sub_block_index, logical_block_type); +ezgl::rectangle t_draw_coords::get_absolute_clb_bbox(int grid_layer, int grid_x, int grid_y, int sub_block_index, const t_logical_block_type_ptr logical_block_type) { + return get_pb_bbox(grid_layer, grid_x, grid_y, sub_block_index, logical_block_type); } #endif // NO_GRAPHICS diff --git a/vpr/src/draw/draw_types.h b/vpr/src/draw/draw_types.h index 5ccc7e80a71..857519ba25f 100644 --- a/vpr/src/draw/draw_types.h +++ b/vpr/src/draw/draw_types.h @@ -349,10 +349,10 @@ struct t_draw_coords { ezgl::rectangle 
get_pb_bbox(ClusterBlockId clb_index, const t_pb_graph_node& pb_gnode); ///@brief returns bounding box of sub block at given location of given type w. given pb - ezgl::rectangle get_pb_bbox(int grid_x, int grid_y, int sub_block_index, const t_logical_block_type_ptr type, const t_pb_graph_node& pb_gnode); + ezgl::rectangle get_pb_bbox(int grid_layer, int grid_x, int grid_y, int sub_block_index, const t_logical_block_type_ptr type, const t_pb_graph_node& pb_gnode); ///@brief returns pb of sub block of given idx/given type at location - ezgl::rectangle get_pb_bbox(int grid_x, int grid_y, int sub_block_index, const t_logical_block_type_ptr type); + ezgl::rectangle get_pb_bbox(int grid_layer, int grid_x, int grid_y, int sub_block_index, const t_logical_block_type_ptr type); /** * @brief returns a bounding box for the given pb in the given @@ -367,13 +367,13 @@ struct t_draw_coords { * @brief Returns a bounding box for the clb at device_ctx.grid[grid_x][grid_y].blocks[sub_block_index], * even if it is empty. */ - ezgl::rectangle get_absolute_clb_bbox(int grid_x, int grid_y, int sub_block_index); + ezgl::rectangle get_absolute_clb_bbox(int grid_layer, int grid_x, int grid_y, int sub_block_index); /** * @brief Returns a bounding box for the clb at device_ctx.grid[grid_x][grid_y].blocks[sub_block_index], * of given type even if it is empty. 
*/ - ezgl::rectangle get_absolute_clb_bbox(int grid_x, int grid_y, int sub_block_index, const t_logical_block_type_ptr block_type); + ezgl::rectangle get_absolute_clb_bbox(int grid_layer, int grid_x, int grid_y, int sub_block_index, const t_logical_block_type_ptr block_type); private: float tile_width; diff --git a/vpr/src/draw/intra_logic_block.cpp b/vpr/src/draw/intra_logic_block.cpp index 66bfc8bd640..285ade3c027 100644 --- a/vpr/src/draw/intra_logic_block.cpp +++ b/vpr/src/draw/intra_logic_block.cpp @@ -154,12 +154,15 @@ void draw_internal_draw_subblk(ezgl::renderer* g) { auto& cluster_ctx = g_vpr_ctx.clustering(); auto& place_ctx = g_vpr_ctx.placement(); - for (size_t i = 0; i < device_ctx.grid.width(); i++) { - for (size_t j = 0; j < device_ctx.grid.height(); j++) { + //TODO: Change when graphics supports 3D FPGAs + VTR_ASSERT(device_ctx.grid.get_num_layers() == 1); + int layer_num = 0; + for (int i = 0; i < (int)device_ctx.grid.width(); i++) { + for (int j = 0; j < (int)device_ctx.grid.height(); j++) { /* Only the first block of a group should control drawing */ - const auto& type = device_ctx.grid.get_physical_type(i, j); - int width_offset = device_ctx.grid.get_width_offset(i, j); - int height_offset = device_ctx.grid.get_height_offset(i, j); + const auto& type = device_ctx.grid.get_physical_type({i, j, layer_num}); + int width_offset = device_ctx.grid.get_width_offset({i, j, layer_num}); + int height_offset = device_ctx.grid.get_height_offset({i, j, layer_num}); if (width_offset > 0 || height_offset > 0) continue; @@ -171,11 +174,13 @@ void draw_internal_draw_subblk(ezgl::renderer* g) { int num_sub_tiles = type->capacity; for (int k = 0; k < num_sub_tiles; ++k) { /* Don't draw if block is empty. 
*/ - if (place_ctx.grid_blocks[i][j].blocks[k] == EMPTY_BLOCK_ID || place_ctx.grid_blocks[i][j].blocks[k] == INVALID_BLOCK_ID) + // TODO: Change when graphics supports 3D + if (place_ctx.grid_blocks.block_at_location({i, j, k, 0}) == EMPTY_BLOCK_ID || place_ctx.grid_blocks.block_at_location({i, j, k, 0}) == INVALID_BLOCK_ID) continue; /* Get block ID */ - ClusterBlockId bnum = place_ctx.grid_blocks[i][j].blocks[k]; + // TODO: Change when graphics supports 3D + ClusterBlockId bnum = place_ctx.grid_blocks.block_at_location({i, j, k, 0}); /* Safety check, that physical blocks exists in the CLB */ if (cluster_ctx.clb_nlist.block_pb(bnum) == nullptr) continue; @@ -282,8 +287,9 @@ draw_internal_calc_coords(int type_descrip_index, t_pb_graph_node* pb_graph_node double left, bot, right, top; int capacity = device_ctx.physical_tile_types[type_descrip_index].capacity; - const auto& type = device_ctx.grid.get_physical_type(1, 0); - if (capacity > 1 && device_ctx.grid.width() > 0 && device_ctx.grid.height() > 0 && place_ctx.grid_blocks[1][0].usage != 0 + // TODO: this is a hack - should be fixed for the layer_num + const auto& type = device_ctx.grid.get_physical_type({1, 0, 0}); + if (capacity > 1 && device_ctx.grid.width() > 0 && device_ctx.grid.height() > 0 && place_ctx.grid_blocks.get_usage({1, 0, 0}) != 0 && type_descrip_index == type->index) { // that should test for io blocks, and setting capacity_divisor > 1 // will squish every thing down diff --git a/vpr/src/draw/manual_moves.cpp b/vpr/src/draw/manual_moves.cpp index 554334f63cd..f2005c2bc6c 100644 --- a/vpr/src/draw/manual_moves.cpp +++ b/vpr/src/draw/manual_moves.cpp @@ -115,7 +115,8 @@ void calculate_cost_callback(GtkWidget* /*widget*/, GtkWidget* grid) { valid_input = false; } - t_pl_loc to = t_pl_loc(x_location, y_location, subtile_location); + // TODO: When graphic is updated to support 3D, this will need to be updated + t_pl_loc to = t_pl_loc(x_location, y_location, subtile_location, 0); valid_input = 
is_manual_move_legal(ClusterBlockId(block_id), to); if (valid_input) { @@ -160,7 +161,7 @@ bool is_manual_move_legal(ClusterBlockId block_id, t_pl_loc to) { } //If the block s not compatible - auto physical_tile = device_ctx.grid.get_physical_type(to.x, to.y); + auto physical_tile = device_ctx.grid.get_physical_type({to.x, to.y, to.layer}); auto logical_block = cluster_ctx.clb_nlist.block_type(block_id); if (to.sub_tile < 0 || to.sub_tile >= physical_tile->capacity || !is_sub_tile_compatible(physical_tile, logical_block, to.sub_tile)) { invalid_breakpoint_entry_window("Blocks are not compatible"); @@ -168,7 +169,7 @@ bool is_manual_move_legal(ClusterBlockId block_id, t_pl_loc to) { } //If the destination block is user constrained, abort this swap - auto b_to = place_ctx.grid_blocks[to.x][to.y].blocks[to.sub_tile]; + auto b_to = place_ctx.grid_blocks.block_at_location(to); if (b_to != INVALID_BLOCK_ID && b_to != EMPTY_BLOCK_ID) { if (place_ctx.block_locs[b_to].is_fixed) { invalid_breakpoint_entry_window("Block is fixed"); diff --git a/vpr/src/draw/search_bar.cpp b/vpr/src/draw/search_bar.cpp index 2c3c6627ffa..361728a904c 100644 --- a/vpr/src/draw/search_bar.cpp +++ b/vpr/src/draw/search_bar.cpp @@ -241,9 +241,10 @@ void auto_zoom_rr_node(int rr_node_id) { case OPIN: { int i = rr_graph.node_xlow(RRNodeId(rr_node_id)); int j = rr_graph.node_ylow(RRNodeId(rr_node_id)); - t_physical_tile_type_ptr type = device_ctx.grid.get_physical_type(i, j); - int width_offset = device_ctx.grid.get_width_offset(i, j); - int height_offset = device_ctx.grid.get_height_offset(i, j); + int layer_num = rr_graph.node_layer(RRNodeId(rr_node_id)); + t_physical_tile_type_ptr type = device_ctx.grid.get_physical_type({i, j, layer_num}); + int width_offset = device_ctx.grid.get_width_offset({i, j, layer_num}); + int height_offset = device_ctx.grid.get_height_offset({i, j, layer_num}); int ipin = rr_graph.node_ptc_num(RRNodeId(rr_node_id)); float xcen, ycen; diff --git 
a/vpr/src/noc/noc_router.cpp b/vpr/src/noc/noc_router.cpp index 74eb32edb26..5ea2c05c1b9 100644 --- a/vpr/src/noc/noc_router.cpp +++ b/vpr/src/noc/noc_router.cpp @@ -1,10 +1,11 @@ #include "noc_router.h" // constructor -NocRouter::NocRouter(int id, int grid_position_x, int grid_position_y) +NocRouter::NocRouter(int id, int grid_position_x, int grid_position_y, int layer_position) : router_user_id(id) , router_grid_position_x(grid_position_x) - , router_grid_position_y(grid_position_y) { + , router_grid_position_y(grid_position_y) + , router_layer_position(layer_position) { // initialize variables router_block_ref = ClusterBlockId(0); } @@ -22,6 +23,10 @@ int NocRouter::get_router_grid_position_y(void) const { return router_grid_position_y; } +int NocRouter::get_router_layer_position(void) const { + return router_layer_position; +} + ClusterBlockId NocRouter::get_router_block_ref(void) const { return router_block_ref; } diff --git a/vpr/src/noc/noc_router.h b/vpr/src/noc/noc_router.h index 337dabc7921..a48c64cb971 100644 --- a/vpr/src/noc/noc_router.h +++ b/vpr/src/noc/noc_router.h @@ -45,11 +45,12 @@ class NocRouter { // device position of the physical router tile int router_grid_position_x; /*(router_key, converted_id)); return; @@ -123,6 +125,12 @@ void NocStorage::set_device_grid_width(int grid_width) { return; } +void NocStorage::set_device_grid_spec(int grid_width, int grid_height) { + device_grid_width = grid_width; + num_layer_blocks = grid_width * grid_height; + return; +} + bool NocStorage::remove_link(NocRouterId src_router_id, NocRouterId sink_router_id) { // This status variable is used to report externally whether the link was removed or not bool link_removed_status = false; @@ -221,9 +229,9 @@ NocLinkId NocStorage::get_parallel_link(NocLinkId current_link) const { return parallel_link; } -int NocStorage::generate_router_key_from_grid_location(int grid_position_x, int grid_position_y) const { +int NocStorage::generate_router_key_from_grid_location(int 
grid_position_x, int grid_position_y, int layer_position) const { // calculate the key value - return (device_grid_width * grid_position_y + grid_position_x); + return (num_layer_blocks * layer_position + device_grid_width * grid_position_y + grid_position_x); } void NocStorage::echo_noc(char* file_name) const { diff --git a/vpr/src/noc/noc_storage.h b/vpr/src/noc/noc_storage.h index c1d1e025af0..71e95b8f838 100644 --- a/vpr/src/noc/noc_storage.h +++ b/vpr/src/noc/noc_storage.h @@ -138,6 +138,7 @@ class NocStorage { * */ int device_grid_width; + int num_layer_blocks; // prevent "copying" of this object NocStorage(const NocStorage&) = delete; @@ -288,7 +289,7 @@ class NocStorage { * @param grid_position_y The vertical position on the FPGA of the physical * tile that this router represents. */ - void add_router(int id, int grid_position_x, int grid_position_y); + void add_router(int id, int grid_position_x, int grid_position_y, int layer_position); /** * @brief Creates a new link and adds it to the NoC. The newly created @@ -336,7 +337,9 @@ class NocStorage { void set_device_grid_width(int grid_width); - // general utility functions + void set_device_grid_spec(int grid_width, int grid_height); + + // general utility functions /** * @brief The link is removed from the outgoing vector of links for * the source router. The link is not removed from the vector of all @@ -433,12 +436,17 @@ class NocStorage { * * @param grid_position_x The horizontal position on the FPGA of the physical * tile that this router represents. - * @param grid_position_y The vertical position on the FPGA of the physical - * tile that this router represents. + * + * @param grid_position_y The vertical position on the FPGA of the physical + * tile that this router represents. + * + * @param layer_position The layer number of the physical + * tile that this router represents. + * * @return int Represents a unique key that can be used to identify a * hard router block. 
*/ - int generate_router_key_from_grid_location(int grid_position_x, int grid_position_y) const; + int generate_router_key_from_grid_location(int grid_position_x, int grid_position_y, int layer_position) const; /** * @brief Writes out the NocStorage class information to a file. diff --git a/vpr/src/noc/read_xml_noc_traffic_flows_file.cpp b/vpr/src/noc/read_xml_noc_traffic_flows_file.cpp index 7a20109d041..b785d2c4da6 100644 --- a/vpr/src/noc/read_xml_noc_traffic_flows_file.cpp +++ b/vpr/src/noc/read_xml_noc_traffic_flows_file.cpp @@ -252,7 +252,9 @@ t_physical_tile_type_ptr get_physical_type_of_noc_router_tile(const DeviceContex VTR_ASSERT(physical_noc_router != noc_ctx.noc_model.get_noc_routers().end()); //Using the routers grid position go to the device and identify the physical type of the tile located there. - return device_ctx.grid.get_physical_type(physical_noc_router->get_router_grid_position_x(), physical_noc_router->get_router_grid_position_y()); + return device_ctx.grid.get_physical_type({physical_noc_router->get_router_grid_position_x(), + physical_noc_router->get_router_grid_position_y(), + physical_noc_router->get_router_layer_position()}); } bool check_that_all_router_blocks_have_an_associated_traffic_flow(NocContext& noc_ctx, t_physical_tile_type_ptr noc_router_tile_type, std::string noc_flows_file) { diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index 8bc8e87923d..0e12305dc70 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -2078,9 +2078,9 @@ void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, int rhs_num_instances = 0; // Count number of instances for each type for (auto type : lhs->equivalent_tiles) - lhs_num_instances += device_ctx.grid.num_instances(type); + lhs_num_instances += device_ctx.grid.num_instances(type, -1); for (auto type : rhs->equivalent_tiles) - rhs_num_instances += device_ctx.grid.num_instances(type); + rhs_num_instances += 
device_ctx.grid.num_instances(type, -1); float lhs_util = vtr::safe_ratio(num_used_type_instances[lhs], lhs_num_instances); float rhs_util = vtr::safe_ratio(num_used_type_instances[rhs], rhs_num_instances); @@ -2179,7 +2179,7 @@ void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, // Check used type instances against the possible equivalent physical locations unsigned int num_instances = 0; for (auto equivalent_tile : block_type->equivalent_tiles) { - num_instances += device_ctx.grid.num_instances(equivalent_tile); + num_instances += device_ctx.grid.num_instances(equivalent_tile, -1); } if (num_used_type_instances[block_type] > num_instances) { diff --git a/vpr/src/pack/constraints_report.cpp b/vpr/src/pack/constraints_report.cpp index 77e612cc8b3..f75823aefab 100644 --- a/vpr/src/pack/constraints_report.cpp +++ b/vpr/src/pack/constraints_report.cpp @@ -42,7 +42,7 @@ bool floorplan_constraints_regions_overfull() { bool floorplan_regions_overfull = false; for (auto& region_info : regions_count_info) { - vtr::Rect rect = region_info.first.get_region_rect(); + const auto rect = region_info.first.get_region_rect(); for (unsigned int j = 0; j < block_types.size(); j++) { int num_assigned_blocks = region_info.second[j]; int num_tiles = 0; @@ -50,7 +50,7 @@ bool floorplan_constraints_regions_overfull() { if (num_assigned_blocks > num_tiles) { floorplan_regions_overfull = true; floorplanning_ctx.overfull_regions.push_back(region_info.first); - VTR_LOG("\n \nRegion (%d, %d) to (%d, %d) st %d \n", rect.xmin(), rect.ymin(), rect.xmax(), rect.ymax(), region_info.first.get_sub_tile()); + VTR_LOG("\n \nRegion (%d, %d) to (%d, %d) st %d \n", rect.xmin, rect.ymin, rect.xmax, rect.ymax, region_info.first.get_sub_tile()); VTR_LOG("Assigned %d blocks of type %s, but only has %d tiles of that type\n", num_assigned_blocks, block_types[j].name, num_tiles); } } diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index a1868c80778..252dc37a98d 100644 --- 
a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -237,7 +237,7 @@ bool try_pack(t_packer_opts* packer_opts, int num_instances = 0; for (auto type : iter->first->equivalent_tiles) - num_instances += grid.num_instances(type); + num_instances += grid.num_instances(type, -1); resource_avail += std::string(iter->first->name) + ": " + std::to_string(num_instances); } @@ -369,7 +369,7 @@ static bool try_size_device_grid(const t_arch& arch, const std::map& net_li const DeviceContext& device_ctx, ClusteringContext& clustering_ctx, const vtr::vector& rr_node_nets, - const vtr::Point& grid_coord, + const t_pl_loc& grid_coord, const ClusterBlockId& blk_id, - const int& sub_tile_z, size_t& num_mismatches, const bool& verbose, bool is_flat) { + const int sub_tile_z = grid_coord.sub_tile; + const int coord_x = grid_coord.x; + const int coord_y = grid_coord.y; + const int coord_layer = grid_coord.layer; const auto& node_lookup = device_ctx.rr_graph.node_lookup(); /* Handle each pin */ auto logical_block = clustering_ctx.clb_nlist.block_type(blk_id); - auto physical_tile = device_ctx.grid.get_physical_type(grid_coord.x(), grid_coord.y()); + auto physical_tile = device_ctx.grid.get_physical_type({coord_x, coord_y, coord_layer}); /* Narrow down side search for grids * The wanted side depends on the location of the grid. 
@@ -87,16 +90,16 @@ static void update_cluster_pin_with_post_routing_results(const Netlist<>& net_li * ------------------------------------------------------- */ std::vector wanted_sides; - if (device_ctx.grid.height() - 1 == grid_coord.y()) { /* TOP side */ + if ((int)device_ctx.grid.height() - 1 == coord_y) { /* TOP side */ wanted_sides.push_back(BOTTOM); } - if (device_ctx.grid.width() - 1 == grid_coord.x()) { /* RIGHT side */ + if ((int)device_ctx.grid.width() - 1 == coord_x) { /* RIGHT side */ wanted_sides.push_back(LEFT); } - if (0 == grid_coord.y()) { /* BOTTOM side */ + if (0 == coord_y) { /* BOTTOM side */ wanted_sides.push_back(TOP); } - if (0 == grid_coord.x()) { /* LEFT side */ + if (0 == coord_x) { /* LEFT side */ wanted_sides.push_back(RIGHT); } @@ -155,7 +158,7 @@ static void update_cluster_pin_with_post_routing_results(const Netlist<>& net_li short valid_routing_net_cnt = 0; for (const e_side& pin_side : pin_sides) { /* Find the net mapped to this pin in routing results */ - RRNodeId rr_node = node_lookup.find_node(grid_coord.x(), grid_coord.y(), rr_node_type, physical_pin, pin_side); + RRNodeId rr_node = node_lookup.find_node(coord_layer, coord_x, coord_y, rr_node_type, physical_pin, pin_side); /* Bypass invalid nodes, after that we must have a valid rr_node id */ if (!rr_node) { @@ -238,13 +241,14 @@ static void update_cluster_pin_with_post_routing_results(const Netlist<>& net_li } VTR_LOGV(verbose, - "Fixed up net '%s' mapping mismatch at clustered block '%s' pin 'grid[%ld][%ld].%s.%s[%d]' (was net '%s')\n", + "Fixed up net '%s' mapping mismatch at clustered block '%s' pin 'grid[%ld][%ld].%s.%s[%d] - layer %d' (was net '%s')\n", routing_net_name.c_str(), clustering_ctx.clb_nlist.block_pb(blk_id)->name, - grid_coord.x(), grid_coord.y(), + coord_x, coord_y, clustering_ctx.clb_nlist.block_pb(blk_id)->pb_graph_node->pb_type->name, get_pb_graph_node_pin_from_block_pin(blk_id, physical_pin)->port->name, get_pb_graph_node_pin_from_block_pin(blk_id, 
physical_pin)->pin_number, + coord_layer, cluster_net_name.c_str()); /* Update counter */ @@ -1079,8 +1083,6 @@ void sync_netlists_to_routing(const Netlist<>& net_list, clb_blk_id = convert_to_cluster_block_id(blk_id); } VTR_ASSERT(clb_blk_id != ClusterBlockId::INVALID()); - vtr::Point grid_coord(placement_ctx.block_locs[clb_blk_id].loc.x, - placement_ctx.block_locs[clb_blk_id].loc.y); if (seen_block_ids.insert(clb_blk_id).second) { update_cluster_pin_with_post_routing_results(net_list, @@ -1088,9 +1090,8 @@ void sync_netlists_to_routing(const Netlist<>& net_list, device_ctx, clustering_ctx, rr_node_nets, - grid_coord, + placement_ctx.block_locs[clb_blk_id].loc, clb_blk_id, - placement_ctx.block_locs[clb_blk_id].loc.sub_tile, num_mismatches, verbose, is_flat); diff --git a/vpr/src/pack/re_cluster.cpp b/vpr/src/pack/re_cluster.cpp index 92f52b8f8e5..34e0ada9669 100644 --- a/vpr/src/pack/re_cluster.cpp +++ b/vpr/src/pack/re_cluster.cpp @@ -26,7 +26,7 @@ bool move_mol_to_new_cluster(t_pack_molecule* molecule, unsigned int num_instances = 0; for (auto equivalent_tile : block_type->equivalent_tiles) { - num_instances += device_ctx.grid.num_instances(equivalent_tile); + num_instances += device_ctx.grid.num_instances(equivalent_tile, -1); } if (helper_ctx.num_used_type_instances[block_type] == num_instances) { diff --git a/vpr/src/place/centroid_move_generator.cpp b/vpr/src/place/centroid_move_generator.cpp index 68b579787d6..22e2a4ed6a9 100644 --- a/vpr/src/place/centroid_move_generator.cpp +++ b/vpr/src/place/centroid_move_generator.cpp @@ -20,7 +20,7 @@ e_create_move CentroidMoveGenerator::propose_move(t_pl_blocks_to_be_moved& block t_pl_loc from = place_ctx.block_locs[b_from].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); - auto grid_from_type = device_ctx.grid.get_physical_type(from.x, from.y); + auto grid_from_type = device_ctx.grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, 
cluster_from_type)); t_range_limiters range_limiters; diff --git a/vpr/src/place/compressed_grid.cpp b/vpr/src/place/compressed_grid.cpp index cb7e184845b..7ba21771524 100644 --- a/vpr/src/place/compressed_grid.cpp +++ b/vpr/src/place/compressed_grid.cpp @@ -5,20 +5,27 @@ std::vector create_compressed_block_grids() { auto& device_ctx = g_vpr_ctx.device(); auto& grid = device_ctx.grid; + const int num_layers = grid.get_num_layers(); //Collect the set of x/y locations for each instace of a block type - std::vector>> block_locations(device_ctx.logical_block_types.size()); - for (size_t x = 0; x < grid.width(); ++x) { - for (size_t y = 0; y < grid.height(); ++y) { - int width_offset = grid.get_width_offset(x, y); - int height_offset = grid.get_height_offset(x, y); - if (width_offset == 0 && height_offset == 0) { - const auto& type = grid.get_physical_type(x, y); - auto equivalent_sites = get_equivalent_sites_set(type); - - for (auto& block : equivalent_sites) { - //Only record at block root location - block_locations[block->index].emplace_back(x, y); + std::vector>>> block_locations(device_ctx.logical_block_types.size()); // [logical_block_type][layer_num][0...num_instance_on_layer] -> (x, y) + for (int block_type_num = 0; block_type_num < (int)device_ctx.logical_block_types.size(); block_type_num++) { + block_locations[block_type_num].resize(num_layers); + } + + for (int layer_num = 0; layer_num < num_layers; layer_num++) { + for (int x = 0; x < (int)grid.width(); ++x) { + for (int y = 0; y < (int)grid.height(); ++y) { + int width_offset = grid.get_width_offset({x, y, layer_num}); + int height_offset = grid.get_height_offset(t_physical_tile_loc(x, y, layer_num)); + if (width_offset == 0 && height_offset == 0) { + const auto& type = grid.get_physical_type({x, y, layer_num}); + auto equivalent_sites = get_equivalent_sites_set(type); + + for (auto& block : equivalent_sites) { + //Only record at block root location + 
block_locations[block->index][layer_num].emplace_back(x, y); + } } } } @@ -26,7 +33,7 @@ std::vector create_compressed_block_grids() { std::vector compressed_type_grids(device_ctx.logical_block_types.size()); for (const auto& logical_block : device_ctx.logical_block_types) { - auto compressed_block_grid = create_compressed_block_grid(block_locations[logical_block.index]); + auto compressed_block_grid = create_compressed_block_grid(block_locations[logical_block.index], num_layers); for (const auto& physical_tile : logical_block.equivalent_tiles) { std::vector compatible_sub_tiles; @@ -55,7 +62,7 @@ std::vector create_compressed_block_grids() { } //Given a set of locations, returns a 2D matrix in a compressed space -t_compressed_block_grid create_compressed_block_grid(const std::vector>& locations) { +t_compressed_block_grid create_compressed_block_grid(const std::vector>>& locations, int num_layers) { t_compressed_block_grid compressed_grid; if (locations.empty()) { @@ -63,120 +70,111 @@ t_compressed_block_grid create_compressed_block_grid(const std::vector x_locs; - std::vector y_locs; - - //Record all the x/y locations seperately - for (auto point : locations) { - x_locs.emplace_back(point.x()); - y_locs.emplace_back(point.y()); - } + std::vector> x_locs(num_layers); + std::vector> y_locs(num_layers); + compressed_grid.compressed_to_grid_x.resize(num_layers); + compressed_grid.compressed_to_grid_y.resize(num_layers); + for (int layer_num = 0; layer_num < num_layers; layer_num++) { + auto& layer_x_locs = x_locs[layer_num]; + auto& layer_y_locs = y_locs[layer_num]; + //Record all the x/y locations seperately + for (auto point : locations[layer_num]) { + layer_x_locs.emplace_back(point.x()); + layer_y_locs.emplace_back(point.y()); + } - //Uniquify x/y locations - std::sort(x_locs.begin(), x_locs.end()); - x_locs.erase(unique(x_locs.begin(), x_locs.end()), x_locs.end()); + //Uniquify x/y locations + std::sort(layer_x_locs.begin(), layer_x_locs.end()); + 
layer_x_locs.erase(unique(layer_x_locs.begin(), layer_x_locs.end()), layer_x_locs.end()); - std::sort(y_locs.begin(), y_locs.end()); - y_locs.erase(unique(y_locs.begin(), y_locs.end()), y_locs.end()); + std::sort(layer_y_locs.begin(), layer_y_locs.end()); + layer_y_locs.erase(unique(layer_y_locs.begin(), layer_y_locs.end()), layer_y_locs.end()); - //The index of an x-position in x_locs corresponds to it's compressed - //x-coordinate (similarly for y) - compressed_grid.compressed_to_grid_x = x_locs; - compressed_grid.compressed_to_grid_y = y_locs; + //The index of an x-position in x_locs corresponds to it's compressed + //x-coordinate (similarly for y) + if (!layer_x_locs.empty()) { + compressed_grid.compressed_to_grid_layer.push_back(layer_num); + } + compressed_grid.compressed_to_grid_x[layer_num] = std::move(layer_x_locs); + compressed_grid.compressed_to_grid_y[layer_num] = std::move(layer_y_locs); + } } - // - //Build the compressed grid - // - - //Create a full/dense x-dimension (since there must be at least one - //block per x location) - compressed_grid.grid.resize(compressed_grid.compressed_to_grid_x.size()); - - //Fill-in the y-dimensions - // - //Note that we build the y-dimension sparsely (using a flat map), since - //there may not be full columns of blocks at each x location, this makes - //it efficient to find the non-empty blocks in the y dimension - for (auto point : locations) { - //Determine the compressed indices in the x & y dimensions - auto x_itr = std::lower_bound(compressed_grid.compressed_to_grid_x.begin(), compressed_grid.compressed_to_grid_x.end(), point.x()); - int cx = std::distance(compressed_grid.compressed_to_grid_x.begin(), x_itr); - - auto y_itr = std::lower_bound(compressed_grid.compressed_to_grid_y.begin(), compressed_grid.compressed_to_grid_y.end(), point.y()); - int cy = std::distance(compressed_grid.compressed_to_grid_y.begin(), y_itr); - - VTR_ASSERT(cx >= 0 && cx < (int)compressed_grid.compressed_to_grid_x.size()); - 
VTR_ASSERT(cy >= 0 && cy < (int)compressed_grid.compressed_to_grid_y.size()); - - VTR_ASSERT(compressed_grid.compressed_to_grid_x[cx] == point.x()); - VTR_ASSERT(compressed_grid.compressed_to_grid_y[cy] == point.y()); - - auto result = compressed_grid.grid[cx].insert(std::make_pair(cy, t_type_loc(point.x(), point.y()))); - - VTR_ASSERT_MSG(result.second, "Duplicates should not exist in compressed grid space"); + compressed_grid.grid.resize(num_layers); + for (int layer_num = 0; layer_num < num_layers; layer_num++) { + auto& layer_compressed_grid = compressed_grid.grid[layer_num]; + const auto& layer_compressed_x_locs = compressed_grid.compressed_to_grid_x[layer_num]; + const auto& layer_compressed_y_locs = compressed_grid.compressed_to_grid_y[layer_num]; + // + //Build the compressed grid + // + + //Create a full/dense x-dimension (since there must be at least one + //block per x location) + layer_compressed_grid.resize(layer_compressed_x_locs.size()); + + //Fill-in the y-dimensions + // + //Note that we build the y-dimension sparsely (using a flat map), since + //there may not be full columns of blocks at each x location, this makes + //it efficient to find the non-empty blocks in the y dimension + for (auto point : locations[layer_num]) { + //Determine the compressed indices in the x & y dimensions + auto x_itr = std::lower_bound(layer_compressed_x_locs.begin(), layer_compressed_x_locs.end(), point.x()); + int cx = std::distance(layer_compressed_x_locs.begin(), x_itr); + + auto y_itr = std::lower_bound(layer_compressed_y_locs.begin(), layer_compressed_y_locs.end(), point.y()); + int cy = std::distance(layer_compressed_y_locs.begin(), y_itr); + + VTR_ASSERT(cx >= 0 && cx < (int)layer_compressed_x_locs.size()); + VTR_ASSERT(cy >= 0 && cy < (int)layer_compressed_y_locs.size()); + + VTR_ASSERT(layer_compressed_x_locs[cx] == point.x()); + VTR_ASSERT(layer_compressed_y_locs[cy] == point.y()); + + auto result = layer_compressed_grid[cx].insert(std::make_pair(cy, 
t_physical_tile_loc(point.x(), point.y(), layer_num))); + + VTR_ASSERT_MSG(result.second, "Duplicates should not exist in compressed grid space"); + } } return compressed_grid; } -int grid_to_compressed(const std::vector& coords, int point) { - auto itr = std::lower_bound(coords.begin(), coords.end(), point); - VTR_ASSERT(*itr == point); - - return std::distance(coords.begin(), itr); -} - -/** - * @brief find the nearest location in the compressed grid. - * - * Useful when the point is of a different block type from coords. - * - * @param point represents a coordinate in one dimension of the point - * @param coords represents vector of coordinate values of a single type only - * - * Hence, the exact point coordinate will not be found in coords if they are of different block types. In this case the function will return - * the nearest compressed location to point by rounding it down - */ -int grid_to_compressed_approx(const std::vector& coords, int point) { - auto itr = std::lower_bound(coords.begin(), coords.end(), point); - if (itr == coords.end()) - return std::distance(coords.begin(), itr - 1); - return std::distance(coords.begin(), itr); -} - /*Print the contents of the compressed grids to an echo file*/ void echo_compressed_grids(char* filename, const std::vector& comp_grids) { FILE* fp; fp = vtr::fopen(filename, "w"); auto& device_ctx = g_vpr_ctx.device(); + int num_layers = device_ctx.grid.get_num_layers(); fprintf(fp, "--------------------------------------------------------------\n"); fprintf(fp, "Compressed Grids: \n"); fprintf(fp, "--------------------------------------------------------------\n"); fprintf(fp, "\n"); - - for (int i = 0; i < (int)comp_grids.size(); i++) { - fprintf(fp, "\n\nGrid type: %s \n", device_ctx.logical_block_types[i].name); - - fprintf(fp, "X coordinates: \n"); - for (int j = 0; j < (int)comp_grids[i].compressed_to_grid_x.size(); j++) { - fprintf(fp, "%d ", comp_grids[i].compressed_to_grid_x[j]); - } + for (int layer_num = 0; 
layer_num < num_layers; layer_num++) { + fprintf(fp, "Layer Num: %d \n", layer_num); + fprintf(fp, "--------------------------------------------------------------\n"); fprintf(fp, "\n"); + for (int i = 0; i < (int)comp_grids.size(); i++) { + fprintf(fp, "\n\nGrid type: %s \n", device_ctx.logical_block_types[i].name); - fprintf(fp, "Y coordinates: \n"); - for (int k = 0; k < (int)comp_grids[i].compressed_to_grid_y.size(); k++) { - fprintf(fp, "%d ", comp_grids[i].compressed_to_grid_y[k]); - } - fprintf(fp, "\n"); + fprintf(fp, "X coordinates: \n"); + for (int j = 0; j < (int)comp_grids[i].compressed_to_grid_x.size(); j++) { + auto grid_loc = comp_grids[i].compressed_loc_to_grid_loc({j, 0, layer_num}); + fprintf(fp, "%d ", grid_loc.x); + } + fprintf(fp, "\n"); - fprintf(fp, "Subtiles: \n"); - for (int s = 0; s < (int)comp_grids[i].compatible_sub_tiles_for_tile.size(); s++) { - fprintf(fp, "%d ", comp_grids[i].compressed_to_grid_y[s]); + fprintf(fp, "Y coordinates: \n"); + for (int k = 0; k < (int)comp_grids[i].compressed_to_grid_y.size(); k++) { + auto grid_loc = comp_grids[i].compressed_loc_to_grid_loc({0, k, layer_num}); + fprintf(fp, "%d ", grid_loc.y); + } + fprintf(fp, "\n"); + //TODO: Print the compatible sub-tiles for a logical block type } - fprintf(fp, "\n"); } fclose(fp); diff --git a/vpr/src/place/compressed_grid.h b/vpr/src/place/compressed_grid.h index 49c652607fb..067815591cc 100644 --- a/vpr/src/place/compressed_grid.h +++ b/vpr/src/place/compressed_grid.h @@ -6,38 +6,102 @@ #include "vtr_geometry.h" #include "vtr_flat_map.h" -struct t_type_loc { - int x = OPEN; - int y = OPEN; - - t_type_loc(int x_val, int y_val) - : x(x_val) - , y(y_val) {} - - //Returns true if this type location has valid x/y values - operator bool() const { - return !(x == OPEN || y == OPEN); - } -}; - struct t_compressed_block_grid { + // The compressed grid of a block type stores only the coordinates that are occupied by that particular block type. 
+ // For instance, if a DSP block exists only in the 2nd, 3rd, and 5th columns, the compressed grid of X axis will solely store the values 2, 3, and 5. + // Consequently, compressed_to_grid_x will contain only three members. The same approach is applicable to other compressed directions. + // This compressed data structure helps to move blocks in a more efficient way. For instance, if I need to move a DSP block to the next compatible column, I can simply get + // the next compatible column number by accessing the next element in the compressed grid instead of iterating over all columns to find the next compatible column. //If 'cx' is an index in the compressed grid space, then //'compressed_to_grid_x[cx]' is the corresponding location in the //full (uncompressed) device grid. - std::vector compressed_to_grid_x; - std::vector compressed_to_grid_y; + std::vector> compressed_to_grid_x; // [0...num_layers-1][0...num_columns-1] -> uncompressed x + std::vector> compressed_to_grid_y; // [0...num_layers-1][0...num_rows-1] -> uncompressed y + std::vector compressed_to_grid_layer; // [0...num_layers-1] -> uncompressed layer //The grid is stored with a full/dense x-dimension (since only //x values which exist are considered), while the y-dimension is //stored sparsely, since we may not have full columns of blocks. 
//This makes it easy to check whether there exist - std::vector> grid; + std::vector>> grid; //The sub type compatibility for a given physical tile and a compressed block grid //corresponding to the possible placement location for a given logical block // - key: physical tile index // - value: vector of compatible sub tiles for the physical tile/logical block pair std::unordered_map> compatible_sub_tiles_for_tile; + + inline size_t get_num_columns(int layer_num) const { + return compressed_to_grid_x[layer_num].size(); + } + + inline size_t get_num_rows(int layer_num) const { + return compressed_to_grid_y[layer_num].size(); + } + + inline t_physical_tile_loc grid_loc_to_compressed_loc(t_physical_tile_loc grid_loc) const { + int cx = OPEN; + int cy = OPEN; + int layer_num = grid_loc.layer_num; + + auto itr_x = std::lower_bound(compressed_to_grid_x[layer_num].begin(), compressed_to_grid_x[layer_num].end(), grid_loc.x); + VTR_ASSERT(*itr_x == grid_loc.x); + cx = std::distance(compressed_to_grid_x[layer_num].begin(), itr_x); + + auto itr_y = std::lower_bound(compressed_to_grid_y[layer_num].begin(), compressed_to_grid_y[layer_num].end(), grid_loc.y); + VTR_ASSERT(*itr_y == grid_loc.y); + cy = std::distance(compressed_to_grid_y[layer_num].begin(), itr_y); + + return {cx, cy, layer_num}; + } + + /** + * @brief find the nearest location in the compressed grid. + * + * Useful when the point is of a different block type from coords. + * + * @param point represents a coordinate in one dimension of the point + * @param coords represents vector of coordinate values of a single type only + * + * Hence, the exact point coordinate will not be found in coords if they are of different block types. 
In this case the function will return + * the nearest compressed location to point by rounding it down + */ + inline t_physical_tile_loc grid_loc_to_compressed_loc_approx(t_physical_tile_loc grid_loc) const { + int cx = OPEN; + int cy = OPEN; + int layer_num = grid_loc.layer_num; + + auto itr_x = std::lower_bound(compressed_to_grid_x[layer_num].begin(), compressed_to_grid_x[layer_num].end(), grid_loc.x); + if (itr_x == compressed_to_grid_x[layer_num].end()) + cx = std::distance(compressed_to_grid_x[layer_num].begin(), itr_x - 1); + else + cx = std::distance(compressed_to_grid_x[layer_num].begin(), itr_x); + + auto itr_y = std::lower_bound(compressed_to_grid_y[layer_num].begin(), compressed_to_grid_y[layer_num].end(), grid_loc.y); + if (itr_y == compressed_to_grid_y[layer_num].end()) + cy = std::distance(compressed_to_grid_y[layer_num].begin(), itr_y - 1); + else + cy = std::distance(compressed_to_grid_y[layer_num].begin(), itr_y); + + return {cx, cy, layer_num}; + } + + inline t_physical_tile_loc compressed_loc_to_grid_loc(t_physical_tile_loc compressed_loc) const { + int layer_num = compressed_loc.layer_num; + return {compressed_to_grid_x[layer_num][compressed_loc.x], compressed_to_grid_y[layer_num][compressed_loc.y], layer_num}; + } + + inline const std::vector& compatible_sub_tile_num(int physical_type_index) const { + return compatible_sub_tiles_for_tile.at(physical_type_index); + } + + inline const vtr::flat_map2& get_column_block_map(int cx, int layer_num) const { + return grid[layer_num][cx]; + } + + inline const std::vector& get_layer_nums() const { + return compressed_to_grid_layer; + } }; //Compressed grid space for each block type @@ -47,22 +111,7 @@ typedef std::vector t_compressed_block_grids; std::vector create_compressed_block_grids(); -t_compressed_block_grid create_compressed_block_grid(const std::vector>& locations); - -int grid_to_compressed(const std::vector& coords, int point); - -/** - * @brief find the nearest location in the compressed grid. 
- * - * Useful when the point is of a different block type from coords. - * - * @param point represents a coordinate in one dimension of the point - * @param coords represents vector of coordinate values of a single type only - * - * Hence, the exact point coordinate will not be found in coords if they are of different block types. In this case the function will return - * the nearest compressed location to point by rounding it down - */ -int grid_to_compressed_approx(const std::vector& coords, int point); +t_compressed_block_grid create_compressed_block_grid(const std::vector>>& locations, int num_layers); /** * @brief print the contents of the compressed grids to an echo file diff --git a/vpr/src/place/critical_uniform_move_generator.cpp b/vpr/src/place/critical_uniform_move_generator.cpp index 4c50c4688ab..32d531138a5 100644 --- a/vpr/src/place/critical_uniform_move_generator.cpp +++ b/vpr/src/place/critical_uniform_move_generator.cpp @@ -18,7 +18,7 @@ e_create_move CriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved t_pl_loc from = place_ctx.block_locs[b_from].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); - auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type(from.x, from.y); + auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); t_pl_loc to; diff --git a/vpr/src/place/cut_spreader.cpp b/vpr/src/place/cut_spreader.cpp index 4bce2068b70..695b8ffc275 100644 --- a/vpr/src/place/cut_spreader.cpp +++ b/vpr/src/place/cut_spreader.cpp @@ -175,14 +175,16 @@ void CutSpreader::init() { } int CutSpreader::occ_at(int x, int y) { - if (!is_loc_on_chip(x, y)) { + //TODO: layer_num should be passed + if (!is_loc_on_chip({x, y, 0})) { return 0; } return occupancy[x][y]; } int CutSpreader::tiles_at(int x, int y) { - if (!is_loc_on_chip(x, y)) { + //TODO: layer_num should be passed + if (!is_loc_on_chip({x, y, 0})) { 
return 0; } return int(subtiles_at_location[x][y].size()); @@ -200,7 +202,8 @@ int CutSpreader::tiles_at(int x, int y) { void CutSpreader::merge_regions(SpreaderRegion& merged, SpreaderRegion& mergee) { for (int x = mergee.bb.xmin(); x <= mergee.bb.xmax(); x++) for (int y = mergee.bb.ymin(); y <= mergee.bb.ymax(); y++) { - if (!is_loc_on_chip(x, y)) { //location is not within the chip + //TODO: layer_num should be passed + if (!is_loc_on_chip({x, y, 0})) { //location is not within the chip continue; } //x and y might belong to "merged" region already, no further action is required @@ -235,7 +238,8 @@ void CutSpreader::grow_region(SpreaderRegion& r, vtr::Rect rect_to_include, auto process_location = [&](int x, int y) { //x and y should represent a location on the chip, otherwise no processing is required - if (!is_loc_on_chip(x, y)) { + //TODO: layer_num should be passed + if (!is_loc_on_chip({x, y, 0})) { return; } // kicks in only when grid is not claimed, claimed by another region, or part of a macro @@ -403,6 +407,10 @@ std::pair CutSpreader::cut_region(SpreaderRegion& r, bool dir) { const ClusteredNetlist& clb_nlist = g_vpr_ctx.clustering().clb_nlist; PlacementContext& place_ctx = g_vpr_ctx.mutable_placement(); + // TODO: CutSpreader is not compatible with 3D FPGA + VTR_ASSERT(device_ctx.grid.get_num_layers() == 1); + int layer_num = 0; + std::vector cut_blks; init_cut_blks(r, cut_blks); // copy all logic blocks to cut into cut_blks @@ -416,13 +424,13 @@ std::pair CutSpreader::cut_region(SpreaderRegion& r, bool dir) { auto blk = cut_blks.at(0); auto& tiles_type = clb_nlist.block_type(blk)->equivalent_tiles; auto loc = ap->blk_locs[blk].loc; - if (std::find(tiles_type.begin(), tiles_type.end(), device_ctx.grid.get_physical_type(loc.x, loc.y)) == tiles_type.end()) { + if (std::find(tiles_type.begin(), tiles_type.end(), device_ctx.grid.get_physical_type({loc.x, loc.y, loc.layer})) == tiles_type.end()) { // logic block type doesn't match tile type // exhaustive 
search for tile of right type // this search should be fast as region must be small at this point (only 1 logic block left) for (int x = r.bb.xmin(); x <= r.bb.xmax(); x++) for (int y = r.bb.ymin(); y <= r.bb.ymax(); y++) { - if (std::find(tiles_type.begin(), tiles_type.end(), device_ctx.grid.get_physical_type(x, y)) != tiles_type.end()) { + if (std::find(tiles_type.begin(), tiles_type.end(), device_ctx.grid.get_physical_type({x, y, layer_num})) != tiles_type.end()) { VTR_ASSERT(blks_at_location[x][y].empty()); ap->blk_locs[blk].rawx = x; ap->blk_locs[blk].rawy = y; @@ -954,11 +962,12 @@ void CutSpreader::strict_legalize() { */ void CutSpreader::bind_tile(t_pl_loc sub_tile, ClusterBlockId blk) { auto& place_ctx = g_vpr_ctx.mutable_placement(); - VTR_ASSERT(place_ctx.grid_blocks[sub_tile.x][sub_tile.y].blocks[sub_tile.sub_tile] == EMPTY_BLOCK_ID); + VTR_ASSERT(place_ctx.grid_blocks.block_at_location(sub_tile) == EMPTY_BLOCK_ID); VTR_ASSERT(place_ctx.block_locs[blk].is_fixed == false); - place_ctx.grid_blocks[sub_tile.x][sub_tile.y].blocks[sub_tile.sub_tile] = blk; + place_ctx.grid_blocks.set_block_at_location(sub_tile, blk); place_ctx.block_locs[blk].loc = sub_tile; - place_ctx.grid_blocks[sub_tile.x][sub_tile.y].usage++; + place_ctx.grid_blocks.set_usage({sub_tile.x, sub_tile.y, sub_tile.layer}, + place_ctx.grid_blocks.get_usage({sub_tile.x, sub_tile.y, sub_tile.layer}) + 1); ap->blk_locs[blk].loc = sub_tile; } @@ -968,12 +977,13 @@ void CutSpreader::bind_tile(t_pl_loc sub_tile, ClusterBlockId blk) { */ void CutSpreader::unbind_tile(t_pl_loc sub_tile) { auto& place_ctx = g_vpr_ctx.mutable_placement(); - VTR_ASSERT(place_ctx.grid_blocks[sub_tile.x][sub_tile.y].blocks[sub_tile.sub_tile] != EMPTY_BLOCK_ID); - ClusterBlockId blk = place_ctx.grid_blocks[sub_tile.x][sub_tile.y].blocks[sub_tile.sub_tile]; + VTR_ASSERT(place_ctx.grid_blocks.block_at_location(sub_tile) != EMPTY_BLOCK_ID); + ClusterBlockId blk = place_ctx.grid_blocks.block_at_location(sub_tile); 
VTR_ASSERT(place_ctx.block_locs[blk].is_fixed == false); place_ctx.block_locs[blk].loc = t_pl_loc{}; - place_ctx.grid_blocks[sub_tile.x][sub_tile.y].blocks[sub_tile.sub_tile] = EMPTY_BLOCK_ID; - place_ctx.grid_blocks[sub_tile.x][sub_tile.y].usage--; + place_ctx.grid_blocks.set_block_at_location(sub_tile, EMPTY_BLOCK_ID); + place_ctx.grid_blocks.set_usage({sub_tile.x, sub_tile.y, sub_tile.layer}, + place_ctx.grid_blocks.get_usage({sub_tile.x, sub_tile.y, sub_tile.layer}) - 1); } /* @@ -985,7 +995,7 @@ bool CutSpreader::is_placed(ClusterBlockId blk) { auto& place_ctx = g_vpr_ctx.mutable_placement(); if (place_ctx.block_locs[blk].loc != t_pl_loc{}) { auto loc = place_ctx.block_locs[blk].loc; - VTR_ASSERT(place_ctx.grid_blocks[loc.x][loc.y].blocks[loc.sub_tile] == blk); + VTR_ASSERT(place_ctx.grid_blocks.block_at_location(loc) == blk); return true; } return false; @@ -1022,7 +1032,7 @@ bool CutSpreader::try_place_blk(ClusterBlockId blk, // then blk is placed in best_subtile if (exceeds_explore_limit && best_subtile != t_pl_loc{}) { // find the logic block bound to (placed on) best_subtile - ClusterBlockId bound_blk = place_ctx.grid_blocks[best_subtile.x][best_subtile.y].blocks[best_subtile.sub_tile]; + ClusterBlockId bound_blk = place_ctx.grid_blocks.block_at_location(best_subtile); if (bound_blk != EMPTY_BLOCK_ID) { // if best_subtile has a logic block unbind_tile(best_subtile); // clear bound_block and best_subtile's placement info remaining.emplace(1, bound_blk); // put bound_blk back into remaining blocks to place @@ -1032,8 +1042,8 @@ bool CutSpreader::try_place_blk(ClusterBlockId blk, } // if exploration limit is not met or a candidate sub_tile is not found yet - for (auto sub_t : subtiles_at_location[nx][ny]) { // for each available sub_tile at random location - ClusterBlockId bound_blk = place_ctx.grid_blocks[sub_t.x][sub_t.y].blocks[sub_t.sub_tile]; // logic blk at [nx, ny] + for (auto sub_t : subtiles_at_location[nx][ny]) { // for each available sub_tile at 
random location + ClusterBlockId bound_blk = place_ctx.grid_blocks.block_at_location(sub_t); // logic blk at [nx, ny] if (bound_blk == EMPTY_BLOCK_ID || ripup_radius_met || rand() % (20000) < 10) { @@ -1109,7 +1119,7 @@ bool CutSpreader::try_place_macro(ClusterBlockId blk, // ensure the target location has compatible tile auto blk_t = clb_nlist.block_type(blk); - auto result = std::find(blk_t->equivalent_tiles.begin(), blk_t->equivalent_tiles.end(), g_vpr_ctx.device().grid.get_physical_type(target.x, target.y)); + auto result = std::find(blk_t->equivalent_tiles.begin(), blk_t->equivalent_tiles.end(), g_vpr_ctx.device().grid.get_physical_type({target.x, target.y, target.layer})); if (result == blk_t->equivalent_tiles.end()) { placement_impossible = true; break; @@ -1117,7 +1127,7 @@ bool CutSpreader::try_place_macro(ClusterBlockId blk, // if the target location has a logic block, ensure it's not part of a macro // because a macro placed before the current one has higher priority (longer chain) - ClusterBlockId bound = place_ctx.grid_blocks[target.x][target.y].blocks[target.sub_tile]; + ClusterBlockId bound = place_ctx.grid_blocks.block_at_location(target); if (bound != EMPTY_BLOCK_ID && imacro(bound) != NO_MACRO) { placement_impossible = true; break; @@ -1136,7 +1146,7 @@ bool CutSpreader::try_place_macro(ClusterBlockId blk, if (!placement_impossible) { // if placement is possible, apply this placement for (auto& target : targets) { - ClusterBlockId bound = place_ctx.grid_blocks[target.second.x][target.second.y].blocks[target.second.sub_tile]; + ClusterBlockId bound = place_ctx.grid_blocks.block_at_location(target.second); if (bound != EMPTY_BLOCK_ID) { // if target location has a logic block, displace it and put it in remaining queue to be placed later unbind_tile(target.second); diff --git a/vpr/src/place/directed_moves_util.cpp b/vpr/src/place/directed_moves_util.cpp index bf412386057..4e1c3f618b1 100644 --- a/vpr/src/place/directed_moves_util.cpp +++ 
b/vpr/src/place/directed_moves_util.cpp @@ -1,6 +1,6 @@ #include "directed_moves_util.h" -void get_coordinate_of_pin(ClusterPinId pin, int& x, int& y) { +void get_coordinate_of_pin(ClusterPinId pin, t_physical_tile_loc& tile_loc) { auto& device_ctx = g_vpr_ctx.device(); auto& grid = device_ctx.grid; auto& place_ctx = g_vpr_ctx.placement(); @@ -9,22 +9,27 @@ void get_coordinate_of_pin(ClusterPinId pin, int& x, int& y) { int pnum = tile_pin_index(pin); ClusterBlockId block = cluster_ctx.clb_nlist.pin_block(pin); - x = place_ctx.block_locs[block].loc.x + physical_tile_type(block)->pin_width_offset[pnum]; - y = place_ctx.block_locs[block].loc.y + physical_tile_type(block)->pin_height_offset[pnum]; + tile_loc.x = place_ctx.block_locs[block].loc.x + physical_tile_type(block)->pin_width_offset[pnum]; + tile_loc.y = place_ctx.block_locs[block].loc.y + physical_tile_type(block)->pin_height_offset[pnum]; + tile_loc.layer_num = place_ctx.block_locs[block].loc.layer; - x = std::max(std::min(x, (int)grid.width() - 2), 1); //-2 for no perim channels - y = std::max(std::min(y, (int)grid.height() - 2), 1); //-2 for no perim channels + tile_loc.x = std::max(std::min(tile_loc.x, (int)grid.width() - 2), 1); //-2 for no perim channels + tile_loc.y = std::max(std::min(tile_loc.y, (int)grid.height() - 2), 1); //-2 for no perim channels } void calculate_centroid_loc(ClusterBlockId b_from, bool timing_weights, t_pl_loc& centroid, const PlacerCriticalities* criticalities) { auto& cluster_ctx = g_vpr_ctx.clustering(); - int x, y, ipin; + t_physical_tile_loc tile_loc; + int ipin; float acc_weight = 0; float acc_x = 0; float acc_y = 0; float weight = 1; + int from_block_layer_num = g_vpr_ctx.placement().block_locs[b_from].loc.layer; + VTR_ASSERT(from_block_layer_num != OPEN); + //iterate over the from block pins for (ClusterPinId pin_id : cluster_ctx.clb_nlist.block_pins(b_from)) { ClusterNetId net_id = cluster_ctx.clb_nlist.pin_net(pin_id); @@ -56,10 +61,10 @@ void 
calculate_centroid_loc(ClusterBlockId b_from, bool timing_weights, t_pl_loc weight = 1; } - get_coordinate_of_pin(sink_pin_id, x, y); + get_coordinate_of_pin(sink_pin_id, tile_loc); - acc_x += x * weight; - acc_y += y * weight; + acc_x += tile_loc.x * weight; + acc_y += tile_loc.y * weight; acc_weight += weight; } } @@ -75,10 +80,10 @@ void calculate_centroid_loc(ClusterBlockId b_from, bool timing_weights, t_pl_loc ClusterPinId source_pin = cluster_ctx.clb_nlist.net_driver(net_id); - get_coordinate_of_pin(source_pin, x, y); + get_coordinate_of_pin(source_pin, tile_loc); - acc_x += x * weight; - acc_y += y * weight; + acc_x += tile_loc.x * weight; + acc_y += tile_loc.y * weight; acc_weight += weight; } } @@ -86,6 +91,8 @@ void calculate_centroid_loc(ClusterBlockId b_from, bool timing_weights, t_pl_loc //Calculate the centroid location centroid.x = acc_x / acc_weight; centroid.y = acc_y / acc_weight; + // TODO: For now, we don't move the centroid to a different layer + centroid.layer = from_block_layer_num; } static std::map available_reward_function = { diff --git a/vpr/src/place/directed_moves_util.h b/vpr/src/place/directed_moves_util.h index 602d79312a6..d706028dc04 100644 --- a/vpr/src/place/directed_moves_util.h +++ b/vpr/src/place/directed_moves_util.h @@ -17,7 +17,7 @@ enum e_reward_function { e_reward_function string_to_reward(std::string st); ///@brief Helper function that returns the x, y coordinates of a pin -void get_coordinate_of_pin(ClusterPinId pin, int& x, int& y); +void get_coordinate_of_pin(ClusterPinId pin, t_physical_tile_loc& tile_loc); /** * @brief Calculates the exact centroid location diff --git a/vpr/src/place/feasible_region_move_generator.cpp b/vpr/src/place/feasible_region_move_generator.cpp index 01759fe3ccb..ee69aeda5f0 100644 --- a/vpr/src/place/feasible_region_move_generator.cpp +++ b/vpr/src/place/feasible_region_move_generator.cpp @@ -22,7 +22,7 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved& 
//from block data t_pl_loc from = place_ctx.block_locs[b_from].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); - auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type(from.x, from.y); + auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); /* Calculate the feasible region */ @@ -113,6 +113,8 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved& t_pl_loc center; center.x = (FR_coords.xmin + FR_coords.xmax) / 2; center.y = (FR_coords.ymin + FR_coords.ymax) / 2; + // TODO: Currently, we don't move blocks between different types of layers + center.layer = from.layer; if (!find_to_loc_centroid(cluster_from_type, from, center, range_limiters, to, b_from)) return e_create_move::ABORT; } diff --git a/vpr/src/place/grid_tile_lookup.cpp b/vpr/src/place/grid_tile_lookup.cpp index 3b4355ef5e1..92504b6cc55 100644 --- a/vpr/src/place/grid_tile_lookup.cpp +++ b/vpr/src/place/grid_tile_lookup.cpp @@ -1,11 +1,11 @@ #include "grid_tile_lookup.h" -void GridTileLookup::fill_type_matrix(t_logical_block_type_ptr block_type, vtr::NdMatrix& type_count) { +void GridTileLookup::fill_type_matrix(t_logical_block_type_ptr block_type, vtr::NdMatrix& type_count) { auto& device_ctx = g_vpr_ctx.device(); - int num_rows = device_ctx.grid.height(); - int num_cols = device_ctx.grid.width(); - + int num_layers = device_ctx.grid.get_num_layers(); + int width = (int)device_ctx.grid.width(); + int height = (int)device_ctx.grid.height(); /* * Iterating through every location on the grid to store the number of subtiles of * the correct type at each location. For each location, we store the cumulative @@ -13,39 +13,45 @@ void GridTileLookup::fill_type_matrix(t_logical_block_type_ptr block_type, vtr:: * subtiles at the location, plus the number of subtiles at the locations above and to * the right of it. 
*/ - for (int i_col = type_count.dim_size(0) - 1; i_col >= 0; i_col--) { - for (int j_row = type_count.dim_size(1) - 1; j_row >= 0; j_row--) { - const auto& tile = device_ctx.grid.get_physical_type(i_col, j_row); - int height_offset = device_ctx.grid.get_height_offset(i_col, j_row); - int width_offset = device_ctx.grid.get_width_offset(i_col, j_row); - type_count[i_col][j_row] = 0; - - if (is_tile_compatible(tile, block_type) && height_offset == 0 && width_offset == 0) { - for (const auto& sub_tile : tile->sub_tiles) { - if (is_sub_tile_compatible(tile, block_type, sub_tile.capacity.low)) { - type_count[i_col][j_row] = sub_tile.capacity.total(); + std::vector layer_acc_type_count(num_layers, 0); + for (int layer_num = num_layers - 1; layer_num >= 0; layer_num--) { + int num_rows = (int)device_ctx.grid.height(); + int num_cols = (int)device_ctx.grid.width(); + + for (int i_col = width - 1; i_col >= 0; i_col--) { + for (int j_row = height - 1; j_row >= 0; j_row--) { + const auto& tile = device_ctx.grid.get_physical_type({i_col, j_row, layer_num}); + int height_offset = device_ctx.grid.get_height_offset({i_col, j_row, layer_num}); + int width_offset = device_ctx.grid.get_width_offset({i_col, j_row, layer_num}); + type_count[layer_num][i_col][j_row] = 0; + + if (is_tile_compatible(tile, block_type) && height_offset == 0 && width_offset == 0) { + for (const auto& sub_tile : tile->sub_tiles) { + if (is_sub_tile_compatible(tile, block_type, sub_tile.capacity.low)) { + type_count[layer_num][i_col][j_row] = sub_tile.capacity.total(); + layer_acc_type_count[layer_num] += sub_tile.capacity.total(); + } } } - } - if (i_col < num_cols - 1) { - type_count[i_col][j_row] += type_count[i_col + 1][j_row]; - } - if (j_row < num_rows - 1) { - type_count[i_col][j_row] += type_count[i_col][j_row + 1]; - } - if (i_col < (num_cols - 1) && j_row < (num_rows - 1)) { - type_count[i_col][j_row] -= type_count[i_col + 1][j_row + 1]; + if (i_col < num_cols - 1) { + 
type_count[layer_num][i_col][j_row] += type_count[layer_num][i_col + 1][j_row]; + } + if (j_row < num_rows - 1) { + type_count[layer_num][i_col][j_row] += type_count[layer_num][i_col][j_row + 1]; + } + if (i_col < (num_cols - 1) && j_row < (num_rows - 1)) { + type_count[layer_num][i_col][j_row] -= type_count[layer_num][i_col + 1][j_row + 1]; + } + if (layer_num < num_layers - 1) { + type_count[layer_num][i_col][j_row] += layer_acc_type_count[layer_num + 1]; + } } } } //The total number of subtiles for the block type will be at [0][0] - max_placement_locations[block_type->index] = type_count[0][0]; -} - -vtr::NdMatrix& GridTileLookup::get_type_grid(t_logical_block_type_ptr block_type) { - return block_type_matrices[block_type->index]; + max_placement_locations[block_type->index] = type_count[0][0][0]; } int GridTileLookup::total_type_tiles(t_logical_block_type_ptr block_type) { @@ -62,41 +68,46 @@ int GridTileLookup::total_type_tiles(t_logical_block_type_ptr block_type) { int GridTileLookup::region_tile_count(const Region& reg, t_logical_block_type_ptr block_type) { auto& device_ctx = g_vpr_ctx.device(); int subtile = reg.get_sub_tile(); - + int layer_num = reg.get_layer_num(); /*Intersect the region with the grid, in case the region passed in goes out of bounds * By intersecting with the grid, we ensure that we are only counting tiles for the part of the * region that fits on the grid.*/ Region grid_reg; - grid_reg.set_region_rect(0, 0, device_ctx.grid.width() - 1, device_ctx.grid.height() - 1); + grid_reg.set_region_rect({0, + 0, + (int)device_ctx.grid.width() - 1, + (int)device_ctx.grid.height() - 1, + layer_num}); Region intersect_reg; intersect_reg = intersection(reg, grid_reg); - vtr::Rect intersect_rect = intersect_reg.get_region_rect(); + const auto intersect_coord = intersect_reg.get_region_rect(); + VTR_ASSERT(intersect_coord.layer_num == layer_num); - int xmin = intersect_rect.xmin(); - int ymin = intersect_rect.ymin(); - int xmax = intersect_rect.xmax(); 
- int ymax = intersect_rect.ymax(); - auto& type_grid = block_type_matrices[block_type->index]; + int xmin = intersect_coord.xmin; + int ymin = intersect_coord.ymin; + int xmax = intersect_coord.xmax; + int ymax = intersect_coord.ymax; + auto& layer_type_grid = block_type_matrices[block_type->index]; - int xdim = type_grid.dim_size(0); - int ydim = type_grid.dim_size(1); + int xdim = (int)layer_type_grid.dim_size(1); + int ydim = (int)layer_type_grid.dim_size(2); int num_tiles = 0; if (subtile == NO_SUBTILE) { - num_tiles = type_grid[xmin][ymin]; + num_tiles = layer_type_grid[layer_num][xmin][ymin]; if ((ymax + 1) < ydim) { - num_tiles -= type_grid[xmin][ymax + 1]; + num_tiles -= layer_type_grid[layer_num][xmin][ymax + 1]; } if ((xmax + 1) < xdim) { - num_tiles -= type_grid[xmax + 1][ymin]; + num_tiles -= layer_type_grid[layer_num][xmax + 1][ymin]; } if ((xmax + 1) < xdim && (ymax + 1) < ydim) { - num_tiles += type_grid[xmax + 1][ymax + 1]; + num_tiles += layer_type_grid[layer_num][xmax + 1][ymax + 1]; } } else { num_tiles = region_with_subtile_count(reg, block_type); @@ -112,17 +123,18 @@ int GridTileLookup::region_tile_count(const Region& reg, t_logical_block_type_pt int GridTileLookup::region_with_subtile_count(const Region& reg, t_logical_block_type_ptr block_type) { auto& device_ctx = g_vpr_ctx.device(); int num_sub_tiles = 0; - vtr::Rect reg_rect = reg.get_region_rect(); + + const auto reg_coord = reg.get_region_rect(); int subtile = reg.get_sub_tile(); - int xmin = reg_rect.xmin(); - int ymin = reg_rect.ymin(); - int xmax = reg_rect.xmax(); - int ymax = reg_rect.ymax(); + int xmin = reg_coord.xmin; + int ymin = reg_coord.ymin; + int xmax = reg_coord.xmax; + int ymax = reg_coord.ymax; for (int i = xmax; i >= xmin; i--) { for (int j = ymax; j >= ymin; j--) { - const auto& tile = device_ctx.grid.get_physical_type(i, j); + const auto& tile = device_ctx.grid.get_physical_type({i, j, reg_coord.layer_num}); if (is_sub_tile_compatible(tile, block_type, subtile)) { 
num_sub_tiles++; } diff --git a/vpr/src/place/grid_tile_lookup.h b/vpr/src/place/grid_tile_lookup.h index a014e0d5786..3f79e847856 100644 --- a/vpr/src/place/grid_tile_lookup.h +++ b/vpr/src/place/grid_tile_lookup.h @@ -22,15 +22,14 @@ class GridTileLookup { max_placement_locations.resize(device_ctx.logical_block_types.size()); for (const auto& type : device_ctx.logical_block_types) { - vtr::NdMatrix type_count({device_ctx.grid.width(), device_ctx.grid.height()}); + int num_layers = device_ctx.grid.get_num_layers(); + vtr::NdMatrix type_count({static_cast(num_layers), device_ctx.grid.width(), device_ctx.grid.height()}); fill_type_matrix(&type, type_count); block_type_matrices.push_back(type_count); } } - vtr::NdMatrix& get_type_grid(t_logical_block_type_ptr block_type); - - void fill_type_matrix(t_logical_block_type_ptr block_type, vtr::NdMatrix& type_count); + void fill_type_matrix(t_logical_block_type_ptr block_type, vtr::NdMatrix& type_count); int region_tile_count(const Region& reg, t_logical_block_type_ptr block_type); @@ -46,7 +45,7 @@ class GridTileLookup { * give the number of placement locations that are at, or above and to the right of the given [x,y] for * the given block type. */ - std::vector> block_type_matrices; + std::vector> block_type_matrices; /* * Stores the total number of placement locations (i.e. compatible subtiles) for each block type. 
diff --git a/vpr/src/place/initial_placement.cpp b/vpr/src/place/initial_placement.cpp index cd484e7f7f3..38bc772fefc 100644 --- a/vpr/src/place/initial_placement.cpp +++ b/vpr/src/place/initial_placement.cpp @@ -271,11 +271,14 @@ static bool is_loc_legal(t_pl_loc& loc, PartitionRegion& pr, t_logical_block_typ //Check if the location is within its constraint region for (auto reg : pr.get_partition_region()) { - if (reg.get_region_rect().contains(vtr::Point(loc.x, loc.y))) { + const auto reg_coord = reg.get_region_rect(); + vtr::Rect reg_rect(reg_coord.xmin, reg_coord.ymin, reg_coord.xmax, reg_coord.ymax); + if (reg_coord.layer_num != loc.layer) continue; + if (reg_rect.contains(vtr::Point(loc.x, loc.y))) { //check if the location is compatible with the block type - const auto& type = grid.get_physical_type(loc.x, loc.y); - int height_offset = grid.get_height_offset(loc.x, loc.y); - int width_offset = grid.get_width_offset(loc.x, loc.y); + const auto& type = grid.get_physical_type({loc.x, loc.y, loc.layer}); + int height_offset = grid.get_height_offset({loc.x, loc.y, loc.layer}); + int width_offset = grid.get_width_offset({loc.x, loc.y, loc.layer}); if (is_tile_compatible(type, block_type)) { //Check if the location is an anchor position if (height_offset == 0 && width_offset == 0) { @@ -291,42 +294,45 @@ static bool is_loc_legal(t_pl_loc& loc, PartitionRegion& pr, t_logical_block_typ static bool find_centroid_neighbor(t_pl_loc& centroid_loc, t_logical_block_type_ptr block_type) { const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[block_type->index]; + const int num_layers = g_vpr_ctx.device().grid.get_num_layers(); + const int centroid_loc_layer_num = centroid_loc.layer; //Determine centroid location in the compressed space of the current block - int cx_centroid = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_x, centroid_loc.x); - int cy_centroid = 
grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_y, centroid_loc.y); + auto compressed_centroid_loc = get_compressed_loc_approx(compressed_block_grid, + centroid_loc, + num_layers); //range limit (rlim) sets a limit for the neighbor search in the centroid placement //the neighbor location should be within the defined range to the calculated centroid location int first_rlim = 15; - int rlim_x = std::min(compressed_block_grid.compressed_to_grid_x.size(), first_rlim); - int rlim_y = std::min(compressed_block_grid.compressed_to_grid_y.size(), first_rlim); - //Determine the valid compressed grid location ranges - int min_cx, max_cx, delta_cx; - int min_cy, max_cy; + auto search_range = get_compressed_grid_target_search_range(compressed_block_grid, + compressed_centroid_loc, + first_rlim, + num_layers); - min_cx = std::max(0, cx_centroid - rlim_x); - max_cx = std::min(compressed_block_grid.compressed_to_grid_x.size() - 1, cx_centroid + rlim_x); - - min_cy = std::max(0, cy_centroid - rlim_y); - max_cy = std::min(compressed_block_grid.compressed_to_grid_y.size() - 1, cy_centroid + rlim_y); - - delta_cx = max_cx - min_cx; + int delta_cx = search_range[centroid_loc_layer_num].xmax - search_range[centroid_loc_layer_num].xmin; //Block has not been placed yet, so the "from" coords will be (-1, -1) - int cx_from = -1; - int cy_from = -1; + int cx_from = OPEN; + int cy_from = OPEN; + int layer_from = centroid_loc_layer_num; - int cx_to, cy_to; + t_physical_tile_loc to_compressed_loc; - bool legal = find_compatible_compressed_loc_in_range(block_type, min_cx, max_cx, min_cy, max_cy, delta_cx, cx_from, cy_from, cx_to, cy_to, false); + bool legal = find_compatible_compressed_loc_in_range(block_type, + delta_cx, + {cx_from, cy_from, layer_from}, + search_range[centroid_loc_layer_num], + to_compressed_loc, + false, + centroid_loc_layer_num); if (!legal) { return false; } - compressed_grid_to_loc(block_type, cx_to, cy_to, centroid_loc); + compressed_grid_to_loc(block_type, 
to_compressed_loc, centroid_loc); return legal; } @@ -334,12 +340,21 @@ static bool find_centroid_neighbor(t_pl_loc& centroid_loc, t_logical_block_type_ static std::vector find_centroid_loc(t_pl_macro pl_macro, t_pl_loc& centroid) { auto& cluster_ctx = g_vpr_ctx.clustering(); - int x, y; + t_physical_tile_loc tile_loc; float acc_weight = 0; float acc_x = 0; float acc_y = 0; + int head_layer_num = OPEN; + bool find_layer = false; + std::vector layer_count(g_vpr_ctx.device().grid.get_num_layers(), 0); ClusterBlockId head_blk = pl_macro.members.at(0).blk_index; + // For now, we put the macro in the same layer as the head block + head_layer_num = g_vpr_ctx.placement().block_locs[head_blk].loc.layer; + // If block is placed, we use the layer of the block. Otherwise, the layer will be determined later + if (head_layer_num == OPEN) { + find_layer = true; + } std::vector connected_blocks_to_update; //iterate over the from block pins @@ -374,10 +389,13 @@ static std::vector find_centroid_loc(t_pl_macro pl_macro, t_pl_l continue; } - get_coordinate_of_pin(sink_pin_id, x, y); - - acc_x += x; - acc_y += y; + get_coordinate_of_pin(sink_pin_id, tile_loc); + if (find_layer) { + VTR_ASSERT(tile_loc.layer_num != OPEN); + layer_count[tile_loc.layer_num]++; + } + acc_x += tile_loc.x; + acc_y += tile_loc.y; acc_weight++; } } @@ -391,10 +409,13 @@ static std::vector find_centroid_loc(t_pl_macro pl_macro, t_pl_l continue; } - get_coordinate_of_pin(source_pin, x, y); - - acc_x += x; - acc_y += y; + get_coordinate_of_pin(source_pin, tile_loc); + if (find_layer) { + VTR_ASSERT(tile_loc.layer_num != OPEN); + layer_count[tile_loc.layer_num]++; + } + acc_x += tile_loc.x; + acc_y += tile_loc.y; acc_weight++; } } @@ -403,19 +424,27 @@ static std::vector find_centroid_loc(t_pl_macro pl_macro, t_pl_l if (acc_weight > 0) { centroid.x = acc_x / acc_weight; centroid.y = acc_y / acc_weight; + if (find_layer) { + auto max_element = std::max_element(layer_count.begin(), layer_count.end()); + 
VTR_ASSERT(*max_element != 0); + auto index = std::distance(layer_count.begin(), max_element); + centroid.layer = static_cast(index); + } else { + centroid.layer = head_layer_num; + } } return connected_blocks_to_update; } static bool try_centroid_placement(t_pl_macro pl_macro, PartitionRegion& pr, t_logical_block_type_ptr block_type, enum e_pad_loc_type pad_loc_type, vtr::vector& block_scores) { - t_pl_loc centroid_loc(OPEN, OPEN, OPEN); + t_pl_loc centroid_loc(OPEN, OPEN, OPEN, OPEN); std::vector unplaced_blocks_to_update_their_score; unplaced_blocks_to_update_their_score = find_centroid_loc(pl_macro, centroid_loc); //no suggestion was available for this block type - if (!is_loc_on_chip(centroid_loc.x, centroid_loc.y)) { + if (!is_loc_on_chip({centroid_loc.x, centroid_loc.y, centroid_loc.layer})) { return false; } @@ -430,7 +459,7 @@ static bool try_centroid_placement(t_pl_macro pl_macro, PartitionRegion& pr, t_l } //no neighbor were found that meet all our requirements, should be placed with random placement - if (!is_loc_on_chip(centroid_loc.x, centroid_loc.y) || !pr.is_loc_in_part_reg(centroid_loc)) { + if (!is_loc_on_chip({centroid_loc.x, centroid_loc.y, centroid_loc.layer}) || !pr.is_loc_in_part_reg(centroid_loc)) { return false; } @@ -440,12 +469,12 @@ static bool try_centroid_placement(t_pl_macro pl_macro, PartitionRegion& pr, t_l //we don't need to find one agian if (!neighbor_legal_loc) { const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[block_type->index]; - const auto& type = device_ctx.grid.get_physical_type(centroid_loc.x, centroid_loc.y); - auto& compatible_sub_tiles = compressed_block_grid.compatible_sub_tiles_for_tile.at(type->index); + const auto& type = device_ctx.grid.get_physical_type({centroid_loc.x, centroid_loc.y, centroid_loc.layer}); + const auto& compatible_sub_tiles = compressed_block_grid.compatible_sub_tile_num(type->index); centroid_loc.sub_tile = 
compatible_sub_tiles[vtr::irand((int)compatible_sub_tiles.size() - 1)]; } - int width_offset = device_ctx.grid.get_width_offset(centroid_loc.x, centroid_loc.y); - int height_offset = device_ctx.grid.get_height_offset(centroid_loc.x, centroid_loc.y); + int width_offset = device_ctx.grid.get_width_offset({centroid_loc.x, centroid_loc.y, centroid_loc.layer}); + int height_offset = device_ctx.grid.get_height_offset({centroid_loc.x, centroid_loc.y, centroid_loc.layer}); VTR_ASSERT(width_offset == 0); VTR_ASSERT(height_offset == 0); @@ -507,6 +536,7 @@ static int get_blk_type_first_loc(t_pl_loc& loc, t_pl_macro pl_macro, std::vecto //set the coordinate of first location that can accomodate macro blocks loc.x = first_empty_loc.first_avail_loc.x; loc.y = get_y_loc_based_on_macro_direction(first_empty_loc, pl_macro); + loc.layer = first_empty_loc.first_avail_loc.layer; loc.sub_tile = first_empty_loc.first_avail_loc.sub_tile; return empty_loc_index; @@ -519,29 +549,38 @@ static std::vector init_blk_types_empty_locations( const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[block_type_index]; const auto& device_ctx = g_vpr_ctx.device(); const auto& grid = device_ctx.grid; + int num_layers = grid.get_num_layers(); //create a vector to store all columns containing block_type_index with their lowest y and number of remaining blocks std::vector block_type_empty_locs; - //create a region the size of grid to find out first location with a specific block type - Region reg; - reg.set_region_rect(0, 0, device_ctx.grid.width() - 1, device_ctx.grid.height() - 1); - reg.set_sub_tile(NO_SUBTILE); - - int min_cx = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_x, reg.get_region_rect().xmin()); - int max_cx = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_x, reg.get_region_rect().xmax()); - - //traverse all column and store their empty locations in block_type_empty_locs - for (int x_loc = min_cx; x_loc <= max_cx; x_loc++) { 
- t_grid_empty_locs_block_type empty_loc; - auto first_avail_loc = compressed_block_grid.grid[x_loc].begin()->second; - empty_loc.first_avail_loc.x = first_avail_loc.x; - empty_loc.first_avail_loc.y = first_avail_loc.y; - const auto& physical_type = grid.get_physical_type(first_avail_loc.x, first_avail_loc.y); - const auto& compatible_sub_tiles = compressed_block_grid.compatible_sub_tiles_for_tile.at(physical_type->index); - empty_loc.first_avail_loc.sub_tile = *std::min_element(compatible_sub_tiles.begin(), compatible_sub_tiles.end()); - empty_loc.num_of_empty_locs_in_y_axis = compressed_block_grid.grid[x_loc].size(); - block_type_empty_locs.push_back(empty_loc); + for (int layer_num = 0; layer_num < num_layers; layer_num++) { + //create a region the size of grid to find out first location with a specific block type + Region reg; + reg.set_region_rect({0, + 0, + (int)device_ctx.grid.width() - 1, + (int)device_ctx.grid.height() - 1, + layer_num}); + reg.set_sub_tile(NO_SUBTILE); + const auto reg_coord = reg.get_region_rect(); + int min_cx = compressed_block_grid.grid_loc_to_compressed_loc_approx({reg_coord.xmin, OPEN, layer_num}).x; + int max_cx = compressed_block_grid.grid_loc_to_compressed_loc_approx({reg_coord.xmax, OPEN, layer_num}).x; + + //traverse all column and store their empty locations in block_type_empty_locs + for (int x_loc = min_cx; x_loc <= max_cx; x_loc++) { + t_grid_empty_locs_block_type empty_loc; + const auto& block_rows = compressed_block_grid.get_column_block_map(x_loc, layer_num); + auto first_avail_loc = block_rows.begin()->second; + empty_loc.first_avail_loc.x = first_avail_loc.x; + empty_loc.first_avail_loc.y = first_avail_loc.y; + empty_loc.first_avail_loc.layer = first_avail_loc.layer_num; + const auto& physical_type = grid.get_physical_type({first_avail_loc.x, first_avail_loc.y, first_avail_loc.layer_num}); + const auto& compatible_sub_tiles = compressed_block_grid.compatible_sub_tile_num(physical_type->index); + 
empty_loc.first_avail_loc.sub_tile = *std::min_element(compatible_sub_tiles.begin(), compatible_sub_tiles.end()); + empty_loc.num_of_empty_locs_in_y_axis = block_rows.size(); + block_type_empty_locs.push_back(empty_loc); + } } return block_type_empty_locs; @@ -552,7 +591,7 @@ static inline void fix_IO_block_types(t_pl_macro pl_macro, t_pl_loc loc, enum e_ auto& place_ctx = g_vpr_ctx.mutable_placement(); //If the user marked the IO block pad_loc_type as RANDOM, that means it should be randomly //placed and then stay fixed to that location, which is why the macro members are marked as fixed. - const auto& type = device_ctx.grid.get_physical_type(loc.x, loc.y); + const auto& type = device_ctx.grid.get_physical_type({loc.x, loc.y, loc.layer}); if (is_io_type(type) && pad_loc_type == RANDOM) { for (unsigned int imember = 0; imember < pl_macro.members.size(); imember++) { place_ctx.block_locs[pl_macro.members[imember].blk_index].is_fixed = true; @@ -584,32 +623,36 @@ static bool try_random_placement(t_pl_macro pl_macro, PartitionRegion& pr, t_log } Region reg = regions[region_index]; - vtr::Rect rect = reg.get_region_rect(); + const auto reg_coord = reg.get_region_rect(); - int min_cx = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_x, rect.xmin()); - int min_cy = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_y, rect.ymin()); + auto min_compressed_loc = compressed_block_grid.grid_loc_to_compressed_loc_approx({reg_coord.xmin, reg_coord.ymin, reg_coord.layer_num}); - int max_cx = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_x, rect.xmax()); - int max_cy = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_y, rect.ymax()); + auto max_compressed_loc = compressed_block_grid.grid_loc_to_compressed_loc_approx({reg_coord.xmax, reg_coord.ymax, reg_coord.layer_num}); - int delta_cx = max_cx - min_cx; + int delta_cx = max_compressed_loc.x - min_compressed_loc.x; - int cx_to; - int cy_to; + 
t_physical_tile_loc to_compressed_loc; bool legal; - legal = find_compatible_compressed_loc_in_range(block_type, min_cx, max_cx, min_cy, max_cy, delta_cx, cx_from, cy_from, cx_to, cy_to, false); + legal = find_compatible_compressed_loc_in_range(block_type, + delta_cx, + {cx_from, cy_from, reg_coord.layer_num}, + {min_compressed_loc.x, max_compressed_loc.x, + min_compressed_loc.y, max_compressed_loc.y}, + to_compressed_loc, + false, + reg_coord.layer_num); if (!legal) { //No valid position found return false; } - compressed_grid_to_loc(block_type, cx_to, cy_to, loc); + compressed_grid_to_loc(block_type, to_compressed_loc, loc); auto& device_ctx = g_vpr_ctx.device(); - int width_offset = device_ctx.grid.get_width_offset(loc.x, loc.y); - int height_offset = device_ctx.grid.get_height_offset(loc.x, loc.y); + int width_offset = device_ctx.grid.get_width_offset({loc.x, loc.y, loc.layer}); + int height_offset = device_ctx.grid.get_height_offset({loc.x, loc.y, loc.layer}); VTR_ASSERT(width_offset == 0); VTR_ASSERT(height_offset == 0); @@ -633,14 +676,22 @@ static bool try_exhaustive_placement(t_pl_macro pl_macro, PartitionRegion& pr, t t_pl_loc to_loc; for (unsigned int reg = 0; reg < regions.size() && placed == false; reg++) { - vtr::Rect rect = regions[reg].get_region_rect(); + const auto reg_coord = regions[reg].get_region_rect(); + int layer_num = reg_coord.layer_num; - int min_cx = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_x, rect.xmin()); - int max_cx = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_x, rect.xmax()); + int min_cx = compressed_block_grid.grid_loc_to_compressed_loc_approx({reg_coord.xmin, OPEN, layer_num}).x; + int max_cx = compressed_block_grid.grid_loc_to_compressed_loc_approx({reg_coord.xmax, OPEN, layer_num}).x; + + // There isn't any block of this type in this region + if (min_cx == OPEN) { + VTR_ASSERT(max_cx == OPEN); + continue; + } for (int cx = min_cx; cx <= max_cx && placed == false; cx++) { - auto 
y_lower_iter = compressed_block_grid.grid[cx].begin(); - auto y_upper_iter = compressed_block_grid.grid[cx].end(); + const auto& block_rows = compressed_block_grid.get_column_block_map(cx, layer_num); + auto y_lower_iter = block_rows.begin(); + auto y_upper_iter = block_rows.end(); int y_range = std::distance(y_lower_iter, y_upper_iter); @@ -649,17 +700,19 @@ static bool try_exhaustive_placement(t_pl_macro pl_macro, PartitionRegion& pr, t for (int dy = 0; dy < y_range && placed == false; dy++) { int cy = (y_lower_iter + dy)->first; - to_loc.x = compressed_block_grid.compressed_to_grid_x[cx]; - to_loc.y = compressed_block_grid.compressed_to_grid_y[cy]; + auto grid_loc = compressed_block_grid.compressed_loc_to_grid_loc({cx, cy, layer_num}); + to_loc.x = grid_loc.x; + to_loc.y = grid_loc.y; + to_loc.layer = grid_loc.layer_num; auto& grid = g_vpr_ctx.device().grid; - auto tile_type = grid.get_physical_type(to_loc.x, to_loc.y); + auto tile_type = grid.get_physical_type({to_loc.x, to_loc.y, layer_num}); if (regions[reg].get_sub_tile() != NO_SUBTILE) { int subtile = regions[reg].get_sub_tile(); to_loc.sub_tile = subtile; - if (place_ctx.grid_blocks[to_loc.x][to_loc.y].blocks[to_loc.sub_tile] == EMPTY_BLOCK_ID) { + if (place_ctx.grid_blocks.block_at_location(to_loc) == EMPTY_BLOCK_ID) { placed = try_place_macro(pl_macro, to_loc); if (placed) { @@ -674,7 +727,7 @@ static bool try_exhaustive_placement(t_pl_macro pl_macro, PartitionRegion& pr, t for (int st = st_low; st <= st_high && placed == false; st++) { to_loc.sub_tile = st; - if (place_ctx.grid_blocks[to_loc.x][to_loc.y].blocks[to_loc.sub_tile] == EMPTY_BLOCK_ID) { + if (place_ctx.grid_blocks.block_at_location(to_loc) == EMPTY_BLOCK_ID) { placed = try_place_macro(pl_macro, to_loc); if (placed) { fix_IO_block_types(pl_macro, to_loc, pad_loc_type); @@ -700,14 +753,14 @@ static bool try_dense_placement(t_pl_macro pl_macro, PartitionRegion& pr, t_logi int column_index = get_blk_type_first_loc(loc, pl_macro, 
blk_types_empty_locs_in_grid); //check if first available location is within the chip and macro's partition region, otherwise placement is not legal - if (!is_loc_on_chip(loc.x, loc.y) || !pr.is_loc_in_part_reg(loc)) { + if (!is_loc_on_chip({loc.x, loc.y, loc.layer}) || !pr.is_loc_in_part_reg(loc)) { return false; } auto& device_ctx = g_vpr_ctx.device(); - int width_offset = device_ctx.grid.get_width_offset(loc.x, loc.y); - int height_offset = device_ctx.grid.get_height_offset(loc.x, loc.y); + int width_offset = device_ctx.grid.get_width_offset({loc.x, loc.y, loc.layer}); + int height_offset = device_ctx.grid.get_height_offset({loc.x, loc.y, loc.layer}); VTR_ASSERT(width_offset == 0); VTR_ASSERT(height_offset == 0); @@ -730,7 +783,7 @@ static bool try_place_macro(t_pl_macro pl_macro, t_pl_loc head_pos) { bool macro_placed = false; // If that location is occupied, do nothing. - if (place_ctx.grid_blocks[head_pos.x][head_pos.y].blocks[head_pos.sub_tile] != EMPTY_BLOCK_ID) { + if (place_ctx.grid_blocks.block_at_location(head_pos) != EMPTY_BLOCK_ID) { return (macro_placed); } @@ -776,9 +829,15 @@ static bool place_macro(int macros_max_num_tries, t_pl_macro pl_macro, enum e_pa pr = floorplanning_ctx.cluster_constraints[blk_id]; } else { //If the block is not constrained, assign a region the size of the grid to its PartitionRegion Region reg; - reg.set_region_rect(0, 0, device_ctx.grid.width() - 1, device_ctx.grid.height() - 1); - reg.set_sub_tile(NO_SUBTILE); - pr.add_to_part_region(reg); + for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { + reg.set_region_rect({0, + 0, + (int)device_ctx.grid.width() - 1, + (int)device_ctx.grid.height() - 1, + layer_num}); + reg.set_sub_tile(NO_SUBTILE); + pr.add_to_part_region(reg); + } } //If blk_types_empty_locs_in_grid is not NULL, means that initial placement has been failed in first iteration for this block type @@ -961,15 +1020,17 @@ static void clear_block_type_grid_locs(std::unordered_set 
unplaced_blk_type /* We'll use the grid to record where everything goes. Initialize to the grid has no * blocks placed anywhere. */ - for (size_t i = 0; i < device_ctx.grid.width(); i++) { - for (size_t j = 0; j < device_ctx.grid.height(); j++) { - const auto& type = device_ctx.grid.get_physical_type(i, j); - itype = type->index; - if (clear_all_block_types || unplaced_blk_types_index.count(itype)) { - place_ctx.grid_blocks[i][j].usage = 0; - for (int k = 0; k < device_ctx.physical_tile_types[itype].capacity; k++) { - if (place_ctx.grid_blocks[i][j].blocks[k] != INVALID_BLOCK_ID) { - place_ctx.grid_blocks[i][j].blocks[k] = EMPTY_BLOCK_ID; + for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { + for (int i = 0; i < (int)device_ctx.grid.width(); i++) { + for (int j = 0; j < (int)device_ctx.grid.height(); j++) { + const auto& type = device_ctx.grid.get_physical_type({i, j, layer_num}); + itype = type->index; + if (clear_all_block_types || unplaced_blk_types_index.count(itype)) { + place_ctx.grid_blocks.set_usage({i, j, layer_num}, 0); + for (int k = 0; k < device_ctx.physical_tile_types[itype].capacity; k++) { + if (place_ctx.grid_blocks.block_at_location({i, j, k, layer_num}) != INVALID_BLOCK_ID) { + place_ctx.grid_blocks.set_block_at_location({i, j, k, layer_num}, EMPTY_BLOCK_ID); + } } } } diff --git a/vpr/src/place/manual_move_generator.cpp b/vpr/src/place/manual_move_generator.cpp index 930eaa9f59a..2cc80347df1 100644 --- a/vpr/src/place/manual_move_generator.cpp +++ b/vpr/src/place/manual_move_generator.cpp @@ -37,14 +37,14 @@ e_create_move ManualMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_ //Gets the current location of the block to move. 
t_pl_loc from = place_ctx.block_locs[b_from].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); - auto grid_from_type = device_ctx.grid.get_physical_type(from.x, from.y); + auto grid_from_type = device_ctx.grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); //Retrieving the compressed block grid for this block type const auto& compressed_block_grid = place_ctx.compressed_block_grids[cluster_from_type->index]; //Checking if the block has a compatible subtile. - auto to_type = device_ctx.grid.get_physical_type(to.x, to.y); - auto& compatible_subtiles = compressed_block_grid.compatible_sub_tiles_for_tile.at(to_type->index); + auto to_type = device_ctx.grid.get_physical_type({to.x, to.y, to.layer}); + auto& compatible_subtiles = compressed_block_grid.compatible_sub_tile_num(to_type->index); //No compatible subtile is found. if (std::find(compatible_subtiles.begin(), compatible_subtiles.end(), to.sub_tile) == compatible_subtiles.end()) { diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp index 2bd4ef7a622..9dae21bca5b 100644 --- a/vpr/src/place/median_move_generator.cpp +++ b/vpr/src/place/median_move_generator.cpp @@ -24,7 +24,7 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_ t_pl_loc from = place_ctx.block_locs[b_from].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); - auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type(from.x, from.y); + auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); /* Calculate the median region */ @@ -47,7 +47,7 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_ ClusterNetId net_id = cluster_ctx.clb_nlist.pin_net(pin_id); if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) continue; - /* To speedup the 
calculation, we found it is useful to ignore high fanout nets. + /* To speed up the calculation, we found it is useful to ignore high fanout nets. * Especially that in most cases, these high fanout nets are scattered in many locations of * the device and don't guide to a specific location. We also assuered these assumpitions experimentally. */ @@ -118,6 +118,8 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_ t_pl_loc median_point; median_point.x = (limit_coords.xmin + limit_coords.xmax) / 2; median_point.y = (limit_coords.ymin + limit_coords.ymax) / 2; + // TODO: When placer is updated to support moving blocks between dice, this needs to be changed. Currently, we only move blocks within a die. + median_point.layer = from.layer; if (!find_to_loc_centroid(cluster_from_type, from, median_point, range_limiters, to, b_from)) return e_create_move::ABORT; diff --git a/vpr/src/place/move_transactions.cpp b/vpr/src/place/move_transactions.cpp index 1fe3ec7aff8..2c7d6dc180d 100644 --- a/vpr/src/place/move_transactions.cpp +++ b/vpr/src/place/move_transactions.cpp @@ -21,7 +21,7 @@ e_block_move_result record_block_move(t_pl_blocks_to_be_moved& blocks_affected, return e_block_move_result::ABORT; } - VTR_ASSERT_SAFE(to.sub_tile < int(place_ctx.grid_blocks[to.x][to.y].blocks.size())); + VTR_ASSERT_SAFE(to.sub_tile < int(place_ctx.grid_blocks.num_blocks_at_location({to.x, to.y, to.layer}))); // Sets up the blocks moved int imoved_blk = blocks_affected.num_moved_blocks; @@ -46,7 +46,10 @@ void apply_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected) { place_ctx.block_locs[blk].loc = blocks_affected.moved_blocks[iblk].new_loc; //if physical tile type of old location does not equal physical tile type of new location, sync the new physical pins - if (device_ctx.grid.get_physical_type(blocks_affected.moved_blocks[iblk].old_loc.x, blocks_affected.moved_blocks[iblk].old_loc.y) != 
device_ctx.grid.get_physical_type(blocks_affected.moved_blocks[iblk].new_loc.x, blocks_affected.moved_blocks[iblk].new_loc.y)) { + if (device_ctx.grid.get_physical_type({blocks_affected.moved_blocks[iblk].old_loc.x, + blocks_affected.moved_blocks[iblk].old_loc.y, + blocks_affected.moved_blocks[iblk].old_loc.layer}) + != device_ctx.grid.get_physical_type({blocks_affected.moved_blocks[iblk].new_loc.x, blocks_affected.moved_blocks[iblk].new_loc.y, blocks_affected.moved_blocks[iblk].new_loc.layer})) { place_sync_external_block_connections(blk); } } @@ -66,17 +69,19 @@ void commit_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected) { t_pl_loc from = blocks_affected.moved_blocks[iblk].old_loc; //Remove from old location only if it hasn't already been updated by a previous block update - if (place_ctx.grid_blocks[from.x][from.y].blocks[from.sub_tile] == blk) { - place_ctx.grid_blocks[from.x][from.y].blocks[from.sub_tile] = EMPTY_BLOCK_ID; - --place_ctx.grid_blocks[from.x][from.y].usage; + if (place_ctx.grid_blocks.block_at_location(from) == blk) { + place_ctx.grid_blocks.set_block_at_location(from, EMPTY_BLOCK_ID); + place_ctx.grid_blocks.set_usage({from.x, from.y, from.layer}, + place_ctx.grid_blocks.get_usage({from.x, from.y, from.layer}) - 1); } //Add to new location - if (place_ctx.grid_blocks[to.x][to.y].blocks[to.sub_tile] == EMPTY_BLOCK_ID) { + if (place_ctx.grid_blocks.block_at_location(to) == EMPTY_BLOCK_ID) { //Only need to increase usage if previously unused - ++place_ctx.grid_blocks[to.x][to.y].usage; + place_ctx.grid_blocks.set_usage({to.x, to.y, to.layer}, + place_ctx.grid_blocks.get_usage({to.x, to.y, to.layer}) + 1); } - place_ctx.grid_blocks[to.x][to.y].blocks[to.sub_tile] = blk; + place_ctx.grid_blocks.set_block_at_location(to, blk); } // Finish updating clb for all blocks } @@ -95,11 +100,14 @@ void revert_move_blocks(t_pl_blocks_to_be_moved& blocks_affected) { place_ctx.block_locs[blk].loc = old; //if physical tile type of old location does not 
equal physical tile type of new location, sync the new physical pins - if (device_ctx.grid.get_physical_type(blocks_affected.moved_blocks[iblk].old_loc.x, blocks_affected.moved_blocks[iblk].old_loc.y) != device_ctx.grid.get_physical_type(blocks_affected.moved_blocks[iblk].new_loc.x, blocks_affected.moved_blocks[iblk].new_loc.y)) { + if (device_ctx.grid.get_physical_type({blocks_affected.moved_blocks[iblk].old_loc.x, + blocks_affected.moved_blocks[iblk].old_loc.y, + blocks_affected.moved_blocks[iblk].old_loc.layer}) + != device_ctx.grid.get_physical_type({blocks_affected.moved_blocks[iblk].new_loc.x, blocks_affected.moved_blocks[iblk].new_loc.y, blocks_affected.moved_blocks[iblk].new_loc.layer})) { place_sync_external_block_connections(blk); } - VTR_ASSERT_SAFE_MSG(place_ctx.grid_blocks[old.x][old.y].blocks[old.sub_tile] == blk, "Grid blocks should only have been updated if swap commited (not reverted)"); + VTR_ASSERT_SAFE_MSG(place_ctx.grid_blocks.block_at_location(old) == blk, "Grid blocks should only have been updated if swap commited (not reverted)"); } } diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp index 53a7c0ad248..5e1188db6c3 100644 --- a/vpr/src/place/move_utils.cpp +++ b/vpr/src/place/move_utils.cpp @@ -40,7 +40,7 @@ e_create_move create_move(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlock //Try inverting the swap direction auto& place_ctx = g_vpr_ctx.placement(); - ClusterBlockId b_to = place_ctx.grid_blocks[to.x][to.y].blocks[to.sub_tile]; + ClusterBlockId b_to = place_ctx.grid_blocks.block_at_location(to); if (!b_to) { log_move_abort("inverted move no to block"); @@ -93,7 +93,7 @@ e_block_move_result find_affected_blocks(t_pl_blocks_to_be_moved& blocks_affecte VTR_ASSERT_SAFE(outcome != e_block_move_result::VALID || imember_from == int(pl_macros[imacro_from].members.size())); } else { - ClusterBlockId b_to = place_ctx.grid_blocks[to.x][to.y].blocks[to.sub_tile]; + ClusterBlockId b_to = 
place_ctx.grid_blocks.block_at_location(to); int imacro_to = -1; get_imacro_from_iblk(&imacro_to, b_to, pl_macros); @@ -125,9 +125,9 @@ e_block_move_result record_single_block_swap(t_pl_blocks_to_be_moved& blocks_aff return e_block_move_result::ABORT; } - VTR_ASSERT_SAFE(to.sub_tile < int(place_ctx.grid_blocks[to.x][to.y].blocks.size())); + VTR_ASSERT_SAFE(to.sub_tile < int(place_ctx.grid_blocks.num_blocks_at_location({to.x, to.y, to.layer}))); - ClusterBlockId b_to = place_ctx.grid_blocks[to.x][to.y].blocks[to.sub_tile]; + ClusterBlockId b_to = place_ctx.grid_blocks.block_at_location(to); t_pl_loc curr_from = place_ctx.block_locs[b_from].loc; @@ -190,7 +190,7 @@ e_block_move_result record_macro_swaps(t_pl_blocks_to_be_moved& blocks_affected, log_move_abort("macro_from swap to location illegal"); outcome = e_block_move_result::ABORT; } else { - ClusterBlockId b_to = place_ctx.grid_blocks[curr_to.x][curr_to.y].blocks[curr_to.sub_tile]; + ClusterBlockId b_to = place_ctx.grid_blocks.block_at_location(curr_to); int imacro_to = -1; get_imacro_from_iblk(&imacro_to, b_to, pl_macros); @@ -334,7 +334,7 @@ e_block_move_result record_macro_move(t_pl_blocks_to_be_moved& blocks_affected, return e_block_move_result::ABORT; } - ClusterBlockId blk_to = place_ctx.grid_blocks[to.x][to.y].blocks[to.sub_tile]; + ClusterBlockId blk_to = place_ctx.grid_blocks.block_at_location(to); record_block_move(blocks_affected, member.blk_index, to); @@ -365,7 +365,7 @@ e_block_move_result identify_macro_self_swap_affected_macros(std::vector& m return e_block_move_result::ABORT; } - ClusterBlockId blk_to = place_ctx.grid_blocks[to.x][to.y].blocks[to.sub_tile]; + ClusterBlockId blk_to = place_ctx.grid_blocks.block_at_location(to); int imacro_to = -1; get_imacro_from_iblk(&imacro_to, blk_to, place_ctx.pl_macros); @@ -458,7 +458,7 @@ bool is_legal_swap_to_location(ClusterBlockId blk, t_pl_loc to) { return false; } - auto physical_tile = device_ctx.grid.get_physical_type(to.x, to.y); + auto 
physical_tile = device_ctx.grid.get_physical_type(t_physical_tile_loc(to.x, to.y, to.layer)); auto logical_block = cluster_ctx.clb_nlist.block_type(blk); if (to.sub_tile < 0 || to.sub_tile >= physical_tile->capacity @@ -466,7 +466,7 @@ bool is_legal_swap_to_location(ClusterBlockId blk, t_pl_loc to) { return false; } // If the destination block is user constrained, abort this swap - auto b_to = place_ctx.grid_blocks[to.x][to.y].blocks[to.sub_tile]; + auto b_to = place_ctx.grid_blocks.block_at_location(to); if (b_to != INVALID_BLOCK_ID && b_to != EMPTY_BLOCK_ID) { if (place_ctx.block_locs[b_to].is_fixed) { return false; @@ -710,53 +710,60 @@ bool find_to_loc_uniform(t_logical_block_type_ptr type, //Retrieve the compressed block grid for this block type const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[type->index]; - - //Determine the rlim in each dimension - int rlim_x = std::min(compressed_block_grid.compressed_to_grid_x.size(), rlim); - int rlim_y = std::min(compressed_block_grid.compressed_to_grid_y.size(), rlim); /* for aspect_ratio != 1 case. 
*/ + const int num_layers = g_vpr_ctx.device().grid.get_num_layers(); + const int from_layer_num = from.layer; //Determine the coordinates in the compressed grid space of the current block - int cx_from = grid_to_compressed(compressed_block_grid.compressed_to_grid_x, from.x); - int cy_from = grid_to_compressed(compressed_block_grid.compressed_to_grid_y, from.y); + std::vector compressed_locs = get_compressed_loc(compressed_block_grid, + from, + num_layers); //Determine the valid compressed grid location ranges - int min_cx = std::max(0, cx_from - rlim_x); - int max_cx = std::min(compressed_block_grid.compressed_to_grid_x.size() - 1, cx_from + rlim_x); - int delta_cx = max_cx - min_cx; - - int min_cy = std::max(0, cy_from - rlim_y); - int max_cy = std::min(compressed_block_grid.compressed_to_grid_y.size() - 1, cy_from + rlim_y); + std::vector search_range = get_compressed_grid_target_search_range(compressed_block_grid, + compressed_locs, + rlim, + num_layers); + int delta_cx = search_range[from_layer_num].xmax - search_range[from_layer_num].xmin; - int cx_to = OPEN; - int cy_to = OPEN; + t_physical_tile_loc to_compressed_loc; bool legal = false; + //TODO: constraints should be adapted to 3D architecture if (is_cluster_constrained(b_from)) { - bool intersect = intersect_range_limit_with_floorplan_constraints(type, b_from, min_cx, min_cy, max_cx, max_cy, delta_cx); + bool intersect = intersect_range_limit_with_floorplan_constraints(type, + b_from, + search_range[from_layer_num], + delta_cx, + from_layer_num); if (!intersect) { return false; } } - - legal = find_compatible_compressed_loc_in_range(type, min_cx, max_cx, min_cy, max_cy, delta_cx, cx_from, cy_from, cx_to, cy_to, false); + //TODO: For now, we only move the blocks on the same tile + legal = find_compatible_compressed_loc_in_range(type, + delta_cx, + compressed_locs[from_layer_num], + search_range[from_layer_num], + to_compressed_loc, + false, + from_layer_num); if (!legal) { //No valid position found return 
false; } - VTR_ASSERT(cx_to != OPEN); - VTR_ASSERT(cy_to != OPEN); + VTR_ASSERT(to_compressed_loc); //Convert to true (uncompressed) grid locations - compressed_grid_to_loc(type, cx_to, cy_to, to); + compressed_grid_to_loc(type, to_compressed_loc, to); auto& grid = g_vpr_ctx.device().grid; - const auto& to_type = grid.get_physical_type(to.x, to.y); + const auto& to_type = grid.get_physical_type(t_physical_tile_loc(to.x, to.y, to.layer)); VTR_ASSERT_MSG(is_tile_compatible(to_type, type), "Type must be compatible"); - VTR_ASSERT_MSG(grid.get_width_offset(to.x, to.y) == 0, "Should be at block base location"); - VTR_ASSERT_MSG(grid.get_height_offset(to.x, to.y) == 0, "Should be at block base location"); + VTR_ASSERT_MSG(grid.get_width_offset({to.x, to.y, to.layer}) == 0, "Should be at block base location"); + VTR_ASSERT_MSG(grid.get_height_offset({to.x, to.y, to.layer}) == 0, "Should be at block base location"); return true; } @@ -775,60 +782,78 @@ bool find_to_loc_median(t_logical_block_type_ptr blk_type, const t_bb* limit_coords, t_pl_loc& to_loc, ClusterBlockId b_from) { + int num_layers = g_vpr_ctx.device().grid.get_num_layers(); + int from_layer_num = from_loc.layer; const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[blk_type->index]; //Determine the coordinates in the compressed grid space of the current block - int cx_from = grid_to_compressed(compressed_block_grid.compressed_to_grid_x, from_loc.x); - int cy_from = grid_to_compressed(compressed_block_grid.compressed_to_grid_y, from_loc.y); + std::vector from_compressed_locs = get_compressed_loc(compressed_block_grid, + from_loc, + g_vpr_ctx.device().grid.get_num_layers()); VTR_ASSERT(limit_coords->xmin <= limit_coords->xmax); VTR_ASSERT(limit_coords->ymin <= limit_coords->ymax); //Determine the valid compressed grid location ranges - int min_cx = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_x, limit_coords->xmin); - int max_cx = 
grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_x, limit_coords->xmax); - - VTR_ASSERT(min_cx >= 0); - VTR_ASSERT(static_cast(compressed_block_grid.compressed_to_grid_x.size()) - 1 - max_cx >= 0); - VTR_ASSERT(max_cx >= min_cx); - int delta_cx = max_cx - min_cx; - - int min_cy = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_y, limit_coords->ymin); - int max_cy = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_y, limit_coords->ymax); - VTR_ASSERT(min_cy >= 0); - VTR_ASSERT(static_cast(compressed_block_grid.compressed_to_grid_y.size()) - 1 - max_cy >= 0); - VTR_ASSERT(max_cy >= min_cy); - - int cx_to = OPEN; - int cy_to = OPEN; + std::vector min_compressed_loc = get_compressed_loc_approx(compressed_block_grid, + {limit_coords->xmin, limit_coords->ymin, 0, from_layer_num}, + num_layers); + std::vector max_compressed_loc = get_compressed_loc_approx(compressed_block_grid, + {limit_coords->xmax, limit_coords->ymax, 0, from_layer_num}, + num_layers); + + VTR_ASSERT(min_compressed_loc[from_layer_num].x >= 0); + VTR_ASSERT(static_cast(compressed_block_grid.get_num_columns(from_layer_num)) - 1 - max_compressed_loc[from_layer_num].x >= 0); + VTR_ASSERT(max_compressed_loc[from_layer_num].x >= min_compressed_loc[from_layer_num].x); + int delta_cx = max_compressed_loc[from_layer_num].x - min_compressed_loc[from_layer_num].x; + + VTR_ASSERT(min_compressed_loc[from_layer_num].y >= 0); + VTR_ASSERT(static_cast(compressed_block_grid.get_num_rows(from_layer_num)) - 1 - max_compressed_loc[from_layer_num].y >= 0); + VTR_ASSERT(max_compressed_loc[from_layer_num].y >= min_compressed_loc[from_layer_num].y); + + t_bb search_range(min_compressed_loc[from_layer_num].x, + max_compressed_loc[from_layer_num].x, + min_compressed_loc[from_layer_num].y, + max_compressed_loc[from_layer_num].y); + + t_physical_tile_loc to_compressed_loc; bool legal = false; if (is_cluster_constrained(b_from)) { - bool intersect = 
intersect_range_limit_with_floorplan_constraints(blk_type, b_from, min_cx, min_cy, max_cx, max_cy, delta_cx); + bool intersect = intersect_range_limit_with_floorplan_constraints(blk_type, + b_from, + search_range, + delta_cx, + from_layer_num); if (!intersect) { return false; } } - legal = find_compatible_compressed_loc_in_range(blk_type, min_cx, max_cx, min_cy, max_cy, delta_cx, cx_from, cy_from, cx_to, cy_to, true); + legal = find_compatible_compressed_loc_in_range(blk_type, + delta_cx, + from_compressed_locs[from_layer_num], + search_range, + to_compressed_loc, + true, + from_layer_num); if (!legal) { //No valid position found return false; } - VTR_ASSERT(cx_to != OPEN); - VTR_ASSERT(cy_to != OPEN); + VTR_ASSERT(to_compressed_loc); //Convert to true (uncompressed) grid locations - compressed_grid_to_loc(blk_type, cx_to, cy_to, to_loc); + compressed_grid_to_loc(blk_type, to_compressed_loc, to_loc); auto& grid = g_vpr_ctx.device().grid; - const auto& to_type = grid.get_physical_type(to_loc.x, to_loc.y); + const auto& to_type = grid.get_physical_type(t_physical_tile_loc(to_loc.x, to_loc.y, to_loc.layer)); VTR_ASSERT_MSG(is_tile_compatible(to_type, blk_type), "Type must be compatible"); - VTR_ASSERT_MSG(grid.get_width_offset(to_loc.x, to_loc.y) == 0, "Should be at block base location"); - VTR_ASSERT_MSG(grid.get_height_offset(to_loc.x, to_loc.y) == 0, "Should be at block base location"); + VTR_ASSERT_MSG(grid.get_width_offset({to_loc.x, to_loc.y, to_loc.layer}) == 0, "Should be at block base location"); + VTR_ASSERT_MSG(grid.get_height_offset({to_loc.x, to_loc.y, to_loc.layer}) == 0, "Should be at block base location"); return true; } @@ -841,79 +866,77 @@ bool find_to_loc_centroid(t_logical_block_type_ptr blk_type, ClusterBlockId b_from) { //Retrieve the compressed block grid for this block type const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[blk_type->index]; + const int from_layer_num = from_loc.layer; + const int num_layers = 
g_vpr_ctx.device().grid.get_num_layers(); - //Determine the coordinates in the compressed grid space of the current block - int cx_from = grid_to_compressed(compressed_block_grid.compressed_to_grid_x, from_loc.x); - int cy_from = grid_to_compressed(compressed_block_grid.compressed_to_grid_y, from_loc.y); - - //Determine the rlim in each dimension - int rlim_x = std::min(compressed_block_grid.compressed_to_grid_x.size(), std::min(range_limiters.original_rlim, range_limiters.dm_rlim)); - int rlim_y = std::min(compressed_block_grid.compressed_to_grid_y.size(), std::min(range_limiters.original_rlim, range_limiters.dm_rlim)); /* for aspect_ratio != 1 case. */ + std::vector from_compressed_loc = get_compressed_loc(compressed_block_grid, + from_loc, + num_layers); //Determine the coordinates in the compressed grid space of the current block - int cx_centroid = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_x, centroid.x); - int cy_centroid = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_y, centroid.y); + std::vector centroid_compressed_loc = get_compressed_loc_approx(compressed_block_grid, + centroid, + num_layers); //Determine the valid compressed grid location ranges - int min_cx, max_cx, delta_cx; - int min_cy, max_cy; + int delta_cx; + std::vector search_range; // If we are early in the anneal and the range limit still big enough --> search around the center location that the move proposed // If not --> search around the current location of the block but in the direction of the center location that the move proposed if (range_limiters.original_rlim > 0.15 * range_limiters.first_rlim) { - min_cx = std::max(0, cx_centroid - rlim_x); - max_cx = std::min(compressed_block_grid.compressed_to_grid_x.size() - 1, cx_centroid + rlim_x); - - min_cy = std::max(0, cy_centroid - rlim_y); - max_cy = std::min(compressed_block_grid.compressed_to_grid_y.size() - 1, cy_centroid + rlim_y); + search_range = 
get_compressed_grid_target_search_range(compressed_block_grid, + centroid_compressed_loc, + std::min(range_limiters.original_rlim, range_limiters.dm_rlim), + num_layers); } else { - if (cx_centroid < cx_from) { - min_cx = std::max(0, cx_from - rlim_x); - max_cx = cx_from; - } else { - min_cx = cx_from; - max_cx = std::min(compressed_block_grid.compressed_to_grid_x.size() - 1, cx_from + rlim_x); - } - if (cy_centroid < cy_from) { - min_cy = std::max(0, cy_from - rlim_y); - max_cy = cy_from; - } else { - min_cy = cy_from; - max_cy = std::min(compressed_block_grid.compressed_to_grid_y.size() - 1, cy_from + rlim_y); - } + search_range = get_compressed_grid_bounded_search_range(compressed_block_grid, + from_compressed_loc, + centroid_compressed_loc, + std::min(range_limiters.original_rlim, range_limiters.dm_rlim), + num_layers); } - delta_cx = max_cx - min_cx; + delta_cx = search_range[from_layer_num].xmax - search_range[from_layer_num].xmin; - int cx_to = OPEN; - int cy_to = OPEN; + t_physical_tile_loc to_compressed_loc; bool legal = false; if (is_cluster_constrained(b_from)) { - bool intersect = intersect_range_limit_with_floorplan_constraints(blk_type, b_from, min_cx, min_cy, max_cx, max_cy, delta_cx); + bool intersect = intersect_range_limit_with_floorplan_constraints(blk_type, + b_from, + search_range[from_layer_num], + delta_cx, + from_layer_num); if (!intersect) { return false; } } - legal = find_compatible_compressed_loc_in_range(blk_type, min_cx, max_cx, min_cy, max_cy, delta_cx, cx_from, cy_from, cx_to, cy_to, false); + //TODO: For now, we only move the blocks on the same tile + legal = find_compatible_compressed_loc_in_range(blk_type, + delta_cx, + from_compressed_loc[from_layer_num], + search_range[from_layer_num], + to_compressed_loc, + false, + from_layer_num); if (!legal) { //No valid position found return false; } - VTR_ASSERT(cx_to != OPEN); - VTR_ASSERT(cy_to != OPEN); + VTR_ASSERT(to_compressed_loc); //Convert to true (uncompressed) grid locations - 
compressed_grid_to_loc(blk_type, cx_to, cy_to, to_loc); + compressed_grid_to_loc(blk_type, to_compressed_loc, to_loc); auto& grid = g_vpr_ctx.device().grid; - const auto& to_type = grid.get_physical_type(to_loc.x, to_loc.y); + const auto& to_type = grid.get_physical_type(t_physical_tile_loc(to_loc.x, to_loc.y, to_loc.layer)); VTR_ASSERT_MSG(is_tile_compatible(to_type, blk_type), "Type must be compatible"); - VTR_ASSERT_MSG(grid.get_width_offset(to_loc.x, to_loc.y) == 0, "Should be at block base location"); - VTR_ASSERT_MSG(grid.get_height_offset(to_loc.x, to_loc.y) == 0, "Should be at block base location"); + VTR_ASSERT_MSG(grid.get_width_offset({to_loc.x, to_loc.y, to_loc.layer}) == 0, "Should be at block base location"); + VTR_ASSERT_MSG(grid.get_height_offset({to_loc.x, to_loc.y, to_loc.layer}) == 0, "Should be at block base location"); return true; } @@ -935,23 +958,33 @@ std::string move_type_to_string(e_move_type move) { } //Convert to true (uncompressed) grid locations -void compressed_grid_to_loc(t_logical_block_type_ptr blk_type, int cx, int cy, t_pl_loc& to_loc) { +void compressed_grid_to_loc(t_logical_block_type_ptr blk_type, + t_physical_tile_loc compressed_loc, + t_pl_loc& to_loc) { const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[blk_type->index]; - - to_loc.x = compressed_block_grid.compressed_to_grid_x[cx]; - to_loc.y = compressed_block_grid.compressed_to_grid_y[cy]; + auto grid_loc = compressed_block_grid.compressed_loc_to_grid_loc(compressed_loc); auto& grid = g_vpr_ctx.device().grid; - auto to_type = grid.get_physical_type(to_loc.x, to_loc.y); + auto to_type = grid.get_physical_type({grid_loc.x, grid_loc.y, grid_loc.layer_num}); //Each x/y location contains only a single type, so we can pick a random z (capcity) location - auto& compatible_sub_tiles = compressed_block_grid.compatible_sub_tiles_for_tile.at(to_type->index); - to_loc.sub_tile = compatible_sub_tiles[vtr::irand((int)compatible_sub_tiles.size() - 1)]; + 
auto& compatible_sub_tiles = compressed_block_grid.compatible_sub_tile_num(to_type->index); + int sub_tile = compatible_sub_tiles[vtr::irand((int)compatible_sub_tiles.size() - 1)]; + + to_loc = t_pl_loc(grid_loc.x, grid_loc.y, sub_tile, grid_loc.layer_num); } -bool find_compatible_compressed_loc_in_range(t_logical_block_type_ptr type, int min_cx, int max_cx, int min_cy, int max_cy, int delta_cx, int cx_from, int cy_from, int& cx_to, int& cy_to, bool is_median) { +bool find_compatible_compressed_loc_in_range(t_logical_block_type_ptr type, + const int delta_cx, + const t_physical_tile_loc& from_loc, + t_bb search_range, + t_physical_tile_loc& to_loc, + bool is_median, + int to_layer_num) { + //TODO For the time being, the blocks only moved in the same layer. This assertion should be removed after VPR is updated to move blocks between layers + VTR_ASSERT(to_layer_num == from_loc.layer_num); const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[type->index]; - + to_loc.layer_num = to_layer_num; std::unordered_set tried_cx_to; bool legal = false; int possibilities; @@ -963,13 +996,13 @@ bool find_compatible_compressed_loc_in_range(t_logical_block_type_ptr type, int while (!legal && (int)tried_cx_to.size() < possibilities) { //Until legal or all possibilities exhaused //Pick a random x-location within [min_cx, max_cx], //until we find a legal swap, or have exhuasted all possiblites - cx_to = min_cx + vtr::irand(delta_cx); + to_loc.x = search_range.xmin + vtr::irand(delta_cx); - VTR_ASSERT(cx_to >= min_cx); - VTR_ASSERT(cx_to <= max_cx); + VTR_ASSERT(to_loc.x >= search_range.xmin); + VTR_ASSERT(to_loc.x <= search_range.xmax); //Record this x location as tried - auto res = tried_cx_to.insert(cx_to); + auto res = tried_cx_to.insert(to_loc.x); if (!res.second) { continue; //Already tried this position } @@ -981,25 +1014,26 @@ bool find_compatible_compressed_loc_in_range(t_logical_block_type_ptr type, int // //The candidates are stored in a flat_map 
so we can efficiently find the set of valid //candidates with upper/lower bound. - auto y_lower_iter = compressed_block_grid.grid[cx_to].lower_bound(min_cy); - if (y_lower_iter == compressed_block_grid.grid[cx_to].end()) { + const auto& block_rows = compressed_block_grid.get_column_block_map(to_loc.x, to_layer_num); + auto y_lower_iter = block_rows.lower_bound(search_range.ymin); + if (y_lower_iter == block_rows.end()) { continue; } - auto y_upper_iter = compressed_block_grid.grid[cx_to].upper_bound(max_cy); + auto y_upper_iter = block_rows.upper_bound(search_range.ymax); - if (y_lower_iter->first > min_cy) { + if (y_lower_iter->first > search_range.ymin) { //No valid blocks at this x location which are within rlim_y // if (type->index != 1) continue; else { //Fall back to allow the whole y range - y_lower_iter = compressed_block_grid.grid[cx_to].begin(); - y_upper_iter = compressed_block_grid.grid[cx_to].end(); + y_lower_iter = block_rows.begin(); + y_upper_iter = block_rows.end(); - min_cy = y_lower_iter->first; - max_cy = (y_upper_iter - 1)->first; + search_range.ymin = y_lower_iter->first; + search_range.ymax = (y_upper_iter - 1)->first; } } @@ -1021,12 +1055,12 @@ bool find_compatible_compressed_loc_in_range(t_logical_block_type_ptr type, int } //Key in the y-dimension is the compressed index location - cy_to = (y_lower_iter + dy)->first; + to_loc.y = (y_lower_iter + dy)->first; - VTR_ASSERT(cy_to >= min_cy); - VTR_ASSERT(cy_to <= max_cy); + VTR_ASSERT(to_loc.y >= search_range.ymin); + VTR_ASSERT(to_loc.y <= search_range.ymax); - if (cx_from == cx_to && cy_from == cy_to) { + if (from_loc.x == to_loc.x && from_loc.y == to_loc.y && from_loc.layer_num == to_layer_num) { continue; //Same from/to location -- try again for new y-position } else { legal = true; @@ -1036,16 +1070,135 @@ bool find_compatible_compressed_loc_in_range(t_logical_block_type_ptr type, int return legal; } -bool intersect_range_limit_with_floorplan_constraints(t_logical_block_type_ptr type, 
ClusterBlockId b_from, int& min_cx, int& min_cy, int& max_cx, int& max_cy, int& delta_cx) { +std::vector get_compressed_loc(const t_compressed_block_grid& compressed_block_grid, + t_pl_loc grid_loc, + int num_layers) { + //TODO: This function currently only determines the compressed location for the same layer as grid_loc - it should be updated to cover all layers + std::vector compressed_locs(num_layers); + + for (int layer_num = 0; layer_num < num_layers; ++layer_num) { + if (layer_num != grid_loc.layer) { + continue; + } + compressed_locs[layer_num] = compressed_block_grid.grid_loc_to_compressed_loc({grid_loc.x, grid_loc.y, layer_num}); + } + + return compressed_locs; +} + +std::vector get_compressed_loc_approx(const t_compressed_block_grid& compressed_block_grid, + t_pl_loc grid_loc, + int num_layers) { + //TODO: This function currently only determines the compressed location for the same layer as grid_loc - it should be updated to cover all layers + std::vector compressed_locs(num_layers); + + for (int layer_num = 0; layer_num < num_layers; ++layer_num) { + if (layer_num != grid_loc.layer) { + continue; + } + compressed_locs[layer_num] = compressed_block_grid.grid_loc_to_compressed_loc_approx({grid_loc.x, grid_loc.y, layer_num}); + } + + return compressed_locs; +} + +std::vector get_compressed_grid_target_search_range(const t_compressed_block_grid& compressed_block_grid, + const std::vector& compressed_locs, + float rlim, + int num_layers) { + std::vector search_ranges(num_layers, t_bb()); + for (int layer_num = 0; layer_num < num_layers; ++layer_num) { + const auto& layer_loc = compressed_locs[layer_num]; + //TODO: This if condition is added because blocks are only moved in the same layer.
After the update, this condition should be replaced with an assertion + if (layer_loc.x == OPEN || layer_loc.y == OPEN || layer_loc.layer_num == OPEN) { + //No valid compressed location for this layer + continue; + } + int rlim_x_max_range = std::min((int)compressed_block_grid.get_num_columns(layer_num), rlim); + int rlim_y_max_range = std::min((int)compressed_block_grid.get_num_rows(layer_num), rlim); /* for aspect_ratio != 1 case. */ + + search_ranges[layer_num].xmin = std::max(0, layer_loc.x - rlim_x_max_range); + search_ranges[layer_num].xmax = std::min(compressed_block_grid.get_num_columns(layer_num) - 1, layer_loc.x + rlim_x_max_range); + + search_ranges[layer_num].ymin = std::max(0, layer_loc.y - rlim_y_max_range); + search_ranges[layer_num].ymax = std::min(compressed_block_grid.get_num_rows(layer_num) - 1, layer_loc.y + rlim_y_max_range); + } + + return search_ranges; +} + +std::vector get_compressed_grid_bounded_search_range(const t_compressed_block_grid& compressed_block_grid, + const std::vector& from_compressed_loc, + const std::vector& target_compressed_loc, + float rlim, + int num_layers) { + std::vector search_range(num_layers, t_bb()); + + int min_cx, max_cx, min_cy, max_cy; + + for (int layer_num = 0; layer_num < num_layers; layer_num++) { + //TODO: This if condition is added because blocks are only moved in the same layer. After the update, this condition should be replaced with an assertion + if (from_compressed_loc[layer_num].x == OPEN || from_compressed_loc[layer_num].y == OPEN || from_compressed_loc[layer_num].layer_num == OPEN) { + continue; + } + VTR_ASSERT(from_compressed_loc[layer_num].layer_num == layer_num); + VTR_ASSERT(target_compressed_loc[layer_num].layer_num == layer_num); + + int rlim_x_max_range = std::min(compressed_block_grid.get_num_columns(layer_num), rlim); + int rlim_y_max_range = std::min(compressed_block_grid.get_num_rows(layer_num), rlim); /* for aspect_ratio != 1 case. 
*/ + + int cx_from = from_compressed_loc[layer_num].x; + int cy_from = from_compressed_loc[layer_num].y; + if (cx_from == OPEN || cy_from == OPEN) { + continue; + } + + int cx_centroid = target_compressed_loc[layer_num].x; + int cy_centroid = target_compressed_loc[layer_num].y; + + if (cx_centroid < cx_from) { + min_cx = std::max(0, cx_from - rlim_x_max_range); + max_cx = cx_from; + } else { + min_cx = cx_from; + max_cx = std::min(compressed_block_grid.get_num_columns(layer_num) - 1, cx_from + rlim_x_max_range); + } + if (cy_centroid < cy_from) { + min_cy = std::max(0, cy_from - rlim_y_max_range); + max_cy = cy_from; + } else { + min_cy = cy_from; + max_cy = std::min(compressed_block_grid.get_num_rows(layer_num) - 1, cy_from + rlim_y_max_range); + } + + search_range[layer_num] = t_bb(min_cx, max_cx, min_cy, max_cy); + } + + return search_range; +} + +bool intersect_range_limit_with_floorplan_constraints(t_logical_block_type_ptr type, + ClusterBlockId b_from, + t_bb& search_range, + int& delta_cx, + int layer_num) { //Retrieve the compressed block grid for this block type const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[type->index]; - int min_x = compressed_block_grid.compressed_to_grid_x[min_cx]; - int max_x = compressed_block_grid.compressed_to_grid_x[max_cx]; - int min_y = compressed_block_grid.compressed_to_grid_y[min_cy]; - int max_y = compressed_block_grid.compressed_to_grid_y[max_cy]; + auto min_grid_loc = compressed_block_grid.compressed_loc_to_grid_loc({search_range.xmin, + search_range.ymin, + layer_num}); + + auto max_grid_loc = compressed_block_grid.compressed_loc_to_grid_loc({search_range.xmax, + search_range.ymax, + layer_num}); + Region range_reg; - range_reg.set_region_rect(min_x, min_y, max_x, max_y); + range_reg.set_region_rect({min_grid_loc.x, + min_grid_loc.y, + max_grid_loc.x, + max_grid_loc.y, + layer_num}); auto& floorplanning_ctx = g_vpr_ctx.floorplanning(); @@ -1068,12 +1221,16 @@ bool 
intersect_range_limit_with_floorplan_constraints(t_logical_block_type_ptr t if (intersect_reg.empty()) { return false; } else { - vtr::Rect rect = intersect_reg.get_region_rect(); - min_cx = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_x, rect.xmin()); - max_cx = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_x, rect.xmax()); - min_cy = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_y, rect.ymin()); - max_cy = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_y, rect.ymax()); - delta_cx = max_cx - min_cx; + const auto intersect_coord = intersect_reg.get_region_rect(); + VTR_ASSERT(intersect_coord.layer_num == layer_num); + auto min_compressed_loc = compressed_block_grid.grid_loc_to_compressed_loc_approx({intersect_coord.xmin, + intersect_coord.ymin, + layer_num}); + + auto max_compressed_loc = compressed_block_grid.grid_loc_to_compressed_loc_approx({intersect_coord.xmax, + intersect_coord.ymax, + layer_num}); + delta_cx = max_compressed_loc.x - min_compressed_loc.x; } } diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h index 36733624eed..9cdc908fa29 100644 --- a/vpr/src/place/move_utils.h +++ b/vpr/src/place/move_utils.h @@ -214,9 +214,11 @@ std::string move_type_to_string(e_move_type); * cy: the y coordinate of the compressed location * loc: the uncompressed output location (returned in reference) */ -void compressed_grid_to_loc(t_logical_block_type_ptr blk_type, int cx, int cy, t_pl_loc& loc); +void compressed_grid_to_loc(t_logical_block_type_ptr blk_type, + t_physical_tile_loc compressed_loc, + t_pl_loc& to_loc); /** - * @brief find compressed location in a compressed range for a specific type + * @brief find compressed location in a compressed range for a specific type in the given layer (to_layer_num) * * type: defines the moving block type * min_cx, max_cx: the minimum and maximum x coordinates of the range in the compressed grid @@ -224,8 +226,77 @@ void 
compressed_grid_to_loc(t_logical_block_type_ptr blk_type, int cx, int cy, t * cx_from, cy_from: the x and y coordinates of the old location * cx_to, cy_to: the x and y coordinates of the new location on the compressed grid * is_median: true if this is called from find_to_loc_median + * to_layer_num: the layer number of the new location (set by the caller) */ -bool find_compatible_compressed_loc_in_range(t_logical_block_type_ptr type, int min_cx, int max_cx, int min_cy, int max_cy, int delta_cx, int cx_from, int cy_from, int& cx_to, int& cy_to, bool is_median); +bool find_compatible_compressed_loc_in_range(t_logical_block_type_ptr type, + const int delta_cx, + const t_physical_tile_loc& from_loc, + t_bb search_range, + t_physical_tile_loc& to_loc, + bool is_median, + int to_layer_num); + +/** + * @brief Get the compressed loc from the uncompressed loc (grid_loc) + * @note This assumes the grid_loc corresponds to a location of the block type that compressed_block_grid stores its + * compressed location. Otherwise, it would raise an assertion error. + * @param compressed_block_grid The class that stores the compressed block grid of the block + * @param grid_loc The actual location of the block + * @param num_layers The number of layers (dice) of the FPGA + * @return Returns the compressed location of the block on each layer + */ +std::vector get_compressed_loc(const t_compressed_block_grid& compressed_block_grid, + t_pl_loc grid_loc, + int num_layers); + +/** + * @brief Get the compressed loc from the uncompressed loc (grid_loc). Return the nearest compressed location + * if grid_loc doesn't fall on a block of the type that compressed_block_grid stores its compressed location.
+ * @param compressed_block_grid + * @param grid_loc + * @param num_layers + * @return + */ +std::vector get_compressed_loc_approx(const t_compressed_block_grid& compressed_block_grid, + t_pl_loc grid_loc, + int num_layers); + +/** + * @brief This function calculates the search range around the compressed locs, based on the given rlim value and + * the number of rows/columns containing the same block type as the one that compressed_loc belongs to. + * If rlim is greater than the number of columns containing the block type on the right side of the compressed_loc, + * the search range from the right is limited by that number. Similar constraints apply to other sides as well. The + * function returns the final search range based on these conditions. + * @param compressed_block_grid + * @param compressed_locs + * @param rlim + * @param num_layers + * @return A compressed search range for each layer + */ +std::vector get_compressed_grid_target_search_range(const t_compressed_block_grid& compressed_block_grid, + const std::vector& compressed_locs, + float rlim, + int num_layers); + +/** + * @brief This function calculates the search range based on the given rlim value and the number of columns/rows + * containing the same resource type as the one specified in the compressed_block_grid. + * The search range is determined in a square shape, with from_compressed_loc as one of the corners and + * directed towards the target_compressed_loc. The function returns the final search range based on these conditions. + * @Note This function differs from get_compressed_grid_target_search_range as it doesn't have from_compressed_loc + * in the center of the search range. 
+ * @param compressed_block_grid + * @param from_compressed_loc + * @param target_compressed_loc + * @param rlim + * @param num_layers + * @return + */ +std::vector get_compressed_grid_bounded_search_range(const t_compressed_block_grid& compressed_block_grid, + const std::vector& from_compressed_loc, + const std::vector& target_compressed_loc, + float rlim, + int num_layers); /* * If the block to be moved (b_from) has a floorplan constraint, this routine changes the max and min coords @@ -241,8 +312,14 @@ bool find_compatible_compressed_loc_in_range(t_logical_block_type_ptr type, int * this routine is done for cpu time optimization, so we do not have to necessarily check each * complicated case to get correct functionality during place moves. * + * The intersection takes place in the layer (die) specified by layer_num. + * */ -bool intersect_range_limit_with_floorplan_constraints(t_logical_block_type_ptr type, ClusterBlockId b_from, int& min_cx, int& min_cy, int& max_cx, int& max_cy, int& delta_cx); +bool intersect_range_limit_with_floorplan_constraints(t_logical_block_type_ptr type, + ClusterBlockId b_from, + t_bb& search_range, + int& delta_cx, + int layer_num); std::string e_move_result_to_string(e_move_result move_outcome); diff --git a/vpr/src/place/noc_place_utils.cpp b/vpr/src/place/noc_place_utils.cpp index 0a53e936d51..11d9121ff2b 100644 --- a/vpr/src/place/noc_place_utils.cpp +++ b/vpr/src/place/noc_place_utils.cpp @@ -474,7 +474,7 @@ e_create_move propose_router_swap(t_pl_blocks_to_be_moved& blocks_affected, floa t_pl_loc from = place_ctx.block_locs[b_from].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); - auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type(from.x, from.y); + auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); // now choose a compatible block to swap with diff --git a/vpr/src/place/place.cpp 
b/vpr/src/place/place.cpp index 6b21212cb2f..b6696bdb8ac 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -447,6 +447,13 @@ void try_place(const Netlist<>& net_list, * width of the widest channel. Place_cost_exp says what exponent the * * width should be taken to when calculating costs. This allows a * * greater bias for anisotropic architectures. */ + + /* + * Currently, the functions that require is_flat as their parameter and are called during placement should + * receive is_flat as false. For example, if the RR graph of router lookahead is built here, it should be as + * if is_flat is false, even if is_flat is set to true from the command line. + */ + VTR_ASSERT(!is_flat); auto& device_ctx = g_vpr_ctx.device(); auto& atom_ctx = g_vpr_ctx.atom(); auto& cluster_ctx = g_vpr_ctx.clustering(); @@ -761,8 +768,12 @@ void try_place(const Netlist<>& net_list, /* Set the temperature low to ensure that initial placement quality will be preserved */ first_t = EPSILON; - t_annealing_state state(annealing_sched, first_t, first_rlim, - first_move_lim, first_crit_exponent); + t_annealing_state state(annealing_sched, + first_t, + first_rlim, + first_move_lim, + first_crit_exponent, + device_ctx.grid.get_num_layers()); /* Update the starting temperature for placement annealing to a more appropriate value */ state.t = starting_t(&state, &costs, annealing_sched, @@ -2962,53 +2973,66 @@ static int check_block_placement_consistency() { cluster_ctx.clb_nlist.blocks().size(), 0); /* Step through device grid and placement. 
Check it against blocks */ - for (size_t i = 0; i < device_ctx.grid.width(); i++) - for (size_t j = 0; j < device_ctx.grid.height(); j++) { - const auto& type = device_ctx.grid.get_physical_type(i, j); - if (place_ctx.grid_blocks[i][j].usage - > type->capacity) { - VTR_LOG_ERROR( - "%d blocks were placed at grid location (%zu,%zu), but location capacity is %d.\n", - place_ctx.grid_blocks[i][j].usage, i, j, - type->capacity); - error++; - } - int usage_check = 0; - for (int k = 0; k < type->capacity; k++) { - auto bnum = place_ctx.grid_blocks[i][j].blocks[k]; - if (EMPTY_BLOCK_ID == bnum || INVALID_BLOCK_ID == bnum) - continue; - - auto logical_block = cluster_ctx.clb_nlist.block_type(bnum); - auto physical_tile = type; - - if (physical_tile_type(bnum) != physical_tile) { + for (int layer_num = 0; layer_num < (int)device_ctx.grid.get_num_layers(); layer_num++) { + for (int i = 0; i < (int)device_ctx.grid.width(); i++) { + for (int j = 0; j < (int)device_ctx.grid.height(); j++) { + const t_physical_tile_loc tile_loc(i, j, layer_num); + const auto& type = device_ctx.grid.get_physical_type(tile_loc); + if (place_ctx.grid_blocks.get_usage(tile_loc) > type->capacity) { VTR_LOG_ERROR( - "Block %zu type (%s) does not match grid location (%zu,%zu) type (%s).\n", - size_t(bnum), logical_block->name, i, j, - physical_tile->name); + "%d blocks were placed at grid location (%d,%d,%d), but location capacity is %d.\n", + place_ctx.grid_blocks.get_usage(tile_loc), i, j, layer_num, + type->capacity); error++; } - - auto& loc = place_ctx.block_locs[bnum].loc; - if (loc.x != int(i) || loc.y != int(j) - || !is_sub_tile_compatible(physical_tile, logical_block, - loc.sub_tile)) { + int usage_check = 0; + for (int k = 0; k < type->capacity; k++) { + auto bnum = place_ctx.grid_blocks.block_at_location({i, j, k, layer_num}); + if (EMPTY_BLOCK_ID == bnum || INVALID_BLOCK_ID == bnum) + continue; + + auto logical_block = cluster_ctx.clb_nlist.block_type(bnum); + auto physical_tile = type; + + 
if (physical_tile_type(bnum) != physical_tile) { + VTR_LOG_ERROR( + "Block %zu type (%s) does not match grid location (%zu,%zu, %d) type (%s).\n", + size_t(bnum), logical_block->name, i, j, layer_num, physical_tile->name); + error++; + } + + auto& loc = place_ctx.block_locs[bnum].loc; + if (loc.x != i || loc.y != j || loc.layer != layer_num + || !is_sub_tile_compatible(physical_tile, logical_block, + loc.sub_tile)) { + VTR_LOG_ERROR( + "Block %zu's location is (%d,%d,%d) but found in grid at (%zu,%zu,%d,%d).\n", + size_t(bnum), + loc.x, + loc.y, + loc.sub_tile, + tile_loc.x, + tile_loc.y, + tile_loc.layer_num, + layer_num); + error++; + } + ++usage_check; + bdone[bnum]++; + } + if (usage_check != place_ctx.grid_blocks.get_usage(tile_loc)) { VTR_LOG_ERROR( - "Block %zu's location is (%d,%d,%d) but found in grid at (%zu,%zu,%d).\n", - size_t(bnum), loc.x, loc.y, loc.sub_tile, i, j, k); + "%d block(s) were placed at location (%d,%d,%d), but location contains %d block(s).\n", + place_ctx.grid_blocks.get_usage(tile_loc), + tile_loc.x, + tile_loc.y, + tile_loc.layer_num, + usage_check); error++; } - ++usage_check; - bdone[bnum]++; - } - if (usage_check != place_ctx.grid_blocks[i][j].usage) { - VTR_LOG_ERROR( - "%d block(s) were placed at location (%zu,%zu), but location contains %d block(s).\n", - place_ctx.grid_blocks[i][j].usage, i, j, usage_check); - error++; } } + } /* Check that every block exists in the device_ctx.grid and cluster_ctx.blocks arrays somewhere. 
*/ for (auto blk_id : cluster_ctx.clb_nlist.blocks()) @@ -3048,7 +3072,7 @@ int check_macro_placement_consistency() { } // Then check the place_ctx.grid data structure - if (place_ctx.grid_blocks[member_pos.x][member_pos.y].blocks[member_pos.sub_tile] + if (place_ctx.grid_blocks.block_at_location(member_pos) != member_iblk) { VTR_LOG_ERROR( "Block %zu in pl_macro #%zu is not placed in the proper orientation.\n", @@ -3167,7 +3191,7 @@ static void print_resources_utilization() { auto block_loc = place_ctx.block_locs[blk_id]; auto loc = block_loc.loc; - auto physical_tile = device_ctx.grid.get_physical_type(loc.x, loc.y); + auto physical_tile = device_ctx.grid.get_physical_type({loc.x, loc.y, loc.layer}); auto logical_block = cluster_ctx.clb_nlist.block_type(blk_id); num_type_instances[logical_block]++; diff --git a/vpr/src/place/place_constraints.cpp b/vpr/src/place/place_constraints.cpp index a0a811cb11d..f1c5045251b 100644 --- a/vpr/src/place/place_constraints.cpp +++ b/vpr/src/place/place_constraints.cpp @@ -80,9 +80,13 @@ PartitionRegion update_macro_head_pr(const t_pl_macro& pl_macro, const Partition Region modified_reg; auto offset = pl_macro.members[imember].offset; - vtr::Rect reg_rect = block_regions[i].get_region_rect(); + const auto block_reg_coord = block_regions[i].get_region_rect(); - modified_reg.set_region_rect(reg_rect.xmin() - offset.x, reg_rect.ymin() - offset.y, reg_rect.xmax() - offset.x, reg_rect.ymax() - offset.y); + modified_reg.set_region_rect({block_reg_coord.xmin - offset.x, + block_reg_coord.ymin - offset.y, + block_reg_coord.xmax - offset.x, + block_reg_coord.ymax - offset.y, + block_reg_coord.layer_num}); //check that subtile is not an invalid value before changing, otherwise it just stays -1 if (block_regions[i].get_sub_tile() != NO_SUBTILE) { @@ -118,9 +122,13 @@ PartitionRegion update_macro_member_pr(PartitionRegion& head_pr, const t_pl_offs for (unsigned int i = 0; i < block_regions.size(); i++) { Region modified_reg; - vtr::Rect 
reg_rect = block_regions[i].get_region_rect(); + const auto block_reg_coord = block_regions[i].get_region_rect(); - modified_reg.set_region_rect(reg_rect.xmin() + offset.x, reg_rect.ymin() + offset.y, reg_rect.xmax() + offset.x, reg_rect.ymax() + offset.y); + modified_reg.set_region_rect({block_reg_coord.xmin + offset.x, + block_reg_coord.ymin + offset.y, + block_reg_coord.xmax + offset.x, + block_reg_coord.ymax + offset.y, + block_reg_coord.layer_num}); //check that subtile is not an invalid value before changing, otherwise it just stays -1 if (block_regions[i].get_sub_tile() != NO_SUBTILE) { @@ -159,14 +167,19 @@ void propagate_place_constraints() { auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); auto& device_ctx = g_vpr_ctx.device(); - //Create a PartitionRegion with grid dimensions - //Will be used to check that updated PartitionRegions are within grid bounds - int width = device_ctx.grid.width() - 1; - int height = device_ctx.grid.height() - 1; + int num_layers = device_ctx.grid.get_num_layers(); Region grid_reg; - grid_reg.set_region_rect(0, 0, width, height); PartitionRegion grid_pr; - grid_pr.add_to_part_region(grid_reg); + + for (int layer_num = 0; layer_num < num_layers; layer_num++) { + //Create a PartitionRegion with grid dimensions + //Will be used to check that updated PartitionRegions are within grid bounds + int width = device_ctx.grid.width() - 1; + int height = device_ctx.grid.height() - 1; + + grid_reg.set_region_rect({0, 0, width, height, layer_num}); + grid_pr.add_to_part_region(grid_reg); + } for (auto pl_macro : place_ctx.pl_macros) { if (is_macro_constrained(pl_macro)) { @@ -296,12 +309,13 @@ void mark_fixed_blocks() { */ int region_tile_cover(const Region& reg, t_logical_block_type_ptr block_type, t_pl_loc& loc) { auto& device_ctx = g_vpr_ctx.device(); - vtr::Rect rb = reg.get_region_rect(); + const auto reg_coord = reg.get_region_rect(); + const int layer_num = reg.get_layer_num(); int num_tiles = 0; - for (int x = rb.xmin(); 
x <= rb.xmax(); x++) { - for (int y = rb.ymin(); y <= rb.ymax(); y++) { - const auto& tile = device_ctx.grid.get_physical_type(x, y); + for (int x = reg_coord.xmin; x <= reg_coord.xmax; x++) { + for (int y = reg_coord.ymin; y <= reg_coord.ymax; y++) { + const auto& tile = device_ctx.grid.get_physical_type({x, y, reg_coord.layer_num}); /* * If the tile at the grid location is not compatible with the cluster block @@ -322,6 +336,7 @@ int region_tile_cover(const Region& reg, t_logical_block_type_ptr block_type, t_ loc.x = x; loc.y = y; loc.sub_tile = reg.get_sub_tile(); + loc.layer = layer_num; if (num_tiles > 1) { return num_tiles; } @@ -342,6 +357,7 @@ int region_tile_cover(const Region& reg, t_logical_block_type_ptr block_type, t_ loc.x = x; loc.y = y; loc.sub_tile = z; + loc.layer = layer_num; } if (num_tiles > 1) { return num_tiles; @@ -368,13 +384,21 @@ bool is_pr_size_one(PartitionRegion& pr, t_logical_block_type_ptr block_type, t_ bool pr_size_one; int pr_size = 0; int reg_size; - - Region intersect_reg; - intersect_reg.set_region_rect(0, 0, device_ctx.grid.width() - 1, device_ctx.grid.height() - 1); - Region current_reg; + int num_layers = device_ctx.grid.get_num_layers(); + + std::vector intersect_reg(num_layers); + for (int layer_num = 0; layer_num < num_layers; ++layer_num) { + intersect_reg[layer_num].set_region_rect({0, + 0, + (int)device_ctx.grid.width() - 1, + (int)device_ctx.grid.height() - 1, + layer_num}); + } + std::vector current_reg(num_layers); for (unsigned int i = 0; i < regions.size(); i++) { reg_size = region_tile_cover(regions[i], block_type, loc); + int layer_num = regions[i].get_layer_num(); /* * If multiple regions in the PartitionRegion all have size 1, @@ -387,9 +411,9 @@ bool is_pr_size_one(PartitionRegion& pr, t_logical_block_type_ptr block_type, t_ */ if (reg_size == 1) { //get the exact x, y, subtile location covered by the current region (regions[i]) - current_reg.set_region_rect(loc.x, loc.y, loc.x, loc.y); - 
current_reg.set_sub_tile(loc.sub_tile); - intersect_reg = intersection(intersect_reg, current_reg); + current_reg[layer_num].set_region_rect({loc.x, loc.y, loc.x, loc.y, layer_num}); + current_reg[layer_num].set_sub_tile(loc.sub_tile); + intersect_reg[layer_num] = intersection(intersect_reg[layer_num], current_reg[layer_num]); if (i == 0 || intersect_reg.empty()) { pr_size = pr_size + reg_size; diff --git a/vpr/src/place/place_constraints.h b/vpr/src/place/place_constraints.h index c1cf889f395..5fbc481fd2b 100644 --- a/vpr/src/place/place_constraints.h +++ b/vpr/src/place/place_constraints.h @@ -126,7 +126,7 @@ int get_part_reg_size(PartitionRegion& pr, t_logical_block_type_ptr block_type, /* * Return the floorplan score that will be used for sorting blocks during initial placement. This score is the - * total number of subtilesfor the block type in the grid, minus the number of subtiles in the block's floorplan PartitionRegion. + * total number of subtiles for the block type in the grid, minus the number of subtiles in the block's floorplan PartitionRegion. * The resulting number is the number of tiles outside the block's floorplan region, meaning the higher * it is, the more difficult the block is to place. */ diff --git a/vpr/src/place/place_delay_model.cpp b/vpr/src/place/place_delay_model.cpp index c6e27c39c1f..44d8c4a0b49 100644 --- a/vpr/src/place/place_delay_model.cpp +++ b/vpr/src/place/place_delay_model.cpp @@ -27,26 +27,30 @@ #endif /* VTR_ENABLE_CAPNPROTO */ ///@brief DeltaDelayModel methods. 
-float DeltaDelayModel::delay(int from_x, int from_y, int /*from_pin*/, int to_x, int to_y, int /*to_pin*/) const { +float DeltaDelayModel::delay(int from_x, int from_y, int /*from_pin*/, int to_x, int to_y, int /*to_pin*/, int layer_num) const { int delta_x = std::abs(from_x - to_x); int delta_y = std::abs(from_y - to_y); - return delays_[delta_x][delta_y]; + return delays_[layer_num][delta_x][delta_y]; } void DeltaDelayModel::dump_echo(std::string filepath) const { FILE* f = vtr::fopen(filepath.c_str(), "w"); fprintf(f, " "); - for (size_t dx = 0; dx < delays_.dim_size(0); ++dx) { - fprintf(f, " %9zu", dx); - } - fprintf(f, "\n"); - for (size_t dy = 0; dy < delays_.dim_size(1); ++dy) { - fprintf(f, "%9zu", dy); - for (size_t dx = 0; dx < delays_.dim_size(0); ++dx) { - fprintf(f, " %9.2e", delays_[dx][dy]); + for (size_t layer_num = 0; layer_num < delays_.dim_size(0); ++layer_num) { + fprintf(f, " %9zu", layer_num); + fprintf(f, "\n"); + for (size_t dx = 0; dx < delays_.dim_size(1); ++dx) { + fprintf(f, " %9zu", dx); } fprintf(f, "\n"); + for (size_t dy = 0; dy < delays_.dim_size(2); ++dy) { + fprintf(f, "%9zu", dy); + for (size_t dx = 0; dx < delays_.dim_size(1); ++dx) { + fprintf(f, " %9.2e", delays_[layer_num][dx][dy]); + } + fprintf(f, "\n"); + } } vtr::fclose(f); } @@ -56,13 +60,13 @@ const DeltaDelayModel* OverrideDelayModel::base_delay_model() const { } ///@brief OverrideDelayModel methods. 
-float OverrideDelayModel::delay(int from_x, int from_y, int from_pin, int to_x, int to_y, int to_pin) const { +float OverrideDelayModel::delay(int from_x, int from_y, int from_pin, int to_x, int to_y, int to_pin, int layer_num) const { //First check to if there is an override delay value auto& device_ctx = g_vpr_ctx.device(); auto& grid = device_ctx.grid; - t_physical_tile_type_ptr from_type_ptr = grid.get_physical_type(from_x, from_y); - t_physical_tile_type_ptr to_type_ptr = grid.get_physical_type(to_x, to_y); + t_physical_tile_type_ptr from_type_ptr = grid.get_physical_type({from_x, from_y, layer_num}); + t_physical_tile_type_ptr to_type_ptr = grid.get_physical_type({to_x, to_y, layer_num}); t_override override_key; override_key.from_type = from_type_ptr->index; @@ -82,7 +86,7 @@ float OverrideDelayModel::delay(int from_x, int from_y, int from_pin, int to_x, delay_val = override_iter->second; } else { //Fall back to the base delay model if no override was found - delay_val = base_delay_model_->delay(from_x, from_y, from_pin, to_x, to_y, to_pin); + delay_val = base_delay_model_->delay(from_x, from_y, from_pin, to_x, to_y, to_pin, layer_num); } return delay_val; @@ -221,7 +225,7 @@ void DeltaDelayModel::read(const std::string& file) { // // The second argument should be of type Matrix::Reader where X is the // capnproto element type. - ToNdMatrix<2, VprFloatEntry, float>(&delays_, model.getDelays(), ToFloat); + ToNdMatrix<3, VprFloatEntry, float>(&delays_, model.getDelays(), ToFloat); } void DeltaDelayModel::write(const std::string& file) const { @@ -237,7 +241,7 @@ void DeltaDelayModel::write(const std::string& file) const { // Matrix message. It is the mirror function of ToNdMatrix described in // read above. auto delay_values = model.getDelays(); - FromNdMatrix<2, VprFloatEntry, float>(&delay_values, delays_, FromFloat); + FromNdMatrix<3, VprFloatEntry, float>(&delay_values, delays_, FromFloat); // writeMessageToFile writes message to the specified file. 
writeMessageToFile(file, &builder); @@ -250,9 +254,9 @@ void OverrideDelayModel::read(const std::string& file) { ::capnp::ReaderOptions opts = default_large_capnp_opts(); ::capnp::FlatArrayMessageReader reader(f.getData(), opts); - vtr::Matrix delays; + vtr::NdMatrix delays; auto model = reader.getRoot(); - ToNdMatrix<2, VprFloatEntry, float>(&delays, model.getDelays(), ToFloat); + ToNdMatrix<3, VprFloatEntry, float>(&delays, model.getDelays(), ToFloat); base_delay_model_ = std::make_unique(delays, is_flat_); @@ -280,7 +284,7 @@ void OverrideDelayModel::write(const std::string& file) const { auto model = builder.initRoot(); auto delays = model.getDelays(); - FromNdMatrix<2, VprFloatEntry, float>(&delays, base_delay_model_->delays(), FromFloat); + FromNdMatrix<3, VprFloatEntry, float>(&delays, base_delay_model_->delays(), FromFloat); // Non-scalar capnproto fields should be first initialized with // init(count), and then accessed from the returned @@ -344,6 +348,7 @@ float comp_td_single_connection_delay(const PlaceDelayModel* delay_model, Cluste int source_y = place_ctx.block_locs[source_block].loc.y; int sink_x = place_ctx.block_locs[sink_block].loc.x; int sink_y = place_ctx.block_locs[sink_block].loc.y; + int sink_layer_num = place_ctx.block_locs[sink_block].loc.layer; /** * This heuristic only considers delta_x and delta_y, a much better @@ -357,7 +362,8 @@ float comp_td_single_connection_delay(const PlaceDelayModel* delay_model, Cluste source_block_ipin, sink_x, sink_y, - sink_block_ipin); + sink_block_ipin, + sink_layer_num); if (delay_source_to_sink < 0) { VPR_ERROR(VPR_ERROR_PLACE, "in comp_td_single_connection_delay: Bad delay_source_to_sink value %g from %s (at %d,%d) to %s (at %d,%d)\n" diff --git a/vpr/src/place/place_delay_model.h b/vpr/src/place/place_delay_model.h index f8b9f72f1b8..09b6969c011 100644 --- a/vpr/src/place/place_delay_model.h +++ b/vpr/src/place/place_delay_model.h @@ -62,7 +62,7 @@ class PlaceDelayModel { * * Either compute or read 
methods must be invoked before invoking delay. */ - virtual float delay(int from_x, int from_y, int from_pin, int to_x, int to_y, int to_pin) const = 0; + virtual float delay(int from_x, int from_y, int from_pin, int to_x, int to_y, int to_pin, int layer_num) const = 0; ///@brief Dumps the delay model to an echo file. virtual void dump_echo(std::string filename) const = 0; @@ -87,7 +87,7 @@ class DeltaDelayModel : public PlaceDelayModel { public: DeltaDelayModel(bool is_flat) : is_flat_(is_flat) {} - DeltaDelayModel(vtr::Matrix delta_delays, bool is_flat) + DeltaDelayModel(vtr::NdMatrix delta_delays, bool is_flat) : delays_(std::move(delta_delays)) , is_flat_(is_flat) {} @@ -96,17 +96,17 @@ class DeltaDelayModel : public PlaceDelayModel { const t_placer_opts& placer_opts, const t_router_opts& router_opts, int longest_length) override; - float delay(int from_x, int from_y, int /*from_pin*/, int to_x, int to_y, int /*to_pin*/) const override; + float delay(int from_x, int from_y, int /*from_pin*/, int to_x, int to_y, int /*to_pin*/, int layer_num) const override; void dump_echo(std::string filepath) const override; void read(const std::string& file) override; void write(const std::string& file) const override; - const vtr::Matrix& delays() const { + const vtr::NdMatrix& delays() const { return delays_; } private: - vtr::Matrix delays_; + vtr::NdMatrix delays_; // [0..num_layers-1][0..max_dx][0..max_dy] bool is_flat_; }; @@ -119,7 +119,9 @@ class OverrideDelayModel : public PlaceDelayModel { const t_placer_opts& placer_opts, const t_router_opts& router_opts, int longest_length) override; - float delay(int from_x, int from_y, int from_pin, int to_x, int to_y, int to_pin) const override; + // returns delay from the specified (x,y) to the specified (x,y) with both endpoints on layer_num and the + // specified from and to pins + float delay(int from_x, int from_y, int from_pin, int to_x, int to_y, int to_pin, int layer_num) const override; void dump_echo(std::string 
filepath) const override; void read(const std::string& file) override; diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp index f8afaeb1363..75ff2d2bf12 100644 --- a/vpr/src/place/place_util.cpp +++ b/vpr/src/place/place_util.cpp @@ -10,7 +10,7 @@ #include "place_constraints.h" /* File-scope routines */ -static vtr::Matrix init_grid_blocks(); +static GridBlock init_grid_blocks(); /** * @brief Initialize the placer's block-grid dual direction mapping. @@ -38,16 +38,19 @@ void init_placement_context() { * The container at each grid block location should have a length equal to the * subtile capacity of that block. Unused subtile would be marked EMPTY_BLOCK_ID. */ -static vtr::Matrix init_grid_blocks() { +static GridBlock init_grid_blocks() { auto& device_ctx = g_vpr_ctx.device(); + int num_layers = device_ctx.grid.get_num_layers(); /* Structure should have the same dimensions as the grid. */ - auto grid_blocks = vtr::Matrix({device_ctx.grid.width(), device_ctx.grid.height()}); + auto grid_blocks = GridBlock(device_ctx.grid.width(), device_ctx.grid.height(), num_layers); - for (size_t x = 0; x < device_ctx.grid.width(); ++x) { - for (size_t y = 0; y < device_ctx.grid.height(); ++y) { - auto type = device_ctx.grid.get_physical_type(x, y); - grid_blocks[x][y].blocks.resize(type->capacity, EMPTY_BLOCK_ID); + for (int layer_num = 0; layer_num < num_layers; ++layer_num) { + for (int x = 0; x < (int)device_ctx.grid.width(); ++x) { + for (int y = 0; y < (int)device_ctx.grid.height(); ++y) { + auto type = device_ctx.grid.get_physical_type({x, y, layer_num}); + grid_blocks.initialized_grid_block_at_location({x, y, layer_num}, type->capacity); + } } } return grid_blocks; @@ -75,7 +78,8 @@ t_annealing_state::t_annealing_state(const t_annealing_sched& annealing_sched, float first_t, float first_rlim, int first_move_lim, - float first_crit_exponent) { + float first_crit_exponent, + int num_layers) { num_temps = 0; alpha = annealing_sched.alpha_min; t = 
first_t; @@ -91,6 +95,8 @@ t_annealing_state::t_annealing_state(const t_annealing_sched& annealing_sched, float first_t, float first_rlim, int first_move_lim, move_lim = move_lim_max; } + NUM_LAYERS = num_layers; + /* Store this inverse value for speed when updating crit_exponent. */ INVERSE_DELTA_RLIM = 1 / (first_rlim - FINAL_RLIM); @@ -347,8 +353,9 @@ void load_grid_blocks_from_block_locs() { VTR_ASSERT(location.x < (int)device_ctx.grid.width()); VTR_ASSERT(location.y < (int)device_ctx.grid.height()); - place_ctx.grid_blocks[location.x][location.y].blocks[location.sub_tile] = blk_id; - place_ctx.grid_blocks[location.x][location.y].usage++; + place_ctx.grid_blocks.set_block_at_location(location, blk_id); + place_ctx.grid_blocks.set_usage({location.x, location.y, location.layer}, + place_ctx.grid_blocks.get_usage({location.x, location.y, location.layer}) + 1); } } @@ -358,17 +365,19 @@ void zero_initialize_grid_blocks() { /* Initialize all occupancy to zero. */ - for (size_t i = 0; i < device_ctx.grid.width(); i++) { - for (size_t j = 0; j < device_ctx.grid.height(); j++) { - place_ctx.grid_blocks[i][j].usage = 0; - auto tile = device_ctx.grid.get_physical_type(i, j); + for (int layer_num = 0; layer_num < (int)device_ctx.grid.get_num_layers(); layer_num++) { + for (int i = 0; i < (int)device_ctx.grid.width(); i++) { + for (int j = 0; j < (int)device_ctx.grid.height(); j++) { + place_ctx.grid_blocks.set_usage({i, j, layer_num}, 0); + auto tile = device_ctx.grid.get_physical_type({i, j, layer_num}); - for (auto sub_tile : tile->sub_tiles) { - auto capacity = sub_tile.capacity; + for (auto sub_tile : tile->sub_tiles) { + auto capacity = sub_tile.capacity; - for (int k = 0; k < capacity.total(); k++) { - if (place_ctx.grid_blocks[i][j].blocks[k + capacity.low] != INVALID_BLOCK_ID) { - place_ctx.grid_blocks[i][j].blocks[k + capacity.low] = EMPTY_BLOCK_ID; + for (int k = 0; k < capacity.total(); k++) { + if (place_ctx.grid_blocks.block_at_location({i, j, k + capacity.low, layer_num}) != INVALID_BLOCK_ID) { + 
place_ctx.grid_blocks.set_block_at_location({i, j, k + capacity.low, layer_num}, EMPTY_BLOCK_ID); + } } } } @@ -398,27 +407,30 @@ void alloc_and_load_legal_placement_locations(std::vectorsub_tiles) { - auto capacity = sub_tile.capacity; - - for (int k = 0; k < capacity.total(); k++) { - if (place_ctx.grid_blocks[i][j].blocks[k + capacity.low] == INVALID_BLOCK_ID) { - continue; - } - // If this is the anchor position of a block, add it to the legal_pos. - // Otherwise don't, so large blocks aren't added multiple times. - if (device_ctx.grid.get_width_offset(i, j) == 0 && device_ctx.grid.get_height_offset(i, j) == 0) { - int itype = tile->index; - int isub_tile = sub_tile.index; - t_pl_loc temp_loc; - temp_loc.x = i; - temp_loc.y = j; - temp_loc.sub_tile = k + capacity.low; - legal_pos[itype][isub_tile].push_back(temp_loc); + for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { + for (int i = 0; i < (int)device_ctx.grid.width(); i++) { + for (int j = 0; j < (int)device_ctx.grid.height(); j++) { + auto tile = device_ctx.grid.get_physical_type({i, j, layer_num}); + + for (const auto& sub_tile : tile->sub_tiles) { + auto capacity = sub_tile.capacity; + + for (int k = 0; k < capacity.total(); k++) { + if (place_ctx.grid_blocks.block_at_location({i, j, k + capacity.low, layer_num}) == INVALID_BLOCK_ID) { + continue; + } + // If this is the anchor position of a block, add it to the legal_pos. + // Otherwise don't, so large blocks aren't added multiple times. 
+ if (device_ctx.grid.get_width_offset({i, j, layer_num}) == 0 && device_ctx.grid.get_height_offset({i, j, layer_num}) == 0) { + int itype = tile->index; + int isub_tile = sub_tile.index; + t_pl_loc temp_loc; + temp_loc.x = i; + temp_loc.y = j; + temp_loc.sub_tile = k + capacity.low; + temp_loc.layer = layer_num; + legal_pos[itype][isub_tile].push_back(temp_loc); + } } } } @@ -442,12 +454,10 @@ void set_block_location(ClusterBlockId blk_id, const t_pl_loc& location) { } //Set the location of the block - place_ctx.block_locs[blk_id].loc.x = location.x; - place_ctx.block_locs[blk_id].loc.y = location.y; - place_ctx.block_locs[blk_id].loc.sub_tile = location.sub_tile; + place_ctx.block_locs[blk_id].loc = location; //Check if block is at an illegal location - auto physical_tile = device_ctx.grid.get_physical_type(location.x, location.y); + auto physical_tile = device_ctx.grid.get_physical_type({location.x, location.y, location.layer}); auto logical_block = cluster_ctx.clb_nlist.block_type(blk_id); if (location.sub_tile >= physical_tile->capacity || location.sub_tile < 0) { @@ -455,13 +465,18 @@ void set_block_location(ClusterBlockId blk_id, const t_pl_loc& location) { } if (!is_sub_tile_compatible(physical_tile, logical_block, place_ctx.block_locs[blk_id].loc.sub_tile)) { - VPR_THROW(VPR_ERROR_PLACE, "Attempt to place block %s with ID %d at illegal location (%d, %d). \n", block_name.c_str(), blk_id, location.x, location.y); + VPR_THROW(VPR_ERROR_PLACE, "Attempt to place block %s with ID %d at illegal location (%d,%d,%d). 
\n", + block_name.c_str(), + blk_id, + location.x, + location.y, + location.layer); } //Mark the grid location and usage of the block - place_ctx.grid_blocks[location.x][location.y].blocks[location.sub_tile] = blk_id; - place_ctx.grid_blocks[location.x][location.y].usage++; - + place_ctx.grid_blocks.set_block_at_location(location, blk_id); + place_ctx.grid_blocks.set_usage({location.x, location.y, location.layer}, + place_ctx.grid_blocks.get_usage({location.x, location.y, location.layer}) + 1); place_sync_external_block_connections(blk_id); } @@ -482,7 +497,7 @@ bool macro_can_be_placed(t_pl_macro pl_macro, t_pl_loc head_pos, bool check_all_ t_pl_loc member_pos = head_pos + pl_macro.members[imember].offset; //Check that the member location is on the grid - if (!is_loc_on_chip(member_pos.x, member_pos.y)) { + if (!is_loc_on_chip({member_pos.x, member_pos.y, member_pos.layer})) { mac_can_be_placed = false; break; } @@ -519,8 +534,8 @@ bool macro_can_be_placed(t_pl_macro pl_macro, t_pl_loc head_pos, bool check_all_ // Then check whether the location could still accommodate more blocks // Also check whether the member position is valid, and the member_z is allowed at that location on the grid if (member_pos.x < int(device_ctx.grid.width()) && member_pos.y < int(device_ctx.grid.height()) - && is_tile_compatible(device_ctx.grid.get_physical_type(member_pos.x, member_pos.y), block_type) - && place_ctx.grid_blocks[member_pos.x][member_pos.y].blocks[member_pos.sub_tile] == EMPTY_BLOCK_ID) { + && is_tile_compatible(device_ctx.grid.get_physical_type({member_pos.x, member_pos.y, member_pos.layer}), block_type) + && place_ctx.grid_blocks.block_at_location(member_pos) == EMPTY_BLOCK_ID) { // Can still accommodate blocks here, check the next position continue; } else { diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h index 06373920bb9..cc903cf4f71 100644 --- a/vpr/src/place/place_util.h +++ b/vpr/src/place/place_util.h @@ -133,13 +133,15 @@ class 
t_annealing_state { float UPPER_RLIM; float FINAL_RLIM = 1.; float INVERSE_DELTA_RLIM; + int NUM_LAYERS = 1; public: //Constructor t_annealing_state(const t_annealing_sched& annealing_sched, float first_t, float first_rlim, int first_move_lim, - float first_crit_exponent); + float first_crit_exponent, + int num_layers); public: //Mutator bool outer_loop_update(float success_rate, @@ -229,10 +231,13 @@ void alloc_and_load_legal_placement_locations(std::vector= 0 && x < int(device_ctx.grid.width()) && y >= 0 && y < int(device_ctx.grid.height())); + return (layer_num >= 0 && layer_num < int(grid.get_num_layers()) && x >= 0 && x < int(grid.width()) && y >= 0 && y < int(grid.height())); } /** diff --git a/vpr/src/place/timing_place_lookup.cpp b/vpr/src/place/timing_place_lookup.cpp index 55667ecb8d0..74682d220f3 100644 --- a/vpr/src/place/timing_place_lookup.cpp +++ b/vpr/src/place/timing_place_lookup.cpp @@ -69,6 +69,7 @@ static t_chan_width setup_chan_width(const t_router_opts& router_opts, static float route_connection_delay( RouterDelayProfiler& route_profiler, + int layer_num, int source_x_loc, int source_y_loc, int sink_x_loc, @@ -86,6 +87,7 @@ typedef std::function&, @@ -95,6 +97,7 @@ typedef std::function>& matrix, + int layer_num, int source_x, int source_y, int start_x, @@ -109,6 +112,7 @@ static void generic_compute_matrix_iterative_astar( static void generic_compute_matrix_dijkstra_expansion( RouterDelayProfiler& route_profiler, vtr::Matrix>& matrix, + int layer_num, int source_x, int source_y, int start_x, @@ -120,7 +124,7 @@ static void generic_compute_matrix_dijkstra_expansion( const std::set& allowed_types, bool is_flat); -static vtr::Matrix compute_delta_delays( +static vtr::NdMatrix compute_delta_delays( RouterDelayProfiler& route_profiler, const t_placer_opts& palcer_opts, const t_router_opts& router_opts, @@ -130,7 +134,7 @@ static vtr::Matrix compute_delta_delays( float delay_reduce(std::vector& delays, e_reducer reducer); -static vtr::Matrix 
compute_delta_delay_model( +static vtr::NdMatrix compute_delta_delay_model( RouterDelayProfiler& route_profiler, const t_placer_opts& placer_opts, const t_router_opts& router_opts, @@ -148,14 +152,14 @@ static bool find_direct_connect_sample_locations(const t_direct_inf* direct, int* src_rr, int* sink_rr); -static bool verify_delta_delays(const vtr::Matrix& delta_delays); +static bool verify_delta_delays(const vtr::NdMatrix& delta_delays); static int get_longest_segment_length(std::vector& segment_inf); -static void fix_empty_coordinates(vtr::Matrix& delta_delays); -static void fix_uninitialized_coordinates(vtr::Matrix& delta_delays); +static void fix_empty_coordinates(vtr::NdMatrix& delta_delays); +static void fix_uninitialized_coordinates(vtr::NdMatrix& delta_delays); -static float find_neightboring_average(vtr::Matrix& matrix, int x, int y, int max_distance); +static float find_neightboring_average(vtr::NdMatrix& matrix, t_physical_tile_loc tile_loc, int max_distance); /******* Globally Accessible Functions **********/ @@ -174,7 +178,6 @@ std::unique_ptr compute_place_delay_model(const t_placer_opts& t_chan_width chan_width = setup_chan_width(router_opts, chan_width_dist); - //TODO: is_flat flag should not be set here - It should be passed to the function. 
alloc_routing_structs(chan_width, router_opts, det_routing_arch, segment_inf, directs, num_directs, is_flat); @@ -345,6 +348,7 @@ static t_chan_width setup_chan_width(const t_router_opts& router_opts, static float route_connection_delay( RouterDelayProfiler& route_profiler, + int layer_num, int source_x, int source_y, int sink_x, @@ -360,20 +364,18 @@ static float route_connection_delay( bool successfully_routed = false; //Get the rr nodes to route between - auto best_driver_ptcs = get_best_classes(DRIVER, device_ctx.grid.get_physical_type(source_x, source_y)); - auto best_sink_ptcs = get_best_classes(RECEIVER, device_ctx.grid.get_physical_type(sink_x, sink_y)); + auto best_driver_ptcs = get_best_classes(DRIVER, device_ctx.grid.get_physical_type({source_x, source_y, layer_num})); + auto best_sink_ptcs = get_best_classes(RECEIVER, device_ctx.grid.get_physical_type({sink_x, sink_y, layer_num})); for (int driver_ptc : best_driver_ptcs) { VTR_ASSERT(driver_ptc != OPEN); - - RRNodeId source_rr_node = device_ctx.rr_graph.node_lookup().find_node(source_x, source_y, SOURCE, driver_ptc); + RRNodeId source_rr_node = device_ctx.rr_graph.node_lookup().find_node(layer_num, source_x, source_y, SOURCE, driver_ptc); VTR_ASSERT(source_rr_node != RRNodeId::INVALID()); for (int sink_ptc : best_sink_ptcs) { VTR_ASSERT(sink_ptc != OPEN); - - RRNodeId sink_rr_node = device_ctx.rr_graph.node_lookup().find_node(sink_x, sink_y, SINK, sink_ptc); + RRNodeId sink_rr_node = device_ctx.rr_graph.node_lookup().find_node(layer_num, sink_x, sink_y, SINK, sink_ptc); VTR_ASSERT(sink_rr_node != RRNodeId::INVALID()); @@ -395,8 +397,8 @@ static float route_connection_delay( } if (!successfully_routed) { - VTR_LOG_WARN("Unable to route between blocks at (%d,%d) and (%d,%d) to characterize delay (setting to %g)\n", - source_x, source_y, sink_x, sink_y, net_delay_value); + VTR_LOG_WARN("Unable to route between blocks at (%d,%d,%d) and (%d,%d,%d) to characterize delay (setting to %g)\n", + layer_num, 
source_x, source_y, layer_num, sink_x, sink_y, net_delay_value); } return (net_delay_value); @@ -419,6 +421,7 @@ static void add_delay_to_matrix( static void generic_compute_matrix_dijkstra_expansion( RouterDelayProfiler& /*route_profiler*/, vtr::Matrix>& matrix, + int layer_num, int source_x, int source_y, int start_x, @@ -431,7 +434,7 @@ static void generic_compute_matrix_dijkstra_expansion( bool is_flat) { auto& device_ctx = g_vpr_ctx.device(); - t_physical_tile_type_ptr src_type = device_ctx.grid.get_physical_type(source_x, source_y); + t_physical_tile_type_ptr src_type = device_ctx.grid.get_physical_type({source_x, source_y, layer_num}); bool is_allowed_type = allowed_types.empty() || allowed_types.find(src_type->name) != allowed_types.end(); if (src_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE || !is_allowed_type) { for (int sink_x = start_x; sink_x <= end_x; sink_x++) { @@ -458,10 +461,10 @@ static void generic_compute_matrix_dijkstra_expansion( vtr::Matrix found_matrix({matrix.dim_size(0), matrix.dim_size(1)}, false); - auto best_driver_ptcs = get_best_classes(DRIVER, device_ctx.grid.get_physical_type(source_x, source_y)); + auto best_driver_ptcs = get_best_classes(DRIVER, device_ctx.grid.get_physical_type({source_x, source_y, layer_num})); for (int driver_ptc : best_driver_ptcs) { VTR_ASSERT(driver_ptc != OPEN); - RRNodeId source_rr_node = device_ctx.rr_graph.node_lookup().find_node(source_x, source_y, SOURCE, driver_ptc); + RRNodeId source_rr_node = device_ctx.rr_graph.node_lookup().find_node(layer_num, source_x, source_y, SOURCE, driver_ptc); VTR_ASSERT(source_rr_node != RRNodeId::INVALID()); auto delays = calculate_all_path_delays_from_rr_node(size_t(source_rr_node), @@ -478,7 +481,7 @@ static void generic_compute_matrix_dijkstra_expansion( continue; } - t_physical_tile_type_ptr sink_type = device_ctx.grid.get_physical_type(sink_x, sink_y); + t_physical_tile_type_ptr sink_type = device_ctx.grid.get_physical_type({sink_x, sink_y, layer_num}); if 
(sink_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { if (matrix[delta_x][delta_y].empty()) { //Only set empty target if we don't already have a valid delta delay @@ -494,11 +497,10 @@ static void generic_compute_matrix_dijkstra_expansion( } } else { bool found_a_sink = false; - auto best_sink_ptcs = get_best_classes(RECEIVER, device_ctx.grid.get_physical_type(sink_x, sink_y)); + auto best_sink_ptcs = get_best_classes(RECEIVER, device_ctx.grid.get_physical_type({sink_x, sink_y, layer_num})); for (int sink_ptc : best_sink_ptcs) { VTR_ASSERT(sink_ptc != OPEN); - - RRNodeId sink_rr_node = device_ctx.rr_graph.node_lookup().find_node(sink_x, sink_y, SINK, sink_ptc); + RRNodeId sink_rr_node = device_ctx.rr_graph.node_lookup().find_node(layer_num, sink_x, sink_y, SINK, sink_ptc); VTR_ASSERT(sink_rr_node != RRNodeId::INVALID()); @@ -555,6 +557,7 @@ static void generic_compute_matrix_dijkstra_expansion( static void generic_compute_matrix_iterative_astar( RouterDelayProfiler& route_profiler, vtr::Matrix>& matrix, + int layer_num, int source_x, int source_y, int start_x, @@ -577,8 +580,8 @@ static void generic_compute_matrix_iterative_astar( delta_x = abs(sink_x - source_x); delta_y = abs(sink_y - source_y); - t_physical_tile_type_ptr src_type = device_ctx.grid.get_physical_type(source_x, source_y); - t_physical_tile_type_ptr sink_type = device_ctx.grid.get_physical_type(sink_x, sink_y); + t_physical_tile_type_ptr src_type = device_ctx.grid.get_physical_type({source_x, source_y, layer_num}); + t_physical_tile_type_ptr sink_type = device_ctx.grid.get_physical_type({sink_x, sink_y, layer_num}); bool src_or_target_empty = (src_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE || sink_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE); @@ -600,7 +603,7 @@ static void generic_compute_matrix_iterative_astar( } else { //Valid start/end - float delay = route_connection_delay(route_profiler, source_x, source_y, sink_x, sink_y, router_opts, measure_directconnect); + float delay = 
route_connection_delay(route_profiler, layer_num, source_x, source_y, sink_x, sink_y, router_opts, measure_directconnect); #ifdef VERBOSE VTR_LOG("Computed delay: %12g delta: %d,%d (src: %d,%d sink: %d,%d)\n", @@ -621,7 +624,7 @@ static void generic_compute_matrix_iterative_astar( } } -static vtr::Matrix compute_delta_delays( +static vtr::NdMatrix compute_delta_delays( RouterDelayProfiler& route_profiler, const t_placer_opts& placer_opts, const t_router_opts& router_opts, @@ -635,187 +638,196 @@ static vtr::Matrix compute_delta_delays( auto& device_ctx = g_vpr_ctx.device(); auto& grid = device_ctx.grid; - vtr::Matrix> sampled_delta_delays({grid.width(), grid.height()}); + vtr::NdMatrix delta_delays({static_cast(grid.get_num_layers()), grid.width(), grid.height()}); - size_t mid_x = vtr::nint(grid.width() / 2); - size_t mid_y = vtr::nint(grid.height() / 2); + for (int layer_num = 0; layer_num < grid.get_num_layers(); layer_num++) { + vtr::Matrix> sampled_delta_delays({grid.width(), grid.height()}); - size_t low_x = std::min(longest_length, mid_x); - size_t low_y = std::min(longest_length, mid_y); - size_t high_x = mid_x; - size_t high_y = mid_y; - if (longest_length <= grid.width()) { - high_x = std::max(grid.width() - longest_length, mid_x); - } - if (longest_length <= grid.height()) { - high_y = std::max(grid.height() - longest_length, mid_y); - } + size_t mid_x = vtr::nint(grid.width() / 2); + size_t mid_y = vtr::nint(grid.height() / 2); - std::set allowed_types; - if (!placer_opts.allowed_tiles_for_delay_model.empty()) { - auto allowed_types_vector = vtr::split(placer_opts.allowed_tiles_for_delay_model, ","); - for (const auto& type : allowed_types_vector) { - allowed_types.insert(type); + size_t low_x = std::min(longest_length, mid_x); + size_t low_y = std::min(longest_length, mid_y); + size_t high_x = mid_x; + size_t high_y = mid_y; + if (longest_length <= grid.width()) { + high_x = std::max(grid.width() - longest_length, mid_x); + } + if (longest_length <= 
grid.height()) { + high_y = std::max(grid.height() - longest_length, mid_y); } - } - // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - // + | | + - // + A | B | C + - // + | | + - // +-----------------\-----------------------.---------------+ - // + | | + - // + | | + - // + | | + - // + | | + - // + D | E | F + - // + | | + - // + | | + - // + | | + - // + | | + - // +-----------------*-----------------------/---------------+ - // + | | + - // + G | H | I + - // + | | + - // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - // - // * = (low_x, low_y) - // . = (high_x, high_y) - // / = (high_x, low_y) - // \ = (low_x, high_y) - // + = device edge - - //Find the lowest y location on the left edge with a non-empty block - size_t y = 0; - size_t x = 0; - t_physical_tile_type_ptr src_type = nullptr; - for (x = 0; x < grid.width(); ++x) { - for (y = 0; y < grid.height(); ++y) { - auto type = grid.get_physical_type(x, y); - - if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { - if (!allowed_types.empty() && allowed_types.find(std::string(type->name)) == allowed_types.end()) { - continue; + std::set allowed_types; + if (!placer_opts.allowed_tiles_for_delay_model.empty()) { + auto allowed_types_vector = vtr::split(placer_opts.allowed_tiles_for_delay_model, ","); + for (const auto& type : allowed_types_vector) { + allowed_types.insert(type); + } + } + + // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + // + | | + + // + A | B | C + + // + | | + + // +-----------------\-----------------------.---------------+ + // + | | + + // + | | + + // + | | + + // + | | + + // + D | E | F + + // + | | + + // + | | + + // + | | + + // + | | + + // +-----------------*-----------------------/---------------+ + // + | | + + // + G | H | I + + // + | | + + // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + // + // * = (low_x, low_y) + // . 
= (high_x, high_y) + // / = (high_x, low_y) + // \ = (low_x, high_y) + // + = device edge + + //Find the lowest y location on the left edge with a non-empty block + int y = 0; + int x = 0; + t_physical_tile_type_ptr src_type = nullptr; + for (x = 0; x < (int)grid.width(); ++x) { + for (y = 0; y < (int)grid.height(); ++y) { + auto type = grid.get_physical_type({x, y, layer_num}); + + if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { + if (!allowed_types.empty() && allowed_types.find(std::string(type->name)) == allowed_types.end()) { + continue; + } + src_type = type; + break; } - src_type = type; + } + if (src_type) { break; } } - if (src_type) { - break; - } - } - VTR_ASSERT(src_type != nullptr); + VTR_ASSERT(src_type != nullptr); - t_compute_delta_delay_matrix generic_compute_matrix; - switch (placer_opts.place_delta_delay_matrix_calculation_method) { - case e_place_delta_delay_algorithm::ASTAR_ROUTE: - generic_compute_matrix = generic_compute_matrix_iterative_astar; - break; - case e_place_delta_delay_algorithm::DIJKSTRA_EXPANSION: - generic_compute_matrix = generic_compute_matrix_dijkstra_expansion; - break; - default: - VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Unknown place_delta_delay_matrix_calculation_method %d", placer_opts.place_delta_delay_matrix_calculation_method); - } + t_compute_delta_delay_matrix generic_compute_matrix; + switch (placer_opts.place_delta_delay_matrix_calculation_method) { + case e_place_delta_delay_algorithm::ASTAR_ROUTE: + generic_compute_matrix = generic_compute_matrix_iterative_astar; + break; + case e_place_delta_delay_algorithm::DIJKSTRA_EXPANSION: + generic_compute_matrix = generic_compute_matrix_dijkstra_expansion; + break; + default: + VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Unknown place_delta_delay_matrix_calculation_method %d", placer_opts.place_delta_delay_matrix_calculation_method); + } #ifdef VERBOSE - VTR_LOG("Computing from lower left edge (%d,%d):\n", x, y); + VTR_LOG("Computing from lower left edge (%d,%d):\n", x, y); #endif - 
generic_compute_matrix(route_profiler, sampled_delta_delays, - x, y, - x, y, - grid.width() - 1, grid.height() - 1, - router_opts, - measure_directconnect, allowed_types, - is_flat); - - //Find the lowest x location on the bottom edge with a non-empty block - src_type = nullptr; - for (y = 0; y < grid.height(); ++y) { - for (x = 0; x < grid.width(); ++x) { - auto type = grid.get_physical_type(x, y); - - if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { - if (!allowed_types.empty() && allowed_types.find(std::string(type->name)) == allowed_types.end()) { - continue; + generic_compute_matrix(route_profiler, sampled_delta_delays, + layer_num, + x, y, + x, y, + grid.width() - 1, grid.height() - 1, + router_opts, + measure_directconnect, allowed_types, + is_flat); + + //Find the lowest x location on the bottom edge with a non-empty block + src_type = nullptr; + for (y = 0; y < (int)grid.height(); ++y) { + for (x = 0; x < (int)grid.width(); ++x) { + auto type = grid.get_physical_type({x, y, layer_num}); + + if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { + if (!allowed_types.empty() && allowed_types.find(std::string(type->name)) == allowed_types.end()) { + continue; + } + src_type = type; + break; } - src_type = type; + } + if (src_type) { break; } } - if (src_type) { - break; - } - } - VTR_ASSERT(src_type != nullptr); + VTR_ASSERT(src_type != nullptr); #ifdef VERBOSE - VTR_LOG("Computing from left bottom edge (%d,%d):\n", x, y); + VTR_LOG("Computing from left bottom edge (%d,%d):\n", x, y); #endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - x, y, - x, y, - grid.width() - 1, grid.height() - 1, - router_opts, - measure_directconnect, allowed_types, - is_flat); - - //Since the other delta delay values may have suffered from edge effects, - //we recalculate deltas within regions B, C, E, F + generic_compute_matrix(route_profiler, sampled_delta_delays, + layer_num, + x, y, + x, y, + grid.width() - 1, grid.height() - 1, + router_opts, + 
measure_directconnect, allowed_types, + is_flat); + + //Since the other delta delay values may have suffered from edge effects, + //we recalculate deltas within regions B, C, E, F #ifdef VERBOSE - VTR_LOG("Computing from low/low:\n"); + VTR_LOG("Computing from low/low:\n"); #endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - low_x, low_y, - low_x, low_y, - grid.width() - 1, grid.height() - 1, - router_opts, - measure_directconnect, allowed_types, - is_flat); - - //Since the other delta delay values may have suffered from edge effects, - //we recalculate deltas within regions D, E, G, H + generic_compute_matrix(route_profiler, sampled_delta_delays, + layer_num, + low_x, low_y, + low_x, low_y, + grid.width() - 1, grid.height() - 1, + router_opts, + measure_directconnect, allowed_types, + is_flat); + + //Since the other delta delay values may have suffered from edge effects, + //we recalculate deltas within regions D, E, G, H #ifdef VERBOSE - VTR_LOG("Computing from high/high:\n"); + VTR_LOG("Computing from high/high:\n"); #endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - high_x, high_y, - 0, 0, - high_x, high_y, - router_opts, - measure_directconnect, allowed_types, - is_flat); - - //Since the other delta delay values may have suffered from edge effects, - //we recalculate deltas within regions A, B, D, E + generic_compute_matrix(route_profiler, sampled_delta_delays, + layer_num, + high_x, high_y, + 0, 0, + high_x, high_y, + router_opts, + measure_directconnect, allowed_types, + is_flat); + + //Since the other delta delay values may have suffered from edge effects, + //we recalculate deltas within regions A, B, D, E #ifdef VERBOSE - VTR_LOG("Computing from high/low:\n"); + VTR_LOG("Computing from high/low:\n"); #endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - high_x, low_y, - 0, low_y, - high_x, grid.height() - 1, - router_opts, - measure_directconnect, allowed_types, - is_flat); - - //Since the other 
delta delay values may have suffered from edge effects, - //we recalculate deltas within regions E, F, H, I + generic_compute_matrix(route_profiler, sampled_delta_delays, + layer_num, + high_x, low_y, + 0, low_y, + high_x, grid.height() - 1, + router_opts, + measure_directconnect, allowed_types, + is_flat); + + //Since the other delta delay values may have suffered from edge effects, + //we recalculate deltas within regions E, F, H, I #ifdef VERBOSE - VTR_LOG("Computing from low/high:\n"); + VTR_LOG("Computing from low/high:\n"); #endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - low_x, high_y, - low_x, 0, - grid.width() - 1, high_y, - router_opts, - measure_directconnect, allowed_types, - is_flat); - - vtr::Matrix delta_delays({grid.width(), grid.height()}); - for (size_t dx = 0; dx < sampled_delta_delays.dim_size(0); ++dx) { - for (size_t dy = 0; dy < sampled_delta_delays.dim_size(1); ++dy) { - delta_delays[dx][dy] = delay_reduce(sampled_delta_delays[dx][dy], placer_opts.delay_model_reducer); + generic_compute_matrix(route_profiler, sampled_delta_delays, + layer_num, + low_x, high_y, + low_x, 0, + grid.width() - 1, high_y, + router_opts, + measure_directconnect, allowed_types, + is_flat); + + for (size_t dx = 0; dx < sampled_delta_delays.dim_size(0); ++dx) { + for (size_t dy = 0; dy < sampled_delta_delays.dim_size(1); ++dy) { + delta_delays[layer_num][dx][dy] = delay_reduce(sampled_delta_delays[dx][dy], placer_opts.delay_model_reducer); + } } } @@ -862,17 +874,20 @@ float delay_reduce(std::vector& delays, e_reducer reducer) { * we return IMPOSSIBLE_DELTA. 
*/ static float find_neightboring_average( - vtr::Matrix& matrix, - int x, - int y, + vtr::NdMatrix& matrix, + t_physical_tile_loc tile_loc, int max_distance) { float sum = 0; int counter = 0; - int endx = matrix.end_index(0); - int endy = matrix.end_index(1); + int endx = matrix.end_index(1); + int endy = matrix.end_index(2); int delx, dely; + int x = tile_loc.x; + int y = tile_loc.y; + int layer_num = tile_loc.layer_num; + for (int distance = 1; distance <= max_distance; ++distance) { for (delx = x - distance; delx <= x + distance; delx++) { for (dely = y - distance; dely <= y + distance; dely++) { @@ -886,11 +901,11 @@ static float find_neightboring_average( continue; } - if (matrix[delx][dely] == EMPTY_DELTA || matrix[delx][dely] == IMPOSSIBLE_DELTA) { + if (matrix[layer_num][delx][dely] == EMPTY_DELTA || matrix[layer_num][delx][dely] == IMPOSSIBLE_DELTA) { continue; } counter++; - sum += matrix[delx][dely]; + sum += matrix[layer_num][delx][dely]; } } if (counter != 0) { @@ -901,7 +916,7 @@ static float find_neightboring_average( return IMPOSSIBLE_DELTA; } -static void fix_empty_coordinates(vtr::Matrix& delta_delays) { +static void fix_empty_coordinates(vtr::NdMatrix& delta_delays) { // Set any empty delta's to the average of it's neighbours // // Empty coordinates may occur if the sampling location happens to not have @@ -909,27 +924,32 @@ static void fix_empty_coordinates(vtr::Matrix& delta_delays) { // would return a result, so we fill in the empty holes with a small // neighbour average. 
constexpr int kMaxAverageDistance = 2; - for (size_t delta_x = 0; delta_x < delta_delays.dim_size(0); ++delta_x) { - for (size_t delta_y = 0; delta_y < delta_delays.dim_size(1); ++delta_y) { - if (delta_delays[delta_x][delta_y] == EMPTY_DELTA) { - delta_delays[delta_x][delta_y] = find_neightboring_average(delta_delays, delta_x, delta_y, kMaxAverageDistance); + for (int layer_num = 0; layer_num < (int)delta_delays.dim_size(0); ++layer_num) { + for (int delta_x = 0; delta_x < (int)delta_delays.dim_size(1); ++delta_x) { + for (int delta_y = 0; delta_y < (int)delta_delays.dim_size(2); ++delta_y) { + if (delta_delays[layer_num][delta_x][delta_y] == EMPTY_DELTA) { + delta_delays[layer_num][delta_x][delta_y] = find_neightboring_average(delta_delays, {delta_x, delta_y, layer_num}, kMaxAverageDistance); + } } } } } -static void fix_uninitialized_coordinates(vtr::Matrix& delta_delays) { +static void fix_uninitialized_coordinates(vtr::NdMatrix& delta_delays) { // Set any empty delta's to the average of it's neighbours - for (size_t delta_x = 0; delta_x < delta_delays.dim_size(0); ++delta_x) { - for (size_t delta_y = 0; delta_y < delta_delays.dim_size(1); ++delta_y) { - if (delta_delays[delta_x][delta_y] == UNINITIALIZED_DELTA) { - delta_delays[delta_x][delta_y] = IMPOSSIBLE_DELTA; + + for (size_t layer_num = 0; layer_num < delta_delays.dim_size(0); ++layer_num) { + for (size_t delta_x = 0; delta_x < delta_delays.dim_size(1); ++delta_x) { + for (size_t delta_y = 0; delta_y < delta_delays.dim_size(2); ++delta_y) { + if (delta_delays[layer_num][delta_x][delta_y] == UNINITIALIZED_DELTA) { + delta_delays[layer_num][delta_x][delta_y] = IMPOSSIBLE_DELTA; + } } } } } -static void fill_impossible_coordinates(vtr::Matrix& delta_delays) { +static void fill_impossible_coordinates(vtr::NdMatrix& delta_delays) { // Set any impossible delta's to the average of it's neighbours // // Impossible coordinates may occur if an IPIN cannot be reached from the @@ -942,17 +962,19 @@ static void 
fill_impossible_coordinates(vtr::Matrix& delta_delays) { // filling these gaps. It is more important to have a poor predication, // than a invalid value and causing a slack assertion. constexpr int kMaxAverageDistance = 5; - for (size_t delta_x = 0; delta_x < delta_delays.dim_size(0); ++delta_x) { - for (size_t delta_y = 0; delta_y < delta_delays.dim_size(1); ++delta_y) { - if (delta_delays[delta_x][delta_y] == IMPOSSIBLE_DELTA) { - delta_delays[delta_x][delta_y] = find_neightboring_average( - delta_delays, delta_x, delta_y, kMaxAverageDistance); + for (int layer_num = 0; layer_num < (int)delta_delays.dim_size(0); ++layer_num) { + for (int delta_x = 0; delta_x < (int)delta_delays.dim_size(1); ++delta_x) { + for (int delta_y = 0; delta_y < (int)delta_delays.dim_size(2); ++delta_y) { + if (delta_delays[layer_num][delta_x][delta_y] == IMPOSSIBLE_DELTA) { + delta_delays[layer_num][delta_x][delta_y] = find_neightboring_average( + delta_delays, {delta_x, delta_y, layer_num}, kMaxAverageDistance); + } } } } } -static vtr::Matrix compute_delta_delay_model( +static vtr::NdMatrix compute_delta_delay_model( RouterDelayProfiler& route_profiler, const t_placer_opts& placer_opts, const t_router_opts& router_opts, @@ -960,12 +982,12 @@ static vtr::Matrix compute_delta_delay_model( int longest_length, bool is_flat) { vtr::ScopedStartFinishTimer timer("Computing delta delays"); - vtr::Matrix delta_delays = compute_delta_delays(route_profiler, - placer_opts, - router_opts, - measure_directconnect, - longest_length, - is_flat); + vtr::NdMatrix delta_delays = compute_delta_delays(route_profiler, + placer_opts, + router_opts, + measure_directconnect, + longest_length, + is_flat); fix_uninitialized_coordinates(delta_delays); @@ -997,54 +1019,63 @@ static bool find_direct_connect_sample_locations(const t_direct_inf* direct, //Search the grid for an instance of from/to blocks which satisfy this direct connect offsets, //and which has the appropriate pins - int from_x = 0, from_y = 0, 
from_sub_tile = 0; + int from_x = -1; + int from_y = -1; + int from_sub_tile = -1; int to_x = 0, to_y = 0, to_sub_tile = 0; bool found = false; - for (from_x = 0; from_x < (int)grid.width(); ++from_x) { - to_x = from_x + direct->x_offset; - if (to_x < 0 || to_x >= (int)grid.width()) continue; - - for (from_y = 0; from_y < (int)grid.height(); ++from_y) { - if (grid.get_physical_type(from_x, from_y) != from_type) continue; - - //Check that the from pin exists at this from location - //(with multi-width/height blocks pins may not exist at all locations) - bool from_pin_found = false; - if (direct->from_side != NUM_SIDES) { - RRNodeId from_pin_rr = node_lookup.find_node(from_x, from_y, OPIN, from_pin, direct->from_side); - from_pin_found = (from_pin_rr != RRNodeId::INVALID()); - } else { - from_pin_found = !(node_lookup.find_nodes_at_all_sides(from_x, from_y, OPIN, from_pin).empty()); - } - if (!from_pin_found) continue; + int found_layer_num = -1; + //TODO: For now, this function assumes that the from/to blocks are on the same die and have the same layer num + for (int layer_num = 0; layer_num < grid.get_num_layers() && !found; ++layer_num) { + for (int x = 0; x < (int)grid.width() && !found; ++x) { + to_x = x + direct->x_offset; + if (to_x < 0 || to_x >= (int)grid.width()) continue; + + for (int y = 0; y < (int)grid.height() && !found; ++y) { + if (grid.get_physical_type({x, y, layer_num}) != from_type) continue; + + //Check that the from pin exists at this from location + //(with multi-width/height blocks pins may not exist at all locations) + bool from_pin_found = false; + if (direct->from_side != NUM_SIDES) { + RRNodeId from_pin_rr = node_lookup.find_node(layer_num, x, y, OPIN, from_pin, direct->from_side); + from_pin_found = (from_pin_rr != RRNodeId::INVALID()); + } else { + from_pin_found = !(node_lookup.find_nodes_at_all_sides(layer_num, x, y, OPIN, from_pin).empty()); + } + if (!from_pin_found) continue; - to_y = from_y + direct->y_offset; -
if (to_y < 0 || to_y >= (int)grid.height()) continue; - if (grid.get_physical_type(to_x, to_y) != to_type) continue; + if (to_y < 0 || to_y >= (int)grid.height()) continue; + if (grid.get_physical_type({to_x, to_y, layer_num}) != to_type) continue; - //Check that the from pin exists at this from location - //(with multi-width/height blocks pins may not exist at all locations) - bool to_pin_found = false; - if (direct->to_side != NUM_SIDES) { - RRNodeId to_pin_rr = node_lookup.find_node(to_x, to_y, IPIN, to_pin, direct->to_side); - to_pin_found = (to_pin_rr != RRNodeId::INVALID()); - } else { - to_pin_found = !(node_lookup.find_nodes_at_all_sides(to_x, to_y, IPIN, to_pin).empty()); - } - if (!to_pin_found) continue; + //Check that the from pin exists at this from location + //(with multi-width/height blocks pins may not exist at all locations) + bool to_pin_found = false; + if (direct->to_side != NUM_SIDES) { + RRNodeId to_pin_rr = node_lookup.find_node(layer_num, to_x, to_y, IPIN, to_pin, direct->to_side); + to_pin_found = (to_pin_rr != RRNodeId::INVALID()); + } else { + to_pin_found = !(node_lookup.find_nodes_at_all_sides(layer_num, to_x, to_y, IPIN, to_pin).empty()); + } + if (!to_pin_found) continue; - for (from_sub_tile = 0; from_sub_tile < from_type->capacity; ++from_sub_tile) { - to_sub_tile = from_sub_tile + direct->sub_tile_offset; + for (int sub_tile_num = 0; sub_tile_num < from_type->capacity; ++sub_tile_num) { + to_sub_tile = sub_tile_num + direct->sub_tile_offset; - if (to_sub_tile < 0 || to_sub_tile >= to_type->capacity) continue; + if (to_sub_tile < 0 || to_sub_tile >= to_type->capacity) continue; - found = true; - break; + found = true; + found_layer_num = layer_num; + from_x = x; + from_y = y; + from_sub_tile = sub_tile_num; + + break; + } } - if (found) break; } - if (found) break; } if (!found) { @@ -1052,10 +1083,10 @@ static bool find_direct_connect_sample_locations(const t_direct_inf* direct, } //Now have a legal instance of this direct connect 
- VTR_ASSERT(grid.get_physical_type(from_x, from_y) == from_type); + VTR_ASSERT(grid.get_physical_type({from_x, from_y, found_layer_num}) == from_type); VTR_ASSERT(from_sub_tile < from_type->capacity); - VTR_ASSERT(grid.get_physical_type(to_x, to_y) == to_type); + VTR_ASSERT(grid.get_physical_type({to_x, to_y, found_layer_num}) == to_type); VTR_ASSERT(to_sub_tile < to_type->capacity); VTR_ASSERT(from_x + direct->x_offset == to_x); @@ -1067,13 +1098,13 @@ static bool find_direct_connect_sample_locations(const t_direct_inf* direct, // { - RRNodeId src_rr_candidate = node_lookup.find_node(from_x, from_y, SOURCE, from_pin_class); + RRNodeId src_rr_candidate = node_lookup.find_node(found_layer_num, from_x, from_y, SOURCE, from_pin_class); VTR_ASSERT(src_rr_candidate); *src_rr = size_t(src_rr_candidate); } { - RRNodeId sink_rr_candidate = node_lookup.find_node(to_x, to_y, SINK, to_pin_class); + RRNodeId sink_rr_candidate = node_lookup.find_node(found_layer_num, to_x, to_y, SINK, to_pin_class); VTR_ASSERT(sink_rr_candidate); *sink_rr = size_t(sink_rr_candidate); } @@ -1081,18 +1112,20 @@ static bool find_direct_connect_sample_locations(const t_direct_inf* direct, return true; } -static bool verify_delta_delays(const vtr::Matrix& delta_delays) { +static bool verify_delta_delays(const vtr::NdMatrix& delta_delays) { auto& device_ctx = g_vpr_ctx.device(); auto& grid = device_ctx.grid; - for (size_t x = 0; x < grid.width(); ++x) { - for (size_t y = 0; y < grid.height(); ++y) { - float delta_delay = delta_delays[x][y]; + for (int layer_num = 0; layer_num < grid.get_num_layers(); ++layer_num) { + for (size_t x = 0; x < grid.width(); ++x) { + for (size_t y = 0; y < grid.height(); ++y) { + float delta_delay = delta_delays[layer_num][x][y]; - if (delta_delay < 0.) { - VPR_ERROR(VPR_ERROR_PLACE, - "Found invaild negative delay %g for delta (%d,%d)", - delta_delay, x, y); + if (delta_delay < 0.) 
{ + VPR_ERROR(VPR_ERROR_PLACE, + "Found invaild negative delay %g for delta (%d,%d)", + delta_delay, x, y); + } } } } diff --git a/vpr/src/place/uniform_move_generator.cpp b/vpr/src/place/uniform_move_generator.cpp index cd75492eb71..c979295e4f0 100644 --- a/vpr/src/place/uniform_move_generator.cpp +++ b/vpr/src/place/uniform_move_generator.cpp @@ -16,7 +16,7 @@ e_create_move UniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks t_pl_loc from = place_ctx.block_locs[b_from].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); - auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type(from.x, from.y); + auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); t_pl_loc to; @@ -27,9 +27,10 @@ e_create_move UniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks #if 0 auto& grid = g_vpr_ctx.device().grid; - VTR_LOG( "swap [%d][%d][%d] %s block %zu \"%s\" <=> [%d][%d][%d] %s block ", - from.x, from.y, from.sub_tile, grid[from.x][from.y].type->name, size_t(b_from), (b_from ? cluster_ctx.clb_nlist.block_name(b_from).c_str() : ""), - to.x, to.y, to.sub_tile, grid[to.x][to.y].type->name); + const auto& grid_to_type = grid.get_physical_type(to.x, to.y, to.layer); + VTR_LOG( "swap [%d][%d][%d][%d] %s block %zu \"%s\" <=> [%d][%d][%d][%d] %s block ", + from.x, from.y, from.sub_tile,from.layer, grid_from_type->name, size_t(b_from), (b_from ? 
cluster_ctx.clb_nlist.block_name(b_from).c_str() : ""), + to.x, to.y, to.sub_tile, to.layer, grid_to_type->name); if (b_to) { VTR_LOG("%zu \"%s\"", size_t(b_to), cluster_ctx.clb_nlist.block_name(b_to).c_str()); } else { diff --git a/vpr/src/place/weighted_centroid_move_generator.cpp b/vpr/src/place/weighted_centroid_move_generator.cpp index 00144f41c9a..4e968680cba 100644 --- a/vpr/src/place/weighted_centroid_move_generator.cpp +++ b/vpr/src/place/weighted_centroid_move_generator.cpp @@ -20,7 +20,7 @@ e_create_move WeightedCentroidMoveGenerator::propose_move(t_pl_blocks_to_be_move t_pl_loc from = place_ctx.block_locs[b_from].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); - auto grid_from_type = device_ctx.grid.get_physical_type(from.x, from.y); + auto grid_from_type = device_ctx.grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); t_range_limiters range_limiters; diff --git a/vpr/src/place/weighted_median_move_generator.cpp b/vpr/src/place/weighted_median_move_generator.cpp index fbad2be2413..a5e59fec044 100644 --- a/vpr/src/place/weighted_median_move_generator.cpp +++ b/vpr/src/place/weighted_median_move_generator.cpp @@ -23,7 +23,7 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& t_pl_loc from = place_ctx.block_locs[b_from].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); - auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type(from.x, from.y); + auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); /* Calculate the Edge weighted median region */ @@ -99,6 +99,8 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& t_pl_loc w_median_point; w_median_point.x = (limit_coords.xmin + limit_coords.xmax) / 2; w_median_point.y = (limit_coords.ymin + limit_coords.ymax) / 2; + // 
TODO: Currently, we don't move blocks between different types of layers + w_median_point.layer = from.layer; if (!find_to_loc_centroid(cluster_from_type, from, w_median_point, range_limiters, to, b_from)) { return e_create_move::ABORT; } diff --git a/vpr/src/power/power.cpp b/vpr/src/power/power.cpp index 7591d2f183c..94d55479580 100644 --- a/vpr/src/power/power.cpp +++ b/vpr/src/power/power.cpp @@ -609,34 +609,36 @@ static void power_usage_blocks(t_power_usage* power_usage) { t_logical_block_type_ptr logical_block; /* Loop through all grid locations */ - for (size_t x = 0; x < device_ctx.grid.width(); x++) { - for (size_t y = 0; y < device_ctx.grid.height(); y++) { - auto physical_tile = device_ctx.grid.get_physical_type(x, y); - int width_offset = device_ctx.grid.get_width_offset(x, y); - int height_offset = device_ctx.grid.get_height_offset(x, y); - - if ((width_offset != 0) - || (height_offset != 0) - || is_empty_type(physical_tile)) { - continue; - } + for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { + for (int x = 0; x < (int)device_ctx.grid.width(); x++) { + for (int y = 0; y < (int)device_ctx.grid.height(); y++) { + auto physical_tile = device_ctx.grid.get_physical_type({x, y, layer_num}); + int width_offset = device_ctx.grid.get_width_offset({x, y, layer_num}); + int height_offset = device_ctx.grid.get_height_offset({x, y, layer_num}); + + if ((width_offset != 0) + || (height_offset != 0) + || is_empty_type(physical_tile)) { + continue; + } - for (int z = 0; z < physical_tile->capacity; z++) { - t_pb* pb = nullptr; - t_power_usage pb_power; + for (int z = 0; z < physical_tile->capacity; z++) { + t_pb* pb = nullptr; + t_power_usage pb_power; - ClusterBlockId iblk = place_ctx.grid_blocks[x][y].blocks[z]; + ClusterBlockId iblk = place_ctx.grid_blocks.block_at_location({x, y, z, layer_num}); - if (iblk != EMPTY_BLOCK_ID && iblk != INVALID_BLOCK_ID) { - pb = cluster_ctx.clb_nlist.block_pb(iblk); - logical_block = 
cluster_ctx.clb_nlist.block_type(iblk); - } else { - logical_block = pick_logical_type(physical_tile); - } + if (iblk != EMPTY_BLOCK_ID && iblk != INVALID_BLOCK_ID) { + pb = cluster_ctx.clb_nlist.block_pb(iblk); + logical_block = cluster_ctx.clb_nlist.block_type(iblk); + } else { + logical_block = pick_logical_type(physical_tile); + } - /* Calculate power of this CLB */ - power_usage_pb(&pb_power, pb, logical_block->pb_graph_head, iblk); - power_add_usage(power_usage, &pb_power); + /* Calculate power of this CLB */ + power_usage_pb(&pb_power, pb, logical_block->pb_graph_head, iblk); + power_add_usage(power_usage, &pb_power); + } } } } diff --git a/vpr/src/route/check_route.cpp b/vpr/src/route/check_route.cpp index e2c048fe0f7..3cf5c5c20f2 100644 --- a/vpr/src/route/check_route.cpp +++ b/vpr/src/route/check_route.cpp @@ -257,7 +257,7 @@ static bool check_adjacent(RRNodeId from_node, RRNodeId to_node, bool is_flat) { * represent specially-crafted connections such as carry-chains or more advanced * blocks where adjacency is overridden by the architect */ - int from_xlow, from_ylow, to_xlow, to_ylow, from_ptc, to_ptc, iclass; + int from_layer, from_xlow, from_ylow, to_layer, to_xlow, to_ylow, from_ptc, to_ptc, iclass; int num_adj, to_xhigh, to_yhigh, from_xhigh, from_yhigh; bool reached; t_rr_type from_type, to_type; @@ -284,19 +284,25 @@ static bool check_adjacent(RRNodeId from_node, RRNodeId to_node, bool is_flat) { num_adj = 0; - from_type = rr_graph.node_type(from_node); - from_xlow = rr_graph.node_xlow(from_node); - from_ylow = rr_graph.node_ylow(from_node); - from_xhigh = rr_graph.node_xhigh(from_node); - from_yhigh = rr_graph.node_yhigh(from_node); - from_ptc = rr_graph.node_ptc_num(from_node); - to_type = rr_graph.node_type(to_node); - to_xlow = rr_graph.node_xlow(to_node); - to_ylow = rr_graph.node_ylow(to_node); - to_xhigh = rr_graph.node_xhigh(to_node); - to_yhigh = rr_graph.node_yhigh(to_node); - to_ptc = rr_graph.node_ptc_num(to_node); - + auto from_rr = 
RRNodeId(from_node); + auto to_rr = RRNodeId(to_node); + from_type = rr_graph.node_type(from_rr); + from_layer = rr_graph.node_layer(from_rr); + from_xlow = rr_graph.node_xlow(from_rr); + from_ylow = rr_graph.node_ylow(from_rr); + from_xhigh = rr_graph.node_xhigh(from_rr); + from_yhigh = rr_graph.node_yhigh(from_rr); + from_ptc = rr_graph.node_ptc_num(from_rr); + to_type = rr_graph.node_type(to_rr); + to_layer = rr_graph.node_layer(to_rr); + to_xlow = rr_graph.node_xlow(to_rr); + to_ylow = rr_graph.node_ylow(to_rr); + to_xhigh = rr_graph.node_xhigh(to_rr); + to_yhigh = rr_graph.node_yhigh(to_rr); + to_ptc = rr_graph.node_ptc_num(to_rr); + + // Layer numbers should not be more than one layer apart for connected nodes + VTR_ASSERT(abs(from_layer - to_layer) <= 1); switch (from_type) { case SOURCE: VTR_ASSERT(to_type == OPIN); @@ -306,8 +312,8 @@ static bool check_adjacent(RRNodeId from_node, RRNodeId to_node, bool is_flat) { && from_ylow <= to_ylow && from_xhigh >= to_xhigh && from_yhigh >= to_yhigh) { - from_grid_type = device_ctx.grid.get_physical_type(from_xlow, from_ylow); - to_grid_type = device_ctx.grid.get_physical_type(to_xlow, to_ylow); + from_grid_type = device_ctx.grid.get_physical_type({from_xlow, from_ylow, from_layer}); + to_grid_type = device_ctx.grid.get_physical_type({to_xlow, to_ylow, to_layer}); VTR_ASSERT(from_grid_type == to_grid_type); iclass = get_class_num_from_pin_physical_num(to_grid_type, to_ptc); @@ -321,7 +327,7 @@ static bool check_adjacent(RRNodeId from_node, RRNodeId to_node, bool is_flat) { break; case OPIN: - from_grid_type = device_ctx.grid.get_physical_type(from_xlow, from_ylow); + from_grid_type = device_ctx.grid.get_physical_type({from_xlow, from_ylow, from_layer}); if (to_type == CHANX || to_type == CHANY) { num_adj += 1; //adjacent } else if (is_flat) { @@ -335,7 +341,7 @@ static bool check_adjacent(RRNodeId from_node, RRNodeId to_node, bool is_flat) { break; case IPIN: - from_grid_type = 
device_ctx.grid.get_physical_type(from_xlow, from_ylow); + from_grid_type = device_ctx.grid.get_physical_type({from_xlow, from_ylow, from_layer}); if (is_flat) { VTR_ASSERT(to_type == OPIN || to_type == IPIN || to_type == SINK); } else { @@ -348,21 +354,21 @@ static bool check_adjacent(RRNodeId from_node, RRNodeId to_node, bool is_flat) { && from_ylow >= to_ylow && from_xhigh <= to_xhigh && from_yhigh <= to_yhigh) { - from_grid_type = device_ctx.grid.get_physical_type(from_xlow, from_ylow); - to_grid_type = device_ctx.grid.get_physical_type(to_xlow, to_ylow); + from_grid_type = device_ctx.grid.get_physical_type({from_xlow, from_ylow, from_layer}); + to_grid_type = device_ctx.grid.get_physical_type({to_xlow, to_ylow, to_layer}); VTR_ASSERT(from_grid_type == to_grid_type); iclass = get_class_num_from_pin_physical_num(from_grid_type, from_ptc); if (iclass == to_ptc) num_adj++; } } else { - from_grid_type = device_ctx.grid.get_physical_type(from_xlow, from_ylow); - to_grid_type = device_ctx.grid.get_physical_type(to_xlow, to_ylow); + from_grid_type = device_ctx.grid.get_physical_type({from_xlow, from_ylow, from_layer}); + to_grid_type = device_ctx.grid.get_physical_type({to_xlow, to_ylow, to_layer}); VTR_ASSERT(from_grid_type == to_grid_type); - int from_root_x = from_xlow - device_ctx.grid.get_width_offset(from_xlow, from_ylow); - int from_root_y = from_ylow - device_ctx.grid.get_height_offset(from_xlow, from_ylow); - int to_root_x = to_xlow - device_ctx.grid.get_width_offset(to_xlow, to_ylow); - int to_root_y = to_ylow - device_ctx.grid.get_height_offset(to_xlow, to_ylow); + int from_root_x = from_xlow - device_ctx.grid.get_width_offset({from_xlow, from_ylow, from_layer}); + int from_root_y = from_ylow - device_ctx.grid.get_height_offset({from_xlow, from_ylow, from_layer}); + int to_root_x = to_xlow - device_ctx.grid.get_width_offset({to_xlow, to_ylow, to_layer}); + int to_root_y = to_ylow - device_ctx.grid.get_height_offset({to_xlow, to_ylow, to_layer}); if 
(from_root_x == to_root_x && from_root_y == to_root_y) { num_adj++; diff --git a/vpr/src/route/clock_connection_builders.cpp b/vpr/src/route/clock_connection_builders.cpp index 645e96cb306..fbed3a3b62d 100644 --- a/vpr/src/route/clock_connection_builders.cpp +++ b/vpr/src/route/clock_connection_builders.cpp @@ -23,9 +23,10 @@ void RoutingToClockConnection::set_clock_switch_point_name(std::string clock_swi switch_point_name = clock_switch_point_name; } -void RoutingToClockConnection::set_switch_location(int x, int y) { +void RoutingToClockConnection::set_switch_location(int x, int y, int layer /* =0 */) { switch_location.x = x; switch_location.y = y; + switch_location.layer = layer; } void RoutingToClockConnection::set_switch(int arch_switch_index) { @@ -55,15 +56,15 @@ void RoutingToClockConnection::create_switches(const ClockRRGraphBuilder& clock_ auto& device_ctx = g_vpr_ctx.device(); const auto& node_lookup = device_ctx.rr_graph.node_lookup(); - RRNodeId virtual_clock_network_root_idx = create_virtual_clock_network_sink_node(switch_location.x, switch_location.y); + RRNodeId virtual_clock_network_root_idx = create_virtual_clock_network_sink_node(switch_location.layer, switch_location.x, switch_location.y); { auto& mut_device_ctx = g_vpr_ctx.mutable_device(); mut_device_ctx.virtual_clock_network_root_idx = size_t(virtual_clock_network_root_idx); } // rr_node indices for x and y channel routing wires and clock wires to connect to - auto x_wire_indices = node_lookup.find_channel_nodes(switch_location.x, switch_location.y, CHANX); - auto y_wire_indices = node_lookup.find_channel_nodes(switch_location.x, switch_location.y, CHANY); + auto x_wire_indices = node_lookup.find_channel_nodes(switch_location.layer, switch_location.x, switch_location.y, CHANX); + auto y_wire_indices = node_lookup.find_channel_nodes(switch_location.layer, switch_location.x, switch_location.y, CHANY); auto clock_indices = clock_graph.get_rr_node_indices_at_switch_location( clock_to_connect_to, 
switch_point_name, switch_location.x, switch_location.y); @@ -90,7 +91,7 @@ void RoutingToClockConnection::create_switches(const ClockRRGraphBuilder& clock_ } } -RRNodeId RoutingToClockConnection::create_virtual_clock_network_sink_node(int x, int y) { +RRNodeId RoutingToClockConnection::create_virtual_clock_network_sink_node(int layer, int x, int y) { auto& device_ctx = g_vpr_ctx.mutable_device(); auto& rr_graph = device_ctx.rr_graph; auto& rr_graph_builder = device_ctx.rr_graph_builder; @@ -99,8 +100,8 @@ RRNodeId RoutingToClockConnection::create_virtual_clock_network_sink_node(int x, rr_graph_builder.emplace_back(); RRNodeId node_index = RRNodeId(rr_graph.num_nodes() - 1); - //Determine the a valid PTC - std::vector nodes_at_loc = node_lookup.find_grid_nodes_at_all_sides(x, y, SINK); + //Determine a valid PTC + std::vector nodes_at_loc = node_lookup.find_grid_nodes_at_all_sides(layer, x, y, SINK); int max_ptc = 0; for (RRNodeId inode : nodes_at_loc) { @@ -111,6 +112,7 @@ RRNodeId RoutingToClockConnection::create_virtual_clock_network_sink_node(int x, rr_graph_builder.set_node_type(node_index, SINK); rr_graph_builder.set_node_class_num(node_index, ptc); rr_graph_builder.set_node_coordinates(node_index, x, y, x, y); + rr_graph_builder.set_node_layer(node_index, layer); rr_graph_builder.set_node_capacity(node_index, 1); rr_graph_builder.set_node_cost_index(node_index, RRIndexedDataId(SINK_COST_INDEX)); @@ -122,7 +124,7 @@ RRNodeId RoutingToClockConnection::create_virtual_clock_network_sink_node(int x, // However, since the SINK node has the same xhigh/xlow as well as yhigh/ylow, we can probably use a shortcut for (int ix = rr_graph.node_xlow(node_index); ix <= rr_graph.node_xhigh(node_index); ++ix) { for (int iy = rr_graph.node_ylow(node_index); iy <= rr_graph.node_yhigh(node_index); ++iy) { - node_lookup.add_node(node_index, ix, iy, rr_graph.node_type(node_index), rr_graph.node_class_num(node_index)); + node_lookup.add_node(node_index, layer, ix, iy, 
rr_graph.node_type(node_index), rr_graph.node_class_num(node_index)); } } @@ -248,23 +250,24 @@ void ClockToPinsConnection::create_switches(const ClockRRGraphBuilder& clock_gra auto& device_ctx = g_vpr_ctx.device(); const auto& node_lookup = device_ctx.rr_graph.node_lookup(); auto& grid = clock_graph.grid(); + int layer_num = 0; //Function *FOR NOW* assumes that layer_num is always 0 - for (size_t x = 0; x < grid.width(); x++) { - for (size_t y = 0; y < grid.height(); y++) { + for (int x = 0; x < (int)grid.width(); x++) { + for (int y = 0; y < (int)grid.height(); y++) { //Avoid boundary - if ((y == 0 && x == 0) || (x == grid.width() - 1 && y == grid.height() - 1)) { + if ((y == 0 && x == 0) || (x == (int)grid.width() - 1 && y == (int)grid.height() - 1)) { continue; } - auto type = grid.get_physical_type(x, y); + auto type = grid.get_physical_type({x, y, layer_num}); // Skip EMPTY type if (is_empty_type(type)) { continue; } - auto width_offset = grid.get_width_offset(x, y); - auto height_offset = grid.get_height_offset(x, y); + auto width_offset = grid.get_width_offset({x, y, layer_num}); + auto height_offset = grid.get_height_offset({x, y, layer_num}); // Ignore grid locations that do not have blocks bool has_pb_type = false; @@ -282,7 +285,7 @@ void ClockToPinsConnection::create_switches(const ClockRRGraphBuilder& clock_gra for (e_side side : SIDES) { //Don't connect pins which are not adjacent to channels around the perimeter - if ((x == 0 && side != RIGHT) || (x == grid.width() - 1 && side != LEFT) || (y == 0 && side != TOP) || (y == grid.height() - 1 && side != BOTTOM)) { + if ((x == 0 && side != RIGHT) || (x == (int)grid.width() - 1 && side != LEFT) || (y == 0 && side != TOP) || (y == (int)grid.height() - 1 && side != BOTTOM)) { continue; } @@ -298,7 +301,7 @@ void ClockToPinsConnection::create_switches(const ClockRRGraphBuilder& clock_gra if (x == 0) { clock_x_offset = 1; // chanx clock always starts at 1 offset clock_y_offset = -1; // pick the chanx below 
the block - } else if (x == grid.width() - 1) { + } else if (x == (int)grid.width() - 1) { clock_x_offset = -1; // chanx clock always ends at 1 offset clock_y_offset = -1; // pick the chanx below the block } else if (y == 0) { @@ -307,7 +310,8 @@ void ClockToPinsConnection::create_switches(const ClockRRGraphBuilder& clock_gra clock_y_offset = -1; // pick the chanx below the block } - auto clock_pin_node_idx = node_lookup.find_node(x, + auto clock_pin_node_idx = node_lookup.find_node(layer_num, + x, y, IPIN, clock_pin_idx, diff --git a/vpr/src/route/clock_connection_builders.h b/vpr/src/route/clock_connection_builders.h index 0565128471c..7ab1c7c5be7 100644 --- a/vpr/src/route/clock_connection_builders.h +++ b/vpr/src/route/clock_connection_builders.h @@ -47,7 +47,7 @@ class RoutingToClockConnection : public ClockConnection { */ void set_clock_name_to_connect_to(std::string clock_name); void set_clock_switch_point_name(std::string clock_switch_point_name); - void set_switch_location(int x, int y); + void set_switch_location(int x, int y, int layer = 0); void set_switch(int arch_switch_index); void set_fc_val(float fc_val); @@ -57,7 +57,7 @@ class RoutingToClockConnection : public ClockConnection { /* Connects the inter-block routing to the clock source at the specified coordinates */ void create_switches(const ClockRRGraphBuilder& clock_graph, t_rr_edge_info_set* rr_edges_to_create) override; size_t estimate_additional_nodes() override; - RRNodeId create_virtual_clock_network_sink_node(int x, int y); + RRNodeId create_virtual_clock_network_sink_node(int layer, int x, int y); }; class ClockToClockConneciton : public ClockConnection { diff --git a/vpr/src/route/clock_fwd.h b/vpr/src/route/clock_fwd.h index ef119f07649..abf76b3b7bd 100644 --- a/vpr/src/route/clock_fwd.h +++ b/vpr/src/route/clock_fwd.h @@ -4,6 +4,7 @@ struct Coordinates { int x = -1; int y = -1; + int layer = -1; }; #endif diff --git a/vpr/src/route/clock_network_builders.cpp 
b/vpr/src/route/clock_network_builders.cpp index 1db5796f47d..3a1606e5831 100644 --- a/vpr/src/route/clock_network_builders.cpp +++ b/vpr/src/route/clock_network_builders.cpp @@ -245,6 +245,10 @@ void ClockRib::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphB VTR_ASSERT(repeat.y > 0); VTR_ASSERT(repeat.x > 0); + // TODO: This function is not adapted to the multi-layer grid + VTR_ASSERT(g_vpr_ctx.device().grid.get_num_layers() == 1); + int layer_num = 0; + for (unsigned y = x_chan_wire.position; y < grid.height() - 1; y += repeat.y) { for (unsigned x_start = x_chan_wire.start; x_start < grid.width() - 1; x_start += repeat.x) { unsigned drive_x = x_start + drive.offset; @@ -282,7 +286,8 @@ void ClockRib::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphB } // create drive point (length zero wire) - auto drive_node_idx = create_chanx_wire(drive_x, + auto drive_node_idx = create_chanx_wire(layer_num, + drive_x, drive_x, y, ptc_num, @@ -292,14 +297,16 @@ void ClockRib::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphB clock_graph.add_switch_location(get_name(), drive.name, drive_x, y, drive_node_idx); // create rib wire to the right and left of the drive point - auto left_node_idx = create_chanx_wire(x_start + x_offset, + auto left_node_idx = create_chanx_wire(layer_num, + x_start + x_offset, drive_x - 1, y, ptc_num, Direction::DEC, rr_nodes, rr_graph_builder); - auto right_node_idx = create_chanx_wire(drive_x + 1, + auto right_node_idx = create_chanx_wire(layer_num, + drive_x + 1, x_end, y, ptc_num, @@ -320,7 +327,8 @@ void ClockRib::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphB } } -int ClockRib::create_chanx_wire(int x_start, +int ClockRib::create_chanx_wire(int layer, + int x_start, int x_end, int y, int ptc_num, @@ -333,6 +341,7 @@ int ClockRib::create_chanx_wire(int x_start, rr_graph_builder.set_node_type(chanx_node, CHANX); rr_graph_builder.set_node_coordinates(chanx_node, x_start, y, x_end, y); 
+ rr_graph_builder.set_node_layer(chanx_node, layer); rr_graph_builder.set_node_capacity(chanx_node, 1); rr_graph_builder.set_node_track_num(chanx_node, ptc_num); rr_graph_builder.set_node_rc_index(chanx_node, NodeRCIndex(find_create_rr_rc_data( @@ -363,7 +372,7 @@ int ClockRib::create_chanx_wire(int x_start, for (int ix = rr_graph.node_xlow(chanx_node); ix <= rr_graph.node_xhigh(chanx_node); ++ix) { for (int iy = rr_graph.node_ylow(chanx_node); iy <= rr_graph.node_yhigh(chanx_node); ++iy) { //TODO: CHANX uses odd swapped x/y indices here. Will rework once rr_node_indices is shadowed - rr_graph_builder.node_lookup().add_node(chanx_node, iy, ix, rr_graph.node_type(chanx_node), rr_graph.node_track_num(chanx_node)); + rr_graph_builder.node_lookup().add_node(chanx_node, layer, iy, ix, rr_graph.node_type(chanx_node), rr_graph.node_track_num(chanx_node)); } } @@ -573,6 +582,8 @@ void ClockSpine::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGrap VTR_ASSERT(repeat.y > 0); VTR_ASSERT(repeat.x > 0); + int layer_num = 0; //Function "FOR NOW" assumes that layer_num is always 0 + for (unsigned x = y_chan_wire.position; x < grid.width() - 1; x += repeat.x) { for (unsigned y_start = y_chan_wire.start; y_start < grid.height() - 1; y_start += repeat.y) { unsigned drive_y = y_start + drive.offset; @@ -610,7 +621,8 @@ void ClockSpine::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGrap } //create drive point (length zero wire) - auto drive_node_idx = create_chany_wire(drive_y, + auto drive_node_idx = create_chany_wire(layer_num, + drive_y, drive_y, x, ptc_num, @@ -621,7 +633,8 @@ void ClockSpine::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGrap clock_graph.add_switch_location(get_name(), drive.name, x, drive_y, drive_node_idx); // create spine wire above and below the drive point - auto left_node_idx = create_chany_wire(y_start + y_offset, + auto left_node_idx = create_chany_wire(layer_num, + y_start + y_offset, drive_y - 1, x, ptc_num, @@ 
-629,7 +642,8 @@ void ClockSpine::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGrap rr_nodes, rr_graph_builder, num_segments_x); - auto right_node_idx = create_chany_wire(drive_y + 1, + auto right_node_idx = create_chany_wire(layer_num, + drive_y + 1, y_end, x, ptc_num, @@ -654,7 +668,8 @@ void ClockSpine::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGrap } } -int ClockSpine::create_chany_wire(int y_start, +int ClockSpine::create_chany_wire(int layer, + int y_start, int y_end, int x, int ptc_num, @@ -668,6 +683,7 @@ int ClockSpine::create_chany_wire(int y_start, rr_graph_builder.set_node_type(chany_node, CHANY); rr_graph_builder.set_node_coordinates(chany_node, x, y_start, x, y_end); + rr_graph_builder.set_node_layer(chany_node, layer); rr_graph_builder.set_node_capacity(chany_node, 1); rr_graph_builder.set_node_track_num(chany_node, ptc_num); rr_graph_builder.set_node_rc_index(chany_node, NodeRCIndex(find_create_rr_rc_data( @@ -697,7 +713,7 @@ int ClockSpine::create_chany_wire(int y_start, /* TODO: Will replace these codes with an API add_node_to_all_locs() of RRGraphBuilder */ for (int ix = rr_graph.node_xlow(chany_node); ix <= rr_graph.node_xhigh(chany_node); ++ix) { for (int iy = rr_graph.node_ylow(chany_node); iy <= rr_graph.node_yhigh(chany_node); ++iy) { - rr_graph_builder.node_lookup().add_node(chany_node, ix, iy, rr_graph.node_type(chany_node), rr_graph.node_ptc_num(chany_node)); + rr_graph_builder.node_lookup().add_node(chany_node, layer, ix, iy, rr_graph.node_type(chany_node), rr_graph.node_ptc_num(chany_node)); } } diff --git a/vpr/src/route/clock_network_builders.h b/vpr/src/route/clock_network_builders.h index 60db3eba728..f9983cd85e9 100644 --- a/vpr/src/route/clock_network_builders.h +++ b/vpr/src/route/clock_network_builders.h @@ -175,7 +175,8 @@ class ClockRib : public ClockNetwork { void map_relative_seg_indices(const t_unified_to_parallel_seg_index& index_map) override; - int create_chanx_wire(int x_start, + int 
create_chanx_wire(int layer, + int x_start, int x_end, int y, int ptc_num, @@ -242,7 +243,8 @@ class ClockSpine : public ClockNetwork { int num_segments_x) override; size_t estimate_additional_nodes(const DeviceGrid& grid) override; void map_relative_seg_indices(const t_unified_to_parallel_seg_index& index_map) override; - int create_chany_wire(int y_start, + int create_chany_wire(int layer, + int y_start, int y_end, int x, int ptc_num, diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp index d5bd3e7a93d..4d0c0f96f05 100644 --- a/vpr/src/route/connection_router.cpp +++ b/vpr/src/route/connection_router.cpp @@ -31,7 +31,9 @@ inline void update_router_stats(const DeviceContext& device_ctx, auto node_type = rr_graph->node_type(rr_node_id); VTR_ASSERT(node_type != NUM_RR_TYPES); - t_physical_tile_type_ptr physical_type = device_ctx.grid.get_physical_type(rr_graph->node_xlow(rr_node_id), rr_graph->node_ylow(rr_node_id)); + t_physical_tile_type_ptr physical_type = device_ctx.grid.get_physical_type({rr_graph->node_xlow(rr_node_id), + rr_graph->node_ylow(rr_node_id), + rr_graph->node_layer(rr_node_id)}); if (is_inter_cluster_node(physical_type, node_type, diff --git a/vpr/src/route/overuse_report.cpp b/vpr/src/route/overuse_report.cpp index 1fd65115aea..f2e0864ec25 100644 --- a/vpr/src/route/overuse_report.cpp +++ b/vpr/src/route/overuse_report.cpp @@ -22,6 +22,7 @@ static void report_congested_nets(const Netlist<>& net_list, std::ostream& os, const std::set& congested_nets, bool is_flat, + int layer_num, int x, int y, bool report_sinks); @@ -30,6 +31,7 @@ static void log_overused_nodes_header(); static void log_single_overused_node_status(int overuse_index, RRNodeId inode); void print_block_pins_nets(std::ostream& os, t_physical_tile_type_ptr physical_type, + int layer, int root_x, int root_y, int pin_physical_num, @@ -110,6 +112,7 @@ void report_overused_nodes(const Netlist<>& net_list, bool report_sinks = false; int x = 
rr_graph.node_xlow(node_id); int y = rr_graph.node_ylow(node_id); + int layer_num = rr_graph.node_layer(node_id); switch (node_type) { case IPIN: case OPIN: @@ -117,8 +120,8 @@ void report_overused_nodes(const Netlist<>& net_list, node_id, rr_node_to_net_map); report_sinks = true; - x -= g_vpr_ctx.device().grid.get_physical_type(x, y)->width; - y -= g_vpr_ctx.device().grid.get_physical_type(x, y)->width; + x -= g_vpr_ctx.device().grid.get_physical_type({x, y, layer_num})->width; + y -= g_vpr_ctx.device().grid.get_physical_type({x, y, layer_num})->width; break; case CHANX: case CHANY: @@ -142,6 +145,7 @@ void report_overused_nodes(const Netlist<>& net_list, os, congested_nets, is_flat, + layer_num, x, y, report_sinks); @@ -209,47 +213,52 @@ static void report_overused_ipin_opin(std::ostream& os, auto grid_x = rr_graph.node_xlow(node_id); auto grid_y = rr_graph.node_ylow(node_id); + auto grid_layer = rr_graph.node_layer(node_id); + VTR_ASSERT_MSG( grid_x == rr_graph.node_xhigh(node_id) && grid_y == rr_graph.node_yhigh(node_id), "Non-track RR node should not span across multiple grid blocks."); - t_physical_tile_type_ptr physical_tile = device_ctx.grid.get_physical_type(grid_x, grid_y); + t_physical_tile_type_ptr physical_tile = device_ctx.grid.get_physical_type({grid_x, grid_y, grid_layer}); os << "Pin physical number = " << rr_graph.node_pin_num(node_id) << '\n'; if (is_inter_cluster_node(physical_tile, rr_graph.node_type(node_id), rr_graph.node_ptc_num(node_id))) { os << "On Tile Pin" << "\n"; } else { - auto pb_type_name = get_pb_graph_node_from_pin_physical_num(device_ctx.grid.get_physical_type(grid_x, grid_y), + auto pb_type_name = get_pb_graph_node_from_pin_physical_num(device_ctx.grid.get_physical_type({grid_x, grid_y, grid_layer}), rr_graph.node_ptc_num(node_id)) ->pb_type->name; - auto pb_pin = get_pb_pin_from_pin_physical_num(device_ctx.grid.get_physical_type(grid_x, grid_y), rr_graph.node_ptc_num(node_id)); + auto pb_pin = 
get_pb_pin_from_pin_physical_num(device_ctx.grid.get_physical_type({grid_x, grid_y, grid_layer}), + rr_graph.node_ptc_num(node_id)); os << "Intra-Tile Pin - Port : " << pb_pin->port->name << " - PB Type : " << std::string(pb_type_name) << "\n"; } print_block_pins_nets(os, - device_ctx.grid.get_physical_type(grid_x, grid_y), - grid_x - device_ctx.grid.get_width_offset(grid_x, grid_y), - grid_y - device_ctx.grid.get_height_offset(grid_x, grid_y), + device_ctx.grid.get_physical_type({grid_x, grid_y, grid_layer}), + grid_layer, + grid_x - device_ctx.grid.get_width_offset({grid_x, grid_y, grid_layer}), + grid_y - device_ctx.grid.get_height_offset({grid_x, grid_y, grid_layer}), rr_graph.node_ptc_num(node_id), rr_node_to_net_map); os << "Side = " << rr_graph.node_side_string(node_id) << "\n\n"; //Add block type for IPINs/OPINs in overused rr-node report const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; - auto& grid_info = place_ctx.grid_blocks[grid_x][grid_y]; + const auto& grid_info = place_ctx.grid_blocks; os << "Grid location: X = " << grid_x << ", Y = " << grid_y << '\n'; - os << "Number of blocks currently occupying this grid location = " << grid_info.usage << '\n'; + os << "Number of blocks currently occupying this grid location = " << grid_info.get_usage({grid_x, grid_y, grid_layer}) << '\n'; size_t iblock = 0; - for (size_t isubtile = 0; isubtile < grid_info.blocks.size(); ++isubtile) { + for (int isubtile = 0; isubtile < (int)grid_info.num_blocks_at_location({grid_x, grid_y, grid_layer}); ++isubtile) { //Check if there is a valid block at this subtile location - if (grid_info.subtile_empty(isubtile)) { + if (grid_info.is_sub_tile_empty({grid_x, grid_y, grid_layer}, isubtile)) { continue; } //Print out the block index, name and type - ClusterBlockId block_id = grid_info.blocks[isubtile]; + // TODO: Needs to be updated when RR Graph Nodes know their layer_num + ClusterBlockId block_id = grid_info.block_at_location({grid_x, grid_y, isubtile, 0}); os << 
"Block #" << iblock << ": "; os << "Block name = " << clb_nlist.block_pb(block_id)->name << ", "; os << "Block type = " << clb_nlist.block_type(block_id)->name << '\n'; @@ -298,6 +307,7 @@ static void report_congested_nets(const Netlist<>& net_list, std::ostream& os, const std::set& congested_nets, bool is_flat, + int layer_num, int x, int y, bool report_sinks) { @@ -329,11 +339,12 @@ static void report_congested_nets(const Netlist<>& net_list, cluster_block_id = convert_to_cluster_block_id(net_list.pin_block(sink_id)); } auto cluster_loc = g_vpr_ctx.placement().block_locs[cluster_block_id]; - auto physical_type = g_vpr_ctx.device().grid.get_physical_type(x, y); - int cluster_x = cluster_loc.loc.x - g_vpr_ctx.device().grid.get_physical_type(cluster_loc.loc.x, cluster_loc.loc.y)->width; - int cluster_y = cluster_loc.loc.y - g_vpr_ctx.device().grid.get_physical_type(cluster_loc.loc.x, cluster_loc.loc.y)->height; + auto physical_type = g_vpr_ctx.device().grid.get_physical_type({x, y, layer_num}); + int cluster_layer_num = cluster_loc.loc.layer; + int cluster_x = cluster_loc.loc.x - g_vpr_ctx.device().grid.get_physical_type({cluster_loc.loc.x, cluster_loc.loc.y, cluster_layer_num})->width; + int cluster_y = cluster_loc.loc.y - g_vpr_ctx.device().grid.get_physical_type({cluster_loc.loc.x, cluster_loc.loc.y, cluster_layer_num})->height; if (cluster_x == x && cluster_y == y) { - VTR_ASSERT(physical_type == g_vpr_ctx.device().grid.get_physical_type(cluster_x, cluster_y)); + VTR_ASSERT(physical_type == g_vpr_ctx.device().grid.get_physical_type({cluster_x, cluster_y, cluster_layer_num})); os << "Sink in the same location = " << "\n"; if (is_flat) { @@ -370,7 +381,8 @@ static void log_single_overused_node_status(int overuse_index, RRNodeId node_id) const auto& route_ctx = g_vpr_ctx.routing(); int x = rr_graph.node_xlow(node_id); int y = rr_graph.node_ylow(node_id); - auto physical_blk = device_ctx.grid.get_physical_type(x, y); + int layer_num = rr_graph.node_layer(node_id); + 
auto physical_blk = device_ctx.grid.get_physical_type({x, y, layer_num}); //Determines if direction or side is available for printing auto node_type = rr_graph.node_type(node_id); @@ -429,6 +441,7 @@ static void log_single_overused_node_status(int overuse_index, RRNodeId node_id) void print_block_pins_nets(std::ostream& os, t_physical_tile_type_ptr physical_type, + int layer, int root_x, int root_y, int pin_physical_num, @@ -457,7 +470,7 @@ void print_block_pins_nets(std::ostream& os, for (int pin = pin_num_range.low; pin <= pin_num_range.high; pin++) { t_rr_type rr_type = (get_pin_type_from_pin_physical_num(physical_type, pin) == DRIVER) ? t_rr_type::OPIN : t_rr_type::IPIN; - RRNodeId node_id = get_pin_rr_node_id(rr_graph.node_lookup(), physical_type, root_x, root_y, pin); + RRNodeId node_id = get_pin_rr_node_id(rr_graph.node_lookup(), physical_type, layer, root_x, root_y, pin); VTR_ASSERT(node_id != RRNodeId::INVALID()); auto search_result = rr_node_to_net_map.find(node_id); if (rr_type == t_rr_type::OPIN) { diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp index fd2091fbc9b..466608319fb 100644 --- a/vpr/src/route/route_common.cpp +++ b/vpr/src/route/route_common.cpp @@ -646,8 +646,8 @@ static vtr::vector> load_net_rr_terminals(const RR t_block_loc blk_loc; blk_loc = get_block_loc(block_id, is_flat); int iclass = get_block_pin_class_num(block_id, pin_id, is_flat); - - RRNodeId inode = rr_graph.node_lookup().find_node(blk_loc.loc.x, + RRNodeId inode = rr_graph.node_lookup().find_node(blk_loc.loc.layer, + blk_loc.loc.x, blk_loc.loc.y, (pin_count == 0 ? 
SOURCE : SINK), /* First pin is driver */ iclass); @@ -754,7 +754,8 @@ static vtr::vector> load_rr_clb_sources(const RR rr_type = SINK; } - RRNodeId inode = rr_graph.node_lookup().find_node(blk_loc.loc.x, + RRNodeId inode = rr_graph.node_lookup().find_node(blk_loc.loc.layer, + blk_loc.loc.x, blk_loc.loc.y, rr_type, iclass); diff --git a/vpr/src/route/route_timing.cpp b/vpr/src/route/route_timing.cpp index d119403cb8e..48074f717cb 100644 --- a/vpr/src/route/route_timing.cpp +++ b/vpr/src/route/route_timing.cpp @@ -2310,7 +2310,7 @@ vtr::vector>> set_net std::for_each(sink_grp.begin(), sink_grp.end(), [&rr_graph](int& sink_rr_num) { sink_rr_num = rr_graph.node_ptc_num(RRNodeId(sink_rr_num)); }); - auto physical_type = device_ctx.grid.get_physical_type(blk_loc.loc.x, blk_loc.loc.y); + auto physical_type = device_ctx.grid.get_physical_type({blk_loc.loc.x, blk_loc.loc.y, blk_loc.loc.layer}); // Get the choke points of the sink corresponds to pin_count given the sink group auto sink_choking_spots = get_sink_choking_points(physical_type, rr_graph.node_ptc_num(RRNodeId(net_rr_terminal[net_id][pin_count])), @@ -2321,6 +2321,7 @@ vtr::vector>> set_net int num_reachable_sinks = choking_spot.second; auto pin_rr_node_id = get_pin_rr_node_id(rr_graph.node_lookup(), physical_type, + blk_loc.loc.layer, blk_loc.loc.x, blk_loc.loc.y, pin_physical_num); diff --git a/vpr/src/route/router_lookahead_extended_map.cpp b/vpr/src/route/router_lookahead_extended_map.cpp index 5b95603f191..375b1127177 100644 --- a/vpr/src/route/router_lookahead_extended_map.cpp +++ b/vpr/src/route/router_lookahead_extended_map.cpp @@ -73,12 +73,15 @@ std::pair ExtendedMapLookahead::get_src_opin_cost(RRNodeId from_no //reachable, we query the f_wire_cost_map (i.e. the wire lookahead) to get the final //delay to reach the sink. 
- t_physical_tile_type_ptr tile_type = device_ctx.grid.get_physical_type(rr_graph.node_xlow(from_node), rr_graph.node_ylow(from_node)); + t_physical_tile_type_ptr tile_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(from_node), + rr_graph.node_ylow(from_node), + rr_graph.node_layer(from_node)}); auto tile_index = tile_type->index; auto from_ptc = rr_graph.node_ptc_num(from_node); + int from_layer_num = rr_graph.node_layer(from_node); - if (this->src_opin_delays[tile_index][from_ptc].empty()) { + if (this->src_opin_delays[from_layer_num][tile_index][from_ptc].empty()) { //During lookahead profiling we were unable to find any wires which connected //to this PTC. // @@ -105,7 +108,7 @@ std::pair ExtendedMapLookahead::get_src_opin_cost(RRNodeId from_no float expected_delay_cost = std::numeric_limits::infinity(); float expected_cong_cost = std::numeric_limits::infinity(); - for (const auto& kv : this->src_opin_delays[tile_index][from_ptc]) { + for (const auto& kv : this->src_opin_delays[from_layer_num][tile_index][from_ptc]) { const util::t_reachable_wire_inf& reachable_wire_inf = kv.second; util::Cost_Entry cost_entry; @@ -151,14 +154,17 @@ float ExtendedMapLookahead::get_chan_ipin_delays(RRNodeId to_node) const { e_rr_type to_type = rr_graph.node_type(to_node); VTR_ASSERT(to_type == SINK || to_type == IPIN); - auto to_tile_type = device_ctx.grid.get_physical_type(rr_graph.node_xlow(to_node), rr_graph.node_ylow(to_node)); + auto to_tile_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(to_node), + rr_graph.node_ylow(to_node), + rr_graph.node_layer(to_node)}); auto to_tile_index = to_tile_type->index; auto to_ptc = rr_graph.node_ptc_num(to_node); + int to_layer_num = rr_graph.node_layer(to_node); float site_pin_delay = 0.f; - if (this->chan_ipins_delays[to_tile_index].size() != 0) { - auto reachable_wire_inf = this->chan_ipins_delays[to_tile_index][to_ptc]; + if (this->chan_ipins_delays[to_layer_num][to_tile_index].size() != 0) { + auto 
reachable_wire_inf = this->chan_ipins_delays[to_layer_num][to_tile_index][to_ptc]; site_pin_delay = reachable_wire_inf.delay; } diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp index d029900f565..b518970dcc0 100644 --- a/vpr/src/route/router_lookahead_map.cpp +++ b/vpr/src/route/router_lookahead_map.cpp @@ -205,7 +205,11 @@ struct t_dijkstra_data { t_wire_cost_map f_wire_cost_map; /******** File-Scope Functions ********/ -Cost_Entry get_wire_cost_entry(e_rr_type rr_type, int seg_index, int delta_x, int delta_y); +Cost_Entry get_wire_cost_entry(e_rr_type rr_type, + int seg_index, + int layer_num, + int delta_x, + int delta_y); static void compute_router_wire_lookahead(const std::vector& segment_inf); static void compute_tiles_lookahead(std::unordered_map& inter_tile_pin_primitive_pin_delay, std::unordered_map>& tile_min_cost, @@ -221,9 +225,7 @@ static void store_min_cost_to_sinks(std::unordered_map& inter_tile_pin_primitive_pin_delay); -static void min_global_cost_map(vtr::NdMatrix& internal_opin_global_cost_map, - size_t max_dx, - size_t max_dy); +static void min_global_cost_map(vtr::NdMatrix& internal_opin_global_cost_map); // Read the file and fill inter_tile_pin_primitive_pin_delay and tile_min_cost static void read_intra_cluster_router_lookahead(std::unordered_map& inter_tile_pin_primitive_pin_delay, @@ -236,10 +238,11 @@ static void write_intra_cluster_router_lookahead(const std::string& file, const std::unordered_map>& tile_min_cost); /* returns index of a node from which to start routing */ -static RRNodeId get_start_node(int start_x, int start_y, int target_x, int target_y, t_rr_type rr_type, int seg_index, int track_offset); +static RRNodeId get_start_node(int layer, int start_x, int start_y, int target_x, int target_y, t_rr_type rr_type, int seg_index, int track_offset); /* runs Dijkstra's algorithm from specified node until all nodes have been visited. 
Each time a pin is visited, the delay/congestion information * to that pin is stored is added to an entry in the routing_cost_map */ static void run_dijkstra(RRNodeId start_node, + int sample_layer_num, int start_x, int start_y, t_routing_cost_map& routing_cost_map, @@ -250,11 +253,11 @@ static void expand_dijkstra_neighbours(PQ_Entry parent_entry, vtr::vector& node_expanded, std::priority_queue& pq); /* sets the lookahead cost map entries based on representative cost entries from routing_cost_map */ -static void set_lookahead_map_costs(int segment_index, e_rr_type chan_type, t_routing_cost_map& routing_cost_map); +static void set_lookahead_map_costs(int layer_num, int segment_index, e_rr_type chan_type, t_routing_cost_map& routing_cost_map); /* fills in missing lookahead map entries by copying the cost of the closest valid entry */ static void fill_in_missing_lookahead_entries(int segment_index, e_rr_type chan_type); /* returns a cost entry in the f_wire_cost_map that is near the specified coordinates (and preferably towards (0,0)) */ -static Cost_Entry get_nearby_cost_entry(int x, int y, int segment_index, int chan_index); +static Cost_Entry get_nearby_cost_entry(int layer_num, int x, int y, int segment_index, int chan_index); /* returns the absolute delta_x and delta_y offset required to reach to_node from from_node */ static void get_xy_deltas(const RRNodeId from_node, const RRNodeId to_node, int* delta_x, int* delta_y); static void adjust_rr_position(const RRNodeId rr, int& x, int& y); @@ -262,7 +265,7 @@ static void adjust_rr_pin_position(const RRNodeId rr, int& x, int& y); static void adjust_rr_wire_position(const RRNodeId rr, int& x, int& y); static void adjust_rr_src_sink_position(const RRNodeId rr, int& x, int& y); -static void print_wire_cost_map(const std::vector& segment_inf); +static void print_wire_cost_map(int layer_num, const std::vector& segment_inf); static void print_router_cost_map(const t_routing_cost_map& router_cost_map); /******** Interface 
class member function definitions ********/ @@ -270,13 +273,18 @@ float MapLookahead::get_expected_cost(RRNodeId current_node, RRNodeId target_nod auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; - t_physical_tile_type_ptr from_physical_type = device_ctx.grid.get_physical_type(rr_graph.node_xlow(current_node), rr_graph.node_ylow(current_node)); + t_physical_tile_type_ptr from_physical_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(current_node), + rr_graph.node_ylow(current_node), + rr_graph.node_layer(current_node)}); t_rr_type from_rr_type = rr_graph.node_type(current_node); int from_node_ptc_num = rr_graph.node_ptc_num(current_node); - t_physical_tile_type_ptr to_physical_type = device_ctx.grid.get_physical_type(rr_graph.node_xlow(target_node), rr_graph.node_ylow(target_node)); + t_physical_tile_type_ptr to_physical_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(target_node), + rr_graph.node_ylow(target_node), + rr_graph.node_layer(target_node)}); t_rr_type to_rr_type = rr_graph.node_type(target_node); int to_node_ptc_num = rr_graph.node_ptc_num(target_node); + int to_layer_num = rr_graph.node_layer(target_node); VTR_ASSERT(to_rr_type == t_rr_type::SINK); float delay_cost = 0.; @@ -285,6 +293,8 @@ float MapLookahead::get_expected_cost(RRNodeId current_node, RRNodeId target_nod float cong_offset_cost = 0.; if (is_flat_) { + // We have not checked the multi-layer FPGA for flat routing + VTR_ASSERT(rr_graph.node_layer(current_node) == rr_graph.node_layer(target_node)); if (from_rr_type == CHANX || from_rr_type == CHANY) { std::tie(delay_cost, cong_cost) = get_expected_delay_and_cong(current_node, target_node, params, R_upstream); @@ -329,8 +339,8 @@ float MapLookahead::get_expected_cost(RRNodeId current_node, RRNodeId target_nod get_xy_deltas(current_node, target_node, &delta_x, &delta_y); delta_x = abs(delta_x); delta_y = abs(delta_y); - delay_cost = params.criticality * 
distance_based_min_cost[delta_x][delta_y].delay; - cong_cost = (1. - params.criticality) * distance_based_min_cost[delta_x][delta_y].congestion; + delay_cost = params.criticality * distance_based_min_cost[to_layer_num][delta_x][delta_y].delay; + cong_cost = (1. - params.criticality) * distance_based_min_cost[to_layer_num][delta_x][delta_y].congestion; delay_offset_cost = params.criticality * tile_min_cost.at(to_physical_type).at(to_node_ptc_num).delay; cong_offset_cost = (1. - params.criticality) * tile_min_cost.at(to_physical_type).at(to_node_ptc_num).congestion; @@ -361,8 +371,8 @@ float MapLookahead::get_expected_cost(RRNodeId current_node, RRNodeId target_nod get_xy_deltas(current_node, target_node, &delta_x, &delta_y); delta_x = abs(delta_x); delta_y = abs(delta_y); - delay_cost = params.criticality * distance_based_min_cost[delta_x][delta_y].delay; - cong_cost = (1. - params.criticality) * distance_based_min_cost[delta_x][delta_y].congestion; + delay_cost = params.criticality * distance_based_min_cost[to_layer_num][delta_x][delta_y].delay; + cong_cost = (1. - params.criticality) * distance_based_min_cost[to_layer_num][delta_x][delta_y].congestion; delay_offset_cost = params.criticality * tile_min_cost.at(to_physical_type).at(to_node_ptc_num).delay; cong_offset_cost = (1. - params.criticality) * tile_min_cost.at(to_physical_type).at(to_node_ptc_num).congestion; @@ -393,6 +403,7 @@ std::pair MapLookahead::get_expected_delay_and_cong(RRNodeId from_ auto& rr_graph = device_ctx.rr_graph; int delta_x, delta_y; + int from_layer_num = rr_graph.node_layer(from_node); get_xy_deltas(from_node, to_node, &delta_x, &delta_y); delta_x = abs(delta_x); delta_y = abs(delta_y); @@ -407,12 +418,15 @@ std::pair MapLookahead::get_expected_delay_and_cong(RRNodeId from_ //reachable, we query the f_wire_cost_map (i.e. the wire lookahead) to get the final //delay to reach the sink. 
- t_physical_tile_type_ptr tile_type = device_ctx.grid.get_physical_type(rr_graph.node_xlow(from_node), rr_graph.node_ylow(from_node)); + t_physical_tile_type_ptr tile_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(from_node), + rr_graph.node_ylow(from_node), + from_layer_num}); + auto tile_index = std::distance(&device_ctx.physical_tile_types[0], tile_type); auto from_ptc = rr_graph.node_ptc_num(from_node); - if (this->src_opin_delays[tile_index][from_ptc].empty()) { + if (this->src_opin_delays[from_layer_num][tile_index][from_ptc].empty()) { //During lookahead profiling we were unable to find any wires which connected //to this PTC. // @@ -436,7 +450,7 @@ std::pair MapLookahead::get_expected_delay_and_cong(RRNodeId from_ //From the current SOURCE/OPIN we look-up the wiretypes which are reachable //and then add the estimates from those wire types for the distance of interest. //If there are multiple options we use the minimum value. - for (const auto& kv : this->src_opin_delays[tile_index][from_ptc]) { + for (const auto& kv : this->src_opin_delays[from_layer_num][tile_index][from_ptc]) { const util::t_reachable_wire_inf& reachable_wire_inf = kv.second; Cost_Entry wire_cost_entry; @@ -449,7 +463,11 @@ std::pair MapLookahead::get_expected_delay_and_cong(RRNodeId from_ } else { //For an actual accessible wire, we query the wire look-up to get it's //delay and congestion cost estimates - wire_cost_entry = get_wire_cost_entry(reachable_wire_inf.wire_rr_type, reachable_wire_inf.wire_seg_index, delta_x, delta_y); + wire_cost_entry = get_wire_cost_entry(reachable_wire_inf.wire_rr_type, + reachable_wire_inf.wire_seg_index, + from_layer_num, + delta_x, + delta_y); } float this_delay_cost = (params.criticality) * (reachable_wire_inf.delay + wire_cost_entry.delay); @@ -481,7 +499,11 @@ std::pair MapLookahead::get_expected_delay_and_cong(RRNodeId from_ VTR_ASSERT(from_seg_index >= 0); /* now get the expected cost from our lookahead map */ - Cost_Entry cost_entry 
= get_wire_cost_entry(from_type, from_seg_index, delta_x, delta_y); + Cost_Entry cost_entry = get_wire_cost_entry(from_type, + from_seg_index, + from_layer_num, + delta_x, + delta_y); float expected_delay = cost_entry.delay; float expected_cong = cost_entry.congestion; @@ -532,9 +554,7 @@ void MapLookahead::compute_intra_tile() { det_routing_arch_, g_vpr_ctx.device()); - min_global_cost_map(distance_based_min_cost, - f_wire_cost_map.dim_size(2), - f_wire_cost_map.dim_size(3)); + min_global_cost_map(distance_based_min_cost); } void MapLookahead::read(const std::string& file) { @@ -554,9 +574,7 @@ void MapLookahead::read_intra_cluster(const std::string& file) { file); // The information about distance_based_min_cost is not stored in the file, thus it needs to be computed - min_global_cost_map(distance_based_min_cost, - f_wire_cost_map.dim_size(2), - f_wire_cost_map.dim_size(3)); + min_global_cost_map(distance_based_min_cost); } void MapLookahead::write(const std::string& file) const { @@ -571,7 +589,7 @@ void MapLookahead::write_intra_cluster(const std::string& file) const { /******** Function Definitions ********/ -Cost_Entry get_wire_cost_entry(e_rr_type rr_type, int seg_index, int delta_x, int delta_y) { +Cost_Entry get_wire_cost_entry(e_rr_type rr_type, int seg_index, int layer_num, int delta_x, int delta_y) { VTR_ASSERT_SAFE(rr_type == CHANX || rr_type == CHANY); int chan_index = 0; @@ -579,10 +597,11 @@ Cost_Entry get_wire_cost_entry(e_rr_type rr_type, int seg_index, int delta_x, in chan_index = 1; } - VTR_ASSERT_SAFE(delta_x < (int)f_wire_cost_map.dim_size(2)); - VTR_ASSERT_SAFE(delta_y < (int)f_wire_cost_map.dim_size(3)); + VTR_ASSERT_SAFE(layer_num < (int)f_wire_cost_map.dim_size(0)); + VTR_ASSERT_SAFE(delta_x < (int)f_wire_cost_map.dim_size(3)); + VTR_ASSERT_SAFE(delta_y < (int)f_wire_cost_map.dim_size(4)); - return f_wire_cost_map[chan_index][seg_index][delta_x][delta_y]; + return f_wire_cost_map[layer_num][chan_index][seg_index][delta_x][delta_y]; } 
static void compute_router_wire_lookahead(const std::vector& segment_inf) { @@ -593,7 +612,11 @@ static void compute_router_wire_lookahead(const std::vector& segm auto& grid = device_ctx.grid; //Re-allocate - f_wire_cost_map = t_wire_cost_map({2, segment_inf.size(), device_ctx.grid.width(), device_ctx.grid.height()}); + f_wire_cost_map = t_wire_cost_map({static_cast(grid.get_num_layers()), + 2, + segment_inf.size(), + device_ctx.grid.width(), + device_ctx.grid.height()}); int longest_length = 0; for (const auto& seg_inf : segment_inf) { @@ -620,117 +643,122 @@ static void compute_router_wire_lookahead(const std::vector& segm int target_y = device_ctx.grid.height() - 2; //Profile each wire segment type - for (int iseg = 0; iseg < int(segment_inf.size()); iseg++) { - //First try to pick good representative sample locations for each type - std::map> sample_nodes; - std::vector chan_types; - if (segment_inf[iseg].parallel_axis == X_AXIS) - chan_types.push_back(CHANX); - else if (segment_inf[iseg].parallel_axis == Y_AXIS) - chan_types.push_back(CHANY); - else //Both for BOTH_AXIS segments and special segments such as clock_networks we want to search in both directions. 
- chan_types.insert(chan_types.end(), {CHANX, CHANY}); - - for (e_rr_type chan_type : chan_types) { - for (int ref_inc : ref_increments) { - int sample_x = ref_x + ref_inc; - int sample_y = ref_y + ref_inc; - - if (sample_x >= int(grid.width())) continue; - if (sample_y >= int(grid.height())) continue; - - for (int track_offset = 0; track_offset < MAX_TRACK_OFFSET; track_offset += 2) { - /* get the rr node index from which to start routing */ - RRNodeId start_node = get_start_node(sample_x, sample_y, - target_x, target_y, //non-corner upper right - chan_type, iseg, track_offset); - - if (!start_node) { - continue; + for (int layer_num = 0; layer_num < grid.get_num_layers(); layer_num++) { + for (int iseg = 0; iseg < int(segment_inf.size()); iseg++) { + //First try to pick good representative sample locations for each type + std::map> sample_nodes; + std::vector chan_types; + if (segment_inf[iseg].parallel_axis == X_AXIS) + chan_types.push_back(CHANX); + else if (segment_inf[iseg].parallel_axis == Y_AXIS) + chan_types.push_back(CHANY); + else //Both for BOTH_AXIS segments and special segments such as clock_networks we want to search in both directions. 
+ chan_types.insert(chan_types.end(), {CHANX, CHANY}); + + for (e_rr_type chan_type : chan_types) { + for (int ref_inc : ref_increments) { + int sample_x = ref_x + ref_inc; + int sample_y = ref_y + ref_inc; + + if (sample_x >= int(grid.width())) continue; + if (sample_y >= int(grid.height())) continue; + + for (int track_offset = 0; track_offset < MAX_TRACK_OFFSET; track_offset += 2) { + /* get the rr node index from which to start routing */ + RRNodeId start_node = get_start_node(layer_num, sample_x, sample_y, + target_x, target_y, //non-corner upper right + chan_type, iseg, track_offset); + + if (!start_node) { + continue; + } + // TODO: Temporary - After testing benchmarks this can be deleted + VTR_ASSERT(rr_graph.node_layer(start_node) == layer_num); + + sample_nodes[chan_type].push_back(RRNodeId(start_node)); } - - sample_nodes[chan_type].push_back(RRNodeId(start_node)); } } - } - //If we failed to find any representative sample locations, search exhaustively - // - //This is to ensure we sample 'unusual' wire types which may not exist in all channels - //(e.g. clock routing) - for (e_rr_type chan_type : chan_types) { - if (!sample_nodes[chan_type].empty()) continue; + //If we failed to find any representative sample locations, search exhaustively + // + //This is to ensure we sample 'unusual' wire types which may not exist in all channels + //(e.g. 
clock routing) + for (e_rr_type chan_type : chan_types) { + if (!sample_nodes[chan_type].empty()) continue; - //Try an exhaustive search to find a suitable sample point - for (RRNodeId rr_node : rr_graph.nodes()) { - auto rr_type = rr_graph.node_type(rr_node); - if (rr_type != chan_type) continue; + //Try an exhaustive search to find a suitable sample point + for (RRNodeId rr_node : rr_graph.nodes()) { + auto rr_type = rr_graph.node_type(rr_node); + if (rr_type != chan_type) continue; + if (rr_graph.node_layer(rr_node) != layer_num) continue; - auto cost_index = rr_graph.node_cost_index(rr_node); - VTR_ASSERT(cost_index != RRIndexedDataId(OPEN)); + auto cost_index = rr_graph.node_cost_index(rr_node); + VTR_ASSERT(cost_index != RRIndexedDataId(OPEN)); - int seg_index = device_ctx.rr_indexed_data[cost_index].seg_index; + int seg_index = device_ctx.rr_indexed_data[cost_index].seg_index; - if (seg_index == iseg) { - sample_nodes[chan_type].push_back(rr_node); - } + if (seg_index == iseg) { + sample_nodes[chan_type].push_back(rr_node); + } - if (sample_nodes[chan_type].size() >= ref_increments.size()) { - break; + if (sample_nodes[chan_type].size() >= ref_increments.size()) { + break; + } } } - } - - //Finally, now that we have a list of sample locations, run a Djikstra flood from - //each sample location to profile the routing network from this type - - t_dijkstra_data dijkstra_data; - t_routing_cost_map routing_cost_map({device_ctx.grid.width(), device_ctx.grid.height()}); - for (e_rr_type chan_type : chan_types) { - if (sample_nodes[chan_type].empty()) { - VTR_LOG_WARN("Unable to find any sample location for segment %s type '%s' (length %d)\n", - rr_node_typename[chan_type], - segment_inf[iseg].name.c_str(), - segment_inf[iseg].length); - } else { - //reset cost for this segment - routing_cost_map.fill(Expansion_Cost_Entry()); + //Finally, now that we have a list of sample locations, run a Djikstra flood from + //each sample location to profile the routing network 
from this type - for (RRNodeId sample_node : sample_nodes[chan_type]) { - int sample_x = rr_graph.node_xlow(sample_node); - int sample_y = rr_graph.node_ylow(sample_node); + t_dijkstra_data dijkstra_data; + t_routing_cost_map routing_cost_map({device_ctx.grid.width(), device_ctx.grid.height()}); - if (rr_graph.node_direction(sample_node) == Direction::DEC) { - sample_x = rr_graph.node_xhigh(sample_node); - sample_y = rr_graph.node_yhigh(sample_node); + for (e_rr_type chan_type : chan_types) { + if (sample_nodes[chan_type].empty()) { + VTR_LOG_WARN("Unable to find any sample location for segment %s type '%s' (length %d)\n", + rr_node_typename[chan_type], + segment_inf[iseg].name.c_str(), + segment_inf[iseg].length); + } else { + //reset cost for this segment + routing_cost_map.fill(Expansion_Cost_Entry()); + + for (RRNodeId sample_node : sample_nodes[chan_type]) { + int sample_x = rr_graph.node_xlow(sample_node); + int sample_y = rr_graph.node_ylow(sample_node); + + if (rr_graph.node_direction(sample_node) == Direction::DEC) { + sample_x = rr_graph.node_xhigh(sample_node); + sample_y = rr_graph.node_yhigh(sample_node); + } + + run_dijkstra(sample_node, + layer_num, + sample_x, + sample_y, + routing_cost_map, + &dijkstra_data); } - run_dijkstra(sample_node, - sample_x, - sample_y, - routing_cost_map, - &dijkstra_data); - } - - if (false) print_router_cost_map(routing_cost_map); + if (false) print_router_cost_map(routing_cost_map); - /* boil down the cost list in routing_cost_map at each coordinate to a representative cost entry and store it in the lookahead - * cost map */ - set_lookahead_map_costs(iseg, chan_type, routing_cost_map); + /* boil down the cost list in routing_cost_map at each coordinate to a representative cost entry and store it in the lookahead + * cost map */ + set_lookahead_map_costs(layer_num, iseg, chan_type, routing_cost_map); - /* fill in missing entries in the lookahead cost map by copying the closest cost entries (cost map was computed based 
on - * a reference coordinate > (0,0) so some entries that represent a cross-chip distance have not been computed) */ - fill_in_missing_lookahead_entries(iseg, chan_type); + /* fill in missing entries in the lookahead cost map by copying the closest cost entries (cost map was computed based on + * a reference coordinate > (0,0) so some entries that represent a cross-chip distance have not been computed) */ + fill_in_missing_lookahead_entries(iseg, chan_type); + } } } + if (false) print_wire_cost_map(layer_num, segment_inf); } - - if (false) print_wire_cost_map(segment_inf); } /* returns index of a node from which to start routing */ -static RRNodeId get_start_node(int start_x, int start_y, int target_x, int target_y, t_rr_type rr_type, int seg_index, int track_offset) { +static RRNodeId get_start_node(int layer, int start_x, int start_y, int target_x, int target_y, t_rr_type rr_type, int seg_index, int track_offset) { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; const auto& node_lookup = rr_graph.node_lookup(); @@ -751,7 +779,7 @@ static RRNodeId get_start_node(int start_x, int start_y, int target_x, int targe int start_lookup_y = start_y; /* find first node in channel that has specified segment index and goes in the desired direction */ - for (const RRNodeId& node_id : node_lookup.find_channel_nodes(start_lookup_x, start_lookup_y, rr_type)) { + for (const RRNodeId& node_id : node_lookup.find_channel_nodes(layer, start_lookup_x, start_lookup_y, rr_type)) { VTR_ASSERT(rr_graph.node_type(node_id) == rr_type); Direction node_direction = rr_graph.node_direction(node_id); @@ -774,6 +802,7 @@ static RRNodeId get_start_node(int start_x, int start_y, int target_x, int targe /* runs Dijkstra's algorithm from specified node until all nodes have been visited. 
Each time a pin is visited, the delay/congestion information * to that pin is stored is added to an entry in the routing_cost_map */ static void run_dijkstra(RRNodeId start_node, + int sample_layer_num, int start_x, int start_y, t_routing_cost_map& routing_cost_map, @@ -814,6 +843,10 @@ static void run_dijkstra(RRNodeId start_node, continue; } + if (rr_graph.node_layer(curr_node) != sample_layer_num) { + continue; + } + //VTR_LOG("Expanding with delay=%10.3g cong=%10.3g (%s)\n", current.delay, current.congestion_upstream, describe_rr_node(rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, curr_node).c_str()); /* if this node is an ipin record its congestion/delay in the routing_cost_map */ @@ -849,7 +882,9 @@ static void expand_dijkstra_neighbours(PQ_Entry parent_entry, for (t_edge_size edge : rr_graph.edges(parent)) { RRNodeId child_node = rr_graph.edge_sink_node(parent, edge); // For the time being, we decide to not let the lookahead explore the node inside the clusters - t_physical_tile_type_ptr physical_type = device_ctx.grid.get_physical_type(rr_graph.node_xlow(child_node), rr_graph.node_ylow(child_node)); + t_physical_tile_type_ptr physical_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(child_node), + rr_graph.node_ylow(child_node), + rr_graph.node_layer(child_node)}); if (!is_inter_cluster_node(physical_type, rr_graph.node_type(child_node), @@ -882,7 +917,7 @@ static void expand_dijkstra_neighbours(PQ_Entry parent_entry, } /* sets the lookahead cost map entries based on representative cost entries from routing_cost_map */ -static void set_lookahead_map_costs(int segment_index, e_rr_type chan_type, t_routing_cost_map& routing_cost_map) { +static void set_lookahead_map_costs(int layer_num, int segment_index, e_rr_type chan_type, t_routing_cost_map& routing_cost_map) { int chan_index = 0; if (chan_type == CHANY) { chan_index = 1; @@ -893,7 +928,7 @@ static void set_lookahead_map_costs(int segment_index, e_rr_type chan_type, t_ro for (unsigned 
iy = 0; iy < routing_cost_map.dim_size(1); iy++) { Expansion_Cost_Entry& expansion_cost_entry = routing_cost_map[ix][iy]; - f_wire_cost_map[chan_index][segment_index][ix][iy] = expansion_cost_entry.get_representative_cost_entry(REPRESENTATIVE_ENTRY_METHOD); + f_wire_cost_map[layer_num][chan_index][segment_index][ix][iy] = expansion_cost_entry.get_representative_cost_entry(REPRESENTATIVE_ENTRY_METHOD); } } } @@ -908,20 +943,22 @@ static void fill_in_missing_lookahead_entries(int segment_index, e_rr_type chan_ auto& device_ctx = g_vpr_ctx.device(); /* find missing cost entries and fill them in by copying a nearby cost entry */ - for (unsigned ix = 0; ix < device_ctx.grid.width(); ix++) { - for (unsigned iy = 0; iy < device_ctx.grid.height(); iy++) { - Cost_Entry cost_entry = f_wire_cost_map[chan_index][segment_index][ix][iy]; - - if (std::isnan(cost_entry.delay) && std::isnan(cost_entry.congestion)) { - Cost_Entry copied_entry = get_nearby_cost_entry(ix, iy, segment_index, chan_index); - f_wire_cost_map[chan_index][segment_index][ix][iy] = copied_entry; + for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); ++layer_num) { + for (unsigned ix = 0; ix < device_ctx.grid.width(); ix++) { + for (unsigned iy = 0; iy < device_ctx.grid.height(); iy++) { + Cost_Entry cost_entry = f_wire_cost_map[layer_num][chan_index][segment_index][ix][iy]; + + if (std::isnan(cost_entry.delay) && std::isnan(cost_entry.congestion)) { + Cost_Entry copied_entry = get_nearby_cost_entry(layer_num, ix, iy, segment_index, chan_index); + f_wire_cost_map[layer_num][chan_index][segment_index][ix][iy] = copied_entry; + } } } } } /* returns a cost entry in the f_wire_cost_map that is near the specified coordinates (and preferably towards (0,0)) */ -static Cost_Entry get_nearby_cost_entry(int x, int y, int segment_index, int chan_index) { +static Cost_Entry get_nearby_cost_entry(int layer_num, int x, int y, int segment_index, int chan_index) { /* compute the slope from x,y to 0,0 and then 
move towards 0,0 by one unit to get the coordinates * of the cost entry to be copied */ @@ -948,14 +985,14 @@ static Cost_Entry get_nearby_cost_entry(int x, int y, int segment_index, int cha copy_y = std::max(copy_y, 0); //Clip to zero copy_x = std::max(copy_x, 0); //Clip to zero - Cost_Entry copy_entry = f_wire_cost_map[chan_index][segment_index][copy_x][copy_y]; + Cost_Entry copy_entry = f_wire_cost_map[layer_num][chan_index][segment_index][copy_x][copy_y]; /* if the entry to be copied is also empty, recurse */ if (std::isnan(copy_entry.delay) && std::isnan(copy_entry.congestion)) { if (copy_x == 0 && copy_y == 0) { copy_entry = Cost_Entry(0., 0.); //(0, 0) entry is invalid so set zero to terminate recursion } else { - copy_entry = get_nearby_cost_entry(copy_x, copy_y, segment_index, chan_index); + copy_entry = get_nearby_cost_entry(layer_num, copy_x, copy_y, segment_index, chan_index); } } @@ -1274,11 +1311,11 @@ static void adjust_rr_src_sink_position(const RRNodeId rr, int& x, int& y) { y = vtr::nint((rr_graph.node_ylow(rr) + rr_graph.node_yhigh(rr)) / 2.); } -static void print_wire_cost_map(const std::vector& segment_inf) { +static void print_wire_cost_map(int layer_num, const std::vector& segment_inf) { auto& device_ctx = g_vpr_ctx.device(); - for (size_t chan_index = 0; chan_index < f_wire_cost_map.dim_size(0); chan_index++) { - for (size_t iseg = 0; iseg < f_wire_cost_map.dim_size(1); iseg++) { + for (size_t chan_index = 0; chan_index < f_wire_cost_map.dim_size(1); chan_index++) { + for (size_t iseg = 0; iseg < f_wire_cost_map.dim_size(2); iseg++) { vtr::printf("Seg %d (%s, length %d) %d\n", iseg, segment_inf[iseg].name.c_str(), @@ -1286,7 +1323,7 @@ static void print_wire_cost_map(const std::vector& segment_inf) { chan_index); for (size_t iy = 0; iy < device_ctx.grid.height(); iy++) { for (size_t ix = 0; ix < device_ctx.grid.width(); ix++) { - vtr::printf("%2d,%2d: %10.3g\t", ix, iy, f_wire_cost_map[chan_index][iseg][ix][iy].delay); + 
vtr::printf("%2d,%2d: %10.3g\t", ix, iy, f_wire_cost_map[layer_num][chan_index][iseg][ix][iy].delay); } vtr::printf("\n"); } @@ -1335,11 +1372,13 @@ static void compute_tile_lookahead(std::unordered_map& internal_opin_global_cost_map, - size_t max_dx, - size_t max_dy) { - internal_opin_global_cost_map.resize({max_dx, max_dy}); - for (int dx = 0; dx < (int)max_dx; dx++) { - for (int dy = 0; dy < (int)max_dy; dy++) { - util::Cost_Entry min_cost(std::numeric_limits::max(), std::numeric_limits::max()); - for (int chan_idx = 0; chan_idx < (int)f_wire_cost_map.dim_size(0); chan_idx++) { - for (int seg_idx = 0; seg_idx < (int)f_wire_cost_map.dim_size(1); seg_idx++) { - auto cost = util::Cost_Entry(f_wire_cost_map[chan_idx][seg_idx][dx][dy].delay, - f_wire_cost_map[chan_idx][seg_idx][dx][dy].congestion); - if (cost.delay < min_cost.delay) { - min_cost.delay = cost.delay; - min_cost.congestion = cost.congestion; +static void min_global_cost_map(vtr::NdMatrix& internal_opin_global_cost_map) { + int num_layers = g_vpr_ctx.device().grid.get_num_layers(); + int width = (int)g_vpr_ctx.device().grid.width(); + int height = (int)g_vpr_ctx.device().grid.height(); + internal_opin_global_cost_map.resize({static_cast(num_layers), + static_cast(width), + static_cast(height)}); + + for (int layer_num = 0; layer_num < num_layers; layer_num++) { + for (int dx = 0; dx < width; dx++) { + for (int dy = 0; dy < height; dy++) { + util::Cost_Entry min_cost(std::numeric_limits::max(), std::numeric_limits::max()); + for (int chan_idx = 0; chan_idx < (int)f_wire_cost_map.dim_size(1); chan_idx++) { + for (int seg_idx = 0; seg_idx < (int)f_wire_cost_map.dim_size(2); seg_idx++) { + auto cost = util::Cost_Entry(f_wire_cost_map[layer_num][chan_idx][seg_idx][dx][dy].delay, + f_wire_cost_map[layer_num][chan_idx][seg_idx][dx][dy].congestion); + if (cost.delay < min_cost.delay) { + min_cost.delay = cost.delay; + min_cost.congestion = cost.congestion; + } } } + 
internal_opin_global_cost_map[layer_num][dx][dy] = min_cost; } - internal_opin_global_cost_map[dx][dy] = min_cost; } } } @@ -1501,7 +1547,7 @@ void read_router_lookahead(const std::string& file) { auto map = reader.getRoot(); - ToNdMatrix<4, VprMapCostEntry, Cost_Entry>(&f_wire_cost_map, map.getCostMap(), ToCostEntry); + ToNdMatrix<5, VprMapCostEntry, Cost_Entry>(&f_wire_cost_map, map.getCostMap(), ToCostEntry); } void write_router_lookahead(const std::string& file) { @@ -1510,7 +1556,7 @@ void write_router_lookahead(const std::string& file) { auto map = builder.initRoot(); auto cost_map = map.initCostMap(); - FromNdMatrix<4, VprMapCostEntry, Cost_Entry>(&cost_map, f_wire_cost_map, FromCostEntry); + FromNdMatrix<5, VprMapCostEntry, Cost_Entry>(&cost_map, f_wire_cost_map, FromCostEntry); writeMessageToFile(file, &builder); } diff --git a/vpr/src/route/router_lookahead_map.h b/vpr/src/route/router_lookahead_map.h index 6ccd88aa621..00dc5bf62ad 100644 --- a/vpr/src/route/router_lookahead_map.h +++ b/vpr/src/route/router_lookahead_map.h @@ -20,7 +20,7 @@ class MapLookahead : public RouterLookahead { // Lookup table to store the minimum cost to reach to a primitive pin from the root-level IPINs std::unordered_map> tile_min_cost; // [physical_tile_type][sink_physical_num] -> cost // Lookup table to store the minimum cost for each dx and dy - vtr::NdMatrix distance_based_min_cost; // [dx][dy] -> cost + vtr::NdMatrix distance_based_min_cost; // [layer_num][dx][dy] -> cost const t_det_routing_arch& det_routing_arch_; bool is_flat_; @@ -56,7 +56,7 @@ class Cost_Entry { /* provides delay/congestion estimates to travel specified distances * in the x/y direction */ -typedef vtr::NdMatrix t_wire_cost_map; //[0..1][[0..num_seg_types-1]0..device_ctx.grid.width()-1][0..device_ctx.grid.height()-1] +typedef vtr::NdMatrix t_wire_cost_map; //[0..num_layers][0..1][[0..num_seg_types-1]0..device_ctx.grid.width()-1][0..device_ctx.grid.height()-1] //[0..1] entry distinguish between 
CHANX/CHANY start nodes respectively void read_router_lookahead(const std::string& file); diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp index 139c1bdfb22..5ec27a15cc8 100644 --- a/vpr/src/route/router_lookahead_map_utils.cpp +++ b/vpr/src/route/router_lookahead_map_utils.cpp @@ -22,7 +22,7 @@ static void dijkstra_flood_to_wires(int itile, RRNodeId inode, util::t_src_opin_delays& src_opin_delays); static void dijkstra_flood_to_ipins(RRNodeId node, util::t_chan_ipins_delays& chan_ipins_delays); -static vtr::Point pick_sample_tile(t_physical_tile_type_ptr tile_type, vtr::Point start); +static t_physical_tile_loc pick_sample_tile(int layer_num, t_physical_tile_type_ptr tile_type, t_physical_tile_loc prev); static void run_intra_tile_dijkstra(const RRGraphView& rr_graph, util::t_ipin_primitive_sink_delays& pin_delays, @@ -312,66 +312,73 @@ t_src_opin_delays compute_router_src_opin_lookahead(bool is_flat) { t_src_opin_delays src_opin_delays; - src_opin_delays.resize(device_ctx.physical_tile_types.size()); + src_opin_delays.resize(device_ctx.grid.get_num_layers()); + for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { + src_opin_delays[layer_num].resize(device_ctx.physical_tile_types.size()); + } //We assume that the routing connectivity of each instance of a physical tile is the same, //and so only measure one instance of each type - for (size_t itile = 0; itile < device_ctx.physical_tile_types.size(); ++itile) { - for (e_rr_type rr_type : {SOURCE, OPIN}) { - vtr::Point sample_loc(-1, -1); - - size_t num_sampled_locs = 0; - bool ptcs_with_no_delays = true; - while (ptcs_with_no_delays) { //Haven't found wire connected to ptc - ptcs_with_no_delays = false; - - sample_loc = pick_sample_tile(&device_ctx.physical_tile_types[itile], sample_loc); - - if (sample_loc.x() == -1 && sample_loc.y() == -1) { - //No untried instances of the current tile type left - VTR_LOG_WARN("Found no 
%ssample locations for %s in %s\n", - (num_sampled_locs == 0) ? "" : "more ", - rr_node_typename[rr_type], - device_ctx.physical_tile_types[itile].name); - break; - } - - //VTR_LOG("Sampling %s at (%d,%d)\n", device_ctx.physical_tile_types[itile].name, sample_loc.x(), sample_loc.y()); - - const std::vector& rr_nodes_at_loc = device_ctx.rr_graph.node_lookup().find_grid_nodes_at_all_sides(sample_loc.x(), sample_loc.y(), rr_type); - for (RRNodeId node_id : rr_nodes_at_loc) { - int ptc = rr_graph.node_ptc_num(node_id); - // For the time being, we decide to not let the lookahead explore the node inside the clusters - if (!is_inter_cluster_node(&device_ctx.physical_tile_types[itile], - rr_type, - ptc)) { - continue; + for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { + for (size_t itile = 0; itile < device_ctx.physical_tile_types.size(); ++itile) { + if (device_ctx.grid.num_instances(&device_ctx.physical_tile_types[itile], layer_num) == 0) { + continue; + } + for (e_rr_type rr_type : {SOURCE, OPIN}) { + t_physical_tile_loc sample_loc(OPEN, OPEN, OPEN); + + size_t num_sampled_locs = 0; + bool ptcs_with_no_delays = true; + while (ptcs_with_no_delays) { //Haven't found wire connected to ptc + ptcs_with_no_delays = false; + + sample_loc = pick_sample_tile(layer_num, &device_ctx.physical_tile_types[itile], sample_loc); + + if (sample_loc.x == OPEN && sample_loc.y == OPEN && sample_loc.layer_num == OPEN) { + //No untried instances of the current tile type left + VTR_LOG_WARN("Found no %ssample locations for %s in %s\n", + (num_sampled_locs == 0) ? "" : "more ", + rr_node_typename[rr_type], + device_ctx.physical_tile_types[itile].name); + break; } - if (ptc >= int(src_opin_delays[itile].size())) { - src_opin_delays[itile].resize(ptc + 1); //Inefficient but functional... 
+ //VTR_LOG("Sampling %s at (%d,%d)\n", device_ctx.physical_tile_types[itile].name, sample_loc.x(), sample_loc.y()); + const std::vector& rr_nodes_at_loc = device_ctx.rr_graph.node_lookup().find_grid_nodes_at_all_sides(sample_loc.layer_num, sample_loc.x, sample_loc.y, rr_type); + for (RRNodeId node_id : rr_nodes_at_loc) { + int ptc = rr_graph.node_ptc_num(node_id); + // For the time being, we decide to not let the lookahead explore the node inside the clusters + if (!is_inter_cluster_node(&device_ctx.physical_tile_types[itile], + rr_type, + ptc)) { + continue; + } + + if (ptc >= int(src_opin_delays[layer_num][itile].size())) { + src_opin_delays[layer_num][itile].resize(ptc + 1); //Inefficient but functional... + } + + //Find the wire types which are reachable from inode and record them and + //the cost to reach them + dijkstra_flood_to_wires(itile, node_id, src_opin_delays); + + if (src_opin_delays[layer_num][itile][ptc].empty()) { + VTR_LOGV_DEBUG(f_router_debug, "Found no reachable wires from %s (%s) at (%d,%d)\n", + rr_node_typename[rr_type], + rr_node_arch_name(size_t(node_id), is_flat).c_str(), + sample_loc.x, + sample_loc.y, + is_flat); + + ptcs_with_no_delays = true; + } } - //Find the wire types which are reachable from inode and record them and - //the cost to reach them - dijkstra_flood_to_wires(itile, node_id, src_opin_delays); - - if (src_opin_delays[itile][ptc].empty()) { - VTR_LOGV_DEBUG(f_router_debug, "Found no reachable wires from %s (%s) at (%d,%d)\n", - rr_node_typename[rr_type], - rr_node_arch_name(size_t(node_id), is_flat).c_str(), - sample_loc.x(), - sample_loc.y(), - is_flat); - - ptcs_with_no_delays = true; - } + ++num_sampled_locs; + } + if (ptcs_with_no_delays) { + VPR_ERROR(VPR_ERROR_ROUTE, "Some SOURCE/OPINs have no reachable wires\n"); } - - ++num_sampled_locs; - } - if (ptcs_with_no_delays) { - VPR_ERROR(VPR_ERROR_ROUTE, "Some SOURCE/OPINs have no reachable wires\n"); } } } @@ -386,34 +393,42 @@ t_chan_ipins_delays 
compute_router_chan_ipin_lookahead() { t_chan_ipins_delays chan_ipins_delays; - chan_ipins_delays.resize(device_ctx.physical_tile_types.size()); + chan_ipins_delays.resize(device_ctx.grid.get_num_layers()); + for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { + chan_ipins_delays[layer_num].resize(device_ctx.physical_tile_types.size()); + } //We assume that the routing connectivity of each instance of a physical tile is the same, //and so only measure one instance of each type - for (auto tile_type : device_ctx.physical_tile_types) { - vtr::Point sample_loc(-1, -1); + for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { + for (auto tile_type : device_ctx.physical_tile_types) { + if (device_ctx.grid.num_instances(&tile_type, layer_num) == 0) { + continue; + } + t_physical_tile_loc sample_loc(OPEN, OPEN, OPEN); - sample_loc = pick_sample_tile(&tile_type, sample_loc); + sample_loc = pick_sample_tile(layer_num, &tile_type, sample_loc); - if (sample_loc.x() == -1 && sample_loc.y() == -1) { - //No untried instances of the current tile type left - VTR_LOG_WARN("Found no sample locations for %s\n", - tile_type.name); - continue; - } + if (sample_loc.x == OPEN && sample_loc.y == OPEN && sample_loc.layer_num == OPEN) { + //No untried instances of the current tile type left + VTR_LOG_WARN("Found no sample locations for %s\n", + tile_type.name); + continue; + } - int min_x = std::max(0, sample_loc.x() - X_OFFSET); - int min_y = std::max(0, sample_loc.y() - Y_OFFSET); - int max_x = std::min(int(device_ctx.grid.width()), sample_loc.x() + X_OFFSET); - int max_y = std::min(int(device_ctx.grid.height()), sample_loc.y() + Y_OFFSET); - - for (int ix = min_x; ix < max_x; ix++) { - for (int iy = min_y; iy < max_y; iy++) { - for (auto rr_type : {CHANX, CHANY}) { - for (const RRNodeId& node_id : node_lookup.find_channel_nodes(ix, iy, rr_type)) { - //Find the IPINs which are reachable from the wires within the bounding box - 
//around the selected tile location - dijkstra_flood_to_ipins(node_id, chan_ipins_delays); + int min_x = std::max(0, sample_loc.x - X_OFFSET); + int min_y = std::max(0, sample_loc.y - Y_OFFSET); + int max_x = std::min(int(device_ctx.grid.width()), sample_loc.x + X_OFFSET); + int max_y = std::min(int(device_ctx.grid.height()), sample_loc.y + Y_OFFSET); + + for (int ix = min_x; ix < max_x; ix++) { + for (int iy = min_y; iy < max_y; iy++) { + for (auto rr_type : {CHANX, CHANY}) { + for (const RRNodeId& node_id : node_lookup.find_channel_nodes(sample_loc.layer_num, ix, iy, rr_type)) { + //Find the IPINs which are reachable from the wires within the bounding box + //around the selected tile location + dijkstra_flood_to_ipins(node_id, chan_ipins_delays); + } } } } @@ -425,6 +440,7 @@ t_chan_ipins_delays compute_router_chan_ipin_lookahead() { t_ipin_primitive_sink_delays compute_intra_tile_dijkstra(const RRGraphView& rr_graph, t_physical_tile_type_ptr physical_tile, + int layer, int x, int y) { auto tile_pins_vec = get_flat_tile_pins(physical_tile); @@ -436,6 +452,7 @@ t_ipin_primitive_sink_delays compute_intra_tile_dijkstra(const RRGraphView& rr_g for (int pin_physical_num : tile_pins_vec) { RRNodeId pin_node_id = get_pin_rr_node_id(rr_graph.node_lookup(), physical_tile, + layer, x, y, pin_physical_num); @@ -471,6 +488,7 @@ static void dijkstra_flood_to_wires(int itile, RRNodeId node, util::t_src_opin_d root.node = node; int ptc = rr_graph.node_ptc_num(node); + int node_layer_num = rr_graph.node_layer(node); /* * Perform Djikstra from the SOURCE/OPIN of interest, stopping at the the first @@ -517,12 +535,12 @@ static void dijkstra_flood_to_wires(int itile, RRNodeId node, util::t_src_opin_d } //Keep costs of the best path to reach each wire type - if (!src_opin_delays[itile][ptc].count(seg_index) - || curr.delay < src_opin_delays[itile][ptc][seg_index].delay) { - src_opin_delays[itile][ptc][seg_index].wire_rr_type = curr_rr_type; - 
src_opin_delays[itile][ptc][seg_index].wire_seg_index = seg_index; - src_opin_delays[itile][ptc][seg_index].delay = curr.delay; - src_opin_delays[itile][ptc][seg_index].congestion = curr.congestion; + if (!src_opin_delays[node_layer_num][itile][ptc].count(seg_index) + || curr.delay < src_opin_delays[node_layer_num][itile][ptc][seg_index].delay) { + src_opin_delays[node_layer_num][itile][ptc][seg_index].wire_rr_type = curr_rr_type; + src_opin_delays[node_layer_num][itile][ptc][seg_index].wire_seg_index = seg_index; + src_opin_delays[node_layer_num][itile][ptc][seg_index].delay = curr.delay; + src_opin_delays[node_layer_num][itile][ptc][seg_index].congestion = curr.congestion; } } else if (curr_rr_type == SOURCE || curr_rr_type == OPIN || curr_rr_type == IPIN) { @@ -536,10 +554,18 @@ static void dijkstra_flood_to_wires(int itile, RRNodeId node, util::t_src_opin_d RRNodeId next_node = rr_graph.rr_nodes().edge_sink_node(edge); // For the time being, we decide to not let the lookahead explore the node inside the clusters - t_physical_tile_type_ptr physical_type = device_ctx.grid.get_physical_type(rr_graph.node_xlow(next_node), rr_graph.node_ylow(next_node)); + t_physical_tile_type_ptr physical_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(next_node), + rr_graph.node_ylow(next_node), + rr_graph.node_layer(next_node)}); if (!is_inter_cluster_node(physical_type, rr_graph.node_type(next_node), rr_graph.node_ptc_num(next_node))) { + // Don't go inside the clusters + continue; + } + + if (rr_graph.node_layer(curr.node) != node_layer_num) { + //Don't change the layer continue; } @@ -579,6 +605,8 @@ static void dijkstra_flood_to_ipins(RRNodeId node, util::t_chan_ipins_delays& ch root.node = node; root.level = 0; + int root_layer = rr_graph.node_layer(node); + /* * Perform Djikstra from the CHAN of interest, stopping at the the first * reachable IPIN @@ -607,21 +635,22 @@ static void dijkstra_flood_to_ipins(RRNodeId node, util::t_chan_ipins_delays& ch if 
(curr_rr_type == IPIN) { int node_x = rr_graph.node_xlow(curr.node); int node_y = rr_graph.node_ylow(curr.node); + int node_layer = rr_graph.node_layer(curr.node); - auto tile_type = device_ctx.grid.get_physical_type(node_x, node_y); + auto tile_type = device_ctx.grid.get_physical_type({node_x, node_y, node_layer}); int itile = tile_type->index; int ptc = rr_graph.node_ptc_num(curr.node); - if (ptc >= int(chan_ipins_delays[itile].size())) { - chan_ipins_delays[itile].resize(ptc + 1); //Inefficient but functional... + if (ptc >= int(chan_ipins_delays[root_layer][itile].size())) { + chan_ipins_delays[root_layer][itile].resize(ptc + 1); //Inefficient but functional... } site_pin_delay = std::min(curr.delay, site_pin_delay); //Keep costs of the best path to reach each wire type - chan_ipins_delays[itile][ptc].wire_rr_type = curr_rr_type; - chan_ipins_delays[itile][ptc].delay = site_pin_delay; - chan_ipins_delays[itile][ptc].congestion = curr.congestion; + chan_ipins_delays[root_layer][itile][ptc].wire_rr_type = curr_rr_type; + chan_ipins_delays[root_layer][itile][ptc].delay = site_pin_delay; + chan_ipins_delays[root_layer][itile][ptc].congestion = curr.congestion; } else if (curr_rr_type == CHANX || curr_rr_type == CHANY) { if (curr.level >= MAX_EXPANSION_LEVEL) { continue; @@ -637,6 +666,11 @@ static void dijkstra_flood_to_ipins(RRNodeId node, util::t_chan_ipins_delays& ch RRNodeId next_node = rr_graph.rr_nodes().edge_sink_node(edge); + if (rr_graph.node_layer(next_node) != root_layer) { + //Don't change the layer + continue; + } + t_pq_entry next; next.congestion = new_cong; //Of current node next.delay = new_delay; //To reach next node @@ -651,18 +685,21 @@ static void dijkstra_flood_to_ipins(RRNodeId node, util::t_chan_ipins_delays& ch } } -static vtr::Point pick_sample_tile(t_physical_tile_type_ptr tile_type, vtr::Point prev) { +static t_physical_tile_loc pick_sample_tile(int layer_num, t_physical_tile_type_ptr tile_type, t_physical_tile_loc prev) { //Very simple 
for now, just pick the fist matching tile found - vtr::Point loc(OPEN, OPEN); + t_physical_tile_loc loc(OPEN, OPEN, OPEN); - //VTR_LOG("Prev: %d,%d\n", prev.x(), prev.y()); + //VTR_LOG("Prev: %d,%d\n", prev.x, prev.y); auto& device_ctx = g_vpr_ctx.device(); auto& grid = device_ctx.grid; - int y_init = prev.y() + 1; //Start searching next element above prev + int y_init = prev.y + 1; //Start searching next element above prev - for (int x = prev.x(); x < int(grid.width()); ++x) { + if (device_ctx.grid.num_instances(tile_type, layer_num) == 0) { + return loc; + } + for (int x = prev.x; x < int(grid.width()); ++x) { if (x < 0) continue; //VTR_LOG(" x: %d\n", x); @@ -671,20 +708,22 @@ static vtr::Point pick_sample_tile(t_physical_tile_type_ptr tile_type, vtr: if (y < 0) continue; //VTR_LOG(" y: %d\n", y); - if (grid.get_physical_type(x, y) == tile_type) { - loc.set_x(x); - loc.set_y(y); + if (grid.get_physical_type(t_physical_tile_loc(x, y, layer_num)) == tile_type) { + loc.x = x; + loc.y = y; + loc.layer_num = layer_num; break; } } - if (loc.x() != OPEN && loc.y() != OPEN) { + if (loc.x != OPEN && loc.y != OPEN && loc.layer_num != OPEN) { break; } else { y_init = 0; //Prepare to search next column } } - //VTR_LOG("Next: %d,%d\n", loc.x(), loc.y()); + + //VTR_LOG("Next: %d,%d\n", loc.x, loc.y); return loc; } diff --git a/vpr/src/route/router_lookahead_map_utils.h b/vpr/src/route/router_lookahead_map_utils.h index 5a7a83aa9fd..f3a3d43249a 100644 --- a/vpr/src/route/router_lookahead_map_utils.h +++ b/vpr/src/route/router_lookahead_map_utils.h @@ -268,7 +268,7 @@ struct t_reachable_wire_inf { // // When querying this data structure, the minimum cost is computed for each delay/congestion pair, and returned // as the lookahead expected cost. 
-typedef std::vector>> t_src_opin_delays; +typedef std::vector>>> t_src_opin_delays; //[from pin ptc num][target src ptc num]->cost typedef std::vector> t_ipin_primitive_sink_delays; @@ -282,13 +282,14 @@ typedef std::vector> t_ipin_primitive_sink_d // // This data structure stores the minimum delay to reach a specific SINK from the last connection between the wire (CHANX/CHANY) // and the tile's IPIN. If there are many connections to the same IPIN, the one with the minimum delay is selected. -typedef std::vector> t_chan_ipins_delays; +typedef std::vector>> t_chan_ipins_delays; t_src_opin_delays compute_router_src_opin_lookahead(bool is_flat); t_chan_ipins_delays compute_router_chan_ipin_lookahead(); t_ipin_primitive_sink_delays compute_intra_tile_dijkstra(const RRGraphView& rr_graph, t_physical_tile_type_ptr physical_tile, + int layer, int x, int y); diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index 96581ccc093..dc0d7a06d04 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -112,6 +112,7 @@ static vtr::NdMatrix, 4> alloc_and_load_track_to_pin_lookup(vtr static void build_bidir_rr_opins(RRGraphBuilder& rr_graph_builder, const RRGraphView& rr_graph, + const int layer, const int i, const int j, const e_side side, @@ -128,6 +129,7 @@ static void build_bidir_rr_opins(RRGraphBuilder& rr_graph_builder, static void build_unidir_rr_opins(RRGraphBuilder& rr_graph_builder, const RRGraphView& rr_graph, + const int layer, const int i, const int j, const e_side side, @@ -149,6 +151,7 @@ static void build_unidir_rr_opins(RRGraphBuilder& rr_graph_builder, static int get_opin_direct_connections(RRGraphBuilder& rr_graph_builder, const RRGraphView& rr_graph, + int layer, int x, int y, e_side side, @@ -207,12 +210,14 @@ static vtr::vector> get_pin_chains_flat( static void add_classes_rr_graph(RRGraphBuilder& rr_graph_builder, const std::vector& class_num_vec, + const int layer, const int root_x, const int root_y, 
t_physical_tile_type_ptr physical_type); static void add_pins_rr_graph(RRGraphBuilder& rr_graph_builder, const std::vector& pin_num_vec, + const int layer, const int i, const int j, t_physical_tile_type_ptr physical_type); @@ -225,6 +230,7 @@ static void add_pins_rr_graph(RRGraphBuilder& rr_graph_builder, * @param rr_graph_builder * @param arch_sw_inf_map * @param class_num_vec + * @param layer * @param i * @param j * @param rr_edges_to_create @@ -234,6 +240,7 @@ static void add_pins_rr_graph(RRGraphBuilder& rr_graph_builder, static void connect_tile_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, std::map& arch_sw_inf_map, const std::vector& class_num_vec, + const int layer, const int i, const int j, t_rr_edge_info_set& rr_edges_to_create, @@ -242,6 +249,7 @@ static void connect_tile_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, static void connect_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, const std::vector& class_num_vec, + const int layer, const int i, const int j, t_rr_edge_info_set& rr_edges_to_create, @@ -251,6 +259,7 @@ static void connect_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, static void alloc_and_load_tile_rr_graph(RRGraphBuilder& rr_graph_builder, std::map& arch_sw_inf_map, t_physical_tile_type_ptr physical_tile, + int layer, int root_x, int root_y, const int delayless_switch); @@ -302,6 +311,7 @@ static void add_intra_cluster_edges_rr_graph(RRGraphBuilder& rr_graph_builder, static void add_intra_tile_edges_rr_graph(RRGraphBuilder& rr_graph_builder, t_rr_edge_info_set& rr_edges_to_create, t_physical_tile_type_ptr physical_tile, + int layer, int i, int j); @@ -311,6 +321,7 @@ static void add_intra_tile_edges_rr_graph(RRGraphBuilder& rr_graph_builder, static void build_cluster_internal_edges(RRGraphBuilder& rr_graph_builder, int& num_collapsed_nodes, ClusterBlockId cluster_blk_id, + const int layer, const int i, const int j, const int cap, @@ -332,6 +343,7 @@ static void add_pb_edges(RRGraphBuilder& rr_graph_builder, const 
t_pb* pb, const t_cluster_pin_chain& nodes_to_collapse, int rel_cap, + int layer, int i, int j); @@ -345,6 +357,7 @@ static void add_pb_edges(RRGraphBuilder& rr_graph_builder, * @param nodes_to_collapse * @param R_minW_nmos * @param R_minW_pmos + * @param layer * @param i * @param j * @return Number of the collapsed nodes @@ -357,6 +370,7 @@ static int add_edges_for_collapsed_nodes(RRGraphBuilder& rr_graph_builder, const t_cluster_pin_chain& nodes_to_collapse, float R_minW_nmos, float R_minW_pmos, + int layer, int i, int j); /** @@ -374,6 +388,7 @@ static int add_edges_for_collapsed_nodes(RRGraphBuilder& rr_graph_builder, * @param chain_idx * @param node_idx * @param sink_pin_num + * @param layer * @param i * @param j */ @@ -389,6 +404,7 @@ static void add_chain_node_fan_in_edges(RRGraphBuilder& rr_graph_builder, float R_minW_pmos, int chain_idx, int node_idx, + int layer, int i, int j); @@ -412,6 +428,7 @@ static float get_min_delay_to_chain(t_physical_tile_type_ptr physical_type, static std::unordered_set get_chain_pins(std::vector chain); static void build_rr_chan(RRGraphBuilder& rr_graph_builder, + const int layer, const int i, const int j, const t_rr_type chan_type, @@ -744,10 +761,12 @@ static void add_intra_cluster_edges_rr_graph(RRGraphBuilder& rr_graph_builder, auto block_loc = place_ctx.block_locs[cluster_blk_id].loc; int i = block_loc.x; int j = block_loc.y; + int layer = block_loc.layer; int abs_cap = block_loc.sub_tile; build_cluster_internal_edges(rr_graph_builder, num_collapsed_nodes, cluster_blk_id, + layer, i, j, abs_cap, @@ -769,6 +788,7 @@ static void add_intra_cluster_edges_rr_graph(RRGraphBuilder& rr_graph_builder, static void add_intra_tile_edges_rr_graph(RRGraphBuilder& rr_graph_builder, t_rr_edge_info_set& rr_edges_to_create, t_physical_tile_type_ptr physical_tile, + int layer, int i, int j) { auto pin_num_vec = get_flat_tile_pins(physical_tile); @@ -778,6 +798,7 @@ static void add_intra_tile_edges_rr_graph(RRGraphBuilder& rr_graph_builder, 
} auto pin_rr_node_id = get_pin_rr_node_id(rr_graph_builder.node_lookup(), physical_tile, + layer, i, j, pin_physical_num); @@ -787,6 +808,7 @@ static void add_intra_tile_edges_rr_graph(RRGraphBuilder& rr_graph_builder, for (auto driving_pin : driving_pins) { auto driving_pin_node_id = get_pin_rr_node_id(rr_graph_builder.node_lookup(), physical_tile, + layer, i, j, driving_pin); @@ -1069,7 +1091,7 @@ static void build_rr_graph(const t_graph_type graph_type, } device_ctx.rr_graph_builder.resize_nodes(num_rr_nodes); - /* These are data structures used by the the unidir opin mapping. They are used + /* These are data structures used by the unidir opin mapping. They are used * to spread connections evenly for each segment type among the available * wire start points */ vtr::NdMatrix Fc_xofs({grid.height() - 1, @@ -1224,7 +1246,7 @@ static void build_rr_graph(const t_graph_type graph_type, // Verify no incremental node allocation. /* AA: Note that in the case of dedicated networks, we are currently underestimating the additional node count due to the clock networks. - * Thus this below error is logged; it's not actually an error, the node estimation needs to get fixed for dedicated clock networks. */ + * Thus, this below error is logged; it's not actually an error, the node estimation needs to get fixed for dedicated clock networks. 
*/ if (rr_graph.num_nodes() > expected_node_count) { VTR_LOG_ERROR("Expected no more than %zu nodes, have %zu nodes\n", expected_node_count, rr_graph.num_nodes()); @@ -1361,6 +1383,7 @@ static void build_intra_cluster_rr_graph(const t_graph_type graph_type, void build_tile_rr_graph(RRGraphBuilder& rr_graph_builder, const t_det_routing_arch& det_routing_arch, t_physical_tile_type_ptr physical_tile, + int layer, int x, int y, const int delayless_switch) { @@ -1369,6 +1392,7 @@ void build_tile_rr_graph(RRGraphBuilder& rr_graph_builder, int num_rr_nodes = 0; alloc_and_load_tile_rr_node_indices(rr_graph_builder, physical_tile, + layer, x, y, &num_rr_nodes); @@ -1377,6 +1401,7 @@ void build_tile_rr_graph(RRGraphBuilder& rr_graph_builder, alloc_and_load_tile_rr_graph(rr_graph_builder, sw_map, physical_tile, + layer, x, y, delayless_switch); @@ -1863,39 +1888,44 @@ static std::function alloc_and_load_rr_graph(RRGraphBuilder int num_edges = 0; /* Connection SINKS and SOURCES to their pins - Initializing IPINs/OPINs. 
*/ - for (size_t i = 0; i < grid.width(); ++i) { - for (size_t j = 0; j < grid.height(); ++j) { - if (grid.get_width_offset(i, j) == 0 && grid.get_height_offset(i, j) == 0) { - t_physical_tile_type_ptr physical_tile = grid.get_physical_type(i, j); - std::vector class_num_vec; - std::vector pin_num_vec; - class_num_vec = get_tile_root_classes(physical_tile); - pin_num_vec = get_tile_root_pins(physical_tile); - add_classes_rr_graph(rr_graph_builder, - class_num_vec, - i, - j, - physical_tile); - - add_pins_rr_graph(rr_graph_builder, - pin_num_vec, - i, - j, - physical_tile); - - connect_src_sink_to_pins(rr_graph_builder, + for (int layer = 0; layer < grid.get_num_layers(); ++layer) { + for (int i = 0; i < (int)grid.width(); ++i) { + for (int j = 0; j < (int)grid.height(); ++j) { + if (grid.get_width_offset({i, j, layer}) == 0 && grid.get_height_offset({i, j, layer}) == 0) { + t_physical_tile_type_ptr physical_tile = grid.get_physical_type({i, j, layer}); + std::vector class_num_vec; + std::vector pin_num_vec; + class_num_vec = get_tile_root_classes(physical_tile); + pin_num_vec = get_tile_root_pins(physical_tile); + add_classes_rr_graph(rr_graph_builder, class_num_vec, + layer, i, j, - rr_edges_to_create, - delayless_switch, physical_tile); - //Create the actual SOURCE->OPIN, IPIN->SINK edges - uniquify_edges(rr_edges_to_create); - alloc_and_load_edges(rr_graph_builder, rr_edges_to_create); - num_edges += rr_edges_to_create.size(); - rr_edges_to_create.clear(); + add_pins_rr_graph(rr_graph_builder, + pin_num_vec, + layer, + i, + j, + physical_tile); + + connect_src_sink_to_pins(rr_graph_builder, + class_num_vec, + layer, + i, + j, + rr_edges_to_create, + delayless_switch, + physical_tile); + + //Create the actual SOURCE->OPIN, IPIN->SINK edges + uniquify_edges(rr_edges_to_create); + alloc_and_load_edges(rr_graph_builder, rr_edges_to_create); + num_edges += rr_edges_to_create.size(); + rr_edges_to_create.clear(); + } } } } @@ -1904,31 +1934,35 @@ static std::function 
alloc_and_load_rr_graph(RRGraphBuilder num_edges = 0; /* Build opins */ int rr_edges_before_directs = 0; - for (size_t i = 0; i < grid.width(); ++i) { - for (size_t j = 0; j < grid.height(); ++j) { - for (e_side side : SIDES) { - if (BI_DIRECTIONAL == directionality) { - build_bidir_rr_opins(rr_graph_builder, rr_graph, i, j, side, - opin_to_track_map, Fc_out, rr_edges_to_create, chan_details_x, chan_details_y, - grid, - directs, num_directs, clb_to_clb_directs, num_seg_types); - } else { - VTR_ASSERT(UNI_DIRECTIONAL == directionality); - bool clipped; - build_unidir_rr_opins(rr_graph_builder, rr_graph, i, j, side, grid, Fc_out, chan_width, - chan_details_x, chan_details_y, Fc_xofs, Fc_yofs, - rr_edges_to_create, &clipped, seg_index_map, - directs, num_directs, clb_to_clb_directs, num_seg_types, rr_edges_before_directs); - if (clipped) { - *Fc_clipped = true; + for (int layer = 0; layer < grid.get_num_layers(); layer++) { + for (size_t i = 0; i < grid.width(); ++i) { + for (size_t j = 0; j < grid.height(); ++j) { + for (e_side side : SIDES) { + if (BI_DIRECTIONAL == directionality) { + build_bidir_rr_opins(rr_graph_builder, rr_graph, layer, i, j, side, + opin_to_track_map, Fc_out, rr_edges_to_create, chan_details_x, + chan_details_y, + grid, + directs, num_directs, clb_to_clb_directs, num_seg_types); + } else { + VTR_ASSERT(UNI_DIRECTIONAL == directionality); + bool clipped; + build_unidir_rr_opins(rr_graph_builder, rr_graph, layer, i, j, side, grid, Fc_out, chan_width, + chan_details_x, chan_details_y, Fc_xofs, Fc_yofs, + rr_edges_to_create, &clipped, seg_index_map, + directs, num_directs, clb_to_clb_directs, num_seg_types, + rr_edges_before_directs); + if (clipped) { + *Fc_clipped = true; + } } - } - //Create the actual OPIN->CHANX/CHANY edges - uniquify_edges(rr_edges_to_create); - alloc_and_load_edges(rr_graph_builder, rr_edges_to_create); - num_edges += rr_edges_to_create.size(); - rr_edges_to_create.clear(); + //Create the actual OPIN->CHANX/CHANY edges + 
uniquify_edges(rr_edges_to_create); + alloc_and_load_edges(rr_graph_builder, rr_edges_to_create); + num_edges += rr_edges_to_create.size(); + rr_edges_to_create.clear(); + } } } } @@ -1939,41 +1973,43 @@ static std::function alloc_and_load_rr_graph(RRGraphBuilder num_edges = 0; /* Build channels */ VTR_ASSERT(Fs % 3 == 0); - for (size_t i = 0; i < grid.width() - 1; ++i) { - for (size_t j = 0; j < grid.height() - 1; ++j) { - if (i > 0) { - int tracks_per_chan = ((is_global_graph) ? 1 : chan_width.x_list[j]); - build_rr_chan(rr_graph_builder, i, j, CHANX, track_to_pin_lookup_x, sb_conn_map, switch_block_conn, - CHANX_COST_INDEX_START, - chan_width, grid, tracks_per_chan, - sblock_pattern, Fs / 3, chan_details_x, chan_details_y, - rr_edges_to_create, - wire_to_ipin_switch, - directionality); - - //Create the actual CHAN->CHAN edges - uniquify_edges(rr_edges_to_create); - alloc_and_load_edges(rr_graph_builder, rr_edges_to_create); - num_edges += rr_edges_to_create.size(); - - rr_edges_to_create.clear(); - } - if (j > 0) { - int tracks_per_chan = ((is_global_graph) ? 1 : chan_width.y_list[i]); - build_rr_chan(rr_graph_builder, i, j, CHANY, track_to_pin_lookup_y, sb_conn_map, switch_block_conn, - CHANX_COST_INDEX_START + num_seg_types_x, - chan_width, grid, tracks_per_chan, - sblock_pattern, Fs / 3, chan_details_x, chan_details_y, - rr_edges_to_create, - wire_to_ipin_switch, - directionality); - - //Create the actual CHAN->CHAN edges - uniquify_edges(rr_edges_to_create); - alloc_and_load_edges(rr_graph_builder, rr_edges_to_create); - num_edges += rr_edges_to_create.size(); - - rr_edges_to_create.clear(); + for (int layer = 0; layer < grid.get_num_layers(); ++layer) { + for (size_t i = 0; i < grid.width() - 1; ++i) { + for (size_t j = 0; j < grid.height() - 1; ++j) { + if (i > 0) { + int tracks_per_chan = ((is_global_graph) ? 
1 : chan_width.x_list[j]); + build_rr_chan(rr_graph_builder, layer, i, j, CHANX, track_to_pin_lookup_x, sb_conn_map, switch_block_conn, + CHANX_COST_INDEX_START, + chan_width, grid, tracks_per_chan, + sblock_pattern, Fs / 3, chan_details_x, chan_details_y, + rr_edges_to_create, + wire_to_ipin_switch, + directionality); + + //Create the actual CHAN->CHAN edges + uniquify_edges(rr_edges_to_create); + alloc_and_load_edges(rr_graph_builder, rr_edges_to_create); + num_edges += rr_edges_to_create.size(); + + rr_edges_to_create.clear(); + } + if (j > 0) { + int tracks_per_chan = ((is_global_graph) ? 1 : chan_width.y_list[i]); + build_rr_chan(rr_graph_builder, layer, i, j, CHANY, track_to_pin_lookup_y, sb_conn_map, switch_block_conn, + CHANX_COST_INDEX_START + num_seg_types_x, + chan_width, grid, tracks_per_chan, + sblock_pattern, Fs / 3, chan_details_x, chan_details_y, + rr_edges_to_create, + wire_to_ipin_switch, + directionality); + + //Create the actual CHAN->CHAN edges + uniquify_edges(rr_edges_to_create); + alloc_and_load_edges(rr_graph_builder, rr_edges_to_create); + num_edges += rr_edges_to_create.size(); + + rr_edges_to_create.clear(); + } } } } @@ -2054,43 +2090,49 @@ static void alloc_and_load_intra_cluster_rr_graph(RRGraphBuilder& rr_graph_build bool is_flat) { t_rr_edge_info_set rr_edges_to_create; int num_edges = 0; - for (size_t i = 0; i < grid.width(); ++i) { - for (size_t j = 0; j < grid.height(); ++j) { - if (grid.get_width_offset(i, j) == 0 && grid.get_height_offset(i, j) == 0) { - t_physical_tile_type_ptr physical_tile = grid.get_physical_type(i, j); - std::vector class_num_vec; - std::vector pin_num_vec; - class_num_vec = get_cluster_netlist_intra_tile_classes_at_loc(i, j, physical_tile); - pin_num_vec = get_cluster_netlist_intra_tile_pins_at_loc(i, - j, - pin_chains, - chain_pin_nums, - physical_tile); - add_classes_rr_graph(rr_graph_builder, - class_num_vec, - i, - j, - physical_tile); - - add_pins_rr_graph(rr_graph_builder, - pin_num_vec, - i, - j, - 
physical_tile); - - connect_src_sink_to_pins(rr_graph_builder, + for (int layer = 0; layer < grid.get_num_layers(); layer++) { + for (int i = 0; i < (int)grid.width(); ++i) { + for (int j = 0; j < (int)grid.height(); ++j) { + if (grid.get_width_offset({i, j, layer}) == 0 && grid.get_height_offset({i, j, layer}) == 0) { + t_physical_tile_type_ptr physical_tile = grid.get_physical_type({i, j, layer}); + std::vector class_num_vec; + std::vector pin_num_vec; + class_num_vec = get_cluster_netlist_intra_tile_classes_at_loc(layer, i, j, physical_tile); + pin_num_vec = get_cluster_netlist_intra_tile_pins_at_loc(layer, + i, + j, + pin_chains, + chain_pin_nums, + physical_tile); + add_classes_rr_graph(rr_graph_builder, class_num_vec, + layer, i, j, - rr_edges_to_create, - delayless_switch, physical_tile); - //Create the actual SOURCE->OPIN, IPIN->SINK edges - uniquify_edges(rr_edges_to_create); - alloc_and_load_edges(rr_graph_builder, rr_edges_to_create); - num_edges += rr_edges_to_create.size(); - rr_edges_to_create.clear(); + add_pins_rr_graph(rr_graph_builder, + pin_num_vec, + layer, + i, + j, + physical_tile); + + connect_src_sink_to_pins(rr_graph_builder, + class_num_vec, + layer, + i, + j, + rr_edges_to_create, + delayless_switch, + physical_tile); + + //Create the actual SOURCE->OPIN, IPIN->SINK edges + uniquify_edges(rr_edges_to_create); + alloc_and_load_edges(rr_graph_builder, rr_edges_to_create); + num_edges += rr_edges_to_create.size(); + rr_edges_to_create.clear(); + } } } } @@ -2117,6 +2159,7 @@ static void alloc_and_load_intra_cluster_rr_graph(RRGraphBuilder& rr_graph_build static void add_classes_rr_graph(RRGraphBuilder& rr_graph_builder, const std::vector& class_num_vec, + const int layer, const int root_x, const int root_y, t_physical_tile_type_ptr physical_type) { @@ -2124,7 +2167,7 @@ static void add_classes_rr_graph(RRGraphBuilder& rr_graph_builder, for (auto class_num : class_num_vec) { auto class_type = 
get_class_type_from_class_physical_num(physical_type, class_num); - RRNodeId class_inode = get_class_rr_node_id(rr_graph_builder.node_lookup(), physical_type, root_x, root_y, class_num); + RRNodeId class_inode = get_class_rr_node_id(rr_graph_builder.node_lookup(), physical_type, layer, root_x, root_y, class_num); VTR_ASSERT(class_inode != RRNodeId::INVALID()); int class_num_pins = get_class_num_pins_from_class_physical_num(physical_type, class_num); if (class_type == DRIVER) { @@ -2140,6 +2183,8 @@ static void add_classes_rr_graph(RRGraphBuilder& rr_graph_builder, rr_graph_builder.set_node_capacity(class_inode, (short)class_num_pins); VTR_ASSERT(root_x <= std::numeric_limits::max() && root_y <= std::numeric_limits::max()); rr_graph_builder.set_node_coordinates(class_inode, (short)root_x, (short)root_y, (short)(root_x + physical_type->width - 1), (short)(root_y + physical_type->height - 1)); + VTR_ASSERT(layer <= std::numeric_limits::max()); + rr_graph_builder.set_node_layer(class_inode, layer); float R = 0.; float C = 0.; rr_graph_builder.set_node_rc_index(class_inode, NodeRCIndex(find_create_rr_rc_data(R, C, mutable_device_ctx.rr_rc_data))); @@ -2149,6 +2194,7 @@ static void add_classes_rr_graph(RRGraphBuilder& rr_graph_builder, static void add_pins_rr_graph(RRGraphBuilder& rr_graph_builder, const std::vector& pin_num_vec, + const int layer, const int i, const int j, t_physical_tile_type_ptr physical_type) { @@ -2167,7 +2213,8 @@ static void add_pins_rr_graph(RRGraphBuilder& rr_graph_builder, int y_offset = y_offset_vec[pin_coord]; e_side pin_side = pin_sides_vec[pin_coord]; auto node_type = (pin_type == DRIVER) ? 
OPIN : IPIN; - RRNodeId node_id = node_lookup.find_node(i + x_offset, + RRNodeId node_id = node_lookup.find_node(layer, + i + x_offset, j + y_offset, node_type, pin_num, @@ -2196,6 +2243,7 @@ static void add_pins_rr_graph(RRGraphBuilder& rr_graph_builder, j + y_offset, i + x_offset, j + y_offset); + rr_graph_builder.set_node_layer(node_id, layer); rr_graph_builder.add_node_side(node_id, pin_side); } } @@ -2205,6 +2253,7 @@ static void add_pins_rr_graph(RRGraphBuilder& rr_graph_builder, static void connect_tile_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, std::map& /*arch_sw_inf_map*/, const std::vector& class_num_vec, + const int layer, const int i, const int j, t_rr_edge_info_set& rr_edges_to_create, @@ -2213,14 +2262,15 @@ static void connect_tile_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, for (auto class_num : class_num_vec) { const auto& pin_list = get_pin_list_from_class_physical_num(physical_type_ptr, class_num); auto class_type = get_class_type_from_class_physical_num(physical_type_ptr, class_num); - RRNodeId class_rr_node_id = get_class_rr_node_id(rr_graph_builder.node_lookup(), physical_type_ptr, i, j, class_num); + RRNodeId class_rr_node_id = get_class_rr_node_id(rr_graph_builder.node_lookup(), physical_type_ptr, layer, i, j, class_num); VTR_ASSERT(class_rr_node_id != RRNodeId::INVALID()); //bool is_primitive = is_primitive_pin(physical_type_ptr, pin_list[0]); //t_logical_block_type_ptr logical_block = is_primitive ? 
get_logical_block_from_pin_physical_num(physical_type_ptr, pin_list[0]) : nullptr; for (auto pin_num : pin_list) { - RRNodeId pin_rr_node_id = get_pin_rr_node_id(rr_graph_builder.node_lookup(), physical_type_ptr, i, j, pin_num); + RRNodeId pin_rr_node_id = get_pin_rr_node_id(rr_graph_builder.node_lookup(), physical_type_ptr, layer, i, j, pin_num); if (pin_rr_node_id == RRNodeId::INVALID()) { - VTR_LOG_ERROR("In block (%d, %d) pin num: %d doesn't exist to be connected to class %d\n", + VTR_LOG_ERROR("In block (%d, %d, %d) pin num: %d doesn't exist to be connected to class %d\n", + layer, i, j, pin_num, @@ -2253,6 +2303,7 @@ static void connect_tile_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, static void connect_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, const std::vector& class_num_vec, + const int layer, const int i, const int j, t_rr_edge_info_set& rr_edges_to_create, @@ -2261,12 +2312,13 @@ static void connect_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, for (auto class_num : class_num_vec) { const auto& pin_list = get_pin_list_from_class_physical_num(physical_type_ptr, class_num); auto class_type = get_class_type_from_class_physical_num(physical_type_ptr, class_num); - RRNodeId class_rr_node_id = get_class_rr_node_id(rr_graph_builder.node_lookup(), physical_type_ptr, i, j, class_num); + RRNodeId class_rr_node_id = get_class_rr_node_id(rr_graph_builder.node_lookup(), physical_type_ptr, layer, i, j, class_num); VTR_ASSERT(class_rr_node_id != RRNodeId::INVALID()); for (auto pin_num : pin_list) { - RRNodeId pin_rr_node_id = get_pin_rr_node_id(rr_graph_builder.node_lookup(), physical_type_ptr, i, j, pin_num); + RRNodeId pin_rr_node_id = get_pin_rr_node_id(rr_graph_builder.node_lookup(), physical_type_ptr, layer, i, j, pin_num); if (pin_rr_node_id == RRNodeId::INVALID()) { - VTR_LOG_ERROR("In block (%d, %d) pin num: %d doesn't exist to be connected to class %d\n", + VTR_LOG_ERROR("In block (%d, %d, %d) pin num: %d doesn't exist to be connected to 
class %d\n", + layer, i, j, pin_num, @@ -2289,6 +2341,7 @@ static void connect_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, static void alloc_and_load_tile_rr_graph(RRGraphBuilder& rr_graph_builder, std::map& arch_sw_inf_map, t_physical_tile_type_ptr physical_tile, + int layer, int root_x, int root_y, const int delayless_switch) { @@ -2302,12 +2355,14 @@ static void alloc_and_load_tile_rr_graph(RRGraphBuilder& rr_graph_builder, add_classes_rr_graph(rr_graph_builder, class_num_vec, + layer, root_x, root_y, physical_tile); add_pins_rr_graph(rr_graph_builder, pin_num_vec, + layer, root_x, root_y, physical_tile); @@ -2315,6 +2370,7 @@ static void alloc_and_load_tile_rr_graph(RRGraphBuilder& rr_graph_builder, connect_tile_src_sink_to_pins(rr_graph_builder, arch_sw_inf_map, class_num_vec, + layer, root_x, root_y, rr_edges_to_create, @@ -2328,6 +2384,7 @@ static void alloc_and_load_tile_rr_graph(RRGraphBuilder& rr_graph_builder, add_intra_tile_edges_rr_graph(rr_graph_builder, rr_edges_to_create, physical_tile, + layer, root_x, root_y); @@ -2342,6 +2399,7 @@ static void alloc_and_load_tile_rr_graph(RRGraphBuilder& rr_graph_builder, static void build_bidir_rr_opins(RRGraphBuilder& rr_graph_builder, const RRGraphView& rr_graph, + const int layer, const int i, const int j, const e_side side, @@ -2363,9 +2421,9 @@ static void build_bidir_rr_opins(RRGraphBuilder& rr_graph_builder, return; } - auto type = grid.get_physical_type(i, j); - int width_offset = grid.get_width_offset(i, j); - int height_offset = grid.get_height_offset(i, j); + auto type = grid.get_physical_type({i, j, layer}); + int width_offset = grid.get_width_offset({i, j, layer}); + int height_offset = grid.get_height_offset({i, j, layer}); const vtr::Matrix& Fc = Fc_out[type->index]; @@ -2386,18 +2444,18 @@ static void build_bidir_rr_opins(RRGraphBuilder& rr_graph_builder, total_pin_Fc += Fc[pin_index][iseg]; } - RRNodeId node_index = rr_graph_builder.node_lookup().find_node(i, j, OPIN, pin_index, side); + 
RRNodeId node_index = rr_graph_builder.node_lookup().find_node(layer, i, j, OPIN, pin_index, side); VTR_ASSERT(node_index); if (total_pin_Fc > 0) { - get_bidir_opin_connections(rr_graph_builder, i, j, pin_index, + get_bidir_opin_connections(rr_graph_builder, layer, i, j, pin_index, node_index, rr_edges_to_create, opin_to_track_map, chan_details_x, chan_details_y); } /* Add in direct connections */ - get_opin_direct_connections(rr_graph_builder, rr_graph, i, j, side, pin_index, + get_opin_direct_connections(rr_graph_builder, rr_graph, layer, i, j, side, pin_index, node_index, rr_edges_to_create, directs, num_directs, clb_to_clb_directs); } @@ -2430,6 +2488,7 @@ void free_rr_graph() { static void build_cluster_internal_edges(RRGraphBuilder& rr_graph_builder, int& num_collapsed_nodes, ClusterBlockId cluster_blk_id, + const int layer, const int i, const int j, const int abs_cap, @@ -2441,8 +2500,8 @@ static void build_cluster_internal_edges(RRGraphBuilder& rr_graph_builder, bool is_flat) { VTR_ASSERT(is_flat); /* Internal edges are added from the start tile */ - int width_offset = grid.get_width_offset(i, j); - int height_offset = grid.get_height_offset(i, j); + int width_offset = grid.get_width_offset({i, j, layer}); + int height_offset = grid.get_height_offset({i, j, layer}); VTR_ASSERT(width_offset == 0 && height_offset == 0); auto& cluster_net_list = g_vpr_ctx.clustering().clb_nlist; @@ -2475,6 +2534,7 @@ static void build_cluster_internal_edges(RRGraphBuilder& rr_graph_builder, pb, nodes_to_collapse, rel_cap, + layer, i, j); @@ -2491,6 +2551,7 @@ static void build_cluster_internal_edges(RRGraphBuilder& rr_graph_builder, nodes_to_collapse, R_minW_nmos, R_minW_pmos, + layer, i, j); } @@ -2503,6 +2564,7 @@ static void add_pb_edges(RRGraphBuilder& rr_graph_builder, const t_pb* pb, const t_cluster_pin_chain& nodes_to_collapse, int rel_cap, + int layer, int i, int j) { auto pin_num_range = get_pb_pins(physical_type, @@ -2524,6 +2586,7 @@ static void 
add_pb_edges(RRGraphBuilder& rr_graph_builder, } auto parent_pin_node_id = get_pin_rr_node_id(rr_graph_builder.node_lookup(), physical_type, + layer, i, j, pin_physical_num); @@ -2543,6 +2606,7 @@ static void add_pb_edges(RRGraphBuilder& rr_graph_builder, } auto conn_pin_node_id = get_pin_rr_node_id(rr_graph_builder.node_lookup(), physical_type, + layer, i, j, conn_pin_physical_num); @@ -2569,6 +2633,7 @@ static int add_edges_for_collapsed_nodes(RRGraphBuilder& rr_graph_builder, const t_cluster_pin_chain& nodes_to_collapse, float R_minW_nmos, float R_minW_pmos, + int layer, int i, int j) { // Store the cluster pins in a set to make the search more run-time efficient @@ -2595,6 +2660,7 @@ static int add_edges_for_collapsed_nodes(RRGraphBuilder& rr_graph_builder, R_minW_pmos, chain_idx, node_idx, + layer, i, j); } @@ -2614,6 +2680,7 @@ static void add_chain_node_fan_in_edges(RRGraphBuilder& rr_graph_builder, float R_minW_pmos, int chain_idx, int node_idx, + int layer, int i, int j) { // Chain node pin physical number @@ -2634,6 +2701,7 @@ static void add_chain_node_fan_in_edges(RRGraphBuilder& rr_graph_builder, // Get the chain's sink node rr node it. 
RRNodeId sink_rr_node_id = get_pin_rr_node_id(rr_graph_builder.node_lookup(), physical_type, + layer, i, j, sink_pin_num); @@ -2664,6 +2732,7 @@ static void add_chain_node_fan_in_edges(RRGraphBuilder& rr_graph_builder, sink_pin_num); RRNodeId rr_node_id = get_pin_rr_node_id(rr_graph_builder.node_lookup(), physical_type, + layer, i, j, pin_physical_num); @@ -2695,6 +2764,7 @@ static void add_chain_node_fan_in_edges(RRGraphBuilder& rr_graph_builder, sink_pin_num); RRNodeId rr_node_id = get_pin_rr_node_id(rr_graph_builder.node_lookup(), physical_type, + layer, i, j, src_pin); @@ -2763,6 +2833,7 @@ static std::unordered_set get_chain_pins(std::vector chai /* Allocates/loads edges for nodes belonging to specified channel segment and initializes * node properties such as cost, occupancy and capacity */ static void build_rr_chan(RRGraphBuilder& rr_graph_builder, + const int layer, const int x_coord, const int y_coord, const t_rr_type chan_type, @@ -2786,7 +2857,7 @@ static void build_rr_chan(RRGraphBuilder& rr_graph_builder, auto& device_ctx = g_vpr_ctx.device(); auto& mutable_device_ctx = g_vpr_ctx.mutable_device(); - //Initally a assumes CHANX + //Initally assumes CHANX int seg_coord = x_coord; //The absolute coordinate of this segment within the channel int chan_coord = y_coord; //The absolute coordinate of this channel within the device int seg_dimension = device_ctx.grid.width() - 2; //-2 for no perim channels @@ -2833,7 +2904,7 @@ static void build_rr_chan(RRGraphBuilder& rr_graph_builder, from_seg_details = chan_details_x[start][y_coord].data(); } - RRNodeId node = rr_graph_builder.node_lookup().find_node(x_coord, y_coord, chan_type, track); + RRNodeId node = rr_graph_builder.node_lookup().find_node(layer, x_coord, y_coord, chan_type, track); if (!node) { continue; @@ -2841,7 +2912,7 @@ static void build_rr_chan(RRGraphBuilder& rr_graph_builder, /* Add the edges from this track to all it's connected pins into the list */ int num_edges = 0; - num_edges += 
get_track_to_pins(rr_graph_builder, start, chan_coord, track, tracks_per_chan, node, rr_edges_to_create, + num_edges += get_track_to_pins(rr_graph_builder, layer, start, chan_coord, track, tracks_per_chan, node, rr_edges_to_create, track_to_pin_lookup, seg_details, chan_type, seg_dimension, wire_to_ipin_switch, directionality); @@ -2858,7 +2929,7 @@ static void build_rr_chan(RRGraphBuilder& rr_graph_builder, max_opposite_chan_width = nodes_per_chan.x_max; } if (to_seg_details->length() > 0) { - num_edges += get_track_to_tracks(rr_graph_builder, chan_coord, start, track, chan_type, chan_coord, + num_edges += get_track_to_tracks(rr_graph_builder, layer, chan_coord, start, track, chan_type, chan_coord, opposite_chan_type, seg_dimension, max_opposite_chan_width, grid, Fs_per_side, sblock_pattern, node, rr_edges_to_create, from_seg_details, to_seg_details, opposite_chan_details, @@ -2878,7 +2949,7 @@ static void build_rr_chan(RRGraphBuilder& rr_graph_builder, max_opposite_chan_width = nodes_per_chan.x_max; } if (to_seg_details->length() > 0) { - num_edges += get_track_to_tracks(rr_graph_builder, chan_coord, start, track, chan_type, chan_coord + 1, + num_edges += get_track_to_tracks(rr_graph_builder, layer, chan_coord, start, track, chan_type, chan_coord + 1, opposite_chan_type, seg_dimension, max_opposite_chan_width, grid, Fs_per_side, sblock_pattern, node, rr_edges_to_create, from_seg_details, to_seg_details, opposite_chan_details, @@ -2911,7 +2982,7 @@ static void build_rr_chan(RRGraphBuilder& rr_graph_builder, max_chan_width = nodes_per_chan.y_max; } if (to_seg_details->length() > 0) { - num_edges += get_track_to_tracks(rr_graph_builder, chan_coord, start, track, chan_type, target_seg, + num_edges += get_track_to_tracks(rr_graph_builder, layer, chan_coord, start, track, chan_type, target_seg, chan_type, seg_dimension, max_chan_width, grid, Fs_per_side, sblock_pattern, node, rr_edges_to_create, from_seg_details, to_seg_details, from_chan_details, @@ -2937,6 +3008,8 @@ 
static void build_rr_chan(RRGraphBuilder& rr_graph_builder, rr_graph_builder.set_node_coordinates(node, x_coord, start, x_coord, end); } + rr_graph_builder.set_node_layer(node, layer); + int length = end - start + 1; float R = length * seg_details[track].Rmetal(); float C = length * seg_details[track].Cmetal(); @@ -3727,6 +3800,7 @@ static vtr::NdMatrix, 4> alloc_and_load_track_to_pin_lookup(vtr */ static void build_unidir_rr_opins(RRGraphBuilder& rr_graph_builder, const RRGraphView& rr_graph, + const int layer, const int i, const int j, const e_side side, @@ -3751,10 +3825,10 @@ static void build_unidir_rr_opins(RRGraphBuilder& rr_graph_builder, */ *Fc_clipped = false; - auto type = grid.get_physical_type(i, j); + auto type = grid.get_physical_type({i, j, layer}); - int width_offset = grid.get_width_offset(i, j); - int height_offset = grid.get_height_offset(i, j); + int width_offset = grid.get_width_offset({i, j, layer}); + int height_offset = grid.get_height_offset({i, j, layer}); /* Go through each pin and find its fanout. 
*/ for (int pin_index = 0; pin_index < type->num_pins; ++pin_index) { @@ -3767,7 +3841,7 @@ static void build_unidir_rr_opins(RRGraphBuilder& rr_graph_builder, continue; } - RRNodeId opin_node_index = rr_graph_builder.node_lookup().find_node(i, j, OPIN, pin_index, side); + RRNodeId opin_node_index = rr_graph_builder.node_lookup().find_node(layer, i, j, OPIN, pin_index, side); if (!opin_node_index) continue; //No valid from node for (int iseg = 0; iseg < num_seg_types; iseg++) { @@ -3827,7 +3901,7 @@ static void build_unidir_rr_opins(RRGraphBuilder& rr_graph_builder, //VTR_ASSERT_MSG(seg_index == 0 || seg_index > 0,"seg_index map not working properly"); - rr_edge_count += get_unidir_opin_connections(rr_graph_builder, chan, seg, + rr_edge_count += get_unidir_opin_connections(rr_graph_builder, layer, chan, seg, seg_type_Fc, seg_index, chan_type, seg_details, opin_node_index, rr_edges_to_create, @@ -3840,7 +3914,7 @@ static void build_unidir_rr_opins(RRGraphBuilder& rr_graph_builder, } /* Add in direct connections */ - get_opin_direct_connections(rr_graph_builder, rr_graph, i, j, side, pin_index, opin_node_index, rr_edges_to_create, + get_opin_direct_connections(rr_graph_builder, rr_graph, layer, i, j, side, pin_index, opin_node_index, rr_edges_to_create, directs, num_directs, clb_to_clb_directs); } } @@ -3961,10 +4035,11 @@ static t_clb_to_clb_directs* alloc_and_load_clb_to_clb_directs(const t_direct_in /* Add all direct clb-pin-to-clb-pin edges to given opin * - * The current opin is located at (x,y) along the specified side + * The current opin is located at (layer,x,y) along the specified side */ static int get_opin_direct_connections(RRGraphBuilder& rr_graph_builder, const RRGraphView& rr_graph, + int layer, int x, int y, e_side side, @@ -3976,12 +4051,12 @@ static int get_opin_direct_connections(RRGraphBuilder& rr_graph_builder, const t_clb_to_clb_directs* clb_to_clb_directs) { auto& device_ctx = g_vpr_ctx.device(); - t_physical_tile_type_ptr curr_type = 
device_ctx.grid.get_physical_type(x, y); + t_physical_tile_type_ptr curr_type = device_ctx.grid.get_physical_type({x, y, layer}); int num_pins = 0; - int width_offset = device_ctx.grid.get_width_offset(x, y); - int height_offset = device_ctx.grid.get_height_offset(x, y); + int width_offset = device_ctx.grid.get_width_offset({x, y, layer}); + int height_offset = device_ctx.grid.get_height_offset({x, y, layer}); if (!curr_type->pinloc[width_offset][height_offset][side][opin]) { return num_pins; //No source pin on this side } @@ -4004,7 +4079,9 @@ static int get_opin_direct_connections(RRGraphBuilder& rr_graph_builder, && y + directs[i].y_offset < int(device_ctx.grid.height() - 1) && y + directs[i].y_offset > 0) { //Only add connections if the target clb type matches the type in the direct specification - t_physical_tile_type_ptr target_type = device_ctx.grid.get_physical_type(x + directs[i].x_offset, y + directs[i].y_offset); + t_physical_tile_type_ptr target_type = device_ctx.grid.get_physical_type({x + directs[i].x_offset, + y + directs[i].y_offset, + layer}); if (clb_to_clb_directs[i].to_clb_type == target_type && z + directs[i].sub_tile_offset < int(target_type->capacity) @@ -4064,13 +4141,13 @@ static int get_opin_direct_connections(RRGraphBuilder& rr_graph_builder, if (directs[i].to_side != NUM_SIDES) { //Explicit side specified, only create if pin exists on that side - RRNodeId inode = rr_graph_builder.node_lookup().find_node(x + directs[i].x_offset, y + directs[i].y_offset, IPIN, ipin, directs[i].to_side); + RRNodeId inode = rr_graph_builder.node_lookup().find_node(layer, x + directs[i].x_offset, y + directs[i].y_offset, IPIN, ipin, directs[i].to_side); if (inode) { inodes.push_back(inode); } } else { //No side specified, get all candidates - inodes = rr_graph_builder.node_lookup().find_nodes_at_all_sides(x + directs[i].x_offset, y + directs[i].y_offset, IPIN, ipin); + inodes = rr_graph_builder.node_lookup().find_nodes_at_all_sides(layer, x + 
directs[i].x_offset, y + directs[i].y_offset, IPIN, ipin); } if (inodes.size() > 0) { @@ -4286,6 +4363,7 @@ bool pins_connected(t_block_loc cluster_loc, int x = cluster_loc.loc.x; int y = cluster_loc.loc.y; + int layer = cluster_loc.loc.layer; int abs_cap = cluster_loc.loc.sub_tile; const t_sub_tile* sub_tile = nullptr; @@ -4316,10 +4394,10 @@ bool pins_connected(t_block_loc cluster_loc, VTR_ASSERT(to_pin_physical_num != OPEN); - RRNodeId from_node = get_pin_rr_node_id(rr_spatial_look_up, physical_type, x, y, from_pin_physical_num); + RRNodeId from_node = get_pin_rr_node_id(rr_spatial_look_up, physical_type, layer, x, y, from_pin_physical_num); VTR_ASSERT(from_node != RRNodeId::INVALID()); - RRNodeId to_node = get_pin_rr_node_id(rr_spatial_look_up, physical_type, x, y, to_pin_physical_num); + RRNodeId to_node = get_pin_rr_node_id(rr_spatial_look_up, physical_type, layer, x, y, to_pin_physical_num); VTR_ASSERT(to_node != RRNodeId::INVALID()); int num_edges = rr_graph.num_edges(from_node); diff --git a/vpr/src/route/rr_graph.h b/vpr/src/route/rr_graph.h index 821e0a0de01..af06257d98e 100644 --- a/vpr/src/route/rr_graph.h +++ b/vpr/src/route/rr_graph.h @@ -32,9 +32,12 @@ void create_rr_graph(const t_graph_type graph_type, int* Warnings, bool is_flat); +// Build a complete RR graph, including all modes, for the given tile. This is used by router lookahead when +// flat-routing is enabled. 
It allows it to store the cost from the border of a tile to a sink inside of it void build_tile_rr_graph(RRGraphBuilder& rr_graph_builder, const t_det_routing_arch& det_routing_arch, t_physical_tile_type_ptr physical_tile, + int layer, int x, int y, const int delayless_switch); diff --git a/vpr/src/route/rr_graph2.cpp b/vpr/src/route/rr_graph2.cpp index ecc695ac5e1..9ae222ca20c 100644 --- a/vpr/src/route/rr_graph2.cpp +++ b/vpr/src/route/rr_graph2.cpp @@ -27,6 +27,7 @@ static void get_switch_type(bool is_from_sb, short switch_types[2]); static void load_chan_rr_indices(const int max_chan_width, + const DeviceGrid& grid, const int chan_len, const int num_chans, const t_rr_type type, @@ -42,6 +43,7 @@ static void load_block_rr_indices(RRGraphBuilder& rr_graph_builder, static void add_pins_spatial_lookup(RRGraphBuilder& rr_graph_builder, t_physical_tile_type_ptr physical_type_ptr, const std::vector& pin_num_vec, + int layer, int root_x, int root_y, int* index, @@ -50,6 +52,7 @@ static void add_pins_spatial_lookup(RRGraphBuilder& rr_graph_builder, static void add_classes_spatial_lookup(RRGraphBuilder& rr_graph_builder, t_physical_tile_type_ptr physical_type_ptr, const std::vector& class_num_vec, + int layer, int x, int y, int block_width, @@ -58,6 +61,7 @@ static void add_classes_spatial_lookup(RRGraphBuilder& rr_graph_builder, static int get_bidir_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, const std::vector conn_tracks, + const int layer, const int to_chan, const int to_seg, const int to_sb, @@ -71,6 +75,7 @@ static int get_bidir_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, t_rr_edge_info_set& rr_edges_to_create); static int get_unidir_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, + const int layer, const int from_track, const int to_chan, const int to_seg, @@ -89,6 +94,7 @@ static int get_unidir_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, t_rr_edge_info_set& rr_edges_to_create); static int get_track_to_chan_seg(RRGraphBuilder& 
rr_graph_builder, + const int layer, const int from_track, const int to_chan, const int to_seg, @@ -143,7 +149,7 @@ void dump_seg_details(t_seg_details* seg_details, // from_seg_coord: The horizontal or vertical location along the channel (i.e. y-coord for CHANY, x-coord for CHANX) // from_chan_type: The from channel type // to_chan_type: The to channel type -static int should_create_switchblock(const DeviceGrid& grid, int from_chan_coord, int from_seg_coord, t_rr_type from_chan_type, t_rr_type to_chan_type); +static int should_create_switchblock(const DeviceGrid& grid, int layer_num, int from_chan_coord, int from_seg_coord, t_rr_type from_chan_type, t_rr_type to_chan_type); static bool should_apply_switch_override(int switch_override); @@ -649,6 +655,7 @@ int get_seg_end(const t_chan_seg_details* seg_details, const int itrack, const i /* Returns the number of tracks to which clb opin #ipin at (i,j) connects. * * Also stores the nodes to which this pin connects in rr_edges_to_create */ int get_bidir_opin_connections(RRGraphBuilder& rr_graph_builder, + const int layer, const int i, const int j, const int ipin, @@ -665,9 +672,9 @@ int get_bidir_opin_connections(RRGraphBuilder& rr_graph_builder, auto& device_ctx = g_vpr_ctx.device(); - type = device_ctx.grid.get_physical_type(i, j); - int width_offset = device_ctx.grid.get_width_offset(i, j); - int height_offset = device_ctx.grid.get_height_offset(i, j); + type = device_ctx.grid.get_physical_type({i, j, layer}); + int width_offset = device_ctx.grid.get_width_offset({i, j, layer}); + int height_offset = device_ctx.grid.get_height_offset({i, j, layer}); num_conn = 0; @@ -717,7 +724,7 @@ int get_bidir_opin_connections(RRGraphBuilder& rr_graph_builder, /* Only connect to wire if there is a CB */ if (is_cblock(chan, seg, to_track, seg_details)) { to_switch = seg_details[to_track].arch_wire_switch(); - RRNodeId to_node = rr_graph_builder.node_lookup().find_node(tr_i, tr_j, to_type, to_track); + RRNodeId to_node = 
rr_graph_builder.node_lookup().find_node(layer, tr_i, tr_j, to_type, to_track); if (!to_node) { continue; @@ -741,6 +748,7 @@ int get_bidir_opin_connections(RRGraphBuilder& rr_graph_builder, * */ int get_unidir_opin_connections(RRGraphBuilder& rr_graph_builder, + const int layer, const int chan, const int seg, int Fc, @@ -802,8 +810,8 @@ int get_unidir_opin_connections(RRGraphBuilder& rr_graph_builder, dec_track = dec_muxes[dec_mux]; /* Figure the inodes of those muxes */ - RRNodeId inc_inode_index = rr_graph_builder.node_lookup().find_node(x, y, chan_type, inc_track); - RRNodeId dec_inode_index = rr_graph_builder.node_lookup().find_node(x, y, chan_type, dec_track); + RRNodeId inc_inode_index = rr_graph_builder.node_lookup().find_node(layer, x, y, chan_type, inc_track); + RRNodeId dec_inode_index = rr_graph_builder.node_lookup().find_node(layer, x, y, chan_type, dec_track); if (!inc_inode_index || !dec_inode_index) { continue; @@ -1039,48 +1047,50 @@ void dump_track_to_pin_map(t_track_to_pin_lookup& track_to_pin_map, } } static void load_chan_rr_indices(const int max_chan_width, + const DeviceGrid& grid, const int chan_len, const int num_chans, const t_rr_type type, const t_chan_details& chan_details, RRGraphBuilder& rr_graph_builder, int* index) { - for (int chan = 0; chan < num_chans - 1; ++chan) { - for (int seg = 1; seg < chan_len - 1; ++seg) { - /* Assign an inode to the starts of tracks */ - int x = (type == CHANX ? seg : chan); - int y = (type == CHANX ? 
chan : seg); - const t_chan_seg_details* seg_details = chan_details[x][y].data(); - - /* Reserve nodes in lookup to save memory */ - rr_graph_builder.node_lookup().reserve_nodes(chan, seg, type, max_chan_width); - - for (int track = 0; track < max_chan_width; ++track) { - /* TODO: May let the length() == 0 case go through, to model muxes */ - if (seg_details[track].length() <= 0) - continue; + for (int layer = 0; layer < grid.get_num_layers(); layer++) { + for (int chan = 0; chan < num_chans - 1; ++chan) { + for (int seg = 1; seg < chan_len - 1; ++seg) { + /* Assign an inode to the starts of tracks */ + int x = (type == CHANX ? seg : chan); + int y = (type == CHANX ? chan : seg); + const t_chan_seg_details* seg_details = chan_details[x][y].data(); - int start = get_seg_start(seg_details, track, chan, seg); + /* Reserve nodes in lookup to save memory */ + rr_graph_builder.node_lookup().reserve_nodes(layer, chan, seg, type, max_chan_width); - /* TODO: Now we still use the (y, x) convention here for CHANX. Should rework later */ - int node_x = chan; - int node_y = start; - if (CHANX == type) { - std::swap(node_x, node_y); - } + for (int track = 0; track < max_chan_width; ++track) { + /* TODO: May let the length() == 0 case go through, to model muxes */ + if (seg_details[track].length() <= 0) + continue; - /* If the start of the wire doesn't have a inode, - * assign one to it. */ - RRNodeId inode = rr_graph_builder.node_lookup().find_node(node_x, node_y, type, track); - if (!inode) { - inode = RRNodeId(*index); - ++(*index); + int start = get_seg_start(seg_details, track, chan, seg); - rr_graph_builder.node_lookup().add_node(inode, chan, start, type, track); - } + /* TODO: Now we still use the (y, x) convention here for CHANX. Should rework later */ + int node_x = chan; + int node_y = start; + if (CHANX == type) { + std::swap(node_x, node_y); + } + + /* If the start of the wire doesn't have an inode, + * assign one to it. 
*/ + RRNodeId inode = rr_graph_builder.node_lookup().find_node(layer, node_x, node_y, type, track); + if (!inode) { + inode = RRNodeId(*index); + ++(*index); + rr_graph_builder.node_lookup().add_node(inode, layer, chan, start, type, track); + } - /* Assign inode of start of wire to current position */ - rr_graph_builder.node_lookup().add_node(inode, chan, seg, type, track); + /* Assign inode of start of wire to current position */ + rr_graph_builder.node_lookup().add_node(inode, layer, chan, seg, type, track); + } } } } @@ -1094,88 +1104,93 @@ static void load_block_rr_indices(RRGraphBuilder& rr_graph_builder, int* index, bool /*is_flat*/) { //Walk through the grid assigning indices to SOURCE/SINK IPIN/OPIN + for (int layer = 0; layer < grid.get_num_layers(); layer++) { + for (int x = 0; x < (int)grid.width(); x++) { + for (int y = 0; y < (int)grid.height(); y++) { + //Process each block from its root location + if (grid.get_width_offset({x, y, layer}) == 0 && grid.get_height_offset({x, y, layer}) == 0) { + t_physical_tile_type_ptr physical_type = grid.get_physical_type({x, + y, + layer}); + //Assign indices for SINKs and SOURCEs + // Note that SINKS/SOURCES have no side, so we always use side 0 + std::vector class_num_vec; + std::vector pin_num_vec; + + class_num_vec = get_tile_root_classes(physical_type); + pin_num_vec = get_tile_root_pins(physical_type); + add_classes_spatial_lookup(rr_graph_builder, + physical_type, + class_num_vec, + layer, + x, + y, + physical_type->width, + physical_type->height, + index); + + /* Limited sides for grids + * The wanted side depends on the location of the grid. 
+ * In particular for perimeter grid, + * ------------------------------------------------------- + * Grid location | IPIN side + * ------------------------------------------------------- + * TOP | BOTTOM + * ------------------------------------------------------- + * RIGHT | LEFT + * ------------------------------------------------------- + * BOTTOM | TOP + * ------------------------------------------------------- + * LEFT | RIGHT + * ------------------------------------------------------- + * TOP-LEFT | BOTTOM & RIGHT + * ------------------------------------------------------- + * TOP-RIGHT | BOTTOM & LEFT + * ------------------------------------------------------- + * BOTTOM-LEFT | TOP & RIGHT + * ------------------------------------------------------- + * BOTTOM-RIGHT | TOP & LEFT + * ------------------------------------------------------- + * Other | First come first fit + * ------------------------------------------------------- + * + * Special for IPINs: + * If there are multiple wanted sides, first come first fit is applied + * This guarantee that there is only a unique rr_node + * for the same input pin on multiple sides, and thus avoid multiple driver problems + */ + std::vector wanted_sides; + if ((int)grid.height() - 1 == y) { /* TOP side */ + wanted_sides.push_back(BOTTOM); + } + if ((int)grid.width() - 1 == x) { /* RIGHT side */ + wanted_sides.push_back(LEFT); + } + if (0 == y) { /* BOTTOM side */ + wanted_sides.push_back(TOP); + } + if (0 == x) { /* LEFT side */ + wanted_sides.push_back(RIGHT); + } - for (size_t x = 0; x < grid.width(); x++) { - for (size_t y = 0; y < grid.height(); y++) { - //Process each block from it's root location - if (grid.get_width_offset(x, y) == 0 && grid.get_height_offset(x, y) == 0) { - t_physical_tile_type_ptr physical_type = grid.get_physical_type(x, y); - //Assign indices for SINKs and SOURCEs - // Note that SINKS/SOURCES have no side, so we always use side 0 - std::vector class_num_vec; - std::vector pin_num_vec; - - 
class_num_vec = get_tile_root_classes(physical_type); - pin_num_vec = get_tile_root_pins(physical_type); - add_classes_spatial_lookup(rr_graph_builder, - physical_type, - class_num_vec, - x, - y, - physical_type->width, - physical_type->height, - index); - - /* Limited sides for grids - * The wanted side depends on the location of the grid. - * In particular for perimeter grid, - * ------------------------------------------------------- - * Grid location | IPIN side - * ------------------------------------------------------- - * TOP | BOTTOM - * ------------------------------------------------------- - * RIGHT | LEFT - * ------------------------------------------------------- - * BOTTOM | TOP - * ------------------------------------------------------- - * LEFT | RIGHT - * ------------------------------------------------------- - * TOP-LEFT | BOTTOM & RIGHT - * ------------------------------------------------------- - * TOP-RIGHT | BOTTOM & LEFT - * ------------------------------------------------------- - * BOTTOM-LEFT | TOP & RIGHT - * ------------------------------------------------------- - * BOTTOM-RIGHT | TOP & LEFT - * ------------------------------------------------------- - * Other | First come first fit - * ------------------------------------------------------- - * - * Special for IPINs: - * If there are multiple wanted sides, first come first fit is applied - * This guarantee that there is only a unique rr_node - * for the same input pin on multiple sides, and thus avoid multiple driver problems - */ - std::vector wanted_sides; - if (grid.height() - 1 == y) { /* TOP side */ - wanted_sides.push_back(BOTTOM); - } - if (grid.width() - 1 == x) { /* RIGHT side */ - wanted_sides.push_back(LEFT); - } - if (0 == y) { /* BOTTOM side */ - wanted_sides.push_back(TOP); - } - if (0 == x) { /* LEFT side */ - wanted_sides.push_back(RIGHT); - } - - /* If wanted sides is empty still, this block does not have specific wanted sides, - * Deposit all the sides - */ - if 
(wanted_sides.empty()) { - for (e_side side : {TOP, BOTTOM, LEFT, RIGHT}) { - wanted_sides.push_back(side); + /* If wanted sides is empty still, this block does not have specific wanted sides, + * Deposit all the sides + */ + if (wanted_sides.empty()) { + for (e_side side : {TOP, BOTTOM, LEFT, RIGHT}) { + wanted_sides.push_back(side); + } } - } - add_pins_spatial_lookup(rr_graph_builder, - physical_type, - pin_num_vec, - x, - y, - index, - wanted_sides); + add_pins_spatial_lookup(rr_graph_builder, + physical_type, + pin_num_vec, + layer, + x, + y, + index, + wanted_sides); + } } } } @@ -1183,6 +1198,7 @@ static void load_block_rr_indices(RRGraphBuilder& rr_graph_builder, static void add_pins_spatial_lookup(RRGraphBuilder& rr_graph_builder, t_physical_tile_type_ptr physical_type_ptr, const std::vector& pin_num_vec, + int layer, int root_x, int root_y, int* index, @@ -1193,8 +1209,8 @@ static void add_pins_spatial_lookup(RRGraphBuilder& rr_graph_builder, for (int height_offset = 0; height_offset < physical_type_ptr->height; ++height_offset) { int y_tile = root_y + height_offset; //only nodes on the tile may be located in a location other than the root-location - rr_graph_builder.node_lookup().reserve_nodes(x_tile, y_tile, OPIN, physical_type_ptr->num_pins, side); - rr_graph_builder.node_lookup().reserve_nodes(x_tile, y_tile, IPIN, physical_type_ptr->num_pins, side); + rr_graph_builder.node_lookup().reserve_nodes(layer, x_tile, y_tile, OPIN, physical_type_ptr->num_pins, side); + rr_graph_builder.node_lookup().reserve_nodes(layer, x_tile, y_tile, IPIN, physical_type_ptr->num_pins, side); } } } @@ -1210,13 +1226,12 @@ static void add_pins_spatial_lookup(RRGraphBuilder& rr_graph_builder, int x_tile = root_x + x_offset[pin_coord_idx]; int y_tile = root_y + y_offset[pin_coord_idx]; auto side = pin_sides[pin_coord_idx]; - if (pin_type == DRIVER) { - rr_graph_builder.node_lookup().add_node(RRNodeId(*index), x_tile, y_tile, OPIN, pin_num, side); + 
rr_graph_builder.node_lookup().add_node(RRNodeId(*index), layer, x_tile, y_tile, OPIN, pin_num, side); assigned_to_rr_node = true; } else { VTR_ASSERT(pin_type == RECEIVER); - rr_graph_builder.node_lookup().add_node(RRNodeId(*index), x_tile, y_tile, IPIN, pin_num, side); + rr_graph_builder.node_lookup().add_node(RRNodeId(*index), layer, x_tile, y_tile, IPIN, pin_num, side); assigned_to_rr_node = true; } } @@ -1240,6 +1255,7 @@ static void add_pins_spatial_lookup(RRGraphBuilder& rr_graph_builder, static void add_classes_spatial_lookup(RRGraphBuilder& rr_graph_builder, t_physical_tile_type_ptr physical_type_ptr, const std::vector& class_num_vec, + int layer, int root_x, int root_y, int block_width, @@ -1247,18 +1263,19 @@ static void add_classes_spatial_lookup(RRGraphBuilder& rr_graph_builder, int* index) { for (int x_tile = root_x; x_tile < (root_x + block_width); x_tile++) { for (int y_tile = root_y; y_tile < (root_y + block_height); y_tile++) { - rr_graph_builder.node_lookup().reserve_nodes(x_tile, y_tile, SOURCE, class_num_vec.size(), SIDES[0]); - rr_graph_builder.node_lookup().reserve_nodes(x_tile, y_tile, SINK, class_num_vec.size(), SIDES[0]); + rr_graph_builder.node_lookup().reserve_nodes(layer, x_tile, y_tile, SOURCE, class_num_vec.size(), SIDES[0]); + rr_graph_builder.node_lookup().reserve_nodes(layer, x_tile, y_tile, SINK, class_num_vec.size(), SIDES[0]); } } for (auto class_num : class_num_vec) { auto class_type = get_class_type_from_class_physical_num(physical_type_ptr, class_num); if (class_type == DRIVER) { - rr_graph_builder.node_lookup().add_node(RRNodeId(*index), root_x, root_y, SOURCE, class_num); + rr_graph_builder.node_lookup().add_node(RRNodeId(*index), layer, root_x, root_y, SOURCE, class_num); } else { VTR_ASSERT(class_type == RECEIVER); - rr_graph_builder.node_lookup().add_node(RRNodeId(*index), root_x, root_y, SINK, class_num); + + rr_graph_builder.node_lookup().add_node(RRNodeId(*index), layer, root_x, root_y, SINK, class_num); } ++(*index); 
} @@ -1273,11 +1290,11 @@ static void add_classes_spatial_lookup(RRGraphBuilder& rr_graph_builder, } int curr_x = root_x + x_offset; int curr_y = root_y + y_offset; - rr_graph_builder.node_lookup().mirror_nodes(vtr::Point(root_x, root_y), + rr_graph_builder.node_lookup().mirror_nodes(layer, vtr::Point(root_x, root_y), vtr::Point(curr_x, curr_y), SOURCE, SIDES[0]); - rr_graph_builder.node_lookup().mirror_nodes(vtr::Point(root_x, root_y), + rr_graph_builder.node_lookup().mirror_nodes(layer, vtr::Point(root_x, root_y), vtr::Point(curr_x, curr_y), SINK, SIDES[0]); @@ -1309,9 +1326,9 @@ void alloc_and_load_rr_node_indices(RRGraphBuilder& rr_graph_builder, /* Alloc the lookup table */ for (t_rr_type rr_type : RR_TYPES) { if (rr_type == CHANX) { - rr_graph_builder.node_lookup().resize_nodes(grid.height(), grid.width(), rr_type, NUM_SIDES); + rr_graph_builder.node_lookup().resize_nodes(grid.get_num_layers(), grid.height(), grid.width(), rr_type, NUM_SIDES); } else { - rr_graph_builder.node_lookup().resize_nodes(grid.width(), grid.height(), rr_type, NUM_SIDES); + rr_graph_builder.node_lookup().resize_nodes(grid.get_num_layers(), grid.width(), grid.height(), rr_type, NUM_SIDES); } } @@ -1319,9 +1336,9 @@ void alloc_and_load_rr_node_indices(RRGraphBuilder& rr_graph_builder, load_block_rr_indices(rr_graph_builder, grid, index, is_flat); /* Load the data for x and y channels */ - load_chan_rr_indices(nodes_per_chan->x_max, grid.width(), grid.height(), + load_chan_rr_indices(nodes_per_chan->x_max, grid, grid.width(), grid.height(), CHANX, chan_details_x, rr_graph_builder, index); - load_chan_rr_indices(nodes_per_chan->y_max, grid.height(), grid.width(), + load_chan_rr_indices(nodes_per_chan->y_max, grid, grid.height(), grid.width(), CHANY, chan_details_y, rr_graph_builder, index); } @@ -1330,39 +1347,44 @@ void alloc_and_load_intra_cluster_rr_node_indices(RRGraphBuilder& rr_graph_build const vtr::vector& pin_chains, const vtr::vector>& pin_chains_num, int* index) { - for (size_t 
x = 0; x < grid.width(); x++) { - for (size_t y = 0; y < grid.height(); y++) { - //Process each block from it's root location - if (grid.get_width_offset(x, y) == 0 && grid.get_height_offset(x, y) == 0) { - t_physical_tile_type_ptr physical_type = grid.get_physical_type(x, y); - //Assign indices for SINKs and SOURCEs - // Note that SINKS/SOURCES have no side, so we always use side 0 - std::vector class_num_vec; - std::vector pin_num_vec; - class_num_vec = get_cluster_netlist_intra_tile_classes_at_loc(x, y, physical_type); - pin_num_vec = get_cluster_netlist_intra_tile_pins_at_loc(x, - y, - pin_chains, - pin_chains_num, - physical_type); - add_classes_spatial_lookup(rr_graph_builder, - physical_type, - class_num_vec, - x, - y, - physical_type->width, - physical_type->height, - index); - - std::vector wanted_sides; - wanted_sides.push_back(e_side::TOP); - add_pins_spatial_lookup(rr_graph_builder, - physical_type, - pin_num_vec, - x, - y, - index, - wanted_sides); + for (int layer = 0; layer < grid.get_num_layers(); layer++) { + for (int x = 0; x < (int)grid.width(); x++) { + for (int y = 0; y < (int)grid.height(); y++) { + //Process each block from it's root location + if (grid.get_width_offset({x, y, layer}) == 0 && grid.get_height_offset({x, y, layer}) == 0) { + t_physical_tile_type_ptr physical_type = grid.get_physical_type({x, y, layer}); + //Assign indices for SINKs and SOURCEs + // Note that SINKS/SOURCES have no side, so we always use side 0 + std::vector class_num_vec; + std::vector pin_num_vec; + class_num_vec = get_cluster_netlist_intra_tile_classes_at_loc(layer, x, y, physical_type); + pin_num_vec = get_cluster_netlist_intra_tile_pins_at_loc(layer, + x, + y, + pin_chains, + pin_chains_num, + physical_type); + add_classes_spatial_lookup(rr_graph_builder, + physical_type, + class_num_vec, + layer, + x, + y, + physical_type->width, + physical_type->height, + index); + + std::vector wanted_sides; + wanted_sides.push_back(e_side::TOP); + 
add_pins_spatial_lookup(rr_graph_builder, + physical_type, + pin_num_vec, + layer, + x, + y, + index, + wanted_sides); + } } } } @@ -1386,111 +1408,114 @@ bool verify_rr_node_indices(const DeviceGrid& grid, int width = grid.width(); int height = grid.height(); - - for (int x = 0; x < width; ++x) { - for (int y = 0; y < height; ++y) { - for (t_rr_type rr_type : RR_TYPES) { - /* Get the list of nodes at a specific location (x, y) */ - std::vector nodes_from_lookup; - if (rr_type == CHANX || rr_type == CHANY) { - nodes_from_lookup = rr_graph.node_lookup().find_channel_nodes(x, y, rr_type); - } else { - nodes_from_lookup = rr_graph.node_lookup().find_grid_nodes_at_all_sides(x, y, rr_type); - } - for (RRNodeId inode : nodes_from_lookup) { - rr_node_counts[inode]++; - - if (rr_graph.node_type(inode) != rr_type) { - VPR_ERROR(VPR_ERROR_ROUTE, "RR node type does not match between rr_nodes and rr_node_indices (%s/%s): %s", - rr_node_typename[rr_graph.node_type(inode)], - rr_node_typename[rr_type], - describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); + int layer = grid.get_num_layers(); + + for (int l = 0; l < layer; ++l) { + for (int x = 0; x < width; ++x) { + for (int y = 0; y < height; ++y) { + for (t_rr_type rr_type : RR_TYPES) { + /* Get the list of nodes at a specific location (x, y) */ + std::vector nodes_from_lookup; + if (rr_type == CHANX || rr_type == CHANY) { + nodes_from_lookup = rr_graph.node_lookup().find_channel_nodes(l, x, y, rr_type); + } else { + nodes_from_lookup = rr_graph.node_lookup().find_grid_nodes_at_all_sides(l, x, y, rr_type); } + for (RRNodeId inode : nodes_from_lookup) { + rr_node_counts[inode]++; - if (rr_graph.node_type(inode) == CHANX) { - VTR_ASSERT_MSG(rr_graph.node_ylow(inode) == rr_graph.node_yhigh(inode), "CHANX should be horizontal"); - - if (y != rr_graph.node_ylow(inode)) { - VPR_ERROR(VPR_ERROR_ROUTE, "RR node y position does not agree between rr_nodes (%d) and rr_node_indices (%d): %s", - 
rr_graph.node_ylow(inode), - y, + if (rr_graph.node_type(inode) != rr_type) { + VPR_ERROR(VPR_ERROR_ROUTE, "RR node type does not match between rr_nodes and rr_node_indices (%s/%s): %s", + rr_node_typename[rr_graph.node_type(inode)], + rr_node_typename[rr_type], describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); } - if (!rr_graph.x_in_node_range(x, inode)) { - VPR_ERROR(VPR_ERROR_ROUTE, "RR node x positions do not agree between rr_nodes (%d <-> %d) and rr_node_indices (%d): %s", - rr_graph.node_xlow(inode), - rr_graph.node_xlow(inode), - x, - describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); - } - } else if (rr_graph.node_type(inode) == CHANY) { - VTR_ASSERT_MSG(rr_graph.node_xlow(inode) == rr_graph.node_xhigh(inode), "CHANY should be veritcal"); + if (rr_graph.node_type(inode) == CHANX) { + VTR_ASSERT_MSG(rr_graph.node_ylow(inode) == rr_graph.node_yhigh(inode), "CHANX should be horizontal"); - if (x != rr_graph.node_xlow(inode)) { - VPR_ERROR(VPR_ERROR_ROUTE, "RR node x position does not agree between rr_nodes (%d) and rr_node_indices (%d): %s", - rr_graph.node_xlow(inode), - x, - describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); - } + if (y != rr_graph.node_ylow(inode)) { + VPR_ERROR(VPR_ERROR_ROUTE, "RR node y position does not agree between rr_nodes (%d) and rr_node_indices (%d): %s", + rr_graph.node_ylow(inode), + y, + describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); + } - if (!rr_graph.y_in_node_range(y, inode)) { - VPR_ERROR(VPR_ERROR_ROUTE, "RR node y positions do not agree between rr_nodes (%d <-> %d) and rr_node_indices (%d): %s", - rr_graph.node_ylow(inode), - rr_graph.node_ylow(inode), - y, - describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); - } - } else if (rr_graph.node_type(inode) == SOURCE || rr_graph.node_type(inode) == SINK) { - //Sources have co-ordintes covering the entire block 
they are in - if (!rr_graph.x_in_node_range(x, inode)) { - VPR_ERROR(VPR_ERROR_ROUTE, "RR node x positions do not agree between rr_nodes (%d <-> %d) and rr_node_indices (%d): %s", - rr_graph.node_xlow(inode), - rr_graph.node_xlow(inode), - x, - describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); - } + if (!rr_graph.x_in_node_range(x, inode)) { + VPR_ERROR(VPR_ERROR_ROUTE, "RR node x positions do not agree between rr_nodes (%d <-> %d) and rr_node_indices (%d): %s", + rr_graph.node_xlow(inode), + rr_graph.node_xlow(inode), + x, + describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); + } + } else if (rr_graph.node_type(inode) == CHANY) { + VTR_ASSERT_MSG(rr_graph.node_xlow(inode) == rr_graph.node_xhigh(inode), "CHANY should be veritcal"); + + if (x != rr_graph.node_xlow(inode)) { + VPR_ERROR(VPR_ERROR_ROUTE, "RR node x position does not agree between rr_nodes (%d) and rr_node_indices (%d): %s", + rr_graph.node_xlow(inode), + x, + describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); + } - if (!rr_graph.y_in_node_range(y, inode)) { - VPR_ERROR(VPR_ERROR_ROUTE, "RR node y positions do not agree between rr_nodes (%d <-> %d) and rr_node_indices (%d): %s", - rr_graph.node_ylow(inode), - rr_graph.node_ylow(inode), - y, - describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); - } + if (!rr_graph.y_in_node_range(y, inode)) { + VPR_ERROR(VPR_ERROR_ROUTE, "RR node y positions do not agree between rr_nodes (%d <-> %d) and rr_node_indices (%d): %s", + rr_graph.node_ylow(inode), + rr_graph.node_ylow(inode), + y, + describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); + } + } else if (rr_graph.node_type(inode) == SOURCE || rr_graph.node_type(inode) == SINK) { + //Sources have co-ordintes covering the entire block they are in + if (!rr_graph.x_in_node_range(x, inode)) { + VPR_ERROR(VPR_ERROR_ROUTE, "RR node x positions do not agree 
between rr_nodes (%d <-> %d) and rr_node_indices (%d): %s", + rr_graph.node_xlow(inode), + rr_graph.node_xlow(inode), + x, + describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); + } - } else { - VTR_ASSERT(rr_graph.node_type(inode) == IPIN || rr_graph.node_type(inode) == OPIN); - /* As we allow a pin to be indexable on multiple sides, - * This check code should be invalid - * if (rr_node.xlow() != x) { - * VPR_ERROR(VPR_ERROR_ROUTE, "RR node xlow does not match between rr_nodes and rr_node_indices (%d/%d): %s", - * rr_node.xlow(), - * x, - * describe_rr_node(rr_graph, grid, rr_indexed_data, inode).c_str()); - * } - * - * if (rr_node.ylow() != y) { - * VPR_ERROR(VPR_ERROR_ROUTE, "RR node ylow does not match between rr_nodes and rr_node_indices (%d/%d): %s", - * rr_node.ylow(), - * y, - * describe_rr_node(rr_graph, grid, rr_indexed_data, inode).c_str()); - * } - */ - } + if (!rr_graph.y_in_node_range(y, inode)) { + VPR_ERROR(VPR_ERROR_ROUTE, "RR node y positions do not agree between rr_nodes (%d <-> %d) and rr_node_indices (%d): %s", + rr_graph.node_ylow(inode), + rr_graph.node_ylow(inode), + y, + describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); + } + + } else { + VTR_ASSERT(rr_graph.node_type(inode) == IPIN || rr_graph.node_type(inode) == OPIN); + /* As we allow a pin to be indexable on multiple sides, + * This check code should be invalid + * if (rr_node.xlow() != x) { + * VPR_ERROR(VPR_ERROR_ROUTE, "RR node xlow does not match between rr_nodes and rr_node_indices (%d/%d): %s", + * rr_node.xlow(), + * x, + * describe_rr_node(rr_graph, grid, rr_indexed_data, inode).c_str()); + * } + * + * if (rr_node.ylow() != y) { + * VPR_ERROR(VPR_ERROR_ROUTE, "RR node ylow does not match between rr_nodes and rr_node_indices (%d/%d): %s", + * rr_node.ylow(), + * y, + * describe_rr_node(rr_graph, grid, rr_indexed_data, inode).c_str()); + * } + */ + } - if (rr_type == IPIN || rr_type == OPIN) { - /* As we allow a pin 
to be indexable on multiple sides, - * This check code should be invalid - * if (rr_node.side() != side) { - * VPR_ERROR(VPR_ERROR_ROUTE, "RR node xlow does not match between rr_nodes and rr_node_indices (%s/%s): %s", - * SIDE_STRING[rr_node.side()], - * SIDE_STRING[side], - * describe_rr_node(rr_graph, grid, rr_indexed_data, inode).c_str()); - * } else { - * VTR_ASSERT(rr_node.side() == side); - * } - */ + if (rr_type == IPIN || rr_type == OPIN) { + /* As we allow a pin to be indexable on multiple sides, + * This check code should be invalid + * if (rr_node.side() != side) { + * VPR_ERROR(VPR_ERROR_ROUTE, "RR node xlow does not match between rr_nodes and rr_node_indices (%s/%s): %s", + * SIDE_STRING[rr_node.side()], + * SIDE_STRING[side], + * describe_rr_node(rr_graph, grid, rr_indexed_data, inode).c_str()); + * } else { + * VTR_ASSERT(rr_node.side() == side); + * } + */ + } } } } @@ -1537,6 +1562,7 @@ bool verify_rr_node_indices(const DeviceGrid& grid, } int get_track_to_pins(RRGraphBuilder& rr_graph_builder, + int layer, int seg, int chan, int track, @@ -1579,7 +1605,7 @@ int get_track_to_pins(RRGraphBuilder& rr_graph_builder, } /* PAJ - if the pointed to is an EMPTY then shouldn't look for ipins */ - auto type = device_ctx.grid.get_physical_type(x, y); + auto type = device_ctx.grid.get_physical_type({x, y, layer}); if (type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE) continue; @@ -1592,8 +1618,8 @@ int get_track_to_pins(RRGraphBuilder& rr_graph_builder, /* We need the type to find the ipin map for this type */ - int width_offset = device_ctx.grid.get_width_offset(x, y); - int height_offset = device_ctx.grid.get_height_offset(x, y); + int width_offset = device_ctx.grid.get_width_offset({x, y, layer}); + int height_offset = device_ctx.grid.get_height_offset({x, y, layer}); max_conn = track_to_pin_lookup[type->index][phy_track][width_offset][height_offset][side].size(); for (iconn = 0; iconn < max_conn; iconn++) { @@ -1601,7 +1627,7 @@ int 
get_track_to_pins(RRGraphBuilder& rr_graph_builder, /* Check there is a connection and Fc map isn't wrong */ /*int to_node = get_rr_node_index(L_rr_node_indices, x + width_offset, y + height_offset, IPIN, ipin, side);*/ - RRNodeId to_node = rr_graph_builder.node_lookup().find_node(x, y, IPIN, ipin, side); + RRNodeId to_node = rr_graph_builder.node_lookup().find_node(layer, x, y, IPIN, ipin, side); if (to_node) { rr_edges_to_create.emplace_back(from_rr_node, to_node, wire_to_ipin_switch); ++num_conn; @@ -1632,6 +1658,7 @@ int get_track_to_pins(RRGraphBuilder& rr_graph_builder, * transistor. */ int get_track_to_tracks(RRGraphBuilder& rr_graph_builder, + const int layer, const int from_chan, const int from_seg, const int from_track, @@ -1714,7 +1741,7 @@ int get_track_to_tracks(RRGraphBuilder& rr_graph_builder, from_is_sblock = true; } - auto switch_override = should_create_switchblock(grid, from_chan, sb_seg, from_type, to_type); + auto switch_override = should_create_switchblock(grid, layer, from_chan, sb_seg, from_type, to_type); if (switch_override == NO_SWITCH) { continue; //Do not create an SB here } @@ -1734,7 +1761,7 @@ int get_track_to_tracks(RRGraphBuilder& rr_graph_builder, to_sb = from_chan; } - /* to_chan_details may correspond to an x-directed or y-directed channel, depending for which + /* to_chan_details may correspond to an x-directed or y-directed channel, depending on which * channel type this function is used; so coordinates are reversed as necessary */ if (to_type == CHANX) { to_seg_details = to_chan_details[to_seg][to_chan].data(); @@ -1778,7 +1805,7 @@ int get_track_to_tracks(RRGraphBuilder& rr_graph_builder, if (sb_seg < end_sb_seg) { if (custom_switch_block) { if (Direction::DEC == from_seg_details[from_track].direction() || BI_DIRECTIONAL == directionality) { - num_conn += get_track_to_chan_seg(rr_graph_builder, from_track, to_chan, to_seg, + num_conn += get_track_to_chan_seg(rr_graph_builder, layer, from_track, to_chan, to_seg, to_type, 
from_side_a, to_side, switch_override, sb_conn_map, from_rr_node, rr_edges_to_create); @@ -1788,7 +1815,7 @@ int get_track_to_tracks(RRGraphBuilder& rr_graph_builder, /* For bidir, the target segment might have an unbuffered (bidir pass transistor) * switchbox, so we follow through regardless of whether the current segment has an SB */ conn_tracks = switch_block_conn[from_side_a][to_side][from_track]; - num_conn += get_bidir_track_to_chan_seg(rr_graph_builder, conn_tracks, + num_conn += get_bidir_track_to_chan_seg(rr_graph_builder, conn_tracks, layer, to_chan, to_seg, to_sb, to_type, to_seg_details, from_is_sblock, from_switch, switch_override, @@ -1799,7 +1826,7 @@ int get_track_to_tracks(RRGraphBuilder& rr_graph_builder, /* Also, we are connecting from the top or right of SB so it * makes the most sense to only get there from Direction::DEC wires. */ if ((from_is_sblock) && (Direction::DEC == from_seg_details[from_track].direction())) { - num_conn += get_unidir_track_to_chan_seg(rr_graph_builder, from_track, to_chan, + num_conn += get_unidir_track_to_chan_seg(rr_graph_builder, layer, from_track, to_chan, to_seg, to_sb, to_type, max_chan_width, grid, from_side_a, to_side, Fs_per_side, sblock_pattern, @@ -1816,7 +1843,7 @@ int get_track_to_tracks(RRGraphBuilder& rr_graph_builder, if (sb_seg > start_sb_seg) { if (custom_switch_block) { if (Direction::INC == from_seg_details[from_track].direction() || BI_DIRECTIONAL == directionality) { - num_conn += get_track_to_chan_seg(rr_graph_builder, from_track, to_chan, to_seg, + num_conn += get_track_to_chan_seg(rr_graph_builder, layer, from_track, to_chan, to_seg, to_type, from_side_b, to_side, switch_override, sb_conn_map, from_rr_node, rr_edges_to_create); @@ -1826,7 +1853,7 @@ int get_track_to_tracks(RRGraphBuilder& rr_graph_builder, /* For bidir, the target segment might have an unbuffered (bidir pass transistor) * switchbox, so we follow through regardless of whether the current segment has an SB */ conn_tracks = 
switch_block_conn[from_side_b][to_side][from_track]; - num_conn += get_bidir_track_to_chan_seg(rr_graph_builder, conn_tracks, + num_conn += get_bidir_track_to_chan_seg(rr_graph_builder, conn_tracks, layer, to_chan, to_seg, to_sb, to_type, to_seg_details, from_is_sblock, from_switch, switch_override, @@ -1838,7 +1865,7 @@ int get_track_to_tracks(RRGraphBuilder& rr_graph_builder, * makes the most sense to only get there from Direction::INC wires. */ if ((from_is_sblock) && (Direction::INC == from_seg_details[from_track].direction())) { - num_conn += get_unidir_track_to_chan_seg(rr_graph_builder, from_track, to_chan, + num_conn += get_unidir_track_to_chan_seg(rr_graph_builder, layer, from_track, to_chan, to_seg, to_sb, to_type, max_chan_width, grid, from_side_b, to_side, Fs_per_side, sblock_pattern, @@ -1856,6 +1883,7 @@ int get_track_to_tracks(RRGraphBuilder& rr_graph_builder, void alloc_and_load_tile_rr_node_indices(RRGraphBuilder& rr_graph_builder, t_physical_tile_type_ptr physical_tile, + int layer, int x, int y, int* num_rr_nodes) { @@ -1869,6 +1897,7 @@ void alloc_and_load_tile_rr_node_indices(RRGraphBuilder& rr_graph_builder, add_classes_spatial_lookup(rr_graph_builder, physical_tile, class_num_vec, + layer, x, y, physical_tile->width, @@ -1878,6 +1907,7 @@ void alloc_and_load_tile_rr_node_indices(RRGraphBuilder& rr_graph_builder, add_pins_spatial_lookup(rr_graph_builder, physical_tile, pin_num_vec, + layer, x, y, num_rr_nodes, @@ -1886,6 +1916,7 @@ void alloc_and_load_tile_rr_node_indices(RRGraphBuilder& rr_graph_builder, static int get_bidir_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, const std::vector conn_tracks, + const int layer, const int to_chan, const int to_seg, const int to_sb, @@ -1916,7 +1947,7 @@ static int get_bidir_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, num_conn = 0; for (iconn = 0; iconn < conn_tracks.size(); ++iconn) { to_track = conn_tracks[iconn]; - RRNodeId to_node = rr_graph_builder.node_lookup().find_node(to_x, to_y, 
to_type, to_track); + RRNodeId to_node = rr_graph_builder.node_lookup().find_node(layer, to_x, to_y, to_type, to_track); if (!to_node) { continue; @@ -1953,6 +1984,7 @@ static int get_bidir_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, * See route/build_switchblocks.c for a detailed description of how the switch block * connection map sb_conn_map is generated. */ static int get_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, + const int layer, const int from_wire, const int to_chan, const int to_seg, @@ -1995,7 +2027,7 @@ static int get_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, if (conn_vector.at(iconn).from_wire != from_wire) continue; int to_wire = conn_vector.at(iconn).to_wire; - RRNodeId to_node = rr_graph_builder.node_lookup().find_node(to_x, to_y, to_chan_type, to_wire); + RRNodeId to_node = rr_graph_builder.node_lookup().find_node(layer, to_x, to_y, to_chan_type, to_wire); if (!to_node) { continue; @@ -2027,6 +2059,7 @@ static int get_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, } static int get_unidir_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, + const int layer, const int from_track, const int to_chan, const int to_seg, @@ -2090,8 +2123,7 @@ static int get_unidir_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, to_track = mux_labels[(to_mux + i) % num_labels]; sblock_pattern[sb_x][sb_y][from_side][to_side][from_track][j + 1] = to_track; } - - RRNodeId to_node = rr_graph_builder.node_lookup().find_node(to_x, to_y, to_type, to_track); + RRNodeId to_node = rr_graph_builder.node_lookup().find_node(layer, to_x, to_y, to_type, to_track); if (!to_node) { continue; @@ -2709,7 +2741,7 @@ static int find_label_of_track(const std::vector& wire_mux_on_track, return i_label; } -static int should_create_switchblock(const DeviceGrid& grid, int from_chan_coord, int from_seg_coord, t_rr_type from_chan_type, t_rr_type to_chan_type) { +static int should_create_switchblock(const DeviceGrid& grid, int layer_num, int from_chan_coord, int 
from_seg_coord, t_rr_type from_chan_type, t_rr_type to_chan_type) { //Convert the chan/seg indices to real x/y coordinates int y_coord; int x_coord; @@ -2722,9 +2754,9 @@ static int should_create_switchblock(const DeviceGrid& grid, int from_chan_coord x_coord = from_chan_coord; } - auto blk_type = grid.get_physical_type(x_coord, y_coord); - int width_offset = grid.get_width_offset(x_coord, y_coord); - int height_offset = grid.get_height_offset(x_coord, y_coord); + auto blk_type = grid.get_physical_type({x_coord, y_coord, layer_num}); + int width_offset = grid.get_width_offset({x_coord, y_coord, layer_num}); + int height_offset = grid.get_height_offset({x_coord, y_coord, layer_num}); e_sb_type sb_type = blk_type->switchblock_locations[width_offset][height_offset]; auto switch_override = blk_type->switchblock_switch_overrides[width_offset][height_offset]; diff --git a/vpr/src/route/rr_graph2.h b/vpr/src/route/rr_graph2.h index 289521432dd..b2d47519739 100644 --- a/vpr/src/route/rr_graph2.h +++ b/vpr/src/route/rr_graph2.h @@ -31,6 +31,7 @@ void alloc_and_load_rr_node_indices(RRGraphBuilder& rr_graph_builder, void alloc_and_load_tile_rr_node_indices(RRGraphBuilder& rr_graph_builder, t_physical_tile_type_ptr physical_tile, + int layer, int x, int y, int* num_rr_nodes); @@ -125,6 +126,7 @@ bool is_sblock(const int chan, const enum e_directionality directionality); int get_bidir_opin_connections(RRGraphBuilder& rr_graph_builder, + const int layer, const int i, const int j, const int ipin, @@ -135,6 +137,7 @@ int get_bidir_opin_connections(RRGraphBuilder& rr_graph_builder, const t_chan_details& chan_details_y); int get_unidir_opin_connections(RRGraphBuilder& rr_graph_builder, + const int layer, const int chan, const int seg, int Fc, @@ -149,6 +152,7 @@ int get_unidir_opin_connections(RRGraphBuilder& rr_graph_builder, bool* Fc_clipped); int get_track_to_pins(RRGraphBuilder& rr_graph_builder, + int layer, int seg, int chan, int track, @@ -163,6 +167,7 @@ int 
get_track_to_pins(RRGraphBuilder& rr_graph_builder, enum e_directionality directionality); int get_track_to_tracks(RRGraphBuilder& rr_graph_builder, + const int layer, const int from_chan, const int from_seg, const int from_track, diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp index 50584679ea1..e3815e4e006 100644 --- a/vpr/src/util/vpr_utils.cpp +++ b/vpr/src/util/vpr_utils.cpp @@ -128,17 +128,19 @@ void sync_grid_to_blocks() { auto& device_ctx = g_vpr_ctx.device(); auto& device_grid = device_ctx.grid; + int num_layers = device_ctx.grid.get_num_layers(); + /* Reset usage and allocate blocks list if needed */ + place_ctx.grid_blocks = GridBlock(device_grid.width(), + device_grid.height(), + device_ctx.grid.get_num_layers()); auto& grid_blocks = place_ctx.grid_blocks; - grid_blocks.resize({device_grid.width(), device_grid.height()}); - for (size_t x = 0; x < device_grid.width(); ++x) { - for (size_t y = 0; y < device_grid.height(); ++y) { - auto& grid_block = grid_blocks[x][y]; - const auto& type = device_ctx.grid.get_physical_type(x, y); - grid_block.blocks.resize(type->capacity); - - for (int z = 0; z < type->capacity; ++z) { - grid_block.blocks[z] = EMPTY_BLOCK_ID; + + for (int layer_num = 0; layer_num < num_layers; layer_num++) { + for (int x = 0; x < (int)device_grid.width(); ++x) { + for (int y = 0; y < (int)device_grid.height(); ++y) { + const auto& type = device_ctx.grid.get_physical_type({x, y, layer_num}); + grid_blocks.initialized_grid_block_at_location({x, y, layer_num}, type->capacity); } } } @@ -146,9 +148,11 @@ void sync_grid_to_blocks() { /* Go through each block */ auto& cluster_ctx = g_vpr_ctx.clustering(); for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { + const auto& blk_loc = place_ctx.block_locs[blk_id].loc; int blk_x = place_ctx.block_locs[blk_id].loc.x; int blk_y = place_ctx.block_locs[blk_id].loc.y; int blk_z = place_ctx.block_locs[blk_id].loc.sub_tile; + int blk_layer = place_ctx.block_locs[blk_id].loc.layer; auto 
type = physical_tile_type(blk_id); @@ -162,32 +166,42 @@ void sync_grid_to_blocks() { } /* Check types match */ - if (type != device_ctx.grid.get_physical_type(blk_x, blk_y)) { - VPR_FATAL_ERROR(VPR_ERROR_PLACE, "A block is in a grid location (%d x %d) with a conflicting types '%s' and '%s' .\n", - blk_x, blk_y, + if (type != device_ctx.grid.get_physical_type({blk_x, blk_y, blk_layer})) { + VPR_FATAL_ERROR(VPR_ERROR_PLACE, "A block is in a grid location (%d x %d) layer (%d) with a conflicting types '%s' and '%s' .\n", + blk_x, blk_y, blk_layer, type->name, - device_ctx.grid.get_physical_type(blk_x, blk_y)->name); + device_ctx.grid.get_physical_type({blk_x, blk_y, blk_layer})->name); } /* Check already in use */ - if ((EMPTY_BLOCK_ID != place_ctx.grid_blocks[blk_x][blk_y].blocks[blk_z]) - && (INVALID_BLOCK_ID != place_ctx.grid_blocks[blk_x][blk_y].blocks[blk_z])) { - VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Location (%d, %d, %d) is used more than once.\n", - blk_x, blk_y, blk_z); + if ((EMPTY_BLOCK_ID != place_ctx.grid_blocks.block_at_location(blk_loc)) + && (INVALID_BLOCK_ID != place_ctx.grid_blocks.block_at_location(blk_loc))) { + VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Location (%d, %d, %d, %d) is used more than once.\n", + blk_x, blk_y, blk_z, blk_layer); } - if (device_ctx.grid.get_width_offset(blk_x, blk_y) != 0 || device_ctx.grid.get_height_offset(blk_x, blk_y) != 0) { - VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Large block not aligned in placment for cluster_ctx.blocks %lu at (%d, %d, %d).", - size_t(blk_id), blk_x, blk_y, blk_z); + if (device_ctx.grid.get_width_offset({blk_x, blk_y, blk_layer}) != 0 || device_ctx.grid.get_height_offset({blk_x, blk_y, blk_layer}) != 0) { + VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Large block not aligned in placment for cluster_ctx.blocks %lu at (%d, %d, %d, %d).", + size_t(blk_id), blk_x, blk_y, blk_z, blk_layer); } /* Set the block */ for (int width = 0; width < type->width; ++width) { for (int height = 0; height < type->height; ++height) { - 
place_ctx.grid_blocks[blk_x + width][blk_y + height].blocks[blk_z] = blk_id; - place_ctx.grid_blocks[blk_x + width][blk_y + height].usage++; - VTR_ASSERT(device_ctx.grid.get_width_offset(blk_x + width, blk_y + height) == width); - VTR_ASSERT(device_ctx.grid.get_height_offset(blk_x + width, blk_y + height) == height); + place_ctx.grid_blocks.set_block_at_location({blk_x + width, + blk_y + height, + blk_z, + blk_layer}, + blk_id); + place_ctx.grid_blocks.set_usage({blk_x + width, + blk_y + height, + blk_layer}, + place_ctx.grid_blocks.get_usage({blk_x + width, + blk_y + height, + blk_layer}) + + 1); + VTR_ASSERT(device_ctx.grid.get_width_offset({blk_x + width, blk_y + height, blk_layer}) == width); + VTR_ASSERT(device_ctx.grid.get_height_offset({blk_x + width, blk_y + height, blk_layer}) == height); } } } @@ -202,11 +216,15 @@ std::string rr_node_arch_name(int inode, bool is_flat) { std::string rr_node_arch_name; if (rr_graph.node_type(RRNodeId(inode)) == OPIN || rr_graph.node_type(RRNodeId(inode)) == IPIN) { //Pin names - auto type = device_ctx.grid.get_physical_type(rr_graph.node_xlow(rr_node), rr_graph.node_ylow(rr_node)); + auto type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(rr_node), + rr_graph.node_ylow(rr_node), + rr_graph.node_layer(rr_node)}); rr_node_arch_name += block_type_pin_index_to_name(type, rr_graph.node_pin_num(rr_node), is_flat); } else if (rr_graph.node_type(RRNodeId(inode)) == SOURCE || rr_graph.node_type(RRNodeId(inode)) == SINK) { //Set of pins associated with SOURCE/SINK - auto type = device_ctx.grid.get_physical_type(rr_graph.node_xlow(rr_node), rr_graph.node_ylow(rr_node)); + auto type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(rr_node), + rr_graph.node_ylow(rr_node), + rr_graph.node_layer(rr_node)}); auto pin_names = block_type_class_index_to_pin_names(type, rr_graph.node_class_num(rr_node), is_flat); if (pin_names.size() > 1) { rr_node_arch_name += rr_graph.node_type_string(RRNodeId(inode)); @@ -516,7 +534,7 @@ 
t_physical_tile_type_ptr physical_tile_type(ClusterBlockId blk) { auto block_loc = place_ctx.block_locs[blk]; auto loc = block_loc.loc; - return device_ctx.grid.get_physical_type(loc.x, loc.y); + return device_ctx.grid.get_physical_type({loc.x, loc.y, loc.layer}); } t_physical_tile_type_ptr physical_tile_type(AtomBlockId atom_blk) { @@ -546,7 +564,7 @@ int get_sub_tile_index(ClusterBlockId blk) { auto loc = block_loc.loc; int sub_tile_coordinate = loc.sub_tile; - auto type = device_ctx.grid.get_physical_type(loc.x, loc.y); + auto type = device_ctx.grid.get_physical_type({loc.x, loc.y, loc.layer}); for (const auto& sub_tile : type->sub_tiles) { if (sub_tile.capacity.is_in_range(sub_tile_coordinate)) { @@ -732,9 +750,9 @@ t_logical_block_type_ptr infer_logic_block_type(const DeviceGrid& grid) { int rhs_num_instances = 0; // Count number of instances for each type for (auto type : lhs->equivalent_tiles) - lhs_num_instances += grid.num_instances(type); + lhs_num_instances += grid.num_instances(type, -1); for (auto type : rhs->equivalent_tiles) - rhs_num_instances += grid.num_instances(type); + rhs_num_instances += grid.num_instances(type, -1); return lhs_num_instances > rhs_num_instances; }; std::stable_sort(logic_block_candidates.begin(), logic_block_candidates.end(), by_desc_grid_count); @@ -759,7 +777,7 @@ t_logical_block_type_ptr find_most_common_block_type(const DeviceGrid& grid) { for (const auto& logical_block : device_ctx.logical_block_types) { size_t inst_cnt = 0; for (const auto& equivalent_tile : logical_block.equivalent_tiles) { - inst_cnt += grid.num_instances(equivalent_tile); + inst_cnt += grid.num_instances(equivalent_tile, -1); } if (max_count < inst_cnt) { @@ -781,7 +799,7 @@ t_physical_tile_type_ptr find_most_common_tile_type(const DeviceGrid& grid) { t_physical_tile_type_ptr max_type = nullptr; size_t max_count = 0; for (const auto& physical_tile : device_ctx.physical_tile_types) { - size_t inst_cnt = grid.num_instances(&physical_tile); + size_t 
inst_cnt = grid.num_instances(&physical_tile, -1); if (max_count < inst_cnt) { max_count = inst_cnt; @@ -1332,11 +1350,9 @@ std::tuplecapacity); auto& cluster_net_list = g_vpr_ctx.clustering().clb_nlist; @@ -2148,7 +2164,7 @@ t_physical_tile_type_ptr get_physical_tile_type(const ClusterBlockId blk) { t_pl_loc loc = place_ctx.block_locs[blk].loc; - return device_ctx.grid.get_physical_type(loc.x, loc.y); + return device_ctx.grid.get_physical_type({loc.x, loc.y, loc.layer}); } } @@ -2283,7 +2299,9 @@ int get_rr_node_max_ptc(const RRGraphView& rr_graph_view, VTR_ASSERT(node_type == IPIN || node_type == OPIN || node_type == SINK || node_type == SOURCE); const DeviceContext& device_ctx = g_vpr_ctx.device(); - auto physical_type = device_ctx.grid.get_physical_type(rr_graph_view.node_xlow(node_id), rr_graph_view.node_ylow(node_id)); + auto physical_type = device_ctx.grid.get_physical_type({rr_graph_view.node_xlow(node_id), + rr_graph_view.node_ylow(node_id), + rr_graph_view.node_layer(node_id)}); if (node_type == SINK || node_type == SOURCE) { return get_tile_class_max_ptc(physical_type, is_flat); @@ -2294,6 +2312,7 @@ int get_rr_node_max_ptc(const RRGraphView& rr_graph_view, RRNodeId get_pin_rr_node_id(const RRSpatialLookup& rr_spatial_lookup, t_physical_tile_type_ptr physical_tile, + const int layer, const int root_i, const int root_j, int pin_physical_num) { @@ -2306,7 +2325,8 @@ RRNodeId get_pin_rr_node_id(const RRSpatialLookup& rr_spatial_lookup, VTR_ASSERT(!x_offset.empty()); RRNodeId node_id = RRNodeId::INVALID(); for (int coord_idx = 0; coord_idx < (int)pin_sides.size(); coord_idx++) { - node_id = rr_spatial_lookup.find_node(root_i + x_offset[coord_idx], + node_id = rr_spatial_lookup.find_node(layer, + root_i + x_offset[coord_idx], root_j + y_offset[coord_idx], node_type, pin_physical_num, @@ -2319,13 +2339,14 @@ RRNodeId get_pin_rr_node_id(const RRSpatialLookup& rr_spatial_lookup, RRNodeId get_class_rr_node_id(const RRSpatialLookup& rr_spatial_lookup, 
t_physical_tile_type_ptr physical_tile, + const int layer, const int i, const int j, int class_physical_num) { auto class_type = get_class_type_from_class_physical_num(physical_tile, class_physical_num); VTR_ASSERT(class_type == DRIVER || class_type == RECEIVER); t_rr_type node_type = (class_type == e_pin_type::DRIVER) ? t_rr_type::SOURCE : t_rr_type::SINK; - return rr_spatial_lookup.find_node(i, j, node_type, class_physical_num); + return rr_spatial_lookup.find_node(layer, i, j, node_type, class_physical_num); } bool node_in_same_physical_tile(RRNodeId node_first, RRNodeId node_second) { @@ -2340,17 +2361,19 @@ bool node_in_same_physical_tile(RRNodeId node_first, RRNodeId node_second) { } else { VTR_ASSERT(first_rr_type == t_rr_type::IPIN || first_rr_type == t_rr_type::OPIN || first_rr_type == t_rr_type::SINK || first_rr_type == t_rr_type::SOURCE); VTR_ASSERT(second_rr_type == t_rr_type::IPIN || second_rr_type == t_rr_type::OPIN || second_rr_type == t_rr_type::SINK || second_rr_type == t_rr_type::SOURCE); + int first_layer = rr_graph.node_layer(node_first); int first_x = rr_graph.node_xlow(node_first); int first_y = rr_graph.node_ylow(node_first); + int sec_layer = rr_graph.node_layer(node_second); int sec_x = rr_graph.node_xlow(node_second); int sec_y = rr_graph.node_ylow(node_second); // Get the root-location of the pin's block - int first_root_x = first_x - device_ctx.grid.get_width_offset(first_x, first_y); - int first_root_y = first_y - device_ctx.grid.get_height_offset(first_x, first_y); + int first_root_x = first_x - device_ctx.grid.get_width_offset({first_x, first_y, first_layer}); + int first_root_y = first_y - device_ctx.grid.get_height_offset({first_x, first_y, first_layer}); - int sec_root_x = sec_x - device_ctx.grid.get_width_offset(sec_x, sec_y); - int sec_root_y = sec_y - device_ctx.grid.get_height_offset(sec_x, sec_y); + int sec_root_x = sec_x - device_ctx.grid.get_width_offset({sec_x, sec_y, sec_layer}); + int sec_root_y = sec_y - 
device_ctx.grid.get_height_offset({sec_x, sec_y, sec_layer}); // If the root-location of the nodes are similar, they should be located in the same tile if (first_root_x == sec_root_x && first_root_y == sec_root_y) @@ -2360,23 +2383,24 @@ bool node_in_same_physical_tile(RRNodeId node_first, RRNodeId node_second) { } } -std::vector get_cluster_netlist_intra_tile_classes_at_loc(const int i, - const int j, +std::vector get_cluster_netlist_intra_tile_classes_at_loc(int layer, + int i, + int j, t_physical_tile_type_ptr physical_type) { std::vector class_num_vec; const auto& place_ctx = g_vpr_ctx.placement(); const auto& atom_lookup = g_vpr_ctx.atom().lookup; - const auto& grid_block = place_ctx.grid_blocks[i][j]; + const auto& grid_block = place_ctx.grid_blocks; class_num_vec.reserve(physical_type->primitive_class_inf.size()); //iterate over different sub tiles inside a tile for (int abs_cap = 0; abs_cap < physical_type->capacity; abs_cap++) { - if (grid_block.subtile_empty(abs_cap)) { + if (grid_block.is_sub_tile_empty({i, j, layer}, abs_cap)) { continue; } - auto cluster_blk_id = grid_block.blocks[abs_cap]; + auto cluster_blk_id = grid_block.block_at_location({i, j, abs_cap, layer}); VTR_ASSERT(cluster_blk_id != ClusterBlockId::INVALID() || cluster_blk_id != EMPTY_BLOCK_ID); auto primitive_classes = get_cluster_internal_class_pairs(atom_lookup, @@ -2391,13 +2415,14 @@ std::vector get_cluster_netlist_intra_tile_classes_at_loc(const int i, return class_num_vec; } -std::vector get_cluster_netlist_intra_tile_pins_at_loc(const int i, +std::vector get_cluster_netlist_intra_tile_pins_at_loc(const int layer, + const int i, const int j, const vtr::vector& pin_chains, const vtr::vector>& pin_chains_num, t_physical_tile_type_ptr physical_type) { auto& place_ctx = g_vpr_ctx.placement(); - auto grid_block = place_ctx.grid_blocks[i][j]; + auto grid_block = place_ctx.grid_blocks; std::vector pin_num_vec; pin_num_vec.reserve(get_tile_num_internal_pin(physical_type)); @@ -2405,10 
+2430,10 @@ std::vector get_cluster_netlist_intra_tile_pins_at_loc(const int i, for (int abs_cap = 0; abs_cap < physical_type->capacity; abs_cap++) { std::vector cluster_internal_pins; - if (grid_block.subtile_empty(abs_cap)) { + if (grid_block.is_sub_tile_empty({i, j, layer}, abs_cap)) { continue; } - auto cluster_blk_id = grid_block.blocks[abs_cap]; + auto cluster_blk_id = grid_block.block_at_location({i, j, abs_cap, layer}); VTR_ASSERT(cluster_blk_id != ClusterBlockId::INVALID() && cluster_blk_id != EMPTY_BLOCK_ID); cluster_internal_pins = get_cluster_internal_pins(cluster_blk_id); diff --git a/vpr/src/util/vpr_utils.h b/vpr/src/util/vpr_utils.h index 67214a69a60..f3a8f8917e7 100644 --- a/vpr/src/util/vpr_utils.h +++ b/vpr/src/util/vpr_utils.h @@ -266,12 +266,14 @@ int get_rr_node_max_ptc(const RRGraphView& rr_graph_view, RRNodeId get_pin_rr_node_id(const RRSpatialLookup& rr_spatial_lookup, t_physical_tile_type_ptr physical_tile, + const int layer, const int root_i, const int root_j, int pin_physical_num); RRNodeId get_class_rr_node_id(const RRSpatialLookup& rr_spatial_lookup, t_physical_tile_type_ptr physical_tile, + const int layer, const int i, const int j, int class_physical_num); @@ -279,12 +281,14 @@ RRNodeId get_class_rr_node_id(const RRSpatialLookup& rr_spatial_lookup, // Check whether the given nodes are in the same cluster bool node_in_same_physical_tile(RRNodeId node_first, RRNodeId node_second); -std::vector get_cluster_netlist_intra_tile_classes_at_loc(const int i, - const int j, +std::vector get_cluster_netlist_intra_tile_classes_at_loc(int layer, + int i, + int j, t_physical_tile_type_ptr physical_type); /** - * @brief Returns the list of pins inside the tile located at (i, j), except fo the ones which are on a chain + * @brief Returns the list of pins inside the tile located at (layer, i, j), except for the ones which are on a chain + * @param layer * @param i * @param j * @param pin_chains @@ -292,7 +296,8 @@ std::vector 
get_cluster_netlist_intra_tile_classes_at_loc(const int i, * @param physical_type * @return */ -std::vector get_cluster_netlist_intra_tile_pins_at_loc(const int i, +std::vector get_cluster_netlist_intra_tile_pins_at_loc(const int layer, + const int i, const int j, const vtr::vector& pin_chains, const vtr::vector>& pin_chains_num, diff --git a/vpr/test/test_bfs_routing.cpp b/vpr/test/test_bfs_routing.cpp index e45c486665e..21d96d75d48 100644 --- a/vpr/test/test_bfs_routing.cpp +++ b/vpr/test/test_bfs_routing.cpp @@ -24,12 +24,12 @@ TEST_CASE("test_route_flow", "[vpr_noc_bfs_routing]") { // store the reference to device grid with // need to add this before routers are added - noc_model.set_device_grid_width((int)4); + noc_model.set_device_grid_spec((int)4, 0); // add all the routers for (int i = 0; i < 4; i++) { for (int j = 0; j < 4; j++) { - noc_model.add_router((i * 4) + j, j, i); + noc_model.add_router((i * 4) + j, j, i, 0); } } diff --git a/vpr/test/test_map_lookahead_serdes.cpp b/vpr/test/test_map_lookahead_serdes.cpp index f9235bf340e..9beb03b3601 100644 --- a/vpr/test/test_map_lookahead_serdes.cpp +++ b/vpr/test/test_map_lookahead_serdes.cpp @@ -10,15 +10,17 @@ namespace { static constexpr const char kMapLookaheadBin[] = "test_map_lookahead.bin"; TEST_CASE("round_trip_map_lookahead", "[vpr]") { - constexpr std::array kDim({10, 12, 15, 16}); + constexpr std::array kDim({1, 10, 12, 15, 16}); f_wire_cost_map.resize(kDim); - for (size_t x = 0; x < kDim[0]; ++x) { - for (size_t y = 0; y < kDim[1]; ++y) { - for (size_t z = 0; z < kDim[2]; ++z) { - for (size_t w = 0; w < kDim[3]; ++w) { - f_wire_cost_map[x][y][z][w].delay = (x + 1) * (y + 1) * (z + 1) * (w + 1); - f_wire_cost_map[x][y][z][w].congestion = 2 * (x + 1) * (y + 1) * (z + 1) * (w + 1); + for (size_t layer = 0; layer < kDim[0]; layer++) { + for (size_t x = 0; x < kDim[1]; ++x) { + for (size_t y = 0; y < kDim[2]; ++y) { + for (size_t z = 0; z < kDim[3]; ++z) { + for (size_t w = 0; w < kDim[4]; ++w) { + 
f_wire_cost_map[layer][x][y][z][w].delay = (x + 1) * (y + 1) * (z + 1) * (w + 1); + f_wire_cost_map[layer][x][y][z][w].congestion = 2 * (x + 1) * (y + 1) * (z + 1) * (w + 1); + } } } } @@ -26,18 +28,20 @@ TEST_CASE("round_trip_map_lookahead", "[vpr]") { write_router_lookahead(kMapLookaheadBin); - for (size_t x = 0; x < kDim[0]; ++x) { - for (size_t y = 0; y < kDim[1]; ++y) { - for (size_t z = 0; z < kDim[2]; ++z) { - for (size_t w = 0; w < kDim[3]; ++w) { - f_wire_cost_map[x][y][z][w].delay = 0.f; - f_wire_cost_map[x][y][z][w].congestion = 0.f; + for (size_t layer = 0; layer < kDim[0]; layer++) { + for (size_t x = 0; x < kDim[1]; ++x) { + for (size_t y = 0; y < kDim[2]; ++y) { + for (size_t z = 0; z < kDim[3]; ++z) { + for (size_t w = 0; w < kDim[4]; ++w) { + f_wire_cost_map[layer][x][y][z][w].delay = 0.f; + f_wire_cost_map[layer][x][y][z][w].congestion = 0.f; + } } } } } - f_wire_cost_map.resize({0, 0, 0, 0}); + f_wire_cost_map.resize({0, 0, 0, 0, 0}); read_router_lookahead(kMapLookaheadBin); @@ -45,12 +49,14 @@ TEST_CASE("round_trip_map_lookahead", "[vpr]") { REQUIRE(f_wire_cost_map.dim_size(i) == kDim[i]); } - for (size_t x = 0; x < kDim[0]; ++x) { - for (size_t y = 0; y < kDim[1]; ++y) { - for (size_t z = 0; z < kDim[2]; ++z) { - for (size_t w = 0; w < kDim[3]; ++w) { - REQUIRE(f_wire_cost_map[x][y][z][w].delay == (x + 1) * (y + 1) * (z + 1) * (w + 1)); - REQUIRE(f_wire_cost_map[x][y][z][w].congestion == 2 * (x + 1) * (y + 1) * (z + 1) * (w + 1)); + for (size_t layer = 0; layer < kDim[0]; layer++) { + for (size_t x = 0; x < kDim[1]; ++x) { + for (size_t y = 0; y < kDim[2]; ++y) { + for (size_t z = 0; z < kDim[3]; ++z) { + for (size_t w = 0; w < kDim[4]; ++w) { + REQUIRE(f_wire_cost_map[layer][x][y][z][w].delay == (x + 1) * (y + 1) * (z + 1) * (w + 1)); + REQUIRE(f_wire_cost_map[layer][x][y][z][w].congestion == 2 * (x + 1) * (y + 1) * (z + 1) * (w + 1)); + } } } } diff --git a/vpr/test/test_noc_place_utils.cpp b/vpr/test/test_noc_place_utils.cpp index 
65e76fb4dbb..978cdbe9248 100644 --- a/vpr/test/test_noc_place_utils.cpp +++ b/vpr/test/test_noc_place_utils.cpp @@ -38,7 +38,7 @@ TEST_CASE("test_initial_noc_placement", "[noc_place_utils]") { // store the reference to device grid with // the grid width will be the size of the noc mesh - noc_ctx.noc_model.set_device_grid_width((int)MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST); + noc_ctx.noc_model.set_device_grid_spec((int)MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST, 0); // individual router parameters int curr_router_id; @@ -58,7 +58,10 @@ TEST_CASE("test_initial_noc_placement", "[noc_place_utils]") { id_of_all_hard_routers_in_device.push_back(router_number); // add the router to the noc - noc_ctx.noc_model.add_router(curr_router_id, router_grid_position_x, router_grid_position_y); + noc_ctx.noc_model.add_router(curr_router_id, + router_grid_position_x, + router_grid_position_y, + 0); } noc_ctx.noc_model.make_room_for_noc_router_link_list(); @@ -93,7 +96,10 @@ TEST_CASE("test_initial_noc_placement", "[noc_place_utils]") { const NocRouter& hard_router_block = noc_ctx.noc_model.get_single_noc_router((NocRouterId)cluster_block_number); t_block_loc current_cluster_block_location; current_cluster_block_location.is_fixed = true; - current_cluster_block_location.loc = t_pl_loc(hard_router_block.get_router_grid_position_x(), hard_router_block.get_router_grid_position_y(), -1); + current_cluster_block_location.loc = t_pl_loc(hard_router_block.get_router_grid_position_x(), + hard_router_block.get_router_grid_position_y(), + -1, + hard_router_block.get_router_layer_position()); // now add the cluster and its placed location to the placement datastructures place_ctx.block_locs.insert(ClusterBlockId(cluster_block_number), current_cluster_block_location); @@ -212,7 +218,7 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") { // store the reference to device grid with // the grid width will be the size of the noc mesh - 
noc_ctx.noc_model.set_device_grid_width((int)MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST); + noc_ctx.noc_model.set_device_grid_spec((int)MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST, 0); // individual router parameters int curr_router_id; @@ -232,7 +238,10 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") { id_of_all_hard_routers_in_device.push_back(router_number); // add the router to the noc - noc_ctx.noc_model.add_router(curr_router_id, router_grid_position_x, router_grid_position_y); + noc_ctx.noc_model.add_router(curr_router_id, + router_grid_position_x, + router_grid_position_y, + 0); } noc_ctx.noc_model.make_room_for_noc_router_link_list(); @@ -267,7 +276,10 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") { const NocRouter& hard_router_block = noc_ctx.noc_model.get_single_noc_router((NocRouterId)cluster_block_number); t_block_loc current_cluster_block_location; current_cluster_block_location.is_fixed = true; - current_cluster_block_location.loc = t_pl_loc(hard_router_block.get_router_grid_position_x(), hard_router_block.get_router_grid_position_y(), -1); + current_cluster_block_location.loc = t_pl_loc(hard_router_block.get_router_grid_position_x(), + hard_router_block.get_router_grid_position_y(), + -1, + hard_router_block.get_router_layer_position()); // now add the cluster and its placed location to the placement datastructures place_ctx.block_locs.insert(ClusterBlockId(cluster_block_number), current_cluster_block_location); @@ -448,7 +460,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ // store the reference to device grid with // the grid width will be the size of the noc mesh - noc_ctx.noc_model.set_device_grid_width((int)MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST); + noc_ctx.noc_model.set_device_grid_spec((int)MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST, 0); // individual router parameters int curr_router_id; @@ -480,7 +492,10 @@ 
TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ router_grid_position_y = router_number / MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST; // add the router to the noc - noc_ctx.noc_model.add_router(curr_router_id, router_grid_position_x, router_grid_position_y); + noc_ctx.noc_model.add_router(curr_router_id, + router_grid_position_x, + router_grid_position_y, + 0); } noc_ctx.noc_model.make_room_for_noc_router_link_list(); @@ -515,7 +530,10 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ const NocRouter& hard_router_block = noc_ctx.noc_model.get_single_noc_router((NocRouterId)cluster_block_number); t_block_loc current_cluster_block_location; current_cluster_block_location.is_fixed = true; - current_cluster_block_location.loc = t_pl_loc(hard_router_block.get_router_grid_position_x(), hard_router_block.get_router_grid_position_y(), -1); + current_cluster_block_location.loc = t_pl_loc(hard_router_block.get_router_grid_position_x(), + hard_router_block.get_router_grid_position_y(), + -1, + hard_router_block.get_router_layer_position()); router_where_cluster_is_placed.push_back((NocRouterId)cluster_block_number); @@ -658,12 +676,24 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ blocks_affected.num_moved_blocks = 2; blocks_affected.moved_blocks[0].block_num = swap_router_block_one; - blocks_affected.moved_blocks[0].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), -1); - blocks_affected.moved_blocks[0].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), 
noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), -1); + blocks_affected.moved_blocks[0].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); + blocks_affected.moved_blocks[0].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_layer_position()); blocks_affected.moved_blocks[1].block_num = swap_router_block_two; - blocks_affected.moved_blocks[1].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), -1); - blocks_affected.moved_blocks[1].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), -1); + blocks_affected.moved_blocks[1].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), + -1, 
+ noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_layer_position()); + blocks_affected.moved_blocks[1].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); // swap the hard router blocks where the two cluster blocks are placed on NocRouterId router_first_swap_cluster_location = router_where_cluster_is_placed[swap_router_block_one]; @@ -785,12 +815,24 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ blocks_affected.moved_blocks[0].block_num = swap_router_block_one; - blocks_affected.moved_blocks[0].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), -1); - blocks_affected.moved_blocks[0].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), -1); + blocks_affected.moved_blocks[0].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); + 
blocks_affected.moved_blocks[0].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_layer_position()); blocks_affected.moved_blocks[1].block_num = swap_router_block_two; - blocks_affected.moved_blocks[1].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), -1); - blocks_affected.moved_blocks[1].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), -1); + blocks_affected.moved_blocks[1].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_layer_position()); + blocks_affected.moved_blocks[1].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); // 
swap the hard router blocks where the two cluster blocks are placed on NocRouterId router_first_swap_cluster_location = router_where_cluster_is_placed[swap_router_block_one]; @@ -892,12 +934,24 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ blocks_affected.moved_blocks[0].block_num = swap_router_block_one; - blocks_affected.moved_blocks[0].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), -1); - blocks_affected.moved_blocks[0].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), -1); + blocks_affected.moved_blocks[0].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); + blocks_affected.moved_blocks[0].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_layer_position()); blocks_affected.moved_blocks[1].block_num = swap_router_block_two; - blocks_affected.moved_blocks[1].old_loc = 
t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), -1); - blocks_affected.moved_blocks[1].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), -1); + blocks_affected.moved_blocks[1].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_layer_position()); + blocks_affected.moved_blocks[1].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); // swap the hard router blocks where the two cluster blocks are placed on router_first_swap_cluster_location = router_where_cluster_is_placed[swap_router_block_one]; @@ -974,12 +1028,24 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ blocks_affected.moved_blocks[0].block_num = swap_router_block_one; - blocks_affected.moved_blocks[0].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), 
noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), -1); - blocks_affected.moved_blocks[0].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), -1); + blocks_affected.moved_blocks[0].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); + blocks_affected.moved_blocks[0].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_layer_position()); blocks_affected.moved_blocks[1].block_num = swap_router_block_two; - blocks_affected.moved_blocks[1].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), -1); - blocks_affected.moved_blocks[1].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), -1); + 
blocks_affected.moved_blocks[1].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_layer_position()); + blocks_affected.moved_blocks[1].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); // swap the hard router blocks where the two cluster blocks are placed on router_first_swap_cluster_location = router_where_cluster_is_placed[swap_router_block_one]; @@ -1137,7 +1203,7 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") { // store the reference to device grid with // the grid width will be the size of the noc mesh - noc_ctx.noc_model.set_device_grid_width((int)MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST); + noc_ctx.noc_model.set_device_grid_spec((int)MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST, 0); // individual router parameters int curr_router_id; @@ -1166,7 +1232,10 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") { router_grid_position_y = router_number / MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST; // add the router to the noc - noc_ctx.noc_model.add_router(curr_router_id, router_grid_position_x, router_grid_position_y); + noc_ctx.noc_model.add_router(curr_router_id, + router_grid_position_x, + router_grid_position_y, + 0); } noc_ctx.noc_model.make_room_for_noc_router_link_list(); @@ -1201,7 +1270,10 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", 
"[noc_place_utils]") { const NocRouter& hard_router_block = noc_ctx.noc_model.get_single_noc_router((NocRouterId)cluster_block_number); t_block_loc current_cluster_block_location; current_cluster_block_location.is_fixed = true; - current_cluster_block_location.loc = t_pl_loc(hard_router_block.get_router_grid_position_x(), hard_router_block.get_router_grid_position_y(), -1); + current_cluster_block_location.loc = t_pl_loc(hard_router_block.get_router_grid_position_x(), + hard_router_block.get_router_grid_position_y(), + -1, + hard_router_block.get_router_layer_position()); router_where_cluster_is_placed.push_back((NocRouterId)cluster_block_number); @@ -1319,12 +1391,24 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") { blocks_affected.num_moved_blocks = 2; blocks_affected.moved_blocks[0].block_num = swap_router_block_one; - blocks_affected.moved_blocks[0].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), -1); - blocks_affected.moved_blocks[0].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), -1); + blocks_affected.moved_blocks[0].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); + blocks_affected.moved_blocks[0].new_loc = 
t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_layer_position()); blocks_affected.moved_blocks[1].block_num = swap_router_block_two; - blocks_affected.moved_blocks[1].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), -1); - blocks_affected.moved_blocks[1].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), -1); + blocks_affected.moved_blocks[1].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_layer_position()); + blocks_affected.moved_blocks[1].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); // swap the hard router blocks where the two 
cluster blocks are placed on // this is needed to that we can @@ -1444,7 +1528,7 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") { // store the reference to device grid with // the grid width will be the size of the noc mesh - noc_ctx.noc_model.set_device_grid_width((int)MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST); + noc_ctx.noc_model.set_device_grid_spec((int)MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST, 0); // individual router parameters int curr_router_id; @@ -1481,7 +1565,10 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") { router_grid_position_y = router_number / MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST; // add the router to the noc - noc_ctx.noc_model.add_router(curr_router_id, router_grid_position_x, router_grid_position_y); + noc_ctx.noc_model.add_router(curr_router_id, + router_grid_position_x, + router_grid_position_y, + 0); } noc_ctx.noc_model.make_room_for_noc_router_link_list(); @@ -1516,7 +1603,10 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") { const NocRouter& hard_router_block = noc_ctx.noc_model.get_single_noc_router((NocRouterId)cluster_block_number); t_block_loc current_cluster_block_location; current_cluster_block_location.is_fixed = true; - current_cluster_block_location.loc = t_pl_loc(hard_router_block.get_router_grid_position_x(), hard_router_block.get_router_grid_position_y(), -1); + current_cluster_block_location.loc = t_pl_loc(hard_router_block.get_router_grid_position_x(), + hard_router_block.get_router_grid_position_y(), + -1, + hard_router_block.get_router_layer_position()); router_where_cluster_is_placed.push_back((NocRouterId)cluster_block_number); diff --git a/vpr/test/test_noc_storage.cpp b/vpr/test/test_noc_storage.cpp index 8ceec3f36e1..a0a3e2ee54b 100644 --- a/vpr/test/test_noc_storage.cpp +++ b/vpr/test/test_noc_storage.cpp @@ -32,7 +32,7 @@ TEST_CASE("test_adding_routers_to_noc_storage", "[vpr_noc]") { // store the reference to device grid with // this will be set to the 
total number of routers (and should be set before adding routers) - test_noc.set_device_grid_width((int)NUM_OF_ROUTERS); + test_noc.set_device_grid_spec((int)NUM_OF_ROUTERS, 0); NocRouterId converted_id; @@ -44,10 +44,10 @@ TEST_CASE("test_adding_routers_to_noc_storage", "[vpr_noc]") { router_grid_position_y = router_number + dist(rand_num_gen); // add router to the golden vector - golden_set.emplace_back(router_number, router_grid_position_x, router_grid_position_y); + golden_set.emplace_back(router_number, router_grid_position_x, router_grid_position_y, 0); // add tje router to the noc - test_noc.add_router(curr_router_id, router_grid_position_x, router_grid_position_y); + test_noc.add_router(curr_router_id, router_grid_position_x, router_grid_position_y, 0); } // now verify that the routers were added properly by reading the routers back from the noc and comparing them to the golden set @@ -84,7 +84,7 @@ TEST_CASE("test_router_id_conversion", "[vpr_noc]") { // store the reference to device grid with // this will be set to the total number of routers (and should be set before adding routers) - test_noc.set_device_grid_width((int)NUM_OF_ROUTERS); + test_noc.set_device_grid_spec((int)NUM_OF_ROUTERS, 0); NocRouterId converted_id; @@ -96,10 +96,10 @@ TEST_CASE("test_router_id_conversion", "[vpr_noc]") { router_grid_position_y = router_number + dist(rand_num_gen); // add router to the golden vector - golden_set.emplace_back(router_number, router_grid_position_x, router_grid_position_y); + golden_set.emplace_back(router_number, router_grid_position_x, router_grid_position_y, 0); // add tje router to the noc - test_noc.add_router(curr_router_id, router_grid_position_x, router_grid_position_y); + test_noc.add_router(curr_router_id, router_grid_position_x, router_grid_position_y, 0); } // now verify that the routers were added properly by reading the routers back from the noc and comparing them to the golden set @@ -131,7 +131,7 @@ TEST_CASE("test_add_link", "[vpr_noc]") 
{ // store the reference to device grid with // this will be set to the total number of routers (and should be set before adding routers) - test_noc.set_device_grid_width((int)NUM_OF_ROUTERS); + test_noc.set_device_grid_spec((int)NUM_OF_ROUTERS, 0); // keeps track of the number of links created int total_num_of_links = 0; @@ -147,7 +147,10 @@ TEST_CASE("test_add_link", "[vpr_noc]") { router_id = router_number; // add tje router to the noc - test_noc.add_router(router_id, curr_router_x_pos, curr_router_y_pos); + test_noc.add_router(router_id, + curr_router_x_pos, + curr_router_y_pos, + 0); } // allocate the size for outgoing link vector for each router @@ -209,7 +212,7 @@ TEST_CASE("test_router_link_list", "[vpr_noc]") { // store the reference to device grid with // this will be set to the total number of routers (and should be set before adding routers) - test_noc.set_device_grid_width((int)NUM_OF_ROUTERS); + test_noc.set_device_grid_spec((int)NUM_OF_ROUTERS, 0); // need to assign @@ -228,7 +231,7 @@ TEST_CASE("test_router_link_list", "[vpr_noc]") { router_id = router_number; // add tje router to the noc - test_noc.add_router(router_id, curr_router_x_pos, curr_router_y_pos); + test_noc.add_router(router_id, curr_router_x_pos, curr_router_y_pos, 0); } // allocate the size for outgoing link vector for each router @@ -284,7 +287,7 @@ TEST_CASE("test_remove_link", "[vpr_noc]") { // store the reference to device grid with // this will be set to the total number of routers (and should be set before adding routers) - test_noc.set_device_grid_width((int)NUM_OF_ROUTERS); + test_noc.set_device_grid_spec((int)NUM_OF_ROUTERS, 0); // temp variables that hold the routers involved within a link NocRouterId source; @@ -302,7 +305,10 @@ TEST_CASE("test_remove_link", "[vpr_noc]") { router_id = router_number; // add tje router to the noc - test_noc.add_router(router_id, curr_router_x_pos, curr_router_y_pos); + test_noc.add_router(router_id, + curr_router_x_pos, + curr_router_y_pos, + 
0); } // now go through and add the links to the NoC @@ -404,7 +410,7 @@ TEST_CASE("test_generate_router_key_from_grid_location", "[vpr_noc]") { // store the reference to device grid with // this will be set to the total number of routers (and should be set before adding routers) - test_noc.set_device_grid_width((int)NUM_OF_ROUTERS); + test_noc.set_device_grid_spec((int)NUM_OF_ROUTERS, 0); NocRouterId converted_id; @@ -420,15 +426,21 @@ TEST_CASE("test_generate_router_key_from_grid_location", "[vpr_noc]") { golden_set.emplace_back((NocRouterId)router_number); // add the router to the noc - test_noc.add_router(curr_router_id, router_grid_position_x, router_grid_position_y); + test_noc.add_router(curr_router_id, + router_grid_position_x, + router_grid_position_y, + 0); } // now verify the test function by identifying all the routers using their grid locations // the grid locations go from 0 to the total number of routers in the NoC for (int grid_location = 0; grid_location < NUM_OF_ROUTERS; grid_location++) { // contains the grid location of a router block seen during placement - // we don't care about the subtile so give it an arbitrary value - t_pl_loc placement_router_grid_location = t_pl_loc(grid_location, grid_location, -1); + // we dont care about the subtile so give it a arbritary value + t_pl_loc placement_router_grid_location = t_pl_loc(grid_location, + grid_location, + -1, + 0); NocRouterId found_router_at_grid_location = test_noc.get_router_at_grid_location(placement_router_grid_location); diff --git a/vpr/test/test_place_delay_model_serdes.cpp b/vpr/test/test_place_delay_model_serdes.cpp index dbf140c9017..818b5cc3dfe 100644 --- a/vpr/test/test_place_delay_model_serdes.cpp +++ b/vpr/test/test_place_delay_model_serdes.cpp @@ -9,16 +9,20 @@ static constexpr const char kDeltaDelayBin[] = "test_delta_delay.bin"; static constexpr const char kOverrideDelayBin[] = "test_override_delay.bin"; TEST_CASE("round_trip_delta_delay_model", "[vpr]") { + constexpr size_t 
kDimLayer = 1; constexpr size_t kDimX = 10; constexpr size_t kDimY = 10; - vtr::Matrix delays; - delays.resize({kDimX, kDimY}); - - for (size_t x = 0; x < kDimX; ++x) { - for (size_t y = 0; y < kDimY; ++y) { - delays[x][y] = (x + 1) * (y + 1); + vtr::NdMatrix delays; + delays.resize({kDimLayer, kDimX, kDimY}); + + for (size_t layer = 0; layer < kDimLayer; ++layer) { + for (size_t x = 0; x < kDimX; ++x) { + for (size_t y = 0; y < kDimY; ++y) { + delays[layer][x][y] = (x + 1) * (y + 1); + } } } + DeltaDelayModel model(std::move(delays), false); const auto& delays1 = model.delays(); @@ -35,22 +39,26 @@ TEST_CASE("round_trip_delta_delay_model", "[vpr]") { REQUIRE(delays1.dim_size(dim) == delays2.dim_size(dim)); } - for (size_t x = 0; x < kDimX; ++x) { - for (size_t y = 0; y < kDimY; ++y) { - CHECK(delays1[x][y] == delays2[x][y]); + for (size_t layer = 0; layer < kDimLayer; ++layer) { + for (size_t x = 0; x < kDimX; ++x) { + for (size_t y = 0; y < kDimY; ++y) { + CHECK(delays1[layer][x][y] == delays2[layer][x][y]); + } } } } TEST_CASE("round_trip_override_delay_model", "[vpr]") { + constexpr size_t kDimLayer = 1; constexpr size_t kDimX = 10; constexpr size_t kDimY = 10; - vtr::Matrix delays; - delays.resize({kDimX, kDimY}); - - for (size_t x = 0; x < kDimX; ++x) { - for (size_t y = 0; y < kDimY; ++y) { - delays[x][y] = (x + 1) * (y + 1); + vtr::NdMatrix delays; + delays.resize({kDimLayer, kDimX, kDimY}); + for (size_t layer = 0; layer < kDimLayer; ++layer) { + for (size_t x = 0; x < kDimX; ++x) { + for (size_t y = 0; y < kDimY; ++y) { + delays[layer][x][y] = (x + 1) * (y + 1); + } } } OverrideDelayModel model(false); @@ -73,9 +81,11 @@ TEST_CASE("round_trip_override_delay_model", "[vpr]") { REQUIRE(delays1.dim_size(dim) == delays2.dim_size(dim)); } - for (size_t x = 0; x < kDimX; ++x) { - for (size_t y = 0; y < kDimY; ++y) { - CHECK(delays1[x][y] == delays2[x][y]); + for (size_t layer = 0; layer < kDimLayer; ++layer) { + for (size_t x = 0; x < kDimX; ++x) { + for 
(size_t y = 0; y < kDimY; ++y) { + CHECK(delays1[layer][x][y] == delays2[layer][x][y]); + } } } diff --git a/vpr/test/test_setup_noc.cpp b/vpr/test/test_setup_noc.cpp index 5e81373cf0d..b88949b11f3 100644 --- a/vpr/test/test_setup_noc.cpp +++ b/vpr/test/test_setup_noc.cpp @@ -418,17 +418,17 @@ TEST_CASE("test_create_noc_routers", "[vpr_setup_noc]") { * - router 8: (4,8) * - router 9: (8,8) */ - list_of_routers.push_back({0, 0, 0.5, 1}); - list_of_routers.push_back({4, 0, 4.5, 1}); - list_of_routers.push_back({8, 0, 8.5, 1}); + list_of_routers.push_back({0, 0, 0, 0.5, 1}); + list_of_routers.push_back({4, 0, 0, 4.5, 1}); + list_of_routers.push_back({8, 0, 0, 8.5, 1}); - list_of_routers.push_back({0, 4, 0.5, 5}); - list_of_routers.push_back({4, 4, 4.5, 5}); - list_of_routers.push_back({8, 4, 8.5, 5}); + list_of_routers.push_back({0, 4, 0, 0.5, 5}); + list_of_routers.push_back({4, 4, 0, 4.5, 5}); + list_of_routers.push_back({8, 4, 0, 8.5, 5}); - list_of_routers.push_back({0, 8, 0.5, 9}); - list_of_routers.push_back({4, 8, 4.5, 9}); - list_of_routers.push_back({8, 8, 8.5, 9}); + list_of_routers.push_back({0, 8, 0, 0.5, 9}); + list_of_routers.push_back({4, 8, 0, 4.5, 9}); + list_of_routers.push_back({8, 8, 0, 8.5, 9}); // create the noc model (to store the routers) NocStorage noc_model; @@ -595,24 +595,24 @@ TEST_CASE("test_create_noc_links", "[vpr_setup_noc]") { * - router 8: (4,8) * - router 9: (8,8) */ - list_of_routers.push_back({0, 0, 0.5, 1}); - list_of_routers.push_back({4, 0, 4.5, 1}); - list_of_routers.push_back({8, 0, 8.5, 1}); + list_of_routers.push_back({0, 0, 0, 0.5, 1}); + list_of_routers.push_back({4, 0, 0, 4.5, 1}); + list_of_routers.push_back({8, 0, 0, 8.5, 1}); - list_of_routers.push_back({0, 4, 0.5, 5}); - list_of_routers.push_back({4, 4, 4.5, 5}); - list_of_routers.push_back({8, 4, 8.5, 5}); + list_of_routers.push_back({0, 4, 0, 0.5, 5}); + list_of_routers.push_back({4, 4, 0, 4.5, 5}); + list_of_routers.push_back({8, 4, 0, 8.5, 5}); - 
list_of_routers.push_back({0, 8, 0.5, 9}); - list_of_routers.push_back({4, 8, 4.5, 9}); - list_of_routers.push_back({8, 8, 8.5, 9}); + list_of_routers.push_back({0, 8, 0, 0.5, 9}); + list_of_routers.push_back({4, 8, 0, 4.5, 9}); + list_of_routers.push_back({8, 8, 0, 8.5, 9}); // create the noc model (to store the routers) NocStorage noc_model; // store the reference to device grid with // this will be set to the device grid width - noc_model.set_device_grid_width((int)3); + noc_model.set_device_grid_spec((int)3, 0); // create the logical router list t_noc_inf noc_info; @@ -632,7 +632,10 @@ TEST_CASE("test_create_noc_links", "[vpr_setup_noc]") { noc_info.router_list.push_back(*temp_router); // add the router to the NoC - noc_model.add_router(router_id, list_of_routers[router_id - 1].grid_width_position, list_of_routers[router_id - 1].grid_height_position); + noc_model.add_router(router_id, + list_of_routers[router_id - 1].grid_width_position, + list_of_routers[router_id - 1].grid_height_position, + list_of_routers[router_id - 1].layer_position); } delete temp_router; @@ -738,17 +741,17 @@ TEST_CASE("test_setup_noc", "[vpr_setup_noc]") { * - router 8: (4,8) * - router 9: (8,8) */ - list_of_routers.push_back({0, 0, 0.5, 1}); - list_of_routers.push_back({4, 0, 4.5, 1}); - list_of_routers.push_back({8, 0, 8.5, 1}); + list_of_routers.push_back({0, 0, 0, 0.5, 1}); + list_of_routers.push_back({4, 0, 0, 4.5, 1}); + list_of_routers.push_back({8, 0, 0, 8.5, 1}); - list_of_routers.push_back({0, 4, 0.5, 5}); - list_of_routers.push_back({4, 4, 4.5, 5}); - list_of_routers.push_back({8, 4, 8.5, 5}); + list_of_routers.push_back({0, 4, 0, 0.5, 5}); + list_of_routers.push_back({4, 4, 0, 4.5, 5}); + list_of_routers.push_back({8, 4, 0, 8.5, 5}); - list_of_routers.push_back({0, 8, 0.5, 9}); - list_of_routers.push_back({4, 8, 4.5, 9}); - list_of_routers.push_back({8, 8, 8.5, 9}); + list_of_routers.push_back({0, 8, 0, 0.5, 9}); + list_of_routers.push_back({4, 8, 0, 4.5, 9}); + 
list_of_routers.push_back({8, 8, 0, 8.5, 9}); for (int router_id = 1; router_id < 10; router_id++) { // we will have 9 logical routers that will take up all physical routers diff --git a/vpr/test/test_vpr_constraints.cpp b/vpr/test/test_vpr_constraints.cpp index ca58509468d..30772950e19 100644 --- a/vpr/test/test_vpr_constraints.cpp +++ b/vpr/test/test_vpr_constraints.cpp @@ -18,26 +18,27 @@ TEST_CASE("Region", "[vpr]") { Region r1; - r1.set_region_rect(1, 2, 3, 4); + r1.set_region_rect({1, 2, 3, 4, 5}); r1.set_sub_tile(2); - vtr::Rect rect; - rect = r1.get_region_rect(); + const auto r1_coord = r1.get_region_rect(); - REQUIRE(rect.xmin() == 1); - REQUIRE(rect.ymin() == 2); - REQUIRE(rect.xmax() == 3); - REQUIRE(rect.ymax() == 4); + REQUIRE(r1_coord.xmin == 1); + REQUIRE(r1_coord.ymin == 2); + REQUIRE(r1_coord.xmax == 3); + REQUIRE(r1_coord.ymax == 4); + REQUIRE(r1_coord.layer_num == 5); REQUIRE(r1.get_sub_tile() == 2); //checking that default constructor creates an empty rectangle (999, 999,-1,-1) Region def_region; bool is_def_empty = false; - vtr::Rect def_rect = def_region.get_region_rect(); - is_def_empty = def_rect.empty(); + const auto def_coord = def_region.get_region_rect(); + is_def_empty = def_region.empty(); REQUIRE(is_def_empty == true); - REQUIRE(def_rect.xmin() == 999); + REQUIRE(def_coord.xmin == 999); + REQUIRE(def_coord.layer_num == -1); REQUIRE(def_region.get_sub_tile() == -1); } @@ -45,7 +46,7 @@ TEST_CASE("Region", "[vpr]") { TEST_CASE("PartitionRegion", "[vpr]") { Region r1; - r1.set_region_rect(2, 3, 6, 7); + r1.set_region_rect({2, 3, 6, 7, 0}); r1.set_sub_tile(3); PartitionRegion pr1; @@ -54,12 +55,13 @@ TEST_CASE("PartitionRegion", "[vpr]") { std::vector pr_regions = pr1.get_partition_region(); REQUIRE(pr_regions[0].get_sub_tile() == 3); - vtr::Rect rect; - rect = pr_regions[0].get_region_rect(); - REQUIRE(rect.xmin() == 2); - REQUIRE(rect.ymin() == 3); - REQUIRE(rect.xmax() == 6); - REQUIRE(rect.ymax() == 7); + + const auto pr_reg_coord = 
pr_regions[0].get_region_rect(); + REQUIRE(pr_reg_coord.layer_num == 0); + REQUIRE(pr_reg_coord.xmin == 2); + REQUIRE(pr_reg_coord.ymin == 3); + REQUIRE(pr_reg_coord.xmax == 6); + REQUIRE(pr_reg_coord.ymax == 7); } //Test Partition class accessors and mutators @@ -71,7 +73,7 @@ TEST_CASE("Partition", "[vpr]") { //create region and partitionregions objects to test functions of the Partition class Region r1; - r1.set_region_rect(2, 3, 7, 8); + r1.set_region_rect({2, 3, 7, 8, 0}); r1.set_sub_tile(3); PartitionRegion part_reg; @@ -82,12 +84,13 @@ TEST_CASE("Partition", "[vpr]") { std::vector regions = part_reg_2.get_partition_region(); REQUIRE(regions[0].get_sub_tile() == 3); - vtr::Rect rect; - rect = regions[0].get_region_rect(); - REQUIRE(rect.xmin() == 2); - REQUIRE(rect.ymin() == 3); - REQUIRE(rect.xmax() == 7); - REQUIRE(rect.ymax() == 8); + + const auto pr_reg_coord = regions[0].get_region_rect(); + REQUIRE(pr_reg_coord.layer_num == 0); + REQUIRE(pr_reg_coord.xmin == 2); + REQUIRE(pr_reg_coord.ymin == 3); + REQUIRE(pr_reg_coord.xmax == 7); + REQUIRE(pr_reg_coord.ymax == 8); } //Test VprConstraints class accessors and mutators @@ -134,35 +137,37 @@ TEST_CASE("RegionIntersect", "[vpr]") { Region region1; Region region2; - region1.set_region_rect(1, 2, 3, 5); - region2.set_region_rect(2, 3, 4, 6); + region1.set_region_rect({1, 2, 3, 5, 0}); + region2.set_region_rect({2, 3, 4, 6, 0}); Region int_reg; int_reg = intersection(region1, region2); - vtr::Rect rect = int_reg.get_region_rect(); + auto intersect_reg_coord = int_reg.get_region_rect(); - REQUIRE(rect.xmin() == 2); - REQUIRE(rect.ymin() == 3); - REQUIRE(rect.xmax() == 3); - REQUIRE(rect.ymax() == 5); + REQUIRE(intersect_reg_coord.layer_num == 0); + REQUIRE(intersect_reg_coord.xmin == 2); + REQUIRE(intersect_reg_coord.ymin == 3); + REQUIRE(intersect_reg_coord.xmax == 3); + REQUIRE(intersect_reg_coord.ymax == 5); //Test full overlap Region region3; Region region4; - region3.set_region_rect(5, 1, 8, 6); - 
region4.set_region_rect(6, 3, 8, 6); + region3.set_region_rect({5, 1, 8, 6, 0}); + region4.set_region_rect({6, 3, 8, 6, 0}); Region int_reg_2; int_reg_2 = intersection(region3, region4); - vtr::Rect rect_2 = int_reg_2.get_region_rect(); + intersect_reg_coord = int_reg_2.get_region_rect(); - REQUIRE(rect_2.xmin() == 6); - REQUIRE(rect_2.ymin() == 3); - REQUIRE(rect_2.xmax() == 8); - REQUIRE(rect_2.ymax() == 6); + REQUIRE(intersect_reg_coord.layer_num == 0); + REQUIRE(intersect_reg_coord.xmin == 6); + REQUIRE(intersect_reg_coord.ymin == 3); + REQUIRE(intersect_reg_coord.xmax == 8); + REQUIRE(intersect_reg_coord.ymax == 6); //Test no intersection (rectangles don't overlap, intersect region will be returned empty) @@ -187,11 +192,12 @@ TEST_CASE("RegionIntersect", "[vpr]") { Region int_reg_5; int_reg_5 = intersection(region1, region2); - vtr::Rect rect_5 = int_reg_5.get_region_rect(); - REQUIRE(rect_5.xmin() == 2); - REQUIRE(rect_5.ymin() == 3); - REQUIRE(rect_5.xmax() == 3); - REQUIRE(rect_5.ymax() == 5); + const auto reg_5_coord = int_reg_5.get_region_rect(); + REQUIRE(reg_5_coord.layer_num == 0); + REQUIRE(reg_5_coord.xmin == 2); + REQUIRE(reg_5_coord.ymin == 3); + REQUIRE(reg_5_coord.xmax == 3); + REQUIRE(reg_5_coord.ymax == 5); } //The following six test cases test the intersection function for PartitionRegions @@ -204,9 +210,23 @@ TEST_CASE("PartRegionIntersect", "[vpr]") { Region r2; Region r3; - r1.set_region_rect(0, 0, 1, 1); - r2.set_region_rect(1, 1, 2, 2); - r3.set_region_rect(0, 0, 2, 2); + r1.set_region_rect({0, + 0, + 1, + 1, + 0}); + + r2.set_region_rect({1, + 1, + 2, + 2, + 0}); + + r3.set_region_rect({0, + 0, + 2, + 2, + 0}); pr1.add_to_part_region(r1); pr1.add_to_part_region(r2); @@ -219,8 +239,13 @@ TEST_CASE("PartRegionIntersect", "[vpr]") { vtr::Rect int_rect(0, 0, 1, 1); vtr::Rect int_rect_2(1, 1, 2, 2); - REQUIRE(regions[0].get_region_rect() == int_rect); - REQUIRE(regions[1].get_region_rect() == int_rect_2); + + const auto first_reg_coord = 
regions[0].get_region_rect(); + const auto second_reg_coord = regions[1].get_region_rect(); + REQUIRE(vtr::Rect(first_reg_coord.xmin, first_reg_coord.ymin, first_reg_coord.xmax, first_reg_coord.ymax) == int_rect); + REQUIRE(vtr::Rect(second_reg_coord.xmin, second_reg_coord.ymin, second_reg_coord.xmax, second_reg_coord.ymax) == int_rect_2); + REQUIRE(first_reg_coord.layer_num == 0); + REQUIRE(second_reg_coord.layer_num == 0); } //2x1 regions, 1 overlap @@ -232,9 +257,9 @@ TEST_CASE("PartRegionIntersect2", "[vpr]") { Region r2; Region r3; - r1.set_region_rect(0, 0, 2, 2); - r2.set_region_rect(4, 4, 6, 6); - r3.set_region_rect(0, 0, 2, 2); + r1.set_region_rect({0, 0, 2, 2, 0}); + r2.set_region_rect({4, 4, 6, 6, 0}); + r3.set_region_rect({0, 0, 2, 2, 0}); pr1.add_to_part_region(r1); pr1.add_to_part_region(r2); @@ -246,7 +271,9 @@ TEST_CASE("PartRegionIntersect2", "[vpr]") { std::vector regions = int_pr.get_partition_region(); vtr::Rect int_rect(0, 0, 2, 2); REQUIRE(regions.size() == 1); - REQUIRE(regions[0].get_region_rect() == int_rect); + const auto first_reg_coord = regions[0].get_region_rect(); + REQUIRE(vtr::Rect(first_reg_coord.xmin, first_reg_coord.ymin, first_reg_coord.xmax, first_reg_coord.ymax) == int_rect); + REQUIRE(first_reg_coord.layer_num == 0); } //2x2 regions, no overlaps @@ -259,14 +286,14 @@ TEST_CASE("PartRegionIntersect3", "[vpr]") { Region r3; Region r4; - r1.set_region_rect(1, 2, 3, 5); + r1.set_region_rect({1, 2, 3, 5, 0}); r1.set_sub_tile(2); - r2.set_region_rect(4, 2, 6, 4); + r2.set_region_rect({4, 2, 6, 4, 0}); - r3.set_region_rect(4, 5, 5, 7); + r3.set_region_rect({4, 5, 5, 7, 0}); - r4.set_region_rect(1, 2, 3, 5); + r4.set_region_rect({1, 2, 3, 5, 0}); r4.set_sub_tile(4); pr1.add_to_part_region(r1); @@ -292,14 +319,14 @@ TEST_CASE("PartRegionIntersect4", "[vpr]") { Region r3; Region r4; - r1.set_region_rect(1, 2, 3, 5); + r1.set_region_rect({1, 2, 3, 5, 0}); r1.set_sub_tile(2); - r2.set_region_rect(4, 2, 6, 4); + r2.set_region_rect({4, 2, 
6, 4, 0}); - r3.set_region_rect(4, 5, 5, 7); + r3.set_region_rect({4, 5, 5, 7, 0}); - r4.set_region_rect(1, 2, 3, 4); + r4.set_region_rect({1, 2, 3, 4, 0}); r4.set_sub_tile(2); pr1.add_to_part_region(r1); @@ -315,7 +342,9 @@ TEST_CASE("PartRegionIntersect4", "[vpr]") { vtr::Rect intersect(1, 2, 3, 4); REQUIRE(regions.size() == 1); - REQUIRE(regions[0].get_region_rect() == intersect); + const auto first_reg_coord = regions[0].get_region_rect(); + REQUIRE(first_reg_coord.layer_num == 0); + REQUIRE(first_reg_coord.get_rect() == intersect); REQUIRE(regions[0].get_sub_tile() == 2); } @@ -329,13 +358,13 @@ TEST_CASE("PartRegionIntersect5", "[vpr]") { Region r3; Region r4; - r1.set_region_rect(1, 5, 5, 7); + r1.set_region_rect({1, 5, 5, 7, 0}); - r2.set_region_rect(6, 3, 8, 5); + r2.set_region_rect({6, 3, 8, 5, 0}); - r3.set_region_rect(2, 6, 4, 9); + r3.set_region_rect({2, 6, 4, 9, 0}); - r4.set_region_rect(6, 4, 8, 7); + r4.set_region_rect({6, 4, 8, 7, 0}); pr1.add_to_part_region(r1); pr1.add_to_part_region(r2); @@ -351,8 +380,13 @@ TEST_CASE("PartRegionIntersect5", "[vpr]") { vtr::Rect int_r2r4(6, 4, 8, 5); REQUIRE(regions.size() == 2); - REQUIRE(regions[0].get_region_rect() == int_r1r3); - REQUIRE(regions[1].get_region_rect() == int_r2r4); + const auto first_reg_coord = regions[0].get_region_rect(); + const auto second_reg_coord = regions[1].get_region_rect(); + + REQUIRE(first_reg_coord.layer_num == 0); + REQUIRE(second_reg_coord.layer_num == 0); + REQUIRE(first_reg_coord.get_rect() == int_r1r3); + REQUIRE(second_reg_coord.get_rect() == int_r2r4); } //2x2 regions, 4 overlap @@ -365,13 +399,13 @@ TEST_CASE("PartRegionIntersect6", "[vpr]") { Region r3; Region r4; - r1.set_region_rect(2, 3, 4, 7); + r1.set_region_rect({2, 3, 4, 7, 0}); - r2.set_region_rect(5, 3, 7, 8); + r2.set_region_rect({5, 3, 7, 8, 0}); - r3.set_region_rect(2, 2, 7, 4); + r3.set_region_rect({2, 2, 7, 4, 0}); - r4.set_region_rect(2, 6, 7, 8); + r4.set_region_rect({2, 6, 7, 8, 0}); 
pr1.add_to_part_region(r1); pr1.add_to_part_region(r2); @@ -389,10 +423,15 @@ TEST_CASE("PartRegionIntersect6", "[vpr]") { vtr::Rect int_r2r4(5, 6, 7, 8); REQUIRE(regions.size() == 4); - REQUIRE(regions[0].get_region_rect() == int_r1r3); - REQUIRE(regions[1].get_region_rect() == int_r1r4); - REQUIRE(regions[2].get_region_rect() == int_r2r3); - REQUIRE(regions[3].get_region_rect() == int_r2r4); + REQUIRE(regions[0].get_region_rect().get_rect() == int_r1r3); + REQUIRE(regions[1].get_region_rect().get_rect() == int_r1r4); + REQUIRE(regions[2].get_region_rect().get_rect() == int_r2r3); + REQUIRE(regions[3].get_region_rect().get_rect() == int_r2r4); + + REQUIRE(regions[0].get_region_rect().layer_num == 0); + REQUIRE(regions[1].get_region_rect().layer_num == 0); + REQUIRE(regions[2].get_region_rect().layer_num == 0); + REQUIRE(regions[3].get_region_rect().layer_num == 0); } //Test calculation of macro constraints @@ -405,12 +444,12 @@ TEST_CASE("MacroConstraints", "[vpr]") { t_pl_offset offset(2, 1, 0); Region reg; - reg.set_region_rect(5, 2, 9, 6); + reg.set_region_rect({5, 2, 9, 6, 0}); head_pr.add_to_part_region(reg); Region grid_reg; - grid_reg.set_region_rect(0, 0, 20, 20); + grid_reg.set_region_rect({0, 0, 20, 20, 0}); PartitionRegion grid_pr; grid_pr.add_to_part_region(grid_reg); @@ -418,12 +457,13 @@ TEST_CASE("MacroConstraints", "[vpr]") { std::vector mac_regions = macro_pr.get_partition_region(); - vtr::Rect mac_rect = mac_regions[0].get_region_rect(); + const auto mac_first_reg_coord = mac_regions[0].get_region_rect(); - REQUIRE(mac_rect.xmin() == 7); - REQUIRE(mac_rect.ymin() == 3); - REQUIRE(mac_rect.xmax() == 11); - REQUIRE(mac_rect.ymax() == 7); + REQUIRE(mac_first_reg_coord.layer_num == 0); + REQUIRE(mac_first_reg_coord.xmin == 7); + REQUIRE(mac_first_reg_coord.ymin == 3); + REQUIRE(mac_first_reg_coord.xmax == 11); + REQUIRE(mac_first_reg_coord.ymax == 7); } #if 0 diff --git a/vpr/test/test_xy_routing.cpp b/vpr/test/test_xy_routing.cpp index 
bc163a7308f..67517271f43 100644 --- a/vpr/test/test_xy_routing.cpp +++ b/vpr/test/test_xy_routing.cpp @@ -54,7 +54,7 @@ TEST_CASE("test_route_flow", "[vpr_noc_xy_routing]") { // add all the routers for (int i = 0; i < 4; i++) { for (int j = 0; j < 4; j++) { - noc_model.add_router((i * 4) + j, j, i); + noc_model.add_router((i * 4) + j, j, i, 0); } } @@ -222,12 +222,12 @@ TEST_CASE("test_route_flow when it fails in a mesh topology.", "[vpr_noc_xy_rout // store the reference to device grid with // this will be set to the device grid width - noc_model.set_device_grid_width((int)4); + noc_model.set_device_grid_spec((int)4, 0); // add all the routers for (int i = 0; i < 4; i++) { for (int j = 0; j < 4; j++) { - noc_model.add_router((i * 4) + j, j, i); + noc_model.add_router((i * 4) + j, j, i, 0); } } @@ -331,12 +331,12 @@ TEST_CASE("test_route_flow when it fails in a non mesh topology.", "[vpr_noc_xy_ // store the reference to device grid with // this will be set to the device grid width - noc_model.set_device_grid_width((int)4); + noc_model.set_device_grid_spec((int)4, 0); - noc_model.add_router(0, 0, 0); - noc_model.add_router(1, 2, 2); - noc_model.add_router(2, 1, 2); - noc_model.add_router(3, 3, 0); + noc_model.add_router(0, 0, 0, 0); + noc_model.add_router(1, 2, 2, 0); + noc_model.add_router(2, 1, 2, 0); + noc_model.add_router(3, 3, 0, 0); noc_model.make_room_for_noc_router_link_list(); diff --git a/vtr_flow/arch/multi_die/stratixiv_arch.timing_with_embedded_3X3_mesh_noc_topology_with_LAB_on_top.xml b/vtr_flow/arch/multi_die/3d_stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml similarity index 99% rename from vtr_flow/arch/multi_die/stratixiv_arch.timing_with_embedded_3X3_mesh_noc_topology_with_LAB_on_top.xml rename to vtr_flow/arch/multi_die/3d_stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml index bb403bb473e..0d7462a75e1 100644 --- 
a/vtr_flow/arch/multi_die/stratixiv_arch.timing_with_embedded_3X3_mesh_noc_topology_with_LAB_on_top.xml +++ b/vtr_flow/arch/multi_die/3d_stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml @@ -4417,19 +4417,37 @@ - - + + - - - + + + + + + + + + + + - + + + - - - + + + + + + + + + + + @@ -4743,29 +4761,69 @@ - - - + + + - + - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + noc_router_adapter.master_tready noc_router_adapter.master_tvalid noc_router_adapter.master_tdata[31:0] noc_router_adapter.clk noc_router_adapter.reset + noc_router_adapter.master_tstrb[3:0] noc_router_adapter.master_tkeep[3:0] noc_router_adapter.master_tid[7:0] noc_router_adapter.master_tdest[7:0] noc_router_adapter.master_tuser[7:0] noc_router_adapter.master_tlast + noc_router_adapter.slave_tready noc_router_adapter.slave_tvalid noc_router_adapter.slave_tdata[31:0] + noc_router_adapter.slave_tstrb[3:0] noc_router_adapter.slave_tkeep[3:0] noc_router_adapter.slave_tid[7:0] noc_router_adapter.slave_tdest[7:0] noc_router_adapter.slave_tuser[7:0] noc_router_adapter.slave_tlast + @@ -4837,109 +4895,81 @@ - + - - - - - - - - - - - - - + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Note the use of different priorities to avoid ambiguity on small devices --> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - + + + + + + + + + + - + + + + + + + + + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - + - + @@ -48312,7 +48433,7 @@ - - + + diff --git a/vtr_flow/arch/multi_die/sub_tiles_two_layers.xml b/vtr_flow/arch/multi_die/sub_tiles_two_layers.xml deleted file mode 100644 index 
280ac485dce..00000000000 --- a/vtr_flow/arch/multi_die/sub_tiles_two_layers.xml +++ /dev/null @@ -1,264 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - io_tile_0.in io_tile_0.out - io_tile_0.in io_tile_0.out - io_tile_0.in io_tile_0.out - io_tile_0.in io_tile_0.out - - - - - - - - - - - - - - io_tile_1.in io_tile_1.out - io_tile_1.in io_tile_1.out - io_tile_1.in io_tile_1.out - io_tile_1.in io_tile_1.out - - - - - - - - - - - - - - io_tile_2.in io_tile_2.out - io_tile_2.in io_tile_2.out - io_tile_2.in io_tile_2.out - io_tile_2.in io_tile_2.out - - - - - - - - - - - - - - - - - - - - pass_through_tile_0.in pass_through_tile_0.out - pass_through_tile_0.in pass_through_tile_0.out - pass_through_tile_0.in pass_through_tile_0.out - pass_through_tile_0.in pass_through_tile_0.out - - - - - - - - - - - - - - - - - - - - - pass_through_tile_1.in pass_through_tile_1.enable pass_through_tile_1.out - pass_through_tile_1.in pass_through_tile_1.enable pass_through_tile_1.out - pass_through_tile_1.in pass_through_tile_1.enable pass_through_tile_1.out - pass_through_tile_1.in pass_through_tile_1.enable pass_through_tile_1.out - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1 1 - 1 - - - diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1/task_list.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1/task_list.txt index 5743c2bbe74..5714a36569d 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1/task_list.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1/task_list.txt @@ -11,6 +11,6 @@ regression_tests/vtr_reg_nightly_test1/arithmetic_tasks/figure_8 regression_tests/vtr_reg_nightly_test1/arithmetic_tasks/multless_consts 
regression_tests/vtr_reg_nightly_test1/arithmetic_tasks/open_cores regression_tests/vtr_reg_nightly_test1/arithmetic_tasks/open_cores_frac -regression_tests/vtr_reg_nightly_test1/symbiflow +#regression_tests/vtr_reg_nightly_test1/symbiflow regression_tests/vtr_reg_nightly_test1/power_extended_arch_list regression_tests/vtr_reg_nightly_test1/power_extended_circuit_list diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1_odin/arithmetic_tasks/FIR_filters_frac/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1_odin/arithmetic_tasks/FIR_filters_frac/config/golden_results.txt index 436eece4174..c4ddf07fa25 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1_odin/arithmetic_tasks/FIR_filters_frac/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1_odin/arithmetic_tasks/FIR_filters_frac/config/golden_results.txt @@ -219,7 +219,7 @@ k6_frac_N8_22nm.xml fir_nopipe_35.v common 18.54 vpr 68.44 MiB 0.11 13884 -1 -1 k6_frac_N8_22nm.xml fir_nopipe_36.v common 27.33 vpr 68.89 MiB 0.16 13980 -1 -1 13 2.08 -1 -1 40032 -1 -1 153 22 0 10 success v8.0.0-7665-g5d69764bf Release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-04-24T18:37:45 betzgrp-wintermute.eecg.utoronto.ca /home/mahmo494/Desktop/RLplace_2/vtr-verilog-to-routing/vtr_flow/tasks 70548 22 19 3011 2724 1 2050 204 22 22 484 mult_36 auto 31.5 MiB 0.99 14453 68.9 MiB 0.85 0.01 13.7707 -751.494 -13.7707 13.7707 1.96 0.0058713 0.00525496 0.38135 0.334677 76 29166 47 1.29336e+07 6.02122e+06 2.20457e+06 4554.90 15.06 2.28366 2.01976 56682 573177 -1 24174 21 12121 23445 2974695 614218 0 0 2974695 614218 21988 13845 0 0 122333 110950 0 0 168746 134322 0 0 22790 15583 0 0 1310893 167979 0 0 1327945 171539 0 0 21988 0 0 9898 19163 19575 82292 1497 212 14.9018 14.9018 -1391.02 -14.9018 0 0 2.73077e+06 5642.09 0.92 0.68 0.57 -1 -1 0.92 0.223405 0.203602 1168 1801 -1 -1 -1 -1 k6_frac_N8_22nm.xml fir_nopipe_37.v common 
20.49 vpr 70.30 MiB 0.16 14328 -1 -1 13 1.92 -1 -1 39096 -1 -1 158 22 0 11 success v8.0.0-7665-g5d69764bf Release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-04-24T18:37:45 betzgrp-wintermute.eecg.utoronto.ca /home/mahmo494/Desktop/RLplace_2/vtr-verilog-to-routing/vtr_flow/tasks 71988 22 19 3132 2828 1 2123 210 24 24 576 mult_36 auto 32.1 MiB 0.83 15481 70.3 MiB 0.52 0.01 14.4868 -903.454 -14.4868 14.4868 1.47 0.00306847 0.00263064 0.207105 0.179949 74 29452 32 1.56141e+07 6.48458e+06 2.56259e+06 4448.94 8.43 1.17288 1.02856 66498 666725 -1 25337 21 11769 22731 2983928 599958 0 0 2983928 599958 21317 13737 0 0 114590 102878 0 0 157808 125386 0 0 21951 15152 0 0 1311616 173115 0 0 1356646 169690 0 0 21317 0 0 9577 19036 19669 80749 1448 22 14.8704 14.8704 -1523.2 -14.8704 0 0 3.19068e+06 5539.38 1.52 1.15 0.64 -1 -1 1.52 0.412289 0.377043 1192 1872 -1 -1 -1 -1 k6_frac_N8_22nm.xml fir_nopipe_38.v common 23.84 vpr 69.82 MiB 0.13 14456 -1 -1 13 2.29 -1 -1 39136 -1 -1 160 22 0 11 success v8.0.0-7665-g5d69764bf Release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-04-24T18:37:45 betzgrp-wintermute.eecg.utoronto.ca /home/mahmo494/Desktop/RLplace_2/vtr-verilog-to-routing/vtr_flow/tasks 71496 22 19 3159 2855 1 2172 212 24 24 576 mult_36 auto 32.5 MiB 0.64 15337 69.8 MiB 0.44 0.01 14.4084 -940.203 -14.4084 14.4084 1.48 0.00302875 0.00265189 0.178818 0.156722 74 30259 35 1.56141e+07 6.51152e+06 2.56259e+06 4448.94 12.51 1.52698 1.35053 66498 666725 -1 25578 20 12078 22960 2674420 542857 0 0 2674420 542857 21228 13939 0 0 113706 101270 0 0 158343 125181 0 0 21777 15614 0 0 1174697 148697 0 0 1184669 138156 0 0 21228 0 0 9177 19896 21161 78183 1763 104 15.2386 15.2386 -1564.1 -15.2386 0 0 3.19068e+06 5539.38 1.40 0.63 0.66 -1 -1 1.40 0.210823 0.191438 1207 1880 -1 -1 -1 -1 -k6_frac_N8_22nm.xml fir_nopipe_39.v common 28.11 vpr 70.57 MiB 0.17 14744 -1 -1 13 2.23 -1 -1 39360 -1 -1 169 22 0 11 success v8.0.0-7665-g5d69764bf 
Release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-04-24T18:37:45 betzgrp-wintermute.eecg.utoronto.ca /home/mahmo494/Desktop/RLplace_2/vtr-verilog-to-routing/vtr_flow/tasks 72264 22 19 3284 2963 1 2259 221 24 24 576 mult_36 auto 33.3 MiB 1.01 16556 70.6 MiB 0.90 0.01 14.8416 -966.85 -14.8416 14.8416 1.59 0.00650907 0.00585699 0.393855 0.346342 76 32170 30 1.56141e+07 6.63277e+06 2.61600e+06 4541.67 14.41 2.18024 1.93456 67070 679911 -1 26884 21 13810 27680 3176058 652222 0 0 3176058 652222 25655 16135 0 0 137932 124280 0 0 192955 149921 0 0 26692 18056 0 0 1400623 174048 0 0 1392201 169782 0 0 25655 0 0 11873 26622 27157 100786 2051 376 15.4681 15.4681 -1620.41 -15.4681 0 0 3.24203e+06 5628.53 1.62 1.27 0.67 -1 -1 1.62 0.473982 0.431196 1267 1957 -1 -1 -1 -1 +k6_frac_N8_22nm.xml fir_nopipe_39.v common 28.11 vpr 70.57 MiB 0.17 14744 -1 -1 13 2.23 -1 -1 39360 -1 -1 169 22 0 11 success v8.0.0-7665-g5d69764bf Release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-04-24T18:37:45 betzgrp-wintermute.eecg.utoronto.ca /home/mahmo494/Desktop/RLplace_2/vtr-verilog-to-routing/vtr_flow/tasks 72264 22 19 3284 2963 1 2259 221 24 24 576 mult_36 auto 33.3 MiB 1.01 16556 70.6 MiB 0.90 0.01 14.8416 -966.85 -14.8416 14.8416 1.59 0.00650907 0.00585699 0.393855 0.346342 76 32170 30 1.56141e+07 6.63277e+06 2.61600e+06 4541.67 19 2.18024 1.93456 67070 679911 -1 26884 21 13810 27680 3176058 652222 0 0 3176058 652222 25655 16135 0 0 137932 124280 0 0 192955 149921 0 0 26692 18056 0 0 1400623 174048 0 0 1392201 169782 0 0 25655 0 0 11873 26622 27157 100786 2051 376 15.4681 15.4681 -1620.41 -15.4681 0 0 3.24203e+06 5628.53 1.62 1.27 0.67 -1 -1 1.62 0.473982 0.431196 1267 1957 -1 -1 -1 -1 k6_frac_N8_22nm.xml fir_nopipe_40.v common 26.77 vpr 70.73 MiB 0.17 14836 -1 -1 13 2.00 -1 -1 39440 -1 -1 169 22 0 11 success v8.0.0-7665-g5d69764bf Release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-04-24T18:37:45 
betzgrp-wintermute.eecg.utoronto.ca /home/mahmo494/Desktop/RLplace_2/vtr-verilog-to-routing/vtr_flow/tasks 72424 22 19 3343 3022 1 2282 221 24 24 576 mult_36 auto 33.6 MiB 1.05 16103 70.7 MiB 0.58 0.01 14.5379 -829.329 -14.5379 14.5379 2.04 0.00343434 0.00300382 0.232702 0.203659 80 29222 36 1.56141e+07 6.63277e+06 2.72095e+06 4723.87 14.59 2.49836 2.21411 68798 719145 -1 25637 21 12426 23913 3552178 755815 0 0 3552178 755815 22411 14441 0 0 128209 114994 0 0 173105 138175 0 0 23278 16247 0 0 1607227 230429 0 0 1597948 241529 0 0 22411 0 0 10013 19456 19893 83655 1535 171 14.9564 14.9564 -1203.73 -14.9564 0 0 3.41546e+06 5929.62 1.10 0.91 0.59 -1 -1 1.10 0.2891 0.260412 1284 1997 -1 -1 -1 -1 k6_frac_N8_22nm.xml fir_nopipe_41.v common 37.06 vpr 71.28 MiB 0.18 15156 -1 -1 13 2.57 -1 -1 41032 -1 -1 175 22 0 12 success v8.0.0-7665-g5d69764bf Release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-04-24T18:37:45 betzgrp-wintermute.eecg.utoronto.ca /home/mahmo494/Desktop/RLplace_2/vtr-verilog-to-routing/vtr_flow/tasks 72988 22 19 3448 3110 1 2364 228 24 24 576 mult_36 auto 34.2 MiB 1.07 17649 71.3 MiB 1.02 0.02 14.3188 -964.321 -14.3188 14.3188 2.43 0.00675759 0.00584811 0.444538 0.388722 78 32357 39 1.56141e+07 7.1096e+06 2.67122e+06 4637.53 22.91 3.03376 2.67828 68222 705597 -1 28453 21 13551 26328 3635002 733011 0 0 3635002 733011 24522 15496 0 0 141779 127416 0 0 197677 156706 0 0 25322 17215 0 0 1581279 210240 0 0 1664423 205938 0 0 24522 0 0 11003 24627 22161 93773 1842 81 14.7304 14.7304 -1614.39 -14.7304 0 0 3.35110e+06 5817.88 1.09 0.86 0.49 -1 -1 1.09 0.25542 0.230798 1333 2054 -1 -1 -1 -1 k6_frac_N8_22nm.xml fir_nopipe_42.v common 38.22 vpr 72.71 MiB 0.19 15256 -1 -1 13 2.42 -1 -1 41432 -1 -1 179 22 0 12 success v8.0.0-7665-g5d69764bf Release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-04-24T18:37:45 betzgrp-wintermute.eecg.utoronto.ca /home/mahmo494/Desktop/RLplace_2/vtr-verilog-to-routing/vtr_flow/tasks 
74460 22 19 3510 3172 1 2403 232 24 24 576 mult_36 auto 34.7 MiB 0.87 18000 72.7 MiB 0.63 0.01 14.4441 -997.144 -14.4441 14.4441 1.61 0.00364618 0.0032181 0.254867 0.222427 78 33010 27 1.56141e+07 7.16349e+06 2.67122e+06 4637.53 24.63 3.03733 2.68228 68222 705597 -1 29079 20 13363 25835 3098611 622458 0 0 3098611 622458 24053 15542 0 0 131352 117386 0 0 185001 145637 0 0 24858 17574 0 0 1358194 166877 0 0 1375153 159442 0 0 24053 0 0 10717 23329 23655 92480 1844 32 14.9453 14.9453 -1699.6 -14.9453 0 0 3.35110e+06 5817.88 1.62 1.21 0.73 -1 -1 1.62 0.445071 0.40401 1352 2097 -1 -1 -1 -1 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1_odin/task_list.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1_odin/task_list.txt index 82389f84a1e..54defeb95f6 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1_odin/task_list.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1_odin/task_list.txt @@ -11,6 +11,6 @@ regression_tests/vtr_reg_nightly_test1_odin/arithmetic_tasks/FIR_filters_frac regression_tests/vtr_reg_nightly_test1_odin/arithmetic_tasks/multless_consts regression_tests/vtr_reg_nightly_test1_odin/arithmetic_tasks/open_cores regression_tests/vtr_reg_nightly_test1_odin/arithmetic_tasks/open_cores_frac -regression_tests/vtr_reg_nightly_test1_odin/symbiflow +#regression_tests/vtr_reg_nightly_test1_odin/symbiflow regression_tests/vtr_reg_nightly_test1_odin/power_extended_arch_list regression_tests/vtr_reg_nightly_test1_odin/power_extended_circuit_list diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_clique_topology/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_clique_topology/config/config.txt new file mode 100644 index 00000000000..2a773936a9a --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_clique_topology/config/config.txt @@ -0,0 +1,34 @@ +############################################## +# Configuration file for 
running experiments +############################################## + +# Path to directory of circuits to use +circuits_dir=benchmarks/noc/Synthetic_Designs/complex_64_noc_clique/ + +# Path to directory of architectures to use +archs_dir=arch/multi_die/ + +# Path to directory of NoC Traffic Patterns to use +noc_traffics_dir=benchmarks/noc/Synthetic_Designs/complex_64_noc_clique + +# Add circuits to list to sweep +circuit_list_add=complex_64_noc_clique.blif + +# Add architectures to list to sweep +arch_list_add=3d_stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml + +# Add NoC Traffic Patterns to list to sweep +noc_traffic_list_add=complex_64_noc_clique.flows + +# Parse info and how to parse +parse_file=vpr_noc.txt + +# How to parse QoR info +qor_parse_file=qor_noc_spec.txt + +# Pass requirements +pass_requirements_file=pass_requirements.txt + +# Script parameters +script_params =-starting_stage vpr --noc on --noc_routing_algorithm xy_routing --device "EP4SE820" --pack --place + diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_clique_topology/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_clique_topology/config/golden_results.txt new file mode 100644 index 00000000000..86934fc4bc0 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_clique_topology/config/golden_results.txt @@ -0,0 +1,2 @@ +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est 
placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time NoC_agg_bandwidth NoC_latency +stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml complex_64_noc_clique.blif common 8722.02 vpr 7.77 GiB -1 2 -1 -1 success v8.0.0-6827-g874e0cb8d-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 7.5.0 on Linux-4.15.0-167-generic x86_64 2023-01-19T13:42:08 betzgrp-wintermute.eecg.utoronto.ca /home/mahmo494/Desktop/add_noc_testcases/vtr-verilog-to-routing/vtr_flow/tasks 8148772 2 64 249332 210540 1 129121 8146 220 162 35640 -1 EP4SE820 2824.5 MiB 402.18 1227222 7957.8 MiB 792.01 4.20 6.60816 -853447 -6.60816 6.60816 2267.92 0.667678 0.54378 90.027 73.7401 154 1426225 49 0 0 3.59543e+08 10088.2 4276.17 411.681 346.038 1425419 20 357462 849967 447693681 43661832 7.19548 7.19548 -1.04483e+06 -7.19548 0 0 4.57197e+08 12828.2 417.73 79.91 33.4499 29.4545 8.4624e+09 8.0592e-05 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_nearest_neighbor_topology/config/config.txt 
b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_nearest_neighbor_topology/config/config.txt new file mode 100644 index 00000000000..e08d2586bc5 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_nearest_neighbor_topology/config/config.txt @@ -0,0 +1,34 @@ +############################################## +# Configuration file for running experiments +############################################## + +# Path to directory of circuits to use +circuits_dir=benchmarks/noc/Synthetic_Designs/complex_64_noc_nearest_neighbor/ + +# Path to directory of architectures to use +archs_dir=arch/multi_die/ + +# Path to directory of NoC Traffic Patterns to use +noc_traffics_dir=benchmarks/noc/Synthetic_Designs/complex_64_noc_nearest_neighbor + +# Add circuits to list to sweep +circuit_list_add=complex_64_noc_nearest_neighbor.blif + +# Add architectures to list to sweep +arch_list_add=3d_stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml + +# Add NoC Traffic Patterns to list to sweep +noc_traffic_list_add=complex_64_noc_nearest_neighbor.flows + +# Parse info and how to parse +parse_file=vpr_noc.txt + +# How to parse QoR info +qor_parse_file=qor_noc_spec.txt + +# Pass requirements +pass_requirements_file=pass_requirements.txt + +# Script parameters +script_params =-starting_stage vpr --noc on --noc_routing_algorithm xy_routing --device "EP4SE820" --pack --place + diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_nearest_neighbor_topology/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_nearest_neighbor_topology/config/golden_results.txt new file mode 100644 index 00000000000..7bfc23cd7a9 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_nearest_neighbor_topology/config/golden_results.txt @@ -0,0 +1,2 @@ +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error num_clb num_io num_memories 
num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time NoC_agg_bandwidth NoC_latency +stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml complex_64_noc_2D_chain.blif common 8560.06 vpr 7.77 GiB -1 2 -1 -1 success v8.0.0-6827-g874e0cb8d-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 7.5.0 on Linux-4.15.0-167-generic x86_64 2023-01-19T13:42:08 betzgrp-wintermute.eecg.utoronto.ca /home/mahmo494/Desktop/add_noc_testcases/vtr-verilog-to-routing/vtr_flow/tasks 8144244 2 32 245317 207097 1 127846 7926 220 162 35640 -1 EP4SE820 2807.4 MiB 400.67 1238130 7953.4 MiB 769.07 5.03 6.71786 -823307 -6.71786 6.71786 2196.38 0.600359 
0.532866 91.0284 76.9373 154 1432666 41 0 0 3.59543e+08 10088.2 4213.30 388.018 328.35 1435190 17 353532 839730 444668516 43599148 7.3303 7.3303 -1.03553e+06 -7.3303 0 0 4.57197e+08 12828.2 437.18 75.13 30.7833 27.281 7.4e+07 6.28e-07 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_star_topology/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_star_topology/config/config.txt new file mode 100644 index 00000000000..83f46463453 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_star_topology/config/config.txt @@ -0,0 +1,38 @@ +############################################## +# Configuration file for running experiments +############################################## + +# Path to directory of circuits to use +circuits_dir=benchmarks/noc/Synthetic_Designs/complex_64_noc_star/ + +# Path to directory of architectures to use +archs_dir=arch/multi_die/ + +# Path to directory of NoC Traffic Patterns to use +noc_traffics_dir=benchmarks/noc/Synthetic_Designs/complex_64_noc_star/ + +# Add circuits to list to sweep +circuit_list_add=complex_64_noc_star.blif + + +# Add architectures to list to sweep +arch_list_add=3d_stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml + +# Add NoC Traffic Patterns to list to sweep +noc_traffic_list_add=complex_64_noc_star_no_constraints.flows +noc_traffic_list_add=complex_64_noc_star_2_bandwidths.flows +noc_traffic_list_add=complex_64_noc_star_6_bandwidths.flows +noc_traffic_list_add=complex_64_noc_star_24_latency_constraints.flows +noc_traffic_list_add=complex_64_noc_star_63_latency_constraints.flows + +# Parse info and how to parse +parse_file=vpr_noc.txt + +# How to parse QoR info +qor_parse_file=qor_noc_spec.txt + +# Pass requirements +pass_requirements_file=pass_requirements.txt + +# Script parameters +script_params_common =-starting_stage vpr --noc on --noc_routing_algorithm xy_routing --device "EP4SE820" --pack --place diff 
--git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_star_topology/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_star_topology/config/golden_results.txt new file mode 100644 index 00000000000..785e33bf66a --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_star_topology/config/golden_results.txt @@ -0,0 +1,6 @@ +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time NoC_agg_bandwidth NoC_latency 
+stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml complex_64_noc_star.blif common_-noc_flows_file_../../../../../../../../benchmarks/noc/Synthetic_Designs/complex_64_noc_star/complex_64_noc_star_no_constraints.flows 9220.99 vpr 7.73 GiB -1 2 -1 -1 success v8.0.0-6827-g874e0cb8d-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 7.5.0 on Linux-4.15.0-167-generic x86_64 2023-01-19T13:42:08 betzgrp-pchenry /home/mahmo494/Desktop/add_noc_testcases/vtr-verilog-to-routing/vtr_flow/tasks 8108432 2 32 239118 200960 1 125464 7868 220 162 35640 -1 EP4SE820 2747.6 MiB 451.61 1045127 7918.4 MiB 842.47 4.96 7.29224 -793365 -7.29224 7.29224 2452.84 0.655239 0.571814 107.218 87.7599 154 1195521 42 0 0 3.59543e+08 10088.2 4553.02 410.733 344.98 1194844 18 337553 761991 386559130 36912927 7.62869 7.62869 -982536 -7.62869 0 0 4.57197e+08 12828.2 372.54 65.96 28.4745 25.3252 9.6e+07 5.43e-07 +stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml complex_64_noc_star.blif common_-noc_flows_file_../../../../../../../../benchmarks/noc/Synthetic_Designs/complex_64_noc_star/complex_64_noc_star_2_bandwidths.flows 9063.36 vpr 7.73 GiB -1 2 -1 -1 success v8.0.0-6827-g874e0cb8d-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 7.5.0 on Linux-4.15.0-167-generic x86_64 2023-01-19T13:42:08 betzgrp-pchenry /home/mahmo494/Desktop/add_noc_testcases/vtr-verilog-to-routing/vtr_flow/tasks 8108084 2 32 239118 200960 1 125464 7868 220 162 35640 -1 EP4SE820 2747.4 MiB 442.55 1135803 7918.1 MiB 787.79 4.52 6.81554 -811789 -6.81554 6.81554 2376.12 0.650221 0.521368 100.126 80.0299 156 1282477 29 0 0 3.63383e+08 10195.9 4524.00 344.409 286.954 1284251 18 340249 763996 395948352 37975896 7.19262 7.19262 -1.01766e+06 -7.19262 0 0 4.60857e+08 12930.9 374.68 69.31 29.5326 26.1693 1.056e+08 5.51e-07 +stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml complex_64_noc_star.blif 
common_-noc_flows_file_../../../../../../../../benchmarks/noc/Synthetic_Designs/complex_64_noc_star/complex_64_noc_star_6_bandwidths.flows 8608.75 vpr 7.73 GiB -1 2 -1 -1 success v8.0.0-6827-g874e0cb8d-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 7.5.0 on Linux-4.15.0-167-generic x86_64 2023-01-19T13:42:08 betzgrp-pchenry /home/mahmo494/Desktop/add_noc_testcases/vtr-verilog-to-routing/vtr_flow/tasks 8108236 2 32 239118 200960 1 125464 7868 220 162 35640 -1 EP4SE820 2747.7 MiB 426.92 1113432 7918.2 MiB 824.40 4.65 7.74124 -815509 -7.74124 7.74124 2331.45 0.653023 0.521789 103.46 83.6904 156 1266529 23 0 0 3.63383e+08 10195.9 4077.60 341.899 286.116 1270380 18 332509 742781 383975630 36523096 8.09967 8.09967 -988125 -8.09967 0 0 4.60857e+08 12930.9 407.61 68.56 30.06 26.6489 2.636e+08 5.51e-07 +stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml complex_64_noc_star.blif common_-noc_flows_file_../../../../../../../../benchmarks/noc/Synthetic_Designs/complex_64_noc_star/complex_64_noc_star_24_latency_constraints.flows 9546.78 vpr 7.73 GiB -1 2 -1 -1 success v8.0.0-6827-g874e0cb8d-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 7.5.0 on Linux-4.15.0-167-generic x86_64 2023-01-19T13:42:08 betzgrp-pchenry /home/mahmo494/Desktop/add_noc_testcases/vtr-verilog-to-routing/vtr_flow/tasks 8108132 2 32 239118 200960 1 125464 7868 220 162 35640 -1 EP4SE820 2747.6 MiB 423.91 1068621 7918.1 MiB 811.89 4.04 7.72717 -803754 -7.72717 7.72717 2426.52 0.710002 0.571345 102.84 83.4583 154 1224191 48 0 0 3.59543e+08 10088.2 5008.21 405.36 340.486 1223376 17 334021 746747 382394575 36624473 7.87541 7.87541 -971559 -7.87541 0 0 4.57197e+08 12828.2 337.96 63.14 27.0181 24.0375 9.6e+07 5.43e-07 +stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml complex_64_noc_star.blif common_-noc_flows_file_../../../../../../../../benchmarks/noc/Synthetic_Designs/complex_64_noc_star/complex_64_noc_star_63_latency_constraints.flows 8773.66 vpr 7.73 GiB -1 2 -1 -1 success 
v8.0.0-6827-g874e0cb8d-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 7.5.0 on Linux-4.15.0-167-generic x86_64 2023-01-19T13:42:08 betzgrp-pchenry /home/mahmo494/Desktop/add_noc_testcases/vtr-verilog-to-routing/vtr_flow/tasks 8108316 2 32 239118 200960 1 125464 7868 220 162 35640 -1 EP4SE820 2747.6 MiB 429.96 1059490 7918.3 MiB 784.75 4.56 7.39441 -807678 -7.39441 7.39441 2390.87 0.606083 0.527822 95.4872 77.8918 156 1207682 34 0 0 3.63383e+08 10195.9 4234.99 357.12 300.047 1211566 17 332447 741871 381156942 36122392 7.96259 7.96259 -1.01178e+06 -7.96259 0 0 4.60857e+08 12930.9 391.24 65.60 28.0846 24.9773 9.6e+07 5.49e-07