From 88850508b74f63b9e0a24d326693e7c0348ac3ff Mon Sep 17 00:00:00 2001 From: Alessandro Comodi Date: Mon, 15 Jun 2020 12:53:38 +0200 Subject: [PATCH 01/41] Add connection box lookahead. Signed-off-by: Alessandro Comodi --- libs/libvtrcapnproto/CMakeLists.txt | 1 + libs/libvtrcapnproto/connection_map.capnp | 19 + vpr/src/base/echo_files.cpp | 2 + vpr/src/base/echo_files.h | 1 + vpr/src/route/connection_box.cpp | 145 +++++ vpr/src/route/connection_box.h | 80 +++ .../route/connection_box_lookahead_map.cpp | 583 ++++++++++++++++++ vpr/src/route/connection_box_lookahead_map.h | 17 + vpr/src/route/router_lookahead_map_utils.cpp | 192 ++++++ vpr/src/route/router_lookahead_map_utils.h | 144 +++++ vpr/src/route/rr_graph_storage.h | 2 +- 11 files changed, 1185 insertions(+), 1 deletion(-) create mode 100644 libs/libvtrcapnproto/connection_map.capnp create mode 100644 vpr/src/route/connection_box.cpp create mode 100644 vpr/src/route/connection_box.h create mode 100644 vpr/src/route/connection_box_lookahead_map.cpp create mode 100644 vpr/src/route/connection_box_lookahead_map.h create mode 100644 vpr/src/route/router_lookahead_map_utils.cpp create mode 100644 vpr/src/route/router_lookahead_map_utils.h diff --git a/libs/libvtrcapnproto/CMakeLists.txt b/libs/libvtrcapnproto/CMakeLists.txt index 91a77b08014..d0d50dd50e9 100644 --- a/libs/libvtrcapnproto/CMakeLists.txt +++ b/libs/libvtrcapnproto/CMakeLists.txt @@ -19,6 +19,7 @@ endif() # Each schema used should appear here. set(CAPNP_DEFS place_delay_model.capnp + connection_map.capnp matrix.capnp gen/rr_graph_uxsdcxx.capnp map_lookahead.capnp diff --git a/libs/libvtrcapnproto/connection_map.capnp b/libs/libvtrcapnproto/connection_map.capnp new file mode 100644 index 00000000000..ac03ddfc9c0 --- /dev/null +++ b/libs/libvtrcapnproto/connection_map.capnp @@ -0,0 +1,19 @@ +@0x876ec83c2fea5a18; + +using Matrix = import "matrix.capnp"; + +struct VprCostEntry { + delay @0 :Float32; + congestion @1 :Float32; +} + +struct VprVector2D { + x @0 :Int64; + y @1 :Int64; +} + +struct VprCostMap { + costMap @0 :List(Matrix.Matrix(VprCostEntry)); + offset @1 :List(VprVector2D); + segmentMap @2 :List(Int64); +} diff --git a/vpr/src/base/echo_files.cpp b/vpr/src/base/echo_files.cpp index 6f8de45933b..cd4e2566f9b 100644 --- a/vpr/src/base/echo_files.cpp +++ b/vpr/src/base/echo_files.cpp @@ -114,6 +114,8 @@ void alloc_and_load_echo_file_info() { setEchoFileName(E_ECHO_CHAN_DETAILS, "chan_details.txt"); setEchoFileName(E_ECHO_SBLOCK_PATTERN, "sblock_pattern.txt"); setEchoFileName(E_ECHO_ENDPOINT_TIMING, "endpoint_timing.echo.json"); + + setEchoFileName(E_ECHO_LOOKAHEAD_MAP, "lookahead_map.echo"); } void free_echo_file_info() { diff --git a/vpr/src/base/echo_files.h b/vpr/src/base/echo_files.h index 70df3a3d962..d273c575d50 100644 --- a/vpr/src/base/echo_files.h +++ b/vpr/src/base/echo_files.h @@ -46,6 +46,7 @@ enum e_echo_files { E_ECHO_CHAN_DETAILS, E_ECHO_SBLOCK_PATTERN, E_ECHO_ENDPOINT_TIMING, + E_ECHO_LOOKAHEAD_MAP, //Timing Graphs E_ECHO_PRE_PACKING_TIMING_GRAPH, diff --git a/vpr/src/route/connection_box.cpp b/vpr/src/route/connection_box.cpp new file mode 100644 index 00000000000..1bee129f0d6 --- /dev/null +++ b/vpr/src/route/connection_box.cpp @@ -0,0 +1,145 @@ +#include "connection_box.h" +#include "vtr_assert.h" +#include "globals.h" + +ConnectionBoxes::ConnectionBoxes() + : size_(std::make_pair(0, 0)) { +} + +size_t ConnectionBoxes::num_connection_box_types() const { + return boxes_.size(); +} + +std::pair ConnectionBoxes::connection_box_grid_size() const { + return size_; +} + +const ConnectionBox* ConnectionBoxes::get_connection_box(ConnectionBoxId box) const { + if (!bool(box)) { + return nullptr; + } + + size_t index = size_t(box); + if (index >= boxes_.size()) { + return nullptr; + } + + return &boxes_.at(index); +} + +bool ConnectionBoxes::find_connection_box(int inode, + ConnectionBoxId* box_id, + std::pair* box_location, + float* site_pin_delay) const { + VTR_ASSERT(box_id != nullptr); + VTR_ASSERT(box_location != nullptr); + + if (inode >= (ssize_t)ipin_map_.size()) { + return false; + } + + const auto& conn_box_loc = ipin_map_[inode]; + if (conn_box_loc.box_id == ConnectionBoxId::INVALID()) { + return false; + } + + *box_id = conn_box_loc.box_id; + *box_location = conn_box_loc.box_location; + *site_pin_delay = conn_box_loc.site_pin_delay; + return true; +} + +// Clear IPIN map and set connection box grid size and box ids. +void ConnectionBoxes::reset_boxes(std::pair size, + const std::vector boxes) { + clear(); + + size_ = size; + boxes_ = boxes; +} + +void ConnectionBoxes::resize_nodes(size_t rr_node_size) { + ipin_map_.resize(rr_node_size); + ipin_map_.shrink_to_fit(); + canonical_loc_map_.resize(rr_node_size, + std::make_pair(-1, -1)); + canonical_loc_map_.shrink_to_fit(); +} + +void ConnectionBoxes::clear() { + ipin_map_.clear(); + size_ = std::make_pair(0, 0); + boxes_.clear(); + canonical_loc_map_.clear(); + sink_to_ipin_.clear(); +} + +void ConnectionBoxes::add_connection_box(int inode, ConnectionBoxId box_id, std::pair box_location, float site_pin_delay) { + // Ensure that box location is in bounds + VTR_ASSERT(box_location.first < size_.first); + VTR_ASSERT(box_location.second < size_.second); + + // Bounds check box_id + VTR_ASSERT(bool(box_id)); + VTR_ASSERT(size_t(box_id) < boxes_.size()); + + // Make sure sink map will not be invalidated upon insertion. + VTR_ASSERT(sink_to_ipin_.size() == 0); + + if (inode >= (ssize_t)(ipin_map_.size())) { + ipin_map_.resize(inode + 1); + } + ipin_map_[inode] = ConnBoxLoc(box_location, site_pin_delay, box_id); +} + +void ConnectionBoxes::add_canonical_loc(int inode, std::pair loc) { + VTR_ASSERT(loc.first < size_.first); + VTR_ASSERT(loc.second < size_.second); + if (inode >= (ssize_t)(canonical_loc_map_.size())) { + canonical_loc_map_.resize(inode + 1); + } + canonical_loc_map_[inode] = loc; +} + +const std::pair* ConnectionBoxes::find_canonical_loc(int inode) const { + if (inode >= (ssize_t)canonical_loc_map_.size()) { + return nullptr; + } + + const auto& canon_loc = canonical_loc_map_[inode]; + if (canon_loc.first == size_t(-1)) { + return nullptr; + } + + return &canon_loc; +} + +void ConnectionBoxes::create_sink_back_ref() { + const auto& device_ctx = g_vpr_ctx.device(); + + sink_to_ipin_.resize(device_ctx.rr_nodes.size(), {{0, 0, 0, 0}, 0}); + + for (size_t i = 0; i < device_ctx.rr_nodes.size(); ++i) { + const auto& ipin_node = device_ctx.rr_nodes[i]; + if (ipin_node.type() != IPIN) { + continue; + } + + if (ipin_map_[i].box_id == ConnectionBoxId::INVALID()) { + continue; + } + + for (auto edge : ipin_node.edges()) { + int sink_inode = ipin_node.edge_sink_node(edge); + VTR_ASSERT(device_ctx.rr_nodes[sink_inode].type() == SINK); + VTR_ASSERT(sink_to_ipin_[sink_inode].ipin_count < 4); + auto& sink_to_ipin = sink_to_ipin_[sink_inode]; + sink_to_ipin.ipin_nodes[sink_to_ipin.ipin_count++] = i; + } + } +} + +const SinkToIpin& ConnectionBoxes::find_sink_connection_boxes( + int inode) const { + return sink_to_ipin_[inode]; +} diff --git a/vpr/src/route/connection_box.h b/vpr/src/route/connection_box.h new file mode 100644 index 00000000000..25fd0ea422c --- /dev/null +++ b/vpr/src/route/connection_box.h @@ -0,0 +1,80 @@ +#ifndef CONNECTION_BOX_H +#define CONNECTION_BOX_H +// Some routing graphs have connectivity driven by types of connection boxes. +// This class relates IPIN rr nodes with connection box type and locations, used +// for connection box driven map lookahead. + +#include +#include "vtr_strong_id.h" +#include "vtr_flat_map.h" +#include "vtr_range.h" +#include + +struct connection_box_tag {}; +typedef vtr::StrongId ConnectionBoxId; + +struct ConnectionBox { + std::string name; +}; + +struct ConnBoxLoc { + ConnBoxLoc() + : box_location(std::make_pair(-1, -1)) {} + ConnBoxLoc( + const std::pair& a_box_location, + float a_site_pin_delay, + ConnectionBoxId a_box_id) + : box_location(a_box_location) + , box_id(a_box_id) + , site_pin_delay(a_site_pin_delay) {} + + std::pair box_location; + ConnectionBoxId box_id; + float site_pin_delay; +}; + +struct SinkToIpin { + int ipin_nodes[4]; + int ipin_count; +}; + +class ConnectionBoxes { + public: + ConnectionBoxes(); + + size_t num_connection_box_types() const; + std::pair connection_box_grid_size() const; + const ConnectionBox* get_connection_box(ConnectionBoxId box) const; + + bool find_connection_box(int inode, + ConnectionBoxId* box_id, + std::pair* box_location, + float* site_pin_delay) const; + const std::pair* find_canonical_loc(int inode) const; + + // Clear IPIN map and set connection box grid size and box ids. + void clear(); + void reset_boxes(std::pair size, + const std::vector boxes); + void resize_nodes(size_t rr_node_size); + + void add_connection_box(int inode, ConnectionBoxId box_id, std::pair box_location, float site_pin_delay); + void add_canonical_loc(int inode, std::pair loc); + + // Create map from SINK's back to IPIN's + // + // This must be called after all connection boxes have been added. + void create_sink_back_ref(); + const SinkToIpin& find_sink_connection_boxes( + int inode) const; + + private: + std::pair size_; + std::vector boxes_; + std::vector ipin_map_; + std::vector sink_to_ipin_; + std::vector> + canonical_loc_map_; +}; + +#endif diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp new file mode 100644 index 00000000000..2b1b3b48c34 --- /dev/null +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -0,0 +1,583 @@ +#include "connection_box_lookahead_map.h" + +#include +#include + +#include "connection_box.h" +#include "rr_node.h" +#include "router_lookahead_map_utils.h" +#include "globals.h" +#include "vtr_math.h" +#include "vtr_time.h" +#include "echo_files.h" + +#include "route_timing.h" + +#include "capnp/serialize.h" +#include "connection_map.capnp.h" +#include "ndmatrix_serdes.h" +#include "mmap_file.h" +#include "serdes_utils.h" +#include "route_common.h" + +/* we're profiling routing cost over many tracks for each wire type, so we'll + * have many cost entries at each |dx|,|dy| offset. There are many ways to + * "boil down" the many costs at each offset to a single entry for a given + * (wire type, chan_type) combination we can take the smallest cost, the + * average, median, etc. This define selects the method we use. + * + * See e_representative_entry_method */ +#define REPRESENTATIVE_ENTRY_METHOD SMALLEST + +#define REF_X 25 +#define REF_Y 23 + +static int signum(int x) { + if (x > 0) return 1; + if (x < 0) + return -1; + else + return 0; +} + +typedef std::vector, Cost_Entry>> t_routing_cost_map; +static void run_dijkstra(int start_node_ind, + t_routing_cost_map* cost_map); + +class CostMap { + public: + void set_segment_count(size_t seg_count) { + cost_map_.clear(); + offset_.clear(); + cost_map_.resize(seg_count); + offset_.resize(seg_count); + + const auto& device_ctx = g_vpr_ctx.device(); + segment_map_.resize(device_ctx.rr_nodes.size()); + for (size_t i = 0; i < segment_map_.size(); ++i) { + auto& from_node = device_ctx.rr_nodes[i]; + + int from_cost_index = from_node.cost_index(); + int from_seg_index = device_ctx.rr_indexed_data[from_cost_index].seg_index; + + segment_map_[i] = from_seg_index; + } + } + + int node_to_segment(int from_node_ind) { + return segment_map_[from_node_ind]; + } + + Cost_Entry find_cost(int from_seg_index, int delta_x, int delta_y) const { + VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); + int dx = delta_x - offset_[from_seg_index].first; + int dy = delta_y - offset_[from_seg_index].second; + const auto& cost_map = cost_map_[from_seg_index]; + + if (dx < 0) { + dx = 0; + } + if (dy < 0) { + dy = 0; + } + + if (dx >= (ssize_t)cost_map.dim_size(0)) { + dx = cost_map.dim_size(0) - 1; + } + if (dy >= (ssize_t)cost_map.dim_size(1)) { + dy = cost_map.dim_size(1) - 1; + } + + return cost_map_[from_seg_index][dx][dy]; + } + + void set_cost_map(int from_seg_index, + const t_routing_cost_map& cost_map, + e_representative_entry_method method) { + VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); + + // Find coordinate offset for this segment. + int min_dx = 0; + int min_dy = 0; + int max_dx = 0; + int max_dy = 0; + for (const auto& entry : cost_map) { + min_dx = std::min(entry.first.first, min_dx); + min_dy = std::min(entry.first.second, min_dy); + + max_dx = std::max(entry.first.first, max_dx); + max_dy = std::max(entry.first.second, max_dy); + } + + offset_[from_seg_index].first = min_dx; + offset_[from_seg_index].second = min_dy; + size_t dim_x = max_dx - min_dx + 1; + size_t dim_y = max_dy - min_dy + 1; + + vtr::NdMatrix expansion_cost_map( + {dim_x, dim_y}); + + for (const auto& entry : cost_map) { + int x = entry.first.first - min_dx; + int y = entry.first.second - min_dy; + expansion_cost_map[x][y].add_cost_entry( + method, entry.second.delay, + entry.second.congestion); + } + + cost_map_[from_seg_index] = vtr::NdMatrix( + {dim_x, dim_y}); + + /* set the lookahead cost map entries with a representative cost + * entry from routing_cost_map */ + for (unsigned ix = 0; ix < expansion_cost_map.dim_size(0); ix++) { + for (unsigned iy = 0; iy < expansion_cost_map.dim_size(1); iy++) { + cost_map_[from_seg_index][ix][iy] = expansion_cost_map[ix][iy].get_representative_cost_entry(method); + } + } + + /* find missing cost entries and fill them in by copying a nearby cost entry */ + for (unsigned ix = 0; ix < expansion_cost_map.dim_size(0); ix++) { + for (unsigned iy = 0; iy < expansion_cost_map.dim_size(1); iy++) { + Cost_Entry cost_entry = cost_map_[from_seg_index][ix][iy]; + + if (!cost_entry.valid()) { + Cost_Entry copied_entry = get_nearby_cost_entry( + from_seg_index, + offset_[from_seg_index].first + ix, + offset_[from_seg_index].second + iy); + cost_map_[from_seg_index][ix][iy] = copied_entry; + } + } + } + } + + Cost_Entry get_nearby_cost_entry(int segment_index, int x, int y) { + /* compute the slope from x,y to 0,0 and then move towards 0,0 by one + * unit to get the coordinates of the cost entry to be copied */ + + float slope; + int copy_x, copy_y; + if (x == 0 || y == 0) { + slope = std::numeric_limits::infinity(); + copy_x = x - signum(x); + copy_y = y - signum(y); + } else { + slope = (float)y / (float)x; + if (slope >= 1.0) { + copy_y = y - signum(y); + copy_x = vtr::nint((float)y / slope); + } else { + copy_x = x - signum(x); + copy_y = vtr::nint((float)x * slope); + } + } + + Cost_Entry copy_entry = find_cost(segment_index, copy_x, copy_y); + + /* if the entry to be copied is also empty, recurse */ + if (copy_entry.valid()) { + return copy_entry; + } else if (copy_x == 0 && copy_y == 0) { + return Cost_Entry(); + } + + return get_nearby_cost_entry(segment_index, copy_x, copy_y); + } + + void print_cost_map(const std::vector& segment_inf, + const char* fname) { + FILE* fp = vtr::fopen(fname, "w"); + for (size_t iseg = 0; iseg < cost_map_.size(); iseg++) { + fprintf(fp, "Seg %s(%zu) (%d, %d)\n", segment_inf.at(iseg).name.c_str(), + iseg, + offset_[iseg].first, + offset_[iseg].second); + for (size_t iy = 0; iy < cost_map_[iseg].dim_size(1); iy++) { + for (size_t ix = 0; ix < cost_map_[iseg].dim_size(0); ix++) { + fprintf(fp, "%.4g,\t", + cost_map_[iseg][ix][iy].delay); + } + fprintf(fp, "\n"); + } + fprintf(fp, "\n\n"); + } + + fclose(fp); + } + + void read(const std::string& file); + void write(const std::string& file) const; + + private: + std::vector> cost_map_; + std::vector> offset_; + std::vector segment_map_; +}; + +static CostMap g_cost_map; + +class StartNode { + public: + StartNode(int start_x, int start_y, t_rr_type rr_type, int seg_index) + : start_x_(start_x) + , start_y_(start_y) + , rr_type_(rr_type) + , seg_index_(seg_index) + , index_(0) {} + int get_next_node() { + const auto& device_ctx = g_vpr_ctx.device(); + const std::vector& channel_node_list = device_ctx.rr_node_indices[rr_type_][start_x_][start_y_][0]; + + for (; index_ < channel_node_list.size(); index_++) { + int node_ind = channel_node_list[index_]; + + if (node_ind == OPEN || device_ctx.rr_nodes[node_ind].capacity() == 0) { + continue; + } + + const std::pair* loc = device_ctx.connection_boxes.find_canonical_loc(node_ind); + if (loc == nullptr) { + continue; + } + + int node_cost_ind = device_ctx.rr_nodes[node_ind].cost_index(); + int node_seg_ind = device_ctx.rr_indexed_data[node_cost_ind].seg_index; + if (node_seg_ind == seg_index_) { + index_ += 1; + return node_ind; + } + } + + return UNDEFINED; + } + + private: + int start_x_; + int start_y_; + t_rr_type rr_type_; + int seg_index_; + size_t index_; +}; + +// Minimum size of search for channels to profile. kMinProfile results +// in searching x = [0, kMinProfile], and y = [0, kMinProfile[. +// +// Making this value larger will increase the sample size, but also the runtime +// to produce the lookahead. +static constexpr int kMinProfile = 1; + +// Maximum size of search for channels to profile. Once search is outside of +// kMinProfile distance, lookahead will stop searching once: +// - At least one channel has been profiled +// - kMaxProfile is exceeded. +static constexpr int kMaxProfile = 7; + +static void compute_connection_box_lookahead( + const std::vector& segment_inf) { + size_t num_segments = segment_inf.size(); + vtr::ScopedStartFinishTimer timer("Computing connection box lookahead map"); + + /* free previous delay map and allocate new one */ + g_cost_map.set_segment_count(segment_inf.size()); + + /* run Dijkstra's algorithm for each segment type & channel type combination */ + for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { + VTR_LOG("Creating cost map for %s(%d)\n", + segment_inf[iseg].name.c_str(), iseg); + /* allocate the cost map for this iseg/chan_type */ + t_routing_cost_map cost_map; + + int count = 0; + + int dx = 0; + int dy = 0; + //int start_x = vtr::nint(device_ctx.grid.width()/2); + //int start_y = vtr::nint(device_ctx.grid.height()/2); + int start_x = REF_X; + int start_y = REF_Y; + while ((count == 0 && dx < kMaxProfile) || dy <= kMinProfile) { + for (e_rr_type chan_type : {CHANX, CHANY}) { + StartNode start_node(start_x + dx, start_y + dy, chan_type, iseg); + + for (int start_node_ind = start_node.get_next_node(); + start_node_ind != UNDEFINED; + start_node_ind = start_node.get_next_node()) { + count += 1; + + /* run Dijkstra's algorithm */ + run_dijkstra(start_node_ind, &cost_map); + } + } + + if (dy < dx) { + dy += 1; + } else { + dx += 1; + } + } + + if (count == 0) { + VTR_LOG_WARN("Segment %s(%d) found no start_node_ind\n", + segment_inf[iseg].name.c_str(), iseg); + } + + /* boil down the cost list in routing_cost_map at each coordinate to a + * representative cost entry and store it in the lookahead cost map */ + g_cost_map.set_cost_map(iseg, cost_map, + REPRESENTATIVE_ENTRY_METHOD); + } + + if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_LOOKAHEAD_MAP)) { + g_cost_map.print_cost_map(segment_inf, getEchoFileName(E_ECHO_LOOKAHEAD_MAP)); + } +} + +static float get_connection_box_lookahead_map_cost(int from_node_ind, + int to_node_ind, + float criticality_fac) { + if (from_node_ind == to_node_ind) { + return 0.f; + } + + auto& device_ctx = g_vpr_ctx.device(); + + std::pair from_location; + std::pair to_location; + auto to_node_type = device_ctx.rr_nodes[to_node_ind].type(); + + if (to_node_type == SINK) { + const auto& sink_to_ipin = device_ctx.connection_boxes.find_sink_connection_boxes(to_node_ind); + if (sink_to_ipin.ipin_count > 1) { + float cost = std::numeric_limits::infinity(); + // Find cheapest cost from from_node_ind to IPINs for this SINK. + for (int i = 0; i < sink_to_ipin.ipin_count; ++i) { + cost = std::min(cost, + get_connection_box_lookahead_map_cost( + from_node_ind, + sink_to_ipin.ipin_nodes[i], criticality_fac)); + } + + return cost; + } else if (sink_to_ipin.ipin_count == 1) { + to_node_ind = sink_to_ipin.ipin_nodes[0]; + if (from_node_ind == to_node_ind) { + return 0.f; + } + } else { + return std::numeric_limits::infinity(); + } + } + + if (device_ctx.rr_nodes[to_node_ind].type() == IPIN) { + ConnectionBoxId box_id; + std::pair box_location; + float site_pin_delay; + bool found = device_ctx.connection_boxes.find_connection_box( + to_node_ind, &box_id, &box_location, &site_pin_delay); + if (!found) { + VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", to_node_ind); + } + + to_location = box_location; + } else { + const std::pair* to_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(to_node_ind); + if (!to_canonical_loc) { + VPR_THROW(VPR_ERROR_ROUTE, "No canonical loc for %d", to_node_ind); + } + + to_location = *to_canonical_loc; + } + + const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(from_node_ind); + if (from_canonical_loc == nullptr) { + VPR_THROW(VPR_ERROR_ROUTE, "No canonical loc for %d (to %d)", + from_node_ind, to_node_ind); + } + + ssize_t dx = ssize_t(from_canonical_loc->first) - ssize_t(to_location.first); + ssize_t dy = ssize_t(from_canonical_loc->second) - ssize_t(to_location.second); + + int from_seg_index = g_cost_map.node_to_segment(from_node_ind); + Cost_Entry cost_entry = g_cost_map.find_cost(from_seg_index, dx, dy); + float expected_delay = cost_entry.delay; + float expected_congestion = cost_entry.congestion; + + float expected_cost = criticality_fac * expected_delay + (1.0 - criticality_fac) * expected_congestion; + return expected_cost; +} + +/* runs Dijkstra's algorithm from specified node until all nodes have been + * visited. Each time a pin is visited, the delay/congestion information + * to that pin is stored to an entry in the routing_cost_map */ +static void run_dijkstra(int start_node_ind, + t_routing_cost_map* routing_cost_map) { + auto& device_ctx = g_vpr_ctx.device(); + + /* a list of boolean flags (one for each rr node) to figure out if a + * certain node has already been expanded */ + std::vector node_expanded(device_ctx.rr_nodes.size(), false); + /* for each node keep a list of the cost with which that node has been + * visited (used to determine whether to push a candidate node onto the + * expansion queue */ + std::vector node_visited_costs(device_ctx.rr_nodes.size(), -1.0); + /* a priority queue for expansion */ + std::priority_queue pq; + + /* first entry has no upstream delay or congestion */ + util::PQ_Entry first_entry(start_node_ind, UNDEFINED, 0, 0, 0, true); + + pq.push(first_entry); + + const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(start_node_ind); + if (from_canonical_loc == nullptr) { + VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d", + start_node_ind); + } + + /* now do routing */ + while (!pq.empty()) { + util::PQ_Entry current = pq.top(); + pq.pop(); + + int node_ind = current.rr_node_ind; + + /* check that we haven't already expanded from this node */ + if (node_expanded[node_ind]) { + continue; + } + + /* if this node is an ipin record its congestion/delay in the routing_cost_map */ + if (device_ctx.rr_nodes[node_ind].type() == IPIN) { + ConnectionBoxId box_id; + std::pair box_location; + float site_pin_delay; + bool found = device_ctx.connection_boxes.find_connection_box( + node_ind, &box_id, &box_location, &site_pin_delay); + if (!found) { + VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", node_ind); + } + + int delta_x = ssize_t(from_canonical_loc->first) - ssize_t(box_location.first); + int delta_y = ssize_t(from_canonical_loc->second) - ssize_t(box_location.second); + + routing_cost_map->push_back(std::make_pair( + std::make_pair(delta_x, delta_y), + Cost_Entry( + current.delay, + current.congestion_upstream))); + } + + expand_dijkstra_neighbours(current, node_visited_costs, node_expanded, pq); + node_expanded[node_ind] = true; + } +} + +void ConnectionBoxMapLookahead::compute(const std::vector& segment_inf) { + compute_connection_box_lookahead(segment_inf); +} + +float ConnectionBoxMapLookahead::get_expected_cost( + int current_node, + int target_node, + const t_conn_cost_params& params, + float /*R_upstream*/) const { + auto& device_ctx = g_vpr_ctx.device(); + + t_rr_type rr_type = device_ctx.rr_nodes[current_node].type(); + + if (rr_type == CHANX || rr_type == CHANY) { + return get_connection_box_lookahead_map_cost( + current_node, target_node, params.criticality); + } else if (rr_type == IPIN) { /* Change if you're allowing route-throughs */ + return (device_ctx.rr_indexed_data[SINK_COST_INDEX].base_cost); + } else { /* Change this if you want to investigate route-throughs */ + return (0.); + } +} + +void ConnectionBoxMapLookahead::read(const std::string& file) { + g_cost_map.read(file); +} +void ConnectionBoxMapLookahead::write(const std::string& file) const { + g_cost_map.write(file); +} + +static void ToCostEntry(Cost_Entry* out, const VprCostEntry::Reader& in) { + out->delay = in.getDelay(); + out->congestion = in.getCongestion(); +} + +static void FromCostEntry(VprCostEntry::Builder* out, const Cost_Entry& in) { + out->setDelay(in.delay); + out->setCongestion(in.congestion); +} + +void CostMap::read(const std::string& file) { + MmapFile f(file); + + ::capnp::ReaderOptions opts = default_large_capnp_opts(); + ::capnp::FlatArrayMessageReader reader(f.getData(), opts); + + auto cost_map = reader.getRoot(); + + { + const auto& segment_map = cost_map.getSegmentMap(); + segment_map_.resize(segment_map.size()); + auto dst_iter = segment_map_.begin(); + for (const auto& src : segment_map) { + *dst_iter++ = src; + } + } + + { + const auto& offset = cost_map.getOffset(); + offset_.resize(offset.size()); + auto dst_iter = offset_.begin(); + for (const auto& src : offset) { + *dst_iter++ = std::make_pair(src.getX(), src.getY()); + } + } + + { + const auto& cost_maps = cost_map.getCostMap(); + cost_map_.resize(cost_maps.size()); + auto dst_iter = cost_map_.begin(); + for (const auto& src : cost_maps) { + ToNdMatrix<2, VprCostEntry, Cost_Entry>(&(*dst_iter++), src, ToCostEntry); + } + } +} + +void CostMap::write(const std::string& file) const { + ::capnp::MallocMessageBuilder builder; + + auto cost_map = builder.initRoot(); + + { + auto segment_map = cost_map.initSegmentMap(segment_map_.size()); + for (size_t i = 0; i < segment_map_.size(); ++i) { + segment_map.set(i, segment_map_[i]); + } + } + + { + auto offset = cost_map.initOffset(offset_.size()); + for (size_t i = 0; i < offset_.size(); ++i) { + auto elem = offset[i]; + elem.setX(offset_[i].first); + elem.setY(offset_[i].second); + } + } + + { + auto cost_maps = cost_map.initCostMap(cost_map_.size()); + for (size_t i = 0; i < cost_map_.size(); ++i) { + Matrix::Builder elem = cost_maps[i]; + FromNdMatrix<2, VprCostEntry, Cost_Entry>( + &elem, cost_map_[i], FromCostEntry); + } + } + + writeMessageToFile(file, &builder); +} diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h new file mode 100644 index 00000000000..03e1a140f0b --- /dev/null +++ b/vpr/src/route/connection_box_lookahead_map.h @@ -0,0 +1,17 @@ +#ifndef CONNECTION_BOX_LOOKAHEAD_H_ +#define CONNECTION_BOX_LOOKAHEAD_H_ + +#include +#include "physical_types.h" +#include "router_lookahead.h" + +class ConnectionBoxMapLookahead : public RouterLookahead { + public: + float get_expected_cost(int node, int target_node, const t_conn_cost_params& params, float R_upstream) const override; + void compute(const std::vector& segment_inf) override; + + void read(const std::string& file) override; + void write(const std::string& file) const override; +}; + +#endif diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp new file mode 100644 index 00000000000..d50aa372a5f --- /dev/null +++ b/vpr/src/route/router_lookahead_map_utils.cpp @@ -0,0 +1,192 @@ +#include "router_lookahead_map_utils.h" + +#include "globals.h" +#include "vpr_context.h" +#include "vtr_math.h" + +/* Number of CLBs I think the average conn. goes. */ +static const int CLB_DIST = 3; + +util::PQ_Entry::PQ_Entry( + int set_rr_node_ind, + int switch_ind, + float parent_delay, + float parent_R_upstream, + float parent_congestion_upstream, + bool starting_node) { + this->rr_node_ind = set_rr_node_ind; + + auto& device_ctx = g_vpr_ctx.device(); + this->delay = parent_delay; + this->congestion_upstream = parent_congestion_upstream; + this->R_upstream = parent_R_upstream; + if (!starting_node) { + int cost_index = device_ctx.rr_nodes[set_rr_node_ind].cost_index(); + + float Tsw = device_ctx.rr_switch_inf[switch_ind].Tdel; + float Rsw = device_ctx.rr_switch_inf[switch_ind].R; + float Cnode = device_ctx.rr_nodes[set_rr_node_ind].C(); + float Rnode = device_ctx.rr_nodes[set_rr_node_ind].R(); + + float T_linear = 0.f; + float T_quadratic = 0.f; + if (device_ctx.rr_switch_inf[switch_ind].buffered()) { + T_linear = Tsw + Rsw * Cnode + 0.5 * Rnode * Cnode; + T_quadratic = 0.; + } else { /* Pass transistor */ + T_linear = Tsw + 0.5 * Rsw * Cnode; + T_quadratic = (Rsw + Rnode) * 0.5 * Cnode; + } + + float base_cost; + if (device_ctx.rr_indexed_data[cost_index].inv_length < 0) { + base_cost = device_ctx.rr_indexed_data[cost_index].base_cost; + } else { + float frac_num_seg = CLB_DIST * device_ctx.rr_indexed_data[cost_index].inv_length; + + base_cost = frac_num_seg * T_linear + + frac_num_seg * frac_num_seg * T_quadratic; + } + + VTR_ASSERT(T_linear >= 0.); + VTR_ASSERT(base_cost >= 0.); + this->delay += T_linear; + + this->congestion_upstream += base_cost; + } + + /* set the cost of this node */ + this->cost = this->delay; +} + +/* returns cost entry with the smallest delay */ +Cost_Entry Expansion_Cost_Entry::get_smallest_entry() const { + Cost_Entry smallest_entry; + + for (auto entry : this->cost_vector) { + if (!smallest_entry.valid() || entry.delay < smallest_entry.delay) { + smallest_entry = entry; + } + } + + return smallest_entry; +} + +/* returns a cost entry that represents the average of all the recorded entries */ +Cost_Entry Expansion_Cost_Entry::get_average_entry() const { + float avg_delay = 0; + float avg_congestion = 0; + + for (auto cost_entry : this->cost_vector) { + avg_delay += cost_entry.delay; + avg_congestion += cost_entry.congestion; + } + + avg_delay /= (float)this->cost_vector.size(); + avg_congestion /= (float)this->cost_vector.size(); + + return Cost_Entry(avg_delay, avg_congestion); +} + +/* returns a cost entry that represents the geomean of all the recorded entries */ +Cost_Entry Expansion_Cost_Entry::get_geomean_entry() const { + float geomean_delay = 0; + float geomean_cong = 0; + for (auto cost_entry : this->cost_vector) { + geomean_delay += log(cost_entry.delay); + geomean_cong += log(cost_entry.congestion); + } + + geomean_delay = exp(geomean_delay / (float)this->cost_vector.size()); + geomean_cong = exp(geomean_cong / (float)this->cost_vector.size()); + + return Cost_Entry(geomean_delay, geomean_cong); +} + +/* returns a cost entry that represents the medial of all recorded entries */ +Cost_Entry Expansion_Cost_Entry::get_median_entry() const { + /* find median by binning the delays of all entries and then chosing the bin + * with the largest number of entries */ + + int num_bins = 10; + + /* find entries with smallest and largest delays */ + Cost_Entry min_del_entry; + Cost_Entry max_del_entry; + for (auto entry : this->cost_vector) { + if (!min_del_entry.valid() || entry.delay < min_del_entry.delay) { + min_del_entry = entry; + } + if (!max_del_entry.valid() || entry.delay > max_del_entry.delay) { + max_del_entry = entry; + } + } + + /* get the bin size */ + float delay_diff = max_del_entry.delay - min_del_entry.delay; + float bin_size = delay_diff / (float)num_bins; + + /* sort the cost entries into bins */ + std::vector > entry_bins(num_bins, std::vector()); + for (auto entry : this->cost_vector) { + float bin_num = floor((entry.delay - min_del_entry.delay) / bin_size); + + VTR_ASSERT(vtr::nint(bin_num) >= 0 && vtr::nint(bin_num) <= num_bins); + if (vtr::nint(bin_num) == num_bins) { + /* largest entry will otherwise have an out-of-bounds bin number */ + bin_num -= 1; + } + entry_bins[vtr::nint(bin_num)].push_back(entry); + } + + /* find the bin with the largest number of elements */ + int largest_bin = 0; + int largest_size = 0; + for (int ibin = 0; ibin < num_bins; ibin++) { + if (entry_bins[ibin].size() > (unsigned)largest_size) { + largest_bin = ibin; + largest_size = (unsigned)entry_bins[ibin].size(); + } + } + + /* get the representative delay of the largest bin */ + Cost_Entry representative_entry = entry_bins[largest_bin][0]; + + return representative_entry; +} + +/* iterates over the children of the specified node and selectively pushes them onto the priority queue */ +void expand_dijkstra_neighbours(util::PQ_Entry parent_entry, + std::vector& node_visited_costs, + std::vector& node_expanded, + std::priority_queue& pq) { + auto& device_ctx = g_vpr_ctx.device(); + + int parent_ind = parent_entry.rr_node_ind; + + auto& parent_node = device_ctx.rr_nodes[parent_ind]; + + for (int iedge = 0; iedge < parent_node.num_edges(); iedge++) { + int child_node_ind = parent_node.edge_sink_node(iedge); + int switch_ind = parent_node.edge_switch(iedge); + + /* skip this child if it has already been expanded from */ + if (node_expanded[child_node_ind]) { + continue; + } + + util::PQ_Entry child_entry(child_node_ind, switch_ind, parent_entry.delay, + parent_entry.R_upstream, parent_entry.congestion_upstream, false); + + VTR_ASSERT(child_entry.cost >= 0); + + /* skip this child if it has been visited with smaller cost */ + if (node_visited_costs[child_node_ind] >= 0 && node_visited_costs[child_node_ind] < child_entry.cost) { + continue; + } + + /* finally, record the cost with which the child was visited and put the child entry on the queue */ + node_visited_costs[child_node_ind] = child_entry.cost; + pq.push(child_entry); + } +} diff --git a/vpr/src/route/router_lookahead_map_utils.h b/vpr/src/route/router_lookahead_map_utils.h new file mode 100644 index 00000000000..af37006669a --- /dev/null +++ b/vpr/src/route/router_lookahead_map_utils.h @@ -0,0 +1,144 @@ +#ifndef ROUTER_LOOKAHEAD_MAP_UTILS_H_ +#define ROUTER_LOOKAHEAD_MAP_UTILS_H_ +/* + * The router lookahead provides an estimate of the cost from an intermediate node to the target node + * during directed (A*-like) routing. + * + * The VPR 7.0 lookahead (route/route_timing.c ==> get_timing_driven_expected_cost) lower-bounds the remaining delay and + * congestion by assuming that a minimum number of wires, of the same type as the current node being expanded, can be used + * to complete the route. While this method is efficient, it can run into trouble with architectures that use + * multiple interconnected wire types. + * + * The lookahead in this file pre-computes delay/congestion costs up and to the right of a starting tile. This generates + * delay/congestion tables for {CHANX, CHANY} channel types, over all wire types defined in the architecture file. + * See Section 3.2.4 in Oleg Petelin's MASc thesis (2016) for more discussion. + * + */ + +#include +#include +#include +#include +#include "vpr_types.h" + +/* when a list of delay/congestion entries at a coordinate in Cost_Entry is boiled down to a single + * representative entry, this enum is passed-in to specify how that representative entry should be + * calculated */ +enum e_representative_entry_method { + FIRST = 0, //the first cost that was recorded + SMALLEST, //the smallest-delay cost recorded + AVERAGE, + GEOMEAN, + MEDIAN +}; + +/* f_cost_map is an array of these cost entries that specifies delay/congestion estimates + * to travel relative x/y distances */ +class Cost_Entry { + public: + float delay; + float congestion; + + Cost_Entry() { + delay = std::numeric_limits::infinity(); + congestion = std::numeric_limits::infinity(); + } + Cost_Entry(float set_delay, float set_congestion) { + delay = set_delay; + congestion = set_congestion; + } + + bool valid() const { + return std::isfinite(delay) && std::isfinite(congestion); + } +}; + +/* a class that stores delay/congestion information for a given relative coordinate during the Dijkstra expansion. + * since it stores multiple cost entries, it is later boiled down to a single representative cost entry to be stored + * in the final lookahead cost map */ +class Expansion_Cost_Entry { + private: + std::vector cost_vector; + + Cost_Entry get_smallest_entry() const; + Cost_Entry get_average_entry() const; + Cost_Entry get_geomean_entry() const; + Cost_Entry get_median_entry() const; + + public: + void add_cost_entry(e_representative_entry_method method, + float add_delay, + float add_congestion) { + Cost_Entry cost_entry(add_delay, add_congestion); + if (method == SMALLEST) { + /* taking the smallest-delay entry anyway, so no need to push back multple entries */ + if (this->cost_vector.empty()) { + this->cost_vector.push_back(cost_entry); + } else { + if (add_delay < this->cost_vector[0].delay) { + this->cost_vector[0] = cost_entry; + } + } + } else { + this->cost_vector.push_back(cost_entry); + } + } + void clear_cost_entries() { + this->cost_vector.clear(); + } + + Cost_Entry get_representative_cost_entry(e_representative_entry_method method) const { + Cost_Entry entry; + + if (!cost_vector.empty()) { + switch (method) { + case FIRST: + entry = cost_vector[0]; + break; + case SMALLEST: + entry = this->get_smallest_entry(); + break; + case AVERAGE: + entry = this->get_average_entry(); + break; + case GEOMEAN: + entry = this->get_geomean_entry(); + break; + case MEDIAN: + entry = this->get_median_entry(); + break; + default: + break; + } + } + return entry; + } +}; + +namespace util { +/* a class that represents an entry in the Dijkstra expansion priority queue */ +class PQ_Entry { + public: + int rr_node_ind; //index in device_ctx.rr_nodes that this entry represents + float cost; //the cost of the path to get to this node + + /* store backward delay, R and congestion info */ + float delay; + float R_upstream; + float congestion_upstream; + + PQ_Entry(int set_rr_node_ind, int /*switch_ind*/, float parent_delay, float parent_R_upstream, float parent_congestion_upstream, bool starting_node); + + bool operator<(const PQ_Entry& obj) const { + /* inserted into max priority queue so want queue entries with a lower cost to be greater */ + return (this->cost > obj.cost); + } +}; +} // namespace util + +void expand_dijkstra_neighbours(util::PQ_Entry parent_entry, + std::vector& node_visited_costs, + std::vector& node_expanded, + std::priority_queue& pq); + +#endif diff --git a/vpr/src/route/rr_graph_storage.h b/vpr/src/route/rr_graph_storage.h index cc0207ee123..66837ef5f78 100644 --- a/vpr/src/route/rr_graph_storage.h +++ b/vpr/src/route/rr_graph_storage.h @@ -48,7 +48,7 @@ * This field is valid only for IPINs and OPINs and should be ignored * * otherwise. */ struct alignas(16) t_rr_node_data { - int8_t cost_index_ = -1; + int16_t cost_index_ = -1; int16_t rc_index_ = -1; int16_t xlow_ = -1; From 4f49a02143d7fd7e0e6c4e19cc22e1aea189ddbc Mon Sep 17 00:00:00 2001 From: Alessandro Comodi Date: Mon, 15 Jun 2020 12:55:16 +0200 Subject: [PATCH 02/41] Integrate upstream uxsdcxx parser with connection box lookahead Signed-off-by: Alessandro Comodi --- .../gen/rr_graph_uxsdcxx.capnp | 33 +- vpr/src/base/read_options.cpp | 15 +- vpr/src/base/vpr_context.h | 3 + vpr/src/base/vpr_types.h | 5 +- vpr/src/route/gen/rr_graph_uxsdcxx.h | 615 +++++++++++++++++- vpr/src/route/gen/rr_graph_uxsdcxx_capnp.h | 125 ++++ .../route/gen/rr_graph_uxsdcxx_interface.h | 74 +++ vpr/src/route/router_lookahead.cpp | 3 + vpr/src/route/rr_graph.cpp | 1 + vpr/src/route/rr_graph.xsd | 29 + vpr/src/route/rr_graph_reader.cpp | 1 + vpr/src/route/rr_graph_uxsdcxx_serializer.h | 170 +++++ vpr/src/route/rr_graph_writer.cpp | 1 + 13 files changed, 1060 insertions(+), 15 deletions(-) diff --git a/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp b/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp index 145245b3d49..6ccf1055434 100644 --- a/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp +++ b/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp @@ -137,6 +137,18 @@ struct BlockTypes { blockTypes @0 :List(BlockType); } +struct ConnectionBoxDeclaration { + id @0 :UInt32; + name @1 :Text; +} + +struct ConnectionBoxes { + numBoxes @0 :UInt32; + xDim @1 :UInt32; + yDim @2 :UInt32; + connectionBoxes @3 :List(ConnectionBoxDeclaration); +} + struct GridLoc { blockTypeId @0 :Int32; heightOffset @1 :Int32; @@ -176,6 +188,18 @@ struct Metadata { metas @0 :List(Meta); } +struct CanonicalLoc { + x @0 :UInt32; + y @1 :UInt32; +} + +struct ConnectionBoxAnnotation { + id @0 :UInt32; + sitePinDelay @1 :Float32; + x @2 :UInt32; + y @3 :UInt32; +} + struct Node { capacity @0 :UInt32; direction @1 :NodeDirection; @@ -185,6 +209,8 @@ struct Node { timing @5 :NodeTiming; segment @6 :NodeSegment; metadata @7 :Metadata; + canonicalLoc @8 :CanonicalLoc; + connectionBox @9 :ConnectionBoxAnnotation; } struct RrNodes { @@ -210,7 +236,8 @@ struct RrGraph { switches @4 :Switches; segments @5 :Segments; blockTypes @6 :BlockTypes; - grid @7 :GridLocs; - rrNodes @8 :RrNodes; - rrEdges @9 :RrEdges; + connectionBoxes @7 :ConnectionBoxes; + grid @8 :GridLocs; + rrNodes @9 :RrNodes; + rrEdges @10 :RrEdges; } diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 2c54f90394b..af12ec39311 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -692,6 +692,8 @@ struct ParseRouterLookahead { conv_value.set_value(e_router_lookahead::CLASSIC); else if (str == "map") conv_value.set_value(e_router_lookahead::MAP); + else if (str == "connection_box_map") + conv_value.set_value(e_router_lookahead::CONNECTION_BOX_MAP); else { std::stringstream msg; msg << "Invalid conversion from '" @@ -705,17 +707,22 @@ struct ParseRouterLookahead { ConvertedValue to_str(e_router_lookahead val) { ConvertedValue conv_value; - if (val == e_router_lookahead::CLASSIC) + if (val == e_router_lookahead::CLASSIC) { conv_value.set_value("classic"); - else { - VTR_ASSERT(val == e_router_lookahead::MAP); + } else if (val == e_router_lookahead::MAP) { conv_value.set_value("map"); + } else if (val == e_router_lookahead::CONNECTION_BOX_MAP) { + conv_value.set_value("connection_box_map"); + } else { + std::stringstream msg; + msg << "Unrecognized e_router_lookahead"; + conv_value.set_error(msg.str()); } return conv_value; } std::vector default_choices() { - return {"classic", "map"}; + return {"classic", "map", "connection_box_map"}; } }; diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 302a93b6b8e..1083c920a1b 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -24,6 +24,7 @@ #include "place_macro.h" #include "compressed_grid.h" #include "metadata_storage.h" +#include "connection_box.h" //A Context is collection of state relating to a particular part of VPR // @@ -189,6 +190,8 @@ struct DeviceContext : public Context { // Name of rrgraph file read (if any). // Used to determine when reading rrgraph if file is already loaded. std::string read_rr_graph_filename; + + ConnectionBoxes connection_boxes; }; //State relating to power analysis diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 3b4b661eb07..c282d9fde60 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -108,7 +108,10 @@ constexpr const char* EMPTY_BLOCK_NAME = "EMPTY"; enum class e_router_lookahead { CLASSIC, //VPR's classic lookahead (assumes uniform wire types) MAP, //Lookahead considering different wire types (see Oleg Petelin's MASc Thesis) - NO_OP //A no-operation lookahead which always returns zero + NO_OP, //A no-operation lookahead which always returns zero + CONNECTION_BOX_MAP, + // Lookahead considering different wire types and IPIN + // connection box. }; enum class e_route_bb_update { diff --git a/vpr/src/route/gen/rr_graph_uxsdcxx.h b/vpr/src/route/gen/rr_graph_uxsdcxx.h index 78aade3b638..53b2e63772c 100644 --- a/vpr/src/route/gen/rr_graph_uxsdcxx.h +++ b/vpr/src/route/gen/rr_graph_uxsdcxx.h @@ -80,6 +80,12 @@ inline void load_block_type_required_attributes(const pugi::xml_node& root, int* template inline void load_block_types(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); template +inline void load_connection_box_declaration(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); +inline void load_connection_box_declaration_required_attributes(const pugi::xml_node& root, unsigned int* id, const std::function* report_error); +template +inline void load_connection_boxes(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); +inline void load_connection_boxes_required_attributes(const pugi::xml_node& root, unsigned int* num_boxes, unsigned int* x_dim, unsigned int* y_dim, const std::function* report_error); +template inline void load_grid_loc(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); inline void load_grid_loc_required_attributes(const pugi::xml_node& root, int* block_type_id, int* height_offset, int* width_offset, int* x, int* y, const std::function* report_error); template @@ -98,6 +104,12 @@ inline void load_meta(const pugi::xml_node& root, T& out, Context& context, cons template inline void load_metadata(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); template +inline void load_canonical_loc(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); +inline void load_canonical_loc_required_attributes(const pugi::xml_node& root, unsigned int* x, unsigned int* y, const std::function* report_error); +template +inline void load_connection_box_annotation(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); +inline void load_connection_box_annotation_required_attributes(const pugi::xml_node& root, unsigned int* id, float* site_pin_delay, unsigned int* x, unsigned int* y, const std::function* report_error); +template inline void load_node(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); inline void load_node_required_attributes(const pugi::xml_node& root, unsigned int* capacity, unsigned int* id, enum_node_type* type, const std::function* report_error); template @@ -130,6 +142,8 @@ inline void write_block_type(T& in, std::ostream& os, const void* data, void* it template inline void write_block_types(T& in, std::ostream& os, const void* data, void* iter); template +inline void write_connection_boxes(T& in, std::ostream& os, const void* data, void* iter); +template inline void write_grid_locs(T& in, std::ostream& os, const void* data, void* iter); template inline void write_meta(T& in, std::ostream& os, const void* data, void* iter); @@ -286,6 +300,17 @@ constexpr const char* atok_lookup_t_block_type[] = {"height", "id", "name", "wid enum class gtok_t_block_types { BLOCK_TYPE }; constexpr const char* gtok_lookup_t_block_types[] = {"block_type"}; +enum class atok_t_connection_box_declaration { ID, + NAME }; +constexpr const char* atok_lookup_t_connection_box_declaration[] = {"id", "name"}; + +enum class gtok_t_connection_boxes { CONNECTION_BOX }; +constexpr const char* gtok_lookup_t_connection_boxes[] = {"connection_box"}; +enum class atok_t_connection_boxes { NUM_BOXES, + X_DIM, + Y_DIM }; +constexpr const char* atok_lookup_t_connection_boxes[] = {"num_boxes", "x_dim", "y_dim"}; + enum class atok_t_grid_loc { BLOCK_TYPE_ID, HEIGHT_OFFSET, WIDTH_OFFSET, @@ -316,11 +341,24 @@ constexpr const char* atok_lookup_t_meta[] = {"name"}; enum class gtok_t_metadata { META }; constexpr const char* gtok_lookup_t_metadata[] = {"meta"}; + +enum class atok_t_canonical_loc { X, + Y }; +constexpr const char* atok_lookup_t_canonical_loc[] = {"x", "y"}; + +enum class atok_t_connection_box_annotation { ID, + SITE_PIN_DELAY, + X, + Y }; +constexpr const char* atok_lookup_t_connection_box_annotation[] = {"id", "site_pin_delay", "x", "y"}; + enum class gtok_t_node { LOC, TIMING, SEGMENT, - METADATA }; -constexpr const char* gtok_lookup_t_node[] = {"loc", "timing", "segment", "metadata"}; + METADATA, + CANONICAL_LOC, + CONNECTION_BOX }; +constexpr const char* gtok_lookup_t_node[] = {"loc", "timing", "segment", "metadata", "canonical_loc", "connection_box"}; enum class atok_t_node { CAPACITY, DIRECTION, ID, @@ -342,10 +380,11 @@ enum class gtok_t_rr_graph { CHANNELS, SWITCHES, SEGMENTS, BLOCK_TYPES, + CONNECTION_BOXES, GRID, RR_NODES, RR_EDGES }; -constexpr const char* gtok_lookup_t_rr_graph[] = {"channels", "switches", "segments", "block_types", "grid", "rr_nodes", "rr_edges"}; +constexpr const char* gtok_lookup_t_rr_graph[] = {"channels", "switches", "segments", "block_types", "connection_boxes", "grid", "rr_nodes", "rr_edges"}; enum class atok_t_rr_graph { TOOL_COMMENT, TOOL_NAME, TOOL_VERSION }; @@ -1143,6 +1182,122 @@ inline gtok_t_block_types lex_node_t_block_types(const char* in, const std::func noreturn_report(report_error, ("Found unrecognized child " + std::string(in) + " of .").c_str()); } +inline atok_t_connection_box_declaration lex_attr_t_connection_box_declaration(const char* in, const std::function* report_error) { + unsigned int len = strlen(in); + switch (len) { + case 2: + switch (in[0]) { + case onechar('i', 0, 8): + switch (in[1]) { + case onechar('d', 0, 8): + return atok_t_connection_box_declaration::ID; + break; + default: + break; + } + break; + default: + break; + } + break; + case 4: + switch (*((triehash_uu32*)&in[0])) { + case onechar('n', 0, 32) | onechar('a', 8, 32) | onechar('m', 16, 32) | onechar('e', 24, 32): + return atok_t_connection_box_declaration::NAME; + break; + default: + break; + } + break; + default: + break; + } + noreturn_report(report_error, ("Found unrecognized attribute " + std::string(in) + " of .").c_str()); +} + +inline gtok_t_connection_boxes lex_node_t_connection_boxes(const char* in, const std::function* report_error) { + unsigned int len = strlen(in); + switch (len) { + case 14: + switch (*((triehash_uu64*)&in[0])) { + case onechar('c', 0, 64) | onechar('o', 8, 64) | onechar('n', 16, 64) | onechar('n', 24, 64) | onechar('e', 32, 64) | onechar('c', 40, 64) | onechar('t', 48, 64) | onechar('i', 56, 64): + switch (*((triehash_uu32*)&in[8])) { + case onechar('o', 0, 32) | onechar('n', 8, 32) | onechar('_', 16, 32) | onechar('b', 24, 32): + switch (in[12]) { + case onechar('o', 0, 8): + switch (in[13]) { + case onechar('x', 0, 8): + return gtok_t_connection_boxes::CONNECTION_BOX; + break; + default: + break; + } + break; + default: + break; + } + break; + default: + break; + } + break; + default: + break; + } + break; + default: + break; + } + noreturn_report(report_error, ("Found unrecognized child " + std::string(in) + " of .").c_str()); +} +inline atok_t_connection_boxes lex_attr_t_connection_boxes(const char* in, const std::function* report_error) { + unsigned int len = strlen(in); + switch (len) { + case 5: + switch (*((triehash_uu32*)&in[0])) { + case onechar('x', 0, 32) | onechar('_', 8, 32) | onechar('d', 16, 32) | onechar('i', 24, 32): + switch (in[4]) { + case onechar('m', 0, 8): + return atok_t_connection_boxes::X_DIM; + break; + default: + break; + } + break; + case onechar('y', 0, 32) | onechar('_', 8, 32) | onechar('d', 16, 32) | onechar('i', 24, 32): + switch (in[4]) { + case onechar('m', 0, 8): + return atok_t_connection_boxes::Y_DIM; + break; + default: + break; + } + break; + default: + break; + } + break; + case 9: + switch (*((triehash_uu64*)&in[0])) { + case onechar('n', 0, 64) | onechar('u', 8, 64) | onechar('m', 16, 64) | onechar('_', 24, 64) | onechar('b', 32, 64) | onechar('o', 40, 64) | onechar('x', 48, 64) | onechar('e', 56, 64): + switch (in[8]) { + case onechar('s', 0, 8): + return atok_t_connection_boxes::NUM_BOXES; + break; + default: + break; + } + break; + default: + break; + } + break; + default: + break; + } + noreturn_report(report_error, ("Found unrecognized attribute " + std::string(in) + " of .").c_str()); +} + inline atok_t_grid_loc lex_attr_t_grid_loc(const char* in, const std::function* report_error) { unsigned int len = strlen(in); switch (len) { @@ -1389,6 +1544,90 @@ inline gtok_t_metadata lex_node_t_metadata(const char* in, const std::function.").c_str()); } +inline atok_t_canonical_loc lex_attr_t_canonical_loc(const char* in, const std::function* report_error) { + unsigned int len = strlen(in); + switch (len) { + case 1: + switch (in[0]) { + case onechar('x', 0, 8): + return atok_t_canonical_loc::X; + break; + case onechar('y', 0, 8): + return atok_t_canonical_loc::Y; + break; + default: + break; + } + break; + default: + break; + } + noreturn_report(report_error, ("Found unrecognized attribute " + std::string(in) + " of .").c_str()); +} + +inline atok_t_connection_box_annotation lex_attr_t_connection_box_annotation(const char* in, const std::function* report_error) { + unsigned int len = strlen(in); + switch (len) { + case 1: + switch (in[0]) { + case onechar('x', 0, 8): + return atok_t_connection_box_annotation::X; + break; + case onechar('y', 0, 8): + return atok_t_connection_box_annotation::Y; + break; + default: + break; + } + break; + case 2: + switch (in[0]) { + case onechar('i', 0, 8): + switch (in[1]) { + case onechar('d', 0, 8): + return atok_t_connection_box_annotation::ID; + break; + default: + break; + } + break; + default: + break; + } + break; + case 14: + switch (*((triehash_uu64*)&in[0])) { + case onechar('s', 0, 64) | onechar('i', 8, 64) | onechar('t', 16, 64) | onechar('e', 24, 64) | onechar('_', 32, 64) | onechar('p', 40, 64) | onechar('i', 48, 64) | onechar('n', 56, 64): + switch (*((triehash_uu32*)&in[8])) { + case onechar('_', 0, 32) | onechar('d', 8, 32) | onechar('e', 16, 32) | onechar('l', 24, 32): + switch (in[12]) { + case onechar('a', 0, 8): + switch (in[13]) { + case onechar('y', 0, 8): + return atok_t_connection_box_annotation::SITE_PIN_DELAY; + break; + default: + break; + } + break; + default: + break; + } + break; + default: + break; + } + break; + default: + break; + } + break; + default: + break; + } + noreturn_report(report_error, ("Found unrecognized attribute " + std::string(in) + " of .").c_str()); +} + inline gtok_t_node lex_node_t_node(const char* in, const std::function* report_error) { unsigned int len = strlen(in); switch (len) { @@ -1470,6 +1709,54 @@ inline gtok_t_node lex_node_t_node(const char* in, const std::function* report_error) { + std::bitset<2> astate = 0; + for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { + atok_t_connection_box_declaration in = lex_attr_t_connection_box_declaration(attr.name(), report_error); + if (astate[(int)in] == 0) + astate[(int)in] = 1; + else + noreturn_report(report_error, ("Duplicate attribute " + std::string(attr.name()) + " in .").c_str()); + switch (in) { + case atok_t_connection_box_declaration::ID: + *id = load_unsigned_int(attr.value(), report_error); + break; + case atok_t_connection_box_declaration::NAME: + /* Attribute name set after element init */ + break; + default: + break; /* Not possible. */ + } + } + std::bitset<2> test_astate = astate | std::bitset<2>(0b00); + if (!test_astate.all()) attr_error(test_astate, atok_lookup_t_connection_box_declaration, report_error); +} + +inline void load_connection_boxes_required_attributes(const pugi::xml_node& root, unsigned int* num_boxes, unsigned int* x_dim, unsigned int* y_dim, const std::function* report_error) { + std::bitset<3> astate = 0; + for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { + atok_t_connection_boxes in = lex_attr_t_connection_boxes(attr.name(), report_error); + if (astate[(int)in] == 0) + astate[(int)in] = 1; + else + noreturn_report(report_error, ("Duplicate attribute " + std::string(attr.name()) + " in .").c_str()); + switch (in) { + case atok_t_connection_boxes::NUM_BOXES: + *num_boxes = load_unsigned_int(attr.value(), report_error); + break; + case atok_t_connection_boxes::X_DIM: + *x_dim = load_unsigned_int(attr.value(), report_error); + break; + case atok_t_connection_boxes::Y_DIM: + *y_dim = load_unsigned_int(attr.value(), report_error); + break; + default: + break; /* Not possible. */ + } + } + std::bitset<3> test_astate = astate | std::bitset<3>(0b000); + if (!test_astate.all()) attr_error(test_astate, atok_lookup_t_connection_boxes, report_error); +} + inline void load_grid_loc_required_attributes(const pugi::xml_node& root, int* block_type_id, int* height_offset, int* width_offset, int* x, int* y, const std::function* report_error) { std::bitset<5> astate = 0; for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { @@ -2488,6 +2839,58 @@ inline void load_node_segment_required_attributes(const pugi::xml_node& root, in if (!test_astate.all()) attr_error(test_astate, atok_lookup_t_node_segment, report_error); } +inline void load_canonical_loc_required_attributes(const pugi::xml_node& root, unsigned int* x, unsigned int* y, const std::function* report_error) { + std::bitset<2> astate = 0; + for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { + atok_t_canonical_loc in = lex_attr_t_canonical_loc(attr.name(), report_error); + if (astate[(int)in] == 0) + astate[(int)in] = 1; + else + noreturn_report(report_error, ("Duplicate attribute " + std::string(attr.name()) + " in .").c_str()); + switch (in) { + case atok_t_canonical_loc::X: + *x = load_unsigned_int(attr.value(), report_error); + break; + case atok_t_canonical_loc::Y: + *y = load_unsigned_int(attr.value(), report_error); + break; + default: + break; /* Not possible. */ + } + } + std::bitset<2> test_astate = astate | std::bitset<2>(0b00); + if (!test_astate.all()) attr_error(test_astate, atok_lookup_t_canonical_loc, report_error); +} + +inline void load_connection_box_annotation_required_attributes(const pugi::xml_node& root, unsigned int* id, float* site_pin_delay, unsigned int* x, unsigned int* y, const std::function* report_error) { + std::bitset<4> astate = 0; + for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { + atok_t_connection_box_annotation in = lex_attr_t_connection_box_annotation(attr.name(), report_error); + if (astate[(int)in] == 0) + astate[(int)in] = 1; + else + noreturn_report(report_error, ("Duplicate attribute " + std::string(attr.name()) + " in .").c_str()); + switch (in) { + case atok_t_connection_box_annotation::ID: + *id = load_unsigned_int(attr.value(), report_error); + break; + case atok_t_connection_box_annotation::SITE_PIN_DELAY: + *site_pin_delay = load_float(attr.value(), report_error); + break; + case atok_t_connection_box_annotation::X: + *x = load_unsigned_int(attr.value(), report_error); + break; + case atok_t_connection_box_annotation::Y: + *y = load_unsigned_int(attr.value(), report_error); + break; + default: + break; /* Not possible. */ + } + } + std::bitset<4> test_astate = astate | std::bitset<4>(0b0000); + if (!test_astate.all()) attr_error(test_astate, atok_lookup_t_connection_box_annotation, report_error); +} + inline void load_node_required_attributes(const pugi::xml_node& root, unsigned int* capacity, unsigned int* id, enum_node_type* type, const std::function* report_error) { std::bitset<4> astate = 0; for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { @@ -3212,6 +3615,94 @@ inline void load_block_types(const pugi::xml_node& root, T& out, Context& contex if (state != 0) dfa_error("end of input", gstate_t_block_types[state], gtok_lookup_t_block_types, 1, report_error); } +template +inline void load_connection_box_declaration(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug) { + (void)root; + (void)out; + (void)context; + (void)report_error; + // Update current file offset in case an error is encountered. + *offset_debug = root.offset_debug(); + + for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { + atok_t_connection_box_declaration in = lex_attr_t_connection_box_declaration(attr.name(), report_error); + switch (in) { + case atok_t_connection_box_declaration::ID: + /* Attribute id is already set */ + break; + case atok_t_connection_box_declaration::NAME: + out.set_connection_box_declaration_name(attr.value(), context); + break; + default: + break; /* Not possible. */ + } + } + + if (root.first_child().type() == pugi::node_element) + noreturn_report(report_error, "Unexpected child element in ."); +} + +constexpr int NUM_T_CONNECTION_BOXES_STATES = 2; +constexpr const int NUM_T_CONNECTION_BOXES_INPUTS = 1; +constexpr int gstate_t_connection_boxes[NUM_T_CONNECTION_BOXES_STATES][NUM_T_CONNECTION_BOXES_INPUTS] = { + {0}, + {0}, +}; +template +inline void load_connection_boxes(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug) { + (void)root; + (void)out; + (void)context; + (void)report_error; + // Update current file offset in case an error is encountered. + *offset_debug = root.offset_debug(); + + // Preallocate arrays by counting child nodes (if any) + size_t connection_box_count = 0; + { + int next, state = 1; + for (pugi::xml_node node = root.first_child(); node; node = node.next_sibling()) { + *offset_debug = node.offset_debug(); + gtok_t_connection_boxes in = lex_node_t_connection_boxes(node.name(), report_error); + next = gstate_t_connection_boxes[state][(int)in]; + if (next == -1) + dfa_error(gtok_lookup_t_connection_boxes[(int)in], gstate_t_connection_boxes[state], gtok_lookup_t_connection_boxes, 1, report_error); + state = next; + switch (in) { + case gtok_t_connection_boxes::CONNECTION_BOX: + connection_box_count += 1; + break; + default: + break; /* Not possible. */ + } + } + + out.preallocate_connection_boxes_connection_box(context, connection_box_count); + } + int next, state = 1; + for (pugi::xml_node node = root.first_child(); node; node = node.next_sibling()) { + *offset_debug = node.offset_debug(); + gtok_t_connection_boxes in = lex_node_t_connection_boxes(node.name(), report_error); + next = gstate_t_connection_boxes[state][(int)in]; + if (next == -1) + dfa_error(gtok_lookup_t_connection_boxes[(int)in], gstate_t_connection_boxes[state], gtok_lookup_t_connection_boxes, 1, report_error); + state = next; + switch (in) { + case gtok_t_connection_boxes::CONNECTION_BOX: { + unsigned int connection_box_declaration_id; + memset(&connection_box_declaration_id, 0, sizeof(connection_box_declaration_id)); + load_connection_box_declaration_required_attributes(node, &connection_box_declaration_id, report_error); + auto child_context = out.add_connection_boxes_connection_box(context, connection_box_declaration_id); + load_connection_box_declaration(node, out, child_context, report_error, offset_debug); + out.finish_connection_boxes_connection_box(child_context); + } break; + default: + break; /* Not possible. */ + } + } + if (state != 0) dfa_error("end of input", gstate_t_connection_boxes[state], gtok_lookup_t_connection_boxes, 1, report_error); +} + template inline void load_grid_loc(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug) { (void)root; @@ -3448,6 +3939,32 @@ inline void load_metadata(const pugi::xml_node& root, T& out, Context& context, if (state != 0) dfa_error("end of input", gstate_t_metadata[state], gtok_lookup_t_metadata, 1, report_error); } +template +inline void load_canonical_loc(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug) { + (void)root; + (void)out; + (void)context; + (void)report_error; + // Update current file offset in case an error is encountered. + *offset_debug = root.offset_debug(); + + if (root.first_child().type() == pugi::node_element) + noreturn_report(report_error, "Unexpected child element in ."); +} + +template +inline void load_connection_box_annotation(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug) { + (void)root; + (void)out; + (void)context; + (void)report_error; + // Update current file offset in case an error is encountered. + *offset_debug = root.offset_debug(); + + if (root.first_child().type() == pugi::node_element) + noreturn_report(report_error, "Unexpected child element in ."); +} + template inline void load_node(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug) { (void)root; @@ -3477,7 +3994,7 @@ inline void load_node(const pugi::xml_node& root, T& out, Context& context, cons } } - std::bitset<4> gstate = 0; + std::bitset<6> gstate = 0; for (pugi::xml_node node = root.first_child(); node; node = node.next_sibling()) { *offset_debug = node.offset_debug(); gtok_t_node in = lex_node_t_node(node.name(), report_error); @@ -3525,11 +4042,35 @@ inline void load_node(const pugi::xml_node& root, T& out, Context& context, cons load_metadata(node, out, child_context, report_error, offset_debug); out.finish_node_metadata(child_context); } break; + case gtok_t_node::CANONICAL_LOC: { + unsigned int canonical_loc_x; + memset(&canonical_loc_x, 0, sizeof(canonical_loc_x)); + unsigned int canonical_loc_y; + memset(&canonical_loc_y, 0, sizeof(canonical_loc_y)); + load_canonical_loc_required_attributes(node, &canonical_loc_x, &canonical_loc_y, report_error); + auto child_context = out.init_node_canonical_loc(context, canonical_loc_x, canonical_loc_y); + load_canonical_loc(node, out, child_context, report_error, offset_debug); + out.finish_node_canonical_loc(child_context); + } break; + case gtok_t_node::CONNECTION_BOX: { + unsigned int connection_box_annotation_id; + memset(&connection_box_annotation_id, 0, sizeof(connection_box_annotation_id)); + float connection_box_annotation_site_pin_delay; + memset(&connection_box_annotation_site_pin_delay, 0, sizeof(connection_box_annotation_site_pin_delay)); + unsigned int connection_box_annotation_x; + memset(&connection_box_annotation_x, 0, sizeof(connection_box_annotation_x)); + unsigned int connection_box_annotation_y; + memset(&connection_box_annotation_y, 0, sizeof(connection_box_annotation_y)); + load_connection_box_annotation_required_attributes(node, &connection_box_annotation_id, &connection_box_annotation_site_pin_delay, &connection_box_annotation_x, &connection_box_annotation_y, report_error); + auto child_context = out.init_node_connection_box(context, connection_box_annotation_id, connection_box_annotation_site_pin_delay, connection_box_annotation_x, connection_box_annotation_y); + load_connection_box_annotation(node, out, child_context, report_error, offset_debug); + out.finish_node_connection_box(child_context); + } break; default: break; /* Not possible. */ } } - std::bitset<4> test_gstate = gstate | std::bitset<4>(0b1110); + std::bitset<6> test_gstate = gstate | std::bitset<6>(0b111110); if (!test_gstate.all()) all_error(test_gstate, gtok_lookup_t_node, report_error); } @@ -3726,7 +4267,7 @@ inline void load_rr_graph(const pugi::xml_node& root, T& out, Context& context, } } - std::bitset<7> gstate = 0; + std::bitset<8> gstate = 0; for (pugi::xml_node node = root.first_child(); node; node = node.next_sibling()) { *offset_debug = node.offset_debug(); gtok_t_rr_graph in = lex_node_t_rr_graph(node.name(), report_error); @@ -3755,6 +4296,18 @@ inline void load_rr_graph(const pugi::xml_node& root, T& out, Context& context, load_block_types(node, out, child_context, report_error, offset_debug); out.finish_rr_graph_block_types(child_context); } break; + case gtok_t_rr_graph::CONNECTION_BOXES: { + unsigned int connection_boxes_num_boxes; + memset(&connection_boxes_num_boxes, 0, sizeof(connection_boxes_num_boxes)); + unsigned int connection_boxes_x_dim; + memset(&connection_boxes_x_dim, 0, sizeof(connection_boxes_x_dim)); + unsigned int connection_boxes_y_dim; + memset(&connection_boxes_y_dim, 0, sizeof(connection_boxes_y_dim)); + load_connection_boxes_required_attributes(node, &connection_boxes_num_boxes, &connection_boxes_x_dim, &connection_boxes_y_dim, report_error); + auto child_context = out.init_rr_graph_connection_boxes(context, connection_boxes_num_boxes, connection_boxes_x_dim, connection_boxes_y_dim); + load_connection_boxes(node, out, child_context, report_error, offset_debug); + out.finish_rr_graph_connection_boxes(child_context); + } break; case gtok_t_rr_graph::GRID: { auto child_context = out.init_rr_graph_grid(context); load_grid_locs(node, out, child_context, report_error, offset_debug); @@ -3774,7 +4327,7 @@ inline void load_rr_graph(const pugi::xml_node& root, T& out, Context& context, break; /* Not possible. */ } } - std::bitset<7> test_gstate = gstate | std::bitset<7>(0b0000000); + std::bitset<8> test_gstate = gstate | std::bitset<8>(0b00010000); if (!test_gstate.all()) all_error(test_gstate, gtok_lookup_t_rr_graph, report_error); } @@ -3963,6 +4516,22 @@ inline void write_block_types(T& in, std::ostream& os, Context& context) { } } +template +inline void write_connection_boxes(T& in, std::ostream& os, Context& context) { + (void)in; + (void)os; + (void)context; + { + for (size_t i = 0, n = in.num_connection_boxes_connection_box(context); i < n; i++) { + auto child_context = in.get_connection_boxes_connection_box(i, context); + os << "\n"; + } + } +} + template inline void write_grid_locs(T& in, std::ostream& os, Context& context) { (void)in; @@ -4049,6 +4618,26 @@ inline void write_node(T& in, std::ostream& os, Context& context) { os << "\n"; } } + { + if (in.has_node_canonical_loc(context)) { + auto child_context = in.get_node_canonical_loc(context); + os << "\n"; + } + } + { + if (in.has_node_connection_box(context)) { + auto child_context = in.get_node_connection_box(context); + os << "\n"; + } + } } template @@ -4135,6 +4724,18 @@ inline void write_rr_graph(T& in, std::ostream& os, Context& context) { write_block_types(in, os, child_context); os << "\n"; } + { + if (in.has_rr_graph_connection_boxes(context)) { + auto child_context = in.get_rr_graph_connection_boxes(context); + os << ""; + write_connection_boxes(in, os, child_context); + os << "\n"; + } + } { auto child_context = in.get_rr_graph_grid(context); os << "\n"; diff --git a/vpr/src/route/gen/rr_graph_uxsdcxx_capnp.h b/vpr/src/route/gen/rr_graph_uxsdcxx_capnp.h index 4d78a7cf016..dca960a1bdb 100644 --- a/vpr/src/route/gen/rr_graph_uxsdcxx_capnp.h +++ b/vpr/src/route/gen/rr_graph_uxsdcxx_capnp.h @@ -54,6 +54,10 @@ void load_block_type_capnp_type(const ucap::BlockType::Reader& root, T& out, Con template void load_block_types_capnp_type(const ucap::BlockTypes::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); template +void load_connection_box_declaration_capnp_type(const ucap::ConnectionBoxDeclaration::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); +template +void load_connection_boxes_capnp_type(const ucap::ConnectionBoxes::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); +template void load_grid_loc_capnp_type(const ucap::GridLoc::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); template void load_grid_locs_capnp_type(const ucap::GridLocs::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); @@ -68,6 +72,10 @@ void load_meta_capnp_type(const ucap::Meta::Reader& root, T& out, Context& conte template void load_metadata_capnp_type(const ucap::Metadata::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); template +void load_canonical_loc_capnp_type(const ucap::CanonicalLoc::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); +template +void load_connection_box_annotation_capnp_type(const ucap::ConnectionBoxAnnotation::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); +template void load_node_capnp_type(const ucap::Node::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); template void load_rr_nodes_capnp_type(const ucap::RrNodes::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); @@ -98,6 +106,8 @@ inline void write_block_type_capnp_type(T& in, ucap::BlockType::Builder& root, C template inline void write_block_types_capnp_type(T& in, ucap::BlockTypes::Builder& root, Context& context); template +inline void write_connection_boxes_capnp_type(T& in, ucap::ConnectionBoxes::Builder& root, Context& context); +template inline void write_grid_locs_capnp_type(T& in, ucap::GridLocs::Builder& root, Context& context); template inline void write_meta_capnp_type(T& in, ucap::Meta::Builder& root, Context& context); @@ -614,6 +624,39 @@ inline void load_block_types_capnp_type(const ucap::BlockTypes::Reader& root, T& stack->pop_back(); } +template +inline void load_connection_box_declaration_capnp_type(const ucap::ConnectionBoxDeclaration::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack) { + (void)root; + (void)out; + (void)context; + (void)report_error; + (void)stack; + + out.set_connection_box_declaration_name(root.getName().cStr(), context); +} + +template +inline void load_connection_boxes_capnp_type(const ucap::ConnectionBoxes::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack) { + (void)root; + (void)out; + (void)context; + (void)report_error; + (void)stack; + + stack->push_back(std::make_pair("getConnectionBox", 0)); + { + auto data = root.getConnectionBoxes(); + out.preallocate_connection_boxes_connection_box(context, data.size()); + for (const auto& el : data) { + auto child_context = out.add_connection_boxes_connection_box(context, el.getId()); + load_connection_box_declaration_capnp_type(el, out, child_context, report_error, stack); + out.finish_connection_boxes_connection_box(child_context); + stack->back().second += 1; + } + } + stack->pop_back(); +} + template inline void load_grid_loc_capnp_type(const ucap::GridLoc::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack) { (void)root; @@ -710,6 +753,24 @@ inline void load_metadata_capnp_type(const ucap::Metadata::Reader& root, T& out, stack->pop_back(); } +template +inline void load_canonical_loc_capnp_type(const ucap::CanonicalLoc::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack) { + (void)root; + (void)out; + (void)context; + (void)report_error; + (void)stack; +} + +template +inline void load_connection_box_annotation_capnp_type(const ucap::ConnectionBoxAnnotation::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack) { + (void)root; + (void)out; + (void)context; + (void)report_error; + (void)stack; +} + template inline void load_node_capnp_type(const ucap::Node::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack) { (void)root; @@ -751,6 +812,22 @@ inline void load_node_capnp_type(const ucap::Node::Reader& root, T& out, Context out.finish_node_metadata(child_context); } stack->pop_back(); + stack->push_back(std::make_pair("getCanonicalLoc", 0)); + if (root.hasCanonicalLoc()) { + auto child_el = root.getCanonicalLoc(); + auto child_context = out.init_node_canonical_loc(context, child_el.getX(), child_el.getY()); + load_canonical_loc_capnp_type(child_el, out, child_context, report_error, stack); + out.finish_node_canonical_loc(child_context); + } + stack->pop_back(); + stack->push_back(std::make_pair("getConnectionBox", 0)); + if (root.hasConnectionBox()) { + auto child_el = root.getConnectionBox(); + auto child_context = out.init_node_connection_box(context, child_el.getId(), child_el.getSitePinDelay(), child_el.getX(), child_el.getY()); + load_connection_box_annotation_capnp_type(child_el, out, child_context, report_error, stack); + out.finish_node_connection_box(child_context); + } + stack->pop_back(); } template @@ -858,6 +935,14 @@ inline void load_rr_graph_capnp_type(const ucap::RrGraph::Reader& root, T& out, out.finish_rr_graph_block_types(child_context); } stack->pop_back(); + stack->push_back(std::make_pair("getConnectionBoxes", 0)); + if (root.hasConnectionBoxes()) { + auto child_el = root.getConnectionBoxes(); + auto child_context = out.init_rr_graph_connection_boxes(context, child_el.getNumBoxes(), child_el.getXDim(), child_el.getYDim()); + load_connection_boxes_capnp_type(child_el, out, child_context, report_error, stack); + out.finish_rr_graph_connection_boxes(child_context); + } + stack->pop_back(); stack->push_back(std::make_pair("getGrid", 0)); if (root.hasGrid()) { auto child_el = root.getGrid(); @@ -1051,6 +1136,21 @@ inline void write_block_types_capnp_type(T& in, ucap::BlockTypes::Builder& root, } } +template +inline void write_connection_boxes_capnp_type(T& in, ucap::ConnectionBoxes::Builder& root, Context& context) { + (void)in; + (void)root; + + size_t num_connection_boxes_connection_boxes = in.num_connection_boxes_connection_box(context); + auto connection_boxes_connection_boxes = root.initConnectionBoxes(num_connection_boxes_connection_boxes); + for (size_t i = 0; i < num_connection_boxes_connection_boxes; i++) { + auto connection_boxes_connection_box = connection_boxes_connection_boxes[i]; + auto child_context = in.get_connection_boxes_connection_box(i, context); + connection_boxes_connection_box.setId(in.get_connection_box_declaration_id(child_context)); + connection_boxes_connection_box.setName(in.get_connection_box_declaration_name(child_context)); + } +} + template inline void write_grid_locs_capnp_type(T& in, ucap::GridLocs::Builder& root, Context& context) { (void)in; @@ -1126,6 +1226,22 @@ inline void write_node_capnp_type(T& in, ucap::Node::Builder& root, Context& con auto child_context = in.get_node_metadata(context); write_metadata_capnp_type(in, node_metadata, child_context); } + + if (in.has_node_canonical_loc(context)) { + auto node_canonical_loc = root.initCanonicalLoc(); + auto child_context = in.get_node_canonical_loc(context); + node_canonical_loc.setX(in.get_canonical_loc_x(child_context)); + node_canonical_loc.setY(in.get_canonical_loc_y(child_context)); + } + + if (in.has_node_connection_box(context)) { + auto node_connection_box = root.initConnectionBox(); + auto child_context = in.get_node_connection_box(context); + node_connection_box.setId(in.get_connection_box_annotation_id(child_context)); + node_connection_box.setSitePinDelay(in.get_connection_box_annotation_site_pin_delay(child_context)); + node_connection_box.setX(in.get_connection_box_annotation_x(child_context)); + node_connection_box.setY(in.get_connection_box_annotation_y(child_context)); + } } template @@ -1205,6 +1321,15 @@ inline void write_rr_graph_capnp_type(T& in, ucap::RrGraph::Builder& root, Conte write_block_types_capnp_type(in, rr_graph_block_types, child_context); } + if (in.has_rr_graph_connection_boxes(context)) { + auto rr_graph_connection_boxes = root.initConnectionBoxes(); + auto child_context = in.get_rr_graph_connection_boxes(context); + rr_graph_connection_boxes.setNumBoxes(in.get_connection_boxes_num_boxes(child_context)); + rr_graph_connection_boxes.setXDim(in.get_connection_boxes_x_dim(child_context)); + rr_graph_connection_boxes.setYDim(in.get_connection_boxes_y_dim(child_context)); + write_connection_boxes_capnp_type(in, rr_graph_connection_boxes, child_context); + } + { auto child_context = in.get_rr_graph_grid(context); auto rr_graph_grid = root.initGrid(); diff --git a/vpr/src/route/gen/rr_graph_uxsdcxx_interface.h b/vpr/src/route/gen/rr_graph_uxsdcxx_interface.h index ed968c6ebcf..8c9085dc7d0 100644 --- a/vpr/src/route/gen/rr_graph_uxsdcxx_interface.h +++ b/vpr/src/route/gen/rr_graph_uxsdcxx_interface.h @@ -68,6 +68,8 @@ struct DefaultRrGraphContextTypes { using PinClassReadContext = void*; using BlockTypeReadContext = void*; using BlockTypesReadContext = void*; + using ConnectionBoxDeclarationReadContext = void*; + using ConnectionBoxesReadContext = void*; using GridLocReadContext = void*; using GridLocsReadContext = void*; using NodeLocReadContext = void*; @@ -75,6 +77,8 @@ struct DefaultRrGraphContextTypes { using NodeSegmentReadContext = void*; using MetaReadContext = void*; using MetadataReadContext = void*; + using CanonicalLocReadContext = void*; + using ConnectionBoxAnnotationReadContext = void*; using NodeReadContext = void*; using RrNodesReadContext = void*; using EdgeReadContext = void*; @@ -95,6 +99,8 @@ struct DefaultRrGraphContextTypes { using PinClassWriteContext = void*; using BlockTypeWriteContext = void*; using BlockTypesWriteContext = void*; + using ConnectionBoxDeclarationWriteContext = void*; + using ConnectionBoxesWriteContext = void*; using GridLocWriteContext = void*; using GridLocsWriteContext = void*; using NodeLocWriteContext = void*; @@ -102,6 +108,8 @@ struct DefaultRrGraphContextTypes { using NodeSegmentWriteContext = void*; using MetaWriteContext = void*; using MetadataWriteContext = void*; + using CanonicalLocWriteContext = void*; + using ConnectionBoxAnnotationWriteContext = void*; using NodeWriteContext = void*; using RrNodesWriteContext = void*; using EdgeWriteContext = void*; @@ -348,6 +356,35 @@ class RrGraphBase { virtual inline size_t num_block_types_block_type(typename ContextTypes::BlockTypesReadContext& ctx) = 0; virtual inline typename ContextTypes::BlockTypeReadContext get_block_types_block_type(int n, typename ContextTypes::BlockTypesReadContext& ctx) = 0; + /** Generated for complex type "connection_box_declaration": + * + * + * + * + */ + virtual inline unsigned int get_connection_box_declaration_id(typename ContextTypes::ConnectionBoxDeclarationReadContext& ctx) = 0; + virtual inline const char* get_connection_box_declaration_name(typename ContextTypes::ConnectionBoxDeclarationReadContext& ctx) = 0; + virtual inline void set_connection_box_declaration_name(const char* name, typename ContextTypes::ConnectionBoxDeclarationWriteContext& ctx) = 0; + + /** Generated for complex type "connection_boxes": + * + * + * + * + * + * + * + * + */ + virtual inline unsigned int get_connection_boxes_num_boxes(typename ContextTypes::ConnectionBoxesReadContext& ctx) = 0; + virtual inline unsigned int get_connection_boxes_x_dim(typename ContextTypes::ConnectionBoxesReadContext& ctx) = 0; + virtual inline unsigned int get_connection_boxes_y_dim(typename ContextTypes::ConnectionBoxesReadContext& ctx) = 0; + virtual inline void preallocate_connection_boxes_connection_box(typename ContextTypes::ConnectionBoxesWriteContext& ctx, size_t size) = 0; + virtual inline typename ContextTypes::ConnectionBoxDeclarationWriteContext add_connection_boxes_connection_box(typename ContextTypes::ConnectionBoxesWriteContext& ctx, unsigned int id) = 0; + virtual inline void finish_connection_boxes_connection_box(typename ContextTypes::ConnectionBoxDeclarationWriteContext& ctx) = 0; + virtual inline size_t num_connection_boxes_connection_box(typename ContextTypes::ConnectionBoxesReadContext& ctx) = 0; + virtual inline typename ContextTypes::ConnectionBoxDeclarationReadContext get_connection_boxes_connection_box(int n, typename ContextTypes::ConnectionBoxesReadContext& ctx) = 0; + /** Generated for complex type "grid_loc": * * @@ -437,6 +474,28 @@ class RrGraphBase { virtual inline size_t num_metadata_meta(typename ContextTypes::MetadataReadContext& ctx) = 0; virtual inline typename ContextTypes::MetaReadContext get_metadata_meta(int n, typename ContextTypes::MetadataReadContext& ctx) = 0; + /** Generated for complex type "canonical_loc": + * + * + * + * + */ + virtual inline unsigned int get_canonical_loc_x(typename ContextTypes::CanonicalLocReadContext& ctx) = 0; + virtual inline unsigned int get_canonical_loc_y(typename ContextTypes::CanonicalLocReadContext& ctx) = 0; + + /** Generated for complex type "connection_box_annotation": + * + * + * + * + * + * + */ + virtual inline unsigned int get_connection_box_annotation_id(typename ContextTypes::ConnectionBoxAnnotationReadContext& ctx) = 0; + virtual inline float get_connection_box_annotation_site_pin_delay(typename ContextTypes::ConnectionBoxAnnotationReadContext& ctx) = 0; + virtual inline unsigned int get_connection_box_annotation_x(typename ContextTypes::ConnectionBoxAnnotationReadContext& ctx) = 0; + virtual inline unsigned int get_connection_box_annotation_y(typename ContextTypes::ConnectionBoxAnnotationReadContext& ctx) = 0; + /** Generated for complex type "node": * * @@ -444,6 +503,8 @@ class RrGraphBase { * * * + * + * * * * @@ -471,6 +532,14 @@ class RrGraphBase { virtual inline void finish_node_metadata(typename ContextTypes::MetadataWriteContext& ctx) = 0; virtual inline typename ContextTypes::MetadataReadContext get_node_metadata(typename ContextTypes::NodeReadContext& ctx) = 0; virtual inline bool has_node_metadata(typename ContextTypes::NodeReadContext& ctx) = 0; + virtual inline typename ContextTypes::CanonicalLocWriteContext init_node_canonical_loc(typename ContextTypes::NodeWriteContext& ctx, unsigned int x, unsigned int y) = 0; + virtual inline void finish_node_canonical_loc(typename ContextTypes::CanonicalLocWriteContext& ctx) = 0; + virtual inline typename ContextTypes::CanonicalLocReadContext get_node_canonical_loc(typename ContextTypes::NodeReadContext& ctx) = 0; + virtual inline bool has_node_canonical_loc(typename ContextTypes::NodeReadContext& ctx) = 0; + virtual inline typename ContextTypes::ConnectionBoxAnnotationWriteContext init_node_connection_box(typename ContextTypes::NodeWriteContext& ctx, unsigned int id, float site_pin_delay, unsigned int x, unsigned int y) = 0; + virtual inline void finish_node_connection_box(typename ContextTypes::ConnectionBoxAnnotationWriteContext& ctx) = 0; + virtual inline typename ContextTypes::ConnectionBoxAnnotationReadContext get_node_connection_box(typename ContextTypes::NodeReadContext& ctx) = 0; + virtual inline bool has_node_connection_box(typename ContextTypes::NodeReadContext& ctx) = 0; /** Generated for complex type "rr_nodes": * @@ -523,6 +592,7 @@ class RrGraphBase { * * * + * * * * @@ -550,6 +620,10 @@ class RrGraphBase { virtual inline typename ContextTypes::BlockTypesWriteContext init_rr_graph_block_types(typename ContextTypes::RrGraphWriteContext& ctx) = 0; virtual inline void finish_rr_graph_block_types(typename ContextTypes::BlockTypesWriteContext& ctx) = 0; virtual inline typename ContextTypes::BlockTypesReadContext get_rr_graph_block_types(typename ContextTypes::RrGraphReadContext& ctx) = 0; + virtual inline typename ContextTypes::ConnectionBoxesWriteContext init_rr_graph_connection_boxes(typename ContextTypes::RrGraphWriteContext& ctx, unsigned int num_boxes, unsigned int x_dim, unsigned int y_dim) = 0; + virtual inline void finish_rr_graph_connection_boxes(typename ContextTypes::ConnectionBoxesWriteContext& ctx) = 0; + virtual inline typename ContextTypes::ConnectionBoxesReadContext get_rr_graph_connection_boxes(typename ContextTypes::RrGraphReadContext& ctx) = 0; + virtual inline bool has_rr_graph_connection_boxes(typename ContextTypes::RrGraphReadContext& ctx) = 0; virtual inline typename ContextTypes::GridLocsWriteContext init_rr_graph_grid(typename ContextTypes::RrGraphWriteContext& ctx) = 0; virtual inline void finish_rr_graph_grid(typename ContextTypes::GridLocsWriteContext& ctx) = 0; virtual inline typename ContextTypes::GridLocsReadContext get_rr_graph_grid(typename ContextTypes::RrGraphReadContext& ctx) = 0; diff --git a/vpr/src/route/router_lookahead.cpp b/vpr/src/route/router_lookahead.cpp index b72b38b4e2d..9b3dfa03fc1 100644 --- a/vpr/src/route/router_lookahead.cpp +++ b/vpr/src/route/router_lookahead.cpp @@ -1,6 +1,7 @@ #include "router_lookahead.h" #include "router_lookahead_map.h" +#include "connection_box_lookahead_map.h" #include "vpr_error.h" #include "globals.h" #include "route_timing.h" @@ -13,6 +14,8 @@ static std::unique_ptr make_router_lookahead_object(e_router_lo return std::make_unique(); } else if (router_lookahead_type == e_router_lookahead::MAP) { return std::make_unique(); + } else if (router_lookahead_type == e_router_lookahead::CONNECTION_BOX_MAP) { + return std::make_unique(); } else if (router_lookahead_type == e_router_lookahead::NO_OP) { return std::make_unique(); } diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index eceb209227c..ea06d7c769e 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -32,6 +32,7 @@ #include "rr_graph_writer.h" #include "rr_graph_reader.h" #include "router_lookahead_map.h" +#include "connection_box_lookahead_map.h" #include "rr_graph_clock.h" #include "rr_types.h" diff --git a/vpr/src/route/rr_graph.xsd b/vpr/src/route/rr_graph.xsd index 728ea747e3c..2121b919928 100644 --- a/vpr/src/route/rr_graph.xsd +++ b/vpr/src/route/rr_graph.xsd @@ -261,12 +261,26 @@ + + + + + + + + + + + + + + @@ -283,6 +297,20 @@ + + + + + + + + + + + + + + @@ -330,6 +358,7 @@ + diff --git a/vpr/src/route/rr_graph_reader.cpp b/vpr/src/route/rr_graph_reader.cpp index d76ca4fdcf9..681e3520bb5 100644 --- a/vpr/src/route/rr_graph_reader.cpp +++ b/vpr/src/route/rr_graph_reader.cpp @@ -62,6 +62,7 @@ void load_rr_file(const t_graph_type graph_type, &device_ctx.rr_switch_inf, &device_ctx.rr_indexed_data, &device_ctx.rr_node_indices, + &device_ctx.connection_boxes, device_ctx.num_arch_switches, device_ctx.arch_switch_inf, device_ctx.rr_segments, diff --git a/vpr/src/route/rr_graph_uxsdcxx_serializer.h b/vpr/src/route/rr_graph_uxsdcxx_serializer.h index b4c09cff670..35c811b0c6b 100644 --- a/vpr/src/route/rr_graph_uxsdcxx_serializer.h +++ b/vpr/src/route/rr_graph_uxsdcxx_serializer.h @@ -230,6 +230,9 @@ struct RrGraphContextTypes : public uxsd::DefaultRrGraphContextTypes { using NodeReadContext = const t_rr_node; using EdgeReadContext = const EdgeWalker*; using RrEdgesReadContext = EdgeWalker; + using ConnectionBoxDeclarationReadContext = ConnectionBoxId; + using CanonicalLocReadContext = const std::pair*; + using ConnectionBoxAnnotationReadContext = const std::tuple, float>; using TimingWriteContext = t_rr_switch_inf*; using SizingWriteContext = t_rr_switch_inf*; using SwitchWriteContext = t_rr_switch_inf*; @@ -245,6 +248,7 @@ struct RrGraphContextTypes : public uxsd::DefaultRrGraphContextTypes { using MetadataWriteContext = MetadataBind; using NodeWriteContext = int; using EdgeWriteContext = MetadataBind; + using ConnectionBoxDeclarationWriteContext = ConnectionBox*; }; class RrGraphSerializer final : public uxsd::RrGraphBase { @@ -262,6 +266,7 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { std::vector* rr_switch_inf, std::vector* rr_indexed_data, t_rr_node_indices* rr_node_indices, + ConnectionBoxes* connection_boxes, const size_t num_arch_switches, const t_arch_switch_inf* arch_switch_inf, const std::vector& segment_inf, @@ -276,6 +281,7 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { , rr_switch_inf_(rr_switch_inf) , rr_indexed_data_(rr_indexed_data) , rr_node_indices_(rr_node_indices) + , connection_boxes_(connection_boxes) , read_rr_graph_filename_(read_rr_graph_filename) , graph_type_(graph_type) , base_cost_type_(base_cost_type) @@ -589,6 +595,160 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { } } + inline unsigned int get_connection_box_annotation_id(const std::tuple, float>& box_info) final { + ConnectionBoxId box_id; + std::pair box_location; + float site_pin_delay; + std::tie(box_id, box_location, site_pin_delay) = box_info; + + return (size_t)box_id; + } + inline float get_connection_box_annotation_site_pin_delay(const std::tuple, float>& box_info) final { + ConnectionBoxId box_id; + std::pair box_location; + float site_pin_delay; + std::tie(box_id, box_location, site_pin_delay) = box_info; + + return site_pin_delay; + } + inline unsigned int get_connection_box_annotation_x(const std::tuple, float>& box_info) final { + ConnectionBoxId box_id; + std::pair box_location; + float site_pin_delay; + std::tie(box_id, box_location, site_pin_delay) = box_info; + + return box_location.first; + } + inline unsigned int get_connection_box_annotation_y(const std::tuple, float>& box_info) final { + ConnectionBoxId box_id; + std::pair box_location; + float site_pin_delay; + std::tie(box_id, box_location, site_pin_delay) = box_info; + + return box_location.second; + } + + inline unsigned int get_canonical_loc_x(const std::pair*& data) final { + return data->first; + } + inline unsigned int get_canonical_loc_y(const std::pair*& data) final { + return data->second; + } + + inline const std::pair* get_node_canonical_loc(const t_rr_node& node) final { + return connection_boxes_->find_canonical_loc(get_node_id(node)); + } + inline bool has_node_canonical_loc(const t_rr_node& node) final { + return connection_boxes_->find_canonical_loc(get_node_id(node)) != nullptr; + } + inline const std::tuple, float> get_node_connection_box(const t_rr_node& node) final { + ConnectionBoxId box_id; + std::pair box_location; + float site_pin_delay; + + if (!connection_boxes_->find_connection_box(get_node_id(node), + &box_id, &box_location, &site_pin_delay)) { + VPR_FATAL_ERROR(VPR_ERROR_OTHER, + "No connection box for %d", node); + } + return std::make_tuple(box_id, box_location, site_pin_delay); + } + inline bool has_node_connection_box(const t_rr_node& node) final { + ConnectionBoxId box_id; + std::pair box_location; + float site_pin_delay; + + return connection_boxes_->find_connection_box(get_node_id(node), + &box_id, &box_location, &site_pin_delay); + } + inline void* init_node_canonical_loc(int& inode, unsigned int x, unsigned int y) final { + connection_boxes_->add_canonical_loc(inode, std::make_pair(x, y)); + return nullptr; + } + inline void finish_node_canonical_loc(void*& /*ctx*/) final {} + + inline void* init_node_connection_box(int& inode, unsigned int id, float site_pin_delay, unsigned int x, unsigned int y) final { + connection_boxes_->add_connection_box(inode, + ConnectionBoxId(id), + std::make_pair(x, y), + site_pin_delay); + return nullptr; + } + inline void finish_node_connection_box(void*& /*ctx*/) final {} + + /** Generated for complex type "connection_box_declaration": + * + * + * + * + */ + inline void set_connection_box_declaration_name(const char* name, ConnectionBox*& box) final { + box->name.assign(name); + } + + inline unsigned int get_connection_box_declaration_id(ConnectionBoxId& id) final { + return (size_t)id; + } + inline const char* get_connection_box_declaration_name(ConnectionBoxId& id) final { + return connection_boxes_->get_connection_box(id)->name.c_str(); + } + inline unsigned int get_connection_boxes_num_boxes(void*& /*ctx*/) final { + return connection_boxes_->num_connection_box_types(); + } + inline unsigned int get_connection_boxes_x_dim(void*& /*ctx*/) final { + return connection_boxes_->connection_box_grid_size().first; + } + inline unsigned int get_connection_boxes_y_dim(void*& /*ctx*/) final { + return connection_boxes_->connection_box_grid_size().second; + } + inline size_t num_connection_boxes_connection_box(void*& /*ctx*/) final { + return connection_boxes_->num_connection_box_types(); + } + inline ConnectionBoxId get_connection_boxes_connection_box(int n, void*& /*ctx*/) final { + return ConnectionBoxId(n); + } + + /** Generated for complex type "connection_boxes": + * + * + * + * + * + * + * + * + */ + inline void preallocate_connection_boxes_connection_box(void*& /*ctx*/, size_t size) final { + if (size != boxes_.size()) { + report_error("Number of connection_box %zu is greater than num_boxes %zu on ", + size, boxes_.size()); + } + } + inline ConnectionBox* add_connection_boxes_connection_box(void*& /*ctx*/, unsigned int id) final { + if (id >= boxes_.size()) { + report_error("ConnectionBox id %u is greater than num_boxes on ", + id); + } + return &boxes_[id]; + } + inline void finish_connection_boxes_connection_box(ConnectionBox*& /*ctx*/) final {} + + private: + int box_x_dim_; + int box_y_dim_; + std::vector boxes_; + + public: + inline void* init_rr_graph_connection_boxes(void*& /*ctx*/, unsigned int num_boxes, unsigned int x_dim, unsigned int y_dim) final { + box_x_dim_ = x_dim; + box_y_dim_ = y_dim; + boxes_.resize(num_boxes); + return nullptr; + } + inline void finish_rr_graph_connection_boxes(void*& /*ctx*/) final { + connection_boxes_->reset_boxes(std::make_pair(box_x_dim_, box_y_dim_), std::move(boxes_)); + } + /** Generated for complex type "node_timing": * * @@ -764,6 +924,7 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { // If make_room_in_vector was used for allocation, this ensures that // the final storage has no overhead. rr_nodes_->shrink_to_fit(); + connection_boxes_->resize_nodes(rr_nodes_->size()); } /** Generated for complex type "rr_edges": @@ -1450,6 +1611,12 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { inline void* get_rr_graph_block_types(void*& /*ctx*/) final { return nullptr; } + inline void* get_rr_graph_connection_boxes(void*& /*ctx*/) final { + return nullptr; + } + inline bool has_rr_graph_connection_boxes(void*& /*ctx*/) final { + return connection_boxes_->num_connection_box_types() > 0; + } inline void* get_rr_graph_grid(void*& /*ctx*/) final { return nullptr; } @@ -1476,6 +1643,8 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { (*rr_indexed_data_)[i].seg_index = seg_index_[i]; } + connection_boxes_->create_sink_back_ref(); + VTR_ASSERT(read_rr_graph_filename_ != nullptr); VTR_ASSERT(read_rr_graph_name_ != nullptr); read_rr_graph_filename_->assign(read_rr_graph_name_); @@ -1844,6 +2013,7 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { std::vector* rr_switch_inf_; std::vector* rr_indexed_data_; t_rr_node_indices* rr_node_indices_; + ConnectionBoxes* connection_boxes_; std::string* read_rr_graph_filename_; // Constant data for loads and writes. diff --git a/vpr/src/route/rr_graph_writer.cpp b/vpr/src/route/rr_graph_writer.cpp index 86fed376c05..57d2bb9c0e8 100644 --- a/vpr/src/route/rr_graph_writer.cpp +++ b/vpr/src/route/rr_graph_writer.cpp @@ -40,6 +40,7 @@ void write_rr_graph(const char* file_name) { &device_ctx.rr_switch_inf, &device_ctx.rr_indexed_data, &device_ctx.rr_node_indices, + &device_ctx.connection_boxes, device_ctx.num_arch_switches, device_ctx.arch_switch_inf, device_ctx.rr_segments, From b71a487737e9fee6628ab9af9189cfa5357dc37f Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Thu, 15 Aug 2019 18:09:05 -0700 Subject: [PATCH 03/41] Complete connection box lookahead to be aware of connection box. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- libs/libvtrcapnproto/connection_map.capnp | 4 +- .../route/connection_box_lookahead_map.cpp | 313 ++++++++++-------- 2 files changed, 179 insertions(+), 138 deletions(-) diff --git a/libs/libvtrcapnproto/connection_map.capnp b/libs/libvtrcapnproto/connection_map.capnp index ac03ddfc9c0..1ca672108c8 100644 --- a/libs/libvtrcapnproto/connection_map.capnp +++ b/libs/libvtrcapnproto/connection_map.capnp @@ -13,7 +13,7 @@ struct VprVector2D { } struct VprCostMap { - costMap @0 :List(Matrix.Matrix(VprCostEntry)); - offset @1 :List(VprVector2D); + costMap @0 :Matrix.Matrix((Matrix.Matrix(VprCostEntry))); + offset @1 :Matrix.Matrix(VprVector2D); segmentMap @2 :List(Int64); } diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 2b1b3b48c34..a95fa385893 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -12,14 +12,16 @@ #include "echo_files.h" #include "route_timing.h" - -#include "capnp/serialize.h" -#include "connection_map.capnp.h" -#include "ndmatrix_serdes.h" -#include "mmap_file.h" -#include "serdes_utils.h" #include "route_common.h" +#ifdef VTR_ENABLE_CAPNPROTO +# include "capnp/serialize.h" +# include "connection_map.capnp.h" +# include "ndmatrix_serdes.h" +# include "mmap_file.h" +# include "serdes_utils.h" +#endif + /* we're profiling routing cost over many tracks for each wire type, so we'll * have many cost entries at each |dx|,|dy| offset. There are many ways to * "boil down" the many costs at each offset to a single entry for a given @@ -40,17 +42,17 @@ static int signum(int x) { return 0; } -typedef std::vector, Cost_Entry>> t_routing_cost_map; +typedef std::vector, std::pair, Cost_Entry, ConnectionBoxId>> t_routing_cost_map; static void run_dijkstra(int start_node_ind, - t_routing_cost_map* cost_map); + t_routing_cost_map* routing_cost_map); class CostMap { public: - void set_segment_count(size_t seg_count) { + void set_counts(size_t seg_count, size_t box_count) { cost_map_.clear(); offset_.clear(); - cost_map_.resize(seg_count); - offset_.resize(seg_count); + cost_map_.resize({seg_count, box_count}); + offset_.resize({seg_count, box_count}); const auto& device_ctx = g_vpr_ctx.device(); segment_map_.resize(device_ctx.rr_nodes.size()); @@ -68,11 +70,11 @@ class CostMap { return segment_map_[from_node_ind]; } - Cost_Entry find_cost(int from_seg_index, int delta_x, int delta_y) const { + Cost_Entry find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const { VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); - int dx = delta_x - offset_[from_seg_index].first; - int dy = delta_y - offset_[from_seg_index].second; - const auto& cost_map = cost_map_[from_seg_index]; + int dx = delta_x - offset_[from_seg_index][size_t(box_id)].first; + int dy = delta_y - offset_[from_seg_index][size_t(box_id)].second; + const auto& cost_map = cost_map_[from_seg_index][size_t(box_id)]; if (dx < 0) { dx = 0; @@ -88,12 +90,21 @@ class CostMap { dy = cost_map.dim_size(1) - 1; } - return cost_map_[from_seg_index][dx][dy]; + return cost_map_[from_seg_index][size_t(box_id)][dx][dy]; } void set_cost_map(int from_seg_index, const t_routing_cost_map& cost_map, e_representative_entry_method method) { + const auto& device_ctx = g_vpr_ctx.device(); + for (size_t box_id = 0; + box_id < device_ctx.connection_boxes.num_connection_box_types(); + ++box_id) { + set_cost_map(from_seg_index, ConnectionBoxId(box_id), cost_map, method); + } + } + + void set_cost_map(int from_seg_index, ConnectionBoxId box_id, const t_routing_cost_map& cost_map, e_representative_entry_method method) { VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); // Find coordinate offset for this segment. @@ -102,15 +113,18 @@ class CostMap { int max_dx = 0; int max_dy = 0; for (const auto& entry : cost_map) { - min_dx = std::min(entry.first.first, min_dx); - min_dy = std::min(entry.first.second, min_dy); + if (std::get<3>(entry) != box_id) { + continue; + } + min_dx = std::min(std::get<1>(entry).first, min_dx); + min_dy = std::min(std::get<1>(entry).second, min_dy); - max_dx = std::max(entry.first.first, max_dx); - max_dy = std::max(entry.first.second, max_dy); + max_dx = std::max(std::get<1>(entry).first, max_dx); + max_dy = std::max(std::get<1>(entry).second, max_dy); } - offset_[from_seg_index].first = min_dx; - offset_[from_seg_index].second = min_dy; + offset_[from_seg_index][size_t(box_id)].first = min_dx; + offset_[from_seg_index][size_t(box_id)].second = min_dy; size_t dim_x = max_dx - min_dx + 1; size_t dim_y = max_dy - min_dy + 1; @@ -118,41 +132,45 @@ class CostMap { {dim_x, dim_y}); for (const auto& entry : cost_map) { - int x = entry.first.first - min_dx; - int y = entry.first.second - min_dy; + if (std::get<3>(entry) != box_id) { + continue; + } + int x = std::get<1>(entry).first - min_dx; + int y = std::get<1>(entry).second - min_dy; expansion_cost_map[x][y].add_cost_entry( - method, entry.second.delay, - entry.second.congestion); + method, std::get<2>(entry).delay, + std::get<2>(entry).congestion); } - cost_map_[from_seg_index] = vtr::NdMatrix( + cost_map_[from_seg_index][size_t(box_id)] = vtr::NdMatrix( {dim_x, dim_y}); /* set the lookahead cost map entries with a representative cost * entry from routing_cost_map */ for (unsigned ix = 0; ix < expansion_cost_map.dim_size(0); ix++) { for (unsigned iy = 0; iy < expansion_cost_map.dim_size(1); iy++) { - cost_map_[from_seg_index][ix][iy] = expansion_cost_map[ix][iy].get_representative_cost_entry(method); + cost_map_[from_seg_index][size_t(box_id)][ix][iy] = expansion_cost_map[ix][iy].get_representative_cost_entry(method); } } /* find missing cost entries and fill them in by copying a nearby cost entry */ for (unsigned ix = 0; ix < expansion_cost_map.dim_size(0); ix++) { for (unsigned iy = 0; iy < expansion_cost_map.dim_size(1); iy++) { - Cost_Entry cost_entry = cost_map_[from_seg_index][ix][iy]; + Cost_Entry cost_entry = cost_map_[from_seg_index][size_t(box_id)][ix][iy]; if (!cost_entry.valid()) { Cost_Entry copied_entry = get_nearby_cost_entry( from_seg_index, - offset_[from_seg_index].first + ix, - offset_[from_seg_index].second + iy); - cost_map_[from_seg_index][ix][iy] = copied_entry; + box_id, + offset_[from_seg_index][size_t(box_id)].first + ix, + offset_[from_seg_index][size_t(box_id)].second + iy); + cost_map_[from_seg_index][size_t(box_id)][ix][iy] = copied_entry; } } } } - Cost_Entry get_nearby_cost_entry(int segment_index, int x, int y) { + Cost_Entry get_nearby_cost_entry(int segment_index, ConnectionBoxId box_id, int x, int y) { /* compute the slope from x,y to 0,0 and then move towards 0,0 by one * unit to get the coordinates of the cost entry to be copied */ @@ -173,7 +191,7 @@ class CostMap { } } - Cost_Entry copy_entry = find_cost(segment_index, copy_x, copy_y); + Cost_Entry copy_entry = find_cost(segment_index, box_id, copy_x, copy_y); /* if the entry to be copied is also empty, recurse */ if (copy_entry.valid()) { @@ -182,41 +200,31 @@ class CostMap { return Cost_Entry(); } - return get_nearby_cost_entry(segment_index, copy_x, copy_y); - } - - void print_cost_map(const std::vector& segment_inf, - const char* fname) { - FILE* fp = vtr::fopen(fname, "w"); - for (size_t iseg = 0; iseg < cost_map_.size(); iseg++) { - fprintf(fp, "Seg %s(%zu) (%d, %d)\n", segment_inf.at(iseg).name.c_str(), - iseg, - offset_[iseg].first, - offset_[iseg].second); - for (size_t iy = 0; iy < cost_map_[iseg].dim_size(1); iy++) { - for (size_t ix = 0; ix < cost_map_[iseg].dim_size(0); ix++) { - fprintf(fp, "%.4g,\t", - cost_map_[iseg][ix][iy].delay); - } - fprintf(fp, "\n"); - } - fprintf(fp, "\n\n"); - } - - fclose(fp); + return get_nearby_cost_entry(segment_index, box_id, copy_x, copy_y); } void read(const std::string& file); void write(const std::string& file) const; private: - std::vector> cost_map_; - std::vector> offset_; + vtr::NdMatrix, 2> cost_map_; + vtr::NdMatrix, 2> offset_; std::vector segment_map_; }; static CostMap g_cost_map; +const std::vector& get_rr_node_indcies(t_rr_type rr_type, int start_x, int start_y) { + const auto& device_ctx = g_vpr_ctx.device(); + if (rr_type == CHANX) { + return device_ctx.rr_node_indices[rr_type][start_y][start_x][0]; + } else if (rr_type == CHANY) { + return device_ctx.rr_node_indices[rr_type][start_x][start_y][0]; + } else { + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Unknown channel type %d", rr_type); + } +} + class StartNode { public: StartNode(int start_x, int start_y, t_rr_type rr_type, int seg_index) @@ -227,7 +235,8 @@ class StartNode { , index_(0) {} int get_next_node() { const auto& device_ctx = g_vpr_ctx.device(); - const std::vector& channel_node_list = device_ctx.rr_node_indices[rr_type_][start_x_][start_y_][0]; + const std::vector& channel_node_list = get_rr_node_indcies( + rr_type_, start_x_, start_y_); for (; index_ < channel_node_list.size(); index_++) { int node_ind = channel_node_list[index_]; @@ -273,13 +282,45 @@ static constexpr int kMinProfile = 1; // - kMaxProfile is exceeded. static constexpr int kMaxProfile = 7; +static int search_at(int iseg, int start_x, int start_y, t_routing_cost_map* cost_map) { + int count = 0; + int dx = 0; + int dy = 0; + + while ((count == 0 && dx < kMaxProfile) || dy <= kMinProfile) { + for (e_rr_type chan_type : {CHANX, CHANY}) { + StartNode start_node(start_x + dx, start_y + dy, chan_type, iseg); + VTR_LOG("Searching for %d at (%d, %d)\n", iseg, start_x + dx, start_y + dy); + + for (int start_node_ind = start_node.get_next_node(); + start_node_ind != UNDEFINED; + start_node_ind = start_node.get_next_node()) { + count += 1; + + /* run Dijkstra's algorithm */ + run_dijkstra(start_node_ind, cost_map); + } + } + + if (dy < dx) { + dy += 1; + } else { + dx += 1; + } + } + + return count; +} + static void compute_connection_box_lookahead( const std::vector& segment_inf) { size_t num_segments = segment_inf.size(); vtr::ScopedStartFinishTimer timer("Computing connection box lookahead map"); /* free previous delay map and allocate new one */ - g_cost_map.set_segment_count(segment_inf.size()); + auto& device_ctx = g_vpr_ctx.device(); + g_cost_map.set_counts(segment_inf.size(), + device_ctx.connection_boxes.num_connection_box_types()); /* run Dijkstra's algorithm for each segment type & channel type combination */ for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { @@ -289,48 +330,33 @@ static void compute_connection_box_lookahead( t_routing_cost_map cost_map; int count = 0; - - int dx = 0; - int dy = 0; - //int start_x = vtr::nint(device_ctx.grid.width()/2); - //int start_y = vtr::nint(device_ctx.grid.height()/2); - int start_x = REF_X; - int start_y = REF_Y; - while ((count == 0 && dx < kMaxProfile) || dy <= kMinProfile) { - for (e_rr_type chan_type : {CHANX, CHANY}) { - StartNode start_node(start_x + dx, start_y + dy, chan_type, iseg); - - for (int start_node_ind = start_node.get_next_node(); - start_node_ind != UNDEFINED; - start_node_ind = start_node.get_next_node()) { - count += 1; - - /* run Dijkstra's algorithm */ - run_dijkstra(start_node_ind, &cost_map); - } - } - - if (dy < dx) { - dy += 1; - } else { - dx += 1; - } - } + count += search_at(iseg, REF_X, REF_Y, &cost_map); + count += search_at(iseg, REF_Y, REF_X, &cost_map); + count += search_at(iseg, 1, 1, &cost_map); + count += search_at(iseg, 76, 1, &cost_map); + count += search_at(iseg, 25, 25, &cost_map); + count += search_at(iseg, 25, 27, &cost_map); + count += search_at(iseg, 75, 26, &cost_map); if (count == 0) { VTR_LOG_WARN("Segment %s(%d) found no start_node_ind\n", segment_inf[iseg].name.c_str(), iseg); } +#if 0 + for(const auto & e : cost_map) { + VTR_LOG("%d -> %d (%d, %d): %g, %g\n", + std::get<0>(e).first, std::get<0>(e).second, + std::get<1>(e).first, std::get<1>(e).second, + std::get<2>(e).delay, std::get<2>(e).congestion); + } +#endif + /* boil down the cost list in routing_cost_map at each coordinate to a * representative cost entry and store it in the lookahead cost map */ g_cost_map.set_cost_map(iseg, cost_map, REPRESENTATIVE_ENTRY_METHOD); } - - if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_LOOKAHEAD_MAP)) { - g_cost_map.print_cost_map(segment_inf, getEchoFileName(E_ECHO_LOOKAHEAD_MAP)); - } } static float get_connection_box_lookahead_map_cost(int from_node_ind, @@ -369,24 +395,16 @@ static float get_connection_box_lookahead_map_cost(int from_node_ind, } } - if (device_ctx.rr_nodes[to_node_ind].type() == IPIN) { - ConnectionBoxId box_id; - std::pair box_location; - float site_pin_delay; - bool found = device_ctx.connection_boxes.find_connection_box( - to_node_ind, &box_id, &box_location, &site_pin_delay); - if (!found) { - VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", to_node_ind); - } - - to_location = box_location; - } else { - const std::pair* to_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(to_node_ind); - if (!to_canonical_loc) { - VPR_THROW(VPR_ERROR_ROUTE, "No canonical loc for %d", to_node_ind); - } - - to_location = *to_canonical_loc; + if (device_ctx.rr_nodes[to_node_ind].type() != IPIN) { + VPR_THROW(VPR_ERROR_ROUTE, "Not an IPIN/SINK, is %d", to_node_ind); + } + ConnectionBoxId box_id; + std::pair box_location; + float site_pin_delay; + bool found = device_ctx.connection_boxes.find_connection_box( + to_node_ind, &box_id, &box_location, &site_pin_delay); + if (!found) { + VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", to_node_ind); } const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(from_node_ind); @@ -395,11 +413,11 @@ static float get_connection_box_lookahead_map_cost(int from_node_ind, from_node_ind, to_node_ind); } - ssize_t dx = ssize_t(from_canonical_loc->first) - ssize_t(to_location.first); - ssize_t dy = ssize_t(from_canonical_loc->second) - ssize_t(to_location.second); + ssize_t dx = ssize_t(from_canonical_loc->first) - ssize_t(box_location.first); + ssize_t dy = ssize_t(from_canonical_loc->second) - ssize_t(box_location.second); int from_seg_index = g_cost_map.node_to_segment(from_node_ind); - Cost_Entry cost_entry = g_cost_map.find_cost(from_seg_index, dx, dy); + Cost_Entry cost_entry = g_cost_map.find_cost(from_seg_index, box_id, dx, dy); float expected_delay = cost_entry.delay; float expected_congestion = cost_entry.congestion; @@ -461,11 +479,13 @@ static void run_dijkstra(int start_node_ind, int delta_x = ssize_t(from_canonical_loc->first) - ssize_t(box_location.first); int delta_y = ssize_t(from_canonical_loc->second) - ssize_t(box_location.second); - routing_cost_map->push_back(std::make_pair( + routing_cost_map->push_back(std::make_tuple( + std::make_pair(start_node_ind, node_ind), std::make_pair(delta_x, delta_y), Cost_Entry( current.delay, - current.congestion_upstream))); + current.congestion_upstream), + box_id)); } expand_dijkstra_neighbours(current, node_visited_costs, node_expanded, pq); @@ -496,6 +516,17 @@ float ConnectionBoxMapLookahead::get_expected_cost( } } +#ifndef VTR_ENABLE_CAPNPROTO + +void ConnectionBoxMapLookahead::read(const std::string& file) { + VPR_THROW(VPR_ERROR_ROUTE, "ConnectionBoxMapLookahead::read not implemented"); +} +void ConnectionBoxMapLookahead::write(const std::string& file) const { + VPR_THROW(VPR_ERROR_ROUTE, "ConnectionBoxMapLookahead::write not implemented"); +} + +#else + void ConnectionBoxMapLookahead::read(const std::string& file) { g_cost_map.read(file); } @@ -513,6 +544,27 @@ static void FromCostEntry(VprCostEntry::Builder* out, const Cost_Entry& in) { out->setCongestion(in.congestion); } +static void ToVprVector2D(std::pair* out, const VprVector2D::Reader& in) { + *out = std::make_pair(in.getX(), in.getY()); +} + +static void FromVprVector2D(VprVector2D::Builder* out, const std::pair& in) { + out->setX(in.first); + out->setY(in.second); +} + +static void ToMatrixCostEntry(vtr::NdMatrix* out, + const Matrix::Reader& in) { + ToNdMatrix<2, VprCostEntry, Cost_Entry>(out, in, ToCostEntry); +} + +static void FromMatrixCostEntry( + Matrix::Builder* out, + const vtr::NdMatrix& in) { + FromNdMatrix<2, VprCostEntry, Cost_Entry>( + out, in, FromCostEntry); +} + void CostMap::read(const std::string& file) { MmapFile f(file); @@ -532,20 +584,14 @@ void CostMap::read(const std::string& file) { { const auto& offset = cost_map.getOffset(); - offset_.resize(offset.size()); - auto dst_iter = offset_.begin(); - for (const auto& src : offset) { - *dst_iter++ = std::make_pair(src.getX(), src.getY()); - } + ToNdMatrix<2, VprVector2D, std::pair>( + &offset_, offset, ToVprVector2D); } { const auto& cost_maps = cost_map.getCostMap(); - cost_map_.resize(cost_maps.size()); - auto dst_iter = cost_map_.begin(); - for (const auto& src : cost_maps) { - ToNdMatrix<2, VprCostEntry, Cost_Entry>(&(*dst_iter++), src, ToCostEntry); - } + ToNdMatrix<2, Matrix, vtr::NdMatrix>( + &cost_map_, cost_maps, ToMatrixCostEntry); } } @@ -562,22 +608,17 @@ void CostMap::write(const std::string& file) const { } { - auto offset = cost_map.initOffset(offset_.size()); - for (size_t i = 0; i < offset_.size(); ++i) { - auto elem = offset[i]; - elem.setX(offset_[i].first); - elem.setY(offset_[i].second); - } + auto offset = cost_map.initOffset(); + FromNdMatrix<2, VprVector2D, std::pair>( + &offset, offset_, FromVprVector2D); } { - auto cost_maps = cost_map.initCostMap(cost_map_.size()); - for (size_t i = 0; i < cost_map_.size(); ++i) { - Matrix::Builder elem = cost_maps[i]; - FromNdMatrix<2, VprCostEntry, Cost_Entry>( - &elem, cost_map_[i], FromCostEntry); - } + auto cost_maps = cost_map.initCostMap(); + FromNdMatrix<2, Matrix, vtr::NdMatrix>( + &cost_maps, cost_map_, FromMatrixCostEntry); } writeMessageToFile(file, &builder); } +#endif From 596c10fcaba9404cdbcc6dbe9cdb077d65c943ee Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Fri, 27 Sep 2019 11:08:58 -0700 Subject: [PATCH 04/41] Mark file location function as static. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/route/connection_box_lookahead_map.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index a95fa385893..b82efe419ed 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -214,7 +214,7 @@ class CostMap { static CostMap g_cost_map; -const std::vector& get_rr_node_indcies(t_rr_type rr_type, int start_x, int start_y) { +static const std::vector& get_rr_node_indcies(t_rr_type rr_type, int start_x, int start_y) { const auto& device_ctx = g_vpr_ctx.device(); if (rr_type == CHANX) { return device_ctx.rr_node_indices[rr_type][start_y][start_x][0]; From 15b676bf95dda8364e315ab3ccae9cd0af9e4b47 Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Fri, 11 Oct 2019 16:43:37 -0700 Subject: [PATCH 05/41] Add bounds checking when search grid. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/route/connection_box_lookahead_map.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index b82efe419ed..33ac7e3aa57 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -283,11 +283,20 @@ static constexpr int kMinProfile = 1; static constexpr int kMaxProfile = 7; static int search_at(int iseg, int start_x, int start_y, t_routing_cost_map* cost_map) { + const auto& device_ctx = g_vpr_ctx.device(); + int count = 0; int dx = 0; int dy = 0; while ((count == 0 && dx < kMaxProfile) || dy <= kMinProfile) { + if (start_x + dx >= device_ctx.grid.width()) { + break; + } + if (start_y + dy >= device_ctx.grid.height()) { + break; + } + for (e_rr_type chan_type : {CHANX, CHANY}) { StartNode start_node(start_x + dx, start_y + dy, chan_type, iseg); VTR_LOG("Searching for %d at (%d, %d)\n", iseg, start_x + dx, start_y + dy); From 78ef779f23111627372bd7f37377879dc7b19c0b Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Fri, 11 Oct 2019 16:28:47 -0700 Subject: [PATCH 06/41] Updates to connection box lookahead map. - New sampling method for connection box lookahead map. - New spiral hole filling algorithm - Use a lightweight version of PQ_Entry (PQ_Entry_Lite) - Use maps in run_dijkstra() - Move context of router_lookahead_map_utils inside of util namespace. Signed-off-by: Dusty DeWeese Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- .../route/connection_box_lookahead_map.cpp | 812 ++++++++++-------- vpr/src/route/connection_box_lookahead_map.h | 57 ++ vpr/src/route/router_lookahead_map_utils.cpp | 69 +- vpr/src/route/router_lookahead_map_utils.h | 32 +- 4 files changed, 601 insertions(+), 369 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 33ac7e3aa57..cff9fe2971b 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -9,6 +9,7 @@ #include "globals.h" #include "vtr_math.h" #include "vtr_time.h" +#include "vtr_geometry.h" #include "echo_files.h" #include "route_timing.h" @@ -29,348 +30,261 @@ * average, median, etc. This define selects the method we use. * * See e_representative_entry_method */ -#define REPRESENTATIVE_ENTRY_METHOD SMALLEST +#define REPRESENTATIVE_ENTRY_METHOD util::SMALLEST -#define REF_X 25 -#define REF_Y 23 +static constexpr int SAMPLE_GRID_SIZE = 4; -static int signum(int x) { - if (x > 0) return 1; - if (x < 0) - return -1; - else - return 0; -} +typedef std::array, SAMPLE_GRID_SIZE>, SAMPLE_GRID_SIZE> SampleGrid; -typedef std::vector, std::pair, Cost_Entry, ConnectionBoxId>> t_routing_cost_map; static void run_dijkstra(int start_node_ind, - t_routing_cost_map* routing_cost_map); - -class CostMap { - public: - void set_counts(size_t seg_count, size_t box_count) { - cost_map_.clear(); - offset_.clear(); - cost_map_.resize({seg_count, box_count}); - offset_.resize({seg_count, box_count}); - - const auto& device_ctx = g_vpr_ctx.device(); - segment_map_.resize(device_ctx.rr_nodes.size()); - for (size_t i = 0; i < segment_map_.size(); ++i) { - auto& from_node = device_ctx.rr_nodes[i]; + std::vector* routing_costs); +static void find_inodes_for_segment_types(std::vector* inodes_for_segment); - int from_cost_index = from_node.cost_index(); - int from_seg_index = device_ctx.rr_indexed_data[from_cost_index].seg_index; +void CostMap::set_counts(size_t seg_count, size_t box_count) { + cost_map_.clear(); + offset_.clear(); + cost_map_.resize({seg_count, box_count}); + offset_.resize({seg_count, box_count}); - segment_map_[i] = from_seg_index; - } - } + const auto& device_ctx = g_vpr_ctx.device(); + segment_map_.resize(device_ctx.rr_nodes.size()); + for (size_t i = 0; i < segment_map_.size(); ++i) { + auto& from_node = device_ctx.rr_nodes[i]; + + int from_cost_index = from_node.cost_index(); + int from_seg_index = device_ctx.rr_indexed_data[from_cost_index].seg_index; - int node_to_segment(int from_node_ind) { - return segment_map_[from_node_ind]; + segment_map_[i] = from_seg_index; } +} - Cost_Entry find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const { - VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); - int dx = delta_x - offset_[from_seg_index][size_t(box_id)].first; - int dy = delta_y - offset_[from_seg_index][size_t(box_id)].second; - const auto& cost_map = cost_map_[from_seg_index][size_t(box_id)]; +int CostMap::node_to_segment(int from_node_ind) const { + return segment_map_[from_node_ind]; +} - if (dx < 0) { - dx = 0; - } - if (dy < 0) { - dy = 0; - } +util::Cost_Entry CostMap::find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const { + VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); + const auto& cost_map = cost_map_[from_seg_index][size_t(box_id)]; + if (cost_map.dim_size(0) == 0 || cost_map.dim_size(1) == 0) { + return util::Cost_Entry(); + } - if (dx >= (ssize_t)cost_map.dim_size(0)) { - dx = cost_map.dim_size(0) - 1; - } - if (dy >= (ssize_t)cost_map.dim_size(1)) { - dy = cost_map.dim_size(1) - 1; - } + int dx = delta_x - offset_[from_seg_index][size_t(box_id)].first; + int dy = delta_y - offset_[from_seg_index][size_t(box_id)].second; - return cost_map_[from_seg_index][size_t(box_id)][dx][dy]; + if (dx < 0) { + dx = 0; } - - void set_cost_map(int from_seg_index, - const t_routing_cost_map& cost_map, - e_representative_entry_method method) { - const auto& device_ctx = g_vpr_ctx.device(); - for (size_t box_id = 0; - box_id < device_ctx.connection_boxes.num_connection_box_types(); - ++box_id) { - set_cost_map(from_seg_index, ConnectionBoxId(box_id), cost_map, method); - } + if (dy < 0) { + dy = 0; } - void set_cost_map(int from_seg_index, ConnectionBoxId box_id, const t_routing_cost_map& cost_map, e_representative_entry_method method) { - VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); - - // Find coordinate offset for this segment. - int min_dx = 0; - int min_dy = 0; - int max_dx = 0; - int max_dy = 0; - for (const auto& entry : cost_map) { - if (std::get<3>(entry) != box_id) { - continue; - } - min_dx = std::min(std::get<1>(entry).first, min_dx); - min_dy = std::min(std::get<1>(entry).second, min_dy); + if (dx >= (ssize_t)cost_map.dim_size(0)) { + dx = cost_map.dim_size(0) - 1; + } + if (dy >= (ssize_t)cost_map.dim_size(1)) { + dy = cost_map.dim_size(1) - 1; + } - max_dx = std::max(std::get<1>(entry).first, max_dx); - max_dy = std::max(std::get<1>(entry).second, max_dy); - } + return cost_map_[from_seg_index][size_t(box_id)][dx][dy]; +} - offset_[from_seg_index][size_t(box_id)].first = min_dx; - offset_[from_seg_index][size_t(box_id)].second = min_dy; - size_t dim_x = max_dx - min_dx + 1; - size_t dim_y = max_dy - min_dy + 1; +void CostMap::set_cost_map(int from_seg_index, + const RoutingCosts& costs, + util::e_representative_entry_method method) { + // sort the entries + const auto& device_ctx = g_vpr_ctx.device(); + for (size_t box_id = 0; + box_id < device_ctx.connection_boxes.num_connection_box_types(); + ++box_id) { + set_cost_map(from_seg_index, ConnectionBoxId(box_id), costs, method); + } +} - vtr::NdMatrix expansion_cost_map( - {dim_x, dim_y}); +void CostMap::set_cost_map(int from_seg_index, ConnectionBoxId box_id, const RoutingCosts& costs, util::e_representative_entry_method method) { + VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); - for (const auto& entry : cost_map) { - if (std::get<3>(entry) != box_id) { - continue; - } - int x = std::get<1>(entry).first - min_dx; - int y = std::get<1>(entry).second - min_dy; - expansion_cost_map[x][y].add_cost_entry( - method, std::get<2>(entry).delay, - std::get<2>(entry).congestion); + // calculate the bounding box + vtr::Rect bounds; + for (const auto& entry : costs) { + if (entry.first.box_id == box_id) { + bounds.expand_bounding_box(vtr::Rect(entry.first.delta)); } + } - cost_map_[from_seg_index][size_t(box_id)] = vtr::NdMatrix( - {dim_x, dim_y}); + if (bounds.empty()) { + offset_[from_seg_index][size_t(box_id)] = std::make_pair(0, 0); + cost_map_[from_seg_index][size_t(box_id)] = vtr::NdMatrix( + {size_t(0), size_t(0)}); + return; + } - /* set the lookahead cost map entries with a representative cost - * entry from routing_cost_map */ - for (unsigned ix = 0; ix < expansion_cost_map.dim_size(0); ix++) { - for (unsigned iy = 0; iy < expansion_cost_map.dim_size(1); iy++) { - cost_map_[from_seg_index][size_t(box_id)][ix][iy] = expansion_cost_map[ix][iy].get_representative_cost_entry(method); - } - } + offset_[from_seg_index][size_t(box_id)] = std::make_pair(bounds.xmin(), bounds.ymin()); - /* find missing cost entries and fill them in by copying a nearby cost entry */ - for (unsigned ix = 0; ix < expansion_cost_map.dim_size(0); ix++) { - for (unsigned iy = 0; iy < expansion_cost_map.dim_size(1); iy++) { - Cost_Entry cost_entry = cost_map_[from_seg_index][size_t(box_id)][ix][iy]; - - if (!cost_entry.valid()) { - Cost_Entry copied_entry = get_nearby_cost_entry( - from_seg_index, - box_id, - offset_[from_seg_index][size_t(box_id)].first + ix, - offset_[from_seg_index][size_t(box_id)].second + iy); - cost_map_[from_seg_index][size_t(box_id)][ix][iy] = copied_entry; - } - } + cost_map_[from_seg_index][size_t(box_id)] = vtr::NdMatrix( + {size_t(bounds.width()), size_t(bounds.height())}); + auto& matrix = cost_map_[from_seg_index][size_t(box_id)]; + + for (const auto& entry : costs) { + if (entry.first.box_id == box_id) { + int x = entry.first.delta.x() - bounds.xmin(); + int y = entry.first.delta.y() - bounds.ymin(); + matrix[x][y] = entry.second.cost_entry; } } - Cost_Entry get_nearby_cost_entry(int segment_index, ConnectionBoxId box_id, int x, int y) { - /* compute the slope from x,y to 0,0 and then move towards 0,0 by one - * unit to get the coordinates of the cost entry to be copied */ - - float slope; - int copy_x, copy_y; - if (x == 0 || y == 0) { - slope = std::numeric_limits::infinity(); - copy_x = x - signum(x); - copy_y = y - signum(y); - } else { - slope = (float)y / (float)x; - if (slope >= 1.0) { - copy_y = y - signum(y); - copy_x = vtr::nint((float)y / slope); - } else { - copy_x = x - signum(x); - copy_y = vtr::nint((float)x * slope); + std::list> missing; + bool couldnt_fill = false; + auto shifted_bounds = vtr::Rect(0, 0, bounds.width(), bounds.height()); + /* find missing cost entries and fill them in by copying a nearby cost entry */ + for (unsigned ix = 0; ix < matrix.dim_size(0) && !couldnt_fill; ix++) { + for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { + util::Cost_Entry& cost_entry = matrix[ix][iy]; + if (!cost_entry.valid()) { + // maximum search radius + util::Cost_Entry filler = get_nearby_cost_entry(matrix, ix, iy, shifted_bounds); + if (filler.valid()) { + missing.push_back(std::make_tuple(ix, iy, filler)); + } else { + couldnt_fill = true; + } } } - - Cost_Entry copy_entry = find_cost(segment_index, box_id, copy_x, copy_y); - - /* if the entry to be copied is also empty, recurse */ - if (copy_entry.valid()) { - return copy_entry; - } else if (copy_x == 0 && copy_y == 0) { - return Cost_Entry(); - } - - return get_nearby_cost_entry(segment_index, box_id, copy_x, copy_y); } - void read(const std::string& file); - void write(const std::string& file) const; - - private: - vtr::NdMatrix, 2> cost_map_; - vtr::NdMatrix, 2> offset_; - std::vector segment_map_; -}; - -static CostMap g_cost_map; - -static const std::vector& get_rr_node_indcies(t_rr_type rr_type, int start_x, int start_y) { - const auto& device_ctx = g_vpr_ctx.device(); - if (rr_type == CHANX) { - return device_ctx.rr_node_indices[rr_type][start_y][start_x][0]; - } else if (rr_type == CHANY) { - return device_ctx.rr_node_indices[rr_type][start_x][start_y][0]; - } else { - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Unknown channel type %d", rr_type); + // write back the missing entries + for (auto& xy_entry : missing) { + matrix[std::get<0>(xy_entry)][std::get<1>(xy_entry)] = std::get<2>(xy_entry); } -} -class StartNode { - public: - StartNode(int start_x, int start_y, t_rr_type rr_type, int seg_index) - : start_x_(start_x) - , start_y_(start_y) - , rr_type_(rr_type) - , seg_index_(seg_index) - , index_(0) {} - int get_next_node() { - const auto& device_ctx = g_vpr_ctx.device(); - const std::vector& channel_node_list = get_rr_node_indcies( - rr_type_, start_x_, start_y_); - - for (; index_ < channel_node_list.size(); index_++) { - int node_ind = channel_node_list[index_]; - - if (node_ind == OPEN || device_ctx.rr_nodes[node_ind].capacity() == 0) { - continue; + if (couldnt_fill) { + VTR_LOG_WARN("Couldn't fill holes in the cost matrix for %d -> %ld\n", + from_seg_index, size_t(box_id)); + for (unsigned y = 0; y < matrix.dim_size(1); y++) { + for (unsigned x = 0; x < matrix.dim_size(0); x++) { + VTR_ASSERT(!matrix[x][y].valid()); } - - const std::pair* loc = device_ctx.connection_boxes.find_canonical_loc(node_ind); - if (loc == nullptr) { - continue; - } - - int node_cost_ind = device_ctx.rr_nodes[node_ind].cost_index(); - int node_seg_ind = device_ctx.rr_indexed_data[node_cost_ind].seg_index; - if (node_seg_ind == seg_index_) { - index_ += 1; - return node_ind; + } +#if 0 + for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { + for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { + if(matrix[ix][iy].valid()) { + printf("O"); + } else { + printf("."); + } } + printf("\n"); } - - return UNDEFINED; +#endif } +} - private: - int start_x_; - int start_y_; - t_rr_type rr_type_; - int seg_index_; - size_t index_; -}; - -// Minimum size of search for channels to profile. kMinProfile results -// in searching x = [0, kMinProfile], and y = [0, kMinProfile[. -// -// Making this value larger will increase the sample size, but also the runtime -// to produce the lookahead. -static constexpr int kMinProfile = 1; - -// Maximum size of search for channels to profile. Once search is outside of -// kMinProfile distance, lookahead will stop searching once: -// - At least one channel has been profiled -// - kMaxProfile is exceeded. -static constexpr int kMaxProfile = 7; - -static int search_at(int iseg, int start_x, int start_y, t_routing_cost_map* cost_map) { +void CostMap::print(int iseg) const { const auto& device_ctx = g_vpr_ctx.device(); - - int count = 0; - int dx = 0; - int dy = 0; - - while ((count == 0 && dx < kMaxProfile) || dy <= kMinProfile) { - if (start_x + dx >= device_ctx.grid.width()) { - break; + for (size_t box_id = 0; + box_id < device_ctx.connection_boxes.num_connection_box_types(); + box_id++) { + auto& matrix = cost_map_[iseg][box_id]; + if (matrix.dim_size(0) == 0 || matrix.dim_size(1) == 0) { + printf("cost EMPTY for box_id = %lu\n", box_id); + continue; } - if (start_y + dy >= device_ctx.grid.height()) { - break; + printf("cost for box_id = %lu\n", box_id); + double sum = 0.0; + for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { + for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { + const auto& entry = matrix[ix][iy]; + if (entry.valid()) { + sum += entry.delay; + } + } } - - for (e_rr_type chan_type : {CHANX, CHANY}) { - StartNode start_node(start_x + dx, start_y + dy, chan_type, iseg); - VTR_LOG("Searching for %d at (%d, %d)\n", iseg, start_x + dx, start_y + dy); - - for (int start_node_ind = start_node.get_next_node(); - start_node_ind != UNDEFINED; - start_node_ind = start_node.get_next_node()) { - count += 1; - - /* run Dijkstra's algorithm */ - run_dijkstra(start_node_ind, cost_map); + double avg = sum / ((double)matrix.dim_size(0) * (double)matrix.dim_size(1)); + for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { + for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { + const auto& entry = matrix[ix][iy]; + if (!entry.valid()) { + printf("*"); + } else if (entry.delay > avg) { + printf("o"); + } else { + printf("."); + } } + printf("\n"); } + } +} - if (dy < dx) { - dy += 1; - } else { - dx += 1; +std::list> CostMap::list_empty() const { + std::list> results; + for (int iseg = 0; iseg < (int)cost_map_.dim_size(0); iseg++) { + for (int box_id = 0; box_id < (int)cost_map_.dim_size(1); box_id++) { + auto& matrix = cost_map_[iseg][box_id]; + if (matrix.dim_size(0) == 0 || matrix.dim_size(1) == 0) results.push_back(std::make_pair(iseg, box_id)); } } - - return count; + return results; } -static void compute_connection_box_lookahead( - const std::vector& segment_inf) { - size_t num_segments = segment_inf.size(); - vtr::ScopedStartFinishTimer timer("Computing connection box lookahead map"); - - /* free previous delay map and allocate new one */ - auto& device_ctx = g_vpr_ctx.device(); - g_cost_map.set_counts(segment_inf.size(), - device_ctx.connection_boxes.num_connection_box_types()); +static void assign_min_entry(util::Cost_Entry& dst, const util::Cost_Entry& src) { + if (src.delay < dst.delay) dst.delay = src.delay; + if (src.congestion < dst.congestion) dst.congestion = src.congestion; +} - /* run Dijkstra's algorithm for each segment type & channel type combination */ - for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { - VTR_LOG("Creating cost map for %s(%d)\n", - segment_inf[iseg].name.c_str(), iseg); - /* allocate the cost map for this iseg/chan_type */ - t_routing_cost_map cost_map; - - int count = 0; - count += search_at(iseg, REF_X, REF_Y, &cost_map); - count += search_at(iseg, REF_Y, REF_X, &cost_map); - count += search_at(iseg, 1, 1, &cost_map); - count += search_at(iseg, 76, 1, &cost_map); - count += search_at(iseg, 25, 25, &cost_map); - count += search_at(iseg, 25, 27, &cost_map); - count += search_at(iseg, 75, 26, &cost_map); - - if (count == 0) { - VTR_LOG_WARN("Segment %s(%d) found no start_node_ind\n", - segment_inf[iseg].name.c_str(), iseg); +util::Cost_Entry CostMap::get_nearby_cost_entry(const vtr::NdMatrix& matrix, + int cx, + int cy, + const vtr::Rect& bounds) { + // spiral around (cx, cy) looking for a nearby entry + int n = 1, x, y; + bool in_bounds; + util::Cost_Entry entry; + + do { + in_bounds = false; + y = cy - n; // top + // left -> right + for (x = cx - n; x < cx + n; x++) { + if (bounds.contains(vtr::Point(x, y))) { + assign_min_entry(entry, matrix[x][y]); + in_bounds = true; + } } - -#if 0 - for(const auto & e : cost_map) { - VTR_LOG("%d -> %d (%d, %d): %g, %g\n", - std::get<0>(e).first, std::get<0>(e).second, - std::get<1>(e).first, std::get<1>(e).second, - std::get<2>(e).delay, std::get<2>(e).congestion); + x = cx + n; // right + // top -> bottom + for (; y < cy + n; y++) { + if (bounds.contains(vtr::Point(x, y))) { + assign_min_entry(entry, matrix[x][y]); + in_bounds = true; + } } -#endif - - /* boil down the cost list in routing_cost_map at each coordinate to a - * representative cost entry and store it in the lookahead cost map */ - g_cost_map.set_cost_map(iseg, cost_map, - REPRESENTATIVE_ENTRY_METHOD); - } + y = cy + n; // bottom + // right -> left + for (; x > cx - n; x--) { + if (bounds.contains(vtr::Point(x, y))) { + assign_min_entry(entry, matrix[x][y]); + in_bounds = true; + } + } + x = cx - n; // left + // bottom -> top + for (; y > cy - n; y--) { + if (bounds.contains(vtr::Point(x, y))) { + assign_min_entry(entry, matrix[x][y]); + in_bounds = true; + } + } + if (entry.valid()) return entry; + n++; + } while (in_bounds); + return util::Cost_Entry(); } -static float get_connection_box_lookahead_map_cost(int from_node_ind, - int to_node_ind, - float criticality_fac) { +float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, + int to_node_ind, + float criticality_fac) const { if (from_node_ind == to_node_ind) { return 0.f; } @@ -383,25 +297,18 @@ static float get_connection_box_lookahead_map_cost(int from_node_ind, if (to_node_type == SINK) { const auto& sink_to_ipin = device_ctx.connection_boxes.find_sink_connection_boxes(to_node_ind); - if (sink_to_ipin.ipin_count > 1) { - float cost = std::numeric_limits::infinity(); - // Find cheapest cost from from_node_ind to IPINs for this SINK. - for (int i = 0; i < sink_to_ipin.ipin_count; ++i) { - cost = std::min(cost, - get_connection_box_lookahead_map_cost( - from_node_ind, - sink_to_ipin.ipin_nodes[i], criticality_fac)); - } - - return cost; - } else if (sink_to_ipin.ipin_count == 1) { - to_node_ind = sink_to_ipin.ipin_nodes[0]; - if (from_node_ind == to_node_ind) { - return 0.f; - } - } else { - return std::numeric_limits::infinity(); + float cost = std::numeric_limits::infinity(); + + // Find cheapest cost from from_node_ind to IPINs for this SINK. + for (int i = 0; i < sink_to_ipin.ipin_count; ++i) { + cost = std::min(cost, + get_map_cost( + from_node_ind, + sink_to_ipin.ipin_nodes[i], criticality_fac)); + if (cost <= 0.f) break; } + + return cost; } if (device_ctx.rr_nodes[to_node_ind].type() != IPIN) { @@ -425,12 +332,23 @@ static float get_connection_box_lookahead_map_cost(int from_node_ind, ssize_t dx = ssize_t(from_canonical_loc->first) - ssize_t(box_location.first); ssize_t dy = ssize_t(from_canonical_loc->second) - ssize_t(box_location.second); - int from_seg_index = g_cost_map.node_to_segment(from_node_ind); - Cost_Entry cost_entry = g_cost_map.find_cost(from_seg_index, box_id, dx, dy); + int from_seg_index = cost_map_.node_to_segment(from_node_ind); + util::Cost_Entry cost_entry = cost_map_.find_cost(from_seg_index, box_id, dx, dy); + + if (!cost_entry.valid()) { + // there is no route + // VTR_LOG_WARN("Not connected %d (%s, %d) -> %d (%s, %d, (%d, %d))\n", + // from_node_ind, device_ctx.rr_nodes[from_node_ind].type_string(), from_seg_index, + // to_node_ind, device_ctx.rr_nodes[to_node_ind].type_string(), + // (int)size_t(box_id), (int)box_location.first, (int)box_location.second); + return std::numeric_limits::infinity(); + } + float expected_delay = cost_entry.delay; float expected_congestion = cost_entry.congestion; float expected_cost = criticality_fac * expected_delay + (1.0 - criticality_fac) * expected_congestion; + VTR_ASSERT(std::isfinite(expected_cost) && expected_cost >= 0.f); return expected_cost; } @@ -438,7 +356,7 @@ static float get_connection_box_lookahead_map_cost(int from_node_ind, * visited. Each time a pin is visited, the delay/congestion information * to that pin is stored to an entry in the routing_cost_map */ static void run_dijkstra(int start_node_ind, - t_routing_cost_map* routing_cost_map) { + std::vector* routing_costs) { auto& device_ctx = g_vpr_ctx.device(); /* a list of boolean flags (one for each rr node) to figure out if a @@ -447,24 +365,18 @@ static void run_dijkstra(int start_node_ind, /* for each node keep a list of the cost with which that node has been * visited (used to determine whether to push a candidate node onto the * expansion queue */ - std::vector node_visited_costs(device_ctx.rr_nodes.size(), -1.0); + std::unordered_map paths; /* a priority queue for expansion */ - std::priority_queue pq; + std::priority_queue, std::greater> pq; /* first entry has no upstream delay or congestion */ - util::PQ_Entry first_entry(start_node_ind, UNDEFINED, 0, 0, 0, true); + util::PQ_Entry_Lite first_entry(start_node_ind, UNDEFINED, 0, true); pq.push(first_entry); - const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(start_node_ind); - if (from_canonical_loc == nullptr) { - VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d", - start_node_ind); - } - /* now do routing */ while (!pq.empty()) { - util::PQ_Entry current = pq.top(); + auto current = pq.top(); pq.pop(); int node_ind = current.rr_node_ind; @@ -485,25 +397,155 @@ static void run_dijkstra(int start_node_ind, VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", node_ind); } - int delta_x = ssize_t(from_canonical_loc->first) - ssize_t(box_location.first); - int delta_y = ssize_t(from_canonical_loc->second) - ssize_t(box_location.second); + // reconstruct the path + std::vector path; + for (int i = node_ind; i != start_node_ind; path.push_back(i = paths[i].parent)) + ; + util::PQ_Entry parent_entry(start_node_ind, UNDEFINED, 0, 0, 0, true); + + // recalculate the path with congestion + util::PQ_Entry current_full = parent_entry; + int parent = start_node_ind; + for (auto it = path.rbegin(); it != path.rend(); it++) { + auto& parent_node = device_ctx.rr_nodes[parent]; + current_full = util::PQ_Entry(*it, parent_node.edge_switch(paths[*it].edge), current_full.delay, + current_full.R_upstream, current_full.congestion_upstream, false); + parent = *it; + } - routing_cost_map->push_back(std::make_tuple( - std::make_pair(start_node_ind, node_ind), - std::make_pair(delta_x, delta_y), - Cost_Entry( - current.delay, - current.congestion_upstream), - box_id)); - } + // add each node along the path subtracting the incremental costs from the current costs + parent = start_node_ind; + for (auto it = path.rbegin(); it != path.rend(); it++) { + auto& parent_node = device_ctx.rr_nodes[parent]; + int seg_index = device_ctx.rr_indexed_data[parent_node.cost_index()].seg_index; + const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(parent); + if (from_canonical_loc == nullptr) { + VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d", + parent); + } + + vtr::Point delta(ssize_t(from_canonical_loc->first) - ssize_t(box_location.first), + ssize_t(from_canonical_loc->second) - ssize_t(box_location.second)); + RoutingCostKey key = { + delta, + box_id}; + RoutingCost val = { + parent, + node_ind, + util::Cost_Entry( + current_full.delay - parent_entry.delay, + current_full.congestion_upstream - parent_entry.congestion_upstream)}; + + const auto& x = (*routing_costs)[seg_index].find(key); + + // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST + if (x == (*routing_costs)[seg_index].end() || x->second.cost_entry.delay > val.cost_entry.delay) { + (*routing_costs)[seg_index][key] = val; + } - expand_dijkstra_neighbours(current, node_visited_costs, node_expanded, pq); - node_expanded[node_ind] = true; + parent_entry = util::PQ_Entry(*it, parent_node.edge_switch(paths[*it].edge), parent_entry.delay, + parent_entry.R_upstream, parent_entry.congestion_upstream, false); + parent = *it; + } + } else { + expand_dijkstra_neighbours(current, paths, node_expanded, pq); + node_expanded[node_ind] = true; + } } } void ConnectionBoxMapLookahead::compute(const std::vector& segment_inf) { - compute_connection_box_lookahead(segment_inf); + vtr::ScopedStartFinishTimer timer("Computing connection box lookahead map"); + + size_t num_segments = segment_inf.size(); + std::vector inodes_for_segment(num_segments); + find_inodes_for_segment_types(&inodes_for_segment); + + /* free previous delay map and allocate new one */ + auto& device_ctx = g_vpr_ctx.device(); + cost_map_.set_counts(segment_inf.size(), + device_ctx.connection_boxes.num_connection_box_types()); + + VTR_ASSERT(REPRESENTATIVE_ENTRY_METHOD == util::SMALLEST); + std::vector all_costs(num_segments); + + /* run Dijkstra's algorithm for each segment type & channel type combination */ + for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { + VTR_LOG("Creating cost map for %s(%d)\n", + segment_inf[iseg].name.c_str(), iseg); + /* allocate the cost map for this iseg/chan_type */ + std::vector costs(num_segments); + for (const auto& row : inodes_for_segment[iseg]) { + for (auto cell : row) { + for (auto node_ind : cell) { + run_dijkstra(node_ind, &costs); + } + } + } + + for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { + for (const auto& cost : costs[iseg]) { + const auto& key = cost.first; + const auto& val = cost.second; + const auto& x = all_costs[iseg].find(key); + + // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST + if (x == all_costs[iseg].end() || x->second.cost_entry.delay > val.cost_entry.delay) { + all_costs[iseg][key] = val; + } + } + } + } + + VTR_LOG("Combining results\n"); + for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { +#if 0 + for (auto &e : cost_map_per_segment[iseg]) { + VTR_LOG("%d -> %d (%d, %d): %g, %g\n", + std::get<0>(e).first, std::get<0>(e).second, + std::get<1>(e).first, std::get<1>(e).second, + std::get<2>(e).delay, std::get<2>(e).congestion); + } +#endif + const auto& costs = all_costs[iseg]; + if (costs.empty()) { + // check that there were no start nodes + bool empty = true; + for (const auto& row : inodes_for_segment[iseg]) { + for (auto cell : row) { + if (!cell.empty()) { + empty = false; + break; + } + } + if (!empty) break; + } + if (empty) { + VTR_LOG_WARN("Segment %s(%d) found no routes\n", + segment_inf[iseg].name.c_str(), iseg); + } else { + VTR_LOG_WARN("Segment %s(%d) found no routes, even though there are some matching nodes\n", + segment_inf[iseg].name.c_str(), iseg); + } + } else { + /* boil down the cost list in routing_cost_map at each coordinate to a + * representative cost entry and store it in the lookahead cost map */ + cost_map_.set_cost_map(iseg, costs, + REPRESENTATIVE_ENTRY_METHOD); + } + /* + * VTR_LOG("cost map for %s(%d)\n", + * segment_inf[iseg].name.c_str(), iseg); + * cost_map_.print(iseg); + */ + } + + for (std::pair p : cost_map_.list_empty()) { + int iseg, box_id; + std::tie(iseg, box_id) = p; + VTR_LOG("cost map for %s(%d), connection box %d EMPTY\n", + segment_inf[iseg].name.c_str(), iseg, box_id); + } } float ConnectionBoxMapLookahead::get_expected_cost( @@ -516,7 +558,7 @@ float ConnectionBoxMapLookahead::get_expected_cost( t_rr_type rr_type = device_ctx.rr_nodes[current_node].type(); if (rr_type == CHANX || rr_type == CHANY) { - return get_connection_box_lookahead_map_cost( + return get_map_cost( current_node, target_node, params.criticality); } else if (rr_type == IPIN) { /* Change if you're allowing route-throughs */ return (device_ctx.rr_indexed_data[SINK_COST_INDEX].base_cost); @@ -525,6 +567,86 @@ float ConnectionBoxMapLookahead::get_expected_cost( } } +static int manhattan_distance(const t_rr_node& node, int x, int y) { + int node_center_x = (node.xhigh() + node.xlow()) / 2; + int node_center_y = (node.yhigh() + node.ylow()) / 2; + return abs(node_center_x - x) + abs(node_center_y - y); +} + +static vtr::Rect bounding_box_for_node(const t_rr_node& node) { + return vtr::Rect(node.xlow(), node.ylow(), + node.xhigh() + 1, node.yhigh() + 1); +} + +static void find_inodes_for_segment_types(std::vector* inodes_for_segment) { + auto& device_ctx = g_vpr_ctx.device(); + auto& rr_nodes = device_ctx.rr_nodes; + const int num_segments = inodes_for_segment->size(); + + // compute bounding boxes for each segment type + std::vector> bounding_box_for_segment(num_segments, vtr::Rect()); + for (size_t i = 0; i < rr_nodes.size(); i++) { + auto& node = rr_nodes[i]; + if (node.type() != CHANX && node.type() != CHANY) continue; + int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index; + + VTR_ASSERT(seg_index != OPEN); + VTR_ASSERT(seg_index < num_segments); + + bounding_box_for_segment[seg_index].expand_bounding_box(bounding_box_for_node(node)); + } + + // select an inode near the center of the bounding box for each segment type + inodes_for_segment->clear(); + inodes_for_segment->resize(num_segments); + for (auto& grid : *inodes_for_segment) { + for (auto& row : grid) { + for (auto& cell : row) { + cell = std::vector(); + } + } + } + + for (size_t i = 0; i < rr_nodes.size(); i++) { + auto& node = rr_nodes[i]; + if (node.type() != CHANX && node.type() != CHANY) continue; + if (node.capacity() == 0 || device_ctx.connection_boxes.find_canonical_loc(i) == nullptr) continue; + + int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index; + + VTR_ASSERT(seg_index != OPEN); + VTR_ASSERT(seg_index < num_segments); + + auto& grid = (*inodes_for_segment)[seg_index]; + for (int sy = 0; sy < SAMPLE_GRID_SIZE; sy++) { + for (int sx = 0; sx < SAMPLE_GRID_SIZE; sx++) { + auto& stored_inodes = grid[sy][sx]; + if (stored_inodes.empty()) { + stored_inodes.push_back(i); + goto next_rr_node; + } + + auto& first_stored_node = rr_nodes[stored_inodes.front()]; + if (first_stored_node.xhigh() >= node.xhigh() && first_stored_node.xlow() <= node.xlow() && first_stored_node.yhigh() >= node.yhigh() && first_stored_node.ylow() <= node.ylow()) { + stored_inodes.push_back(i); + goto next_rr_node; + } + + vtr::Point target = sample(bounding_box_for_segment[seg_index], sx + 1, sy + 1, SAMPLE_GRID_SIZE + 1); + int distance_new = manhattan_distance(node, target.x(), target.y()); + int distance_stored = manhattan_distance(first_stored_node, target.x(), target.y()); + if (distance_new < distance_stored) { + stored_inodes.clear(); + stored_inodes.push_back(i); + goto next_rr_node; + } + } + } + next_rr_node: + continue; + } +} + #ifndef VTR_ENABLE_CAPNPROTO void ConnectionBoxMapLookahead::read(const std::string& file) { @@ -537,18 +659,18 @@ void ConnectionBoxMapLookahead::write(const std::string& file) const { #else void ConnectionBoxMapLookahead::read(const std::string& file) { - g_cost_map.read(file); + cost_map_.read(file); } void ConnectionBoxMapLookahead::write(const std::string& file) const { - g_cost_map.write(file); + cost_map_.write(file); } -static void ToCostEntry(Cost_Entry* out, const VprCostEntry::Reader& in) { +static void ToCostEntry(util::Cost_Entry* out, const VprCostEntry::Reader& in) { out->delay = in.getDelay(); out->congestion = in.getCongestion(); } -static void FromCostEntry(VprCostEntry::Builder* out, const Cost_Entry& in) { +static void FromCostEntry(VprCostEntry::Builder* out, const util::Cost_Entry& in) { out->setDelay(in.delay); out->setCongestion(in.congestion); } @@ -562,15 +684,15 @@ static void FromVprVector2D(VprVector2D::Builder* out, const std::pair out->setY(in.second); } -static void ToMatrixCostEntry(vtr::NdMatrix* out, +static void ToMatrixCostEntry(vtr::NdMatrix* out, const Matrix::Reader& in) { - ToNdMatrix<2, VprCostEntry, Cost_Entry>(out, in, ToCostEntry); + ToNdMatrix<2, VprCostEntry, util::Cost_Entry>(out, in, ToCostEntry); } static void FromMatrixCostEntry( Matrix::Builder* out, - const vtr::NdMatrix& in) { - FromNdMatrix<2, VprCostEntry, Cost_Entry>( + const vtr::NdMatrix& in) { + FromNdMatrix<2, VprCostEntry, util::Cost_Entry>( out, in, FromCostEntry); } @@ -599,7 +721,7 @@ void CostMap::read(const std::string& file) { { const auto& cost_maps = cost_map.getCostMap(); - ToNdMatrix<2, Matrix, vtr::NdMatrix>( + ToNdMatrix<2, Matrix, vtr::NdMatrix>( &cost_map_, cost_maps, ToMatrixCostEntry); } } @@ -624,7 +746,7 @@ void CostMap::write(const std::string& file) const { { auto cost_maps = cost_map.initCostMap(); - FromNdMatrix<2, Matrix, vtr::NdMatrix>( + FromNdMatrix<2, Matrix, vtr::NdMatrix>( &cost_maps, cost_map_, FromMatrixCostEntry); } diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h index 03e1a140f0b..814a57b4d49 100644 --- a/vpr/src/route/connection_box_lookahead_map.h +++ b/vpr/src/route/connection_box_lookahead_map.h @@ -2,16 +2,73 @@ #define CONNECTION_BOX_LOOKAHEAD_H_ #include +#include #include "physical_types.h" #include "router_lookahead.h" +#include "router_lookahead_map_utils.h" +#include "connection_box.h" +#include "vtr_geometry.h" + +struct RoutingCostKey { + vtr::Point delta; + ConnectionBoxId box_id; + bool operator==(const RoutingCostKey& other) const { + return delta == other.delta && box_id == other.box_id; + } +}; +struct RoutingCost { + int from_node, to_node; + util::Cost_Entry cost_entry; +}; + +// specialization of std::hash for RoutingCostKey +namespace std { +template<> +struct hash { + typedef RoutingCostKey argument_type; + typedef std::size_t result_type; + result_type operator()(argument_type const& s) const noexcept { + result_type const h1(std::hash{}(s.delta.x())); + result_type const h2(std::hash{}(s.delta.y())); + result_type const h3(std::hash{}(size_t(s.box_id))); + return h1 ^ ((h2 ^ (h3 << 1)) << 1); + } +}; +} // namespace std + +typedef std::unordered_map RoutingCosts; + +class CostMap { + public: + void set_counts(size_t seg_count, size_t box_count); + int node_to_segment(int from_node_ind) const; + util::Cost_Entry find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const; + void set_cost_map(int from_seg_index, + const RoutingCosts& costs, + util::e_representative_entry_method method); + void set_cost_map(int from_seg_index, ConnectionBoxId box_id, const RoutingCosts& costs, util::e_representative_entry_method method); + util::Cost_Entry get_nearby_cost_entry(const vtr::NdMatrix& matrix, int cx, int cy, const vtr::Rect& bounds); + void read(const std::string& file); + void write(const std::string& file) const; + void print(int iseg) const; + std::list> list_empty() const; + + private: + vtr::NdMatrix, 2> cost_map_; + vtr::NdMatrix, 2> offset_; + std::vector segment_map_; +}; class ConnectionBoxMapLookahead : public RouterLookahead { public: float get_expected_cost(int node, int target_node, const t_conn_cost_params& params, float R_upstream) const override; + float get_map_cost(int from_node_ind, int to_node_ind, float criticality_fac) const; void compute(const std::vector& segment_inf) override; void read(const std::string& file) override; void write(const std::string& file) const override; + + CostMap cost_map_; }; #endif diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp index d50aa372a5f..36667cb3a29 100644 --- a/vpr/src/route/router_lookahead_map_utils.cpp +++ b/vpr/src/route/router_lookahead_map_utils.cpp @@ -59,9 +59,36 @@ util::PQ_Entry::PQ_Entry( this->cost = this->delay; } +util::PQ_Entry_Lite::PQ_Entry_Lite( + int set_rr_node_ind, + int switch_ind, + float parent_delay, + bool starting_node) { + this->rr_node_ind = set_rr_node_ind; + + auto& device_ctx = g_vpr_ctx.device(); + this->delay_cost = parent_delay; + if (!starting_node) { + float Tsw = device_ctx.rr_switch_inf[switch_ind].Tdel; + float Rsw = device_ctx.rr_switch_inf[switch_ind].R; + float Cnode = device_ctx.rr_nodes[set_rr_node_ind].C(); + float Rnode = device_ctx.rr_nodes[set_rr_node_ind].R(); + + float T_linear = 0.f; + if (device_ctx.rr_switch_inf[switch_ind].buffered()) { + T_linear = Tsw + Rsw * Cnode + 0.5 * Rnode * Cnode; + } else { /* Pass transistor */ + T_linear = Tsw + 0.5 * Rsw * Cnode; + } + + VTR_ASSERT(T_linear >= 0.); + this->delay_cost += T_linear; + } +} + /* returns cost entry with the smallest delay */ -Cost_Entry Expansion_Cost_Entry::get_smallest_entry() const { - Cost_Entry smallest_entry; +util::Cost_Entry util::Expansion_Cost_Entry::get_smallest_entry() const { + util::Cost_Entry smallest_entry; for (auto entry : this->cost_vector) { if (!smallest_entry.valid() || entry.delay < smallest_entry.delay) { @@ -73,7 +100,7 @@ Cost_Entry Expansion_Cost_Entry::get_smallest_entry() const { } /* returns a cost entry that represents the average of all the recorded entries */ -Cost_Entry Expansion_Cost_Entry::get_average_entry() const { +util::Cost_Entry util::Expansion_Cost_Entry::get_average_entry() const { float avg_delay = 0; float avg_congestion = 0; @@ -85,11 +112,11 @@ Cost_Entry Expansion_Cost_Entry::get_average_entry() const { avg_delay /= (float)this->cost_vector.size(); avg_congestion /= (float)this->cost_vector.size(); - return Cost_Entry(avg_delay, avg_congestion); + return util::Cost_Entry(avg_delay, avg_congestion); } /* returns a cost entry that represents the geomean of all the recorded entries */ -Cost_Entry Expansion_Cost_Entry::get_geomean_entry() const { +util::Cost_Entry util::Expansion_Cost_Entry::get_geomean_entry() const { float geomean_delay = 0; float geomean_cong = 0; for (auto cost_entry : this->cost_vector) { @@ -100,19 +127,19 @@ Cost_Entry Expansion_Cost_Entry::get_geomean_entry() const { geomean_delay = exp(geomean_delay / (float)this->cost_vector.size()); geomean_cong = exp(geomean_cong / (float)this->cost_vector.size()); - return Cost_Entry(geomean_delay, geomean_cong); + return util::Cost_Entry(geomean_delay, geomean_cong); } /* returns a cost entry that represents the medial of all recorded entries */ -Cost_Entry Expansion_Cost_Entry::get_median_entry() const { +util::Cost_Entry util::Expansion_Cost_Entry::get_median_entry() const { /* find median by binning the delays of all entries and then chosing the bin * with the largest number of entries */ int num_bins = 10; /* find entries with smallest and largest delays */ - Cost_Entry min_del_entry; - Cost_Entry max_del_entry; + util::Cost_Entry min_del_entry; + util::Cost_Entry max_del_entry; for (auto entry : this->cost_vector) { if (!min_del_entry.valid() || entry.delay < min_del_entry.delay) { min_del_entry = entry; @@ -127,7 +154,7 @@ Cost_Entry Expansion_Cost_Entry::get_median_entry() const { float bin_size = delay_diff / (float)num_bins; /* sort the cost entries into bins */ - std::vector > entry_bins(num_bins, std::vector()); + std::vector> entry_bins(num_bins, std::vector()); for (auto entry : this->cost_vector) { float bin_num = floor((entry.delay - min_del_entry.delay) / bin_size); @@ -150,16 +177,18 @@ Cost_Entry Expansion_Cost_Entry::get_median_entry() const { } /* get the representative delay of the largest bin */ - Cost_Entry representative_entry = entry_bins[largest_bin][0]; + util::Cost_Entry representative_entry = entry_bins[largest_bin][0]; return representative_entry; } /* iterates over the children of the specified node and selectively pushes them onto the priority queue */ -void expand_dijkstra_neighbours(util::PQ_Entry parent_entry, - std::vector& node_visited_costs, +void expand_dijkstra_neighbours(util::PQ_Entry_Lite parent_entry, + std::unordered_map& paths, std::vector& node_expanded, - std::priority_queue& pq) { + std::priority_queue, + std::greater>& pq) { auto& device_ctx = g_vpr_ctx.device(); int parent_ind = parent_entry.rr_node_ind; @@ -175,18 +204,18 @@ void expand_dijkstra_neighbours(util::PQ_Entry parent_entry, continue; } - util::PQ_Entry child_entry(child_node_ind, switch_ind, parent_entry.delay, - parent_entry.R_upstream, parent_entry.congestion_upstream, false); + util::PQ_Entry_Lite child_entry(child_node_ind, switch_ind, parent_entry.delay_cost, false); - VTR_ASSERT(child_entry.cost >= 0); + VTR_ASSERT(child_entry.delay_cost >= 0); - /* skip this child if it has been visited with smaller cost */ - if (node_visited_costs[child_node_ind] >= 0 && node_visited_costs[child_node_ind] < child_entry.cost) { + /* skip this child if it has been visited with smaller or the same cost */ + auto stored_cost = paths.find(child_node_ind); + if (stored_cost != paths.end() && stored_cost->second.cost <= child_entry.delay_cost) { continue; } /* finally, record the cost with which the child was visited and put the child entry on the queue */ - node_visited_costs[child_node_ind] = child_entry.cost; + paths[child_node_ind] = {child_entry.delay_cost, parent_ind, iedge}; pq.push(child_entry); } } diff --git a/vpr/src/route/router_lookahead_map_utils.h b/vpr/src/route/router_lookahead_map_utils.h index af37006669a..5745a2c9da7 100644 --- a/vpr/src/route/router_lookahead_map_utils.h +++ b/vpr/src/route/router_lookahead_map_utils.h @@ -19,8 +19,11 @@ #include #include #include +#include #include "vpr_types.h" +namespace util { + /* when a list of delay/congestion entries at a coordinate in Cost_Entry is boiled down to a single * representative entry, this enum is passed-in to specify how that representative entry should be * calculated */ @@ -115,7 +118,6 @@ class Expansion_Cost_Entry { } }; -namespace util { /* a class that represents an entry in the Dijkstra expansion priority queue */ class PQ_Entry { public: @@ -134,11 +136,33 @@ class PQ_Entry { return (this->cost > obj.cost); } }; + +// A version of PQ_Entry that only calculates and stores the delay (cost.) +class PQ_Entry_Lite { + public: + int rr_node_ind; //index in device_ctx.rr_nodes that this entry represents + float delay_cost; //the cost of the path to get to this node + + PQ_Entry_Lite(int set_rr_node_ind, int /*switch_ind*/, float parent_delay, bool starting_node); + + bool operator>(const PQ_Entry_Lite& obj) const { + return (this->delay_cost > obj.delay_cost); + } +}; + +struct Search_Path { + float cost; + int parent; + int edge; +}; + } // namespace util -void expand_dijkstra_neighbours(util::PQ_Entry parent_entry, - std::vector& node_visited_costs, +void expand_dijkstra_neighbours(util::PQ_Entry_Lite parent_entry, + std::unordered_map& paths, std::vector& node_expanded, - std::priority_queue& pq); + std::priority_queue, + std::greater>& pq); #endif From b351679dbc1477731506414edea7dd526e5f6cd7 Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Wed, 23 Oct 2019 13:25:41 -0700 Subject: [PATCH 07/41] add documentation Signed-off-by: Dusty DeWeese --- .../route/connection_box_lookahead_map.cpp | 79 ++++++++++++------- vpr/src/route/connection_box_lookahead_map.h | 15 +++- 2 files changed, 64 insertions(+), 30 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index cff9fe2971b..fc769412c3e 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -29,9 +29,12 @@ * (wire type, chan_type) combination we can take the smallest cost, the * average, median, etc. This define selects the method we use. * + * NOTE: Currently, only SMALLEST is supported. + * * See e_representative_entry_method */ #define REPRESENTATIVE_ENTRY_METHOD util::SMALLEST +/* Sample based an NxN grid of starting segments, where N = SAMPLE_GRID_SIZE */ static constexpr int SAMPLE_GRID_SIZE = 4; typedef std::array, SAMPLE_GRID_SIZE>, SAMPLE_GRID_SIZE> SampleGrid; @@ -40,6 +43,7 @@ static void run_dijkstra(int start_node_ind, std::vector* routing_costs); static void find_inodes_for_segment_types(std::vector* inodes_for_segment); +// resize internal data structures void CostMap::set_counts(size_t seg_count, size_t box_count) { cost_map_.clear(); offset_.clear(); @@ -58,10 +62,12 @@ void CostMap::set_counts(size_t seg_count, size_t box_count) { } } +// cached node -> segment map int CostMap::node_to_segment(int from_node_ind) const { return segment_map_[from_node_ind]; } +// get a cost entry for a segment type, connection box type, and offset util::Cost_Entry CostMap::find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const { VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); const auto& cost_map = cost_map_[from_seg_index][size_t(box_id)]; @@ -89,6 +95,7 @@ util::Cost_Entry CostMap::find_cost(int from_seg_index, ConnectionBoxId box_id, return cost_map_[from_seg_index][size_t(box_id)][dx][dy]; } +// set all cost maps for the segment type void CostMap::set_cost_map(int from_seg_index, const RoutingCosts& costs, util::e_representative_entry_method method) { @@ -101,6 +108,7 @@ void CostMap::set_cost_map(int from_seg_index, } } +// set the cost map for a segment type and connection box type, filling holes void CostMap::set_cost_map(int from_seg_index, ConnectionBoxId box_id, const RoutingCosts& costs, util::e_representative_entry_method method) { VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); @@ -113,6 +121,7 @@ void CostMap::set_cost_map(int from_seg_index, ConnectionBoxId box_id, const Rou } if (bounds.empty()) { + // Didn't find any sample routes, so routing isn't possible between these segment/connection box types. offset_[from_seg_index][size_t(box_id)] = std::make_pair(0, 0); cost_map_[from_seg_index][size_t(box_id)] = vtr::NdMatrix( {size_t(0), size_t(0)}); @@ -133,10 +142,10 @@ void CostMap::set_cost_map(int from_seg_index, ConnectionBoxId box_id, const Rou } } + // find missing cost entries and fill them in by copying a nearby cost entry std::list> missing; bool couldnt_fill = false; auto shifted_bounds = vtr::Rect(0, 0, bounds.width(), bounds.height()); - /* find missing cost entries and fill them in by copying a nearby cost entry */ for (unsigned ix = 0; ix < matrix.dim_size(0) && !couldnt_fill; ix++) { for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { util::Cost_Entry& cost_entry = matrix[ix][iy]; @@ -165,21 +174,13 @@ void CostMap::set_cost_map(int from_seg_index, ConnectionBoxId box_id, const Rou VTR_ASSERT(!matrix[x][y].valid()); } } -#if 0 - for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { - for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { - if(matrix[ix][iy].valid()) { - printf("O"); - } else { - printf("."); - } - } - printf("\n"); - } -#endif } } +// prints an ASCII diagram of each cost map for a segment type (debug) +// o => above average +// . => at or below average +// * => invalid (missing) void CostMap::print(int iseg) const { const auto& device_ctx = g_vpr_ctx.device(); for (size_t box_id = 0; @@ -217,6 +218,7 @@ void CostMap::print(int iseg) const { } } +// list segment type and connection box type pairs that have empty cost maps (debug) std::list> CostMap::list_empty() const { std::list> results; for (int iseg = 0; iseg < (int)cost_map_.dim_size(0); iseg++) { @@ -233,6 +235,7 @@ static void assign_min_entry(util::Cost_Entry& dst, const util::Cost_Entry& src) if (src.congestion < dst.congestion) dst.congestion = src.congestion; } +// find the minimum cost entry from the nearest manhattan distance neighbor util::Cost_Entry CostMap::get_nearby_cost_entry(const vtr::NdMatrix& matrix, int cx, int cy, @@ -282,6 +285,7 @@ util::Cost_Entry CostMap::get_nearby_cost_entry(const vtr::NdMatrix %d (%s, %d, (%d, %d))\n", - // from_node_ind, device_ctx.rr_nodes[from_node_ind].type_string(), from_seg_index, - // to_node_ind, device_ctx.rr_nodes[to_node_ind].type_string(), - // (int)size_t(box_id), (int)box_location.first, (int)box_location.second); +#if 0 + // useful for debugging but can be really noisy + VTR_LOG_WARN("Not connected %d (%s, %d) -> %d (%s, %d, (%d, %d))\n", + from_node_ind, device_ctx.rr_nodes[from_node_ind].type_string(), from_seg_index, + to_node_ind, device_ctx.rr_nodes[to_node_ind].type_string(), + (int)size_t(box_id), (int)box_location.first, (int)box_location.second); +#endif return std::numeric_limits::infinity(); } @@ -362,9 +369,10 @@ static void run_dijkstra(int start_node_ind, /* a list of boolean flags (one for each rr node) to figure out if a * certain node has already been expanded */ std::vector node_expanded(device_ctx.rr_nodes.size(), false); - /* for each node keep a list of the cost with which that node has been + /* For each node keep a list of the cost with which that node has been * visited (used to determine whether to push a candidate node onto the - * expansion queue */ + * expansion queue. + * Also store the parent node so we can reconstruct a specific path. */ std::unordered_map paths; /* a priority queue for expansion */ std::priority_queue, std::greater> pq; @@ -454,6 +462,7 @@ static void run_dijkstra(int start_node_ind, } } +// compute the cost maps for lookahead void ConnectionBoxMapLookahead::compute(const std::vector& segment_inf) { vtr::ScopedStartFinishTimer timer("Computing connection box lookahead map"); @@ -473,7 +482,7 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { VTR_LOG("Creating cost map for %s(%d)\n", segment_inf[iseg].name.c_str(), iseg); - /* allocate the cost map for this iseg/chan_type */ + std::vector costs(num_segments); for (const auto& row : inodes_for_segment[iseg]) { for (auto cell : row) { @@ -483,6 +492,7 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen } } + // combine the cost map from this run with the final cost maps for each segment for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { for (const auto& cost : costs[iseg]) { const auto& key = cost.first; @@ -500,11 +510,13 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen VTR_LOG("Combining results\n"); for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { #if 0 - for (auto &e : cost_map_per_segment[iseg]) { + for (auto &cost : all_costs[iseg]) { + const auto& key = cost.first; + const auto& val = cost.second; VTR_LOG("%d -> %d (%d, %d): %g, %g\n", - std::get<0>(e).first, std::get<0>(e).second, - std::get<1>(e).first, std::get<1>(e).second, - std::get<2>(e).delay, std::get<2>(e).congestion); + val.from_node, val.to_node, + key.delta.x(), key.delta.y(), + val.cost_entry.delay, val.cost_entry.congestion); } #endif const auto& costs = all_costs[iseg]; @@ -533,21 +545,25 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen cost_map_.set_cost_map(iseg, costs, REPRESENTATIVE_ENTRY_METHOD); } - /* - * VTR_LOG("cost map for %s(%d)\n", - * segment_inf[iseg].name.c_str(), iseg); - * cost_map_.print(iseg); - */ + +#if 0 + VTR_LOG("cost map for %s(%d)\n", + segment_inf[iseg].name.c_str(), iseg); + cost_map_.print(iseg); +#endif } +#if 0 for (std::pair p : cost_map_.list_empty()) { int iseg, box_id; std::tie(iseg, box_id) = p; VTR_LOG("cost map for %s(%d), connection box %d EMPTY\n", segment_inf[iseg].name.c_str(), iseg, box_id); } +#endif } +// get an expected minimum cost for routing from the current node to the target node float ConnectionBoxMapLookahead::get_expected_cost( int current_node, int target_node, @@ -567,6 +583,7 @@ float ConnectionBoxMapLookahead::get_expected_cost( } } +// also known as the L1 norm static int manhattan_distance(const t_rr_node& node, int x, int y) { int node_center_x = (node.xhigh() + node.xlow()) / 2; int node_center_y = (node.yhigh() + node.ylow()) / 2; @@ -578,6 +595,8 @@ static vtr::Rect bounding_box_for_node(const t_rr_node& node) { node.xhigh() + 1, node.yhigh() + 1); } +// for each segment type, find the nearest nodes to an equally spaced grid of points +// within the bounding box for that segment type static void find_inodes_for_segment_types(std::vector* inodes_for_segment) { auto& device_ctx = g_vpr_ctx.device(); auto& rr_nodes = device_ctx.rr_nodes; @@ -642,6 +661,8 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se } } } + + // to break out from the inner loop next_rr_node: continue; } diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h index 814a57b4d49..200e7d79980 100644 --- a/vpr/src/route/connection_box_lookahead_map.h +++ b/vpr/src/route/connection_box_lookahead_map.h @@ -9,19 +9,29 @@ #include "connection_box.h" #include "vtr_geometry.h" +// Keys in the RoutingCosts map struct RoutingCostKey { + // offset of the destination connection box from the starting segment vtr::Point delta; + + // type of the destination connection box ConnectionBoxId box_id; + bool operator==(const RoutingCostKey& other) const { return delta == other.delta && box_id == other.box_id; } }; + +// Data in the RoutingCosts map struct RoutingCost { + // source and destination node indices int from_node, to_node; + + // cost entry for the route util::Cost_Entry cost_entry; }; -// specialization of std::hash for RoutingCostKey +// Specialization of std::hash for RoutingCostKey namespace std { template<> struct hash { @@ -36,8 +46,10 @@ struct hash { }; } // namespace std +// Map used to store intermediate routing costs typedef std::unordered_map RoutingCosts; +// Dense cost maps per source segment and destination connection box types class CostMap { public: void set_counts(size_t seg_count, size_t box_count); @@ -59,6 +71,7 @@ class CostMap { std::vector segment_map_; }; +// Implementation of RouterLookahead based on source segment and destination connection box types class ConnectionBoxMapLookahead : public RouterLookahead { public: float get_expected_cost(int node, int target_node, const t_conn_cost_params& params, float R_upstream) const override; From 3c1a1906dbd92eebd90b6c0a92327b02c8812b07 Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Wed, 23 Oct 2019 15:04:39 -0700 Subject: [PATCH 08/41] un-shadow a variable Signed-off-by: Dusty DeWeese --- vpr/src/route/connection_box_lookahead_map.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index fc769412c3e..1448f17fbc6 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -493,15 +493,15 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen } // combine the cost map from this run with the final cost maps for each segment - for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { - for (const auto& cost : costs[iseg]) { + for (int i = 0; i < (ssize_t)num_segments; i++) { + for (const auto& cost : costs[i]) { const auto& key = cost.first; const auto& val = cost.second; - const auto& x = all_costs[iseg].find(key); + const auto& x = all_costs[i].find(key); // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST - if (x == all_costs[iseg].end() || x->second.cost_entry.delay > val.cost_entry.delay) { - all_costs[iseg][key] = val; + if (x == all_costs[i].end() || x->second.cost_entry.delay > val.cost_entry.delay) { + all_costs[i][key] = val; } } } From 5d19549d8e807a31310b16ee70865166b5779352 Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Wed, 23 Oct 2019 17:00:44 -0700 Subject: [PATCH 09/41] remove method from set_cost_map Signed-off-by: Dusty DeWeese --- vpr/src/route/connection_box_lookahead_map.cpp | 10 ++++------ vpr/src/route/connection_box_lookahead_map.h | 5 ++--- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 1448f17fbc6..d6aa98ca656 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -97,19 +97,18 @@ util::Cost_Entry CostMap::find_cost(int from_seg_index, ConnectionBoxId box_id, // set all cost maps for the segment type void CostMap::set_cost_map(int from_seg_index, - const RoutingCosts& costs, - util::e_representative_entry_method method) { + const RoutingCosts& costs) { // sort the entries const auto& device_ctx = g_vpr_ctx.device(); for (size_t box_id = 0; box_id < device_ctx.connection_boxes.num_connection_box_types(); ++box_id) { - set_cost_map(from_seg_index, ConnectionBoxId(box_id), costs, method); + set_cost_map(from_seg_index, ConnectionBoxId(box_id), costs); } } // set the cost map for a segment type and connection box type, filling holes -void CostMap::set_cost_map(int from_seg_index, ConnectionBoxId box_id, const RoutingCosts& costs, util::e_representative_entry_method method) { +void CostMap::set_cost_map(int from_seg_index, ConnectionBoxId box_id, const RoutingCosts& costs) { VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); // calculate the bounding box @@ -542,8 +541,7 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen } else { /* boil down the cost list in routing_cost_map at each coordinate to a * representative cost entry and store it in the lookahead cost map */ - cost_map_.set_cost_map(iseg, costs, - REPRESENTATIVE_ENTRY_METHOD); + cost_map_.set_cost_map(iseg, costs); } #if 0 diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h index 200e7d79980..eb1c0c0756f 100644 --- a/vpr/src/route/connection_box_lookahead_map.h +++ b/vpr/src/route/connection_box_lookahead_map.h @@ -56,9 +56,8 @@ class CostMap { int node_to_segment(int from_node_ind) const; util::Cost_Entry find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const; void set_cost_map(int from_seg_index, - const RoutingCosts& costs, - util::e_representative_entry_method method); - void set_cost_map(int from_seg_index, ConnectionBoxId box_id, const RoutingCosts& costs, util::e_representative_entry_method method); + const RoutingCosts& costs); + void set_cost_map(int from_seg_index, ConnectionBoxId box_id, const RoutingCosts& costs); util::Cost_Entry get_nearby_cost_entry(const vtr::NdMatrix& matrix, int cx, int cy, const vtr::Rect& bounds); void read(const std::string& file); void write(const std::string& file) const; From 39af170d54816cc5351ba9944f2ebd6ace3fd4cf Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Thu, 24 Oct 2019 12:51:01 -0700 Subject: [PATCH 10/41] replace std::list with std::vector Signed-off-by: Dusty DeWeese --- vpr/src/route/connection_box_lookahead_map.cpp | 6 +++--- vpr/src/route/connection_box_lookahead_map.h | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index d6aa98ca656..99b294de6ca 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -142,7 +142,7 @@ void CostMap::set_cost_map(int from_seg_index, ConnectionBoxId box_id, const Rou } // find missing cost entries and fill them in by copying a nearby cost entry - std::list> missing; + std::vector> missing; bool couldnt_fill = false; auto shifted_bounds = vtr::Rect(0, 0, bounds.width(), bounds.height()); for (unsigned ix = 0; ix < matrix.dim_size(0) && !couldnt_fill; ix++) { @@ -218,8 +218,8 @@ void CostMap::print(int iseg) const { } // list segment type and connection box type pairs that have empty cost maps (debug) -std::list> CostMap::list_empty() const { - std::list> results; +std::vector> CostMap::list_empty() const { + std::vector> results; for (int iseg = 0; iseg < (int)cost_map_.dim_size(0); iseg++) { for (int box_id = 0; box_id < (int)cost_map_.dim_size(1); box_id++) { auto& matrix = cost_map_[iseg][box_id]; diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h index eb1c0c0756f..411394c89d5 100644 --- a/vpr/src/route/connection_box_lookahead_map.h +++ b/vpr/src/route/connection_box_lookahead_map.h @@ -2,7 +2,6 @@ #define CONNECTION_BOX_LOOKAHEAD_H_ #include -#include #include "physical_types.h" #include "router_lookahead.h" #include "router_lookahead_map_utils.h" @@ -62,7 +61,7 @@ class CostMap { void read(const std::string& file); void write(const std::string& file) const; void print(int iseg) const; - std::list> list_empty() const; + std::vector> list_empty() const; private: vtr::NdMatrix, 2> cost_map_; From aec3721dc6c1eddb7f9fb691a7b4882295952dfc Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Wed, 23 Oct 2019 09:25:19 -0700 Subject: [PATCH 11/41] parallelize connection box lookahead Signed-off-by: Dusty DeWeese --- .../route/connection_box_lookahead_map.cpp | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 99b294de6ca..717db0771b6 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -23,6 +23,11 @@ # include "serdes_utils.h" #endif +#if defined(VPR_USE_TBB) +# include +# include +#endif + /* we're profiling routing cost over many tracks for each wire type, so we'll * have many cost entries at each |dx|,|dy| offset. There are many ways to * "boil down" the many costs at each offset to a single entry for a given @@ -478,7 +483,12 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen std::vector all_costs(num_segments); /* run Dijkstra's algorithm for each segment type & channel type combination */ +#if defined(VPR_USE_TBB) + tbb::mutex all_costs_mutex; + tbb::parallel_for(size_t(0), size_t(num_segments), [&](int iseg) { +#else for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { +#endif VTR_LOG("Creating cost map for %s(%d)\n", segment_inf[iseg].name.c_str(), iseg); @@ -491,6 +501,10 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen } } +#if defined(VPR_USE_TBB) + all_costs_mutex.lock(); +#endif + // combine the cost map from this run with the final cost maps for each segment for (int i = 0; i < (ssize_t)num_segments; i++) { for (const auto& cost : costs[i]) { @@ -504,7 +518,15 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen } } } + +#if defined(VPR_USE_TBB) + all_costs_mutex.unlock(); +#endif +#if !defined(VPR_USE_TBB) } +#else + }); +#endif VTR_LOG("Combining results\n"); for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { From 31b2f473c2b682419178bcef7e06ba00adb8250a Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Thu, 24 Oct 2019 18:46:28 -0700 Subject: [PATCH 12/41] performance improvements Signed-off-by: Dusty DeWeese --- .../route/connection_box_lookahead_map.cpp | 528 +++++++++++------- vpr/src/route/connection_box_lookahead_map.h | 74 ++- 2 files changed, 386 insertions(+), 216 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 717db0771b6..fa34c205b43 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -24,7 +24,7 @@ #endif #if defined(VPR_USE_TBB) -# include +# include # include #endif @@ -38,22 +38,93 @@ * * See e_representative_entry_method */ #define REPRESENTATIVE_ENTRY_METHOD util::SMALLEST +// #define FILL_LIMIT 30 /* Sample based an NxN grid of starting segments, where N = SAMPLE_GRID_SIZE */ -static constexpr int SAMPLE_GRID_SIZE = 4; +static constexpr int SAMPLE_GRID_SIZE = 3; +static constexpr float COST_LIMIT = std::numeric_limits::infinity(); +static constexpr int DIJKSTRA_CACHE_SIZE = 64; +static constexpr int DIJKSTRA_CACHE_WINDOW = 3; +static constexpr bool BREAK_ON_MISS = false; +static constexpr float PENALTY_FACTOR = 1.f; + +struct SamplePoint { + uint64_t order; + vtr::Point location; + std::vector samples; + SamplePoint() + : location(0, 0) {} +}; + +struct SampleGrid { + SamplePoint point[SAMPLE_GRID_SIZE][SAMPLE_GRID_SIZE]; +}; + +template +class SimpleCache { + public: + SimpleCache() + : pos(0) + , hits(0) + , misses(0) {} + bool get(K key, V* value) { + for (int i = 0; i < N; i++) { + auto& k = keys[i]; + if (k == key) { + auto& v = values[i]; +#if 0 + // preserve the found key by pushing it back + int last = (pos + N - 1) % N; + std::swap(k, keys[last]); + std::swap(v, values[last]); +#endif + *value = v; + hits++; + return true; + } + } + misses++; + return false; + } + void insert(K key, V val) { + keys[pos] = key; + values[pos] = val; + pos = (pos + 1) % N; + } + float hit_ratio() { + return hits ? static_cast(hits) / static_cast(hits + misses) : 0.f; + } + float miss_ratio() { + return misses ? static_cast(misses) / static_cast(hits + misses) : 0.f; + } -typedef std::array, SAMPLE_GRID_SIZE>, SAMPLE_GRID_SIZE> SampleGrid; + private: + std::array keys; // keep keys together for faster scanning + std::array values; + size_t pos; + uint64_t hits; + uint64_t misses; +}; -static void run_dijkstra(int start_node_ind, - std::vector* routing_costs); +static float run_dijkstra(int start_node_ind, + RoutingCosts* routing_costs, + SimpleCache* cache, + float max_cost); static void find_inodes_for_segment_types(std::vector* inodes_for_segment); +// also known as the L1 norm +static int manhattan_distance(const vtr::Point& a, const vtr::Point& b) { + return abs(b.x() - a.x()) + abs(b.y() - a.y()); +} + // resize internal data structures void CostMap::set_counts(size_t seg_count, size_t box_count) { cost_map_.clear(); offset_.clear(); cost_map_.resize({seg_count, box_count}); offset_.resize({seg_count, box_count}); + seg_count_ = seg_count; + box_count_ = box_count; const auto& device_ctx = g_vpr_ctx.device(); segment_map_.resize(device_ctx.rr_nodes.size()); @@ -100,82 +171,118 @@ util::Cost_Entry CostMap::find_cost(int from_seg_index, ConnectionBoxId box_id, return cost_map_[from_seg_index][size_t(box_id)][dx][dy]; } -// set all cost maps for the segment type -void CostMap::set_cost_map(int from_seg_index, - const RoutingCosts& costs) { - // sort the entries - const auto& device_ctx = g_vpr_ctx.device(); - for (size_t box_id = 0; - box_id < device_ctx.connection_boxes.num_connection_box_types(); - ++box_id) { - set_cost_map(from_seg_index, ConnectionBoxId(box_id), costs); - } +static util::Cost_Entry penalize(const util::Cost_Entry& entry, int distance, float penalty) { + return util::Cost_Entry(entry.delay + distance * penalty * PENALTY_FACTOR, + entry.congestion); } // set the cost map for a segment type and connection box type, filling holes -void CostMap::set_cost_map(int from_seg_index, ConnectionBoxId box_id, const RoutingCosts& costs) { - VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); - - // calculate the bounding box - vtr::Rect bounds; +void CostMap::set_cost_map(const RoutingCosts& costs) { + // calculate the bounding boxes + vtr::Matrix> bounds({seg_count_, box_count_}); for (const auto& entry : costs) { - if (entry.first.box_id == box_id) { - bounds.expand_bounding_box(vtr::Rect(entry.first.delta)); + bounds[entry.first.seg_index][size_t(entry.first.box_id)].expand_bounding_box(vtr::Rect(entry.first.delta)); + } + + // store bounds + for (size_t seg = 0; seg < seg_count_; seg++) { + for (size_t box = 0; box < box_count_; box++) { + const auto& seg_box_bounds = bounds[seg][box]; + if (seg_box_bounds.empty()) { + // Didn't find any sample routes, so routing isn't possible between these segment/connection box types. + offset_[seg][box] = std::make_pair(0, 0); + cost_map_[seg][box] = vtr::NdMatrix( + {size_t(0), size_t(0)}); + continue; + } else { + offset_[seg][box] = std::make_pair(seg_box_bounds.xmin(), seg_box_bounds.ymin()); + cost_map_[seg][box] = vtr::NdMatrix( + {size_t(seg_box_bounds.width()), size_t(seg_box_bounds.height())}); + } } } - if (bounds.empty()) { - // Didn't find any sample routes, so routing isn't possible between these segment/connection box types. - offset_[from_seg_index][size_t(box_id)] = std::make_pair(0, 0); - cost_map_[from_seg_index][size_t(box_id)] = vtr::NdMatrix( - {size_t(0), size_t(0)}); - return; - } - - offset_[from_seg_index][size_t(box_id)] = std::make_pair(bounds.xmin(), bounds.ymin()); - - cost_map_[from_seg_index][size_t(box_id)] = vtr::NdMatrix( - {size_t(bounds.width()), size_t(bounds.height())}); - auto& matrix = cost_map_[from_seg_index][size_t(box_id)]; - + // store entries into the matrices for (const auto& entry : costs) { - if (entry.first.box_id == box_id) { - int x = entry.first.delta.x() - bounds.xmin(); - int y = entry.first.delta.y() - bounds.ymin(); - matrix[x][y] = entry.second.cost_entry; - } - } + int seg = entry.first.seg_index; + int box = size_t(entry.first.box_id); + const auto& seg_box_bounds = bounds[seg][box]; + int x = entry.first.delta.x() - seg_box_bounds.xmin(); + int y = entry.first.delta.y() - seg_box_bounds.ymin(); + cost_map_[seg][box][x][y] = entry.second.cost_entry; + } + + // fill the holes + for (size_t seg = 0; seg < seg_count_; seg++) { + for (size_t box = 0; box < box_count_; box++) { + const auto& seg_box_bounds = bounds[seg][box]; + if (seg_box_bounds.empty()) { + continue; + } + auto& matrix = cost_map_[seg][box]; - // find missing cost entries and fill them in by copying a nearby cost entry - std::vector> missing; - bool couldnt_fill = false; - auto shifted_bounds = vtr::Rect(0, 0, bounds.width(), bounds.height()); - for (unsigned ix = 0; ix < matrix.dim_size(0) && !couldnt_fill; ix++) { - for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { - util::Cost_Entry& cost_entry = matrix[ix][iy]; - if (!cost_entry.valid()) { - // maximum search radius - util::Cost_Entry filler = get_nearby_cost_entry(matrix, ix, iy, shifted_bounds); - if (filler.valid()) { - missing.push_back(std::make_tuple(ix, iy, filler)); - } else { - couldnt_fill = true; + // calculate delay penalty + float min_delay = 0.f, max_delay = 0.f; + vtr::Point min_location(0, 0), max_location(0, 0); + for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { + for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { + util::Cost_Entry& cost_entry = matrix[ix][iy]; + if (cost_entry.valid()) { + if (cost_entry.delay < min_delay) { + min_delay = cost_entry.delay; + min_location = vtr::Point(ix, iy); + } + if (cost_entry.delay > max_delay) { + max_delay = cost_entry.delay; + max_location = vtr::Point(ix, iy); + } + } + } + } + float delay_penalty = (max_delay - min_delay) / static_cast(std::max(1, manhattan_distance(max_location, min_location))); + + // find missing cost entries and fill them in by copying a nearby cost entry + std::vector> missing; + bool couldnt_fill = false; + auto shifted_bounds = vtr::Rect(0, 0, seg_box_bounds.width(), seg_box_bounds.height()); + int max_fill = 0; + for (unsigned ix = 0; ix < matrix.dim_size(0) && !couldnt_fill; ix++) { + for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { + util::Cost_Entry& cost_entry = matrix[ix][iy]; + if (!cost_entry.valid()) { + // maximum search radius + util::Cost_Entry filler; + int distance; + std::tie(filler, distance) = get_nearby_cost_entry(matrix, ix, iy, shifted_bounds); + if (filler.valid()) { + missing.push_back(std::make_tuple(ix, iy, penalize(filler, distance, delay_penalty))); + max_fill = std::max(max_fill, distance); + } else { + couldnt_fill = true; + } + } } } - } - } - // write back the missing entries - for (auto& xy_entry : missing) { - matrix[std::get<0>(xy_entry)][std::get<1>(xy_entry)] = std::get<2>(xy_entry); - } + if (!couldnt_fill) { + VTR_LOG("At %d -> %d: max_fill = %d, delay_penalty = %e\n", seg, box, max_fill, delay_penalty); + } - if (couldnt_fill) { - VTR_LOG_WARN("Couldn't fill holes in the cost matrix for %d -> %ld\n", - from_seg_index, size_t(box_id)); - for (unsigned y = 0; y < matrix.dim_size(1); y++) { - for (unsigned x = 0; x < matrix.dim_size(0); x++) { - VTR_ASSERT(!matrix[x][y].valid()); + // write back the missing entries + for (auto& xy_entry : missing) { + matrix[std::get<0>(xy_entry)][std::get<1>(xy_entry)] = std::get<2>(xy_entry); + } + + if (couldnt_fill) { + VTR_LOG_WARN("Couldn't fill holes in the cost matrix for %d -> %ld, %d x %d bounding box\n", + seg, box, seg_box_bounds.width(), seg_box_bounds.height()); +#if !defined(FILL_LIMIT) + for (unsigned y = 0; y < matrix.dim_size(1); y++) { + for (unsigned x = 0; x < matrix.dim_size(0); x++) { + VTR_ASSERT(!matrix[x][y].valid()); + } + } +#endif } } } @@ -211,10 +318,14 @@ void CostMap::print(int iseg) const { const auto& entry = matrix[ix][iy]; if (!entry.valid()) { printf("*"); + } else if (entry.delay * 4 > avg * 5) { + printf("O"); } else if (entry.delay > avg) { printf("o"); - } else { + } else if (entry.delay * 4 > avg * 3) { printf("."); + } else { + printf(" "); } } printf("\n"); @@ -240,53 +351,40 @@ static void assign_min_entry(util::Cost_Entry& dst, const util::Cost_Entry& src) } // find the minimum cost entry from the nearest manhattan distance neighbor -util::Cost_Entry CostMap::get_nearby_cost_entry(const vtr::NdMatrix& matrix, - int cx, - int cy, - const vtr::Rect& bounds) { +std::pair CostMap::get_nearby_cost_entry(const vtr::NdMatrix& matrix, + int cx, + int cy, + const vtr::Rect& bounds) { // spiral around (cx, cy) looking for a nearby entry - int n = 1, x, y; + int n = 1; bool in_bounds; util::Cost_Entry entry; do { in_bounds = false; - y = cy - n; // top - // left -> right - for (x = cx - n; x < cx + n; x++) { - if (bounds.contains(vtr::Point(x, y))) { - assign_min_entry(entry, matrix[x][y]); + for (int ox = -n; ox <= n; ox++) { + int x = cx + ox; + int oy = n - abs(ox); + int yp = cy + oy; + int yn = cy - oy; + if (bounds.contains(vtr::Point(x, yp))) { + assign_min_entry(entry, matrix[x][yp]); in_bounds = true; } - } - x = cx + n; // right - // top -> bottom - for (; y < cy + n; y++) { - if (bounds.contains(vtr::Point(x, y))) { - assign_min_entry(entry, matrix[x][y]); - in_bounds = true; - } - } - y = cy + n; // bottom - // right -> left - for (; x > cx - n; x--) { - if (bounds.contains(vtr::Point(x, y))) { - assign_min_entry(entry, matrix[x][y]); + if (bounds.contains(vtr::Point(x, yn))) { + assign_min_entry(entry, matrix[x][yn]); in_bounds = true; } } - x = cx - n; // left - // bottom -> top - for (; y > cy - n; y--) { - if (bounds.contains(vtr::Point(x, y))) { - assign_min_entry(entry, matrix[x][y]); - in_bounds = true; - } - } - if (entry.valid()) return entry; + if (entry.valid()) return std::make_pair(entry, n); n++; +#if defined(FILL_LIMIT) + if (n > FILL_LIMIT) { + break; + } +#endif } while (in_bounds); - return util::Cost_Entry(); + return std::make_pair(util::Cost_Entry(), n); } // derive a cost from the map between two nodes @@ -366,8 +464,10 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, /* runs Dijkstra's algorithm from specified node until all nodes have been * visited. Each time a pin is visited, the delay/congestion information * to that pin is stored to an entry in the routing_cost_map */ -static void run_dijkstra(int start_node_ind, - std::vector* routing_costs) { +static float run_dijkstra(int start_node_ind, + RoutingCosts* routing_costs, + SimpleCache* cache, + float cost_limit) { auto& device_ctx = g_vpr_ctx.device(); /* a list of boolean flags (one for each rr node) to figure out if a @@ -384,6 +484,8 @@ static void run_dijkstra(int start_node_ind, /* first entry has no upstream delay or congestion */ util::PQ_Entry_Lite first_entry(start_node_ind, UNDEFINED, 0, true); + float max_cost = 0.f; + pq.push(first_entry); /* now do routing */ @@ -439,8 +541,10 @@ static void run_dijkstra(int start_node_ind, vtr::Point delta(ssize_t(from_canonical_loc->first) - ssize_t(box_location.first), ssize_t(from_canonical_loc->second) - ssize_t(box_location.second)); RoutingCostKey key = { - delta, - box_id}; + seg_index, + box_id, + delta}; + CompressedRoutingCostKey compressed_key(key); RoutingCost val = { parent, node_ind, @@ -448,11 +552,41 @@ static void run_dijkstra(int start_node_ind, current_full.delay - parent_entry.delay, current_full.congestion_upstream - parent_entry.congestion_upstream)}; - const auto& x = (*routing_costs)[seg_index].find(key); - // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST - if (x == (*routing_costs)[seg_index].end() || x->second.cost_entry.delay > val.cost_entry.delay) { - (*routing_costs)[seg_index][key] = val; + float cost = 0.f; + bool in_window = abs(delta.x()) <= DIJKSTRA_CACHE_WINDOW && abs(delta.y()) <= DIJKSTRA_CACHE_WINDOW; + if (in_window && cache->get(compressed_key, &cost) && cost <= val.cost_entry.delay) { + // the sample was not cheaper than the cached sample + if (BREAK_ON_MISS) { + // don't store the rest of the path + break; + } + } else { + const auto& x = routing_costs->find(key); + if (x != routing_costs->end()) { + if (x->second.cost_entry.delay > val.cost_entry.delay) { + // this sample is cheaper + (*routing_costs)[key] = val; + if (in_window) { + cache->insert(compressed_key, val.cost_entry.delay); + } + } else { + // this sample is not cheaper + if (BREAK_ON_MISS) { + // don't store the rest of the path + break; + } + if (in_window) { + cache->insert(compressed_key, x->second.cost_entry.delay); + } + } + } else { + // this sample is new + (*routing_costs)[key] = val; + if (in_window) { + cache->insert(compressed_key, val.cost_entry.delay); + } + } } parent_entry = util::PQ_Entry(*it, parent_node.edge_switch(paths[*it].edge), parent_entry.delay, @@ -463,7 +597,13 @@ static void run_dijkstra(int start_node_ind, expand_dijkstra_neighbours(current, paths, node_expanded, pq); node_expanded[node_ind] = true; } + + max_cost = std::max(max_cost, current.delay_cost); + if (max_cost > cost_limit) { + break; + } } + return max_cost; } // compute the cost maps for lookahead @@ -471,8 +611,30 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen vtr::ScopedStartFinishTimer timer("Computing connection box lookahead map"); size_t num_segments = segment_inf.size(); - std::vector inodes_for_segment(num_segments); - find_inodes_for_segment_types(&inodes_for_segment); + std::vector sample_points; + { + std::vector inodes_for_segment(num_segments); + find_inodes_for_segment_types(&inodes_for_segment); + + // collapse into a vector + for (auto& grid : inodes_for_segment) { + for (int y = 0; y < SAMPLE_GRID_SIZE; y++) { + for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { + auto& point = grid.point[y][x]; + if (!point.samples.empty()) { + point.order = point.samples[0]; + sample_points.push_back(point); + } + } + } + } + } + + // sort by VPR coordinate + std::sort(sample_points.begin(), sample_points.end(), + [](const SamplePoint& a, const SamplePoint& b) { + return a.order < b.order; + }); /* free previous delay map and allocate new one */ auto& device_ctx = g_vpr_ctx.device(); @@ -480,98 +642,72 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen device_ctx.connection_boxes.num_connection_box_types()); VTR_ASSERT(REPRESENTATIVE_ENTRY_METHOD == util::SMALLEST); - std::vector all_costs(num_segments); + RoutingCosts all_costs; /* run Dijkstra's algorithm for each segment type & channel type combination */ #if defined(VPR_USE_TBB) tbb::mutex all_costs_mutex; - tbb::parallel_for(size_t(0), size_t(num_segments), [&](int iseg) { + tbb::parallel_for_each(sample_points, [&](const SamplePoint& point) { #else - for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { + for (const auto& point : sample_points) { #endif - VTR_LOG("Creating cost map for %s(%d)\n", - segment_inf[iseg].name.c_str(), iseg); - - std::vector costs(num_segments); - for (const auto& row : inodes_for_segment[iseg]) { - for (auto cell : row) { - for (auto node_ind : cell) { - run_dijkstra(node_ind, &costs); - } - } + float max_cost = 0.f; + RoutingCosts costs; + SimpleCache cache; + for (auto node_ind : point.samples) { + max_cost = std::max(max_cost, run_dijkstra(node_ind, &costs, &cache, COST_LIMIT)); } #if defined(VPR_USE_TBB) all_costs_mutex.lock(); #endif + VTR_LOG("Expanded sample point (%d, %d) %e miss %g\n", + point.location.x(), point.location.y(), max_cost, cache.miss_ratio()); + // combine the cost map from this run with the final cost maps for each segment - for (int i = 0; i < (ssize_t)num_segments; i++) { - for (const auto& cost : costs[i]) { - const auto& key = cost.first; - const auto& val = cost.second; - const auto& x = all_costs[i].find(key); + for (const auto& cost : costs) { + const auto& key = cost.first; + const auto& val = cost.second; + const auto& x = all_costs.find(key); - // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST - if (x == all_costs[i].end() || x->second.cost_entry.delay > val.cost_entry.delay) { - all_costs[i][key] = val; - } + // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST + if (x == all_costs.end() || x->second.cost_entry.delay > val.cost_entry.delay) { + all_costs[key] = val; } } #if defined(VPR_USE_TBB) all_costs_mutex.unlock(); -#endif -#if !defined(VPR_USE_TBB) - } -#else }); +#else + } #endif VTR_LOG("Combining results\n"); - for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { + /* boil down the cost list in routing_cost_map at each coordinate to a + * representative cost entry and store it in the lookahead cost map */ + cost_map_.set_cost_map(all_costs); + +// diagnostics #if 0 - for (auto &cost : all_costs[iseg]) { - const auto& key = cost.first; - const auto& val = cost.second; - VTR_LOG("%d -> %d (%d, %d): %g, %g\n", - val.from_node, val.to_node, - key.delta.x(), key.delta.y(), - val.cost_entry.delay, val.cost_entry.congestion); - } + for (auto &cost : all_costs) { + const auto& key = cost.first; + const auto& val = cost.second; + VTR_LOG("%d -> %d (%d, %d): %g, %g\n", + val.from_node, val.to_node, + key.delta.x(), key.delta.y(), + val.cost_entry.delay, val.cost_entry.congestion); + } #endif - const auto& costs = all_costs[iseg]; - if (costs.empty()) { - // check that there were no start nodes - bool empty = true; - for (const auto& row : inodes_for_segment[iseg]) { - for (auto cell : row) { - if (!cell.empty()) { - empty = false; - break; - } - } - if (!empty) break; - } - if (empty) { - VTR_LOG_WARN("Segment %s(%d) found no routes\n", - segment_inf[iseg].name.c_str(), iseg); - } else { - VTR_LOG_WARN("Segment %s(%d) found no routes, even though there are some matching nodes\n", - segment_inf[iseg].name.c_str(), iseg); - } - } else { - /* boil down the cost list in routing_cost_map at each coordinate to a - * representative cost entry and store it in the lookahead cost map */ - cost_map_.set_cost_map(iseg, costs); - } -#if 0 +#if 1 + for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { VTR_LOG("cost map for %s(%d)\n", - segment_inf[iseg].name.c_str(), iseg); + segment_inf[iseg].name.c_str(), iseg); cost_map_.print(iseg); -#endif } +#endif #if 0 for (std::pair p : cost_map_.list_empty()) { @@ -603,18 +739,17 @@ float ConnectionBoxMapLookahead::get_expected_cost( } } -// also known as the L1 norm -static int manhattan_distance(const t_rr_node& node, int x, int y) { - int node_center_x = (node.xhigh() + node.xlow()) / 2; - int node_center_y = (node.yhigh() + node.ylow()) / 2; - return abs(node_center_x - x) + abs(node_center_y - y); -} - static vtr::Rect bounding_box_for_node(const t_rr_node& node) { return vtr::Rect(node.xlow(), node.ylow(), node.xhigh() + 1, node.yhigh() + 1); } +static vtr::Point point_for_node(const t_rr_node& node) { + int x = (node.xhigh() + node.xlow()) / 2; + int y = (node.yhigh() + node.ylow()) / 2; + return vtr::Point(x, y); +} + // for each segment type, find the nearest nodes to an equally spaced grid of points // within the bounding box for that segment type static void find_inodes_for_segment_types(std::vector* inodes_for_segment) { @@ -639,15 +774,16 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se inodes_for_segment->clear(); inodes_for_segment->resize(num_segments); for (auto& grid : *inodes_for_segment) { - for (auto& row : grid) { - for (auto& cell : row) { - cell = std::vector(); + for (int y = 0; y < SAMPLE_GRID_SIZE; y++) { + for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { + grid.point[y][x].samples = std::vector(); } } } for (size_t i = 0; i < rr_nodes.size(); i++) { auto& node = rr_nodes[i]; + vtr::Rect node_bounds = bounding_box_for_node(node); if (node.type() != CHANX && node.type() != CHANY) continue; if (node.capacity() == 0 || device_ctx.connection_boxes.find_canonical_loc(i) == nullptr) continue; @@ -659,24 +795,24 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se auto& grid = (*inodes_for_segment)[seg_index]; for (int sy = 0; sy < SAMPLE_GRID_SIZE; sy++) { for (int sx = 0; sx < SAMPLE_GRID_SIZE; sx++) { - auto& stored_inodes = grid[sy][sx]; - if (stored_inodes.empty()) { - stored_inodes.push_back(i); + auto& point = grid.point[sy][sx]; + if (point.samples.empty()) { + point.samples.push_back(i); + point.location = vtr::Point(node.xlow(), node.ylow()); goto next_rr_node; } - auto& first_stored_node = rr_nodes[stored_inodes.front()]; - if (first_stored_node.xhigh() >= node.xhigh() && first_stored_node.xlow() <= node.xlow() && first_stored_node.yhigh() >= node.yhigh() && first_stored_node.ylow() <= node.ylow()) { - stored_inodes.push_back(i); + if (node_bounds.contains(point.location)) { + point.samples.push_back(i); goto next_rr_node; } vtr::Point target = sample(bounding_box_for_segment[seg_index], sx + 1, sy + 1, SAMPLE_GRID_SIZE + 1); - int distance_new = manhattan_distance(node, target.x(), target.y()); - int distance_stored = manhattan_distance(first_stored_node, target.x(), target.y()); + int distance_new = manhattan_distance(point_for_node(node), target); + int distance_stored = manhattan_distance(point.location, target); if (distance_new < distance_stored) { - stored_inodes.clear(); - stored_inodes.push_back(i); + point.samples.clear(); + point.samples.push_back(i); goto next_rr_node; } } diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h index 411394c89d5..3f9ed424924 100644 --- a/vpr/src/route/connection_box_lookahead_map.h +++ b/vpr/src/route/connection_box_lookahead_map.h @@ -10,14 +10,48 @@ // Keys in the RoutingCosts map struct RoutingCostKey { - // offset of the destination connection box from the starting segment - vtr::Point delta; + // segment type index + int seg_index; // type of the destination connection box ConnectionBoxId box_id; + // offset of the destination connection box from the starting segment + vtr::Point delta; + + RoutingCostKey() + : seg_index(-1) + , delta(0, 0) {} + RoutingCostKey(int seg_index_arg, ConnectionBoxId box_id_arg, vtr::Point delta_arg) + : seg_index(seg_index_arg) + , box_id(box_id_arg) + , delta(delta_arg) {} + bool operator==(const RoutingCostKey& other) const { - return delta == other.delta && box_id == other.box_id; + return seg_index == other.seg_index && box_id == other.box_id && delta == other.delta; + } +}; + +// compressed version of RoutingCostKey +// TODO add bounds checks +struct CompressedRoutingCostKey { + uint32_t data; + + CompressedRoutingCostKey() { + data = -1; + } + CompressedRoutingCostKey(const RoutingCostKey& key) { + data = key.seg_index & 0xff; + data <<= 8; + data |= size_t(key.box_id) & 0xff; + data <<= 8; + data |= key.delta.x() & 0xff; + data <<= 8; + data |= key.delta.y() & 0xff; + } + + bool operator==(CompressedRoutingCostKey other) const { + return data == other.data; } }; @@ -30,23 +64,23 @@ struct RoutingCost { util::Cost_Entry cost_entry; }; -// Specialization of std::hash for RoutingCostKey -namespace std { -template<> -struct hash { - typedef RoutingCostKey argument_type; - typedef std::size_t result_type; - result_type operator()(argument_type const& s) const noexcept { - result_type const h1(std::hash{}(s.delta.x())); - result_type const h2(std::hash{}(s.delta.y())); - result_type const h3(std::hash{}(size_t(s.box_id))); - return h1 ^ ((h2 ^ (h3 << 1)) << 1); +// hash implementation for RoutingCostKey +struct HashRoutingCostKey { + std::size_t operator()(RoutingCostKey const& key) const noexcept { + uint64_t data; + data = key.seg_index & 0xffff; + data <<= 16; + data |= size_t(key.box_id) & 0xffff; + data <<= 16; + data |= key.delta.x() & 0xffff; + data <<= 16; + data |= key.delta.y() & 0xffff; + return std::hash{}(data); } }; -} // namespace std // Map used to store intermediate routing costs -typedef std::unordered_map RoutingCosts; +typedef std::unordered_map RoutingCosts; // Dense cost maps per source segment and destination connection box types class CostMap { @@ -54,10 +88,8 @@ class CostMap { void set_counts(size_t seg_count, size_t box_count); int node_to_segment(int from_node_ind) const; util::Cost_Entry find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const; - void set_cost_map(int from_seg_index, - const RoutingCosts& costs); - void set_cost_map(int from_seg_index, ConnectionBoxId box_id, const RoutingCosts& costs); - util::Cost_Entry get_nearby_cost_entry(const vtr::NdMatrix& matrix, int cx, int cy, const vtr::Rect& bounds); + void set_cost_map(const RoutingCosts& costs); + std::pair get_nearby_cost_entry(const vtr::NdMatrix& matrix, int cx, int cy, const vtr::Rect& bounds); void read(const std::string& file); void write(const std::string& file) const; void print(int iseg) const; @@ -67,6 +99,8 @@ class CostMap { vtr::NdMatrix, 2> cost_map_; vtr::NdMatrix, 2> offset_; std::vector segment_map_; + size_t seg_count_; + size_t box_count_; }; // Implementation of RouterLookahead based on source segment and destination connection box types From 9d1aaa9f35a681c6f8306f817a07faccff721fad Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Wed, 30 Oct 2019 16:25:13 -0700 Subject: [PATCH 13/41] suggested changes Signed-off-by: Dusty DeWeese --- vpr/src/route/connection_box_lookahead_map.cpp | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index fa34c205b43..f00dddfb36e 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -345,9 +345,11 @@ std::vector> CostMap::list_empty() const { return results; } -static void assign_min_entry(util::Cost_Entry& dst, const util::Cost_Entry& src) { - if (src.delay < dst.delay) dst.delay = src.delay; - if (src.congestion < dst.congestion) dst.congestion = src.congestion; +static void assign_min_entry(util::Cost_Entry* dst, const util::Cost_Entry& src) { + if (src.delay < dst->delay) { + dst->delay = src.delay; + dst->congestion = src.congestion; + } } // find the minimum cost entry from the nearest manhattan distance neighbor @@ -368,11 +370,11 @@ std::pair CostMap::get_nearby_cost_entry(const vtr::NdMat int yp = cy + oy; int yn = cy - oy; if (bounds.contains(vtr::Point(x, yp))) { - assign_min_entry(entry, matrix[x][yp]); + assign_min_entry(&entry, matrix[x][yp]); in_bounds = true; } if (bounds.contains(vtr::Point(x, yn))) { - assign_min_entry(entry, matrix[x][yn]); + assign_min_entry(&entry, matrix[x][yn]); in_bounds = true; } } @@ -457,7 +459,10 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, float expected_congestion = cost_entry.congestion; float expected_cost = criticality_fac * expected_delay + (1.0 - criticality_fac) * expected_congestion; - VTR_ASSERT(std::isfinite(expected_cost) && expected_cost >= 0.f); + if (!std::isfinite(expected_cost) || expected_cost < 0.f) { + VTR_LOG_ERROR("invalid cost for segment %d to connection box %d at (%d, %d)\n", from_seg_index, (int)size_t(box_id), (int)dx, (int)dy); + VTR_ASSERT(0); + } return expected_cost; } From 9c9437b4aea66f7fd5c59754611c647f9a9dc920 Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Wed, 30 Oct 2019 18:18:10 -0700 Subject: [PATCH 14/41] changes suggested in PR Signed-off-by: Dusty DeWeese --- .../route/connection_box_lookahead_map.cpp | 263 +++++++++++------- vpr/src/route/route_timing.h | 2 + 2 files changed, 159 insertions(+), 106 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index f00dddfb36e..824b8422179 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -40,26 +40,41 @@ #define REPRESENTATIVE_ENTRY_METHOD util::SMALLEST // #define FILL_LIMIT 30 -/* Sample based an NxN grid of starting segments, where N = SAMPLE_GRID_SIZE */ +#define CONNECTION_BOX_LOOKAHEAD_MAP_PRINT_COST_MAPS + +// Sample based an NxN grid of starting segments, where N = SAMPLE_GRID_SIZE static constexpr int SAMPLE_GRID_SIZE = 3; + +// Stop Dijkstra expansion after reaching COST_LIMIT static constexpr float COST_LIMIT = std::numeric_limits::infinity(); + +// Number of entries in the routing cost cache static constexpr int DIJKSTRA_CACHE_SIZE = 64; + +// Only entries with a delta inside the window (+- DIJKSTRA_CACHE_WINDOW x/y) are cached static constexpr int DIJKSTRA_CACHE_WINDOW = 3; + +// Don't continue storing a path after hitting a lower-or-same cost entry. static constexpr bool BREAK_ON_MISS = false; + +// Distance penalties filling are calculated based on available samples, but can be adjusted with this factor. static constexpr float PENALTY_FACTOR = 1.f; +// a sample point for a segment type, contains all segments at the VPR location struct SamplePoint { - uint64_t order; + uint64_t order; // used to order sample points vtr::Point location; std::vector samples; SamplePoint() : location(0, 0) {} }; +// a grid of sample points struct SampleGrid { SamplePoint point[SAMPLE_GRID_SIZE][SAMPLE_GRID_SIZE]; }; +// implements a simple cache of key(K)/value(V) pairs of N entries template class SimpleCache { public: @@ -67,12 +82,14 @@ class SimpleCache { : pos(0) , hits(0) , misses(0) {} + + // O(N) lookup bool get(K key, V* value) { for (int i = 0; i < N; i++) { auto& k = keys[i]; if (k == key) { auto& v = values[i]; -#if 0 +#if defined(CONNECTION_BOX_LOOKAHEAD_PUSH_BACK_HITS) // preserve the found key by pushing it back int last = (pos + N - 1) % N; std::swap(k, keys[last]); @@ -86,14 +103,20 @@ class SimpleCache { misses++; return false; } + + // O(1) insertion (overwriting an older entry) void insert(K key, V val) { keys[pos] = key; values[pos] = val; pos = (pos + 1) % N; } + + // ratio of successful lookups float hit_ratio() { return hits ? static_cast(hits) / static_cast(hits + misses) : 0.f; } + + // ratio of unsuccessful lookups float miss_ratio() { return misses ? static_cast(misses) / static_cast(hits + misses) : 0.f; } @@ -246,7 +269,7 @@ void CostMap::set_cost_map(const RoutingCosts& costs) { bool couldnt_fill = false; auto shifted_bounds = vtr::Rect(0, 0, seg_box_bounds.width(), seg_box_bounds.height()); int max_fill = 0; - for (unsigned ix = 0; ix < matrix.dim_size(0) && !couldnt_fill; ix++) { + for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { util::Cost_Entry& cost_entry = matrix[ix][iy]; if (!cost_entry.valid()) { @@ -262,6 +285,11 @@ void CostMap::set_cost_map(const RoutingCosts& costs) { } } } +#if !defined(FILL_LIMIT) + if (couldnt_fill) { + break; + } +#endif } if (!couldnt_fill) { @@ -445,13 +473,11 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, if (!cost_entry.valid()) { // there is no route -#if 0 - // useful for debugging but can be really noisy - VTR_LOG_WARN("Not connected %d (%s, %d) -> %d (%s, %d, (%d, %d))\n", - from_node_ind, device_ctx.rr_nodes[from_node_ind].type_string(), from_seg_index, - to_node_ind, device_ctx.rr_nodes[to_node_ind].type_string(), - (int)size_t(box_id), (int)box_location.first, (int)box_location.second); -#endif + VTR_LOGV_DEBUG(f_router_debug, + "Not connected %d (%s, %d) -> %d (%s, %d, (%d, %d))\n", + from_node_ind, device_ctx.rr_nodes[from_node_ind].type_string(), from_seg_index, + to_node_ind, device_ctx.rr_nodes[to_node_ind].type_string(), + (int)size_t(box_id), (int)box_location.first, (int)box_location.second); return std::numeric_limits::infinity(); } @@ -466,6 +492,111 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, return expected_cost; } +// add a best cost routing path from start_node_ind to node_ind to routing costs +static void add_paths(int start_node_ind, + int node_ind, + std::unordered_map* paths, + RoutingCosts* routing_costs, + SimpleCache* cache) { + auto& device_ctx = g_vpr_ctx.device(); + ConnectionBoxId box_id; + std::pair box_location; + float site_pin_delay; + bool found = device_ctx.connection_boxes.find_connection_box( + node_ind, &box_id, &box_location, &site_pin_delay); + if (!found) { + VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", node_ind); + } + + // reconstruct the path + std::vector path; + for (int i = node_ind; i != start_node_ind; path.push_back(i = (*paths)[i].parent)) + ; + util::PQ_Entry parent_entry(start_node_ind, UNDEFINED, 0, 0, 0, true); + + // recalculate the path with congestion + util::PQ_Entry current_full = parent_entry; + int parent = start_node_ind; + for (auto it = path.rbegin(); it != path.rend(); it++) { + auto& parent_node = device_ctx.rr_nodes[parent]; + current_full = util::PQ_Entry(*it, parent_node.edge_switch((*paths)[*it].edge), current_full.delay, + current_full.R_upstream, current_full.congestion_upstream, false); + parent = *it; + } + + // add each node along the path subtracting the incremental costs from the current costs + parent = start_node_ind; + for (auto it = path.rbegin(); it != path.rend(); it++) { + auto& parent_node = device_ctx.rr_nodes[parent]; + int seg_index = device_ctx.rr_indexed_data[parent_node.cost_index()].seg_index; + const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(parent); + if (from_canonical_loc == nullptr) { + VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d", + parent); + } + + vtr::Point delta(ssize_t(from_canonical_loc->first) - ssize_t(box_location.first), + ssize_t(from_canonical_loc->second) - ssize_t(box_location.second)); + RoutingCostKey key = { + seg_index, + box_id, + delta}; + CompressedRoutingCostKey compressed_key(key); + RoutingCost val = { + parent, + node_ind, + util::Cost_Entry( + current_full.delay - parent_entry.delay, + current_full.congestion_upstream - parent_entry.congestion_upstream)}; + + // NOTE: implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST + + // use a cache for a small window around a delta of (0, 0) + float cost = 0.f; + bool in_window = abs(delta.x()) <= DIJKSTRA_CACHE_WINDOW && abs(delta.y()) <= DIJKSTRA_CACHE_WINDOW; + if (in_window && cache->get(compressed_key, &cost) && cost <= val.cost_entry.delay) { + // the sample was not cheaper than the cached sample + const auto& x = routing_costs->find(key); + VTR_ASSERT(x != routing_costs->end()); + if (BREAK_ON_MISS) { + // don't store the rest of the path + break; + } + } else { + const auto& x = routing_costs->find(key); + if (x != routing_costs->end()) { + if (x->second.cost_entry.delay > val.cost_entry.delay) { + // this sample is cheaper + (*routing_costs)[key] = val; + if (in_window) { + cache->insert(compressed_key, val.cost_entry.delay); + } + } else { + // this sample is not cheaper + if (BREAK_ON_MISS) { + // don't store the rest of the path + break; + } + if (in_window) { + cache->insert(compressed_key, x->second.cost_entry.delay); + } + } + } else { + // this sample is new + (*routing_costs)[key] = val; + if (in_window) { + cache->insert(compressed_key, val.cost_entry.delay); + } + } + } + + // update parent data + parent_entry = util::PQ_Entry(*it, parent_node.edge_switch((*paths)[*it].edge), parent_entry.delay, + parent_entry.R_upstream, parent_entry.congestion_upstream, false); + parent = *it; + } +} + /* runs Dijkstra's algorithm from specified node until all nodes have been * visited. Each time a pin is visited, the delay/congestion information * to that pin is stored to an entry in the routing_cost_map */ @@ -507,97 +638,7 @@ static float run_dijkstra(int start_node_ind, /* if this node is an ipin record its congestion/delay in the routing_cost_map */ if (device_ctx.rr_nodes[node_ind].type() == IPIN) { - ConnectionBoxId box_id; - std::pair box_location; - float site_pin_delay; - bool found = device_ctx.connection_boxes.find_connection_box( - node_ind, &box_id, &box_location, &site_pin_delay); - if (!found) { - VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", node_ind); - } - - // reconstruct the path - std::vector path; - for (int i = node_ind; i != start_node_ind; path.push_back(i = paths[i].parent)) - ; - util::PQ_Entry parent_entry(start_node_ind, UNDEFINED, 0, 0, 0, true); - - // recalculate the path with congestion - util::PQ_Entry current_full = parent_entry; - int parent = start_node_ind; - for (auto it = path.rbegin(); it != path.rend(); it++) { - auto& parent_node = device_ctx.rr_nodes[parent]; - current_full = util::PQ_Entry(*it, parent_node.edge_switch(paths[*it].edge), current_full.delay, - current_full.R_upstream, current_full.congestion_upstream, false); - parent = *it; - } - - // add each node along the path subtracting the incremental costs from the current costs - parent = start_node_ind; - for (auto it = path.rbegin(); it != path.rend(); it++) { - auto& parent_node = device_ctx.rr_nodes[parent]; - int seg_index = device_ctx.rr_indexed_data[parent_node.cost_index()].seg_index; - const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(parent); - if (from_canonical_loc == nullptr) { - VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d", - parent); - } - - vtr::Point delta(ssize_t(from_canonical_loc->first) - ssize_t(box_location.first), - ssize_t(from_canonical_loc->second) - ssize_t(box_location.second)); - RoutingCostKey key = { - seg_index, - box_id, - delta}; - CompressedRoutingCostKey compressed_key(key); - RoutingCost val = { - parent, - node_ind, - util::Cost_Entry( - current_full.delay - parent_entry.delay, - current_full.congestion_upstream - parent_entry.congestion_upstream)}; - - // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST - float cost = 0.f; - bool in_window = abs(delta.x()) <= DIJKSTRA_CACHE_WINDOW && abs(delta.y()) <= DIJKSTRA_CACHE_WINDOW; - if (in_window && cache->get(compressed_key, &cost) && cost <= val.cost_entry.delay) { - // the sample was not cheaper than the cached sample - if (BREAK_ON_MISS) { - // don't store the rest of the path - break; - } - } else { - const auto& x = routing_costs->find(key); - if (x != routing_costs->end()) { - if (x->second.cost_entry.delay > val.cost_entry.delay) { - // this sample is cheaper - (*routing_costs)[key] = val; - if (in_window) { - cache->insert(compressed_key, val.cost_entry.delay); - } - } else { - // this sample is not cheaper - if (BREAK_ON_MISS) { - // don't store the rest of the path - break; - } - if (in_window) { - cache->insert(compressed_key, x->second.cost_entry.delay); - } - } - } else { - // this sample is new - (*routing_costs)[key] = val; - if (in_window) { - cache->insert(compressed_key, val.cost_entry.delay); - } - } - } - - parent_entry = util::PQ_Entry(*it, parent_node.edge_switch(paths[*it].edge), parent_entry.delay, - parent_entry.R_upstream, parent_entry.congestion_upstream, false); - parent = *it; - } + add_paths(start_node_ind, node_ind, &paths, routing_costs, cache); } else { expand_dijkstra_neighbours(current, paths, node_expanded, pq); node_expanded[node_ind] = true; @@ -657,8 +698,15 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen for (const auto& point : sample_points) { #endif float max_cost = 0.f; + + // holds the cost entries for a run RoutingCosts costs; + + // a cache to avoid hammering the RoutingCosts map, since lookups will be dominated by a few keys + // must be consistent with `costs` i.e. any entry in the cache should also be in `costs` + // NOTE: this is used as a write-through cache, maybe try write-back SimpleCache cache; + for (auto node_ind : point.samples) { max_cost = std::max(max_cost, run_dijkstra(node_ind, &costs, &cache, COST_LIMIT)); } @@ -695,8 +743,8 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen cost_map_.set_cost_map(all_costs); // diagnostics -#if 0 - for (auto &cost : all_costs) { +#if defined(CONNECTION_BOX_LOOKAHEAD_MAP_PRINT_COST_ENTRIES) + for (auto& cost : all_costs) { const auto& key = cost.first; const auto& val = cost.second; VTR_LOG("%d -> %d (%d, %d): %g, %g\n", @@ -706,7 +754,7 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen } #endif -#if 1 +#if defined(CONNECTION_BOX_LOOKAHEAD_MAP_PRINT_COST_MAPS) for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { VTR_LOG("cost map for %s(%d)\n", segment_inf[iseg].name.c_str(), iseg); @@ -714,7 +762,7 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen } #endif -#if 0 +#if defined(CONNECTION_BOX_LOOKAHEAD_MAP_PRINT_EMPTY_MAPS) for (std::pair p : cost_map_.list_empty()) { int iseg, box_id; std::tie(iseg, box_id) = p; @@ -744,11 +792,14 @@ float ConnectionBoxMapLookahead::get_expected_cost( } } +// the smallest bounding box containing a node static vtr::Rect bounding_box_for_node(const t_rr_node& node) { return vtr::Rect(node.xlow(), node.ylow(), node.xhigh() + 1, node.yhigh() + 1); } +// the center point for a node +// it is unknown where the the node starts, so use the average static vtr::Point point_for_node(const t_rr_node& node) { int x = (node.xhigh() + node.xlow()) / 2; int y = (node.yhigh() + node.ylow()) / 2; diff --git a/vpr/src/route/route_timing.h b/vpr/src/route/route_timing.h index 5bc4653035a..3974f3c6405 100644 --- a/vpr/src/route/route_timing.h +++ b/vpr/src/route/route_timing.h @@ -13,6 +13,8 @@ #include "connection_router_interface.h" #include "heap_type.h" +extern bool f_router_debug; + int get_max_pins_per_net(); bool try_timing_driven_route(const t_router_opts& router_opts, From 8abd79fb91a85a3be3866b38b36106947f4d3a24 Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Thu, 31 Oct 2019 18:19:10 -0700 Subject: [PATCH 15/41] add penalty to Cost_Map::find_cost for points outside of the map Signed-off-by: Dusty DeWeese --- .../route/connection_box_lookahead_map.cpp | 54 +++++++++++-------- vpr/src/route/connection_box_lookahead_map.h | 5 +- 2 files changed, 34 insertions(+), 25 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 824b8422179..d1ee992583d 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -140,12 +140,29 @@ static int manhattan_distance(const vtr::Point& a, const vtr::Point& b return abs(b.x() - a.x()) + abs(b.y() - a.y()); } +template +constexpr const T& clamp(const T& v, const T& lo, const T& hi) { + return std::min(std::max(v, lo), hi); +} + +template +static vtr::Point closest_point_in_rect(const vtr::Rect& r, const vtr::Point& p) { + if (r.empty()) { + return vtr::Point(0, 0); + } else { + return vtr::Point(clamp(p.x(), r.xmin(), r.xmax() - 1), + clamp(p.y(), r.ymin(), r.ymax() - 1)); + } +} + // resize internal data structures void CostMap::set_counts(size_t seg_count, size_t box_count) { cost_map_.clear(); offset_.clear(); + penalty_.clear(); cost_map_.resize({seg_count, box_count}); offset_.resize({seg_count, box_count}); + penalty_.resize({seg_count, box_count}); seg_count_ = seg_count; box_count_ = box_count; @@ -166,6 +183,11 @@ int CostMap::node_to_segment(int from_node_ind) const { return segment_map_[from_node_ind]; } +static util::Cost_Entry penalize(const util::Cost_Entry& entry, int distance, float penalty) { + return util::Cost_Entry(entry.delay + distance * penalty * PENALTY_FACTOR, + entry.congestion); +} + // get a cost entry for a segment type, connection box type, and offset util::Cost_Entry CostMap::find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const { VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); @@ -174,29 +196,14 @@ util::Cost_Entry CostMap::find_cost(int from_seg_index, ConnectionBoxId box_id, return util::Cost_Entry(); } - int dx = delta_x - offset_[from_seg_index][size_t(box_id)].first; - int dy = delta_y - offset_[from_seg_index][size_t(box_id)].second; - - if (dx < 0) { - dx = 0; - } - if (dy < 0) { - dy = 0; - } - - if (dx >= (ssize_t)cost_map.dim_size(0)) { - dx = cost_map.dim_size(0) - 1; - } - if (dy >= (ssize_t)cost_map.dim_size(1)) { - dy = cost_map.dim_size(1) - 1; - } - - return cost_map_[from_seg_index][size_t(box_id)][dx][dy]; -} - -static util::Cost_Entry penalize(const util::Cost_Entry& entry, int distance, float penalty) { - return util::Cost_Entry(entry.delay + distance * penalty * PENALTY_FACTOR, - entry.congestion); + vtr::Point coord(delta_x - offset_[from_seg_index][size_t(box_id)].first, + delta_y - offset_[from_seg_index][size_t(box_id)].second); + vtr::Rect bounds(0, 0, cost_map.dim_size(0), cost_map.dim_size(1)); + auto closest = closest_point_in_rect(bounds, coord); + auto cost = cost_map_[from_seg_index][size_t(box_id)][closest.x()][closest.y()]; + float penalty = penalty_[from_seg_index][size_t(box_id)]; + auto distance = manhattan_distance(closest, coord); + return penalize(cost, distance, penalty); } // set the cost map for a segment type and connection box type, filling holes @@ -263,6 +270,7 @@ void CostMap::set_cost_map(const RoutingCosts& costs) { } } float delay_penalty = (max_delay - min_delay) / static_cast(std::max(1, manhattan_distance(max_location, min_location))); + penalty_[seg][box] = delay_penalty; // find missing cost entries and fill them in by copying a nearby cost entry std::vector> missing; diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h index 3f9ed424924..9b5bdb0afea 100644 --- a/vpr/src/route/connection_box_lookahead_map.h +++ b/vpr/src/route/connection_box_lookahead_map.h @@ -96,8 +96,9 @@ class CostMap { std::vector> list_empty() const; private: - vtr::NdMatrix, 2> cost_map_; - vtr::NdMatrix, 2> offset_; + vtr::Matrix> cost_map_; + vtr::Matrix> offset_; + vtr::Matrix penalty_; std::vector segment_map_; size_t seg_count_; size_t box_count_; From e5ddec73a184778e4873d78360cc8258549b1281 Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Fri, 1 Nov 2019 18:07:41 -0700 Subject: [PATCH 16/41] store penalties Signed-off-by: Dusty DeWeese --- libs/libvtrcapnproto/connection_map.capnp | 5 ++++ .../route/connection_box_lookahead_map.cpp | 25 ++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/libs/libvtrcapnproto/connection_map.capnp b/libs/libvtrcapnproto/connection_map.capnp index 1ca672108c8..bc445a30e70 100644 --- a/libs/libvtrcapnproto/connection_map.capnp +++ b/libs/libvtrcapnproto/connection_map.capnp @@ -12,8 +12,13 @@ struct VprVector2D { y @1 :Int64; } +struct VprFloatEntry { + value @0 :Float32; +} + struct VprCostMap { costMap @0 :Matrix.Matrix((Matrix.Matrix(VprCostEntry))); offset @1 :Matrix.Matrix(VprVector2D); segmentMap @2 :List(Int64); + penalty @3 :Matrix.Matrix(VprFloatEntry); } diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index d1ee992583d..fc852adc9ea 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -245,6 +245,7 @@ void CostMap::set_cost_map(const RoutingCosts& costs) { // fill the holes for (size_t seg = 0; seg < seg_count_; seg++) { for (size_t box = 0; box < box_count_; box++) { + penalty_[seg][box] = std::numeric_limits::infinity(); const auto& seg_box_bounds = bounds[seg][box]; if (seg_box_bounds.empty()) { continue; @@ -252,7 +253,7 @@ void CostMap::set_cost_map(const RoutingCosts& costs) { auto& matrix = cost_map_[seg][box]; // calculate delay penalty - float min_delay = 0.f, max_delay = 0.f; + float min_delay = std::numeric_limits::infinity(), max_delay = 0.f; vtr::Point min_location(0, 0), max_location(0, 0); for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { @@ -937,6 +938,16 @@ static void FromMatrixCostEntry( out, in, FromCostEntry); } +static void ToFloat(float* out, const VprFloatEntry::Reader& in) { + // Getting a scalar field is always "get()". + *out = in.getValue(); +} + +static void FromFloat(VprFloatEntry::Builder* out, const float& in) { + // Setting a scalar field is always "set(value)". + out->setValue(in); +} + void CostMap::read(const std::string& file) { MmapFile f(file); @@ -965,6 +976,12 @@ void CostMap::read(const std::string& file) { ToNdMatrix<2, Matrix, vtr::NdMatrix>( &cost_map_, cost_maps, ToMatrixCostEntry); } + + { + const auto& penalty = cost_map.getPenalty(); + ToNdMatrix<2, VprFloatEntry, float>( + &penalty_, penalty, ToFloat); + } } void CostMap::write(const std::string& file) const { @@ -991,6 +1008,12 @@ void CostMap::write(const std::string& file) const { &cost_maps, cost_map_, FromMatrixCostEntry); } + { + auto penalty = cost_map.initPenalty(); + FromNdMatrix<2, VprFloatEntry, float>( + &penalty, penalty_, FromFloat); + } + writeMessageToFile(file, &builder); } #endif From f00073fd6500405f0133468762ce3810ffa8e950 Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Mon, 4 Nov 2019 12:33:36 -0800 Subject: [PATCH 17/41] printf -> VTR_LOG Signed-off-by: Dusty DeWeese --- vpr/src/route/connection_box_lookahead_map.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index fc852adc9ea..824a344fb7a 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -336,10 +336,10 @@ void CostMap::print(int iseg) const { box_id++) { auto& matrix = cost_map_[iseg][box_id]; if (matrix.dim_size(0) == 0 || matrix.dim_size(1) == 0) { - printf("cost EMPTY for box_id = %lu\n", box_id); + VTR_LOG("cost EMPTY for box_id = %lu\n", box_id); continue; } - printf("cost for box_id = %lu\n", box_id); + VTR_LOG("cost for box_id = %lu\n", box_id); double sum = 0.0; for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { @@ -354,18 +354,18 @@ void CostMap::print(int iseg) const { for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { const auto& entry = matrix[ix][iy]; if (!entry.valid()) { - printf("*"); + VTR_LOG("*"); } else if (entry.delay * 4 > avg * 5) { - printf("O"); + VTR_LOG("O"); } else if (entry.delay > avg) { - printf("o"); + VTR_LOG("o"); } else if (entry.delay * 4 > avg * 3) { - printf("."); + VTR_LOG("."); } else { - printf(" "); + VTR_LOG(" "); } } - printf("\n"); + VTR_LOG("\n"); } } } From 56a28ceaab8586d208006a1a46df9ec07a699c02 Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Wed, 6 Nov 2019 12:01:55 -0800 Subject: [PATCH 18/41] change sampling method Signed-off-by: Dusty DeWeese --- .../route/connection_box_lookahead_map.cpp | 121 ++++++++++++------ 1 file changed, 82 insertions(+), 39 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 824a344fb7a..410918a8b6e 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -802,17 +802,48 @@ float ConnectionBoxMapLookahead::get_expected_cost( } // the smallest bounding box containing a node -static vtr::Rect bounding_box_for_node(const t_rr_node& node) { - return vtr::Rect(node.xlow(), node.ylow(), - node.xhigh() + 1, node.yhigh() + 1); +static vtr::Rect bounding_box_for_node(const ConnectionBoxes& connection_boxes, int node_ind) { + const std::pair* loc = connection_boxes.find_canonical_loc(node_ind); + if (loc == nullptr) { + return vtr::Rect(); + } else { + return vtr::Rect(vtr::Point(loc->first, loc->second)); + } } -// the center point for a node -// it is unknown where the the node starts, so use the average -static vtr::Point point_for_node(const t_rr_node& node) { - int x = (node.xhigh() + node.xlow()) / 2; - int y = (node.yhigh() + node.ylow()) / 2; - return vtr::Point(x, y); +static vtr::Point choose_point(const vtr::Matrix& counts, const vtr::Rect& bounding_box, int sx, int sy, int n) { + vtr::Rect window(sample(bounding_box, sx, sy, n), + sample(bounding_box, sx + 1, sy + 1, n)); + vtr::Point center = sample(window, 1, 1, 2); + vtr::Point sample_point = center; + int sample_distance = 0; + int sample_count = counts[sample_point.x()][sample_point.y()]; + for (int y = window.ymin(); y < window.ymax(); y++) { + for (int x = window.xmin(); x < window.xmax(); x++) { + vtr::Point here(x, y); + int count = counts[x][y]; + if (count < sample_count) continue; + int distance = manhattan_distance(center, here); + if (count > sample_count || (count == sample_count && distance < sample_distance)) { + sample_point = here; + sample_count = count; + sample_distance = distance; + } + } + } + return sample_point; +} + +// linear lookup, so consider something more sophisticated for large SAMPLE_GRID_SIZEs +static std::pair grid_lookup(const SampleGrid& grid, vtr::Point point) { + for (int sy = 0; sy < SAMPLE_GRID_SIZE; sy++) { + for (int sx = 0; sx < SAMPLE_GRID_SIZE; sx++) { + if (grid.point[sy][sx].location == point) { + return std::make_pair(sx, sy); + } + } + } + return std::make_pair(-1, -1); } // for each segment type, find the nearest nodes to an equally spaced grid of points @@ -821,6 +852,7 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se auto& device_ctx = g_vpr_ctx.device(); auto& rr_nodes = device_ctx.rr_nodes; const int num_segments = inodes_for_segment->size(); + std::vector> segment_counts(num_segments); // compute bounding boxes for each segment type std::vector> bounding_box_for_segment(num_segments, vtr::Rect()); @@ -832,10 +864,16 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se VTR_ASSERT(seg_index != OPEN); VTR_ASSERT(seg_index < num_segments); - bounding_box_for_segment[seg_index].expand_bounding_box(bounding_box_for_node(node)); + bounding_box_for_segment[seg_index].expand_bounding_box(bounding_box_for_node(device_ctx.connection_boxes, i)); } - // select an inode near the center of the bounding box for each segment type + // initialize counts + for (int seg = 0; seg < num_segments; seg++) { + const auto& box = bounding_box_for_segment[seg]; + segment_counts[seg] = vtr::Matrix({size_t(box.width()), size_t(box.height())}, 0); + } + + // initialize the samples vector for each sample point inodes_for_segment->clear(); inodes_for_segment->resize(num_segments); for (auto& grid : *inodes_for_segment) { @@ -846,46 +884,51 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se } } + // count sample points for (size_t i = 0; i < rr_nodes.size(); i++) { auto& node = rr_nodes[i]; - vtr::Rect node_bounds = bounding_box_for_node(node); if (node.type() != CHANX && node.type() != CHANY) continue; - if (node.capacity() == 0 || device_ctx.connection_boxes.find_canonical_loc(i) == nullptr) continue; + if (node.capacity() == 0) continue; + const std::pair* loc = device_ctx.connection_boxes.find_canonical_loc(i); + if (loc == nullptr) continue; int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index; + segment_counts[seg_index][loc->first][loc->second] += 1; VTR_ASSERT(seg_index != OPEN); VTR_ASSERT(seg_index < num_segments); + } - auto& grid = (*inodes_for_segment)[seg_index]; - for (int sy = 0; sy < SAMPLE_GRID_SIZE; sy++) { - for (int sx = 0; sx < SAMPLE_GRID_SIZE; sx++) { - auto& point = grid.point[sy][sx]; - if (point.samples.empty()) { - point.samples.push_back(i); - point.location = vtr::Point(node.xlow(), node.ylow()); - goto next_rr_node; - } - - if (node_bounds.contains(point.location)) { - point.samples.push_back(i); - goto next_rr_node; - } - - vtr::Point target = sample(bounding_box_for_segment[seg_index], sx + 1, sy + 1, SAMPLE_GRID_SIZE + 1); - int distance_new = manhattan_distance(point_for_node(node), target); - int distance_stored = manhattan_distance(point.location, target); - if (distance_new < distance_stored) { - point.samples.clear(); - point.samples.push_back(i); - goto next_rr_node; - } + // select sample points + for (int i = 0; i < num_segments; i++) { + const auto& counts = segment_counts[i]; + const auto& bounding_box = bounding_box_for_segment[i]; + auto& grid = (*inodes_for_segment)[i]; + for (int y = 0; y < SAMPLE_GRID_SIZE; y++) { + for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { + grid.point[y][x].location = choose_point(counts, bounding_box, x, y, SAMPLE_GRID_SIZE); } } + } - // to break out from the inner loop - next_rr_node: - continue; + // select an inode near the center of the bounding box for each segment type + for (size_t i = 0; i < rr_nodes.size(); i++) { + auto& node = rr_nodes[i]; + if (node.type() != CHANX && node.type() != CHANY) continue; + if (node.capacity() == 0) continue; + const std::pair* loc = device_ctx.connection_boxes.find_canonical_loc(i); + if (loc == nullptr) continue; + + int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index; + + VTR_ASSERT(seg_index != OPEN); + VTR_ASSERT(seg_index < num_segments); + + auto& grid = (*inodes_for_segment)[seg_index]; + auto grid_loc = grid_lookup(grid, vtr::Point(loc->first, loc->second)); + if (grid_loc.first >= 0) { + grid.point[grid_loc.first][grid_loc.second].samples.push_back(i); + } } } From ff57a6635fd5526848fb3f0ac78c3490da9afa06 Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Wed, 6 Nov 2019 13:40:27 -0800 Subject: [PATCH 19/41] remove unnecessary lookups Signed-off-by: Dusty DeWeese --- .../route/connection_box_lookahead_map.cpp | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 410918a8b6e..57b1c691e35 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -565,18 +565,17 @@ static void add_paths(int start_node_ind, bool in_window = abs(delta.x()) <= DIJKSTRA_CACHE_WINDOW && abs(delta.y()) <= DIJKSTRA_CACHE_WINDOW; if (in_window && cache->get(compressed_key, &cost) && cost <= val.cost_entry.delay) { // the sample was not cheaper than the cached sample - const auto& x = routing_costs->find(key); - VTR_ASSERT(x != routing_costs->end()); if (BREAK_ON_MISS) { // don't store the rest of the path break; } } else { - const auto& x = routing_costs->find(key); - if (x != routing_costs->end()) { - if (x->second.cost_entry.delay > val.cost_entry.delay) { + auto result = routing_costs->insert(std::make_pair(key, val)); + if (!result.second) { + auto& existing = result.first->second; + if (existing.cost_entry.delay > val.cost_entry.delay) { // this sample is cheaper - (*routing_costs)[key] = val; + existing = val; if (in_window) { cache->insert(compressed_key, val.cost_entry.delay); } @@ -587,7 +586,7 @@ static void add_paths(int start_node_ind, break; } if (in_window) { - cache->insert(compressed_key, x->second.cost_entry.delay); + cache->insert(compressed_key, existing.cost_entry.delay); } } } else { @@ -731,11 +730,12 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen for (const auto& cost : costs) { const auto& key = cost.first; const auto& val = cost.second; - const auto& x = all_costs.find(key); + const auto& result = all_costs.insert(std::make_pair(key, val)); + auto& existing = result.first->second; // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST - if (x == all_costs.end() || x->second.cost_entry.delay > val.cost_entry.delay) { - all_costs[key] = val; + if (!result.second || existing.cost_entry.delay > val.cost_entry.delay) { + existing = val; } } From afc36aaf62d4f9b955c56d734df902b561fc889b Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Wed, 6 Nov 2019 17:39:47 -0800 Subject: [PATCH 20/41] remove SimpleCache, use vtr::hash_combine, add timing stats Signed-off-by: Dusty DeWeese --- .../route/connection_box_lookahead_map.cpp | 164 +++++------------- vpr/src/route/connection_box_lookahead_map.h | 37 +--- 2 files changed, 52 insertions(+), 149 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 57b1c691e35..35e3b224844 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -48,14 +48,8 @@ static constexpr int SAMPLE_GRID_SIZE = 3; // Stop Dijkstra expansion after reaching COST_LIMIT static constexpr float COST_LIMIT = std::numeric_limits::infinity(); -// Number of entries in the routing cost cache -static constexpr int DIJKSTRA_CACHE_SIZE = 64; - -// Only entries with a delta inside the window (+- DIJKSTRA_CACHE_WINDOW x/y) are cached -static constexpr int DIJKSTRA_CACHE_WINDOW = 3; - // Don't continue storing a path after hitting a lower-or-same cost entry. -static constexpr bool BREAK_ON_MISS = false; +static constexpr bool BREAK_ON_MISS = true; // Distance penalties filling are calculated based on available samples, but can be adjusted with this factor. static constexpr float PENALTY_FACTOR = 1.f; @@ -74,65 +68,9 @@ struct SampleGrid { SamplePoint point[SAMPLE_GRID_SIZE][SAMPLE_GRID_SIZE]; }; -// implements a simple cache of key(K)/value(V) pairs of N entries -template -class SimpleCache { - public: - SimpleCache() - : pos(0) - , hits(0) - , misses(0) {} - - // O(N) lookup - bool get(K key, V* value) { - for (int i = 0; i < N; i++) { - auto& k = keys[i]; - if (k == key) { - auto& v = values[i]; -#if defined(CONNECTION_BOX_LOOKAHEAD_PUSH_BACK_HITS) - // preserve the found key by pushing it back - int last = (pos + N - 1) % N; - std::swap(k, keys[last]); - std::swap(v, values[last]); -#endif - *value = v; - hits++; - return true; - } - } - misses++; - return false; - } - - // O(1) insertion (overwriting an older entry) - void insert(K key, V val) { - keys[pos] = key; - values[pos] = val; - pos = (pos + 1) % N; - } - - // ratio of successful lookups - float hit_ratio() { - return hits ? static_cast(hits) / static_cast(hits + misses) : 0.f; - } - - // ratio of unsuccessful lookups - float miss_ratio() { - return misses ? static_cast(misses) / static_cast(hits + misses) : 0.f; - } - - private: - std::array keys; // keep keys together for faster scanning - std::array values; - size_t pos; - uint64_t hits; - uint64_t misses; -}; - -static float run_dijkstra(int start_node_ind, - RoutingCosts* routing_costs, - SimpleCache* cache, - float max_cost); +static std::pair run_dijkstra(int start_node_ind, + RoutingCosts* routing_costs, + float max_cost); static void find_inodes_for_segment_types(std::vector* inodes_for_segment); // also known as the L1 norm @@ -505,8 +443,7 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, static void add_paths(int start_node_ind, int node_ind, std::unordered_map* paths, - RoutingCosts* routing_costs, - SimpleCache* cache) { + RoutingCosts* routing_costs) { auto& device_ctx = g_vpr_ctx.device(); ConnectionBoxId box_id; std::pair box_location; @@ -550,7 +487,6 @@ static void add_paths(int start_node_ind, seg_index, box_id, delta}; - CompressedRoutingCostKey compressed_key(key); RoutingCost val = { parent, node_ind, @@ -560,42 +496,22 @@ static void add_paths(int start_node_ind, // NOTE: implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST - // use a cache for a small window around a delta of (0, 0) - float cost = 0.f; - bool in_window = abs(delta.x()) <= DIJKSTRA_CACHE_WINDOW && abs(delta.y()) <= DIJKSTRA_CACHE_WINDOW; - if (in_window && cache->get(compressed_key, &cost) && cost <= val.cost_entry.delay) { - // the sample was not cheaper than the cached sample - if (BREAK_ON_MISS) { - // don't store the rest of the path - break; - } - } else { - auto result = routing_costs->insert(std::make_pair(key, val)); - if (!result.second) { - auto& existing = result.first->second; - if (existing.cost_entry.delay > val.cost_entry.delay) { - // this sample is cheaper - existing = val; - if (in_window) { - cache->insert(compressed_key, val.cost_entry.delay); - } - } else { - // this sample is not cheaper - if (BREAK_ON_MISS) { - // don't store the rest of the path - break; - } - if (in_window) { - cache->insert(compressed_key, existing.cost_entry.delay); - } - } + auto result = routing_costs->insert(std::make_pair(key, val)); + if (!result.second) { + auto& existing = result.first->second; + if (existing.cost_entry.delay > val.cost_entry.delay) { + // this sample is cheaper + existing = val; } else { - // this sample is new - (*routing_costs)[key] = val; - if (in_window) { - cache->insert(compressed_key, val.cost_entry.delay); + // this sample is not cheaper + if (BREAK_ON_MISS) { + // don't store the rest of the path + break; } } + } else { + // this sample is new + (*routing_costs)[key] = val; } // update parent data @@ -608,11 +524,11 @@ static void add_paths(int start_node_ind, /* runs Dijkstra's algorithm from specified node until all nodes have been * visited. Each time a pin is visited, the delay/congestion information * to that pin is stored to an entry in the routing_cost_map */ -static float run_dijkstra(int start_node_ind, - RoutingCosts* routing_costs, - SimpleCache* cache, - float cost_limit) { +static std::pair run_dijkstra(int start_node_ind, + RoutingCosts* routing_costs, + float cost_limit) { auto& device_ctx = g_vpr_ctx.device(); + int path_count = 0; /* a list of boolean flags (one for each rr node) to figure out if a * certain node has already been expanded */ @@ -646,7 +562,8 @@ static float run_dijkstra(int start_node_ind, /* if this node is an ipin record its congestion/delay in the routing_cost_map */ if (device_ctx.rr_nodes[node_ind].type() == IPIN) { - add_paths(start_node_ind, node_ind, &paths, routing_costs, cache); + path_count++; + add_paths(start_node_ind, node_ind, &paths, routing_costs); } else { expand_dijkstra_neighbours(current, paths, node_expanded, pq); node_expanded[node_ind] = true; @@ -657,7 +574,17 @@ static float run_dijkstra(int start_node_ind, break; } } - return max_cost; + return std::make_pair(max_cost, path_count); +} + +static uint64_t interleave(uint32_t x) { + uint64_t i = x; + i = (i ^ (i << 16)) & 0x0000ffff0000ffff; + i = (i ^ (i << 8)) & 0x00ff00ff00ff00ff; + i = (i ^ (i << 4)) & 0x0f0f0f0f0f0f0f0f; + i = (i ^ (i << 2)) & 0x3333333333333333; + i = (i ^ (i << 1)) & 0x5555555555555555; + return i; } // compute the cost maps for lookahead @@ -676,7 +603,7 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { auto& point = grid.point[y][x]; if (!point.samples.empty()) { - point.order = point.samples[0]; + point.order = interleave(point.location.x()) | (interleave(point.location.y()) << 1); sample_points.push_back(point); } } @@ -710,21 +637,23 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen // holds the cost entries for a run RoutingCosts costs; - // a cache to avoid hammering the RoutingCosts map, since lookups will be dominated by a few keys - // must be consistent with `costs` i.e. any entry in the cache should also be in `costs` - // NOTE: this is used as a write-through cache, maybe try write-back - SimpleCache cache; - + vtr::Timer run_timer; + int path_count = 0; for (auto node_ind : point.samples) { - max_cost = std::max(max_cost, run_dijkstra(node_ind, &costs, &cache, COST_LIMIT)); + auto result = run_dijkstra(node_ind, &costs, COST_LIMIT); + max_cost = std::max(max_cost, result.first); + path_count += result.second; } #if defined(VPR_USE_TBB) all_costs_mutex.lock(); #endif - VTR_LOG("Expanded sample point (%d, %d) %e miss %g\n", - point.location.x(), point.location.y(), max_cost, cache.miss_ratio()); + VTR_LOG("Expanded %d paths starting at (%d, %d) max_cost %e (%g paths/sec)\n", + path_count, + point.location.x(), point.location.y(), + max_cost, + path_count / run_timer.elapsed_sec()); // combine the cost map from this run with the final cost maps for each segment for (const auto& cost : costs) { @@ -903,6 +832,7 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se for (int i = 0; i < num_segments; i++) { const auto& counts = segment_counts[i]; const auto& bounding_box = bounding_box_for_segment[i]; + if (bounding_box.empty()) continue; auto& grid = (*inodes_for_segment)[i]; for (int y = 0; y < SAMPLE_GRID_SIZE; y++) { for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h index 9b5bdb0afea..b04c8131800 100644 --- a/vpr/src/route/connection_box_lookahead_map.h +++ b/vpr/src/route/connection_box_lookahead_map.h @@ -32,29 +32,6 @@ struct RoutingCostKey { } }; -// compressed version of RoutingCostKey -// TODO add bounds checks -struct CompressedRoutingCostKey { - uint32_t data; - - CompressedRoutingCostKey() { - data = -1; - } - CompressedRoutingCostKey(const RoutingCostKey& key) { - data = key.seg_index & 0xff; - data <<= 8; - data |= size_t(key.box_id) & 0xff; - data <<= 8; - data |= key.delta.x() & 0xff; - data <<= 8; - data |= key.delta.y() & 0xff; - } - - bool operator==(CompressedRoutingCostKey other) const { - return data == other.data; - } -}; - // Data in the RoutingCosts map struct RoutingCost { // source and destination node indices @@ -67,15 +44,11 @@ struct RoutingCost { // hash implementation for RoutingCostKey struct HashRoutingCostKey { std::size_t operator()(RoutingCostKey const& key) const noexcept { - uint64_t data; - data = key.seg_index & 0xffff; - data <<= 16; - data |= size_t(key.box_id) & 0xffff; - data <<= 16; - data |= key.delta.x() & 0xffff; - data <<= 16; - data |= key.delta.y() & 0xffff; - return std::hash{}(data); + std::size_t hash = std::hash{}(key.seg_index); + vtr::hash_combine(hash, key.box_id); + vtr::hash_combine(hash, key.delta.x()); + vtr::hash_combine(hash, key.delta.y()); + return hash; } }; From 2446c03f1e41a1113f3e3ad3befc773d3fc0eab6 Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Fri, 8 Nov 2019 12:12:06 -0800 Subject: [PATCH 21/41] integrate lighost's patches Signed-off-by: Dusty DeWeese --- vpr/src/route/connection_box.cpp | 1 + vpr/src/route/connection_box.h | 6 ++-- .../route/connection_box_lookahead_map.cpp | 34 +++++++++++++++---- vpr/src/route/router_lookahead_map_utils.cpp | 17 +++------- vpr/src/route/router_lookahead_map_utils.h | 2 +- 5 files changed, 37 insertions(+), 23 deletions(-) diff --git a/vpr/src/route/connection_box.cpp b/vpr/src/route/connection_box.cpp index 1bee129f0d6..181fe171989 100644 --- a/vpr/src/route/connection_box.cpp +++ b/vpr/src/route/connection_box.cpp @@ -33,6 +33,7 @@ bool ConnectionBoxes::find_connection_box(int inode, float* site_pin_delay) const { VTR_ASSERT(box_id != nullptr); VTR_ASSERT(box_location != nullptr); + VTR_ASSERT(site_pin_delay != nullptr); if (inode >= (ssize_t)ipin_map_.size()) { return false; diff --git a/vpr/src/route/connection_box.h b/vpr/src/route/connection_box.h index 25fd0ea422c..1757c6c726b 100644 --- a/vpr/src/route/connection_box.h +++ b/vpr/src/route/connection_box.h @@ -25,12 +25,12 @@ struct ConnBoxLoc { float a_site_pin_delay, ConnectionBoxId a_box_id) : box_location(a_box_location) - , box_id(a_box_id) - , site_pin_delay(a_site_pin_delay) {} + , site_pin_delay(a_site_pin_delay) + , box_id(a_box_id) {} std::pair box_location; - ConnectionBoxId box_id; float site_pin_delay; + ConnectionBoxId box_id; }; struct SinkToIpin { diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 35e3b224844..84ac3ffd71f 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -277,7 +277,7 @@ void CostMap::print(int iseg) const { VTR_LOG("cost EMPTY for box_id = %lu\n", box_id); continue; } - VTR_LOG("cost for box_id = %lu\n", box_id); + VTR_LOG("cost for box_id = %lu (%zu, %zu)\n", box_id, matrix.dim_size(0), matrix.dim_size(1)); double sum = 0.0; for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { @@ -431,7 +431,16 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, float expected_delay = cost_entry.delay; float expected_congestion = cost_entry.congestion; + expected_delay += site_pin_delay; + expected_congestion += site_pin_delay; + float expected_cost = criticality_fac * expected_delay + (1.0 - criticality_fac) * expected_congestion; + + VTR_LOGV_DEBUG(f_router_debug, "Lookahead congestion: %g\n", expected_congestion); + VTR_LOGV_DEBUG(f_router_debug, "Criticality: %g\n", criticality_fac); + VTR_LOGV_DEBUG(f_router_debug, "Lookahead cost: %g\n", expected_cost); + VTR_LOGV_DEBUG(f_router_debug, "Site pin delay: %g\n", site_pin_delay); + if (!std::isfinite(expected_cost) || expected_cost < 0.f) { VTR_LOG_ERROR("invalid cost for segment %d to connection box %d at (%d, %d)\n", from_seg_index, (int)size_t(box_id), (int)dx, (int)dy); VTR_ASSERT(0); @@ -458,15 +467,21 @@ static void add_paths(int start_node_ind, std::vector path; for (int i = node_ind; i != start_node_ind; path.push_back(i = (*paths)[i].parent)) ; - util::PQ_Entry parent_entry(start_node_ind, UNDEFINED, 0, 0, 0, true); + util::PQ_Entry parent_entry(start_node_ind, UNDEFINED, 0, 0, 0, true, /*Tsw_adjust=*/0.f); // recalculate the path with congestion util::PQ_Entry current_full = parent_entry; int parent = start_node_ind; for (auto it = path.rbegin(); it != path.rend(); it++) { auto& parent_node = device_ctx.rr_nodes[parent]; + float Tsw_adjust = 0.f; + + // Remove site pin delay when taking edge from last channel to IPIN. + if (*it == node_ind) { + Tsw_adjust = -site_pin_delay; + } current_full = util::PQ_Entry(*it, parent_node.edge_switch((*paths)[*it].edge), current_full.delay, - current_full.R_upstream, current_full.congestion_upstream, false); + current_full.R_upstream, current_full.congestion_upstream, false, Tsw_adjust); parent = *it; } @@ -487,12 +502,17 @@ static void add_paths(int start_node_ind, seg_index, box_id, delta}; + + float new_delay = current_full.delay - parent_entry.delay; + float new_congestion = current_full.congestion_upstream - parent_entry.congestion_upstream; + + VTR_ASSERT(new_delay >= 0.f); + VTR_ASSERT(new_congestion >= 0.f); + RoutingCost val = { parent, node_ind, - util::Cost_Entry( - current_full.delay - parent_entry.delay, - current_full.congestion_upstream - parent_entry.congestion_upstream)}; + util::Cost_Entry(new_delay, new_congestion)}; // NOTE: implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST @@ -516,7 +536,7 @@ static void add_paths(int start_node_ind, // update parent data parent_entry = util::PQ_Entry(*it, parent_node.edge_switch((*paths)[*it].edge), parent_entry.delay, - parent_entry.R_upstream, parent_entry.congestion_upstream, false); + parent_entry.R_upstream, parent_entry.congestion_upstream, false, /*Tsw_adjust=*/0.f); parent = *it; } } diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp index 36667cb3a29..a1033d334bd 100644 --- a/vpr/src/route/router_lookahead_map_utils.cpp +++ b/vpr/src/route/router_lookahead_map_utils.cpp @@ -13,7 +13,8 @@ util::PQ_Entry::PQ_Entry( float parent_delay, float parent_R_upstream, float parent_congestion_upstream, - bool starting_node) { + bool starting_node, + float Tsw_adjust) { this->rr_node_ind = set_rr_node_ind; auto& device_ctx = g_vpr_ctx.device(); @@ -24,6 +25,8 @@ util::PQ_Entry::PQ_Entry( int cost_index = device_ctx.rr_nodes[set_rr_node_ind].cost_index(); float Tsw = device_ctx.rr_switch_inf[switch_ind].Tdel; + Tsw += Tsw_adjust; + VTR_ASSERT(Tsw >= 0.f); float Rsw = device_ctx.rr_switch_inf[switch_ind].R; float Cnode = device_ctx.rr_nodes[set_rr_node_ind].C(); float Rnode = device_ctx.rr_nodes[set_rr_node_ind].R(); @@ -32,21 +35,11 @@ util::PQ_Entry::PQ_Entry( float T_quadratic = 0.f; if (device_ctx.rr_switch_inf[switch_ind].buffered()) { T_linear = Tsw + Rsw * Cnode + 0.5 * Rnode * Cnode; - T_quadratic = 0.; } else { /* Pass transistor */ T_linear = Tsw + 0.5 * Rsw * Cnode; - T_quadratic = (Rsw + Rnode) * 0.5 * Cnode; } - float base_cost; - if (device_ctx.rr_indexed_data[cost_index].inv_length < 0) { - base_cost = device_ctx.rr_indexed_data[cost_index].base_cost; - } else { - float frac_num_seg = CLB_DIST * device_ctx.rr_indexed_data[cost_index].inv_length; - - base_cost = frac_num_seg * T_linear - + frac_num_seg * frac_num_seg * T_quadratic; - } + float base_cost = device_ctx.rr_indexed_data[cost_index].base_cost; VTR_ASSERT(T_linear >= 0.); VTR_ASSERT(base_cost >= 0.); diff --git a/vpr/src/route/router_lookahead_map_utils.h b/vpr/src/route/router_lookahead_map_utils.h index 5745a2c9da7..53eb176855a 100644 --- a/vpr/src/route/router_lookahead_map_utils.h +++ b/vpr/src/route/router_lookahead_map_utils.h @@ -129,7 +129,7 @@ class PQ_Entry { float R_upstream; float congestion_upstream; - PQ_Entry(int set_rr_node_ind, int /*switch_ind*/, float parent_delay, float parent_R_upstream, float parent_congestion_upstream, bool starting_node); + PQ_Entry(int set_rr_node_ind, int /*switch_ind*/, float parent_delay, float parent_R_upstream, float parent_congestion_upstream, bool starting_node, float Tsw_adjust); bool operator<(const PQ_Entry& obj) const { /* inserted into max priority queue so want queue entries with a lower cost to be greater */ From d654bd47a3cefb90dd7eabe977ad035befcd27bd Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Mon, 11 Nov 2019 11:22:23 -0800 Subject: [PATCH 22/41] independently minimize delay and base cost (congestion) --- vpr/src/route/connection_box_lookahead_map.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 84ac3ffd71f..a719f1f243d 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -323,6 +323,8 @@ std::vector> CostMap::list_empty() const { static void assign_min_entry(util::Cost_Entry* dst, const util::Cost_Entry& src) { if (src.delay < dst->delay) { dst->delay = src.delay; + } + if (src.congestion < dst->congestion) { dst->congestion = src.congestion; } } From 24eeb80c6c971b806cf5048a150641ec429cabbf Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Thu, 14 Nov 2019 15:13:48 -0800 Subject: [PATCH 23/41] Expand delay and base cost independently Expand base_cost too. The delay and base cost matricies are independent, so should be expanded and filled independently. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> Signed-off-by: Dusty DeWeese --- .../route/connection_box_lookahead_map.cpp | 231 +++++++++--------- vpr/src/route/connection_box_lookahead_map.h | 4 +- vpr/src/route/router_lookahead_map_utils.cpp | 90 ++++--- vpr/src/route/router_lookahead_map_utils.h | 55 ++++- 4 files changed, 225 insertions(+), 155 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index a719f1f243d..a65bb04d2d0 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -38,16 +38,12 @@ * * See e_representative_entry_method */ #define REPRESENTATIVE_ENTRY_METHOD util::SMALLEST -// #define FILL_LIMIT 30 #define CONNECTION_BOX_LOOKAHEAD_MAP_PRINT_COST_MAPS // Sample based an NxN grid of starting segments, where N = SAMPLE_GRID_SIZE static constexpr int SAMPLE_GRID_SIZE = 3; -// Stop Dijkstra expansion after reaching COST_LIMIT -static constexpr float COST_LIMIT = std::numeric_limits::infinity(); - // Don't continue storing a path after hitting a lower-or-same cost entry. static constexpr bool BREAK_ON_MISS = true; @@ -68,9 +64,10 @@ struct SampleGrid { SamplePoint point[SAMPLE_GRID_SIZE][SAMPLE_GRID_SIZE]; }; +template static std::pair run_dijkstra(int start_node_ind, - RoutingCosts* routing_costs, - float max_cost); + RoutingCosts* routing_costs); + static void find_inodes_for_segment_types(std::vector* inodes_for_segment); // also known as the L1 norm @@ -145,10 +142,13 @@ util::Cost_Entry CostMap::find_cost(int from_seg_index, ConnectionBoxId box_id, } // set the cost map for a segment type and connection box type, filling holes -void CostMap::set_cost_map(const RoutingCosts& costs) { +void CostMap::set_cost_map(const RoutingCosts& delay_costs, const RoutingCosts& base_costs) { // calculate the bounding boxes vtr::Matrix> bounds({seg_count_, box_count_}); - for (const auto& entry : costs) { + for (const auto& entry : delay_costs) { + bounds[entry.first.seg_index][size_t(entry.first.box_id)].expand_bounding_box(vtr::Rect(entry.first.delta)); + } + for (const auto& entry : base_costs) { bounds[entry.first.seg_index][size_t(entry.first.box_id)].expand_bounding_box(vtr::Rect(entry.first.delta)); } @@ -171,13 +171,21 @@ void CostMap::set_cost_map(const RoutingCosts& costs) { } // store entries into the matrices - for (const auto& entry : costs) { + for (const auto& entry : delay_costs) { int seg = entry.first.seg_index; int box = size_t(entry.first.box_id); const auto& seg_box_bounds = bounds[seg][box]; int x = entry.first.delta.x() - seg_box_bounds.xmin(); int y = entry.first.delta.y() - seg_box_bounds.ymin(); - cost_map_[seg][box][x][y] = entry.second.cost_entry; + cost_map_[seg][box][x][y].delay = entry.second; + } + for (const auto& entry : base_costs) { + int seg = entry.first.seg_index; + int box = size_t(entry.first.box_id); + const auto& seg_box_bounds = bounds[seg][box]; + int x = entry.first.delta.x() - seg_box_bounds.xmin(); + int y = entry.first.delta.y() - seg_box_bounds.ymin(); + cost_map_[seg][box][x][y].congestion = entry.second; } // fill the holes @@ -232,11 +240,10 @@ void CostMap::set_cost_map(const RoutingCosts& costs) { } } } -#if !defined(FILL_LIMIT) if (couldnt_fill) { + // give up trying to fill an empty matrix break; } -#endif } if (!couldnt_fill) { @@ -251,13 +258,11 @@ void CostMap::set_cost_map(const RoutingCosts& costs) { if (couldnt_fill) { VTR_LOG_WARN("Couldn't fill holes in the cost matrix for %d -> %ld, %d x %d bounding box\n", seg, box, seg_box_bounds.width(), seg_box_bounds.height()); -#if !defined(FILL_LIMIT) for (unsigned y = 0; y < matrix.dim_size(1); y++) { for (unsigned x = 0; x < matrix.dim_size(0); x++) { VTR_ASSERT(!matrix[x][y].valid()); } } -#endif } } } @@ -335,35 +340,39 @@ std::pair CostMap::get_nearby_cost_entry(const vtr::NdMat int cy, const vtr::Rect& bounds) { // spiral around (cx, cy) looking for a nearby entry - int n = 1; - bool in_bounds; - util::Cost_Entry entry; + bool in_bounds = bounds.contains(vtr::Point(cx, cy)); + if (!in_bounds) { + return std::make_pair(util::Cost_Entry(), 0); + } + int n = 0; + util::Cost_Entry fill(matrix[cx][cy]); - do { + while (in_bounds && !fill.valid()) { + n++; in_bounds = false; + util::Cost_Entry min_entry; for (int ox = -n; ox <= n; ox++) { int x = cx + ox; int oy = n - abs(ox); int yp = cy + oy; int yn = cy - oy; if (bounds.contains(vtr::Point(x, yp))) { - assign_min_entry(&entry, matrix[x][yp]); + assign_min_entry(&min_entry, matrix[x][yp]); in_bounds = true; } if (bounds.contains(vtr::Point(x, yn))) { - assign_min_entry(&entry, matrix[x][yn]); + assign_min_entry(&min_entry, matrix[x][yn]); in_bounds = true; } + if (!std::isfinite(fill.delay)) { + fill.delay = min_entry.delay; + } + if (!std::isfinite(fill.congestion)) { + fill.congestion = min_entry.congestion; + } } - if (entry.valid()) return std::make_pair(entry, n); - n++; -#if defined(FILL_LIMIT) - if (n > FILL_LIMIT) { - break; - } -#endif - } while (in_bounds); - return std::make_pair(util::Cost_Entry(), n); + } + return std::make_pair(fill, n); } // derive a cost from the map between two nodes @@ -376,8 +385,6 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, auto& device_ctx = g_vpr_ctx.device(); - std::pair from_location; - std::pair to_location; auto to_node_type = device_ctx.rr_nodes[to_node_ind].type(); if (to_node_type == SINK) { @@ -447,18 +454,21 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, VTR_LOG_ERROR("invalid cost for segment %d to connection box %d at (%d, %d)\n", from_seg_index, (int)size_t(box_id), (int)dx, (int)dy); VTR_ASSERT(0); } + return expected_cost; } // add a best cost routing path from start_node_ind to node_ind to routing costs +template static void add_paths(int start_node_ind, - int node_ind, + Entry current, std::unordered_map* paths, RoutingCosts* routing_costs) { auto& device_ctx = g_vpr_ctx.device(); ConnectionBoxId box_id; std::pair box_location; float site_pin_delay; + int node_ind = current.rr_node_ind; bool found = device_ctx.connection_boxes.find_connection_box( node_ind, &box_id, &box_location, &site_pin_delay); if (!found) { @@ -467,32 +477,20 @@ static void add_paths(int start_node_ind, // reconstruct the path std::vector path; - for (int i = node_ind; i != start_node_ind; path.push_back(i = (*paths)[i].parent)) - ; - util::PQ_Entry parent_entry(start_node_ind, UNDEFINED, 0, 0, 0, true, /*Tsw_adjust=*/0.f); - - // recalculate the path with congestion - util::PQ_Entry current_full = parent_entry; - int parent = start_node_ind; - for (auto it = path.rbegin(); it != path.rend(); it++) { - auto& parent_node = device_ctx.rr_nodes[parent]; - float Tsw_adjust = 0.f; - - // Remove site pin delay when taking edge from last channel to IPIN. - if (*it == node_ind) { - Tsw_adjust = -site_pin_delay; - } - current_full = util::PQ_Entry(*it, parent_node.edge_switch((*paths)[*it].edge), current_full.delay, - current_full.R_upstream, current_full.congestion_upstream, false, Tsw_adjust); - parent = *it; + for (int i = (*paths)[node_ind].parent; i != start_node_ind; i = (*paths)[i].parent) { + path.push_back(i); } + path.push_back(start_node_ind); + + current.adjust_Tsw(-site_pin_delay); // add each node along the path subtracting the incremental costs from the current costs - parent = start_node_ind; + Entry start_to_here(start_node_ind, UNDEFINED, nullptr); + int parent = start_node_ind; for (auto it = path.rbegin(); it != path.rend(); it++) { - auto& parent_node = device_ctx.rr_nodes[parent]; - int seg_index = device_ctx.rr_indexed_data[parent_node.cost_index()].seg_index; - const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(parent); + auto& here = device_ctx.rr_nodes[*it]; + int seg_index = device_ctx.rr_indexed_data[here.cost_index()].seg_index; + const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(*it); if (from_canonical_loc == nullptr) { VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d", parent); @@ -505,50 +503,37 @@ static void add_paths(int start_node_ind, box_id, delta}; - float new_delay = current_full.delay - parent_entry.delay; - float new_congestion = current_full.congestion_upstream - parent_entry.congestion_upstream; + if (*it != start_node_ind) { + auto& parent_node = device_ctx.rr_nodes[parent]; + start_to_here = Entry(*it, parent_node.edge_switch((*paths)[*it].edge), &start_to_here); + parent = *it; + } - VTR_ASSERT(new_delay >= 0.f); - VTR_ASSERT(new_congestion >= 0.f); + float cost = current.cost() - start_to_here.cost(); + if (cost < 0.f && cost > -1e-15 /* 1 femtosecond */) { + cost = 0.f; + } - RoutingCost val = { - parent, - node_ind, - util::Cost_Entry(new_delay, new_congestion)}; + VTR_ASSERT(cost >= 0.f); // NOTE: implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST - - auto result = routing_costs->insert(std::make_pair(key, val)); + auto result = routing_costs->insert(std::make_pair(key, cost)); if (!result.second) { - auto& existing = result.first->second; - if (existing.cost_entry.delay > val.cost_entry.delay) { - // this sample is cheaper - existing = val; - } else { - // this sample is not cheaper - if (BREAK_ON_MISS) { - // don't store the rest of the path - break; - } + if (cost < result.first->second) { + result.first->second = cost; + } else if (BREAK_ON_MISS) { + break; } - } else { - // this sample is new - (*routing_costs)[key] = val; } - - // update parent data - parent_entry = util::PQ_Entry(*it, parent_node.edge_switch((*paths)[*it].edge), parent_entry.delay, - parent_entry.R_upstream, parent_entry.congestion_upstream, false, /*Tsw_adjust=*/0.f); - parent = *it; } } /* runs Dijkstra's algorithm from specified node until all nodes have been * visited. Each time a pin is visited, the delay/congestion information * to that pin is stored to an entry in the routing_cost_map */ +template static std::pair run_dijkstra(int start_node_ind, - RoutingCosts* routing_costs, - float cost_limit) { + RoutingCosts* routing_costs) { auto& device_ctx = g_vpr_ctx.device(); int path_count = 0; @@ -561,12 +546,11 @@ static std::pair run_dijkstra(int start_node_ind, * Also store the parent node so we can reconstruct a specific path. */ std::unordered_map paths; /* a priority queue for expansion */ - std::priority_queue, std::greater> pq; + std::priority_queue, std::greater> pq; /* first entry has no upstream delay or congestion */ - util::PQ_Entry_Lite first_entry(start_node_ind, UNDEFINED, 0, true); - - float max_cost = 0.f; + Entry first_entry(start_node_ind, UNDEFINED, nullptr); + float max_cost = first_entry.cost(); pq.push(first_entry); @@ -577,6 +561,9 @@ static std::pair run_dijkstra(int start_node_ind, int node_ind = current.rr_node_ind; + // the last cost should be the highest + max_cost = current.cost(); + /* check that we haven't already expanded from this node */ if (node_expanded[node_ind]) { continue; @@ -585,16 +572,12 @@ static std::pair run_dijkstra(int start_node_ind, /* if this node is an ipin record its congestion/delay in the routing_cost_map */ if (device_ctx.rr_nodes[node_ind].type() == IPIN) { path_count++; - add_paths(start_node_ind, node_ind, &paths, routing_costs); + add_paths(start_node_ind, current, &paths, routing_costs); } else { - expand_dijkstra_neighbours(current, paths, node_expanded, pq); + util::expand_dijkstra_neighbours(device_ctx.rr_nodes, + current, paths, node_expanded, pq); node_expanded[node_ind] = true; } - - max_cost = std::max(max_cost, current.delay_cost); - if (max_cost > cost_limit) { - break; - } } return std::make_pair(max_cost, path_count); } @@ -645,7 +628,8 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen device_ctx.connection_boxes.num_connection_box_types()); VTR_ASSERT(REPRESENTATIVE_ENTRY_METHOD == util::SMALLEST); - RoutingCosts all_costs; + RoutingCosts all_delay_costs; + RoutingCosts all_base_costs; /* run Dijkstra's algorithm for each segment type & channel type combination */ #if defined(VPR_USE_TBB) @@ -654,39 +638,58 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen #else for (const auto& point : sample_points) { #endif - float max_cost = 0.f; - // holds the cost entries for a run - RoutingCosts costs; + RoutingCosts delay_costs; + RoutingCosts base_costs; + // statistics vtr::Timer run_timer; int path_count = 0; + float max_delay_cost = 0.f; + float max_base_cost = 0.f; + for (auto node_ind : point.samples) { - auto result = run_dijkstra(node_ind, &costs, COST_LIMIT); - max_cost = std::max(max_cost, result.first); - path_count += result.second; + { + auto result = run_dijkstra(node_ind, &delay_costs); + max_delay_cost = std::max(max_delay_cost, result.first); + path_count += result.second; + } + { + auto result = run_dijkstra(node_ind, &base_costs); + max_base_cost = std::max(max_base_cost, result.first); + path_count += result.second; + } } #if defined(VPR_USE_TBB) all_costs_mutex.lock(); #endif - - VTR_LOG("Expanded %d paths starting at (%d, %d) max_cost %e (%g paths/sec)\n", + /* + * for (auto node_ind : point.samples) { + * VTR_LOG("Expanded node %s\n", describe_rr_node(node_ind).c_str()); + * } + */ + VTR_LOG("Expanded %d paths starting at (%d, %d) max_cost %e %e (%g paths/sec)\n", path_count, point.location.x(), point.location.y(), - max_cost, + max_delay_cost, max_base_cost, path_count / run_timer.elapsed_sec()); // combine the cost map from this run with the final cost maps for each segment - for (const auto& cost : costs) { - const auto& key = cost.first; + for (const auto& cost : delay_costs) { const auto& val = cost.second; - const auto& result = all_costs.insert(std::make_pair(key, val)); - auto& existing = result.first->second; - - // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST - if (!result.second || existing.cost_entry.delay > val.cost_entry.delay) { - existing = val; + auto result = all_delay_costs.insert(std::make_pair(cost.first, val)); + if (!result.second) { + // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST + result.first->second = std::min(result.first->second, val); + } + } + for (const auto& cost : base_costs) { + const auto& val = cost.second; + auto result = all_base_costs.insert(std::make_pair(cost.first, val)); + if (!result.second) { + // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST + result.first->second = std::min(result.first->second, val); } } @@ -700,7 +703,7 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen VTR_LOG("Combining results\n"); /* boil down the cost list in routing_cost_map at each coordinate to a * representative cost entry and store it in the lookahead cost map */ - cost_map_.set_cost_map(all_costs); + cost_map_.set_cost_map(all_delay_costs, all_base_costs); // diagnostics #if defined(CONNECTION_BOX_LOOKAHEAD_MAP_PRINT_COST_ENTRIES) diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h index b04c8131800..810dec59135 100644 --- a/vpr/src/route/connection_box_lookahead_map.h +++ b/vpr/src/route/connection_box_lookahead_map.h @@ -53,7 +53,7 @@ struct HashRoutingCostKey { }; // Map used to store intermediate routing costs -typedef std::unordered_map RoutingCosts; +typedef std::unordered_map RoutingCosts; // Dense cost maps per source segment and destination connection box types class CostMap { @@ -61,7 +61,7 @@ class CostMap { void set_counts(size_t seg_count, size_t box_count); int node_to_segment(int from_node_ind) const; util::Cost_Entry find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const; - void set_cost_map(const RoutingCosts& costs); + void set_cost_map(const RoutingCosts& delay_costs, const RoutingCosts& base_costs); std::pair get_nearby_cost_entry(const vtr::NdMatrix& matrix, int cx, int cy, const vtr::Rect& bounds); void read(const std::string& file); void write(const std::string& file) const; diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp index a1033d334bd..e53b265b916 100644 --- a/vpr/src/route/router_lookahead_map_utils.cpp +++ b/vpr/src/route/router_lookahead_map_utils.cpp @@ -4,10 +4,12 @@ #include "vpr_context.h" #include "vtr_math.h" +namespace util { + /* Number of CLBs I think the average conn. goes. */ static const int CLB_DIST = 3; -util::PQ_Entry::PQ_Entry( +PQ_Entry::PQ_Entry( int set_rr_node_ind, int switch_ind, float parent_delay, @@ -32,7 +34,6 @@ util::PQ_Entry::PQ_Entry( float Rnode = device_ctx.rr_nodes[set_rr_node_ind].R(); float T_linear = 0.f; - float T_quadratic = 0.f; if (device_ctx.rr_switch_inf[switch_ind].buffered()) { T_linear = Tsw + Rsw * Cnode + 0.5 * Rnode * Cnode; } else { /* Pass transistor */ @@ -52,16 +53,14 @@ util::PQ_Entry::PQ_Entry( this->cost = this->delay; } -util::PQ_Entry_Lite::PQ_Entry_Lite( +util::PQ_Entry_Delay::PQ_Entry_Delay( int set_rr_node_ind, int switch_ind, - float parent_delay, - bool starting_node) { + const util::PQ_Entry_Delay* parent) { this->rr_node_ind = set_rr_node_ind; - auto& device_ctx = g_vpr_ctx.device(); - this->delay_cost = parent_delay; - if (!starting_node) { + if (parent != nullptr) { + auto& device_ctx = g_vpr_ctx.device(); float Tsw = device_ctx.rr_switch_inf[switch_ind].Tdel; float Rsw = device_ctx.rr_switch_inf[switch_ind].R; float Cnode = device_ctx.rr_nodes[set_rr_node_ind].C(); @@ -75,7 +74,24 @@ util::PQ_Entry_Lite::PQ_Entry_Lite( } VTR_ASSERT(T_linear >= 0.); - this->delay_cost += T_linear; + this->delay_cost = parent->delay_cost + T_linear; + } else { + this->delay_cost = 0.f; + } +} + +util::PQ_Entry_Base_Cost::PQ_Entry_Base_Cost( + int set_rr_node_ind, + int switch_ind, + const util::PQ_Entry_Base_Cost* parent) { + this->rr_node_ind = set_rr_node_ind; + + if (parent != nullptr) { + auto& device_ctx = g_vpr_ctx.device(); + int cost_index = device_ctx.rr_nodes[set_rr_node_ind].cost_index(); + this->base_cost = parent->base_cost + device_ctx.rr_indexed_data[cost_index].base_cost; + } else { + this->base_cost = 0.f; } } @@ -175,18 +191,17 @@ util::Cost_Entry util::Expansion_Cost_Entry::get_median_entry() const { return representative_entry; } -/* iterates over the children of the specified node and selectively pushes them onto the priority queue */ -void expand_dijkstra_neighbours(util::PQ_Entry_Lite parent_entry, +template +void expand_dijkstra_neighbours(const t_rr_graph_storage& rr_nodes, + const Entry& parent_entry, std::unordered_map& paths, std::vector& node_expanded, - std::priority_queue, - std::greater>& pq) { - auto& device_ctx = g_vpr_ctx.device(); - + std::priority_queue, + std::greater>& pq) { int parent_ind = parent_entry.rr_node_ind; - auto& parent_node = device_ctx.rr_nodes[parent_ind]; + auto& parent_node = rr_nodes[parent_ind]; for (int iedge = 0; iedge < parent_node.num_edges(); iedge++) { int child_node_ind = parent_node.edge_sink_node(iedge); @@ -197,18 +212,37 @@ void expand_dijkstra_neighbours(util::PQ_Entry_Lite parent_entry, continue; } - util::PQ_Entry_Lite child_entry(child_node_ind, switch_ind, parent_entry.delay_cost, false); - - VTR_ASSERT(child_entry.delay_cost >= 0); + Entry child_entry(child_node_ind, switch_ind, &parent_entry); + VTR_ASSERT(child_entry.cost() >= 0); + pq.push(child_entry); - /* skip this child if it has been visited with smaller or the same cost */ - auto stored_cost = paths.find(child_node_ind); - if (stored_cost != paths.end() && stored_cost->second.cost <= child_entry.delay_cost) { - continue; + /* Create (if it doesn't exist) or update (if the new cost is lower) + * to specified node */ + Search_Path path_entry = {child_entry.cost(), parent_ind, iedge}; + auto result = paths.insert(std::make_pair( + child_node_ind, + path_entry)); + if (!result.second) { + if (child_entry.cost() < result.first->second.cost) { + result.first->second = path_entry; + } } - - /* finally, record the cost with which the child was visited and put the child entry on the queue */ - paths[child_node_ind] = {child_entry.delay_cost, parent_ind, iedge}; - pq.push(child_entry); } } + +template void expand_dijkstra_neighbours(const t_rr_graph_storage& rr_nodes, + const PQ_Entry_Delay& parent_entry, + std::unordered_map& paths, + std::vector& node_expanded, + std::priority_queue, + std::greater>& pq); +template void expand_dijkstra_neighbours(const t_rr_graph_storage& rr_nodes, + const PQ_Entry_Base_Cost& parent_entry, + std::unordered_map& paths, + std::vector& node_expanded, + std::priority_queue, + std::greater>& pq); + +} // namespace util diff --git a/vpr/src/route/router_lookahead_map_utils.h b/vpr/src/route/router_lookahead_map_utils.h index 53eb176855a..2bc0cfef630 100644 --- a/vpr/src/route/router_lookahead_map_utils.h +++ b/vpr/src/route/router_lookahead_map_utils.h @@ -21,6 +21,7 @@ #include #include #include "vpr_types.h" +#include "rr_node.h" namespace util { @@ -137,32 +138,64 @@ class PQ_Entry { } }; -// A version of PQ_Entry that only calculates and stores the delay (cost.) -class PQ_Entry_Lite { +// A version of PQ_Entry that only calculates and stores the delay. +class PQ_Entry_Delay { public: int rr_node_ind; //index in device_ctx.rr_nodes that this entry represents float delay_cost; //the cost of the path to get to this node - PQ_Entry_Lite(int set_rr_node_ind, int /*switch_ind*/, float parent_delay, bool starting_node); + PQ_Entry_Delay(int set_rr_node_ind, int /*switch_ind*/, const PQ_Entry_Delay* parent); - bool operator>(const PQ_Entry_Lite& obj) const { + float cost() const { + return delay_cost; + } + + void adjust_Tsw(float amount) { + delay_cost += amount; + } + + bool operator>(const PQ_Entry_Delay& obj) const { return (this->delay_cost > obj.delay_cost); } }; +// A version of PQ_Entry that only calculates and stores the base cost. +class PQ_Entry_Base_Cost { + public: + int rr_node_ind; //index in device_ctx.rr_nodes that this entry represents + float base_cost; + + PQ_Entry_Base_Cost(int set_rr_node_ind, int /*switch_ind*/, const PQ_Entry_Base_Cost* parent); + + float cost() const { + return base_cost; + } + + void adjust_Tsw(float amount) { + // do nothing + } + + bool operator>(const PQ_Entry_Base_Cost& obj) const { + return (this->base_cost > obj.base_cost); + } +}; + struct Search_Path { float cost; int parent; int edge; }; -} // namespace util - -void expand_dijkstra_neighbours(util::PQ_Entry_Lite parent_entry, - std::unordered_map& paths, +/* iterates over the children of the specified node and selectively pushes them onto the priority queue */ +template +void expand_dijkstra_neighbours(const t_rr_graph_storage& rr_nodes, + const Entry& parent_entry, + std::unordered_map& paths, std::vector& node_expanded, - std::priority_queue, - std::greater>& pq); + std::priority_queue, + std::greater>& pq); + +} // namespace util #endif From 578d08475db176219929959f6552be9f2670b7aa Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Thu, 14 Nov 2019 15:16:26 -0800 Subject: [PATCH 24/41] site_pin_delay shouldn't affect congestion Signed-off-by: Dusty DeWeese --- vpr/src/route/connection_box_lookahead_map.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index a65bb04d2d0..e360f7791bd 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -441,7 +441,6 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, float expected_congestion = cost_entry.congestion; expected_delay += site_pin_delay; - expected_congestion += site_pin_delay; float expected_cost = criticality_fac * expected_delay + (1.0 - criticality_fac) * expected_congestion; From bb9943e7e6a474260249587e1dad334e720ea27c Mon Sep 17 00:00:00 2001 From: Dustin DeWeese Date: Mon, 18 Nov 2019 18:29:55 -0800 Subject: [PATCH 25/41] use histogram to choose sample sizes, set minimum penalty Signed-off-by: Dustin DeWeese --- .../route/connection_box_lookahead_map.cpp | 137 +++++++++++++----- vpr/src/route/router_lookahead_map_utils.h | 2 +- 2 files changed, 100 insertions(+), 39 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index e360f7791bd..d770eaa2d5b 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -45,16 +45,18 @@ static constexpr int SAMPLE_GRID_SIZE = 3; // Don't continue storing a path after hitting a lower-or-same cost entry. -static constexpr bool BREAK_ON_MISS = true; +static constexpr bool BREAK_ON_MISS = false; // Distance penalties filling are calculated based on available samples, but can be adjusted with this factor. static constexpr float PENALTY_FACTOR = 1.f; +static constexpr float PENALTY_MIN = 1e-12f; // a sample point for a segment type, contains all segments at the VPR location struct SamplePoint { uint64_t order; // used to order sample points vtr::Point location; std::vector samples; + int segment_type; SamplePoint() : location(0, 0) {} }; @@ -119,6 +121,7 @@ int CostMap::node_to_segment(int from_node_ind) const { } static util::Cost_Entry penalize(const util::Cost_Entry& entry, int distance, float penalty) { + penalty = std::max(penalty, PENALTY_MIN); return util::Cost_Entry(entry.delay + distance * penalty * PENALTY_FACTOR, entry.congestion); } @@ -364,12 +367,12 @@ std::pair CostMap::get_nearby_cost_entry(const vtr::NdMat assign_min_entry(&min_entry, matrix[x][yn]); in_bounds = true; } - if (!std::isfinite(fill.delay)) { - fill.delay = min_entry.delay; - } - if (!std::isfinite(fill.congestion)) { - fill.congestion = min_entry.congestion; - } + } + if (!std::isfinite(fill.delay)) { + fill.delay = min_entry.delay; + } + if (!std::isfinite(fill.congestion)) { + fill.congestion = min_entry.congestion; } } return std::make_pair(fill, n); @@ -459,7 +462,7 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, // add a best cost routing path from start_node_ind to node_ind to routing costs template -static void add_paths(int start_node_ind, +static bool add_paths(int start_node_ind, Entry current, std::unordered_map* paths, RoutingCosts* routing_costs) { @@ -470,6 +473,7 @@ static void add_paths(int start_node_ind, int node_ind = current.rr_node_ind; bool found = device_ctx.connection_boxes.find_connection_box( node_ind, &box_id, &box_location, &site_pin_delay); + bool new_sample_found = false; if (!found) { VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", node_ind); } @@ -491,8 +495,7 @@ static void add_paths(int start_node_ind, int seg_index = device_ctx.rr_indexed_data[here.cost_index()].seg_index; const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(*it); if (from_canonical_loc == nullptr) { - VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d", - parent); + VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d", *it); } vtr::Point delta(ssize_t(from_canonical_loc->first) - ssize_t(box_location.first), @@ -509,7 +512,7 @@ static void add_paths(int start_node_ind, } float cost = current.cost() - start_to_here.cost(); - if (cost < 0.f && cost > -1e-15 /* 1 femtosecond */) { + if (cost < 0.f && cost > -10e-15 /* 10 femtosecond */) { cost = 0.f; } @@ -520,11 +523,15 @@ static void add_paths(int start_node_ind, if (!result.second) { if (cost < result.first->second) { result.first->second = cost; + new_sample_found = true; } else if (BREAK_ON_MISS) { break; } + } else { + new_sample_found = true; } } + return new_sample_found; } /* runs Dijkstra's algorithm from specified node until all nodes have been @@ -535,6 +542,11 @@ static std::pair run_dijkstra(int start_node_ind, RoutingCosts* routing_costs) { auto& device_ctx = g_vpr_ctx.device(); int path_count = 0; + const std::pair* start_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(start_node_ind); + if (start_canonical_loc == nullptr) { + VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d", + start_node_ind); + } /* a list of boolean flags (one for each rr node) to figure out if a * certain node has already been expanded */ @@ -646,7 +658,6 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen int path_count = 0; float max_delay_cost = 0.f; float max_base_cost = 0.f; - for (auto node_ind : point.samples) { { auto result = run_dijkstra(node_ind, &delay_costs); @@ -663,14 +674,10 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen #if defined(VPR_USE_TBB) all_costs_mutex.lock(); #endif - /* - * for (auto node_ind : point.samples) { - * VTR_LOG("Expanded node %s\n", describe_rr_node(node_ind).c_str()); - * } - */ - VTR_LOG("Expanded %d paths starting at (%d, %d) max_cost %e %e (%g paths/sec)\n", - path_count, + VTR_LOG("Expanded %d paths of segment type %s(%d) starting at (%d, %d) from %d segments, max_cost %e %e (%g paths/sec)\n", + path_count, segment_inf[point.segment_type].name.c_str(), point.segment_type, point.location.x(), point.location.y(), + (int)point.samples.size(), max_delay_cost, max_base_cost, path_count / run_timer.elapsed_sec()); @@ -764,25 +771,29 @@ static vtr::Rect bounding_box_for_node(const ConnectionBoxes& connection_bo } } -static vtr::Point choose_point(const vtr::Matrix& counts, const vtr::Rect& bounding_box, int sx, int sy, int n) { - vtr::Rect window(sample(bounding_box, sx, sy, n), +static vtr::Rect sample_window(const vtr::Rect& bounding_box, int sx, int sy, int n) { + return vtr::Rect(sample(bounding_box, sx, sy, n), sample(bounding_box, sx + 1, sy + 1, n)); +} + +static vtr::Point choose_point(const vtr::Matrix& counts, const vtr::Rect& window, int with_count) { vtr::Point center = sample(window, 1, 1, 2); vtr::Point sample_point = center; - int sample_distance = 0; - int sample_count = counts[sample_point.x()][sample_point.y()]; - for (int y = window.ymin(); y < window.ymax(); y++) { - for (int x = window.xmin(); x < window.xmax(); x++) { - vtr::Point here(x, y); - int count = counts[x][y]; - if (count < sample_count) continue; - int distance = manhattan_distance(center, here); - if (count > sample_count || (count == sample_count && distance < sample_distance)) { - sample_point = here; - sample_count = count; - sample_distance = distance; + if (with_count > 0) { + int sample_distance = std::numeric_limits::max(); + for (int y = window.ymin(); y < window.ymax(); y++) { + for (int x = window.xmin(); x < window.xmax(); x++) { + vtr::Point here(x, y); + if (counts[x][y] == with_count) { + int distance = manhattan_distance(center, here); + if (distance < sample_distance) { + sample_point = here; + sample_distance = distance; + } + } } } + VTR_ASSERT(counts[sample_point.x()][sample_point.y()] > 0); } return sample_point; } @@ -799,6 +810,51 @@ static std::pair grid_lookup(const SampleGrid& grid, vtr::Point p return std::make_pair(-1, -1); } +// histogram is a map from segment count to number of locations having that count +static int max_count_within_quantiles(const std::map& histogram, float lower, float upper) { + if (histogram.empty()) { + return 0; + } + int sum = 0; + for (const auto& entry : histogram) { + sum += entry.second; + } + int lower_limit = std::ceil(sum * lower); + int upper_limit = std::ceil(sum * upper); + std::pair max(0, 0); + for (const auto& entry : histogram) { + upper_limit -= entry.second; + if (lower_limit > 0) { + lower_limit -= entry.second; + if (lower_limit <= 0) { + max = entry; + } + } else { + if (entry.second >= max.second) { + max = entry; + } + if (upper_limit <= 0) { + break; + } + } + } + return max.first; +} + +// select a good number of segments to find +static int select_size(const vtr::Rect& box, const vtr::Matrix& counts) { + std::map histogram; + for (int y = box.ymin(); y < box.ymax(); y++) { + for (int x = box.xmin(); x < box.xmax(); x++) { + int count = counts[x][y]; + if (count > 0) { + histogram[count]++; + } + } + } + return max_count_within_quantiles(histogram, 0.75, 0.9); +} + // for each segment type, find the nearest nodes to an equally spaced grid of points // within the bounding box for that segment type static void find_inodes_for_segment_types(std::vector* inodes_for_segment) { @@ -812,6 +868,7 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se for (size_t i = 0; i < rr_nodes.size(); i++) { auto& node = rr_nodes[i]; if (node.type() != CHANX && node.type() != CHANY) continue; + if (node.capacity() == 0 || node.num_edges() == 0) continue; int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index; VTR_ASSERT(seg_index != OPEN); @@ -829,10 +886,12 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se // initialize the samples vector for each sample point inodes_for_segment->clear(); inodes_for_segment->resize(num_segments); - for (auto& grid : *inodes_for_segment) { + for (int i = 0; i < num_segments; i++) { + auto& grid = (*inodes_for_segment)[i]; for (int y = 0; y < SAMPLE_GRID_SIZE; y++) { for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { grid.point[y][x].samples = std::vector(); + grid.point[y][x].segment_type = i; } } } @@ -841,7 +900,7 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se for (size_t i = 0; i < rr_nodes.size(); i++) { auto& node = rr_nodes[i]; if (node.type() != CHANX && node.type() != CHANY) continue; - if (node.capacity() == 0) continue; + if (node.capacity() == 0 || node.num_edges() == 0) continue; const std::pair* loc = device_ctx.connection_boxes.find_canonical_loc(i); if (loc == nullptr) continue; @@ -860,16 +919,18 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se auto& grid = (*inodes_for_segment)[i]; for (int y = 0; y < SAMPLE_GRID_SIZE; y++) { for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { - grid.point[y][x].location = choose_point(counts, bounding_box, x, y, SAMPLE_GRID_SIZE); + vtr::Rect window = sample_window(bounding_box, x, y, SAMPLE_GRID_SIZE); + int selected_size = select_size(window, segment_counts[i]); + grid.point[y][x].location = choose_point(counts, window, selected_size); } } } - // select an inode near the center of the bounding box for each segment type + // collect the node indices for each segment type at the selected sample points for (size_t i = 0; i < rr_nodes.size(); i++) { auto& node = rr_nodes[i]; if (node.type() != CHANX && node.type() != CHANY) continue; - if (node.capacity() == 0) continue; + if (node.capacity() == 0 || node.num_edges() == 0) continue; const std::pair* loc = device_ctx.connection_boxes.find_canonical_loc(i); if (loc == nullptr) continue; diff --git a/vpr/src/route/router_lookahead_map_utils.h b/vpr/src/route/router_lookahead_map_utils.h index 2bc0cfef630..9ea5506de04 100644 --- a/vpr/src/route/router_lookahead_map_utils.h +++ b/vpr/src/route/router_lookahead_map_utils.h @@ -171,7 +171,7 @@ class PQ_Entry_Base_Cost { return base_cost; } - void adjust_Tsw(float amount) { + void adjust_Tsw(float /* amount */) { // do nothing } From 0eda42baef75afb4c5b82c39921b6c3670745aba Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Thu, 21 Nov 2019 09:28:01 -0800 Subject: [PATCH 26/41] Use get_rr_cong_cost to compute base_cost's. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/route/connection_box_lookahead_map.cpp | 3 +++ vpr/src/route/router_lookahead_map_utils.cpp | 11 ++++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index d770eaa2d5b..b4c884d1942 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -607,6 +607,9 @@ static uint64_t interleave(uint32_t x) { void ConnectionBoxMapLookahead::compute(const std::vector& segment_inf) { vtr::ScopedStartFinishTimer timer("Computing connection box lookahead map"); + // Initialize rr_node_route_inf if not already + alloc_and_load_rr_node_route_structs(); + size_t num_segments = segment_inf.size(); std::vector sample_points; { diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp index e53b265b916..31d7d0d3ca3 100644 --- a/vpr/src/route/router_lookahead_map_utils.cpp +++ b/vpr/src/route/router_lookahead_map_utils.cpp @@ -3,6 +3,7 @@ #include "globals.h" #include "vpr_context.h" #include "vtr_math.h" +#include "route_common.h" namespace util { @@ -24,8 +25,6 @@ PQ_Entry::PQ_Entry( this->congestion_upstream = parent_congestion_upstream; this->R_upstream = parent_R_upstream; if (!starting_node) { - int cost_index = device_ctx.rr_nodes[set_rr_node_ind].cost_index(); - float Tsw = device_ctx.rr_switch_inf[switch_ind].Tdel; Tsw += Tsw_adjust; VTR_ASSERT(Tsw >= 0.f); @@ -40,7 +39,10 @@ PQ_Entry::PQ_Entry( T_linear = Tsw + 0.5 * Rsw * Cnode; } - float base_cost = device_ctx.rr_indexed_data[cost_index].base_cost; + float base_cost = 0.f; + if (device_ctx.rr_switch_inf[switch_ind].configurable()) { + base_cost = get_rr_cong_cost(set_rr_node_ind); + } VTR_ASSERT(T_linear >= 0.); VTR_ASSERT(base_cost >= 0.); @@ -88,8 +90,7 @@ util::PQ_Entry_Base_Cost::PQ_Entry_Base_Cost( if (parent != nullptr) { auto& device_ctx = g_vpr_ctx.device(); - int cost_index = device_ctx.rr_nodes[set_rr_node_ind].cost_index(); - this->base_cost = parent->base_cost + device_ctx.rr_indexed_data[cost_index].base_cost; + this->base_cost = parent->base_cost + (device_ctx.rr_switch_inf[switch_ind].configurable() ? get_rr_cong_cost(set_rr_node_ind) : 0); } else { this->base_cost = 0.f; } From 01a3c41d1051fad8c55b69e0bff986b7a4403965 Mon Sep 17 00:00:00 2001 From: Dustin DeWeese Date: Mon, 25 Nov 2019 18:01:52 -0800 Subject: [PATCH 27/41] run expansions in a region until a number of paths are found Signed-off-by: Dustin DeWeese --- .../route/connection_box_lookahead_map.cpp | 263 +++++++++--------- 1 file changed, 127 insertions(+), 136 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index b4c884d1942..cc8e7bf112e 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -11,6 +11,7 @@ #include "vtr_time.h" #include "vtr_geometry.h" #include "echo_files.h" +#include "rr_graph.h" #include "route_timing.h" #include "route_common.h" @@ -51,26 +52,26 @@ static constexpr bool BREAK_ON_MISS = false; static constexpr float PENALTY_FACTOR = 1.f; static constexpr float PENALTY_MIN = 1e-12f; +static constexpr int MIN_PATH_COUNT = 1000; + // a sample point for a segment type, contains all segments at the VPR location struct SamplePoint { - uint64_t order; // used to order sample points vtr::Point location; - std::vector samples; - int segment_type; - SamplePoint() - : location(0, 0) {} + std::vector nodes; }; -// a grid of sample points -struct SampleGrid { - SamplePoint point[SAMPLE_GRID_SIZE][SAMPLE_GRID_SIZE]; +struct SampleRegion { + int segment_type; + vtr::Point grid_location; + std::vector points; + uint64_t order; // for sorting }; template static std::pair run_dijkstra(int start_node_ind, RoutingCosts* routing_costs); -static void find_inodes_for_segment_types(std::vector* inodes_for_segment); +static std::vector find_sample_regions(int num_segments); // also known as the L1 norm static int manhattan_distance(const vtr::Point& a, const vtr::Point& b) { @@ -593,16 +594,6 @@ static std::pair run_dijkstra(int start_node_ind, return std::make_pair(max_cost, path_count); } -static uint64_t interleave(uint32_t x) { - uint64_t i = x; - i = (i ^ (i << 16)) & 0x0000ffff0000ffff; - i = (i ^ (i << 8)) & 0x00ff00ff00ff00ff; - i = (i ^ (i << 4)) & 0x0f0f0f0f0f0f0f0f; - i = (i ^ (i << 2)) & 0x3333333333333333; - i = (i ^ (i << 1)) & 0x5555555555555555; - return i; -} - // compute the cost maps for lookahead void ConnectionBoxMapLookahead::compute(const std::vector& segment_inf) { vtr::ScopedStartFinishTimer timer("Computing connection box lookahead map"); @@ -611,30 +602,7 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen alloc_and_load_rr_node_route_structs(); size_t num_segments = segment_inf.size(); - std::vector sample_points; - { - std::vector inodes_for_segment(num_segments); - find_inodes_for_segment_types(&inodes_for_segment); - - // collapse into a vector - for (auto& grid : inodes_for_segment) { - for (int y = 0; y < SAMPLE_GRID_SIZE; y++) { - for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { - auto& point = grid.point[y][x]; - if (!point.samples.empty()) { - point.order = interleave(point.location.x()) | (interleave(point.location.y()) << 1); - sample_points.push_back(point); - } - } - } - } - } - - // sort by VPR coordinate - std::sort(sample_points.begin(), sample_points.end(), - [](const SamplePoint& a, const SamplePoint& b) { - return a.order < b.order; - }); + std::vector sample_regions = find_sample_regions(num_segments); /* free previous delay map and allocate new one */ auto& device_ctx = g_vpr_ctx.device(); @@ -648,41 +616,65 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen /* run Dijkstra's algorithm for each segment type & channel type combination */ #if defined(VPR_USE_TBB) tbb::mutex all_costs_mutex; - tbb::parallel_for_each(sample_points, [&](const SamplePoint& point) { + tbb::parallel_for_each(sample_regions, [&](const SampleRegion& region) { #else - for (const auto& point : sample_points) { + for (const auto& region : sample_regions) { #endif // holds the cost entries for a run RoutingCosts delay_costs; RoutingCosts base_costs; + int total_path_count = 0; + + for (auto& point : region.points) { + // statistics + vtr::Timer run_timer; + float max_delay_cost = 0.f; + float max_base_cost = 0.f; + int path_count = 0; + for (auto node_ind : point.nodes) { + { + auto result = run_dijkstra(node_ind, &delay_costs); + max_delay_cost = std::max(max_delay_cost, result.first); + path_count += result.second; + } + { + auto result = run_dijkstra(node_ind, &base_costs); + max_base_cost = std::max(max_base_cost, result.first); + path_count += result.second; + } + } - // statistics - vtr::Timer run_timer; - int path_count = 0; - float max_delay_cost = 0.f; - float max_base_cost = 0.f; - for (auto node_ind : point.samples) { - { - auto result = run_dijkstra(node_ind, &delay_costs); - max_delay_cost = std::max(max_delay_cost, result.first); - path_count += result.second; + if (path_count > 0) { + VTR_LOG("Expanded %d paths of segment type %s(%d) starting at (%d, %d) from %d segments, max_cost %e %e (%g paths/sec)\n", + path_count, segment_inf[region.segment_type].name.c_str(), region.segment_type, + point.location.x(), point.location.y(), + (int)point.nodes.size(), + max_delay_cost, max_base_cost, + path_count / run_timer.elapsed_sec()); } - { - auto result = run_dijkstra(node_ind, &base_costs); - max_base_cost = std::max(max_base_cost, result.first); - path_count += result.second; + + /* + * if (path_count == 0) { + * for (auto node_ind : point.nodes) { + * VTR_LOG("Expanded node %s\n", describe_rr_node(node_ind).c_str()); + * } + * } + */ + + total_path_count += path_count; + if (total_path_count > MIN_PATH_COUNT) { + break; } } #if defined(VPR_USE_TBB) all_costs_mutex.lock(); #endif - VTR_LOG("Expanded %d paths of segment type %s(%d) starting at (%d, %d) from %d segments, max_cost %e %e (%g paths/sec)\n", - path_count, segment_inf[point.segment_type].name.c_str(), point.segment_type, - point.location.x(), point.location.y(), - (int)point.samples.size(), - max_delay_cost, max_base_cost, - path_count / run_timer.elapsed_sec()); + + if (total_path_count == 0) { + VTR_LOG("No paths found for sample region %s(%d, %d)\n", + segment_inf[region.segment_type].name.c_str(), region.grid_location.x(), region.grid_location.y()); + } // combine the cost map from this run with the final cost maps for each segment for (const auto& cost : delay_costs) { @@ -779,42 +771,33 @@ static vtr::Rect sample_window(const vtr::Rect& bounding_box, int sx, sample(bounding_box, sx + 1, sy + 1, n)); } -static vtr::Point choose_point(const vtr::Matrix& counts, const vtr::Rect& window, int with_count) { - vtr::Point center = sample(window, 1, 1, 2); - vtr::Point sample_point = center; - if (with_count > 0) { - int sample_distance = std::numeric_limits::max(); - for (int y = window.ymin(); y < window.ymax(); y++) { - for (int x = window.xmin(); x < window.xmax(); x++) { - vtr::Point here(x, y); - if (counts[x][y] == with_count) { - int distance = manhattan_distance(center, here); - if (distance < sample_distance) { - sample_point = here; - sample_distance = distance; - } - } +static std::vector choose_points(const vtr::Matrix& counts, + const vtr::Rect& window, + int min_count, + int max_count) { + std::vector points; + for (int y = window.ymin(); y < window.ymax(); y++) { + for (int x = window.xmin(); x < window.xmax(); x++) { + if (counts[x][y] >= min_count && counts[x][y] <= max_count) { + points.push_back(SamplePoint{/* .location = */ vtr::Point(x, y), + /* .nodes = */ {}}); } } - VTR_ASSERT(counts[sample_point.x()][sample_point.y()] > 0); } - return sample_point; -} -// linear lookup, so consider something more sophisticated for large SAMPLE_GRID_SIZEs -static std::pair grid_lookup(const SampleGrid& grid, vtr::Point point) { - for (int sy = 0; sy < SAMPLE_GRID_SIZE; sy++) { - for (int sx = 0; sx < SAMPLE_GRID_SIZE; sx++) { - if (grid.point[sy][sx].location == point) { - return std::make_pair(sx, sy); - } - } - } - return std::make_pair(-1, -1); + vtr::Point center = sample(window, 1, 1, 2); + + // sort by distance from center + std::sort(points.begin(), points.end(), + [&](const SamplePoint& a, const SamplePoint& b) { + return manhattan_distance(a.location, center) < manhattan_distance(b.location, center); + }); + + return points; } // histogram is a map from segment count to number of locations having that count -static int max_count_within_quantiles(const std::map& histogram, float lower, float upper) { +static int quantile(const std::map& histogram, float ratio) { if (histogram.empty()) { return 0; } @@ -822,48 +805,46 @@ static int max_count_within_quantiles(const std::map& histogram, float for (const auto& entry : histogram) { sum += entry.second; } - int lower_limit = std::ceil(sum * lower); - int upper_limit = std::ceil(sum * upper); - std::pair max(0, 0); + int limit = std::ceil(sum * ratio); for (const auto& entry : histogram) { - upper_limit -= entry.second; - if (lower_limit > 0) { - lower_limit -= entry.second; - if (lower_limit <= 0) { - max = entry; - } - } else { - if (entry.second >= max.second) { - max = entry; - } - if (upper_limit <= 0) { - break; - } + limit -= entry.second; + if (limit <= 0) { + return entry.first; } } - return max.first; + return 0; } // select a good number of segments to find -static int select_size(const vtr::Rect& box, const vtr::Matrix& counts) { +static std::map count_histogram(const vtr::Rect& box, const vtr::Matrix& counts) { std::map histogram; for (int y = box.ymin(); y < box.ymax(); y++) { for (int x = box.xmin(); x < box.xmax(); x++) { int count = counts[x][y]; if (count > 0) { - histogram[count]++; + ++histogram[count]; } } } - return max_count_within_quantiles(histogram, 0.75, 0.9); + return histogram; +} + +static uint64_t interleave(uint32_t x) { + uint64_t i = x; + i = (i ^ (i << 16)) & 0x0000ffff0000ffff; + i = (i ^ (i << 8)) & 0x00ff00ff00ff00ff; + i = (i ^ (i << 4)) & 0x0f0f0f0f0f0f0f0f; + i = (i ^ (i << 2)) & 0x3333333333333333; + i = (i ^ (i << 1)) & 0x5555555555555555; + return i; } // for each segment type, find the nearest nodes to an equally spaced grid of points // within the bounding box for that segment type -static void find_inodes_for_segment_types(std::vector* inodes_for_segment) { +static std::vector find_sample_regions(int num_segments) { + std::vector sample_regions; auto& device_ctx = g_vpr_ctx.device(); auto& rr_nodes = device_ctx.rr_nodes; - const int num_segments = inodes_for_segment->size(); std::vector> segment_counts(num_segments); // compute bounding boxes for each segment type @@ -886,19 +867,6 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se segment_counts[seg] = vtr::Matrix({size_t(box.width()), size_t(box.height())}, 0); } - // initialize the samples vector for each sample point - inodes_for_segment->clear(); - inodes_for_segment->resize(num_segments); - for (int i = 0; i < num_segments; i++) { - auto& grid = (*inodes_for_segment)[i]; - for (int y = 0; y < SAMPLE_GRID_SIZE; y++) { - for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { - grid.point[y][x].samples = std::vector(); - grid.point[y][x].segment_type = i; - } - } - } - // count sample points for (size_t i = 0; i < rr_nodes.size(); i++) { auto& node = rr_nodes[i]; @@ -919,16 +887,38 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se const auto& counts = segment_counts[i]; const auto& bounding_box = bounding_box_for_segment[i]; if (bounding_box.empty()) continue; - auto& grid = (*inodes_for_segment)[i]; for (int y = 0; y < SAMPLE_GRID_SIZE; y++) { for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { vtr::Rect window = sample_window(bounding_box, x, y, SAMPLE_GRID_SIZE); - int selected_size = select_size(window, segment_counts[i]); - grid.point[y][x].location = choose_point(counts, window, selected_size); + auto histogram = count_histogram(window, segment_counts[i]); + SampleRegion region = { + /* .segment_type = */ i, + /* .grid_location = */ vtr::Point(x, y), + /* .points = */ choose_points(counts, window, quantile(histogram, 0.5), quantile(histogram, 0.7)), + /* .order = */ 0}; + if (!region.points.empty()) { + vtr::Point location = region.points[0].location; + region.order = interleave(location.x()) | (interleave(location.y()) << 1); + sample_regions.push_back(region); + } } } } + // sort regions + std::sort(sample_regions.begin(), sample_regions.end(), + [](const SampleRegion& a, const SampleRegion& b) { + return a.order < b.order; + }); + + // build an index of sample points on segment type and location + std::map, SamplePoint*> sample_point_index; + for (auto& region : sample_regions) { + for (auto& point : region.points) { + sample_point_index[{region.segment_type, point.location.x(), point.location.y()}] = &point; + } + } + // collect the node indices for each segment type at the selected sample points for (size_t i = 0; i < rr_nodes.size(); i++) { auto& node = rr_nodes[i]; @@ -942,12 +932,13 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se VTR_ASSERT(seg_index != OPEN); VTR_ASSERT(seg_index < num_segments); - auto& grid = (*inodes_for_segment)[seg_index]; - auto grid_loc = grid_lookup(grid, vtr::Point(loc->first, loc->second)); - if (grid_loc.first >= 0) { - grid.point[grid_loc.first][grid_loc.second].samples.push_back(i); + auto point = sample_point_index.find({seg_index, loc->first, loc->second}); + if (point != sample_point_index.end()) { + point->second->nodes.push_back(i); } } + + return sample_regions; } #ifndef VTR_ENABLE_CAPNPROTO From a9f7b1298f5511168c8978af4fc5f1eb5a7be3fd Mon Sep 17 00:00:00 2001 From: Dustin DeWeese Date: Tue, 3 Dec 2019 10:46:39 -0800 Subject: [PATCH 28/41] use std::make_tuple() Signed-off-by: Dustin DeWeese --- vpr/src/route/connection_box_lookahead_map.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index cc8e7bf112e..ed0172fe252 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -915,7 +915,7 @@ static std::vector find_sample_regions(int num_segments) { std::map, SamplePoint*> sample_point_index; for (auto& region : sample_regions) { for (auto& point : region.points) { - sample_point_index[{region.segment_type, point.location.x(), point.location.y()}] = &point; + sample_point_index[std::make_tuple(region.segment_type, point.location.x(), point.location.y())] = &point; } } @@ -932,7 +932,7 @@ static std::vector find_sample_regions(int num_segments) { VTR_ASSERT(seg_index != OPEN); VTR_ASSERT(seg_index < num_segments); - auto point = sample_point_index.find({seg_index, loc->first, loc->second}); + auto point = sample_point_index.find(std::make_tuple(seg_index, loc->first, loc->second)); if (point != sample_point_index.end()) { point->second->nodes.push_back(i); } From 2c1631d41bbf1a6a5087aa5aed875d4e27016ed4 Mon Sep 17 00:00:00 2001 From: Dustin DeWeese Date: Tue, 3 Dec 2019 15:28:10 -0800 Subject: [PATCH 29/41] suggested changes Signed-off-by: Dustin DeWeese --- libs/libvtrcapnproto/connection_map.capnp | 1 + .../route/connection_box_lookahead_map.cpp | 70 ++++++++++++++++--- vpr/src/route/connection_box_lookahead_map.h | 9 --- vpr/src/route/router_lookahead_map_utils.cpp | 9 +-- vpr/src/route/router_lookahead_map_utils.h | 3 + 5 files changed, 70 insertions(+), 22 deletions(-) diff --git a/libs/libvtrcapnproto/connection_map.capnp b/libs/libvtrcapnproto/connection_map.capnp index bc445a30e70..30b9864153b 100644 --- a/libs/libvtrcapnproto/connection_map.capnp +++ b/libs/libvtrcapnproto/connection_map.capnp @@ -5,6 +5,7 @@ using Matrix = import "matrix.capnp"; struct VprCostEntry { delay @0 :Float32; congestion @1 :Float32; + fill @2 :Bool; } struct VprVector2D { diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index ed0172fe252..0edac971719 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -54,17 +54,34 @@ static constexpr float PENALTY_MIN = 1e-12f; static constexpr int MIN_PATH_COUNT = 1000; +// quantiles (like percentiles but 0-1) of segment count to use as a selection criteria +// choose locations with higher, but not extreme, counts of each segment type +static constexpr double kSamplingCountLowerQuantile = 0.5; +static constexpr double kSamplingCountUpperQuantile = 0.7; + // a sample point for a segment type, contains all segments at the VPR location struct SamplePoint { + // canonical location vtr::Point location; + + // nodes to expand std::vector nodes; }; struct SampleRegion { + // all nodes in `points' have this segment type int segment_type; + + // location on the sample grid vtr::Point grid_location; + + // locations to try + // The computation will keep expanding each of the points + // until a number of paths (segment -> connection box) are found. std::vector points; - uint64_t order; // for sorting + + // used to sort the regions to improve caching + uint64_t order; }; template @@ -350,6 +367,7 @@ std::pair CostMap::get_nearby_cost_entry(const vtr::NdMat } int n = 0; util::Cost_Entry fill(matrix[cx][cy]); + fill.fill = true; while (in_bounds && !fill.valid()) { n++; @@ -448,6 +466,15 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, float expected_cost = criticality_fac * expected_delay + (1.0 - criticality_fac) * expected_congestion; + VTR_LOGV_DEBUG(f_router_debug, "Requested lookahead from node %d to %d\n", from_node_ind, to_node_ind); + const std::string& segment_name = device_ctx.segment_inf[from_seg_index].name; + const std::string& box_name = device_ctx.connection_boxes.get_connection_box(box_id)->name; + VTR_LOGV_DEBUG(f_router_debug, "Lookahead returned %s (%d) to %s (%zu) with distance (%zd, %zd)\n", + segment_name.c_str(), from_seg_index, + box_name.c_str(), + size_t(box_id), + dx, dy); + VTR_LOGV_DEBUG(f_router_debug, "Lookahead delay: %g\n", expected_delay); VTR_LOGV_DEBUG(f_router_debug, "Lookahead congestion: %g\n", expected_congestion); VTR_LOGV_DEBUG(f_router_debug, "Criticality: %g\n", criticality_fac); VTR_LOGV_DEBUG(f_router_debug, "Lookahead cost: %g\n", expected_cost); @@ -535,9 +562,12 @@ static bool add_paths(int start_node_ind, return new_sample_found; } -/* runs Dijkstra's algorithm from specified node until all nodes have been +/* Runs Dijkstra's algorithm from specified node until all nodes have been * visited. Each time a pin is visited, the delay/congestion information - * to that pin is stored to an entry in the routing_cost_map */ + * to that pin is stored to an entry in the routing_cost_map. + * + * Returns the maximum (last) minimum cost path stored, and + * the number of paths from start_node_ind stored. */ template static std::pair run_dijkstra(int start_node_ind, RoutingCosts* routing_costs) { @@ -573,9 +603,6 @@ static std::pair run_dijkstra(int start_node_ind, int node_ind = current.rr_node_ind; - // the last cost should be the highest - max_cost = current.cost(); - /* check that we haven't already expanded from this node */ if (node_expanded[node_ind]) { continue; @@ -583,6 +610,9 @@ static std::pair run_dijkstra(int start_node_ind, /* if this node is an ipin record its congestion/delay in the routing_cost_map */ if (device_ctx.rr_nodes[node_ind].type() == IPIN) { + // the last cost should be the highest + max_cost = current.cost(); + path_count++; add_paths(start_node_ind, current, &paths, routing_costs); } else { @@ -672,8 +702,8 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen #endif if (total_path_count == 0) { - VTR_LOG("No paths found for sample region %s(%d, %d)\n", - segment_inf[region.segment_type].name.c_str(), region.grid_location.x(), region.grid_location.y()); + VTR_LOG_WARN("No paths found for sample region %s(%d, %d)\n", + segment_inf[region.segment_type].name.c_str(), region.grid_location.x(), region.grid_location.y()); } // combine the cost map from this run with the final cost maps for each segment @@ -775,6 +805,7 @@ static std::vector choose_points(const vtr::Matrix& counts, const vtr::Rect& window, int min_count, int max_count) { + VTR_ASSERT(min_count <= max_count); std::vector points; for (int y = window.ymin(); y < window.ymax(); y++) { for (int x = window.xmin(); x < window.xmax(); x++) { @@ -829,6 +860,13 @@ static std::map count_histogram(const vtr::Rect& box, const vtr:: return histogram; } +// Used to calculate each region's `order.' +// A space-filling curve will order the regions so that +// nearby points stay close in order. A Hilbert curve might +// be better, but a Morton (Z)-order curve is easy to compute, +// because it's just interleaving binary bits, so this +// function interleaves with 0's so that the X and Y +// dimensions can then be OR'ed together. static uint64_t interleave(uint32_t x) { uint64_t i = x; i = (i ^ (i << 16)) & 0x0000ffff0000ffff; @@ -842,6 +880,7 @@ static uint64_t interleave(uint32_t x) { // for each segment type, find the nearest nodes to an equally spaced grid of points // within the bounding box for that segment type static std::vector find_sample_regions(int num_segments) { + vtr::ScopedStartFinishTimer timer("finding sample regions"); std::vector sample_regions; auto& device_ctx = g_vpr_ctx.device(); auto& rr_nodes = device_ctx.rr_nodes; @@ -894,11 +933,22 @@ static std::vector find_sample_regions(int num_segments) { SampleRegion region = { /* .segment_type = */ i, /* .grid_location = */ vtr::Point(x, y), - /* .points = */ choose_points(counts, window, quantile(histogram, 0.5), quantile(histogram, 0.7)), + /* .points = */ choose_points(counts, window, quantile(histogram, kSamplingCountLowerQuantile), quantile(histogram, kSamplingCountUpperQuantile)), /* .order = */ 0}; if (!region.points.empty()) { + /* In order to improve caching, the list of sample points are + * sorted to keep points that are nearby on the Euclidean plane also + * nearby in the vector of sample points. + * + * This means subsequent expansions on the same thread are likely + * to cover a similar set of nodes, so they are more likely to be + * cached. This improves performance by about 7%, which isn't a lot, + * but not a bad improvement for a few lines of code. */ vtr::Point location = region.points[0].location; + + // interleave bits of X and Y for a Z-curve ordering. region.order = interleave(location.x()) | (interleave(location.y()) << 1); + sample_regions.push_back(region); } } @@ -962,11 +1012,13 @@ void ConnectionBoxMapLookahead::write(const std::string& file) const { static void ToCostEntry(util::Cost_Entry* out, const VprCostEntry::Reader& in) { out->delay = in.getDelay(); out->congestion = in.getCongestion(); + out->fill = in.getFill(); } static void FromCostEntry(VprCostEntry::Builder* out, const util::Cost_Entry& in) { out->setDelay(in.delay); out->setCongestion(in.congestion); + out->setFill(in.fill); } static void ToVprVector2D(std::pair* out, const VprVector2D::Reader& in) { diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h index 810dec59135..70423409f7a 100644 --- a/vpr/src/route/connection_box_lookahead_map.h +++ b/vpr/src/route/connection_box_lookahead_map.h @@ -32,15 +32,6 @@ struct RoutingCostKey { } }; -// Data in the RoutingCosts map -struct RoutingCost { - // source and destination node indices - int from_node, to_node; - - // cost entry for the route - util::Cost_Entry cost_entry; -}; - // hash implementation for RoutingCostKey struct HashRoutingCostKey { std::size_t operator()(RoutingCostKey const& key) const noexcept { diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp index 31d7d0d3ca3..46fcfe4c90d 100644 --- a/vpr/src/route/router_lookahead_map_utils.cpp +++ b/vpr/src/route/router_lookahead_map_utils.cpp @@ -7,9 +7,6 @@ namespace util { -/* Number of CLBs I think the average conn. goes. */ -static const int CLB_DIST = 3; - PQ_Entry::PQ_Entry( int set_rr_node_ind, int switch_ind, @@ -90,7 +87,11 @@ util::PQ_Entry_Base_Cost::PQ_Entry_Base_Cost( if (parent != nullptr) { auto& device_ctx = g_vpr_ctx.device(); - this->base_cost = parent->base_cost + (device_ctx.rr_switch_inf[switch_ind].configurable() ? get_rr_cong_cost(set_rr_node_ind) : 0); + if (device_ctx.rr_switch_inf[switch_ind].configurable()) { + this->base_cost = parent->base_cost + get_rr_cong_cost(set_rr_node_ind); + } else { + this->base_cost = parent->base_cost; + } } else { this->base_cost = 0.f; } diff --git a/vpr/src/route/router_lookahead_map_utils.h b/vpr/src/route/router_lookahead_map_utils.h index 9ea5506de04..1861ee031ec 100644 --- a/vpr/src/route/router_lookahead_map_utils.h +++ b/vpr/src/route/router_lookahead_map_utils.h @@ -42,14 +42,17 @@ class Cost_Entry { public: float delay; float congestion; + bool fill; Cost_Entry() { delay = std::numeric_limits::infinity(); congestion = std::numeric_limits::infinity(); + fill = false; } Cost_Entry(float set_delay, float set_congestion) { delay = set_delay; congestion = set_congestion; + fill = false; } bool valid() const { From 502acf97db3c8d071403de0f771e1eb2fca1b9b5 Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Thu, 31 Oct 2019 12:22:58 -0700 Subject: [PATCH 30/41] Add debugging prints and add fill output to lookahead. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/base/SetupVPR.cpp | 1 + vpr/src/base/vpr_context.h | 1 + .../route/connection_box_lookahead_map.cpp | 2 +- vpr/src/route/route_profiling.cpp | 40 +++++++++++++++++++ vpr/src/route/route_profiling.h | 4 ++ vpr/src/route/route_timing.cpp | 7 ++++ vpr/src/route/router_lookahead_map_utils.h | 14 ++++--- 7 files changed, 62 insertions(+), 7 deletions(-) diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index 170bb9f85bd..d2efc837f80 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -156,6 +156,7 @@ void SetupVPR(const t_options* Options, } Segments = Arch->Segments; + device_ctx.segment_inf = Arch->Segments; SetupSwitches(*Arch, RoutingArch, Arch->Switches, Arch->num_switches); SetupRoutingArch(*Arch, RoutingArch); diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 1083c920a1b..ec615cc23e5 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -138,6 +138,7 @@ struct DeviceContext : public Context { t_rr_node_indices rr_node_indices; //[0..NUM_RR_TYPES-1][0..grid.width()-1][0..grid.width()-1][0..size-1] std::vector rr_switch_inf; /* autogenerated in build_rr_graph based on switch fan-in. [0..(num_rr_switches-1)] */ + std::vector segment_inf; //Wire segment types in RR graph std::vector rr_segments; diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 0edac971719..36355996e71 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -141,7 +141,7 @@ int CostMap::node_to_segment(int from_node_ind) const { static util::Cost_Entry penalize(const util::Cost_Entry& entry, int distance, float penalty) { penalty = std::max(penalty, PENALTY_MIN); return util::Cost_Entry(entry.delay + distance * penalty * PENALTY_FACTOR, - entry.congestion); + entry.congestion, entry.fill); } // get a cost entry for a segment type, connection box type, and offset diff --git a/vpr/src/route/route_profiling.cpp b/vpr/src/route/route_profiling.cpp index c5e5737caca..b393711eba9 100644 --- a/vpr/src/route/route_profiling.cpp +++ b/vpr/src/route/route_profiling.cpp @@ -2,6 +2,7 @@ #include "globals.h" #include "vpr_types.h" #include "route_profiling.h" +#include "rr_graph.h" namespace profiling { @@ -29,6 +30,10 @@ void time_on_fanout_analysis() {} void profiling_initialization(unsigned /*max_net_fanout*/) {} +void conn_start() {} +void conn_finish(int /*src_rr*/, int /*sink_rr*/, float /*criticality*/) {} +void net_finish() {} + #else constexpr unsigned int fanout_per_bin = 1; @@ -181,6 +186,12 @@ void congestion_analysis() { # endif } +static clock_t conn_start_time; +static float worst_conn_time = 0.f; +static int worst_src_rr; +static int worst_sink_rr; +static float worst_crit; + void profiling_initialization(unsigned max_fanout) { // add 1 so that indexing on the max fanout would still be valid time_on_fanout.resize((max_fanout / fanout_per_bin) + 1, 0); @@ -195,8 +206,37 @@ void profiling_initialization(unsigned max_fanout) { part_tree_preserved = 0; connections_forced_to_reroute = 0; connections_rerouted_due_to_forcing = 0; + worst_conn_time = 0.f; return; } + +void conn_start() { + conn_start_time = clock(); +} +void conn_finish(int src_rr, int sink_rr, float criticality) { + float route_time = static_cast(clock() - conn_start_time) / CLOCKS_PER_SEC; + if (route_time > worst_conn_time) { + worst_src_rr = src_rr; + worst_sink_rr = sink_rr; + worst_conn_time = route_time; + worst_crit = criticality; + } + + VTR_LOG("%s to %s (crit: %f) took %f\n", + describe_rr_node(src_rr).c_str(), + describe_rr_node(sink_rr).c_str(), + criticality, + route_time); +} +void net_finish() { + if (worst_conn_time > 0.f) { + VTR_LOG("Worst conn was %s to %s (crit: %f) took %f\n", + describe_rr_node(worst_src_rr).c_str(), + describe_rr_node(worst_sink_rr).c_str(), + worst_crit, + worst_conn_time); + } +} #endif } // end namespace profiling diff --git a/vpr/src/route/route_profiling.h b/vpr/src/route/route_profiling.h index 52a73655214..3cc1ec377a2 100644 --- a/vpr/src/route/route_profiling.h +++ b/vpr/src/route/route_profiling.h @@ -30,6 +30,10 @@ void congestion_analysis(); void time_on_criticality_analysis(); void time_on_fanout_analysis(); +void conn_start(); +void conn_finish(int src_rr, int sink_rr, float criticality); +void net_finish(); + void profiling_initialization(unsigned max_net_fanout); } // end namespace profiling diff --git a/vpr/src/route/route_timing.cpp b/vpr/src/route/route_timing.cpp index e7eef6461c6..f58672606bf 100644 --- a/vpr/src/route/route_timing.cpp +++ b/vpr/src/route/route_timing.cpp @@ -1096,6 +1096,8 @@ bool timing_driven_route_net(ConnectionRouter& router, conn_delay_budget.short_path_criticality = budgeting_inf.get_crit_short_path(net_id, target_pin); } + profiling::conn_start(); + // build a branch in the route tree to the target if (!timing_driven_route_sink(router, net_id, @@ -1109,10 +1111,15 @@ bool timing_driven_route_net(ConnectionRouter& router, router_stats)) return false; + profiling::conn_finish(route_ctx.net_rr_terminals[net_id][0], + sink_rr, + pin_criticality[target_pin]); + ++router_stats.connections_routed; } // finished all sinks ++router_stats.nets_routed; + profiling::net_finish(); /* For later timing analysis. */ diff --git a/vpr/src/route/router_lookahead_map_utils.h b/vpr/src/route/router_lookahead_map_utils.h index 1861ee031ec..bc0540ec65e 100644 --- a/vpr/src/route/router_lookahead_map_utils.h +++ b/vpr/src/route/router_lookahead_map_utils.h @@ -49,12 +49,14 @@ class Cost_Entry { congestion = std::numeric_limits::infinity(); fill = false; } - Cost_Entry(float set_delay, float set_congestion) { - delay = set_delay; - congestion = set_congestion; - fill = false; - } - + Cost_Entry(float set_delay, float set_congestion) + : delay(set_delay) + , congestion(set_congestion) + , fill(false) {} + Cost_Entry(float set_delay, float set_congestion, bool set_fill) + : delay(set_delay) + , congestion(set_congestion) + , fill(set_fill) {} bool valid() const { return std::isfinite(delay) && std::isfinite(congestion); } From 8530fefb4c5a5420a73b0959a553d32bda545d25 Mon Sep 17 00:00:00 2001 From: Henner Zeller Date: Wed, 29 Jan 2020 13:42:22 -0800 Subject: [PATCH 31/41] Fix missing #include Signed-off-by: Henner Zeller --- vpr/src/route/connection_box.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/vpr/src/route/connection_box.h b/vpr/src/route/connection_box.h index 1757c6c726b..37af4cc8320 100644 --- a/vpr/src/route/connection_box.h +++ b/vpr/src/route/connection_box.h @@ -4,11 +4,13 @@ // This class relates IPIN rr nodes with connection box type and locations, used // for connection box driven map lookahead. +#include +#include #include -#include "vtr_strong_id.h" + #include "vtr_flat_map.h" #include "vtr_range.h" -#include +#include "vtr_strong_id.h" struct connection_box_tag {}; typedef vtr::StrongId ConnectionBoxId; From 669d580817fee4662d981a8f8790d643fb0e566b Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Wed, 19 Feb 2020 12:37:46 -0800 Subject: [PATCH 32/41] Remove cached segment map from connection map lookahead file. Signed-off-by: Dusty DeWeese Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- libs/libvtrcapnproto/connection_map.capnp | 2 +- .../route/connection_box_lookahead_map.cpp | 44 +++++++------------ vpr/src/route/connection_box_lookahead_map.h | 1 + 3 files changed, 18 insertions(+), 29 deletions(-) diff --git a/libs/libvtrcapnproto/connection_map.capnp b/libs/libvtrcapnproto/connection_map.capnp index 30b9864153b..874b39822dd 100644 --- a/libs/libvtrcapnproto/connection_map.capnp +++ b/libs/libvtrcapnproto/connection_map.capnp @@ -20,6 +20,6 @@ struct VprFloatEntry { struct VprCostMap { costMap @0 :Matrix.Matrix((Matrix.Matrix(VprCostEntry))); offset @1 :Matrix.Matrix(VprVector2D); - segmentMap @2 :List(Int64); + depField @2 :List(Int64); penalty @3 :Matrix.Matrix(VprFloatEntry); } diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 36355996e71..8df10208c27 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -110,17 +110,8 @@ static vtr::Point closest_point_in_rect(const vtr::Rect& r, const vtr::Poi } } -// resize internal data structures -void CostMap::set_counts(size_t seg_count, size_t box_count) { - cost_map_.clear(); - offset_.clear(); - penalty_.clear(); - cost_map_.resize({seg_count, box_count}); - offset_.resize({seg_count, box_count}); - penalty_.resize({seg_count, box_count}); - seg_count_ = seg_count; - box_count_ = box_count; - +// build the segment map +void CostMap::build_segment_map() { const auto& device_ctx = g_vpr_ctx.device(); segment_map_.resize(device_ctx.rr_nodes.size()); for (size_t i = 0; i < segment_map_.size(); ++i) { @@ -133,6 +124,18 @@ void CostMap::set_counts(size_t seg_count, size_t box_count) { } } +// resize internal data structures +void CostMap::set_counts(size_t seg_count, size_t box_count) { + cost_map_.clear(); + offset_.clear(); + penalty_.clear(); + cost_map_.resize({seg_count, box_count}); + offset_.resize({seg_count, box_count}); + penalty_.resize({seg_count, box_count}); + seg_count_ = seg_count; + box_count_ = box_count; +} + // cached node -> segment map int CostMap::node_to_segment(int from_node_ind) const { return segment_map_[from_node_ind]; @@ -638,6 +641,7 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen auto& device_ctx = g_vpr_ctx.device(); cost_map_.set_counts(segment_inf.size(), device_ctx.connection_boxes.num_connection_box_types()); + cost_map_.build_segment_map(); VTR_ASSERT(REPRESENTATIVE_ENTRY_METHOD == util::SMALLEST); RoutingCosts all_delay_costs; @@ -1053,22 +1057,13 @@ static void FromFloat(VprFloatEntry::Builder* out, const float& in) { } void CostMap::read(const std::string& file) { + build_segment_map(); MmapFile f(file); ::capnp::ReaderOptions opts = default_large_capnp_opts(); ::capnp::FlatArrayMessageReader reader(f.getData(), opts); auto cost_map = reader.getRoot(); - - { - const auto& segment_map = cost_map.getSegmentMap(); - segment_map_.resize(segment_map.size()); - auto dst_iter = segment_map_.begin(); - for (const auto& src : segment_map) { - *dst_iter++ = src; - } - } - { const auto& offset = cost_map.getOffset(); ToNdMatrix<2, VprVector2D, std::pair>( @@ -1093,13 +1088,6 @@ void CostMap::write(const std::string& file) const { auto cost_map = builder.initRoot(); - { - auto segment_map = cost_map.initSegmentMap(segment_map_.size()); - for (size_t i = 0; i < segment_map_.size(); ++i) { - segment_map.set(i, segment_map_[i]); - } - } - { auto offset = cost_map.initOffset(); FromNdMatrix<2, VprVector2D, std::pair>( diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h index 70423409f7a..90a7ab3276f 100644 --- a/vpr/src/route/connection_box_lookahead_map.h +++ b/vpr/src/route/connection_box_lookahead_map.h @@ -50,6 +50,7 @@ typedef std::unordered_map RoutingCos class CostMap { public: void set_counts(size_t seg_count, size_t box_count); + void build_segment_map(); int node_to_segment(int from_node_ind) const; util::Cost_Entry find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const; void set_cost_map(const RoutingCosts& delay_costs, const RoutingCosts& base_costs); From e6c691b3939cb57e9b8c2e2aa377ce9417bdc349 Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Fri, 21 Feb 2020 15:31:37 -0800 Subject: [PATCH 33/41] Refactor lookahead computation. Before on the A200T: > Computing connection box lookahead map took 31769.29 seconds (max_rss 45671.6 MiB, delta_rss +14486.3 MiB) After on the A200T: > Computing connection box lookahead map took 16791.79 seconds (max_rss 40113.6 MiB, delta_rss +8895.9 MiB) Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- .../route/connection_box_lookahead_map.cpp | 29 ++++++++++------- vpr/src/route/router_lookahead_map_utils.cpp | 32 ++++++++----------- vpr/src/route/router_lookahead_map_utils.h | 6 ++-- 3 files changed, 35 insertions(+), 32 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 8df10208c27..3f765668c16 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -86,6 +86,8 @@ struct SampleRegion { template static std::pair run_dijkstra(int start_node_ind, + std::vector* node_expanded, + std::vector* paths, RoutingCosts* routing_costs); static std::vector find_sample_regions(int num_segments); @@ -495,7 +497,7 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, template static bool add_paths(int start_node_ind, Entry current, - std::unordered_map* paths, + const std::vector& paths, RoutingCosts* routing_costs) { auto& device_ctx = g_vpr_ctx.device(); ConnectionBoxId box_id; @@ -511,7 +513,8 @@ static bool add_paths(int start_node_ind, // reconstruct the path std::vector path; - for (int i = (*paths)[node_ind].parent; i != start_node_ind; i = (*paths)[i].parent) { + for (int i = paths[node_ind].parent; i != start_node_ind; i = paths[i].parent) { + VTR_ASSERT(i != -1); path.push_back(i); } path.push_back(start_node_ind); @@ -538,7 +541,7 @@ static bool add_paths(int start_node_ind, if (*it != start_node_ind) { auto& parent_node = device_ctx.rr_nodes[parent]; - start_to_here = Entry(*it, parent_node.edge_switch((*paths)[*it].edge), &start_to_here); + start_to_here = Entry(*it, parent_node.edge_switch(paths[*it].edge), &start_to_here); parent = *it; } @@ -573,6 +576,8 @@ static bool add_paths(int start_node_ind, * the number of paths from start_node_ind stored. */ template static std::pair run_dijkstra(int start_node_ind, + std::vector* node_expanded, + std::vector* paths, RoutingCosts* routing_costs) { auto& device_ctx = g_vpr_ctx.device(); int path_count = 0; @@ -584,12 +589,12 @@ static std::pair run_dijkstra(int start_node_ind, /* a list of boolean flags (one for each rr node) to figure out if a * certain node has already been expanded */ - std::vector node_expanded(device_ctx.rr_nodes.size(), false); + std::fill(node_expanded->begin(), node_expanded->end(), false); /* For each node keep a list of the cost with which that node has been * visited (used to determine whether to push a candidate node onto the * expansion queue. * Also store the parent node so we can reconstruct a specific path. */ - std::unordered_map paths; + std::fill(paths->begin(), paths->end(), util::Search_Path{std::numeric_limits::infinity(), -1, -1}); /* a priority queue for expansion */ std::priority_queue, std::greater> pq; @@ -607,7 +612,7 @@ static std::pair run_dijkstra(int start_node_ind, int node_ind = current.rr_node_ind; /* check that we haven't already expanded from this node */ - if (node_expanded[node_ind]) { + if ((*node_expanded)[node_ind]) { continue; } @@ -617,11 +622,11 @@ static std::pair run_dijkstra(int start_node_ind, max_cost = current.cost(); path_count++; - add_paths(start_node_ind, current, &paths, routing_costs); + add_paths(start_node_ind, current, *paths, routing_costs); } else { util::expand_dijkstra_neighbours(device_ctx.rr_nodes, - current, paths, node_expanded, pq); - node_expanded[node_ind] = true; + current, paths, node_expanded, &pq); + (*node_expanded)[node_ind] = true; } } return std::make_pair(max_cost, path_count); @@ -658,6 +663,8 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen RoutingCosts delay_costs; RoutingCosts base_costs; int total_path_count = 0; + std::vector node_expanded(device_ctx.rr_nodes.size()); + std::vector paths(device_ctx.rr_nodes.size()); for (auto& point : region.points) { // statistics @@ -667,12 +674,12 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen int path_count = 0; for (auto node_ind : point.nodes) { { - auto result = run_dijkstra(node_ind, &delay_costs); + auto result = run_dijkstra(node_ind, &node_expanded, &paths, &delay_costs); max_delay_cost = std::max(max_delay_cost, result.first); path_count += result.second; } { - auto result = run_dijkstra(node_ind, &base_costs); + auto result = run_dijkstra(node_ind, &node_expanded, &paths, &base_costs); max_base_cost = std::max(max_base_cost, result.first); path_count += result.second; } diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp index 46fcfe4c90d..c5c5bb0d55a 100644 --- a/vpr/src/route/router_lookahead_map_utils.cpp +++ b/vpr/src/route/router_lookahead_map_utils.cpp @@ -196,11 +196,11 @@ util::Cost_Entry util::Expansion_Cost_Entry::get_median_entry() const { template void expand_dijkstra_neighbours(const t_rr_graph_storage& rr_nodes, const Entry& parent_entry, - std::unordered_map& paths, - std::vector& node_expanded, + std::vector* paths, + std::vector* node_expanded, std::priority_queue, - std::greater>& pq) { + std::greater>* pq) { int parent_ind = parent_entry.rr_node_ind; auto& parent_node = rr_nodes[parent_ind]; @@ -210,41 +210,37 @@ void expand_dijkstra_neighbours(const t_rr_graph_storage& rr_nodes, int switch_ind = parent_node.edge_switch(iedge); /* skip this child if it has already been expanded from */ - if (node_expanded[child_node_ind]) { + if ((*node_expanded)[child_node_ind]) { continue; } Entry child_entry(child_node_ind, switch_ind, &parent_entry); VTR_ASSERT(child_entry.cost() >= 0); - pq.push(child_entry); /* Create (if it doesn't exist) or update (if the new cost is lower) * to specified node */ Search_Path path_entry = {child_entry.cost(), parent_ind, iedge}; - auto result = paths.insert(std::make_pair( - child_node_ind, - path_entry)); - if (!result.second) { - if (child_entry.cost() < result.first->second.cost) { - result.first->second = path_entry; - } + auto& path = (*paths)[child_node_ind]; + if (path_entry.cost < path.cost) { + pq->push(child_entry); + path = path_entry; } } } template void expand_dijkstra_neighbours(const t_rr_graph_storage& rr_nodes, const PQ_Entry_Delay& parent_entry, - std::unordered_map& paths, - std::vector& node_expanded, + std::vector* paths, + std::vector* node_expanded, std::priority_queue, - std::greater>& pq); + std::greater>* pq); template void expand_dijkstra_neighbours(const t_rr_graph_storage& rr_nodes, const PQ_Entry_Base_Cost& parent_entry, - std::unordered_map& paths, - std::vector& node_expanded, + std::vector* paths, + std::vector* node_expanded, std::priority_queue, - std::greater>& pq); + std::greater>* pq); } // namespace util diff --git a/vpr/src/route/router_lookahead_map_utils.h b/vpr/src/route/router_lookahead_map_utils.h index bc0540ec65e..28b60e3072b 100644 --- a/vpr/src/route/router_lookahead_map_utils.h +++ b/vpr/src/route/router_lookahead_map_utils.h @@ -195,11 +195,11 @@ struct Search_Path { template void expand_dijkstra_neighbours(const t_rr_graph_storage& rr_nodes, const Entry& parent_entry, - std::unordered_map& paths, - std::vector& node_expanded, + std::vector* paths, + std::vector* node_expanded, std::priority_queue, - std::greater>& pq); + std::greater>* pq); } // namespace util From 7144db3a3a00e0b65e5afe7dd748bdebed6759da Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Fri, 7 Feb 2020 17:07:56 -0800 Subject: [PATCH 34/41] Skip empty windows. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/route/connection_box_lookahead_map.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 3f765668c16..6ad88fb2c74 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -940,6 +940,8 @@ static std::vector find_sample_regions(int num_segments) { for (int y = 0; y < SAMPLE_GRID_SIZE; y++) { for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { vtr::Rect window = sample_window(bounding_box, x, y, SAMPLE_GRID_SIZE); + if (window.empty()) continue; + auto histogram = count_histogram(window, segment_counts[i]); SampleRegion region = { /* .segment_type = */ i, From c900a785bf691552130f2b6aaaae178ad80238d5 Mon Sep 17 00:00:00 2001 From: Alessandro Comodi Date: Mon, 8 Jun 2020 14:08:52 +0200 Subject: [PATCH 35/41] vpr: lookahead map enhanced with CHAN -> IPIN delay information Signed-off-by: Alessandro Comodi --- vpr/src/route/router_lookahead_map.cpp | 199 ++++++++++++++++++++++++- 1 file changed, 196 insertions(+), 3 deletions(-) diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp index 0b9772e2e50..797f7c97ee5 100644 --- a/vpr/src/route/router_lookahead_map.cpp +++ b/vpr/src/route/router_lookahead_map.cpp @@ -51,6 +51,11 @@ /* we will profile delay/congestion using this many tracks for each wire type */ #define MAX_TRACK_OFFSET 16 +#define X_OFFSET 2 +#define Y_OFFSET 2 + +#define MAX_EXPANSION_LEVEL 1 + /* we're profiling routing cost over many tracks for each wire type, so we'll have many cost entries at each |dx|,|dy| offset. * there are many ways to "boil down" the many costs at each offset to a single entry for a given (wire type, chan_type) combination -- * we can take the smallest cost, the average, median, etc. This define selects the method we use. @@ -202,6 +207,13 @@ typedef std::vector>> t_src_opin // | // SOURCE/OPIN ptc +typedef std::vector>> t_chan_reachable_ipins; //[0..device_ctx.physical_tile_types.size()-1][0..max_ptc-1][wire_seg_index] + // ^ ^ ^ + // | | | + // physical block type index | Wire to IPIN segment info + // | + // SINK/IPIN ptc + struct t_dijkstra_data { /* a list of boolean flags (one for each rr node) to figure out if a certain node has already been expanded */ vtr::vector node_expanded; @@ -222,12 +234,17 @@ t_wire_cost_map f_wire_cost_map; //Look-up table from SOURCE/OPIN to CHANX/CHANY of various types t_src_opin_reachable_wires f_src_opin_reachable_wires; +//Look-up table from CHANX/CHANY to SINK/IPIN of various types +t_chan_reachable_ipins f_chan_reachable_ipins; + /******** File-Scope Functions ********/ Cost_Entry get_wire_cost_entry(e_rr_type rr_type, int seg_index, int delta_x, int delta_y); static void compute_router_wire_lookahead(const std::vector& segment_inf); static void compute_router_src_opin_lookahead(); +static void compute_router_chan_ipin_lookahead(); static vtr::Point pick_sample_tile(t_physical_tile_type_ptr tile_type, vtr::Point start); void dijkstra_flood_to_wires(int itile, RRNodeId inode, t_src_opin_reachable_wires& src_opin_reachable_wires); +void dijkstra_flood_to_ipins(RRNodeId node, t_chan_reachable_ipins& chan_reachable_ipins); /* returns index of a node from which to start routing */ static RRNodeId get_start_node(int start_x, int start_y, int target_x, int target_y, t_rr_type rr_type, int seg_index, int track_offset); @@ -281,6 +298,7 @@ void MapLookahead::read(const std::string& file) { //Next, compute which wire types are accessible (and the cost to reach them) //from the different physical tile type's SOURCEs & OPINs compute_router_src_opin_lookahead(); + compute_router_chan_ipin_lookahead(); } void MapLookahead::write(const std::string& file) const { @@ -380,15 +398,40 @@ float get_lookahead_map_cost(RRNodeId from_node, RRNodeId to_node, float critica float expected_delay = cost_entry.delay; float expected_congestion = cost_entry.congestion; - expected_cost = criticality_fac * expected_delay + (1.0 - criticality_fac) * expected_congestion; + if (rr_graph.node_type(to_node) == SINK) { + auto to_tile_type = device_ctx.grid[rr_graph.node_xlow(to_node)][rr_graph.node_ylow(to_node)].type; + auto to_tile_index = to_tile_type->index; + + auto to_ptc = rr_graph.node_ptc_num(to_node); + + if (f_chan_reachable_ipins[to_tile_index].size() != 0) { + for (const auto& kv : f_chan_reachable_ipins[to_tile_index][to_ptc]) { + const t_reachable_wire_inf& reachable_wire_inf = kv.second; + + float this_delay = reachable_wire_inf.delay; + float this_congestion = reachable_wire_inf.congestion; + + float this_cost = criticality_fac * (expected_delay + this_delay) + (1.0 - criticality_fac) * (expected_congestion + this_congestion); + expected_cost = std::min(this_cost, expected_cost); + } + } + } + + if (cost_entry.delay == 0) { + expected_cost = std::numeric_limits::max() / 1e12; + } VTR_ASSERT_SAFE_MSG(std::isfinite(expected_cost), vtr::string_fmt("Lookahead failed to estimate cost from %s: %s", rr_node_arch_name(size_t(from_node)).c_str(), describe_rr_node(size_t(from_node)).c_str()) .c_str()); + VTR_LOGV_DEBUG(f_router_debug, "Lookahead delay : %10.3g\n", expected_delay); + VTR_LOGV_DEBUG(f_router_debug, "Lookahead cong : %10.3g\n", expected_congestion); } + VTR_LOGV_DEBUG(f_router_debug, "Lookahead cost : %10.3g\n", expected_cost); + return expected_cost; } @@ -418,6 +461,7 @@ void compute_router_lookahead(const std::vector& segment_inf) { //Next, compute which wire types are accessible (and the cost to reach them) //from the different physical tile type's SOURCEs & OPINs compute_router_src_opin_lookahead(); + compute_router_chan_ipin_lookahead(); } static void compute_router_wire_lookahead(const std::vector& segment_inf) { @@ -630,6 +674,56 @@ static void compute_router_src_opin_lookahead() { } } +static void compute_router_chan_ipin_lookahead() { + vtr::ScopedStartFinishTimer timer("Computing chan/ipin lookahead"); + auto& device_ctx = g_vpr_ctx.device(); + + f_chan_reachable_ipins.clear(); + + f_chan_reachable_ipins.resize(device_ctx.physical_tile_types.size()); + + std::vector rr_nodes_at_loc; + + //We assume that the routing connectivity of each instance of a physical tile is the same, + //and so only measure one instance of each type + for (auto tile_type : device_ctx.physical_tile_types) { + vtr::Point sample_loc(-1, -1); + + sample_loc = pick_sample_tile(&tile_type, sample_loc); + + if (sample_loc.x() == -1 && sample_loc.y() == -1) { + //No untried instances of the current tile type left + VTR_LOG_WARN("Found no sample locations for %s\n", + tile_type.name); + continue; + } + + int min_x = std::max(0, sample_loc.x() - X_OFFSET); + int min_y = std::max(0, sample_loc.y() - Y_OFFSET); + int max_x = std::min(int(device_ctx.grid.width()), sample_loc.x() + X_OFFSET); + int max_y = std::min(int(device_ctx.grid.height()), sample_loc.y() + Y_OFFSET); + + for (int ix = min_x; ix < max_x; ix++) { + for (int iy = min_y; iy < max_y; iy++) { + for (auto rr_type : {CHANX, CHANY}) { + rr_nodes_at_loc.clear(); + + get_rr_node_indices(device_ctx.rr_node_indices, ix, iy, rr_type, &rr_nodes_at_loc); + for (int inode : rr_nodes_at_loc) { + if (inode < 0) continue; + + RRNodeId node_id(inode); + + //Find the IPINs which are reachable from the wires within the bounding box + //around the selected tile location + dijkstra_flood_to_ipins(node_id, f_chan_reachable_ipins); + } + } + } + } + } +} + static vtr::Point pick_sample_tile(t_physical_tile_type_ptr tile_type, vtr::Point prev) { //Very simple for now, just pick the fist matching tile found vtr::Point loc(OPEN, OPEN); @@ -650,10 +744,11 @@ static vtr::Point pick_sample_tile(t_physical_tile_type_ptr tile_type, vtr: if (y < 0) continue; //VTR_LOG(" y: %d\n", y); - if (grid[x][y].type == tile_type) { + if (grid[x][y].type->index == tile_type->index) { loc.set_x(x); loc.set_y(y); - break; + VTR_LOG("RETURN LOC! %s (%d, %d)\n", tile_type->name, x, y); + return loc; } } @@ -765,6 +860,104 @@ void dijkstra_flood_to_wires(int itile, RRNodeId node, t_src_opin_reachable_wire } } +void dijkstra_flood_to_ipins(RRNodeId node, t_chan_reachable_ipins& chan_reachable_ipins) { + auto& device_ctx = g_vpr_ctx.device(); + auto& rr_graph = device_ctx.rr_nodes; + + struct t_pq_entry { + float delay; + float congestion; + RRNodeId node; + int level; + int prev_seg_index; + + bool operator<(const t_pq_entry& rhs) const { + return this->delay < rhs.delay; + } + }; + + std::priority_queue pq; + + t_pq_entry root; + root.congestion = 0.; + root.delay = 0.; + root.node = node; + root.level = 0; + root.prev_seg_index = OPEN; + + /* + * Perform Djikstra from the CHAN of interest, stopping at the the first + * reachable IPIN + * + * Note that typical RR graphs are structured : + * + * CHANX/CHANY --> CHANX/CHANY --> ... --> CHANX/CHANY --> IPIN --> SINK + * | + * --> CHANX/CHANY --> ... --> CHANX/CHANY --> IPIN --> SINK + * | + * ... + * + * and there is a variable number of hops from a given CHANX/CHANY to IPIN. + * To avoid impacting on run-time, a fixed number of hops is performed. This + * should be enough to find the delay from the last CAHN to IPIN connection. + */ + pq.push(root); + while (!pq.empty()) { + t_pq_entry curr = pq.top(); + pq.pop(); + + e_rr_type curr_rr_type = rr_graph.node_type(curr.node); + if (curr_rr_type == IPIN) { + int seg_index = curr.prev_seg_index; + + int node_x = rr_graph.node_xlow(curr.node); + int node_y = rr_graph.node_ylow(curr.node); + + auto tile_type = device_ctx.grid[node_x][node_y].type; + int itile = tile_type->index; + + int ptc = rr_graph.node_ptc_num(curr.node); + + if (ptc >= int(chan_reachable_ipins[itile].size())) { + chan_reachable_ipins[itile].resize(ptc + 1); //Inefficient but functional... + } + + //Keep costs of the best path to reach each wire type + chan_reachable_ipins[itile][ptc][seg_index].wire_rr_type = curr_rr_type; + chan_reachable_ipins[itile][ptc][seg_index].wire_seg_index = seg_index; + chan_reachable_ipins[itile][ptc][seg_index].delay = curr.delay; + chan_reachable_ipins[itile][ptc][seg_index].congestion = curr.congestion; + } else if (curr_rr_type == CHANX || curr_rr_type == CHANY) { + if (curr.level >= MAX_EXPANSION_LEVEL) { + continue; + } + + //We allow expansion through SOURCE/OPIN/IPIN types + int cost_index = rr_graph.node_cost_index(curr.node); + float new_cong = device_ctx.rr_indexed_data[cost_index].base_cost; //Current nodes congestion cost + int seg_index = device_ctx.rr_indexed_data[cost_index].seg_index; + + for (RREdgeId edge : rr_graph.edge_range(curr.node)) { + int iswitch = rr_graph.edge_switch(edge); + float new_delay = device_ctx.rr_switch_inf[iswitch].Tdel; + + RRNodeId next_node = rr_graph.edge_sink_node(edge); + + t_pq_entry next; + next.congestion = new_cong; //Of current node + next.delay = new_delay; //To reach next node + next.node = next_node; + next.level = curr.level + 1; + next.prev_seg_index = seg_index; + + pq.push(next); + } + } else { + VPR_ERROR(VPR_ERROR_ROUTE, "Unrecognized RR type"); + } + } +} + /* returns index of a node from which to start routing */ static RRNodeId get_start_node(int start_x, int start_y, int target_x, int target_y, t_rr_type rr_type, int seg_index, int track_offset) { auto& device_ctx = g_vpr_ctx.device(); From feeab9934180e8992dd23e057977651f41cc609c Mon Sep 17 00:00:00 2001 From: Alessandro Comodi Date: Mon, 15 Jun 2020 12:05:51 +0200 Subject: [PATCH 36/41] route: extract lookahead sampling to external file Signed-off-by: Alessandro Comodi --- vpr/src/route/router_lookahead_sampling.cpp | 228 ++++++++++++++++++++ vpr/src/route/router_lookahead_sampling.h | 35 +++ 2 files changed, 263 insertions(+) create mode 100644 vpr/src/route/router_lookahead_sampling.cpp create mode 100644 vpr/src/route/router_lookahead_sampling.h diff --git a/vpr/src/route/router_lookahead_sampling.cpp b/vpr/src/route/router_lookahead_sampling.cpp new file mode 100644 index 00000000000..601f28680d7 --- /dev/null +++ b/vpr/src/route/router_lookahead_sampling.cpp @@ -0,0 +1,228 @@ +#include "router_lookahead_sampling.h" + +#include + +#include "globals.h" +#include "vtr_math.h" +#include "vtr_geometry.h" +#include "vtr_time.h" + +// Sample based an NxN grid of starting segments, where N = SAMPLE_GRID_SIZE +static constexpr int SAMPLE_GRID_SIZE = 2; + +// quantiles (like percentiles but 0-1) of segment count to use as a selection criteria +// choose locations with higher, but not extreme, counts of each segment type +static constexpr double kSamplingCountLowerQuantile = 0.5; +static constexpr double kSamplingCountUpperQuantile = 0.7; + +// also known as the L1 norm +static int manhattan_distance(const vtr::Point& a, const vtr::Point& b) { + return abs(b.x() - a.x()) + abs(b.y() - a.y()); +} + +// the smallest bounding box containing a node +static vtr::Rect bounding_box_for_node(const ConnectionBoxes& connection_boxes, int node_ind) { + const std::pair* loc = connection_boxes.find_canonical_loc(node_ind); + if (loc == nullptr) { + return vtr::Rect(); + } else { + return vtr::Rect(vtr::Point(loc->first, loc->second)); + } +} + +static vtr::Rect sample_window(const vtr::Rect& bounding_box, int sx, int sy, int n) { + return vtr::Rect(sample(bounding_box, sx, sy, n), + sample(bounding_box, sx + 1, sy + 1, n)); +} + +static std::vector choose_points(const vtr::Matrix& counts, + const vtr::Rect& window, + int min_count, + int max_count) { + VTR_ASSERT(min_count <= max_count); + std::vector points; + for (int y = window.ymin(); y < window.ymax(); y++) { + for (int x = window.xmin(); x < window.xmax(); x++) { + if (counts[x][y] >= min_count && counts[x][y] <= max_count) { + points.push_back(SamplePoint{/* .location = */ vtr::Point(x, y), + /* .nodes = */ {}}); + } + } + } + + vtr::Point center = sample(window, 1, 1, 2); + + // sort by distance from center + std::sort(points.begin(), points.end(), + [&](const SamplePoint& a, const SamplePoint& b) { + return manhattan_distance(a.location, center) < manhattan_distance(b.location, center); + }); + + return points; +} + +// histogram is a map from segment count to number of locations having that count +static int quantile(const std::map& histogram, float ratio) { + if (histogram.empty()) { + return 0; + } + int sum = 0; + for (const auto& entry : histogram) { + sum += entry.second; + } + int limit = std::ceil(sum * ratio); + for (const auto& entry : histogram) { + limit -= entry.second; + if (limit <= 0) { + return entry.first; + } + } + return 0; +} + +// select a good number of segments to find +static std::map count_histogram(const vtr::Rect& box, const vtr::Matrix& counts) { + std::map histogram; + for (int y = box.ymin(); y < box.ymax(); y++) { + for (int x = box.xmin(); x < box.xmax(); x++) { + int count = counts[x][y]; + if (count > 0) { + ++histogram[count]; + } + } + } + return histogram; +} + +// Used to calculate each region's `order.' +// A space-filling curve will order the regions so that +// nearby points stay close in order. A Hilbert curve might +// be better, but a Morton (Z)-order curve is easy to compute, +// because it's just interleaving binary bits, so this +// function interleaves with 0's so that the X and Y +// dimensions can then be OR'ed together. +static uint64_t interleave(uint32_t x) { + uint64_t i = x; + i = (i ^ (i << 16)) & 0x0000ffff0000ffff; + i = (i ^ (i << 8)) & 0x00ff00ff00ff00ff; + i = (i ^ (i << 4)) & 0x0f0f0f0f0f0f0f0f; + i = (i ^ (i << 2)) & 0x3333333333333333; + i = (i ^ (i << 1)) & 0x5555555555555555; + return i; +} + +// for each segment type, find the nearest nodes to an equally spaced grid of points +// within the bounding box for that segment type +std::vector find_sample_regions(int num_segments) { + vtr::ScopedStartFinishTimer timer("finding sample regions"); + std::vector sample_regions; + auto& device_ctx = g_vpr_ctx.device(); + auto& rr_nodes = device_ctx.rr_nodes; + std::vector> segment_counts(num_segments); + + // compute bounding boxes for each segment type + std::vector> bounding_box_for_segment(num_segments, vtr::Rect()); + for (size_t i = 0; i < rr_nodes.size(); i++) { + auto& node = rr_nodes[i]; + if (node.type() != CHANX && node.type() != CHANY) continue; + if (node.capacity() == 0 || node.num_edges() == 0) continue; + int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index; + + VTR_ASSERT(seg_index != OPEN); + VTR_ASSERT(seg_index < num_segments); + + bounding_box_for_segment[seg_index].expand_bounding_box(bounding_box_for_node(device_ctx.connection_boxes, i)); + } + + // initialize counts + for (int seg = 0; seg < num_segments; seg++) { + const auto& box = bounding_box_for_segment[seg]; + segment_counts[seg] = vtr::Matrix({size_t(box.width()), size_t(box.height())}, 0); + } + + // count sample points + for (size_t i = 0; i < rr_nodes.size(); i++) { + auto& node = rr_nodes[i]; + if (node.type() != CHANX && node.type() != CHANY) continue; + if (node.capacity() == 0 || node.num_edges() == 0) continue; + const std::pair* loc = device_ctx.connection_boxes.find_canonical_loc(i); + if (loc == nullptr) continue; + + int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index; + segment_counts[seg_index][loc->first][loc->second] += 1; + + VTR_ASSERT(seg_index != OPEN); + VTR_ASSERT(seg_index < num_segments); + } + + // select sample points + for (int i = 0; i < num_segments; i++) { + const auto& counts = segment_counts[i]; + const auto& bounding_box = bounding_box_for_segment[i]; + if (bounding_box.empty()) continue; + for (int y = 0; y < SAMPLE_GRID_SIZE; y++) { + for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { + vtr::Rect window = sample_window(bounding_box, x, y, SAMPLE_GRID_SIZE); + if (window.empty()) continue; + + auto histogram = count_histogram(window, segment_counts[i]); + SampleRegion region = { + /* .segment_type = */ i, + /* .grid_location = */ vtr::Point(x, y), + /* .points = */ choose_points(counts, window, quantile(histogram, kSamplingCountLowerQuantile), quantile(histogram, kSamplingCountUpperQuantile)), + /* .order = */ 0}; + if (!region.points.empty()) { + /* In order to improve caching, the list of sample points are + * sorted to keep points that are nearby on the Euclidean plane also + * nearby in the vector of sample points. + * + * This means subsequent expansions on the same thread are likely + * to cover a similar set of nodes, so they are more likely to be + * cached. This improves performance by about 7%, which isn't a lot, + * but not a bad improvement for a few lines of code. */ + vtr::Point location = region.points[0].location; + + // interleave bits of X and Y for a Z-curve ordering. + region.order = interleave(location.x()) | (interleave(location.y()) << 1); + + sample_regions.push_back(region); + } + } + } + } + + // sort regions + std::sort(sample_regions.begin(), sample_regions.end(), + [](const SampleRegion& a, const SampleRegion& b) { + return a.order < b.order; + }); + + // build an index of sample points on segment type and location + std::map, SamplePoint*> sample_point_index; + for (auto& region : sample_regions) { + for (auto& point : region.points) { + sample_point_index[std::make_tuple(region.segment_type, point.location.x(), point.location.y())] = &point; + } + } + + // collect the node indices for each segment type at the selected sample points + for (size_t i = 0; i < rr_nodes.size(); i++) { + auto& node = rr_nodes[i]; + if (node.type() != CHANX && node.type() != CHANY) continue; + if (node.capacity() == 0 || node.num_edges() == 0) continue; + const std::pair* loc = device_ctx.connection_boxes.find_canonical_loc(i); + if (loc == nullptr) continue; + + int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index; + + VTR_ASSERT(seg_index != OPEN); + VTR_ASSERT(seg_index < num_segments); + + auto point = sample_point_index.find(std::make_tuple(seg_index, loc->first, loc->second)); + if (point != sample_point_index.end()) { + point->second->nodes.push_back(i); + } + } + + return sample_regions; +} diff --git a/vpr/src/route/router_lookahead_sampling.h b/vpr/src/route/router_lookahead_sampling.h new file mode 100644 index 00000000000..6668792e9c6 --- /dev/null +++ b/vpr/src/route/router_lookahead_sampling.h @@ -0,0 +1,35 @@ +#ifndef ROUTER_LOOKAHEAD_SAMPLING_H +#define ROUTER_LOOKAHEAD_SAMPLING_H + +#include +#include "vtr_geometry.h" +#include "globals.h" + +// a sample point for a segment type, contains all segments at the VPR location +struct SamplePoint { + // canonical location + vtr::Point location; + + // nodes to expand + std::vector nodes; +}; + +struct SampleRegion { + // all nodes in `points' have this segment type + int segment_type; + + // location on the sample grid + vtr::Point grid_location; + + // locations to try + // The computation will keep expanding each of the points + // until a number of paths (segment -> connection box) are found. + std::vector points; + + // used to sort the regions to improve caching + uint64_t order; +}; + +std::vector find_sample_regions(int num_segments); + +#endif From 880e40b1d68fc58aa121c19aa6398cae286aa67c Mon Sep 17 00:00:00 2001 From: Alessandro Comodi Date: Mon, 15 Jun 2020 12:06:12 +0200 Subject: [PATCH 37/41] route: avoid using connection box information Signed-off-by: Alessandro Comodi --- .../route/connection_box_lookahead_map.cpp | 1130 ++++++++++++----- vpr/src/route/connection_box_lookahead_map.h | 24 +- vpr/src/route/router_lookahead_map_utils.cpp | 2 +- 3 files changed, 805 insertions(+), 351 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 6ad88fb2c74..709c3d5bef8 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -6,6 +6,7 @@ #include "connection_box.h" #include "rr_node.h" #include "router_lookahead_map_utils.h" +#include "router_lookahead_sampling.h" #include "globals.h" #include "vtr_math.h" #include "vtr_time.h" @@ -42,8 +43,10 @@ #define CONNECTION_BOX_LOOKAHEAD_MAP_PRINT_COST_MAPS -// Sample based an NxN grid of starting segments, where N = SAMPLE_GRID_SIZE -static constexpr int SAMPLE_GRID_SIZE = 3; +#define X_OFFSET 2 +#define Y_OFFSET 2 + +#define MAX_EXPANSION_LEVEL 1 // Don't continue storing a path after hitting a lower-or-same cost entry. static constexpr bool BREAK_ON_MISS = false; @@ -54,44 +57,12 @@ static constexpr float PENALTY_MIN = 1e-12f; static constexpr int MIN_PATH_COUNT = 1000; -// quantiles (like percentiles but 0-1) of segment count to use as a selection criteria -// choose locations with higher, but not extreme, counts of each segment type -static constexpr double kSamplingCountLowerQuantile = 0.5; -static constexpr double kSamplingCountUpperQuantile = 0.7; - -// a sample point for a segment type, contains all segments at the VPR location -struct SamplePoint { - // canonical location - vtr::Point location; - - // nodes to expand - std::vector nodes; -}; - -struct SampleRegion { - // all nodes in `points' have this segment type - int segment_type; - - // location on the sample grid - vtr::Point grid_location; - - // locations to try - // The computation will keep expanding each of the points - // until a number of paths (segment -> connection box) are found. - std::vector points; - - // used to sort the regions to improve caching - uint64_t order; -}; - template static std::pair run_dijkstra(int start_node_ind, std::vector* node_expanded, std::vector* paths, RoutingCosts* routing_costs); -static std::vector find_sample_regions(int num_segments); - // also known as the L1 norm static int manhattan_distance(const vtr::Point& a, const vtr::Point& b) { return abs(b.x() - a.x()) + abs(b.y() - a.y()); @@ -112,6 +83,51 @@ static vtr::Point closest_point_in_rect(const vtr::Rect& r, const vtr::Poi } } +// deltas calculation +static void get_xy_deltas(const RRNodeId from_node, const RRNodeId to_node, int* delta_x, int* delta_y); +static void adjust_rr_position(const RRNodeId rr, int& x, int& y); +static void adjust_rr_pin_position(const RRNodeId rr, int& x, int& y); +static void adjust_rr_wire_position(const RRNodeId rr, int& x, int& y); +static void adjust_rr_src_sink_position(const RRNodeId rr, int& x, int& y); + +// additional lookups for IPIN/OPIN missing delays +struct t_reachable_wire_inf { + e_rr_type wire_rr_type; + int wire_seg_index; + + //Costs to reach the wire type from the current node + float congestion; + float delay; +}; + +typedef std::vector>> t_src_opin_delays; //[0..device_ctx.physical_tile_types.size()-1][0..max_ptc-1][wire_seg_index] + // ^ ^ ^ + // | | | + // physical block type index | Reachable wire info + // | + // SOURCE/OPIN ptc + +typedef std::vector>> t_chan_ipins_delays; //[0..device_ctx.physical_tile_types.size()-1][0..max_ptc-1][wire_seg_index] + // ^ ^ ^ + // | | | + // physical block type index | Wire to IPIN segment info + // | + // SINK/IPIN ptc + +//Look-up table from SOURCE/OPIN to CHANX/CHANY of various types +t_src_opin_delays f_src_opin_delays; + +//Look-up table from CHANX/CHANY to SINK/IPIN of various types +t_chan_ipins_delays f_chan_ipins_delays; + +constexpr int DIRECT_CONNECT_SPECIAL_SEG_TYPE = -1; + +static void compute_router_src_opin_lookahead(); +static void compute_router_chan_ipin_lookahead(); +static vtr::Point pick_sample_tile(t_physical_tile_type_ptr tile_type, vtr::Point prev); +static void dijkstra_flood_to_wires(int itile, RRNodeId node, t_src_opin_delays& src_opin_delays); +static void dijkstra_flood_to_ipins(RRNodeId node, t_chan_ipins_delays& chan_ipins_delays); + // build the segment map void CostMap::build_segment_map() { const auto& device_ctx = g_vpr_ctx.device(); @@ -127,15 +143,14 @@ void CostMap::build_segment_map() { } // resize internal data structures -void CostMap::set_counts(size_t seg_count, size_t box_count) { +void CostMap::set_counts(size_t seg_count) { cost_map_.clear(); offset_.clear(); penalty_.clear(); - cost_map_.resize({seg_count, box_count}); - offset_.resize({seg_count, box_count}); - penalty_.resize({seg_count, box_count}); + cost_map_.resize({2, seg_count}); + offset_.resize({2, seg_count}); + penalty_.resize({2, seg_count}); seg_count_ = seg_count; - box_count_ = box_count; } // cached node -> segment map @@ -150,19 +165,24 @@ static util::Cost_Entry penalize(const util::Cost_Entry& entry, int distance, fl } // get a cost entry for a segment type, connection box type, and offset -util::Cost_Entry CostMap::find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const { +util::Cost_Entry CostMap::find_cost(int from_seg_index, e_rr_type rr_type, int delta_x, int delta_y) const { + int chan_index = 0; + if (rr_type == CHANY) { + chan_index = 1; + } + VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); - const auto& cost_map = cost_map_[from_seg_index][size_t(box_id)]; + const auto& cost_map = cost_map_[chan_index][from_seg_index]; if (cost_map.dim_size(0) == 0 || cost_map.dim_size(1) == 0) { return util::Cost_Entry(); } - vtr::Point coord(delta_x - offset_[from_seg_index][size_t(box_id)].first, - delta_y - offset_[from_seg_index][size_t(box_id)].second); + vtr::Point coord(delta_x - offset_[chan_index][from_seg_index].first, + delta_y - offset_[chan_index][from_seg_index].second); vtr::Rect bounds(0, 0, cost_map.dim_size(0), cost_map.dim_size(1)); auto closest = closest_point_in_rect(bounds, coord); - auto cost = cost_map_[from_seg_index][size_t(box_id)][closest.x()][closest.y()]; - float penalty = penalty_[from_seg_index][size_t(box_id)]; + auto cost = cost_map_[chan_index][from_seg_index][closest.x()][closest.y()]; + float penalty = penalty_[chan_index][from_seg_index]; auto distance = manhattan_distance(closest, coord); return penalize(cost, distance, penalty); } @@ -170,28 +190,28 @@ util::Cost_Entry CostMap::find_cost(int from_seg_index, ConnectionBoxId box_id, // set the cost map for a segment type and connection box type, filling holes void CostMap::set_cost_map(const RoutingCosts& delay_costs, const RoutingCosts& base_costs) { // calculate the bounding boxes - vtr::Matrix> bounds({seg_count_, box_count_}); + vtr::Matrix> bounds({2, seg_count_}); for (const auto& entry : delay_costs) { - bounds[entry.first.seg_index][size_t(entry.first.box_id)].expand_bounding_box(vtr::Rect(entry.first.delta)); + bounds[entry.first.chan_index][entry.first.seg_index].expand_bounding_box(vtr::Rect(entry.first.delta)); } for (const auto& entry : base_costs) { - bounds[entry.first.seg_index][size_t(entry.first.box_id)].expand_bounding_box(vtr::Rect(entry.first.delta)); + bounds[entry.first.chan_index][entry.first.seg_index].expand_bounding_box(vtr::Rect(entry.first.delta)); } // store bounds - for (size_t seg = 0; seg < seg_count_; seg++) { - for (size_t box = 0; box < box_count_; box++) { - const auto& seg_box_bounds = bounds[seg][box]; - if (seg_box_bounds.empty()) { + for (size_t chan = 0; chan < 2; chan++) { + for (size_t seg = 0; seg < seg_count_; seg++) { + const auto& chan_seg_bounds = bounds[chan][seg]; + if (chan_seg_bounds.empty()) { // Didn't find any sample routes, so routing isn't possible between these segment/connection box types. - offset_[seg][box] = std::make_pair(0, 0); - cost_map_[seg][box] = vtr::NdMatrix( + offset_[chan][seg] = std::make_pair(0, 0); + cost_map_[chan][seg] = vtr::NdMatrix( {size_t(0), size_t(0)}); continue; } else { - offset_[seg][box] = std::make_pair(seg_box_bounds.xmin(), seg_box_bounds.ymin()); - cost_map_[seg][box] = vtr::NdMatrix( - {size_t(seg_box_bounds.width()), size_t(seg_box_bounds.height())}); + offset_[chan][seg] = std::make_pair(chan_seg_bounds.xmin(), chan_seg_bounds.ymin()); + cost_map_[chan][seg] = vtr::NdMatrix( + {size_t(chan_seg_bounds.width()), size_t(chan_seg_bounds.height())}); } } } @@ -199,30 +219,30 @@ void CostMap::set_cost_map(const RoutingCosts& delay_costs, const RoutingCosts& // store entries into the matrices for (const auto& entry : delay_costs) { int seg = entry.first.seg_index; - int box = size_t(entry.first.box_id); - const auto& seg_box_bounds = bounds[seg][box]; - int x = entry.first.delta.x() - seg_box_bounds.xmin(); - int y = entry.first.delta.y() - seg_box_bounds.ymin(); - cost_map_[seg][box][x][y].delay = entry.second; + int chan = entry.first.chan_index; + const auto& chan_seg_bounds = bounds[chan][seg]; + int x = entry.first.delta.x() - chan_seg_bounds.xmin(); + int y = entry.first.delta.y() - chan_seg_bounds.ymin(); + cost_map_[chan][seg][x][y].delay = entry.second; } for (const auto& entry : base_costs) { int seg = entry.first.seg_index; - int box = size_t(entry.first.box_id); - const auto& seg_box_bounds = bounds[seg][box]; - int x = entry.first.delta.x() - seg_box_bounds.xmin(); - int y = entry.first.delta.y() - seg_box_bounds.ymin(); - cost_map_[seg][box][x][y].congestion = entry.second; + int chan = entry.first.chan_index; + const auto& chan_seg_bounds = bounds[chan][seg]; + int x = entry.first.delta.x() - chan_seg_bounds.xmin(); + int y = entry.first.delta.y() - chan_seg_bounds.ymin(); + cost_map_[chan][seg][x][y].congestion = entry.second; } // fill the holes - for (size_t seg = 0; seg < seg_count_; seg++) { - for (size_t box = 0; box < box_count_; box++) { - penalty_[seg][box] = std::numeric_limits::infinity(); - const auto& seg_box_bounds = bounds[seg][box]; - if (seg_box_bounds.empty()) { + for (size_t chan = 0; chan < 2; chan++) { + for (size_t seg = 0; seg < seg_count_; seg++) { + penalty_[chan][seg] = std::numeric_limits::infinity(); + const auto& chan_seg_bounds = bounds[chan][seg]; + if (chan_seg_bounds.empty()) { continue; } - auto& matrix = cost_map_[seg][box]; + auto& matrix = cost_map_[chan][seg]; // calculate delay penalty float min_delay = std::numeric_limits::infinity(), max_delay = 0.f; @@ -243,12 +263,12 @@ void CostMap::set_cost_map(const RoutingCosts& delay_costs, const RoutingCosts& } } float delay_penalty = (max_delay - min_delay) / static_cast(std::max(1, manhattan_distance(max_location, min_location))); - penalty_[seg][box] = delay_penalty; + penalty_[chan][seg] = delay_penalty; // find missing cost entries and fill them in by copying a nearby cost entry std::vector> missing; bool couldnt_fill = false; - auto shifted_bounds = vtr::Rect(0, 0, seg_box_bounds.width(), seg_box_bounds.height()); + auto shifted_bounds = vtr::Rect(0, 0, chan_seg_bounds.width(), chan_seg_bounds.height()); int max_fill = 0; for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { @@ -273,7 +293,7 @@ void CostMap::set_cost_map(const RoutingCosts& delay_costs, const RoutingCosts& } if (!couldnt_fill) { - VTR_LOG("At %d -> %d: max_fill = %d, delay_penalty = %e\n", seg, box, max_fill, delay_penalty); + VTR_LOG("At %d: max_fill = %d, delay_penalty = %e\n", seg, max_fill, delay_penalty); } // write back the missing entries @@ -283,7 +303,7 @@ void CostMap::set_cost_map(const RoutingCosts& delay_costs, const RoutingCosts& if (couldnt_fill) { VTR_LOG_WARN("Couldn't fill holes in the cost matrix for %d -> %ld, %d x %d bounding box\n", - seg, box, seg_box_bounds.width(), seg_box_bounds.height()); + chan, seg, chan_seg_bounds.width(), chan_seg_bounds.height()); for (unsigned y = 0; y < matrix.dim_size(1); y++) { for (unsigned x = 0; x < matrix.dim_size(0); x++) { VTR_ASSERT(!matrix[x][y].valid()); @@ -299,16 +319,15 @@ void CostMap::set_cost_map(const RoutingCosts& delay_costs, const RoutingCosts& // . => at or below average // * => invalid (missing) void CostMap::print(int iseg) const { - const auto& device_ctx = g_vpr_ctx.device(); - for (size_t box_id = 0; - box_id < device_ctx.connection_boxes.num_connection_box_types(); - box_id++) { - auto& matrix = cost_map_[iseg][box_id]; + for (size_t chan_index = 0; + chan_index < 2; + chan_index++) { + auto& matrix = cost_map_[chan_index][iseg]; if (matrix.dim_size(0) == 0 || matrix.dim_size(1) == 0) { - VTR_LOG("cost EMPTY for box_id = %lu\n", box_id); + VTR_LOG("cost EMPTY for chan_index = %lu\n", chan_index); continue; } - VTR_LOG("cost for box_id = %lu (%zu, %zu)\n", box_id, matrix.dim_size(0), matrix.dim_size(1)); + VTR_LOG("cost for chan_index = %lu (%zu, %zu)\n", chan_index, matrix.dim_size(0), matrix.dim_size(1)); double sum = 0.0; for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { @@ -342,10 +361,10 @@ void CostMap::print(int iseg) const { // list segment type and connection box type pairs that have empty cost maps (debug) std::vector> CostMap::list_empty() const { std::vector> results; - for (int iseg = 0; iseg < (int)cost_map_.dim_size(0); iseg++) { - for (int box_id = 0; box_id < (int)cost_map_.dim_size(1); box_id++) { - auto& matrix = cost_map_[iseg][box_id]; - if (matrix.dim_size(0) == 0 || matrix.dim_size(1) == 0) results.push_back(std::make_pair(iseg, box_id)); + for (int chan_index = 0; chan_index < (int)cost_map_.dim_size(1); chan_index++) { + for (int iseg = 0; iseg < (int)cost_map_.dim_size(0); iseg++) { + auto& matrix = cost_map_[chan_index][iseg]; + if (matrix.dim_size(0) == 0 || matrix.dim_size(1) == 0) results.push_back(std::make_pair(chan_index, iseg)); } } return results; @@ -402,6 +421,83 @@ std::pair CostMap::get_nearby_cost_entry(const vtr::NdMat return std::make_pair(fill, n); } + +std::pair ConnectionBoxMapLookahead::get_src_opin_delays(RRNodeId from_node, int delta_x, int delta_y, float criticality_fac) const { + auto& device_ctx = g_vpr_ctx.device(); + auto& rr_graph = device_ctx.rr_nodes; + + e_rr_type from_type = rr_graph.node_type(from_node); + if (from_type == SOURCE || from_type == OPIN) { + //When estimating costs from a SOURCE/OPIN we look-up to find which wire types (and the + //cost to reach them) in f_src_opin_delays. Once we know what wire types are + //reachable, we query the f_wire_cost_map (i.e. the wire lookahead) to get the final + //delay to reach the sink. + + t_physical_tile_type_ptr tile_type = device_ctx.grid[rr_graph.node_xlow(from_node)][rr_graph.node_ylow(from_node)].type; + auto tile_index = std::distance(&device_ctx.physical_tile_types[0], tile_type); + + auto from_ptc = rr_graph.node_ptc_num(from_node); + + if (f_src_opin_delays[tile_index][from_ptc].empty()) { + //During lookahead profiling we were unable to find any wires which connected + //to this PTC. + // + //This can sometimes occur at very low channel widths (e.g. during min W search on + //small designs) where W discretization combined with fraction Fc may cause some + //pins/sources to be left disconnected. + // + //Such RR graphs are of course unroutable, but that should be determined by the + //router. So just return an arbitrary value here rather than error. + + return std::pair(0.f, 0.f); + } else { + //From the current SOURCE/OPIN we look-up the wiretypes which are reachable + //and then add the estimates from those wire types for the distance of interest. + //If there are multiple options we use the minimum value. + + float delay = 0; + float congestion = 0; + float expected_cost = std::numeric_limits::infinity(); + + for (const auto& kv : f_src_opin_delays[tile_index][from_ptc]) { + const t_reachable_wire_inf& reachable_wire_inf = kv.second; + + util::Cost_Entry cost_entry; + if (reachable_wire_inf.wire_rr_type == SINK) { + //Some pins maybe reachable via a direct (OPIN -> IPIN) connection. + //In the lookahead, we treat such connections as 'special' wire types + //with no delay/congestion cost + cost_entry.delay = 0; + cost_entry.congestion = 0; + } else { + //For an actual accessible wire, we query the wire look-up to get it's + //delay and congestion cost estimates + cost_entry = cost_map_.find_cost(reachable_wire_inf.wire_seg_index, reachable_wire_inf.wire_rr_type, delta_x, delta_y); + } + + float this_cost = (criticality_fac) * (reachable_wire_inf.delay + cost_entry.delay) + + (1. - criticality_fac) * (reachable_wire_inf.congestion + cost_entry.congestion); + + if (this_cost < expected_cost) { + delay = reachable_wire_inf.delay; + congestion = reachable_wire_inf.congestion; + } + } + + return std::pair(delay, congestion); + } + + VTR_ASSERT_SAFE_MSG(false, + vtr::string_fmt("Lookahead failed to estimate cost from %s: %s", + rr_node_arch_name(size_t(from_node)).c_str(), + describe_rr_node(size_t(from_node)).c_str()) + .c_str()); + + } + + return std::pair(0.f, 0.f); +} + // derive a cost from the map between two nodes float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, int to_node_ind, @@ -411,73 +507,59 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, } auto& device_ctx = g_vpr_ctx.device(); + auto& rr_graph = device_ctx.rr_nodes; - auto to_node_type = device_ctx.rr_nodes[to_node_ind].type(); - - if (to_node_type == SINK) { - const auto& sink_to_ipin = device_ctx.connection_boxes.find_sink_connection_boxes(to_node_ind); - float cost = std::numeric_limits::infinity(); - - // Find cheapest cost from from_node_ind to IPINs for this SINK. - for (int i = 0; i < sink_to_ipin.ipin_count; ++i) { - cost = std::min(cost, - get_map_cost( - from_node_ind, - sink_to_ipin.ipin_nodes[i], criticality_fac)); - if (cost <= 0.f) break; - } + auto from_node_type = device_ctx.rr_nodes[to_node_ind].type(); - return cost; - } - - if (device_ctx.rr_nodes[to_node_ind].type() != IPIN) { - VPR_THROW(VPR_ERROR_ROUTE, "Not an IPIN/SINK, is %d", to_node_ind); - } - ConnectionBoxId box_id; - std::pair box_location; - float site_pin_delay; - bool found = device_ctx.connection_boxes.find_connection_box( - to_node_ind, &box_id, &box_location, &site_pin_delay); - if (!found) { - VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", to_node_ind); - } + RRNodeId to_node(to_node_ind); + RRNodeId from_node(from_node_ind); - const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(from_node_ind); - if (from_canonical_loc == nullptr) { - VPR_THROW(VPR_ERROR_ROUTE, "No canonical loc for %d (to %d)", - from_node_ind, to_node_ind); - } + int dx, dy; + get_xy_deltas(from_node, to_node, &dx, &dy); - ssize_t dx = ssize_t(from_canonical_loc->first) - ssize_t(box_location.first); - ssize_t dy = ssize_t(from_canonical_loc->second) - ssize_t(box_location.second); + float extra_delay, extra_congestion; + std::tie(extra_delay, extra_congestion) = this->get_src_opin_delays(from_node, dx, dy, criticality_fac); int from_seg_index = cost_map_.node_to_segment(from_node_ind); - util::Cost_Entry cost_entry = cost_map_.find_cost(from_seg_index, box_id, dx, dy); + util::Cost_Entry cost_entry = cost_map_.find_cost(from_seg_index, from_node_type, dx, dy); if (!cost_entry.valid()) { // there is no route VTR_LOGV_DEBUG(f_router_debug, - "Not connected %d (%s, %d) -> %d (%s, %d, (%d, %d))\n", + "Not connected %d (%s, %d) -> %d (%s)\n", from_node_ind, device_ctx.rr_nodes[from_node_ind].type_string(), from_seg_index, - to_node_ind, device_ctx.rr_nodes[to_node_ind].type_string(), - (int)size_t(box_id), (int)box_location.first, (int)box_location.second); + to_node_ind, device_ctx.rr_nodes[to_node_ind].type_string()); return std::numeric_limits::infinity(); } - float expected_delay = cost_entry.delay; - float expected_congestion = cost_entry.congestion; + auto to_tile_type = device_ctx.grid[rr_graph.node_xlow(to_node)][rr_graph.node_ylow(to_node)].type; + auto to_tile_index = to_tile_type->index; + + auto to_ptc = rr_graph.node_ptc_num(to_node); + + float site_pin_delay = std::numeric_limits::infinity(); + if (f_chan_ipins_delays[to_tile_index].size() != 0) { + for (const auto& kv : f_chan_ipins_delays[to_tile_index][to_ptc]) { + const t_reachable_wire_inf& reachable_wire_inf = kv.second; + + float this_delay = reachable_wire_inf.delay; + site_pin_delay = std::min(this_delay, site_pin_delay); + } + } + + float expected_delay = cost_entry.delay + extra_delay; + float expected_congestion = cost_entry.congestion + extra_congestion; expected_delay += site_pin_delay; float expected_cost = criticality_fac * expected_delay + (1.0 - criticality_fac) * expected_congestion; - VTR_LOGV_DEBUG(f_router_debug, "Requested lookahead from node %d to %d\n", from_node_ind, to_node_ind); + VTR_LOGV_DEBUG(f_router_debug, "Requested lookahead from node \n\t%s to \n\t%s\n", + describe_rr_node(from_node_ind).c_str(), + describe_rr_node(to_node_ind).c_str()); const std::string& segment_name = device_ctx.segment_inf[from_seg_index].name; - const std::string& box_name = device_ctx.connection_boxes.get_connection_box(box_id)->name; - VTR_LOGV_DEBUG(f_router_debug, "Lookahead returned %s (%d) to %s (%zu) with distance (%zd, %zd)\n", + VTR_LOGV_DEBUG(f_router_debug, "Lookahead returned %s (%d) with distance (%zd, %zd)\n", segment_name.c_str(), from_seg_index, - box_name.c_str(), - size_t(box_id), dx, dy); VTR_LOGV_DEBUG(f_router_debug, "Lookahead delay: %g\n", expected_delay); VTR_LOGV_DEBUG(f_router_debug, "Lookahead congestion: %g\n", expected_congestion); @@ -486,7 +568,7 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, VTR_LOGV_DEBUG(f_router_debug, "Site pin delay: %g\n", site_pin_delay); if (!std::isfinite(expected_cost) || expected_cost < 0.f) { - VTR_LOG_ERROR("invalid cost for segment %d to connection box %d at (%d, %d)\n", from_seg_index, (int)size_t(box_id), (int)dx, (int)dy); + VTR_LOG_ERROR("invalid cost for segment %d at (%d, %d)\n", from_seg_index, (int)dx, (int)dy); VTR_ASSERT(0); } @@ -500,15 +582,26 @@ static bool add_paths(int start_node_ind, const std::vector& paths, RoutingCosts* routing_costs) { auto& device_ctx = g_vpr_ctx.device(); - ConnectionBoxId box_id; - std::pair box_location; - float site_pin_delay; + auto& rr_graph = device_ctx.rr_nodes; + int node_ind = current.rr_node_ind; - bool found = device_ctx.connection_boxes.find_connection_box( - node_ind, &box_id, &box_location, &site_pin_delay); + RRNodeId node(node_ind); + bool new_sample_found = false; - if (!found) { - VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", node_ind); + + auto to_tile_type = device_ctx.grid[rr_graph.node_xlow(node)][rr_graph.node_ylow(node)].type; + auto to_tile_index = to_tile_type->index; + + auto to_ptc = rr_graph.node_ptc_num(node); + + float site_pin_delay = std::numeric_limits::infinity(); + if (f_chan_ipins_delays[to_tile_index].size() != 0) { + for (const auto& kv : f_chan_ipins_delays[to_tile_index][to_ptc]) { + const t_reachable_wire_inf& reachable_wire_inf = kv.second; + + float this_delay = reachable_wire_inf.delay; + site_pin_delay = std::min(this_delay, site_pin_delay); + } } // reconstruct the path @@ -519,24 +612,32 @@ static bool add_paths(int start_node_ind, } path.push_back(start_node_ind); + current.adjust_Tsw(-site_pin_delay); // add each node along the path subtracting the incremental costs from the current costs Entry start_to_here(start_node_ind, UNDEFINED, nullptr); int parent = start_node_ind; for (auto it = path.rbegin(); it != path.rend(); it++) { + RRNodeId this_node(*it); auto& here = device_ctx.rr_nodes[*it]; int seg_index = device_ctx.rr_indexed_data[here.cost_index()].seg_index; - const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(*it); - if (from_canonical_loc == nullptr) { - VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d", *it); + + auto chan_type = rr_graph.node_type(node); + + int ichan = 0; + if (chan_type == CHANY) { + ichan = 1; } - vtr::Point delta(ssize_t(from_canonical_loc->first) - ssize_t(box_location.first), - ssize_t(from_canonical_loc->second) - ssize_t(box_location.second)); + int delta_x, delta_y; + get_xy_deltas(this_node, node, &delta_x, &delta_y); + + vtr::Point delta(delta_x, delta_y); + RoutingCostKey key = { + ichan, seg_index, - box_id, delta}; if (*it != start_node_ind) { @@ -568,6 +669,209 @@ static bool add_paths(int start_node_ind, return new_sample_found; } +/* returns the absolute delta_x and delta_y offset required to reach to_node from from_node */ +static void get_xy_deltas(const RRNodeId from_node, const RRNodeId to_node, int* delta_x, int* delta_y) { + auto& device_ctx = g_vpr_ctx.device(); + auto& rr_graph = device_ctx.rr_nodes; + + e_rr_type from_type = rr_graph.node_type(from_node); + e_rr_type to_type = rr_graph.node_type(to_node); + + if (!is_chan(from_type) && !is_chan(to_type)) { + //Alternate formulation for non-channel types + int from_x = 0; + int from_y = 0; + adjust_rr_position(from_node, from_x, from_y); + + int to_x = 0; + int to_y = 0; + adjust_rr_position(to_node, to_x, to_y); + + *delta_x = to_x - from_x; + *delta_y = to_y - from_y; + } else { + //Traditional formulation + + /* get chan/seg coordinates of the from/to nodes. seg coordinate is along the wire, + * chan coordinate is orthogonal to the wire */ + int from_seg_low; + int from_seg_high; + int from_chan; + int to_seg; + int to_chan; + if (from_type == CHANY) { + from_seg_low = rr_graph.node_ylow(from_node); + from_seg_high = rr_graph.node_yhigh(from_node); + from_chan = rr_graph.node_xlow(from_node); + to_seg = rr_graph.node_ylow(to_node); + to_chan = rr_graph.node_xlow(to_node); + } else { + from_seg_low = rr_graph.node_xlow(from_node); + from_seg_high = rr_graph.node_xhigh(from_node); + from_chan = rr_graph.node_ylow(from_node); + to_seg = rr_graph.node_xlow(to_node); + to_chan = rr_graph.node_ylow(to_node); + } + + /* now we want to count the minimum number of *channel segments* between the from and to nodes */ + int delta_seg, delta_chan; + + /* orthogonal to wire */ + int no_need_to_pass_by_clb = 0; //if we need orthogonal wires then we don't need to pass by the target CLB along the current wire direction + if (to_chan > from_chan + 1) { + /* above */ + delta_chan = to_chan - from_chan; + no_need_to_pass_by_clb = 1; + } else if (to_chan < from_chan) { + /* below */ + delta_chan = from_chan - to_chan + 1; + no_need_to_pass_by_clb = 1; + } else { + /* adjacent to current channel */ + delta_chan = 0; + no_need_to_pass_by_clb = 0; + } + + /* along same direction as wire. */ + if (to_seg > from_seg_high) { + /* ahead */ + delta_seg = to_seg - from_seg_high - no_need_to_pass_by_clb; + } else if (to_seg < from_seg_low) { + /* behind */ + delta_seg = from_seg_low - to_seg - no_need_to_pass_by_clb; + } else { + /* along the span of the wire */ + delta_seg = 0; + } + + /* account for wire direction. lookahead map was computed by looking up and to the right starting at INC wires. for targets + * that are opposite of the wire direction, let's add 1 to delta_seg */ + e_direction from_dir = rr_graph.node_direction(from_node); + if (is_chan(from_type) + && ((to_seg < from_seg_low && from_dir == INC_DIRECTION) || (to_seg > from_seg_high && from_dir == DEC_DIRECTION))) { + delta_seg++; + } + + if (from_type == CHANY) { + *delta_x = delta_chan; + *delta_y = delta_seg; + } else { + *delta_x = delta_seg; + *delta_y = delta_chan; + } + } + + VTR_ASSERT_SAFE(std::abs(*delta_x) < (int)device_ctx.grid.width()); + VTR_ASSERT_SAFE(std::abs(*delta_y) < (int)device_ctx.grid.height()); +} + +static void adjust_rr_position(const RRNodeId rr, int& x, int& y) { + auto& device_ctx = g_vpr_ctx.device(); + auto& rr_graph = device_ctx.rr_nodes; + + e_rr_type rr_type = rr_graph.node_type(rr); + + if (is_chan(rr_type)) { + adjust_rr_wire_position(rr, x, y); + } else if (is_pin(rr_type)) { + adjust_rr_pin_position(rr, x, y); + } else { + VTR_ASSERT_SAFE(is_src_sink(rr_type)); + adjust_rr_src_sink_position(rr, x, y); + } +} + +static void adjust_rr_pin_position(const RRNodeId rr, int& x, int& y) { + /* + * VPR uses a co-ordinate system where wires above and to the right of a block + * are at the same location as the block: + * + * + * <-----------C + * D + * | +---------+ ^ + * | | | | + * | | (1,1) | | + * | | | | + * V +---------+ | + * A + * B-----------> + * + * So wires are located as follows: + * + * A: (1, 1) CHANY + * B: (1, 0) CHANX + * C: (1, 1) CHANX + * D: (0, 1) CHANY + * + * But all pins incident on the surrounding channels + * would be at (1,1) along with a relevant side. + * + * In the following, we adjust the positions of pins to + * account for the channel they are incident too. + * + * Note that blocks at (0,*) such as IOs, may have (unconnected) + * pins on the left side, so we also clip the minimum x to zero. + * Similarly for blocks at (*,0) we clip the minimum y to zero. + */ + auto& device_ctx = g_vpr_ctx.device(); + auto& rr_graph = device_ctx.rr_nodes; + + VTR_ASSERT_SAFE(is_pin(rr_graph.node_type(rr))); + VTR_ASSERT_SAFE(rr_graph.node_xlow(rr) == rr_graph.node_xhigh(rr)); + VTR_ASSERT_SAFE(rr_graph.node_ylow(rr) == rr_graph.node_yhigh(rr)); + + x = rr_graph.node_xlow(rr); + y = rr_graph.node_ylow(rr); + + e_side rr_side = rr_graph.node_side(rr); + + if (rr_side == LEFT) { + x -= 1; + x = std::max(x, 0); + } else if (rr_side == BOTTOM && y > 0) { + y -= 1; + y = std::max(y, 0); + } +} + +static void adjust_rr_wire_position(const RRNodeId rr, int& x, int& y) { + auto& device_ctx = g_vpr_ctx.device(); + auto& rr_graph = device_ctx.rr_nodes; + + VTR_ASSERT_SAFE(is_chan(rr_graph.node_type(rr))); + + e_direction rr_dir = rr_graph.node_direction(rr); + + if (rr_dir == DEC_DIRECTION) { + x = rr_graph.node_xhigh(rr); + y = rr_graph.node_yhigh(rr); + } else if (rr_dir == INC_DIRECTION) { + x = rr_graph.node_xlow(rr); + y = rr_graph.node_ylow(rr); + } else { + VTR_ASSERT_SAFE(rr_dir == BI_DIRECTION); + //Not sure what to do here... + //Try average for now. + x = vtr::nint((rr_graph.node_xlow(rr) + rr_graph.node_xhigh(rr)) / 2.); + y = vtr::nint((rr_graph.node_ylow(rr) + rr_graph.node_yhigh(rr)) / 2.); + } +} + +static void adjust_rr_src_sink_position(const RRNodeId rr, int& x, int& y) { + //SOURCE/SINK nodes assume the full dimensions of their + //associated block + // + //Use the average position. + auto& device_ctx = g_vpr_ctx.device(); + auto& rr_graph = device_ctx.rr_nodes; + + VTR_ASSERT_SAFE(is_src_sink(rr_graph.node_type(rr))); + + x = vtr::nint((rr_graph.node_xlow(rr) + rr_graph.node_xhigh(rr)) / 2.); + y = vtr::nint((rr_graph.node_ylow(rr) + rr_graph.node_yhigh(rr)) / 2.); +} + /* Runs Dijkstra's algorithm from specified node until all nodes have been * visited. Each time a pin is visited, the delay/congestion information * to that pin is stored to an entry in the routing_cost_map. @@ -581,11 +885,6 @@ static std::pair run_dijkstra(int start_node_ind, RoutingCosts* routing_costs) { auto& device_ctx = g_vpr_ctx.device(); int path_count = 0; - const std::pair* start_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(start_node_ind); - if (start_canonical_loc == nullptr) { - VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d", - start_node_ind); - } /* a list of boolean flags (one for each rr node) to figure out if a * certain node has already been expanded */ @@ -634,6 +933,9 @@ static std::pair run_dijkstra(int start_node_ind, // compute the cost maps for lookahead void ConnectionBoxMapLookahead::compute(const std::vector& segment_inf) { + compute_router_src_opin_lookahead(); + compute_router_chan_ipin_lookahead(); + vtr::ScopedStartFinishTimer timer("Computing connection box lookahead map"); // Initialize rr_node_route_inf if not already @@ -644,8 +946,7 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen /* free previous delay map and allocate new one */ auto& device_ctx = g_vpr_ctx.device(); - cost_map_.set_counts(segment_inf.size(), - device_ctx.connection_boxes.num_connection_box_types()); + cost_map_.set_counts(segment_inf.size()); cost_map_.build_segment_map(); VTR_ASSERT(REPRESENTATIVE_ENTRY_METHOD == util::SMALLEST); @@ -769,239 +1070,389 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen #if defined(CONNECTION_BOX_LOOKAHEAD_MAP_PRINT_EMPTY_MAPS) for (std::pair p : cost_map_.list_empty()) { - int iseg, box_id; - std::tie(iseg, box_id) = p; - VTR_LOG("cost map for %s(%d), connection box %d EMPTY\n", + int ichan, iseg; + std::tie(ichan, iseg) = p; + VTR_LOG("cost map for %s(%d), chan %d EMPTY\n", segment_inf[iseg].name.c_str(), iseg, box_id); } #endif } -// get an expected minimum cost for routing from the current node to the target node -float ConnectionBoxMapLookahead::get_expected_cost( - int current_node, - int target_node, - const t_conn_cost_params& params, - float /*R_upstream*/) const { +static void compute_router_src_opin_lookahead() { + vtr::ScopedStartFinishTimer timer("Computing src/opin lookahead"); auto& device_ctx = g_vpr_ctx.device(); + auto& rr_graph = device_ctx.rr_nodes; - t_rr_type rr_type = device_ctx.rr_nodes[current_node].type(); + f_src_opin_delays.clear(); - if (rr_type == CHANX || rr_type == CHANY) { - return get_map_cost( - current_node, target_node, params.criticality); - } else if (rr_type == IPIN) { /* Change if you're allowing route-throughs */ - return (device_ctx.rr_indexed_data[SINK_COST_INDEX].base_cost); - } else { /* Change this if you want to investigate route-throughs */ - return (0.); - } -} + f_src_opin_delays.resize(device_ctx.physical_tile_types.size()); -// the smallest bounding box containing a node -static vtr::Rect bounding_box_for_node(const ConnectionBoxes& connection_boxes, int node_ind) { - const std::pair* loc = connection_boxes.find_canonical_loc(node_ind); - if (loc == nullptr) { - return vtr::Rect(); - } else { - return vtr::Rect(vtr::Point(loc->first, loc->second)); - } -} + std::vector rr_nodes_at_loc; -static vtr::Rect sample_window(const vtr::Rect& bounding_box, int sx, int sy, int n) { - return vtr::Rect(sample(bounding_box, sx, sy, n), - sample(bounding_box, sx + 1, sy + 1, n)); -} + //We assume that the routing connectivity of each instance of a physical tile is the same, + //and so only measure one instance of each type + for (size_t itile = 0; itile < device_ctx.physical_tile_types.size(); ++itile) { + for (e_rr_type rr_type : {SOURCE, OPIN}) { + vtr::Point sample_loc(-1, -1); + + size_t num_sampled_locs = 0; + bool ptcs_with_no_delays = true; + while (ptcs_with_no_delays) { //Haven't found wire connected to ptc + ptcs_with_no_delays = false; + + sample_loc = pick_sample_tile(&device_ctx.physical_tile_types[itile], sample_loc); + + if (sample_loc.x() == -1 && sample_loc.y() == -1) { + //No untried instances of the current tile type left + VTR_LOG_WARN("Found no %ssample locations for %s in %s\n", + (num_sampled_locs == 0) ? "" : "more ", + rr_node_typename[rr_type], + device_ctx.physical_tile_types[itile].name); + break; + } + + //VTR_LOG("Sampling %s at (%d,%d)\n", device_ctx.physical_tile_types[itile].name, sample_loc.x(), sample_loc.y()); + + rr_nodes_at_loc.clear(); + + get_rr_node_indices(device_ctx.rr_node_indices, sample_loc.x(), sample_loc.y(), rr_type, &rr_nodes_at_loc); + for (int inode : rr_nodes_at_loc) { + if (inode < 0) continue; + + RRNodeId node_id(inode); + + int ptc = rr_graph.node_ptc_num(node_id); + + if (ptc >= int(f_src_opin_delays[itile].size())) { + f_src_opin_delays[itile].resize(ptc + 1); //Inefficient but functional... + } + + //Find the wire types which are reachable from inode and record them and + //the cost to reach them + dijkstra_flood_to_wires(itile, node_id, f_src_opin_delays); + + if (f_src_opin_delays[itile][ptc].empty()) { + VTR_LOGV_DEBUG(f_router_debug, "Found no reachable wires from %s (%s) at (%d,%d)\n", + rr_node_typename[rr_type], + rr_node_arch_name(inode).c_str(), + sample_loc.x(), + sample_loc.y()); + + ptcs_with_no_delays = true; + } + } -static std::vector choose_points(const vtr::Matrix& counts, - const vtr::Rect& window, - int min_count, - int max_count) { - VTR_ASSERT(min_count <= max_count); - std::vector points; - for (int y = window.ymin(); y < window.ymax(); y++) { - for (int x = window.xmin(); x < window.xmax(); x++) { - if (counts[x][y] >= min_count && counts[x][y] <= max_count) { - points.push_back(SamplePoint{/* .location = */ vtr::Point(x, y), - /* .nodes = */ {}}); + ++num_sampled_locs; + } + if (ptcs_with_no_delays) { + VPR_ERROR(VPR_ERROR_ROUTE, "Some SOURCE/OPINs have no reachable wires\n"); } } } +} - vtr::Point center = sample(window, 1, 1, 2); +static void compute_router_chan_ipin_lookahead() { + vtr::ScopedStartFinishTimer timer("Computing chan/ipin lookahead"); + auto& device_ctx = g_vpr_ctx.device(); - // sort by distance from center - std::sort(points.begin(), points.end(), - [&](const SamplePoint& a, const SamplePoint& b) { - return manhattan_distance(a.location, center) < manhattan_distance(b.location, center); - }); + f_chan_ipins_delays.clear(); - return points; -} + f_chan_ipins_delays.resize(device_ctx.physical_tile_types.size()); -// histogram is a map from segment count to number of locations having that count -static int quantile(const std::map& histogram, float ratio) { - if (histogram.empty()) { - return 0; - } - int sum = 0; - for (const auto& entry : histogram) { - sum += entry.second; - } - int limit = std::ceil(sum * ratio); - for (const auto& entry : histogram) { - limit -= entry.second; - if (limit <= 0) { - return entry.first; + std::vector rr_nodes_at_loc; + + //We assume that the routing connectivity of each instance of a physical tile is the same, + //and so only measure one instance of each type + for (auto tile_type : device_ctx.physical_tile_types) { + vtr::Point sample_loc(-1, -1); + + sample_loc = pick_sample_tile(&tile_type, sample_loc); + + if (sample_loc.x() == -1 && sample_loc.y() == -1) { + //No untried instances of the current tile type left + VTR_LOG_WARN("Found no sample locations for %s\n", + tile_type.name); + continue; + } + + int min_x = std::max(0, sample_loc.x() - X_OFFSET); + int min_y = std::max(0, sample_loc.y() - Y_OFFSET); + int max_x = std::min(int(device_ctx.grid.width()), sample_loc.x() + X_OFFSET); + int max_y = std::min(int(device_ctx.grid.height()), sample_loc.y() + Y_OFFSET); + + for (int ix = min_x; ix < max_x; ix++) { + for (int iy = min_y; iy < max_y; iy++) { + for (auto rr_type : {CHANX, CHANY}) { + rr_nodes_at_loc.clear(); + + get_rr_node_indices(device_ctx.rr_node_indices, ix, iy, rr_type, &rr_nodes_at_loc); + for (int inode : rr_nodes_at_loc) { + if (inode < 0) continue; + + RRNodeId node_id(inode); + + //Find the IPINs which are reachable from the wires within the bounding box + //around the selected tile location + dijkstra_flood_to_ipins(node_id, f_chan_ipins_delays); + } + } + } } } - return 0; } -// select a good number of segments to find -static std::map count_histogram(const vtr::Rect& box, const vtr::Matrix& counts) { - std::map histogram; - for (int y = box.ymin(); y < box.ymax(); y++) { - for (int x = box.xmin(); x < box.xmax(); x++) { - int count = counts[x][y]; - if (count > 0) { - ++histogram[count]; +static vtr::Point pick_sample_tile(t_physical_tile_type_ptr tile_type, vtr::Point prev) { + //Very simple for now, just pick the fist matching tile found + vtr::Point loc(OPEN, OPEN); + + //VTR_LOG("Prev: %d,%d\n", prev.x(), prev.y()); + + auto& device_ctx = g_vpr_ctx.device(); + auto& grid = device_ctx.grid; + + int y_init = prev.y() + 1; //Start searching next element above prev + + for (int x = prev.x(); x < int(grid.width()); ++x) { + if (x < 0) continue; + + //VTR_LOG(" x: %d\n", x); + + for (int y = y_init; y < int(grid.height()); ++y) { + if (y < 0) continue; + + //VTR_LOG(" y: %d\n", y); + if (grid[x][y].type->index == tile_type->index) { + loc.set_x(x); + loc.set_y(y); + return loc; } } + + if (loc.x() != OPEN && loc.y() != OPEN) { + break; + } else { + y_init = 0; //Prepare to search next column + } } - return histogram; -} + //VTR_LOG("Next: %d,%d\n", loc.x(), loc.y()); -// Used to calculate each region's `order.' -// A space-filling curve will order the regions so that -// nearby points stay close in order. A Hilbert curve might -// be better, but a Morton (Z)-order curve is easy to compute, -// because it's just interleaving binary bits, so this -// function interleaves with 0's so that the X and Y -// dimensions can then be OR'ed together. -static uint64_t interleave(uint32_t x) { - uint64_t i = x; - i = (i ^ (i << 16)) & 0x0000ffff0000ffff; - i = (i ^ (i << 8)) & 0x00ff00ff00ff00ff; - i = (i ^ (i << 4)) & 0x0f0f0f0f0f0f0f0f; - i = (i ^ (i << 2)) & 0x3333333333333333; - i = (i ^ (i << 1)) & 0x5555555555555555; - return i; + return loc; } -// for each segment type, find the nearest nodes to an equally spaced grid of points -// within the bounding box for that segment type -static std::vector find_sample_regions(int num_segments) { - vtr::ScopedStartFinishTimer timer("finding sample regions"); - std::vector sample_regions; +static void dijkstra_flood_to_wires(int itile, RRNodeId node, t_src_opin_delays& src_opin_delays) { auto& device_ctx = g_vpr_ctx.device(); - auto& rr_nodes = device_ctx.rr_nodes; - std::vector> segment_counts(num_segments); + auto& rr_graph = device_ctx.rr_nodes; - // compute bounding boxes for each segment type - std::vector> bounding_box_for_segment(num_segments, vtr::Rect()); - for (size_t i = 0; i < rr_nodes.size(); i++) { - auto& node = rr_nodes[i]; - if (node.type() != CHANX && node.type() != CHANY) continue; - if (node.capacity() == 0 || node.num_edges() == 0) continue; - int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index; + struct t_pq_entry { + float delay; + float congestion; + RRNodeId node; - VTR_ASSERT(seg_index != OPEN); - VTR_ASSERT(seg_index < num_segments); + bool operator<(const t_pq_entry& rhs) const { + return this->delay < rhs.delay; + } + }; + + std::priority_queue pq; + + t_pq_entry root; + root.congestion = 0.; + root.delay = 0.; + root.node = node; + + int ptc = rr_graph.node_ptc_num(node); + + /* + * Perform Djikstra from the SOURCE/OPIN of interest, stopping at the the first + * reachable wires (i.e until we hit the inter-block routing network), or a SINK + * (via a direct-connect). + * + * Note that typical RR graphs are structured : + * + * SOURCE ---> OPIN --> CHANX/CHANY + * | + * --> OPIN --> CHANX/CHANY + * | + * ... + * + * possibly with direct-connects of the form: + * + * SOURCE --> OPIN --> IPIN --> SINK + * + * and there is a small number of fixed hops from SOURCE/OPIN to CHANX/CHANY or + * to a SINK (via a direct-connect), so this runs very fast (i.e. O(1)) + */ + pq.push(root); + while (!pq.empty()) { + t_pq_entry curr = pq.top(); + pq.pop(); - bounding_box_for_segment[seg_index].expand_bounding_box(bounding_box_for_node(device_ctx.connection_boxes, i)); - } + e_rr_type curr_rr_type = rr_graph.node_type(curr.node); + if (curr_rr_type == CHANX || curr_rr_type == CHANY || curr_rr_type == SINK) { + //We stop expansion at any CHANX/CHANY/SINK + int seg_index; + if (curr_rr_type != SINK) { + //It's a wire, figure out its type + int cost_index = rr_graph.node_cost_index(curr.node); + seg_index = device_ctx.rr_indexed_data[cost_index].seg_index; + } else { + //This is a direct-connect path between an IPIN and OPIN, + //which terminated at a SINK. + // + //We treat this as a 'special' wire type + seg_index = DIRECT_CONNECT_SPECIAL_SEG_TYPE; + } - // initialize counts - for (int seg = 0; seg < num_segments; seg++) { - const auto& box = bounding_box_for_segment[seg]; - segment_counts[seg] = vtr::Matrix({size_t(box.width()), size_t(box.height())}, 0); - } + //Keep costs of the best path to reach each wire type + if (!src_opin_delays[itile][ptc].count(seg_index) + || curr.delay < src_opin_delays[itile][ptc][seg_index].delay) { + src_opin_delays[itile][ptc][seg_index].wire_rr_type = curr_rr_type; + src_opin_delays[itile][ptc][seg_index].wire_seg_index = seg_index; + src_opin_delays[itile][ptc][seg_index].delay = curr.delay; + src_opin_delays[itile][ptc][seg_index].congestion = curr.congestion; + } - // count sample points - for (size_t i = 0; i < rr_nodes.size(); i++) { - auto& node = rr_nodes[i]; - if (node.type() != CHANX && node.type() != CHANY) continue; - if (node.capacity() == 0 || node.num_edges() == 0) continue; - const std::pair* loc = device_ctx.connection_boxes.find_canonical_loc(i); - if (loc == nullptr) continue; + } else if (curr_rr_type == SOURCE || curr_rr_type == OPIN || curr_rr_type == IPIN) { + //We allow expansion through SOURCE/OPIN/IPIN types + int cost_index = rr_graph.node_cost_index(curr.node); + float incr_cong = device_ctx.rr_indexed_data[cost_index].base_cost; //Current nodes congestion cost - int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index; - segment_counts[seg_index][loc->first][loc->second] += 1; + for (RREdgeId edge : rr_graph.edge_range(curr.node)) { + int iswitch = rr_graph.edge_switch(edge); + float incr_delay = device_ctx.rr_switch_inf[iswitch].Tdel; - VTR_ASSERT(seg_index != OPEN); - VTR_ASSERT(seg_index < num_segments); - } + RRNodeId next_node = rr_graph.edge_sink_node(edge); - // select sample points - for (int i = 0; i < num_segments; i++) { - const auto& counts = segment_counts[i]; - const auto& bounding_box = bounding_box_for_segment[i]; - if (bounding_box.empty()) continue; - for (int y = 0; y < SAMPLE_GRID_SIZE; y++) { - for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { - vtr::Rect window = sample_window(bounding_box, x, y, SAMPLE_GRID_SIZE); - if (window.empty()) continue; - - auto histogram = count_histogram(window, segment_counts[i]); - SampleRegion region = { - /* .segment_type = */ i, - /* .grid_location = */ vtr::Point(x, y), - /* .points = */ choose_points(counts, window, quantile(histogram, kSamplingCountLowerQuantile), quantile(histogram, kSamplingCountUpperQuantile)), - /* .order = */ 0}; - if (!region.points.empty()) { - /* In order to improve caching, the list of sample points are - * sorted to keep points that are nearby on the Euclidean plane also - * nearby in the vector of sample points. - * - * This means subsequent expansions on the same thread are likely - * to cover a similar set of nodes, so they are more likely to be - * cached. This improves performance by about 7%, which isn't a lot, - * but not a bad improvement for a few lines of code. */ - vtr::Point location = region.points[0].location; - - // interleave bits of X and Y for a Z-curve ordering. - region.order = interleave(location.x()) | (interleave(location.y()) << 1); - - sample_regions.push_back(region); - } + t_pq_entry next; + next.congestion = curr.congestion + incr_cong; //Of current node + next.delay = curr.delay + incr_delay; //To reach next node + next.node = next_node; + + pq.push(next); } + } else { + VPR_ERROR(VPR_ERROR_ROUTE, "Unrecognized RR type"); } } +} + +static void dijkstra_flood_to_ipins(RRNodeId node, t_chan_ipins_delays& chan_ipins_delays) { + auto& device_ctx = g_vpr_ctx.device(); + auto& rr_graph = device_ctx.rr_nodes; - // sort regions - std::sort(sample_regions.begin(), sample_regions.end(), - [](const SampleRegion& a, const SampleRegion& b) { - return a.order < b.order; - }); + struct t_pq_entry { + float delay; + float congestion; + RRNodeId node; + int level; + int prev_seg_index; - // build an index of sample points on segment type and location - std::map, SamplePoint*> sample_point_index; - for (auto& region : sample_regions) { - for (auto& point : region.points) { - sample_point_index[std::make_tuple(region.segment_type, point.location.x(), point.location.y())] = &point; + bool operator<(const t_pq_entry& rhs) const { + return this->delay < rhs.delay; } - } + }; + + std::priority_queue pq; + + t_pq_entry root; + root.congestion = 0.; + root.delay = 0.; + root.node = node; + root.level = 0; + root.prev_seg_index = OPEN; + + /* + * Perform Djikstra from the CHAN of interest, stopping at the the first + * reachable IPIN + * + * Note that typical RR graphs are structured : + * + * CHANX/CHANY --> CHANX/CHANY --> ... --> CHANX/CHANY --> IPIN --> SINK + * | + * --> CHANX/CHANY --> ... --> CHANX/CHANY --> IPIN --> SINK + * | + * ... + * + * and there is a variable number of hops from a given CHANX/CHANY to IPIN. + * To avoid impacting on run-time, a fixed number of hops is performed. This + * should be enough to find the delay from the last CAHN to IPIN connection. + */ + pq.push(root); + while (!pq.empty()) { + t_pq_entry curr = pq.top(); + pq.pop(); + + e_rr_type curr_rr_type = rr_graph.node_type(curr.node); + if (curr_rr_type == IPIN) { + int seg_index = curr.prev_seg_index; - // collect the node indices for each segment type at the selected sample points - for (size_t i = 0; i < rr_nodes.size(); i++) { - auto& node = rr_nodes[i]; - if (node.type() != CHANX && node.type() != CHANY) continue; - if (node.capacity() == 0 || node.num_edges() == 0) continue; - const std::pair* loc = device_ctx.connection_boxes.find_canonical_loc(i); - if (loc == nullptr) continue; + int node_x = rr_graph.node_xlow(curr.node); + int node_y = rr_graph.node_ylow(curr.node); - int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index; + auto tile_type = device_ctx.grid[node_x][node_y].type; + int itile = tile_type->index; - VTR_ASSERT(seg_index != OPEN); - VTR_ASSERT(seg_index < num_segments); + int ptc = rr_graph.node_ptc_num(curr.node); - auto point = sample_point_index.find(std::make_tuple(seg_index, loc->first, loc->second)); - if (point != sample_point_index.end()) { - point->second->nodes.push_back(i); + if (ptc >= int(chan_ipins_delays[itile].size())) { + chan_ipins_delays[itile].resize(ptc + 1); //Inefficient but functional... + } + + //Keep costs of the best path to reach each wire type + chan_ipins_delays[itile][ptc][seg_index].wire_rr_type = curr_rr_type; + chan_ipins_delays[itile][ptc][seg_index].wire_seg_index = seg_index; + chan_ipins_delays[itile][ptc][seg_index].delay = curr.delay; + chan_ipins_delays[itile][ptc][seg_index].congestion = curr.congestion; + } else if (curr_rr_type == CHANX || curr_rr_type == CHANY) { + if (curr.level >= MAX_EXPANSION_LEVEL) { + continue; + } + + //We allow expansion through SOURCE/OPIN/IPIN types + int cost_index = rr_graph.node_cost_index(curr.node); + float new_cong = device_ctx.rr_indexed_data[cost_index].base_cost; //Current nodes congestion cost + int seg_index = device_ctx.rr_indexed_data[cost_index].seg_index; + + for (RREdgeId edge : rr_graph.edge_range(curr.node)) { + int iswitch = rr_graph.edge_switch(edge); + float new_delay = device_ctx.rr_switch_inf[iswitch].Tdel; + + RRNodeId next_node = rr_graph.edge_sink_node(edge); + + t_pq_entry next; + next.congestion = new_cong; //Of current node + next.delay = new_delay; //To reach next node + next.node = next_node; + next.level = curr.level + 1; + next.prev_seg_index = seg_index; + + pq.push(next); + } + } else { + VPR_ERROR(VPR_ERROR_ROUTE, "Unrecognized RR type"); } } +} + - return sample_regions; +// get an expected minimum cost for routing from the current node to the target node +float ConnectionBoxMapLookahead::get_expected_cost( + int current_node, + int target_node, + const t_conn_cost_params& params, + float /*R_upstream*/) const { + auto& device_ctx = g_vpr_ctx.device(); + + t_rr_type rr_type = device_ctx.rr_nodes[current_node].type(); + + if (rr_type == CHANX || rr_type == CHANY) { + return get_map_cost( + current_node, target_node, params.criticality); + } else if (rr_type == IPIN) { /* Change if you're allowing route-throughs */ + return (device_ctx.rr_indexed_data[SINK_COST_INDEX].base_cost); + } else { /* Change this if you want to investigate route-throughs */ + return (0.); + } } #ifndef VTR_ENABLE_CAPNPROTO @@ -1017,6 +1468,9 @@ void ConnectionBoxMapLookahead::write(const std::string& file) const { void ConnectionBoxMapLookahead::read(const std::string& file) { cost_map_.read(file); + + compute_router_src_opin_lookahead(); + compute_router_chan_ipin_lookahead(); } void ConnectionBoxMapLookahead::write(const std::string& file) const { cost_map_.write(file); diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h index 90a7ab3276f..6d821c817ae 100644 --- a/vpr/src/route/connection_box_lookahead_map.h +++ b/vpr/src/route/connection_box_lookahead_map.h @@ -10,33 +10,33 @@ // Keys in the RoutingCosts map struct RoutingCostKey { + // index of the channel (CHANX or CHANY) + int chan_index; + // segment type index int seg_index; - // type of the destination connection box - ConnectionBoxId box_id; - // offset of the destination connection box from the starting segment vtr::Point delta; RoutingCostKey() : seg_index(-1) , delta(0, 0) {} - RoutingCostKey(int seg_index_arg, ConnectionBoxId box_id_arg, vtr::Point delta_arg) - : seg_index(seg_index_arg) - , box_id(box_id_arg) + RoutingCostKey(int chan_index_arg, int seg_index_arg, vtr::Point delta_arg) + : chan_index(chan_index_arg) + , seg_index(seg_index_arg) , delta(delta_arg) {} bool operator==(const RoutingCostKey& other) const { - return seg_index == other.seg_index && box_id == other.box_id && delta == other.delta; + return seg_index == other.seg_index && chan_index == other.chan_index && delta == other.delta; } }; // hash implementation for RoutingCostKey struct HashRoutingCostKey { std::size_t operator()(RoutingCostKey const& key) const noexcept { - std::size_t hash = std::hash{}(key.seg_index); - vtr::hash_combine(hash, key.box_id); + std::size_t hash = std::hash{}(key.chan_index); + vtr::hash_combine(hash, key.seg_index); vtr::hash_combine(hash, key.delta.x()); vtr::hash_combine(hash, key.delta.y()); return hash; @@ -49,10 +49,10 @@ typedef std::unordered_map RoutingCos // Dense cost maps per source segment and destination connection box types class CostMap { public: - void set_counts(size_t seg_count, size_t box_count); + void set_counts(size_t seg_count); void build_segment_map(); int node_to_segment(int from_node_ind) const; - util::Cost_Entry find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const; + util::Cost_Entry find_cost(int from_seg_index, e_rr_type rr_type, int delta_x, int delta_y) const; void set_cost_map(const RoutingCosts& delay_costs, const RoutingCosts& base_costs); std::pair get_nearby_cost_entry(const vtr::NdMatrix& matrix, int cx, int cy, const vtr::Rect& bounds); void read(const std::string& file); @@ -66,12 +66,12 @@ class CostMap { vtr::Matrix penalty_; std::vector segment_map_; size_t seg_count_; - size_t box_count_; }; // Implementation of RouterLookahead based on source segment and destination connection box types class ConnectionBoxMapLookahead : public RouterLookahead { public: + std::pair get_src_opin_delays(RRNodeId from_node, int delta_x, int delta_y, float criticality_fac) const; float get_expected_cost(int node, int target_node, const t_conn_cost_params& params, float R_upstream) const override; float get_map_cost(int from_node_ind, int to_node_ind, float criticality_fac) const; void compute(const std::vector& segment_inf) override; diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp index c5c5bb0d55a..57c12ab84fd 100644 --- a/vpr/src/route/router_lookahead_map_utils.cpp +++ b/vpr/src/route/router_lookahead_map_utils.cpp @@ -201,7 +201,7 @@ void expand_dijkstra_neighbours(const t_rr_graph_storage& rr_nodes, std::priority_queue, std::greater>* pq) { - int parent_ind = parent_entry.rr_node_ind; + int parent_ind = size_t(parent_entry.rr_node_ind); auto& parent_node = rr_nodes[parent_ind]; From b51ccec858bbae38506fb21fc73ee2b9292cddf5 Mon Sep 17 00:00:00 2001 From: Alessandro Comodi Date: Mon, 15 Jun 2020 12:21:20 +0200 Subject: [PATCH 38/41] make format Signed-off-by: Alessandro Comodi --- .../route/connection_box_lookahead_map.cpp | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 709c3d5bef8..6fa0e0bdb41 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -101,18 +101,18 @@ struct t_reachable_wire_inf { }; typedef std::vector>> t_src_opin_delays; //[0..device_ctx.physical_tile_types.size()-1][0..max_ptc-1][wire_seg_index] - // ^ ^ ^ - // | | | - // physical block type index | Reachable wire info - // | - // SOURCE/OPIN ptc + // ^ ^ ^ + // | | | + // physical block type index | Reachable wire info + // | + // SOURCE/OPIN ptc typedef std::vector>> t_chan_ipins_delays; //[0..device_ctx.physical_tile_types.size()-1][0..max_ptc-1][wire_seg_index] - // ^ ^ ^ - // | | | - // physical block type index | Wire to IPIN segment info - // | - // SINK/IPIN ptc + // ^ ^ ^ + // | | | + // physical block type index | Wire to IPIN segment info + // | + // SINK/IPIN ptc //Look-up table from SOURCE/OPIN to CHANX/CHANY of various types t_src_opin_delays f_src_opin_delays; @@ -421,7 +421,6 @@ std::pair CostMap::get_nearby_cost_entry(const vtr::NdMat return std::make_pair(fill, n); } - std::pair ConnectionBoxMapLookahead::get_src_opin_delays(RRNodeId from_node, int delta_x, int delta_y, float criticality_fac) const { auto& device_ctx = g_vpr_ctx.device(); auto& rr_graph = device_ctx.rr_nodes; @@ -492,7 +491,6 @@ std::pair ConnectionBoxMapLookahead::get_src_opin_delays(RRNodeId rr_node_arch_name(size_t(from_node)).c_str(), describe_rr_node(size_t(from_node)).c_str()) .c_str()); - } return std::pair(0.f, 0.f); @@ -612,7 +610,6 @@ static bool add_paths(int start_node_ind, } path.push_back(start_node_ind); - current.adjust_Tsw(-site_pin_delay); // add each node along the path subtracting the incremental costs from the current costs @@ -1434,7 +1431,6 @@ static void dijkstra_flood_to_ipins(RRNodeId node, t_chan_ipins_delays& chan_ipi } } - // get an expected minimum cost for routing from the current node to the target node float ConnectionBoxMapLookahead::get_expected_cost( int current_node, From 52014e743e587aee61fdd09f6f2aeb281ce8ef48 Mon Sep 17 00:00:00 2001 From: Alessandro Comodi Date: Mon, 15 Jun 2020 12:33:45 +0200 Subject: [PATCH 39/41] lookahead: move connection box to router lookahead map Signed-off-by: Alessandro Comodi --- vpr/src/base/vpr_context.h | 3 - vpr/src/route/connection_box.cpp | 146 -- vpr/src/route/connection_box.h | 82 - .../route/connection_box_lookahead_map.cpp | 1570 ------------ vpr/src/route/connection_box_lookahead_map.h | 85 - vpr/src/route/router_lookahead.cpp | 3 - vpr/src/route/router_lookahead_map.cpp | 2115 ++++++++--------- vpr/src/route/router_lookahead_map.h | 106 +- vpr/src/route/router_lookahead_sampling.cpp | 16 +- vpr/src/route/rr_graph.cpp | 1 - 10 files changed, 1132 insertions(+), 2995 deletions(-) delete mode 100644 vpr/src/route/connection_box.cpp delete mode 100644 vpr/src/route/connection_box.h delete mode 100644 vpr/src/route/connection_box_lookahead_map.cpp delete mode 100644 vpr/src/route/connection_box_lookahead_map.h diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index ec615cc23e5..ff43a23345f 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -24,7 +24,6 @@ #include "place_macro.h" #include "compressed_grid.h" #include "metadata_storage.h" -#include "connection_box.h" //A Context is collection of state relating to a particular part of VPR // @@ -191,8 +190,6 @@ struct DeviceContext : public Context { // Name of rrgraph file read (if any). // Used to determine when reading rrgraph if file is already loaded. std::string read_rr_graph_filename; - - ConnectionBoxes connection_boxes; }; //State relating to power analysis diff --git a/vpr/src/route/connection_box.cpp b/vpr/src/route/connection_box.cpp deleted file mode 100644 index 181fe171989..00000000000 --- a/vpr/src/route/connection_box.cpp +++ /dev/null @@ -1,146 +0,0 @@ -#include "connection_box.h" -#include "vtr_assert.h" -#include "globals.h" - -ConnectionBoxes::ConnectionBoxes() - : size_(std::make_pair(0, 0)) { -} - -size_t ConnectionBoxes::num_connection_box_types() const { - return boxes_.size(); -} - -std::pair ConnectionBoxes::connection_box_grid_size() const { - return size_; -} - -const ConnectionBox* ConnectionBoxes::get_connection_box(ConnectionBoxId box) const { - if (!bool(box)) { - return nullptr; - } - - size_t index = size_t(box); - if (index >= boxes_.size()) { - return nullptr; - } - - return &boxes_.at(index); -} - -bool ConnectionBoxes::find_connection_box(int inode, - ConnectionBoxId* box_id, - std::pair* box_location, - float* site_pin_delay) const { - VTR_ASSERT(box_id != nullptr); - VTR_ASSERT(box_location != nullptr); - VTR_ASSERT(site_pin_delay != nullptr); - - if (inode >= (ssize_t)ipin_map_.size()) { - return false; - } - - const auto& conn_box_loc = ipin_map_[inode]; - if (conn_box_loc.box_id == ConnectionBoxId::INVALID()) { - return false; - } - - *box_id = conn_box_loc.box_id; - *box_location = conn_box_loc.box_location; - *site_pin_delay = conn_box_loc.site_pin_delay; - return true; -} - -// Clear IPIN map and set connection box grid size and box ids. -void ConnectionBoxes::reset_boxes(std::pair size, - const std::vector boxes) { - clear(); - - size_ = size; - boxes_ = boxes; -} - -void ConnectionBoxes::resize_nodes(size_t rr_node_size) { - ipin_map_.resize(rr_node_size); - ipin_map_.shrink_to_fit(); - canonical_loc_map_.resize(rr_node_size, - std::make_pair(-1, -1)); - canonical_loc_map_.shrink_to_fit(); -} - -void ConnectionBoxes::clear() { - ipin_map_.clear(); - size_ = std::make_pair(0, 0); - boxes_.clear(); - canonical_loc_map_.clear(); - sink_to_ipin_.clear(); -} - -void ConnectionBoxes::add_connection_box(int inode, ConnectionBoxId box_id, std::pair box_location, float site_pin_delay) { - // Ensure that box location is in bounds - VTR_ASSERT(box_location.first < size_.first); - VTR_ASSERT(box_location.second < size_.second); - - // Bounds check box_id - VTR_ASSERT(bool(box_id)); - VTR_ASSERT(size_t(box_id) < boxes_.size()); - - // Make sure sink map will not be invalidated upon insertion. - VTR_ASSERT(sink_to_ipin_.size() == 0); - - if (inode >= (ssize_t)(ipin_map_.size())) { - ipin_map_.resize(inode + 1); - } - ipin_map_[inode] = ConnBoxLoc(box_location, site_pin_delay, box_id); -} - -void ConnectionBoxes::add_canonical_loc(int inode, std::pair loc) { - VTR_ASSERT(loc.first < size_.first); - VTR_ASSERT(loc.second < size_.second); - if (inode >= (ssize_t)(canonical_loc_map_.size())) { - canonical_loc_map_.resize(inode + 1); - } - canonical_loc_map_[inode] = loc; -} - -const std::pair* ConnectionBoxes::find_canonical_loc(int inode) const { - if (inode >= (ssize_t)canonical_loc_map_.size()) { - return nullptr; - } - - const auto& canon_loc = canonical_loc_map_[inode]; - if (canon_loc.first == size_t(-1)) { - return nullptr; - } - - return &canon_loc; -} - -void ConnectionBoxes::create_sink_back_ref() { - const auto& device_ctx = g_vpr_ctx.device(); - - sink_to_ipin_.resize(device_ctx.rr_nodes.size(), {{0, 0, 0, 0}, 0}); - - for (size_t i = 0; i < device_ctx.rr_nodes.size(); ++i) { - const auto& ipin_node = device_ctx.rr_nodes[i]; - if (ipin_node.type() != IPIN) { - continue; - } - - if (ipin_map_[i].box_id == ConnectionBoxId::INVALID()) { - continue; - } - - for (auto edge : ipin_node.edges()) { - int sink_inode = ipin_node.edge_sink_node(edge); - VTR_ASSERT(device_ctx.rr_nodes[sink_inode].type() == SINK); - VTR_ASSERT(sink_to_ipin_[sink_inode].ipin_count < 4); - auto& sink_to_ipin = sink_to_ipin_[sink_inode]; - sink_to_ipin.ipin_nodes[sink_to_ipin.ipin_count++] = i; - } - } -} - -const SinkToIpin& ConnectionBoxes::find_sink_connection_boxes( - int inode) const { - return sink_to_ipin_[inode]; -} diff --git a/vpr/src/route/connection_box.h b/vpr/src/route/connection_box.h deleted file mode 100644 index 37af4cc8320..00000000000 --- a/vpr/src/route/connection_box.h +++ /dev/null @@ -1,82 +0,0 @@ -#ifndef CONNECTION_BOX_H -#define CONNECTION_BOX_H -// Some routing graphs have connectivity driven by types of connection boxes. -// This class relates IPIN rr nodes with connection box type and locations, used -// for connection box driven map lookahead. - -#include -#include -#include - -#include "vtr_flat_map.h" -#include "vtr_range.h" -#include "vtr_strong_id.h" - -struct connection_box_tag {}; -typedef vtr::StrongId ConnectionBoxId; - -struct ConnectionBox { - std::string name; -}; - -struct ConnBoxLoc { - ConnBoxLoc() - : box_location(std::make_pair(-1, -1)) {} - ConnBoxLoc( - const std::pair& a_box_location, - float a_site_pin_delay, - ConnectionBoxId a_box_id) - : box_location(a_box_location) - , site_pin_delay(a_site_pin_delay) - , box_id(a_box_id) {} - - std::pair box_location; - float site_pin_delay; - ConnectionBoxId box_id; -}; - -struct SinkToIpin { - int ipin_nodes[4]; - int ipin_count; -}; - -class ConnectionBoxes { - public: - ConnectionBoxes(); - - size_t num_connection_box_types() const; - std::pair connection_box_grid_size() const; - const ConnectionBox* get_connection_box(ConnectionBoxId box) const; - - bool find_connection_box(int inode, - ConnectionBoxId* box_id, - std::pair* box_location, - float* site_pin_delay) const; - const std::pair* find_canonical_loc(int inode) const; - - // Clear IPIN map and set connection box grid size and box ids. - void clear(); - void reset_boxes(std::pair size, - const std::vector boxes); - void resize_nodes(size_t rr_node_size); - - void add_connection_box(int inode, ConnectionBoxId box_id, std::pair box_location, float site_pin_delay); - void add_canonical_loc(int inode, std::pair loc); - - // Create map from SINK's back to IPIN's - // - // This must be called after all connection boxes have been added. - void create_sink_back_ref(); - const SinkToIpin& find_sink_connection_boxes( - int inode) const; - - private: - std::pair size_; - std::vector boxes_; - std::vector ipin_map_; - std::vector sink_to_ipin_; - std::vector> - canonical_loc_map_; -}; - -#endif diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp deleted file mode 100644 index 6fa0e0bdb41..00000000000 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ /dev/null @@ -1,1570 +0,0 @@ -#include "connection_box_lookahead_map.h" - -#include -#include - -#include "connection_box.h" -#include "rr_node.h" -#include "router_lookahead_map_utils.h" -#include "router_lookahead_sampling.h" -#include "globals.h" -#include "vtr_math.h" -#include "vtr_time.h" -#include "vtr_geometry.h" -#include "echo_files.h" -#include "rr_graph.h" - -#include "route_timing.h" -#include "route_common.h" - -#ifdef VTR_ENABLE_CAPNPROTO -# include "capnp/serialize.h" -# include "connection_map.capnp.h" -# include "ndmatrix_serdes.h" -# include "mmap_file.h" -# include "serdes_utils.h" -#endif - -#if defined(VPR_USE_TBB) -# include -# include -#endif - -/* we're profiling routing cost over many tracks for each wire type, so we'll - * have many cost entries at each |dx|,|dy| offset. There are many ways to - * "boil down" the many costs at each offset to a single entry for a given - * (wire type, chan_type) combination we can take the smallest cost, the - * average, median, etc. This define selects the method we use. - * - * NOTE: Currently, only SMALLEST is supported. - * - * See e_representative_entry_method */ -#define REPRESENTATIVE_ENTRY_METHOD util::SMALLEST - -#define CONNECTION_BOX_LOOKAHEAD_MAP_PRINT_COST_MAPS - -#define X_OFFSET 2 -#define Y_OFFSET 2 - -#define MAX_EXPANSION_LEVEL 1 - -// Don't continue storing a path after hitting a lower-or-same cost entry. -static constexpr bool BREAK_ON_MISS = false; - -// Distance penalties filling are calculated based on available samples, but can be adjusted with this factor. -static constexpr float PENALTY_FACTOR = 1.f; -static constexpr float PENALTY_MIN = 1e-12f; - -static constexpr int MIN_PATH_COUNT = 1000; - -template -static std::pair run_dijkstra(int start_node_ind, - std::vector* node_expanded, - std::vector* paths, - RoutingCosts* routing_costs); - -// also known as the L1 norm -static int manhattan_distance(const vtr::Point& a, const vtr::Point& b) { - return abs(b.x() - a.x()) + abs(b.y() - a.y()); -} - -template -constexpr const T& clamp(const T& v, const T& lo, const T& hi) { - return std::min(std::max(v, lo), hi); -} - -template -static vtr::Point closest_point_in_rect(const vtr::Rect& r, const vtr::Point& p) { - if (r.empty()) { - return vtr::Point(0, 0); - } else { - return vtr::Point(clamp(p.x(), r.xmin(), r.xmax() - 1), - clamp(p.y(), r.ymin(), r.ymax() - 1)); - } -} - -// deltas calculation -static void get_xy_deltas(const RRNodeId from_node, const RRNodeId to_node, int* delta_x, int* delta_y); -static void adjust_rr_position(const RRNodeId rr, int& x, int& y); -static void adjust_rr_pin_position(const RRNodeId rr, int& x, int& y); -static void adjust_rr_wire_position(const RRNodeId rr, int& x, int& y); -static void adjust_rr_src_sink_position(const RRNodeId rr, int& x, int& y); - -// additional lookups for IPIN/OPIN missing delays -struct t_reachable_wire_inf { - e_rr_type wire_rr_type; - int wire_seg_index; - - //Costs to reach the wire type from the current node - float congestion; - float delay; -}; - -typedef std::vector>> t_src_opin_delays; //[0..device_ctx.physical_tile_types.size()-1][0..max_ptc-1][wire_seg_index] - // ^ ^ ^ - // | | | - // physical block type index | Reachable wire info - // | - // SOURCE/OPIN ptc - -typedef std::vector>> t_chan_ipins_delays; //[0..device_ctx.physical_tile_types.size()-1][0..max_ptc-1][wire_seg_index] - // ^ ^ ^ - // | | | - // physical block type index | Wire to IPIN segment info - // | - // SINK/IPIN ptc - -//Look-up table from SOURCE/OPIN to CHANX/CHANY of various types -t_src_opin_delays f_src_opin_delays; - -//Look-up table from CHANX/CHANY to SINK/IPIN of various types -t_chan_ipins_delays f_chan_ipins_delays; - -constexpr int DIRECT_CONNECT_SPECIAL_SEG_TYPE = -1; - -static void compute_router_src_opin_lookahead(); -static void compute_router_chan_ipin_lookahead(); -static vtr::Point pick_sample_tile(t_physical_tile_type_ptr tile_type, vtr::Point prev); -static void dijkstra_flood_to_wires(int itile, RRNodeId node, t_src_opin_delays& src_opin_delays); -static void dijkstra_flood_to_ipins(RRNodeId node, t_chan_ipins_delays& chan_ipins_delays); - -// build the segment map -void CostMap::build_segment_map() { - const auto& device_ctx = g_vpr_ctx.device(); - segment_map_.resize(device_ctx.rr_nodes.size()); - for (size_t i = 0; i < segment_map_.size(); ++i) { - auto& from_node = device_ctx.rr_nodes[i]; - - int from_cost_index = from_node.cost_index(); - int from_seg_index = device_ctx.rr_indexed_data[from_cost_index].seg_index; - - segment_map_[i] = from_seg_index; - } -} - -// resize internal data structures -void CostMap::set_counts(size_t seg_count) { - cost_map_.clear(); - offset_.clear(); - penalty_.clear(); - cost_map_.resize({2, seg_count}); - offset_.resize({2, seg_count}); - penalty_.resize({2, seg_count}); - seg_count_ = seg_count; -} - -// cached node -> segment map -int CostMap::node_to_segment(int from_node_ind) const { - return segment_map_[from_node_ind]; -} - -static util::Cost_Entry penalize(const util::Cost_Entry& entry, int distance, float penalty) { - penalty = std::max(penalty, PENALTY_MIN); - return util::Cost_Entry(entry.delay + distance * penalty * PENALTY_FACTOR, - entry.congestion, entry.fill); -} - -// get a cost entry for a segment type, connection box type, and offset -util::Cost_Entry CostMap::find_cost(int from_seg_index, e_rr_type rr_type, int delta_x, int delta_y) const { - int chan_index = 0; - if (rr_type == CHANY) { - chan_index = 1; - } - - VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); - const auto& cost_map = cost_map_[chan_index][from_seg_index]; - if (cost_map.dim_size(0) == 0 || cost_map.dim_size(1) == 0) { - return util::Cost_Entry(); - } - - vtr::Point coord(delta_x - offset_[chan_index][from_seg_index].first, - delta_y - offset_[chan_index][from_seg_index].second); - vtr::Rect bounds(0, 0, cost_map.dim_size(0), cost_map.dim_size(1)); - auto closest = closest_point_in_rect(bounds, coord); - auto cost = cost_map_[chan_index][from_seg_index][closest.x()][closest.y()]; - float penalty = penalty_[chan_index][from_seg_index]; - auto distance = manhattan_distance(closest, coord); - return penalize(cost, distance, penalty); -} - -// set the cost map for a segment type and connection box type, filling holes -void CostMap::set_cost_map(const RoutingCosts& delay_costs, const RoutingCosts& base_costs) { - // calculate the bounding boxes - vtr::Matrix> bounds({2, seg_count_}); - for (const auto& entry : delay_costs) { - bounds[entry.first.chan_index][entry.first.seg_index].expand_bounding_box(vtr::Rect(entry.first.delta)); - } - for (const auto& entry : base_costs) { - bounds[entry.first.chan_index][entry.first.seg_index].expand_bounding_box(vtr::Rect(entry.first.delta)); - } - - // store bounds - for (size_t chan = 0; chan < 2; chan++) { - for (size_t seg = 0; seg < seg_count_; seg++) { - const auto& chan_seg_bounds = bounds[chan][seg]; - if (chan_seg_bounds.empty()) { - // Didn't find any sample routes, so routing isn't possible between these segment/connection box types. - offset_[chan][seg] = std::make_pair(0, 0); - cost_map_[chan][seg] = vtr::NdMatrix( - {size_t(0), size_t(0)}); - continue; - } else { - offset_[chan][seg] = std::make_pair(chan_seg_bounds.xmin(), chan_seg_bounds.ymin()); - cost_map_[chan][seg] = vtr::NdMatrix( - {size_t(chan_seg_bounds.width()), size_t(chan_seg_bounds.height())}); - } - } - } - - // store entries into the matrices - for (const auto& entry : delay_costs) { - int seg = entry.first.seg_index; - int chan = entry.first.chan_index; - const auto& chan_seg_bounds = bounds[chan][seg]; - int x = entry.first.delta.x() - chan_seg_bounds.xmin(); - int y = entry.first.delta.y() - chan_seg_bounds.ymin(); - cost_map_[chan][seg][x][y].delay = entry.second; - } - for (const auto& entry : base_costs) { - int seg = entry.first.seg_index; - int chan = entry.first.chan_index; - const auto& chan_seg_bounds = bounds[chan][seg]; - int x = entry.first.delta.x() - chan_seg_bounds.xmin(); - int y = entry.first.delta.y() - chan_seg_bounds.ymin(); - cost_map_[chan][seg][x][y].congestion = entry.second; - } - - // fill the holes - for (size_t chan = 0; chan < 2; chan++) { - for (size_t seg = 0; seg < seg_count_; seg++) { - penalty_[chan][seg] = std::numeric_limits::infinity(); - const auto& chan_seg_bounds = bounds[chan][seg]; - if (chan_seg_bounds.empty()) { - continue; - } - auto& matrix = cost_map_[chan][seg]; - - // calculate delay penalty - float min_delay = std::numeric_limits::infinity(), max_delay = 0.f; - vtr::Point min_location(0, 0), max_location(0, 0); - for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { - for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { - util::Cost_Entry& cost_entry = matrix[ix][iy]; - if (cost_entry.valid()) { - if (cost_entry.delay < min_delay) { - min_delay = cost_entry.delay; - min_location = vtr::Point(ix, iy); - } - if (cost_entry.delay > max_delay) { - max_delay = cost_entry.delay; - max_location = vtr::Point(ix, iy); - } - } - } - } - float delay_penalty = (max_delay - min_delay) / static_cast(std::max(1, manhattan_distance(max_location, min_location))); - penalty_[chan][seg] = delay_penalty; - - // find missing cost entries and fill them in by copying a nearby cost entry - std::vector> missing; - bool couldnt_fill = false; - auto shifted_bounds = vtr::Rect(0, 0, chan_seg_bounds.width(), chan_seg_bounds.height()); - int max_fill = 0; - for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { - for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { - util::Cost_Entry& cost_entry = matrix[ix][iy]; - if (!cost_entry.valid()) { - // maximum search radius - util::Cost_Entry filler; - int distance; - std::tie(filler, distance) = get_nearby_cost_entry(matrix, ix, iy, shifted_bounds); - if (filler.valid()) { - missing.push_back(std::make_tuple(ix, iy, penalize(filler, distance, delay_penalty))); - max_fill = std::max(max_fill, distance); - } else { - couldnt_fill = true; - } - } - } - if (couldnt_fill) { - // give up trying to fill an empty matrix - break; - } - } - - if (!couldnt_fill) { - VTR_LOG("At %d: max_fill = %d, delay_penalty = %e\n", seg, max_fill, delay_penalty); - } - - // write back the missing entries - for (auto& xy_entry : missing) { - matrix[std::get<0>(xy_entry)][std::get<1>(xy_entry)] = std::get<2>(xy_entry); - } - - if (couldnt_fill) { - VTR_LOG_WARN("Couldn't fill holes in the cost matrix for %d -> %ld, %d x %d bounding box\n", - chan, seg, chan_seg_bounds.width(), chan_seg_bounds.height()); - for (unsigned y = 0; y < matrix.dim_size(1); y++) { - for (unsigned x = 0; x < matrix.dim_size(0); x++) { - VTR_ASSERT(!matrix[x][y].valid()); - } - } - } - } - } -} - -// prints an ASCII diagram of each cost map for a segment type (debug) -// o => above average -// . => at or below average -// * => invalid (missing) -void CostMap::print(int iseg) const { - for (size_t chan_index = 0; - chan_index < 2; - chan_index++) { - auto& matrix = cost_map_[chan_index][iseg]; - if (matrix.dim_size(0) == 0 || matrix.dim_size(1) == 0) { - VTR_LOG("cost EMPTY for chan_index = %lu\n", chan_index); - continue; - } - VTR_LOG("cost for chan_index = %lu (%zu, %zu)\n", chan_index, matrix.dim_size(0), matrix.dim_size(1)); - double sum = 0.0; - for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { - for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { - const auto& entry = matrix[ix][iy]; - if (entry.valid()) { - sum += entry.delay; - } - } - } - double avg = sum / ((double)matrix.dim_size(0) * (double)matrix.dim_size(1)); - for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { - for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { - const auto& entry = matrix[ix][iy]; - if (!entry.valid()) { - VTR_LOG("*"); - } else if (entry.delay * 4 > avg * 5) { - VTR_LOG("O"); - } else if (entry.delay > avg) { - VTR_LOG("o"); - } else if (entry.delay * 4 > avg * 3) { - VTR_LOG("."); - } else { - VTR_LOG(" "); - } - } - VTR_LOG("\n"); - } - } -} - -// list segment type and connection box type pairs that have empty cost maps (debug) -std::vector> CostMap::list_empty() const { - std::vector> results; - for (int chan_index = 0; chan_index < (int)cost_map_.dim_size(1); chan_index++) { - for (int iseg = 0; iseg < (int)cost_map_.dim_size(0); iseg++) { - auto& matrix = cost_map_[chan_index][iseg]; - if (matrix.dim_size(0) == 0 || matrix.dim_size(1) == 0) results.push_back(std::make_pair(chan_index, iseg)); - } - } - return results; -} - -static void assign_min_entry(util::Cost_Entry* dst, const util::Cost_Entry& src) { - if (src.delay < dst->delay) { - dst->delay = src.delay; - } - if (src.congestion < dst->congestion) { - dst->congestion = src.congestion; - } -} - -// find the minimum cost entry from the nearest manhattan distance neighbor -std::pair CostMap::get_nearby_cost_entry(const vtr::NdMatrix& matrix, - int cx, - int cy, - const vtr::Rect& bounds) { - // spiral around (cx, cy) looking for a nearby entry - bool in_bounds = bounds.contains(vtr::Point(cx, cy)); - if (!in_bounds) { - return std::make_pair(util::Cost_Entry(), 0); - } - int n = 0; - util::Cost_Entry fill(matrix[cx][cy]); - fill.fill = true; - - while (in_bounds && !fill.valid()) { - n++; - in_bounds = false; - util::Cost_Entry min_entry; - for (int ox = -n; ox <= n; ox++) { - int x = cx + ox; - int oy = n - abs(ox); - int yp = cy + oy; - int yn = cy - oy; - if (bounds.contains(vtr::Point(x, yp))) { - assign_min_entry(&min_entry, matrix[x][yp]); - in_bounds = true; - } - if (bounds.contains(vtr::Point(x, yn))) { - assign_min_entry(&min_entry, matrix[x][yn]); - in_bounds = true; - } - } - if (!std::isfinite(fill.delay)) { - fill.delay = min_entry.delay; - } - if (!std::isfinite(fill.congestion)) { - fill.congestion = min_entry.congestion; - } - } - return std::make_pair(fill, n); -} - -std::pair ConnectionBoxMapLookahead::get_src_opin_delays(RRNodeId from_node, int delta_x, int delta_y, float criticality_fac) const { - auto& device_ctx = g_vpr_ctx.device(); - auto& rr_graph = device_ctx.rr_nodes; - - e_rr_type from_type = rr_graph.node_type(from_node); - if (from_type == SOURCE || from_type == OPIN) { - //When estimating costs from a SOURCE/OPIN we look-up to find which wire types (and the - //cost to reach them) in f_src_opin_delays. Once we know what wire types are - //reachable, we query the f_wire_cost_map (i.e. the wire lookahead) to get the final - //delay to reach the sink. - - t_physical_tile_type_ptr tile_type = device_ctx.grid[rr_graph.node_xlow(from_node)][rr_graph.node_ylow(from_node)].type; - auto tile_index = std::distance(&device_ctx.physical_tile_types[0], tile_type); - - auto from_ptc = rr_graph.node_ptc_num(from_node); - - if (f_src_opin_delays[tile_index][from_ptc].empty()) { - //During lookahead profiling we were unable to find any wires which connected - //to this PTC. - // - //This can sometimes occur at very low channel widths (e.g. during min W search on - //small designs) where W discretization combined with fraction Fc may cause some - //pins/sources to be left disconnected. - // - //Such RR graphs are of course unroutable, but that should be determined by the - //router. So just return an arbitrary value here rather than error. - - return std::pair(0.f, 0.f); - } else { - //From the current SOURCE/OPIN we look-up the wiretypes which are reachable - //and then add the estimates from those wire types for the distance of interest. - //If there are multiple options we use the minimum value. - - float delay = 0; - float congestion = 0; - float expected_cost = std::numeric_limits::infinity(); - - for (const auto& kv : f_src_opin_delays[tile_index][from_ptc]) { - const t_reachable_wire_inf& reachable_wire_inf = kv.second; - - util::Cost_Entry cost_entry; - if (reachable_wire_inf.wire_rr_type == SINK) { - //Some pins maybe reachable via a direct (OPIN -> IPIN) connection. - //In the lookahead, we treat such connections as 'special' wire types - //with no delay/congestion cost - cost_entry.delay = 0; - cost_entry.congestion = 0; - } else { - //For an actual accessible wire, we query the wire look-up to get it's - //delay and congestion cost estimates - cost_entry = cost_map_.find_cost(reachable_wire_inf.wire_seg_index, reachable_wire_inf.wire_rr_type, delta_x, delta_y); - } - - float this_cost = (criticality_fac) * (reachable_wire_inf.delay + cost_entry.delay) - + (1. - criticality_fac) * (reachable_wire_inf.congestion + cost_entry.congestion); - - if (this_cost < expected_cost) { - delay = reachable_wire_inf.delay; - congestion = reachable_wire_inf.congestion; - } - } - - return std::pair(delay, congestion); - } - - VTR_ASSERT_SAFE_MSG(false, - vtr::string_fmt("Lookahead failed to estimate cost from %s: %s", - rr_node_arch_name(size_t(from_node)).c_str(), - describe_rr_node(size_t(from_node)).c_str()) - .c_str()); - } - - return std::pair(0.f, 0.f); -} - -// derive a cost from the map between two nodes -float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, - int to_node_ind, - float criticality_fac) const { - if (from_node_ind == to_node_ind) { - return 0.f; - } - - auto& device_ctx = g_vpr_ctx.device(); - auto& rr_graph = device_ctx.rr_nodes; - - auto from_node_type = device_ctx.rr_nodes[to_node_ind].type(); - - RRNodeId to_node(to_node_ind); - RRNodeId from_node(from_node_ind); - - int dx, dy; - get_xy_deltas(from_node, to_node, &dx, &dy); - - float extra_delay, extra_congestion; - std::tie(extra_delay, extra_congestion) = this->get_src_opin_delays(from_node, dx, dy, criticality_fac); - - int from_seg_index = cost_map_.node_to_segment(from_node_ind); - util::Cost_Entry cost_entry = cost_map_.find_cost(from_seg_index, from_node_type, dx, dy); - - if (!cost_entry.valid()) { - // there is no route - VTR_LOGV_DEBUG(f_router_debug, - "Not connected %d (%s, %d) -> %d (%s)\n", - from_node_ind, device_ctx.rr_nodes[from_node_ind].type_string(), from_seg_index, - to_node_ind, device_ctx.rr_nodes[to_node_ind].type_string()); - return std::numeric_limits::infinity(); - } - - auto to_tile_type = device_ctx.grid[rr_graph.node_xlow(to_node)][rr_graph.node_ylow(to_node)].type; - auto to_tile_index = to_tile_type->index; - - auto to_ptc = rr_graph.node_ptc_num(to_node); - - float site_pin_delay = std::numeric_limits::infinity(); - if (f_chan_ipins_delays[to_tile_index].size() != 0) { - for (const auto& kv : f_chan_ipins_delays[to_tile_index][to_ptc]) { - const t_reachable_wire_inf& reachable_wire_inf = kv.second; - - float this_delay = reachable_wire_inf.delay; - site_pin_delay = std::min(this_delay, site_pin_delay); - } - } - - float expected_delay = cost_entry.delay + extra_delay; - float expected_congestion = cost_entry.congestion + extra_congestion; - - expected_delay += site_pin_delay; - - float expected_cost = criticality_fac * expected_delay + (1.0 - criticality_fac) * expected_congestion; - - VTR_LOGV_DEBUG(f_router_debug, "Requested lookahead from node \n\t%s to \n\t%s\n", - describe_rr_node(from_node_ind).c_str(), - describe_rr_node(to_node_ind).c_str()); - const std::string& segment_name = device_ctx.segment_inf[from_seg_index].name; - VTR_LOGV_DEBUG(f_router_debug, "Lookahead returned %s (%d) with distance (%zd, %zd)\n", - segment_name.c_str(), from_seg_index, - dx, dy); - VTR_LOGV_DEBUG(f_router_debug, "Lookahead delay: %g\n", expected_delay); - VTR_LOGV_DEBUG(f_router_debug, "Lookahead congestion: %g\n", expected_congestion); - VTR_LOGV_DEBUG(f_router_debug, "Criticality: %g\n", criticality_fac); - VTR_LOGV_DEBUG(f_router_debug, "Lookahead cost: %g\n", expected_cost); - VTR_LOGV_DEBUG(f_router_debug, "Site pin delay: %g\n", site_pin_delay); - - if (!std::isfinite(expected_cost) || expected_cost < 0.f) { - VTR_LOG_ERROR("invalid cost for segment %d at (%d, %d)\n", from_seg_index, (int)dx, (int)dy); - VTR_ASSERT(0); - } - - return expected_cost; -} - -// add a best cost routing path from start_node_ind to node_ind to routing costs -template -static bool add_paths(int start_node_ind, - Entry current, - const std::vector& paths, - RoutingCosts* routing_costs) { - auto& device_ctx = g_vpr_ctx.device(); - auto& rr_graph = device_ctx.rr_nodes; - - int node_ind = current.rr_node_ind; - RRNodeId node(node_ind); - - bool new_sample_found = false; - - auto to_tile_type = device_ctx.grid[rr_graph.node_xlow(node)][rr_graph.node_ylow(node)].type; - auto to_tile_index = to_tile_type->index; - - auto to_ptc = rr_graph.node_ptc_num(node); - - float site_pin_delay = std::numeric_limits::infinity(); - if (f_chan_ipins_delays[to_tile_index].size() != 0) { - for (const auto& kv : f_chan_ipins_delays[to_tile_index][to_ptc]) { - const t_reachable_wire_inf& reachable_wire_inf = kv.second; - - float this_delay = reachable_wire_inf.delay; - site_pin_delay = std::min(this_delay, site_pin_delay); - } - } - - // reconstruct the path - std::vector path; - for (int i = paths[node_ind].parent; i != start_node_ind; i = paths[i].parent) { - VTR_ASSERT(i != -1); - path.push_back(i); - } - path.push_back(start_node_ind); - - current.adjust_Tsw(-site_pin_delay); - - // add each node along the path subtracting the incremental costs from the current costs - Entry start_to_here(start_node_ind, UNDEFINED, nullptr); - int parent = start_node_ind; - for (auto it = path.rbegin(); it != path.rend(); it++) { - RRNodeId this_node(*it); - auto& here = device_ctx.rr_nodes[*it]; - int seg_index = device_ctx.rr_indexed_data[here.cost_index()].seg_index; - - auto chan_type = rr_graph.node_type(node); - - int ichan = 0; - if (chan_type == CHANY) { - ichan = 1; - } - - int delta_x, delta_y; - get_xy_deltas(this_node, node, &delta_x, &delta_y); - - vtr::Point delta(delta_x, delta_y); - - RoutingCostKey key = { - ichan, - seg_index, - delta}; - - if (*it != start_node_ind) { - auto& parent_node = device_ctx.rr_nodes[parent]; - start_to_here = Entry(*it, parent_node.edge_switch(paths[*it].edge), &start_to_here); - parent = *it; - } - - float cost = current.cost() - start_to_here.cost(); - if (cost < 0.f && cost > -10e-15 /* 10 femtosecond */) { - cost = 0.f; - } - - VTR_ASSERT(cost >= 0.f); - - // NOTE: implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST - auto result = routing_costs->insert(std::make_pair(key, cost)); - if (!result.second) { - if (cost < result.first->second) { - result.first->second = cost; - new_sample_found = true; - } else if (BREAK_ON_MISS) { - break; - } - } else { - new_sample_found = true; - } - } - return new_sample_found; -} - -/* returns the absolute delta_x and delta_y offset required to reach to_node from from_node */ -static void get_xy_deltas(const RRNodeId from_node, const RRNodeId to_node, int* delta_x, int* delta_y) { - auto& device_ctx = g_vpr_ctx.device(); - auto& rr_graph = device_ctx.rr_nodes; - - e_rr_type from_type = rr_graph.node_type(from_node); - e_rr_type to_type = rr_graph.node_type(to_node); - - if (!is_chan(from_type) && !is_chan(to_type)) { - //Alternate formulation for non-channel types - int from_x = 0; - int from_y = 0; - adjust_rr_position(from_node, from_x, from_y); - - int to_x = 0; - int to_y = 0; - adjust_rr_position(to_node, to_x, to_y); - - *delta_x = to_x - from_x; - *delta_y = to_y - from_y; - } else { - //Traditional formulation - - /* get chan/seg coordinates of the from/to nodes. seg coordinate is along the wire, - * chan coordinate is orthogonal to the wire */ - int from_seg_low; - int from_seg_high; - int from_chan; - int to_seg; - int to_chan; - if (from_type == CHANY) { - from_seg_low = rr_graph.node_ylow(from_node); - from_seg_high = rr_graph.node_yhigh(from_node); - from_chan = rr_graph.node_xlow(from_node); - to_seg = rr_graph.node_ylow(to_node); - to_chan = rr_graph.node_xlow(to_node); - } else { - from_seg_low = rr_graph.node_xlow(from_node); - from_seg_high = rr_graph.node_xhigh(from_node); - from_chan = rr_graph.node_ylow(from_node); - to_seg = rr_graph.node_xlow(to_node); - to_chan = rr_graph.node_ylow(to_node); - } - - /* now we want to count the minimum number of *channel segments* between the from and to nodes */ - int delta_seg, delta_chan; - - /* orthogonal to wire */ - int no_need_to_pass_by_clb = 0; //if we need orthogonal wires then we don't need to pass by the target CLB along the current wire direction - if (to_chan > from_chan + 1) { - /* above */ - delta_chan = to_chan - from_chan; - no_need_to_pass_by_clb = 1; - } else if (to_chan < from_chan) { - /* below */ - delta_chan = from_chan - to_chan + 1; - no_need_to_pass_by_clb = 1; - } else { - /* adjacent to current channel */ - delta_chan = 0; - no_need_to_pass_by_clb = 0; - } - - /* along same direction as wire. */ - if (to_seg > from_seg_high) { - /* ahead */ - delta_seg = to_seg - from_seg_high - no_need_to_pass_by_clb; - } else if (to_seg < from_seg_low) { - /* behind */ - delta_seg = from_seg_low - to_seg - no_need_to_pass_by_clb; - } else { - /* along the span of the wire */ - delta_seg = 0; - } - - /* account for wire direction. lookahead map was computed by looking up and to the right starting at INC wires. for targets - * that are opposite of the wire direction, let's add 1 to delta_seg */ - e_direction from_dir = rr_graph.node_direction(from_node); - if (is_chan(from_type) - && ((to_seg < from_seg_low && from_dir == INC_DIRECTION) || (to_seg > from_seg_high && from_dir == DEC_DIRECTION))) { - delta_seg++; - } - - if (from_type == CHANY) { - *delta_x = delta_chan; - *delta_y = delta_seg; - } else { - *delta_x = delta_seg; - *delta_y = delta_chan; - } - } - - VTR_ASSERT_SAFE(std::abs(*delta_x) < (int)device_ctx.grid.width()); - VTR_ASSERT_SAFE(std::abs(*delta_y) < (int)device_ctx.grid.height()); -} - -static void adjust_rr_position(const RRNodeId rr, int& x, int& y) { - auto& device_ctx = g_vpr_ctx.device(); - auto& rr_graph = device_ctx.rr_nodes; - - e_rr_type rr_type = rr_graph.node_type(rr); - - if (is_chan(rr_type)) { - adjust_rr_wire_position(rr, x, y); - } else if (is_pin(rr_type)) { - adjust_rr_pin_position(rr, x, y); - } else { - VTR_ASSERT_SAFE(is_src_sink(rr_type)); - adjust_rr_src_sink_position(rr, x, y); - } -} - -static void adjust_rr_pin_position(const RRNodeId rr, int& x, int& y) { - /* - * VPR uses a co-ordinate system where wires above and to the right of a block - * are at the same location as the block: - * - * - * <-----------C - * D - * | +---------+ ^ - * | | | | - * | | (1,1) | | - * | | | | - * V +---------+ | - * A - * B-----------> - * - * So wires are located as follows: - * - * A: (1, 1) CHANY - * B: (1, 0) CHANX - * C: (1, 1) CHANX - * D: (0, 1) CHANY - * - * But all pins incident on the surrounding channels - * would be at (1,1) along with a relevant side. - * - * In the following, we adjust the positions of pins to - * account for the channel they are incident too. - * - * Note that blocks at (0,*) such as IOs, may have (unconnected) - * pins on the left side, so we also clip the minimum x to zero. - * Similarly for blocks at (*,0) we clip the minimum y to zero. - */ - auto& device_ctx = g_vpr_ctx.device(); - auto& rr_graph = device_ctx.rr_nodes; - - VTR_ASSERT_SAFE(is_pin(rr_graph.node_type(rr))); - VTR_ASSERT_SAFE(rr_graph.node_xlow(rr) == rr_graph.node_xhigh(rr)); - VTR_ASSERT_SAFE(rr_graph.node_ylow(rr) == rr_graph.node_yhigh(rr)); - - x = rr_graph.node_xlow(rr); - y = rr_graph.node_ylow(rr); - - e_side rr_side = rr_graph.node_side(rr); - - if (rr_side == LEFT) { - x -= 1; - x = std::max(x, 0); - } else if (rr_side == BOTTOM && y > 0) { - y -= 1; - y = std::max(y, 0); - } -} - -static void adjust_rr_wire_position(const RRNodeId rr, int& x, int& y) { - auto& device_ctx = g_vpr_ctx.device(); - auto& rr_graph = device_ctx.rr_nodes; - - VTR_ASSERT_SAFE(is_chan(rr_graph.node_type(rr))); - - e_direction rr_dir = rr_graph.node_direction(rr); - - if (rr_dir == DEC_DIRECTION) { - x = rr_graph.node_xhigh(rr); - y = rr_graph.node_yhigh(rr); - } else if (rr_dir == INC_DIRECTION) { - x = rr_graph.node_xlow(rr); - y = rr_graph.node_ylow(rr); - } else { - VTR_ASSERT_SAFE(rr_dir == BI_DIRECTION); - //Not sure what to do here... - //Try average for now. - x = vtr::nint((rr_graph.node_xlow(rr) + rr_graph.node_xhigh(rr)) / 2.); - y = vtr::nint((rr_graph.node_ylow(rr) + rr_graph.node_yhigh(rr)) / 2.); - } -} - -static void adjust_rr_src_sink_position(const RRNodeId rr, int& x, int& y) { - //SOURCE/SINK nodes assume the full dimensions of their - //associated block - // - //Use the average position. - auto& device_ctx = g_vpr_ctx.device(); - auto& rr_graph = device_ctx.rr_nodes; - - VTR_ASSERT_SAFE(is_src_sink(rr_graph.node_type(rr))); - - x = vtr::nint((rr_graph.node_xlow(rr) + rr_graph.node_xhigh(rr)) / 2.); - y = vtr::nint((rr_graph.node_ylow(rr) + rr_graph.node_yhigh(rr)) / 2.); -} - -/* Runs Dijkstra's algorithm from specified node until all nodes have been - * visited. Each time a pin is visited, the delay/congestion information - * to that pin is stored to an entry in the routing_cost_map. - * - * Returns the maximum (last) minimum cost path stored, and - * the number of paths from start_node_ind stored. */ -template -static std::pair run_dijkstra(int start_node_ind, - std::vector* node_expanded, - std::vector* paths, - RoutingCosts* routing_costs) { - auto& device_ctx = g_vpr_ctx.device(); - int path_count = 0; - - /* a list of boolean flags (one for each rr node) to figure out if a - * certain node has already been expanded */ - std::fill(node_expanded->begin(), node_expanded->end(), false); - /* For each node keep a list of the cost with which that node has been - * visited (used to determine whether to push a candidate node onto the - * expansion queue. - * Also store the parent node so we can reconstruct a specific path. */ - std::fill(paths->begin(), paths->end(), util::Search_Path{std::numeric_limits::infinity(), -1, -1}); - /* a priority queue for expansion */ - std::priority_queue, std::greater> pq; - - /* first entry has no upstream delay or congestion */ - Entry first_entry(start_node_ind, UNDEFINED, nullptr); - float max_cost = first_entry.cost(); - - pq.push(first_entry); - - /* now do routing */ - while (!pq.empty()) { - auto current = pq.top(); - pq.pop(); - - int node_ind = current.rr_node_ind; - - /* check that we haven't already expanded from this node */ - if ((*node_expanded)[node_ind]) { - continue; - } - - /* if this node is an ipin record its congestion/delay in the routing_cost_map */ - if (device_ctx.rr_nodes[node_ind].type() == IPIN) { - // the last cost should be the highest - max_cost = current.cost(); - - path_count++; - add_paths(start_node_ind, current, *paths, routing_costs); - } else { - util::expand_dijkstra_neighbours(device_ctx.rr_nodes, - current, paths, node_expanded, &pq); - (*node_expanded)[node_ind] = true; - } - } - return std::make_pair(max_cost, path_count); -} - -// compute the cost maps for lookahead -void ConnectionBoxMapLookahead::compute(const std::vector& segment_inf) { - compute_router_src_opin_lookahead(); - compute_router_chan_ipin_lookahead(); - - vtr::ScopedStartFinishTimer timer("Computing connection box lookahead map"); - - // Initialize rr_node_route_inf if not already - alloc_and_load_rr_node_route_structs(); - - size_t num_segments = segment_inf.size(); - std::vector sample_regions = find_sample_regions(num_segments); - - /* free previous delay map and allocate new one */ - auto& device_ctx = g_vpr_ctx.device(); - cost_map_.set_counts(segment_inf.size()); - cost_map_.build_segment_map(); - - VTR_ASSERT(REPRESENTATIVE_ENTRY_METHOD == util::SMALLEST); - RoutingCosts all_delay_costs; - RoutingCosts all_base_costs; - - /* run Dijkstra's algorithm for each segment type & channel type combination */ -#if defined(VPR_USE_TBB) - tbb::mutex all_costs_mutex; - tbb::parallel_for_each(sample_regions, [&](const SampleRegion& region) { -#else - for (const auto& region : sample_regions) { -#endif - // holds the cost entries for a run - RoutingCosts delay_costs; - RoutingCosts base_costs; - int total_path_count = 0; - std::vector node_expanded(device_ctx.rr_nodes.size()); - std::vector paths(device_ctx.rr_nodes.size()); - - for (auto& point : region.points) { - // statistics - vtr::Timer run_timer; - float max_delay_cost = 0.f; - float max_base_cost = 0.f; - int path_count = 0; - for (auto node_ind : point.nodes) { - { - auto result = run_dijkstra(node_ind, &node_expanded, &paths, &delay_costs); - max_delay_cost = std::max(max_delay_cost, result.first); - path_count += result.second; - } - { - auto result = run_dijkstra(node_ind, &node_expanded, &paths, &base_costs); - max_base_cost = std::max(max_base_cost, result.first); - path_count += result.second; - } - } - - if (path_count > 0) { - VTR_LOG("Expanded %d paths of segment type %s(%d) starting at (%d, %d) from %d segments, max_cost %e %e (%g paths/sec)\n", - path_count, segment_inf[region.segment_type].name.c_str(), region.segment_type, - point.location.x(), point.location.y(), - (int)point.nodes.size(), - max_delay_cost, max_base_cost, - path_count / run_timer.elapsed_sec()); - } - - /* - * if (path_count == 0) { - * for (auto node_ind : point.nodes) { - * VTR_LOG("Expanded node %s\n", describe_rr_node(node_ind).c_str()); - * } - * } - */ - - total_path_count += path_count; - if (total_path_count > MIN_PATH_COUNT) { - break; - } - } - -#if defined(VPR_USE_TBB) - all_costs_mutex.lock(); -#endif - - if (total_path_count == 0) { - VTR_LOG_WARN("No paths found for sample region %s(%d, %d)\n", - segment_inf[region.segment_type].name.c_str(), region.grid_location.x(), region.grid_location.y()); - } - - // combine the cost map from this run with the final cost maps for each segment - for (const auto& cost : delay_costs) { - const auto& val = cost.second; - auto result = all_delay_costs.insert(std::make_pair(cost.first, val)); - if (!result.second) { - // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST - result.first->second = std::min(result.first->second, val); - } - } - for (const auto& cost : base_costs) { - const auto& val = cost.second; - auto result = all_base_costs.insert(std::make_pair(cost.first, val)); - if (!result.second) { - // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST - result.first->second = std::min(result.first->second, val); - } - } - -#if defined(VPR_USE_TBB) - all_costs_mutex.unlock(); - }); -#else - } -#endif - - VTR_LOG("Combining results\n"); - /* boil down the cost list in routing_cost_map at each coordinate to a - * representative cost entry and store it in the lookahead cost map */ - cost_map_.set_cost_map(all_delay_costs, all_base_costs); - -// diagnostics -#if defined(CONNECTION_BOX_LOOKAHEAD_MAP_PRINT_COST_ENTRIES) - for (auto& cost : all_costs) { - const auto& key = cost.first; - const auto& val = cost.second; - VTR_LOG("%d -> %d (%d, %d): %g, %g\n", - val.from_node, val.to_node, - key.delta.x(), key.delta.y(), - val.cost_entry.delay, val.cost_entry.congestion); - } -#endif - -#if defined(CONNECTION_BOX_LOOKAHEAD_MAP_PRINT_COST_MAPS) - for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { - VTR_LOG("cost map for %s(%d)\n", - segment_inf[iseg].name.c_str(), iseg); - cost_map_.print(iseg); - } -#endif - -#if defined(CONNECTION_BOX_LOOKAHEAD_MAP_PRINT_EMPTY_MAPS) - for (std::pair p : cost_map_.list_empty()) { - int ichan, iseg; - std::tie(ichan, iseg) = p; - VTR_LOG("cost map for %s(%d), chan %d EMPTY\n", - segment_inf[iseg].name.c_str(), iseg, box_id); - } -#endif -} - -static void compute_router_src_opin_lookahead() { - vtr::ScopedStartFinishTimer timer("Computing src/opin lookahead"); - auto& device_ctx = g_vpr_ctx.device(); - auto& rr_graph = device_ctx.rr_nodes; - - f_src_opin_delays.clear(); - - f_src_opin_delays.resize(device_ctx.physical_tile_types.size()); - - std::vector rr_nodes_at_loc; - - //We assume that the routing connectivity of each instance of a physical tile is the same, - //and so only measure one instance of each type - for (size_t itile = 0; itile < device_ctx.physical_tile_types.size(); ++itile) { - for (e_rr_type rr_type : {SOURCE, OPIN}) { - vtr::Point sample_loc(-1, -1); - - size_t num_sampled_locs = 0; - bool ptcs_with_no_delays = true; - while (ptcs_with_no_delays) { //Haven't found wire connected to ptc - ptcs_with_no_delays = false; - - sample_loc = pick_sample_tile(&device_ctx.physical_tile_types[itile], sample_loc); - - if (sample_loc.x() == -1 && sample_loc.y() == -1) { - //No untried instances of the current tile type left - VTR_LOG_WARN("Found no %ssample locations for %s in %s\n", - (num_sampled_locs == 0) ? "" : "more ", - rr_node_typename[rr_type], - device_ctx.physical_tile_types[itile].name); - break; - } - - //VTR_LOG("Sampling %s at (%d,%d)\n", device_ctx.physical_tile_types[itile].name, sample_loc.x(), sample_loc.y()); - - rr_nodes_at_loc.clear(); - - get_rr_node_indices(device_ctx.rr_node_indices, sample_loc.x(), sample_loc.y(), rr_type, &rr_nodes_at_loc); - for (int inode : rr_nodes_at_loc) { - if (inode < 0) continue; - - RRNodeId node_id(inode); - - int ptc = rr_graph.node_ptc_num(node_id); - - if (ptc >= int(f_src_opin_delays[itile].size())) { - f_src_opin_delays[itile].resize(ptc + 1); //Inefficient but functional... - } - - //Find the wire types which are reachable from inode and record them and - //the cost to reach them - dijkstra_flood_to_wires(itile, node_id, f_src_opin_delays); - - if (f_src_opin_delays[itile][ptc].empty()) { - VTR_LOGV_DEBUG(f_router_debug, "Found no reachable wires from %s (%s) at (%d,%d)\n", - rr_node_typename[rr_type], - rr_node_arch_name(inode).c_str(), - sample_loc.x(), - sample_loc.y()); - - ptcs_with_no_delays = true; - } - } - - ++num_sampled_locs; - } - if (ptcs_with_no_delays) { - VPR_ERROR(VPR_ERROR_ROUTE, "Some SOURCE/OPINs have no reachable wires\n"); - } - } - } -} - -static void compute_router_chan_ipin_lookahead() { - vtr::ScopedStartFinishTimer timer("Computing chan/ipin lookahead"); - auto& device_ctx = g_vpr_ctx.device(); - - f_chan_ipins_delays.clear(); - - f_chan_ipins_delays.resize(device_ctx.physical_tile_types.size()); - - std::vector rr_nodes_at_loc; - - //We assume that the routing connectivity of each instance of a physical tile is the same, - //and so only measure one instance of each type - for (auto tile_type : device_ctx.physical_tile_types) { - vtr::Point sample_loc(-1, -1); - - sample_loc = pick_sample_tile(&tile_type, sample_loc); - - if (sample_loc.x() == -1 && sample_loc.y() == -1) { - //No untried instances of the current tile type left - VTR_LOG_WARN("Found no sample locations for %s\n", - tile_type.name); - continue; - } - - int min_x = std::max(0, sample_loc.x() - X_OFFSET); - int min_y = std::max(0, sample_loc.y() - Y_OFFSET); - int max_x = std::min(int(device_ctx.grid.width()), sample_loc.x() + X_OFFSET); - int max_y = std::min(int(device_ctx.grid.height()), sample_loc.y() + Y_OFFSET); - - for (int ix = min_x; ix < max_x; ix++) { - for (int iy = min_y; iy < max_y; iy++) { - for (auto rr_type : {CHANX, CHANY}) { - rr_nodes_at_loc.clear(); - - get_rr_node_indices(device_ctx.rr_node_indices, ix, iy, rr_type, &rr_nodes_at_loc); - for (int inode : rr_nodes_at_loc) { - if (inode < 0) continue; - - RRNodeId node_id(inode); - - //Find the IPINs which are reachable from the wires within the bounding box - //around the selected tile location - dijkstra_flood_to_ipins(node_id, f_chan_ipins_delays); - } - } - } - } - } -} - -static vtr::Point pick_sample_tile(t_physical_tile_type_ptr tile_type, vtr::Point prev) { - //Very simple for now, just pick the fist matching tile found - vtr::Point loc(OPEN, OPEN); - - //VTR_LOG("Prev: %d,%d\n", prev.x(), prev.y()); - - auto& device_ctx = g_vpr_ctx.device(); - auto& grid = device_ctx.grid; - - int y_init = prev.y() + 1; //Start searching next element above prev - - for (int x = prev.x(); x < int(grid.width()); ++x) { - if (x < 0) continue; - - //VTR_LOG(" x: %d\n", x); - - for (int y = y_init; y < int(grid.height()); ++y) { - if (y < 0) continue; - - //VTR_LOG(" y: %d\n", y); - if (grid[x][y].type->index == tile_type->index) { - loc.set_x(x); - loc.set_y(y); - return loc; - } - } - - if (loc.x() != OPEN && loc.y() != OPEN) { - break; - } else { - y_init = 0; //Prepare to search next column - } - } - //VTR_LOG("Next: %d,%d\n", loc.x(), loc.y()); - - return loc; -} - -static void dijkstra_flood_to_wires(int itile, RRNodeId node, t_src_opin_delays& src_opin_delays) { - auto& device_ctx = g_vpr_ctx.device(); - auto& rr_graph = device_ctx.rr_nodes; - - struct t_pq_entry { - float delay; - float congestion; - RRNodeId node; - - bool operator<(const t_pq_entry& rhs) const { - return this->delay < rhs.delay; - } - }; - - std::priority_queue pq; - - t_pq_entry root; - root.congestion = 0.; - root.delay = 0.; - root.node = node; - - int ptc = rr_graph.node_ptc_num(node); - - /* - * Perform Djikstra from the SOURCE/OPIN of interest, stopping at the the first - * reachable wires (i.e until we hit the inter-block routing network), or a SINK - * (via a direct-connect). - * - * Note that typical RR graphs are structured : - * - * SOURCE ---> OPIN --> CHANX/CHANY - * | - * --> OPIN --> CHANX/CHANY - * | - * ... - * - * possibly with direct-connects of the form: - * - * SOURCE --> OPIN --> IPIN --> SINK - * - * and there is a small number of fixed hops from SOURCE/OPIN to CHANX/CHANY or - * to a SINK (via a direct-connect), so this runs very fast (i.e. O(1)) - */ - pq.push(root); - while (!pq.empty()) { - t_pq_entry curr = pq.top(); - pq.pop(); - - e_rr_type curr_rr_type = rr_graph.node_type(curr.node); - if (curr_rr_type == CHANX || curr_rr_type == CHANY || curr_rr_type == SINK) { - //We stop expansion at any CHANX/CHANY/SINK - int seg_index; - if (curr_rr_type != SINK) { - //It's a wire, figure out its type - int cost_index = rr_graph.node_cost_index(curr.node); - seg_index = device_ctx.rr_indexed_data[cost_index].seg_index; - } else { - //This is a direct-connect path between an IPIN and OPIN, - //which terminated at a SINK. - // - //We treat this as a 'special' wire type - seg_index = DIRECT_CONNECT_SPECIAL_SEG_TYPE; - } - - //Keep costs of the best path to reach each wire type - if (!src_opin_delays[itile][ptc].count(seg_index) - || curr.delay < src_opin_delays[itile][ptc][seg_index].delay) { - src_opin_delays[itile][ptc][seg_index].wire_rr_type = curr_rr_type; - src_opin_delays[itile][ptc][seg_index].wire_seg_index = seg_index; - src_opin_delays[itile][ptc][seg_index].delay = curr.delay; - src_opin_delays[itile][ptc][seg_index].congestion = curr.congestion; - } - - } else if (curr_rr_type == SOURCE || curr_rr_type == OPIN || curr_rr_type == IPIN) { - //We allow expansion through SOURCE/OPIN/IPIN types - int cost_index = rr_graph.node_cost_index(curr.node); - float incr_cong = device_ctx.rr_indexed_data[cost_index].base_cost; //Current nodes congestion cost - - for (RREdgeId edge : rr_graph.edge_range(curr.node)) { - int iswitch = rr_graph.edge_switch(edge); - float incr_delay = device_ctx.rr_switch_inf[iswitch].Tdel; - - RRNodeId next_node = rr_graph.edge_sink_node(edge); - - t_pq_entry next; - next.congestion = curr.congestion + incr_cong; //Of current node - next.delay = curr.delay + incr_delay; //To reach next node - next.node = next_node; - - pq.push(next); - } - } else { - VPR_ERROR(VPR_ERROR_ROUTE, "Unrecognized RR type"); - } - } -} - -static void dijkstra_flood_to_ipins(RRNodeId node, t_chan_ipins_delays& chan_ipins_delays) { - auto& device_ctx = g_vpr_ctx.device(); - auto& rr_graph = device_ctx.rr_nodes; - - struct t_pq_entry { - float delay; - float congestion; - RRNodeId node; - int level; - int prev_seg_index; - - bool operator<(const t_pq_entry& rhs) const { - return this->delay < rhs.delay; - } - }; - - std::priority_queue pq; - - t_pq_entry root; - root.congestion = 0.; - root.delay = 0.; - root.node = node; - root.level = 0; - root.prev_seg_index = OPEN; - - /* - * Perform Djikstra from the CHAN of interest, stopping at the the first - * reachable IPIN - * - * Note that typical RR graphs are structured : - * - * CHANX/CHANY --> CHANX/CHANY --> ... --> CHANX/CHANY --> IPIN --> SINK - * | - * --> CHANX/CHANY --> ... --> CHANX/CHANY --> IPIN --> SINK - * | - * ... - * - * and there is a variable number of hops from a given CHANX/CHANY to IPIN. - * To avoid impacting on run-time, a fixed number of hops is performed. This - * should be enough to find the delay from the last CAHN to IPIN connection. - */ - pq.push(root); - while (!pq.empty()) { - t_pq_entry curr = pq.top(); - pq.pop(); - - e_rr_type curr_rr_type = rr_graph.node_type(curr.node); - if (curr_rr_type == IPIN) { - int seg_index = curr.prev_seg_index; - - int node_x = rr_graph.node_xlow(curr.node); - int node_y = rr_graph.node_ylow(curr.node); - - auto tile_type = device_ctx.grid[node_x][node_y].type; - int itile = tile_type->index; - - int ptc = rr_graph.node_ptc_num(curr.node); - - if (ptc >= int(chan_ipins_delays[itile].size())) { - chan_ipins_delays[itile].resize(ptc + 1); //Inefficient but functional... - } - - //Keep costs of the best path to reach each wire type - chan_ipins_delays[itile][ptc][seg_index].wire_rr_type = curr_rr_type; - chan_ipins_delays[itile][ptc][seg_index].wire_seg_index = seg_index; - chan_ipins_delays[itile][ptc][seg_index].delay = curr.delay; - chan_ipins_delays[itile][ptc][seg_index].congestion = curr.congestion; - } else if (curr_rr_type == CHANX || curr_rr_type == CHANY) { - if (curr.level >= MAX_EXPANSION_LEVEL) { - continue; - } - - //We allow expansion through SOURCE/OPIN/IPIN types - int cost_index = rr_graph.node_cost_index(curr.node); - float new_cong = device_ctx.rr_indexed_data[cost_index].base_cost; //Current nodes congestion cost - int seg_index = device_ctx.rr_indexed_data[cost_index].seg_index; - - for (RREdgeId edge : rr_graph.edge_range(curr.node)) { - int iswitch = rr_graph.edge_switch(edge); - float new_delay = device_ctx.rr_switch_inf[iswitch].Tdel; - - RRNodeId next_node = rr_graph.edge_sink_node(edge); - - t_pq_entry next; - next.congestion = new_cong; //Of current node - next.delay = new_delay; //To reach next node - next.node = next_node; - next.level = curr.level + 1; - next.prev_seg_index = seg_index; - - pq.push(next); - } - } else { - VPR_ERROR(VPR_ERROR_ROUTE, "Unrecognized RR type"); - } - } -} - -// get an expected minimum cost for routing from the current node to the target node -float ConnectionBoxMapLookahead::get_expected_cost( - int current_node, - int target_node, - const t_conn_cost_params& params, - float /*R_upstream*/) const { - auto& device_ctx = g_vpr_ctx.device(); - - t_rr_type rr_type = device_ctx.rr_nodes[current_node].type(); - - if (rr_type == CHANX || rr_type == CHANY) { - return get_map_cost( - current_node, target_node, params.criticality); - } else if (rr_type == IPIN) { /* Change if you're allowing route-throughs */ - return (device_ctx.rr_indexed_data[SINK_COST_INDEX].base_cost); - } else { /* Change this if you want to investigate route-throughs */ - return (0.); - } -} - -#ifndef VTR_ENABLE_CAPNPROTO - -void ConnectionBoxMapLookahead::read(const std::string& file) { - VPR_THROW(VPR_ERROR_ROUTE, "ConnectionBoxMapLookahead::read not implemented"); -} -void ConnectionBoxMapLookahead::write(const std::string& file) const { - VPR_THROW(VPR_ERROR_ROUTE, "ConnectionBoxMapLookahead::write not implemented"); -} - -#else - -void ConnectionBoxMapLookahead::read(const std::string& file) { - cost_map_.read(file); - - compute_router_src_opin_lookahead(); - compute_router_chan_ipin_lookahead(); -} -void ConnectionBoxMapLookahead::write(const std::string& file) const { - cost_map_.write(file); -} - -static void ToCostEntry(util::Cost_Entry* out, const VprCostEntry::Reader& in) { - out->delay = in.getDelay(); - out->congestion = in.getCongestion(); - out->fill = in.getFill(); -} - -static void FromCostEntry(VprCostEntry::Builder* out, const util::Cost_Entry& in) { - out->setDelay(in.delay); - out->setCongestion(in.congestion); - out->setFill(in.fill); -} - -static void ToVprVector2D(std::pair* out, const VprVector2D::Reader& in) { - *out = std::make_pair(in.getX(), in.getY()); -} - -static void FromVprVector2D(VprVector2D::Builder* out, const std::pair& in) { - out->setX(in.first); - out->setY(in.second); -} - -static void ToMatrixCostEntry(vtr::NdMatrix* out, - const Matrix::Reader& in) { - ToNdMatrix<2, VprCostEntry, util::Cost_Entry>(out, in, ToCostEntry); -} - -static void FromMatrixCostEntry( - Matrix::Builder* out, - const vtr::NdMatrix& in) { - FromNdMatrix<2, VprCostEntry, util::Cost_Entry>( - out, in, FromCostEntry); -} - -static void ToFloat(float* out, const VprFloatEntry::Reader& in) { - // Getting a scalar field is always "get()". - *out = in.getValue(); -} - -static void FromFloat(VprFloatEntry::Builder* out, const float& in) { - // Setting a scalar field is always "set(value)". - out->setValue(in); -} - -void CostMap::read(const std::string& file) { - build_segment_map(); - MmapFile f(file); - - ::capnp::ReaderOptions opts = default_large_capnp_opts(); - ::capnp::FlatArrayMessageReader reader(f.getData(), opts); - - auto cost_map = reader.getRoot(); - { - const auto& offset = cost_map.getOffset(); - ToNdMatrix<2, VprVector2D, std::pair>( - &offset_, offset, ToVprVector2D); - } - - { - const auto& cost_maps = cost_map.getCostMap(); - ToNdMatrix<2, Matrix, vtr::NdMatrix>( - &cost_map_, cost_maps, ToMatrixCostEntry); - } - - { - const auto& penalty = cost_map.getPenalty(); - ToNdMatrix<2, VprFloatEntry, float>( - &penalty_, penalty, ToFloat); - } -} - -void CostMap::write(const std::string& file) const { - ::capnp::MallocMessageBuilder builder; - - auto cost_map = builder.initRoot(); - - { - auto offset = cost_map.initOffset(); - FromNdMatrix<2, VprVector2D, std::pair>( - &offset, offset_, FromVprVector2D); - } - - { - auto cost_maps = cost_map.initCostMap(); - FromNdMatrix<2, Matrix, vtr::NdMatrix>( - &cost_maps, cost_map_, FromMatrixCostEntry); - } - - { - auto penalty = cost_map.initPenalty(); - FromNdMatrix<2, VprFloatEntry, float>( - &penalty, penalty_, FromFloat); - } - - writeMessageToFile(file, &builder); -} -#endif diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h deleted file mode 100644 index 6d821c817ae..00000000000 --- a/vpr/src/route/connection_box_lookahead_map.h +++ /dev/null @@ -1,85 +0,0 @@ -#ifndef CONNECTION_BOX_LOOKAHEAD_H_ -#define CONNECTION_BOX_LOOKAHEAD_H_ - -#include -#include "physical_types.h" -#include "router_lookahead.h" -#include "router_lookahead_map_utils.h" -#include "connection_box.h" -#include "vtr_geometry.h" - -// Keys in the RoutingCosts map -struct RoutingCostKey { - // index of the channel (CHANX or CHANY) - int chan_index; - - // segment type index - int seg_index; - - // offset of the destination connection box from the starting segment - vtr::Point delta; - - RoutingCostKey() - : seg_index(-1) - , delta(0, 0) {} - RoutingCostKey(int chan_index_arg, int seg_index_arg, vtr::Point delta_arg) - : chan_index(chan_index_arg) - , seg_index(seg_index_arg) - , delta(delta_arg) {} - - bool operator==(const RoutingCostKey& other) const { - return seg_index == other.seg_index && chan_index == other.chan_index && delta == other.delta; - } -}; - -// hash implementation for RoutingCostKey -struct HashRoutingCostKey { - std::size_t operator()(RoutingCostKey const& key) const noexcept { - std::size_t hash = std::hash{}(key.chan_index); - vtr::hash_combine(hash, key.seg_index); - vtr::hash_combine(hash, key.delta.x()); - vtr::hash_combine(hash, key.delta.y()); - return hash; - } -}; - -// Map used to store intermediate routing costs -typedef std::unordered_map RoutingCosts; - -// Dense cost maps per source segment and destination connection box types -class CostMap { - public: - void set_counts(size_t seg_count); - void build_segment_map(); - int node_to_segment(int from_node_ind) const; - util::Cost_Entry find_cost(int from_seg_index, e_rr_type rr_type, int delta_x, int delta_y) const; - void set_cost_map(const RoutingCosts& delay_costs, const RoutingCosts& base_costs); - std::pair get_nearby_cost_entry(const vtr::NdMatrix& matrix, int cx, int cy, const vtr::Rect& bounds); - void read(const std::string& file); - void write(const std::string& file) const; - void print(int iseg) const; - std::vector> list_empty() const; - - private: - vtr::Matrix> cost_map_; - vtr::Matrix> offset_; - vtr::Matrix penalty_; - std::vector segment_map_; - size_t seg_count_; -}; - -// Implementation of RouterLookahead based on source segment and destination connection box types -class ConnectionBoxMapLookahead : public RouterLookahead { - public: - std::pair get_src_opin_delays(RRNodeId from_node, int delta_x, int delta_y, float criticality_fac) const; - float get_expected_cost(int node, int target_node, const t_conn_cost_params& params, float R_upstream) const override; - float get_map_cost(int from_node_ind, int to_node_ind, float criticality_fac) const; - void compute(const std::vector& segment_inf) override; - - void read(const std::string& file) override; - void write(const std::string& file) const override; - - CostMap cost_map_; -}; - -#endif diff --git a/vpr/src/route/router_lookahead.cpp b/vpr/src/route/router_lookahead.cpp index 9b3dfa03fc1..b72b38b4e2d 100644 --- a/vpr/src/route/router_lookahead.cpp +++ b/vpr/src/route/router_lookahead.cpp @@ -1,7 +1,6 @@ #include "router_lookahead.h" #include "router_lookahead_map.h" -#include "connection_box_lookahead_map.h" #include "vpr_error.h" #include "globals.h" #include "route_timing.h" @@ -14,8 +13,6 @@ static std::unique_ptr make_router_lookahead_object(e_router_lo return std::make_unique(); } else if (router_lookahead_type == e_router_lookahead::MAP) { return std::make_unique(); - } else if (router_lookahead_type == e_router_lookahead::CONNECTION_BOX_MAP) { - return std::make_unique(); } else if (router_lookahead_type == e_router_lookahead::NO_OP) { return std::make_unique(); } diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp index 797f7c97ee5..02cf16a8cc1 100644 --- a/vpr/src/route/router_lookahead_map.cpp +++ b/vpr/src/route/router_lookahead_map.cpp @@ -1,192 +1,95 @@ -/* - * The router lookahead provides an estimate of the cost from an intermediate node to the target node - * during directed (A*-like) routing. - * - * The VPR 7.0 lookahead (route/route_timing.c ==> get_timing_driven_expected_cost) lower-bounds the remaining delay and - * congestion by assuming that a minimum number of wires, of the same type as the current node being expanded, can be used - * to complete the route. While this method is efficient, it can run into trouble with architectures that use - * multiple interconnected wire types. - * - * The lookahead in this file performs undirected Dijkstra searches to evaluate many paths through the routing network, - * starting from all the different wire types in the routing architecture. This ensures the lookahead captures the - * effect of inter-wire connectivity. This information is then reduced into a delta_x delta_y based lookup table for - * reach source wire type (f_cost_map). This is used for estimates from CHANX/CHANY -> SINK nodes. See Section 3.2.4 - * in Oleg Petelin's MASc thesis (2016) for more discussion. - * - * To handle estimates starting from SOURCE/OPIN's the lookahead also creates a small side look-up table of the wire types - * which are reachable from each physical tile type's SOURCEs/OPINs (f_src_opin_reachable_wires). This is used for - * SRC/OPIN -> CHANX/CHANY estimates. - * - * In the case of SRC/OPIN -> SINK estimates the resuls from the two look-ups are added together (and the minimum taken - * if there are multiple possiblities). - */ +#include "router_lookahead_map.h" -#include #include #include -#include -#include "vpr_types.h" -#include "vpr_error.h" -#include "vpr_utils.h" + +#include "rr_node.h" +#include "router_lookahead_map_utils.h" +#include "router_lookahead_sampling.h" #include "globals.h" #include "vtr_math.h" -#include "vtr_log.h" -#include "vtr_assert.h" #include "vtr_time.h" #include "vtr_geometry.h" -#include "router_lookahead_map.h" -#include "rr_graph2.h" +#include "echo_files.h" #include "rr_graph.h" -#include "route_common.h" + #include "route_timing.h" +#include "route_common.h" #ifdef VTR_ENABLE_CAPNPROTO # include "capnp/serialize.h" -# include "map_lookahead.capnp.h" +# include "connection_map.capnp.h" # include "ndmatrix_serdes.h" # include "mmap_file.h" # include "serdes_utils.h" -#endif /* VTR_ENABLE_CAPNPROTO */ - -/* we will profile delay/congestion using this many tracks for each wire type */ -#define MAX_TRACK_OFFSET 16 - -#define X_OFFSET 2 -#define Y_OFFSET 2 +#endif -#define MAX_EXPANSION_LEVEL 1 +#if defined(VPR_USE_TBB) +# include +# include +#endif -/* we're profiling routing cost over many tracks for each wire type, so we'll have many cost entries at each |dx|,|dy| offset. - * there are many ways to "boil down" the many costs at each offset to a single entry for a given (wire type, chan_type) combination -- - * we can take the smallest cost, the average, median, etc. This define selects the method we use. +/* we're profiling routing cost over many tracks for each wire type, so we'll + * have many cost entries at each |dx|,|dy| offset. There are many ways to + * "boil down" the many costs at each offset to a single entry for a given + * (wire type, chan_type) combination we can take the smallest cost, the + * average, median, etc. This define selects the method we use. + * + * NOTE: Currently, only SMALLEST is supported. + * * See e_representative_entry_method */ -#define REPRESENTATIVE_ENTRY_METHOD SMALLEST - -/* when a list of delay/congestion entries at a coordinate in Cost_Entry is boiled down to a single - * representative entry, this enum is passed-in to specify how that representative entry should be - * calculated */ -enum e_representative_entry_method { - FIRST = 0, //the first cost that was recorded - SMALLEST, //the smallest-delay cost recorded - AVERAGE, - GEOMEAN, - MEDIAN -}; +#define REPRESENTATIVE_ENTRY_METHOD util::SMALLEST -/* a class that stores delay/congestion information for a given relative coordinate during the Dijkstra expansion. - * since it stores multiple cost entries, it is later boiled down to a single representative cost entry to be stored - * in the final lookahead cost map */ -class Expansion_Cost_Entry { - public: - std::vector cost_vector; - - private: - Cost_Entry get_smallest_entry(); - Cost_Entry get_average_entry(); - Cost_Entry get_geomean_entry(); - Cost_Entry get_median_entry(); - - public: - void add_cost_entry(float add_delay, float add_congestion) { - Cost_Entry cost_entry(add_delay, add_congestion); - if (REPRESENTATIVE_ENTRY_METHOD == SMALLEST) { - /* taking the smallest-delay entry anyway, so no need to push back multple entries */ - if (this->cost_vector.empty()) { - this->cost_vector.push_back(cost_entry); - } else { - if (add_delay < this->cost_vector[0].delay) { - this->cost_vector[0] = cost_entry; - } - } - } else { - this->cost_vector.push_back(cost_entry); - } - } - void clear_cost_entries() { - this->cost_vector.clear(); - } +#define CONNECTION_BOX_LOOKAHEAD_MAP_PRINT_COST_MAPS - Cost_Entry get_representative_cost_entry(e_representative_entry_method method) { - float nan = std::numeric_limits::quiet_NaN(); - Cost_Entry entry(nan, nan); +#define X_OFFSET 2 +#define Y_OFFSET 2 - if (!cost_vector.empty()) { - switch (method) { - case FIRST: - entry = cost_vector[0]; - break; - case SMALLEST: - entry = this->get_smallest_entry(); - break; - case AVERAGE: - entry = this->get_average_entry(); - break; - case GEOMEAN: - entry = this->get_geomean_entry(); - break; - case MEDIAN: - entry = this->get_median_entry(); - break; - default: - break; - } - } - return entry; - } -}; +#define MAX_EXPANSION_LEVEL 1 -/* a class that represents an entry in the Dijkstra expansion priority queue */ -class PQ_Entry { - public: - RRNodeId rr_node; //index in device_ctx.rr_nodes that this entry represents - float cost; //the cost of the path to get to this node +// Don't continue storing a path after hitting a lower-or-same cost entry. +static constexpr bool BREAK_ON_MISS = false; - /* store backward delay, R and congestion info */ - float delay; - float R_upstream; - float congestion_upstream; - - PQ_Entry(RRNodeId set_rr_node, int /*switch_ind*/, float parent_delay, float parent_R_upstream, float parent_congestion_upstream, bool starting_node) { - this->rr_node = set_rr_node; - - auto& device_ctx = g_vpr_ctx.device(); - this->delay = parent_delay; - this->congestion_upstream = parent_congestion_upstream; - this->R_upstream = parent_R_upstream; - if (!starting_node) { - int cost_index = device_ctx.rr_nodes.node_cost_index(RRNodeId(set_rr_node)); - //this->delay += g_rr_nodes[set_rr_node].C() * (g_rr_switch_inf[switch_ind].R + 0.5*g_rr_nodes[set_rr_node].R()) + - // g_rr_switch_inf[switch_ind].Tdel; - - //FIXME going to use the delay data that the VPR7 lookahead uses. For some reason the delay calculation above calculates - // a value that's just a little smaller compared to what VPR7 lookahead gets. While the above calculation should be more accurate, - // I have found that it produces the same CPD results but with worse runtime. - // - // TODO: figure out whether anything's wrong with the calculation above and use that instead. If not, add the other - // terms like T_quadratic and R_upstream to the calculation below (they are == 0 or UNDEFINED for buffered archs I think) +// Distance penalties filling are calculated based on available samples, but can be adjusted with this factor. +static constexpr float PENALTY_FACTOR = 1.f; +static constexpr float PENALTY_MIN = 1e-12f; - //NOTE: We neglect the T_quadratic and C_load terms and Switch R, so this lookahead is likely - // less accurate on unbuffered (e.g. pass-gate) architectures +static constexpr int MIN_PATH_COUNT = 1000; - this->delay += device_ctx.rr_indexed_data[cost_index].T_linear; +template +static std::pair run_dijkstra(int start_node_ind, + std::vector* node_expanded, + std::vector* paths, + RoutingCosts* routing_costs); - this->congestion_upstream += device_ctx.rr_indexed_data[cost_index].base_cost; - } +// also known as the L1 norm +static int manhattan_distance(const vtr::Point& a, const vtr::Point& b) { + return abs(b.x() - a.x()) + abs(b.y() - a.y()); +} - if (this->delay < 0) { - VTR_LOG("NEGATIVE DELAY!\n"); - } +template +constexpr const T& clamp(const T& v, const T& lo, const T& hi) { + return std::min(std::max(v, lo), hi); +} - /* set the cost of this node */ - this->cost = this->delay; +template +static vtr::Point closest_point_in_rect(const vtr::Rect& r, const vtr::Point& p) { + if (r.empty()) { + return vtr::Point(0, 0); + } else { + return vtr::Point(clamp(p.x(), r.xmin(), r.xmax() - 1), + clamp(p.y(), r.ymin(), r.ymax() - 1)); } +} - bool operator<(const PQ_Entry& obj) const { - /* inserted into max priority queue so want queue entries with a lower cost to be greater */ - return (this->cost > obj.cost); - } -}; +// deltas calculation +static void get_xy_deltas(const RRNodeId from_node, const RRNodeId to_node, int* delta_x, int* delta_y); +static void adjust_rr_position(const RRNodeId rr, int& x, int& y); +static void adjust_rr_pin_position(const RRNodeId rr, int& x, int& y); +static void adjust_rr_wire_position(const RRNodeId rr, int& x, int& y); +static void adjust_rr_src_sink_position(const RRNodeId rr, int& x, int& y); +// additional lookups for IPIN/OPIN missing delays struct t_reachable_wire_inf { e_rr_type wire_rr_type; int wire_seg_index; @@ -196,133 +99,335 @@ struct t_reachable_wire_inf { float delay; }; -/* used during Dijkstra expansion to store delay/congestion info lists for each relative coordinate for a given segment and channel type. - * the list at each coordinate is later boiled down to a single representative cost entry to be stored in the final cost map */ -typedef vtr::Matrix t_routing_cost_map; //[0..device_ctx.grid.width()-1][0..device_ctx.grid.height()-1] - -typedef std::vector>> t_src_opin_reachable_wires; //[0..device_ctx.physical_tile_types.size()-1][0..max_ptc-1][wire_seg_index] - // ^ ^ ^ - // | | | - // physical block type index | Reachable wire info - // | - // SOURCE/OPIN ptc - -typedef std::vector>> t_chan_reachable_ipins; //[0..device_ctx.physical_tile_types.size()-1][0..max_ptc-1][wire_seg_index] - // ^ ^ ^ - // | | | - // physical block type index | Wire to IPIN segment info - // | - // SINK/IPIN ptc - -struct t_dijkstra_data { - /* a list of boolean flags (one for each rr node) to figure out if a certain node has already been expanded */ - vtr::vector node_expanded; - /* for each node keep a list of the cost with which that node has been visited (used to determine whether to push - * a candidate node onto the expansion queue */ - vtr::vector node_visited_costs; - /* a priority queue for expansion */ - std::priority_queue pq; -}; - -/******** File-Scope Variables ********/ - -constexpr int DIRECT_CONNECT_SPECIAL_SEG_TYPE = -1; +typedef std::vector>> t_src_opin_delays; //[0..device_ctx.physical_tile_types.size()-1][0..max_ptc-1][wire_seg_index] + // ^ ^ ^ + // | | | + // physical block type index | Reachable wire info + // | + // SOURCE/OPIN ptc -//Look-up table from CHANX/CHANY (to SINKs) for various distances -t_wire_cost_map f_wire_cost_map; +typedef std::vector>> t_chan_ipins_delays; //[0..device_ctx.physical_tile_types.size()-1][0..max_ptc-1][wire_seg_index] + // ^ ^ ^ + // | | | + // physical block type index | Wire to IPIN segment info + // | + // SINK/IPIN ptc //Look-up table from SOURCE/OPIN to CHANX/CHANY of various types -t_src_opin_reachable_wires f_src_opin_reachable_wires; +t_src_opin_delays f_src_opin_delays; //Look-up table from CHANX/CHANY to SINK/IPIN of various types -t_chan_reachable_ipins f_chan_reachable_ipins; +t_chan_ipins_delays f_chan_ipins_delays; + +constexpr int DIRECT_CONNECT_SPECIAL_SEG_TYPE = -1; -/******** File-Scope Functions ********/ -Cost_Entry get_wire_cost_entry(e_rr_type rr_type, int seg_index, int delta_x, int delta_y); -static void compute_router_wire_lookahead(const std::vector& segment_inf); static void compute_router_src_opin_lookahead(); static void compute_router_chan_ipin_lookahead(); -static vtr::Point pick_sample_tile(t_physical_tile_type_ptr tile_type, vtr::Point start); -void dijkstra_flood_to_wires(int itile, RRNodeId inode, t_src_opin_reachable_wires& src_opin_reachable_wires); -void dijkstra_flood_to_ipins(RRNodeId node, t_chan_reachable_ipins& chan_reachable_ipins); - -/* returns index of a node from which to start routing */ -static RRNodeId get_start_node(int start_x, int start_y, int target_x, int target_y, t_rr_type rr_type, int seg_index, int track_offset); -/* runs Dijkstra's algorithm from specified node until all nodes have been visited. Each time a pin is visited, the delay/congestion information - * to that pin is stored is added to an entry in the routing_cost_map */ -static void run_dijkstra(RRNodeId start_node, int start_x, int start_y, t_routing_cost_map& routing_cost_map, t_dijkstra_data* data); -/* iterates over the children of the specified node and selectively pushes them onto the priority queue */ -static void expand_dijkstra_neighbours(PQ_Entry parent_entry, vtr::vector& node_visited_costs, vtr::vector& node_expanded, std::priority_queue& pq); -/* sets the lookahead cost map entries based on representative cost entries from routing_cost_map */ -static void set_lookahead_map_costs(int segment_index, e_rr_type chan_type, t_routing_cost_map& routing_cost_map); -/* fills in missing lookahead map entries by copying the cost of the closest valid entry */ -static void fill_in_missing_lookahead_entries(int segment_index, e_rr_type chan_type); -/* returns a cost entry in the f_wire_cost_map that is near the specified coordinates (and preferably towards (0,0)) */ -static Cost_Entry get_nearby_cost_entry(int x, int y, int segment_index, int chan_index); -/* returns the absolute delta_x and delta_y offset required to reach to_node from from_node */ -static void get_xy_deltas(const RRNodeId from_node, const RRNodeId to_node, int* delta_x, int* delta_y); -static void adjust_rr_position(const RRNodeId rr, int& x, int& y); -static void adjust_rr_pin_position(const RRNodeId rr, int& x, int& y); -static void adjust_rr_wire_position(const RRNodeId rr, int& x, int& y); -static void adjust_rr_src_sink_position(const RRNodeId rr, int& x, int& y); +static vtr::Point pick_sample_tile(t_physical_tile_type_ptr tile_type, vtr::Point prev); +static void dijkstra_flood_to_wires(int itile, RRNodeId node, t_src_opin_delays& src_opin_delays); +static void dijkstra_flood_to_ipins(RRNodeId node, t_chan_ipins_delays& chan_ipins_delays); + +// build the segment map +void CostMap::build_segment_map() { + const auto& device_ctx = g_vpr_ctx.device(); + segment_map_.resize(device_ctx.rr_nodes.size()); + for (size_t i = 0; i < segment_map_.size(); ++i) { + auto& from_node = device_ctx.rr_nodes[i]; + + int from_cost_index = from_node.cost_index(); + int from_seg_index = device_ctx.rr_indexed_data[from_cost_index].seg_index; -static void print_wire_cost_map(const std::vector& segment_inf); -static void print_router_cost_map(const t_routing_cost_map& router_cost_map); + segment_map_[i] = from_seg_index; + } +} -/******** Interface class member function definitions ********/ -float MapLookahead::get_expected_cost(int current_node, int target_node, const t_conn_cost_params& params, float /*R_upstream*/) const { - auto& device_ctx = g_vpr_ctx.device(); - auto& rr_graph = device_ctx.rr_nodes; +// resize internal data structures +void CostMap::set_counts(size_t seg_count) { + cost_map_.clear(); + offset_.clear(); + penalty_.clear(); + cost_map_.resize({2, seg_count}); + offset_.resize({2, seg_count}); + penalty_.resize({2, seg_count}); + seg_count_ = seg_count; +} - RRNodeId current(current_node); - RRNodeId target(target_node); +// cached node -> segment map +int CostMap::node_to_segment(int from_node_ind) const { + return segment_map_[from_node_ind]; +} - t_rr_type rr_type = rr_graph.node_type(current); +static util::Cost_Entry penalize(const util::Cost_Entry& entry, int distance, float penalty) { + penalty = std::max(penalty, PENALTY_MIN); + return util::Cost_Entry(entry.delay + distance * penalty * PENALTY_FACTOR, + entry.congestion, entry.fill); +} - if (rr_type == CHANX || rr_type == CHANY || rr_type == SOURCE || rr_type == OPIN) { - return get_lookahead_map_cost(current, target, params.criticality); - } else if (rr_type == IPIN) { /* Change if you're allowing route-throughs */ - return (device_ctx.rr_indexed_data[SINK_COST_INDEX].base_cost); - } else { /* Change this if you want to investigate route-throughs */ - return (0.); +// get a cost entry for a segment type, connection box type, and offset +util::Cost_Entry CostMap::find_cost(int from_seg_index, e_rr_type rr_type, int delta_x, int delta_y) const { + int chan_index = 0; + if (rr_type == CHANY) { + chan_index = 1; + } + + VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); + const auto& cost_map = cost_map_[chan_index][from_seg_index]; + if (cost_map.dim_size(0) == 0 || cost_map.dim_size(1) == 0) { + return util::Cost_Entry(); } + + vtr::Point coord(delta_x - offset_[chan_index][from_seg_index].first, + delta_y - offset_[chan_index][from_seg_index].second); + vtr::Rect bounds(0, 0, cost_map.dim_size(0), cost_map.dim_size(1)); + auto closest = closest_point_in_rect(bounds, coord); + auto cost = cost_map_[chan_index][from_seg_index][closest.x()][closest.y()]; + float penalty = penalty_[chan_index][from_seg_index]; + auto distance = manhattan_distance(closest, coord); + return penalize(cost, distance, penalty); } -void MapLookahead::compute(const std::vector& segment_inf) { - compute_router_lookahead(segment_inf); +// set the cost map for a segment type and connection box type, filling holes +void CostMap::set_cost_map(const RoutingCosts& delay_costs, const RoutingCosts& base_costs) { + // calculate the bounding boxes + vtr::Matrix> bounds({2, seg_count_}); + for (const auto& entry : delay_costs) { + bounds[entry.first.chan_index][entry.first.seg_index].expand_bounding_box(vtr::Rect(entry.first.delta)); + } + for (const auto& entry : base_costs) { + bounds[entry.first.chan_index][entry.first.seg_index].expand_bounding_box(vtr::Rect(entry.first.delta)); + } + + // store bounds + for (size_t chan = 0; chan < 2; chan++) { + for (size_t seg = 0; seg < seg_count_; seg++) { + const auto& chan_seg_bounds = bounds[chan][seg]; + if (chan_seg_bounds.empty()) { + // Didn't find any sample routes, so routing isn't possible between these segment/connection box types. + offset_[chan][seg] = std::make_pair(0, 0); + cost_map_[chan][seg] = vtr::NdMatrix( + {size_t(0), size_t(0)}); + continue; + } else { + offset_[chan][seg] = std::make_pair(chan_seg_bounds.xmin(), chan_seg_bounds.ymin()); + cost_map_[chan][seg] = vtr::NdMatrix( + {size_t(chan_seg_bounds.width()), size_t(chan_seg_bounds.height())}); + } + } + } + + // store entries into the matrices + for (const auto& entry : delay_costs) { + int seg = entry.first.seg_index; + int chan = entry.first.chan_index; + const auto& chan_seg_bounds = bounds[chan][seg]; + int x = entry.first.delta.x() - chan_seg_bounds.xmin(); + int y = entry.first.delta.y() - chan_seg_bounds.ymin(); + cost_map_[chan][seg][x][y].delay = entry.second; + } + for (const auto& entry : base_costs) { + int seg = entry.first.seg_index; + int chan = entry.first.chan_index; + const auto& chan_seg_bounds = bounds[chan][seg]; + int x = entry.first.delta.x() - chan_seg_bounds.xmin(); + int y = entry.first.delta.y() - chan_seg_bounds.ymin(); + cost_map_[chan][seg][x][y].congestion = entry.second; + } + + // fill the holes + for (size_t chan = 0; chan < 2; chan++) { + for (size_t seg = 0; seg < seg_count_; seg++) { + penalty_[chan][seg] = std::numeric_limits::infinity(); + const auto& chan_seg_bounds = bounds[chan][seg]; + if (chan_seg_bounds.empty()) { + continue; + } + auto& matrix = cost_map_[chan][seg]; + + // calculate delay penalty + float min_delay = std::numeric_limits::infinity(), max_delay = 0.f; + vtr::Point min_location(0, 0), max_location(0, 0); + for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { + for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { + util::Cost_Entry& cost_entry = matrix[ix][iy]; + if (cost_entry.valid()) { + if (cost_entry.delay < min_delay) { + min_delay = cost_entry.delay; + min_location = vtr::Point(ix, iy); + } + if (cost_entry.delay > max_delay) { + max_delay = cost_entry.delay; + max_location = vtr::Point(ix, iy); + } + } + } + } + float delay_penalty = (max_delay - min_delay) / static_cast(std::max(1, manhattan_distance(max_location, min_location))); + penalty_[chan][seg] = delay_penalty; + + // find missing cost entries and fill them in by copying a nearby cost entry + std::vector> missing; + bool couldnt_fill = false; + auto shifted_bounds = vtr::Rect(0, 0, chan_seg_bounds.width(), chan_seg_bounds.height()); + int max_fill = 0; + for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { + for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { + util::Cost_Entry& cost_entry = matrix[ix][iy]; + if (!cost_entry.valid()) { + // maximum search radius + util::Cost_Entry filler; + int distance; + std::tie(filler, distance) = get_nearby_cost_entry(matrix, ix, iy, shifted_bounds); + if (filler.valid()) { + missing.push_back(std::make_tuple(ix, iy, penalize(filler, distance, delay_penalty))); + max_fill = std::max(max_fill, distance); + } else { + couldnt_fill = true; + } + } + } + if (couldnt_fill) { + // give up trying to fill an empty matrix + break; + } + } + + if (!couldnt_fill) { + VTR_LOG("At %d: max_fill = %d, delay_penalty = %e\n", seg, max_fill, delay_penalty); + } + + // write back the missing entries + for (auto& xy_entry : missing) { + matrix[std::get<0>(xy_entry)][std::get<1>(xy_entry)] = std::get<2>(xy_entry); + } + + if (couldnt_fill) { + VTR_LOG_WARN("Couldn't fill holes in the cost matrix for %d -> %ld, %d x %d bounding box\n", + chan, seg, chan_seg_bounds.width(), chan_seg_bounds.height()); + for (unsigned y = 0; y < matrix.dim_size(1); y++) { + for (unsigned x = 0; x < matrix.dim_size(0); x++) { + VTR_ASSERT(!matrix[x][y].valid()); + } + } + } + } + } } -void MapLookahead::read(const std::string& file) { - read_router_lookahead(file); +// prints an ASCII diagram of each cost map for a segment type (debug) +// o => above average +// . => at or below average +// * => invalid (missing) +void CostMap::print(int iseg) const { + for (size_t chan_index = 0; + chan_index < 2; + chan_index++) { + auto& matrix = cost_map_[chan_index][iseg]; + if (matrix.dim_size(0) == 0 || matrix.dim_size(1) == 0) { + VTR_LOG("cost EMPTY for chan_index = %lu\n", chan_index); + continue; + } + VTR_LOG("cost for chan_index = %lu (%zu, %zu)\n", chan_index, matrix.dim_size(0), matrix.dim_size(1)); + double sum = 0.0; + for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { + for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { + const auto& entry = matrix[ix][iy]; + if (entry.valid()) { + sum += entry.delay; + } + } + } + double avg = sum / ((double)matrix.dim_size(0) * (double)matrix.dim_size(1)); + for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { + for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { + const auto& entry = matrix[ix][iy]; + if (!entry.valid()) { + VTR_LOG("*"); + } else if (entry.delay * 4 > avg * 5) { + VTR_LOG("O"); + } else if (entry.delay > avg) { + VTR_LOG("o"); + } else if (entry.delay * 4 > avg * 3) { + VTR_LOG("."); + } else { + VTR_LOG(" "); + } + } + VTR_LOG("\n"); + } + } +} - //Next, compute which wire types are accessible (and the cost to reach them) - //from the different physical tile type's SOURCEs & OPINs - compute_router_src_opin_lookahead(); - compute_router_chan_ipin_lookahead(); +// list segment type and connection box type pairs that have empty cost maps (debug) +std::vector> CostMap::list_empty() const { + std::vector> results; + for (int chan_index = 0; chan_index < (int)cost_map_.dim_size(1); chan_index++) { + for (int iseg = 0; iseg < (int)cost_map_.dim_size(0); iseg++) { + auto& matrix = cost_map_[chan_index][iseg]; + if (matrix.dim_size(0) == 0 || matrix.dim_size(1) == 0) results.push_back(std::make_pair(chan_index, iseg)); + } + } + return results; } -void MapLookahead::write(const std::string& file) const { - write_router_lookahead(file); +static void assign_min_entry(util::Cost_Entry* dst, const util::Cost_Entry& src) { + if (src.delay < dst->delay) { + dst->delay = src.delay; + } + if (src.congestion < dst->congestion) { + dst->congestion = src.congestion; + } +} + +// find the minimum cost entry from the nearest manhattan distance neighbor +std::pair CostMap::get_nearby_cost_entry(const vtr::NdMatrix& matrix, + int cx, + int cy, + const vtr::Rect& bounds) { + // spiral around (cx, cy) looking for a nearby entry + bool in_bounds = bounds.contains(vtr::Point(cx, cy)); + if (!in_bounds) { + return std::make_pair(util::Cost_Entry(), 0); + } + int n = 0; + util::Cost_Entry fill(matrix[cx][cy]); + fill.fill = true; + + while (in_bounds && !fill.valid()) { + n++; + in_bounds = false; + util::Cost_Entry min_entry; + for (int ox = -n; ox <= n; ox++) { + int x = cx + ox; + int oy = n - abs(ox); + int yp = cy + oy; + int yn = cy - oy; + if (bounds.contains(vtr::Point(x, yp))) { + assign_min_entry(&min_entry, matrix[x][yp]); + in_bounds = true; + } + if (bounds.contains(vtr::Point(x, yn))) { + assign_min_entry(&min_entry, matrix[x][yn]); + in_bounds = true; + } + } + if (!std::isfinite(fill.delay)) { + fill.delay = min_entry.delay; + } + if (!std::isfinite(fill.congestion)) { + fill.congestion = min_entry.congestion; + } + } + return std::make_pair(fill, n); } -/******** Function Definitions ********/ -/* queries the lookahead_map (should have been computed prior to routing) to get the expected cost - * from the specified source to the specified target */ -float get_lookahead_map_cost(RRNodeId from_node, RRNodeId to_node, float criticality_fac) { +std::pair MapLookahead::get_src_opin_delays(RRNodeId from_node, int delta_x, int delta_y, float criticality_fac) const { auto& device_ctx = g_vpr_ctx.device(); auto& rr_graph = device_ctx.rr_nodes; - int delta_x, delta_y; - get_xy_deltas(from_node, to_node, &delta_x, &delta_y); - delta_x = abs(delta_x); - delta_y = abs(delta_y); - - float expected_cost = std::numeric_limits::infinity(); - e_rr_type from_type = rr_graph.node_type(from_node); if (from_type == SOURCE || from_type == OPIN) { //When estimating costs from a SOURCE/OPIN we look-up to find which wire types (and the - //cost to reach them) in f_src_opin_reachable_wires. Once we know what wire types are + //cost to reach them) in f_src_opin_delays. Once we know what wire types are //reachable, we query the f_wire_cost_map (i.e. the wire lookahead) to get the final //delay to reach the sink. @@ -331,7 +436,7 @@ float get_lookahead_map_cost(RRNodeId from_node, RRNodeId to_node, float critica auto from_ptc = rr_graph.node_ptc_num(from_node); - if (f_src_opin_reachable_wires[tile_index][from_ptc].empty()) { + if (f_src_opin_delays[tile_index][from_ptc].empty()) { //During lookahead profiling we were unable to find any wires which connected //to this PTC. // @@ -342,286 +447,654 @@ float get_lookahead_map_cost(RRNodeId from_node, RRNodeId to_node, float critica //Such RR graphs are of course unroutable, but that should be determined by the //router. So just return an arbitrary value here rather than error. - //We choose to return the largest (non-infinite) value possible, but scaled - //down by a large factor to maintain some dynaimc range in case this value ends - //up being processed (e.g. by the timing analyzer). - // - //The cost estimate should still be *extremely* large compared to a typical delay, and - //so should ensure that the router de-prioritizes exploring this path, but does not - //forbid the router from trying. - expected_cost = std::numeric_limits::max() / 1e12; + return std::pair(0.f, 0.f); } else { //From the current SOURCE/OPIN we look-up the wiretypes which are reachable //and then add the estimates from those wire types for the distance of interest. //If there are multiple options we use the minimum value. - for (const auto& kv : f_src_opin_reachable_wires[tile_index][from_ptc]) { + + float delay = 0; + float congestion = 0; + float expected_cost = std::numeric_limits::infinity(); + + for (const auto& kv : f_src_opin_delays[tile_index][from_ptc]) { const t_reachable_wire_inf& reachable_wire_inf = kv.second; - Cost_Entry wire_cost_entry; + util::Cost_Entry cost_entry; if (reachable_wire_inf.wire_rr_type == SINK) { //Some pins maybe reachable via a direct (OPIN -> IPIN) connection. //In the lookahead, we treat such connections as 'special' wire types //with no delay/congestion cost - wire_cost_entry.delay = 0; - wire_cost_entry.congestion = 0; + cost_entry.delay = 0; + cost_entry.congestion = 0; } else { //For an actual accessible wire, we query the wire look-up to get it's //delay and congestion cost estimates - wire_cost_entry = get_wire_cost_entry(reachable_wire_inf.wire_rr_type, reachable_wire_inf.wire_seg_index, delta_x, delta_y); + cost_entry = cost_map_.find_cost(reachable_wire_inf.wire_seg_index, reachable_wire_inf.wire_rr_type, delta_x, delta_y); } - float this_cost = (criticality_fac) * (reachable_wire_inf.congestion + wire_cost_entry.congestion) - + (1. - criticality_fac) * (reachable_wire_inf.delay + wire_cost_entry.delay); + float this_cost = (criticality_fac) * (reachable_wire_inf.delay + cost_entry.delay) + + (1. - criticality_fac) * (reachable_wire_inf.congestion + cost_entry.congestion); - expected_cost = std::min(expected_cost, this_cost); + if (this_cost < expected_cost) { + delay = reachable_wire_inf.delay; + congestion = reachable_wire_inf.congestion; + } } + + return std::pair(delay, congestion); } - VTR_ASSERT_SAFE_MSG(std::isfinite(expected_cost), + VTR_ASSERT_SAFE_MSG(false, vtr::string_fmt("Lookahead failed to estimate cost from %s: %s", rr_node_arch_name(size_t(from_node)).c_str(), describe_rr_node(size_t(from_node)).c_str()) .c_str()); + } - } else { - VTR_ASSERT_SAFE(from_type == CHANX || from_type == CHANY); - //When estimating costs from a wire, we directly look-up the result in the wire lookahead (f_wire_cost_map) - - int from_cost_index = rr_graph.node_cost_index(from_node); - int from_seg_index = device_ctx.rr_indexed_data[from_cost_index].seg_index; - - VTR_ASSERT(from_seg_index >= 0); - - /* now get the expected cost from our lookahead map */ - Cost_Entry cost_entry = get_wire_cost_entry(from_type, from_seg_index, delta_x, delta_y); + return std::pair(0.f, 0.f); +} - float expected_delay = cost_entry.delay; - float expected_congestion = cost_entry.congestion; +// derive a cost from the map between two nodes +float MapLookahead::get_map_cost(int from_node_ind, + int to_node_ind, + float criticality_fac) const { + if (from_node_ind == to_node_ind) { + return 0.f; + } - if (rr_graph.node_type(to_node) == SINK) { - auto to_tile_type = device_ctx.grid[rr_graph.node_xlow(to_node)][rr_graph.node_ylow(to_node)].type; - auto to_tile_index = to_tile_type->index; + auto& device_ctx = g_vpr_ctx.device(); + auto& rr_graph = device_ctx.rr_nodes; - auto to_ptc = rr_graph.node_ptc_num(to_node); + auto from_node_type = device_ctx.rr_nodes[to_node_ind].type(); - if (f_chan_reachable_ipins[to_tile_index].size() != 0) { - for (const auto& kv : f_chan_reachable_ipins[to_tile_index][to_ptc]) { - const t_reachable_wire_inf& reachable_wire_inf = kv.second; + RRNodeId to_node(to_node_ind); + RRNodeId from_node(from_node_ind); - float this_delay = reachable_wire_inf.delay; - float this_congestion = reachable_wire_inf.congestion; + int dx, dy; + get_xy_deltas(from_node, to_node, &dx, &dy); - float this_cost = criticality_fac * (expected_delay + this_delay) + (1.0 - criticality_fac) * (expected_congestion + this_congestion); - expected_cost = std::min(this_cost, expected_cost); - } - } - } + float extra_delay, extra_congestion; + std::tie(extra_delay, extra_congestion) = this->get_src_opin_delays(from_node, dx, dy, criticality_fac); - if (cost_entry.delay == 0) { - expected_cost = std::numeric_limits::max() / 1e12; - } + int from_seg_index = cost_map_.node_to_segment(from_node_ind); + util::Cost_Entry cost_entry = cost_map_.find_cost(from_seg_index, from_node_type, dx, dy); - VTR_ASSERT_SAFE_MSG(std::isfinite(expected_cost), - vtr::string_fmt("Lookahead failed to estimate cost from %s: %s", - rr_node_arch_name(size_t(from_node)).c_str(), - describe_rr_node(size_t(from_node)).c_str()) - .c_str()); - VTR_LOGV_DEBUG(f_router_debug, "Lookahead delay : %10.3g\n", expected_delay); - VTR_LOGV_DEBUG(f_router_debug, "Lookahead cong : %10.3g\n", expected_congestion); + if (!cost_entry.valid()) { + // there is no route + VTR_LOGV_DEBUG(f_router_debug, + "Not connected %d (%s, %d) -> %d (%s)\n", + from_node_ind, device_ctx.rr_nodes[from_node_ind].type_string(), from_seg_index, + to_node_ind, device_ctx.rr_nodes[to_node_ind].type_string()); + return std::numeric_limits::infinity(); } - VTR_LOGV_DEBUG(f_router_debug, "Lookahead cost : %10.3g\n", expected_cost); + auto to_tile_type = device_ctx.grid[rr_graph.node_xlow(to_node)][rr_graph.node_ylow(to_node)].type; + auto to_tile_index = to_tile_type->index; - return expected_cost; -} + auto to_ptc = rr_graph.node_ptc_num(to_node); -Cost_Entry get_wire_cost_entry(e_rr_type rr_type, int seg_index, int delta_x, int delta_y) { - VTR_ASSERT_SAFE(rr_type == CHANX || rr_type == CHANY); + float site_pin_delay = std::numeric_limits::infinity(); + if (f_chan_ipins_delays[to_tile_index].size() != 0) { + for (const auto& kv : f_chan_ipins_delays[to_tile_index][to_ptc]) { + const t_reachable_wire_inf& reachable_wire_inf = kv.second; - int chan_index = 0; - if (rr_type == CHANY) { - chan_index = 1; + float this_delay = reachable_wire_inf.delay; + site_pin_delay = std::min(this_delay, site_pin_delay); + } } - VTR_ASSERT_SAFE(delta_x < (int)f_wire_cost_map.dim_size(2)); - VTR_ASSERT_SAFE(delta_y < (int)f_wire_cost_map.dim_size(3)); - - return f_wire_cost_map[chan_index][seg_index][delta_x][delta_y]; -} - -/* Computes the lookahead map to be used by the router. If a map was computed prior to this, a new one will not be computed again. - * The rr graph must have been built before calling this function. */ -void compute_router_lookahead(const std::vector& segment_inf) { - vtr::ScopedStartFinishTimer timer("Computing router lookahead map"); - - //First compute the delay map when starting from the various wire types - //(CHANX/CHANY)in the routing architecture - compute_router_wire_lookahead(segment_inf); + float expected_delay = cost_entry.delay + extra_delay; + float expected_congestion = cost_entry.congestion + extra_congestion; + + expected_delay += site_pin_delay; + + float expected_cost = criticality_fac * expected_delay + (1.0 - criticality_fac) * expected_congestion; + + VTR_LOGV_DEBUG(f_router_debug, "Requested lookahead from node \n\t%s to \n\t%s\n", + describe_rr_node(from_node_ind).c_str(), + describe_rr_node(to_node_ind).c_str()); + const std::string& segment_name = device_ctx.segment_inf[from_seg_index].name; + VTR_LOGV_DEBUG(f_router_debug, "Lookahead returned %s (%d) with distance (%zd, %zd)\n", + segment_name.c_str(), from_seg_index, + dx, dy); + VTR_LOGV_DEBUG(f_router_debug, "Lookahead delay: %g\n", expected_delay); + VTR_LOGV_DEBUG(f_router_debug, "Lookahead congestion: %g\n", expected_congestion); + VTR_LOGV_DEBUG(f_router_debug, "Criticality: %g\n", criticality_fac); + VTR_LOGV_DEBUG(f_router_debug, "Lookahead cost: %g\n", expected_cost); + VTR_LOGV_DEBUG(f_router_debug, "Site pin delay: %g\n", site_pin_delay); + + if (!std::isfinite(expected_cost) || expected_cost < 0.f) { + VTR_LOG_ERROR("invalid cost for segment %d at (%d, %d)\n", from_seg_index, (int)dx, (int)dy); + VTR_ASSERT(0); + } - //Next, compute which wire types are accessible (and the cost to reach them) - //from the different physical tile type's SOURCEs & OPINs - compute_router_src_opin_lookahead(); - compute_router_chan_ipin_lookahead(); + return expected_cost; } -static void compute_router_wire_lookahead(const std::vector& segment_inf) { - vtr::ScopedStartFinishTimer timer("Computing wire lookahead"); - +// add a best cost routing path from start_node_ind to node_ind to routing costs +template +static bool add_paths(int start_node_ind, + Entry current, + const std::vector& paths, + RoutingCosts* routing_costs) { auto& device_ctx = g_vpr_ctx.device(); - auto& grid = device_ctx.grid; auto& rr_graph = device_ctx.rr_nodes; - //Re-allocate - f_wire_cost_map = t_wire_cost_map({2, segment_inf.size(), device_ctx.grid.width(), device_ctx.grid.height()}); + int node_ind = current.rr_node_ind; + RRNodeId node(node_ind); - int longest_length = 0; - for (const auto& seg_inf : segment_inf) { - longest_length = std::max(longest_length, seg_inf.length); - } - - //Start sampling at the lower left non-corner - int ref_x = 1; - int ref_y = 1; - - //Sample from locations near the reference location (to capture maximum distance paths) - //Also sample from locations at least the longest wire length away from the edge (to avoid - //edge effects for shorter distances) - std::vector ref_increments = {0, 1, - longest_length, longest_length + 1}; - - //Uniquify the increments (avoid sampling the same locations repeatedly if they happen to - //overlap) - std::sort(ref_increments.begin(), ref_increments.end()); - ref_increments.erase(std::unique(ref_increments.begin(), ref_increments.end()), ref_increments.end()); - - //Upper right non-corner - int target_x = device_ctx.grid.width() - 2; - int target_y = device_ctx.grid.height() - 2; - - //Profile each wire segment type - for (int iseg = 0; iseg < int(segment_inf.size()); iseg++) { - //First try to pick good representative sample locations for each type - std::map> sample_nodes; - for (e_rr_type chan_type : {CHANX, CHANY}) { - for (int ref_inc : ref_increments) { - int sample_x = ref_x + ref_inc; - int sample_y = ref_y + ref_inc; - - if (sample_x >= int(grid.width())) continue; - if (sample_y >= int(grid.height())) continue; - - for (int track_offset = 0; track_offset < MAX_TRACK_OFFSET; track_offset += 2) { - /* get the rr node index from which to start routing */ - RRNodeId start_node = get_start_node(sample_x, sample_y, - target_x, target_y, //non-corner upper right - chan_type, iseg, track_offset); - - if (!start_node) { - continue; - } + bool new_sample_found = false; - sample_nodes[chan_type].push_back(RRNodeId(start_node)); - } - } - } + auto to_tile_type = device_ctx.grid[rr_graph.node_xlow(node)][rr_graph.node_ylow(node)].type; + auto to_tile_index = to_tile_type->index; - //If we failed to find any representative sample locations, search exhaustively - // - //This is to ensure we sample 'unusual' wire types which may not exist in all channels - //(e.g. clock routing) - for (e_rr_type chan_type : {CHANX, CHANY}) { - if (!sample_nodes[chan_type].empty()) continue; + auto to_ptc = rr_graph.node_ptc_num(node); - //Try an exhaustive search to find a suitable sample point - for (int inode = 0; inode < int(device_ctx.rr_nodes.size()); ++inode) { - auto rr_node = RRNodeId(inode); - auto rr_type = rr_graph.node_type(rr_node); - if (rr_type != chan_type) continue; + float site_pin_delay = std::numeric_limits::infinity(); + if (f_chan_ipins_delays[to_tile_index].size() != 0) { + for (const auto& kv : f_chan_ipins_delays[to_tile_index][to_ptc]) { + const t_reachable_wire_inf& reachable_wire_inf = kv.second; - int cost_index = rr_graph.node_cost_index(rr_node); - VTR_ASSERT(cost_index != OPEN); + float this_delay = reachable_wire_inf.delay; + site_pin_delay = std::min(this_delay, site_pin_delay); + } + } - int seg_index = device_ctx.rr_indexed_data[cost_index].seg_index; + // reconstruct the path + std::vector path; + for (int i = paths[node_ind].parent; i != start_node_ind; i = paths[i].parent) { + VTR_ASSERT(i != -1); + path.push_back(i); + } + path.push_back(start_node_ind); - if (seg_index == iseg) { - sample_nodes[chan_type].push_back(RRNodeId(inode)); - } + current.adjust_Tsw(-site_pin_delay); - if (sample_nodes[chan_type].size() >= ref_increments.size()) { - break; - } - } - } + // add each node along the path subtracting the incremental costs from the current costs + Entry start_to_here(start_node_ind, UNDEFINED, nullptr); + int parent = start_node_ind; + for (auto it = path.rbegin(); it != path.rend(); it++) { + RRNodeId this_node(*it); + auto& here = device_ctx.rr_nodes[*it]; + int seg_index = device_ctx.rr_indexed_data[here.cost_index()].seg_index; - //Finally, now that we have a list of sample locations, run a Djikstra flood from - //each sample location to profile the routing network from this type + auto chan_type = rr_graph.node_type(node); - t_dijkstra_data dijkstra_data; - t_routing_cost_map routing_cost_map({device_ctx.grid.width(), device_ctx.grid.height()}); + int ichan = 0; + if (chan_type == CHANY) { + ichan = 1; + } - for (e_rr_type chan_type : {CHANX, CHANY}) { - if (sample_nodes[chan_type].empty()) { - VTR_LOG_WARN("Unable to find any sample location for segment %s type '%s' (length %d)\n", - rr_node_typename[chan_type], - segment_inf[iseg].name.c_str(), - segment_inf[iseg].length); - } else { - //reset cost for this segment - routing_cost_map.fill(Expansion_Cost_Entry()); + int delta_x, delta_y; + get_xy_deltas(this_node, node, &delta_x, &delta_y); - for (RRNodeId sample_node : sample_nodes[chan_type]) { - int sample_x = rr_graph.node_xlow(sample_node); - int sample_y = rr_graph.node_ylow(sample_node); + vtr::Point delta(delta_x, delta_y); - if (rr_graph.node_direction(sample_node) == DEC_DIRECTION) { - sample_x = rr_graph.node_xhigh(sample_node); - sample_y = rr_graph.node_yhigh(sample_node); - } + RoutingCostKey key = { + ichan, + seg_index, + delta}; - run_dijkstra(sample_node, - sample_x, - sample_y, - routing_cost_map, - &dijkstra_data); - } + if (*it != start_node_ind) { + auto& parent_node = device_ctx.rr_nodes[parent]; + start_to_here = Entry(*it, parent_node.edge_switch(paths[*it].edge), &start_to_here); + parent = *it; + } - if (false) print_router_cost_map(routing_cost_map); + float cost = current.cost() - start_to_here.cost(); + if (cost < 0.f && cost > -10e-15 /* 10 femtosecond */) { + cost = 0.f; + } - /* boil down the cost list in routing_cost_map at each coordinate to a representative cost entry and store it in the lookahead - * cost map */ - set_lookahead_map_costs(iseg, chan_type, routing_cost_map); + VTR_ASSERT(cost >= 0.f); - /* fill in missing entries in the lookahead cost map by copying the closest cost entries (cost map was computed based on - * a reference coordinate > (0,0) so some entries that represent a cross-chip distance have not been computed) */ - fill_in_missing_lookahead_entries(iseg, chan_type); + // NOTE: implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST + auto result = routing_costs->insert(std::make_pair(key, cost)); + if (!result.second) { + if (cost < result.first->second) { + result.first->second = cost; + new_sample_found = true; + } else if (BREAK_ON_MISS) { + break; } + } else { + new_sample_found = true; } } - - if (false) print_wire_cost_map(segment_inf); + return new_sample_found; } -static void compute_router_src_opin_lookahead() { - vtr::ScopedStartFinishTimer timer("Computing src/opin lookahead"); +/* returns the absolute delta_x and delta_y offset required to reach to_node from from_node */ +static void get_xy_deltas(const RRNodeId from_node, const RRNodeId to_node, int* delta_x, int* delta_y) { auto& device_ctx = g_vpr_ctx.device(); auto& rr_graph = device_ctx.rr_nodes; - f_src_opin_reachable_wires.clear(); + e_rr_type from_type = rr_graph.node_type(from_node); + e_rr_type to_type = rr_graph.node_type(to_node); - f_src_opin_reachable_wires.resize(device_ctx.physical_tile_types.size()); + if (!is_chan(from_type) && !is_chan(to_type)) { + //Alternate formulation for non-channel types + int from_x = 0; + int from_y = 0; + adjust_rr_position(from_node, from_x, from_y); - std::vector rr_nodes_at_loc; + int to_x = 0; + int to_y = 0; + adjust_rr_position(to_node, to_x, to_y); - //We assume that the routing connectivity of each instance of a physical tile is the same, - //and so only measure one instance of each type - for (size_t itile = 0; itile < device_ctx.physical_tile_types.size(); ++itile) { - for (e_rr_type rr_type : {SOURCE, OPIN}) { - vtr::Point sample_loc(-1, -1); + *delta_x = to_x - from_x; + *delta_y = to_y - from_y; + } else { + //Traditional formulation - size_t num_sampled_locs = 0; - bool ptcs_with_no_reachable_wires = true; - while (ptcs_with_no_reachable_wires) { //Haven't found wire connected to ptc - ptcs_with_no_reachable_wires = false; + /* get chan/seg coordinates of the from/to nodes. seg coordinate is along the wire, + * chan coordinate is orthogonal to the wire */ + int from_seg_low; + int from_seg_high; + int from_chan; + int to_seg; + int to_chan; + if (from_type == CHANY) { + from_seg_low = rr_graph.node_ylow(from_node); + from_seg_high = rr_graph.node_yhigh(from_node); + from_chan = rr_graph.node_xlow(from_node); + to_seg = rr_graph.node_ylow(to_node); + to_chan = rr_graph.node_xlow(to_node); + } else { + from_seg_low = rr_graph.node_xlow(from_node); + from_seg_high = rr_graph.node_xhigh(from_node); + from_chan = rr_graph.node_ylow(from_node); + to_seg = rr_graph.node_xlow(to_node); + to_chan = rr_graph.node_ylow(to_node); + } + + /* now we want to count the minimum number of *channel segments* between the from and to nodes */ + int delta_seg, delta_chan; + + /* orthogonal to wire */ + int no_need_to_pass_by_clb = 0; //if we need orthogonal wires then we don't need to pass by the target CLB along the current wire direction + if (to_chan > from_chan + 1) { + /* above */ + delta_chan = to_chan - from_chan; + no_need_to_pass_by_clb = 1; + } else if (to_chan < from_chan) { + /* below */ + delta_chan = from_chan - to_chan + 1; + no_need_to_pass_by_clb = 1; + } else { + /* adjacent to current channel */ + delta_chan = 0; + no_need_to_pass_by_clb = 0; + } + + /* along same direction as wire. */ + if (to_seg > from_seg_high) { + /* ahead */ + delta_seg = to_seg - from_seg_high - no_need_to_pass_by_clb; + } else if (to_seg < from_seg_low) { + /* behind */ + delta_seg = from_seg_low - to_seg - no_need_to_pass_by_clb; + } else { + /* along the span of the wire */ + delta_seg = 0; + } + + /* account for wire direction. lookahead map was computed by looking up and to the right starting at INC wires. for targets + * that are opposite of the wire direction, let's add 1 to delta_seg */ + e_direction from_dir = rr_graph.node_direction(from_node); + if (is_chan(from_type) + && ((to_seg < from_seg_low && from_dir == INC_DIRECTION) || (to_seg > from_seg_high && from_dir == DEC_DIRECTION))) { + delta_seg++; + } + + if (from_type == CHANY) { + *delta_x = delta_chan; + *delta_y = delta_seg; + } else { + *delta_x = delta_seg; + *delta_y = delta_chan; + } + } + + VTR_ASSERT_SAFE(std::abs(*delta_x) < (int)device_ctx.grid.width()); + VTR_ASSERT_SAFE(std::abs(*delta_y) < (int)device_ctx.grid.height()); +} + +static void adjust_rr_position(const RRNodeId rr, int& x, int& y) { + auto& device_ctx = g_vpr_ctx.device(); + auto& rr_graph = device_ctx.rr_nodes; + + e_rr_type rr_type = rr_graph.node_type(rr); + + if (is_chan(rr_type)) { + adjust_rr_wire_position(rr, x, y); + } else if (is_pin(rr_type)) { + adjust_rr_pin_position(rr, x, y); + } else { + VTR_ASSERT_SAFE(is_src_sink(rr_type)); + adjust_rr_src_sink_position(rr, x, y); + } +} + +static void adjust_rr_pin_position(const RRNodeId rr, int& x, int& y) { + /* + * VPR uses a co-ordinate system where wires above and to the right of a block + * are at the same location as the block: + * + * + * <-----------C + * D + * | +---------+ ^ + * | | | | + * | | (1,1) | | + * | | | | + * V +---------+ | + * A + * B-----------> + * + * So wires are located as follows: + * + * A: (1, 1) CHANY + * B: (1, 0) CHANX + * C: (1, 1) CHANX + * D: (0, 1) CHANY + * + * But all pins incident on the surrounding channels + * would be at (1,1) along with a relevant side. + * + * In the following, we adjust the positions of pins to + * account for the channel they are incident too. + * + * Note that blocks at (0,*) such as IOs, may have (unconnected) + * pins on the left side, so we also clip the minimum x to zero. + * Similarly for blocks at (*,0) we clip the minimum y to zero. + */ + auto& device_ctx = g_vpr_ctx.device(); + auto& rr_graph = device_ctx.rr_nodes; + + VTR_ASSERT_SAFE(is_pin(rr_graph.node_type(rr))); + VTR_ASSERT_SAFE(rr_graph.node_xlow(rr) == rr_graph.node_xhigh(rr)); + VTR_ASSERT_SAFE(rr_graph.node_ylow(rr) == rr_graph.node_yhigh(rr)); + + x = rr_graph.node_xlow(rr); + y = rr_graph.node_ylow(rr); + + e_side rr_side = rr_graph.node_side(rr); + + if (rr_side == LEFT) { + x -= 1; + x = std::max(x, 0); + } else if (rr_side == BOTTOM && y > 0) { + y -= 1; + y = std::max(y, 0); + } +} + +static void adjust_rr_wire_position(const RRNodeId rr, int& x, int& y) { + auto& device_ctx = g_vpr_ctx.device(); + auto& rr_graph = device_ctx.rr_nodes; + + VTR_ASSERT_SAFE(is_chan(rr_graph.node_type(rr))); + + e_direction rr_dir = rr_graph.node_direction(rr); + + if (rr_dir == DEC_DIRECTION) { + x = rr_graph.node_xhigh(rr); + y = rr_graph.node_yhigh(rr); + } else if (rr_dir == INC_DIRECTION) { + x = rr_graph.node_xlow(rr); + y = rr_graph.node_ylow(rr); + } else { + VTR_ASSERT_SAFE(rr_dir == BI_DIRECTION); + //Not sure what to do here... + //Try average for now. + x = vtr::nint((rr_graph.node_xlow(rr) + rr_graph.node_xhigh(rr)) / 2.); + y = vtr::nint((rr_graph.node_ylow(rr) + rr_graph.node_yhigh(rr)) / 2.); + } +} + +static void adjust_rr_src_sink_position(const RRNodeId rr, int& x, int& y) { + //SOURCE/SINK nodes assume the full dimensions of their + //associated block + // + //Use the average position. + auto& device_ctx = g_vpr_ctx.device(); + auto& rr_graph = device_ctx.rr_nodes; + + VTR_ASSERT_SAFE(is_src_sink(rr_graph.node_type(rr))); + + x = vtr::nint((rr_graph.node_xlow(rr) + rr_graph.node_xhigh(rr)) / 2.); + y = vtr::nint((rr_graph.node_ylow(rr) + rr_graph.node_yhigh(rr)) / 2.); +} + +/* Runs Dijkstra's algorithm from specified node until all nodes have been + * visited. Each time a pin is visited, the delay/congestion information + * to that pin is stored to an entry in the routing_cost_map. + * + * Returns the maximum (last) minimum cost path stored, and + * the number of paths from start_node_ind stored. */ +template +static std::pair run_dijkstra(int start_node_ind, + std::vector* node_expanded, + std::vector* paths, + RoutingCosts* routing_costs) { + auto& device_ctx = g_vpr_ctx.device(); + int path_count = 0; + + /* a list of boolean flags (one for each rr node) to figure out if a + * certain node has already been expanded */ + std::fill(node_expanded->begin(), node_expanded->end(), false); + /* For each node keep a list of the cost with which that node has been + * visited (used to determine whether to push a candidate node onto the + * expansion queue. + * Also store the parent node so we can reconstruct a specific path. */ + std::fill(paths->begin(), paths->end(), util::Search_Path{std::numeric_limits::infinity(), -1, -1}); + /* a priority queue for expansion */ + std::priority_queue, std::greater> pq; + + /* first entry has no upstream delay or congestion */ + Entry first_entry(start_node_ind, UNDEFINED, nullptr); + float max_cost = first_entry.cost(); + + pq.push(first_entry); + + /* now do routing */ + while (!pq.empty()) { + auto current = pq.top(); + pq.pop(); + + int node_ind = current.rr_node_ind; + + /* check that we haven't already expanded from this node */ + if ((*node_expanded)[node_ind]) { + continue; + } + + /* if this node is an ipin record its congestion/delay in the routing_cost_map */ + if (device_ctx.rr_nodes[node_ind].type() == IPIN) { + // the last cost should be the highest + max_cost = current.cost(); + + path_count++; + add_paths(start_node_ind, current, *paths, routing_costs); + } else { + util::expand_dijkstra_neighbours(device_ctx.rr_nodes, + current, paths, node_expanded, &pq); + (*node_expanded)[node_ind] = true; + } + } + return std::make_pair(max_cost, path_count); +} + +// compute the cost maps for lookahead +void MapLookahead::compute(const std::vector& segment_inf) { + compute_router_src_opin_lookahead(); + compute_router_chan_ipin_lookahead(); + + vtr::ScopedStartFinishTimer timer("Computing connection box lookahead map"); + + // Initialize rr_node_route_inf if not already + alloc_and_load_rr_node_route_structs(); + + size_t num_segments = segment_inf.size(); + std::vector sample_regions = find_sample_regions(num_segments); + + /* free previous delay map and allocate new one */ + auto& device_ctx = g_vpr_ctx.device(); + cost_map_.set_counts(segment_inf.size()); + cost_map_.build_segment_map(); + + VTR_ASSERT(REPRESENTATIVE_ENTRY_METHOD == util::SMALLEST); + RoutingCosts all_delay_costs; + RoutingCosts all_base_costs; + + /* run Dijkstra's algorithm for each segment type & channel type combination */ +#if defined(VPR_USE_TBB) + tbb::mutex all_costs_mutex; + tbb::parallel_for_each(sample_regions, [&](const SampleRegion& region) { +#else + for (const auto& region : sample_regions) { +#endif + // holds the cost entries for a run + RoutingCosts delay_costs; + RoutingCosts base_costs; + int total_path_count = 0; + std::vector node_expanded(device_ctx.rr_nodes.size()); + std::vector paths(device_ctx.rr_nodes.size()); + + for (auto& point : region.points) { + // statistics + vtr::Timer run_timer; + float max_delay_cost = 0.f; + float max_base_cost = 0.f; + int path_count = 0; + for (auto node_ind : point.nodes) { + { + auto result = run_dijkstra(node_ind, &node_expanded, &paths, &delay_costs); + max_delay_cost = std::max(max_delay_cost, result.first); + path_count += result.second; + } + { + auto result = run_dijkstra(node_ind, &node_expanded, &paths, &base_costs); + max_base_cost = std::max(max_base_cost, result.first); + path_count += result.second; + } + } + + if (path_count > 0) { + VTR_LOG("Expanded %d paths of segment type %s(%d) starting at (%d, %d) from %d segments, max_cost %e %e (%g paths/sec)\n", + path_count, segment_inf[region.segment_type].name.c_str(), region.segment_type, + point.location.x(), point.location.y(), + (int)point.nodes.size(), + max_delay_cost, max_base_cost, + path_count / run_timer.elapsed_sec()); + } + + /* + * if (path_count == 0) { + * for (auto node_ind : point.nodes) { + * VTR_LOG("Expanded node %s\n", describe_rr_node(node_ind).c_str()); + * } + * } + */ + + total_path_count += path_count; + if (total_path_count > MIN_PATH_COUNT) { + break; + } + } + +#if defined(VPR_USE_TBB) + all_costs_mutex.lock(); +#endif + + if (total_path_count == 0) { + VTR_LOG_WARN("No paths found for sample region %s(%d, %d)\n", + segment_inf[region.segment_type].name.c_str(), region.grid_location.x(), region.grid_location.y()); + } + + // combine the cost map from this run with the final cost maps for each segment + for (const auto& cost : delay_costs) { + const auto& val = cost.second; + auto result = all_delay_costs.insert(std::make_pair(cost.first, val)); + if (!result.second) { + // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST + result.first->second = std::min(result.first->second, val); + } + } + for (const auto& cost : base_costs) { + const auto& val = cost.second; + auto result = all_base_costs.insert(std::make_pair(cost.first, val)); + if (!result.second) { + // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST + result.first->second = std::min(result.first->second, val); + } + } + +#if defined(VPR_USE_TBB) + all_costs_mutex.unlock(); + }); +#else + } +#endif + + VTR_LOG("Combining results\n"); + /* boil down the cost list in routing_cost_map at each coordinate to a + * representative cost entry and store it in the lookahead cost map */ + cost_map_.set_cost_map(all_delay_costs, all_base_costs); + +// diagnostics +#if defined(CONNECTION_BOX_LOOKAHEAD_MAP_PRINT_COST_ENTRIES) + for (auto& cost : all_costs) { + const auto& key = cost.first; + const auto& val = cost.second; + VTR_LOG("%d -> %d (%d, %d): %g, %g\n", + val.from_node, val.to_node, + key.delta.x(), key.delta.y(), + val.cost_entry.delay, val.cost_entry.congestion); + } +#endif + +#if defined(CONNECTION_BOX_LOOKAHEAD_MAP_PRINT_COST_MAPS) + for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { + VTR_LOG("cost map for %s(%d)\n", + segment_inf[iseg].name.c_str(), iseg); + cost_map_.print(iseg); + } +#endif + +#if defined(CONNECTION_BOX_LOOKAHEAD_MAP_PRINT_EMPTY_MAPS) + for (std::pair p : cost_map_.list_empty()) { + int ichan, iseg; + std::tie(ichan, iseg) = p; + VTR_LOG("cost map for %s(%d), chan %d EMPTY\n", + segment_inf[iseg].name.c_str(), iseg, box_id); + } +#endif +} + +static void compute_router_src_opin_lookahead() { + vtr::ScopedStartFinishTimer timer("Computing src/opin lookahead"); + auto& device_ctx = g_vpr_ctx.device(); + auto& rr_graph = device_ctx.rr_nodes; + + f_src_opin_delays.clear(); + + f_src_opin_delays.resize(device_ctx.physical_tile_types.size()); + + std::vector rr_nodes_at_loc; + + //We assume that the routing connectivity of each instance of a physical tile is the same, + //and so only measure one instance of each type + for (size_t itile = 0; itile < device_ctx.physical_tile_types.size(); ++itile) { + for (e_rr_type rr_type : {SOURCE, OPIN}) { + vtr::Point sample_loc(-1, -1); + + size_t num_sampled_locs = 0; + bool ptcs_with_no_delays = true; + while (ptcs_with_no_delays) { //Haven't found wire connected to ptc + ptcs_with_no_delays = false; sample_loc = pick_sample_tile(&device_ctx.physical_tile_types[itile], sample_loc); @@ -646,28 +1119,28 @@ static void compute_router_src_opin_lookahead() { int ptc = rr_graph.node_ptc_num(node_id); - if (ptc >= int(f_src_opin_reachable_wires[itile].size())) { - f_src_opin_reachable_wires[itile].resize(ptc + 1); //Inefficient but functional... + if (ptc >= int(f_src_opin_delays[itile].size())) { + f_src_opin_delays[itile].resize(ptc + 1); //Inefficient but functional... } //Find the wire types which are reachable from inode and record them and //the cost to reach them - dijkstra_flood_to_wires(itile, node_id, f_src_opin_reachable_wires); + dijkstra_flood_to_wires(itile, node_id, f_src_opin_delays); - if (f_src_opin_reachable_wires[itile][ptc].empty()) { - VTR_LOG_WARN("Found no reachable wires from %s (%s) at (%d,%d)\n", - rr_node_typename[rr_type], - rr_node_arch_name(inode).c_str(), - sample_loc.x(), - sample_loc.y()); + if (f_src_opin_delays[itile][ptc].empty()) { + VTR_LOGV_DEBUG(f_router_debug, "Found no reachable wires from %s (%s) at (%d,%d)\n", + rr_node_typename[rr_type], + rr_node_arch_name(inode).c_str(), + sample_loc.x(), + sample_loc.y()); - ptcs_with_no_reachable_wires = true; + ptcs_with_no_delays = true; } } ++num_sampled_locs; } - if (ptcs_with_no_reachable_wires) { + if (ptcs_with_no_delays) { VPR_ERROR(VPR_ERROR_ROUTE, "Some SOURCE/OPINs have no reachable wires\n"); } } @@ -678,9 +1151,9 @@ static void compute_router_chan_ipin_lookahead() { vtr::ScopedStartFinishTimer timer("Computing chan/ipin lookahead"); auto& device_ctx = g_vpr_ctx.device(); - f_chan_reachable_ipins.clear(); + f_chan_ipins_delays.clear(); - f_chan_reachable_ipins.resize(device_ctx.physical_tile_types.size()); + f_chan_ipins_delays.resize(device_ctx.physical_tile_types.size()); std::vector rr_nodes_at_loc; @@ -716,7 +1189,7 @@ static void compute_router_chan_ipin_lookahead() { //Find the IPINs which are reachable from the wires within the bounding box //around the selected tile location - dijkstra_flood_to_ipins(node_id, f_chan_reachable_ipins); + dijkstra_flood_to_ipins(node_id, f_chan_ipins_delays); } } } @@ -747,7 +1220,6 @@ static vtr::Point pick_sample_tile(t_physical_tile_type_ptr tile_type, vtr: if (grid[x][y].type->index == tile_type->index) { loc.set_x(x); loc.set_y(y); - VTR_LOG("RETURN LOC! %s (%d, %d)\n", tile_type->name, x, y); return loc; } } @@ -763,7 +1235,7 @@ static vtr::Point pick_sample_tile(t_physical_tile_type_ptr tile_type, vtr: return loc; } -void dijkstra_flood_to_wires(int itile, RRNodeId node, t_src_opin_reachable_wires& src_opin_reachable_wires) { +static void dijkstra_flood_to_wires(int itile, RRNodeId node, t_src_opin_delays& src_opin_delays) { auto& device_ctx = g_vpr_ctx.device(); auto& rr_graph = device_ctx.rr_nodes; @@ -828,12 +1300,12 @@ void dijkstra_flood_to_wires(int itile, RRNodeId node, t_src_opin_reachable_wire } //Keep costs of the best path to reach each wire type - if (!src_opin_reachable_wires[itile][ptc].count(seg_index) - || curr.delay < src_opin_reachable_wires[itile][ptc][seg_index].delay) { - src_opin_reachable_wires[itile][ptc][seg_index].wire_rr_type = curr_rr_type; - src_opin_reachable_wires[itile][ptc][seg_index].wire_seg_index = seg_index; - src_opin_reachable_wires[itile][ptc][seg_index].delay = curr.delay; - src_opin_reachable_wires[itile][ptc][seg_index].congestion = curr.congestion; + if (!src_opin_delays[itile][ptc].count(seg_index) + || curr.delay < src_opin_delays[itile][ptc][seg_index].delay) { + src_opin_delays[itile][ptc][seg_index].wire_rr_type = curr_rr_type; + src_opin_delays[itile][ptc][seg_index].wire_seg_index = seg_index; + src_opin_delays[itile][ptc][seg_index].delay = curr.delay; + src_opin_delays[itile][ptc][seg_index].congestion = curr.congestion; } } else if (curr_rr_type == SOURCE || curr_rr_type == OPIN || curr_rr_type == IPIN) { @@ -860,7 +1332,7 @@ void dijkstra_flood_to_wires(int itile, RRNodeId node, t_src_opin_reachable_wire } } -void dijkstra_flood_to_ipins(RRNodeId node, t_chan_reachable_ipins& chan_reachable_ipins) { +static void dijkstra_flood_to_ipins(RRNodeId node, t_chan_ipins_delays& chan_ipins_delays) { auto& device_ctx = g_vpr_ctx.device(); auto& rr_graph = device_ctx.rr_nodes; @@ -918,15 +1390,15 @@ void dijkstra_flood_to_ipins(RRNodeId node, t_chan_reachable_ipins& chan_reachab int ptc = rr_graph.node_ptc_num(curr.node); - if (ptc >= int(chan_reachable_ipins[itile].size())) { - chan_reachable_ipins[itile].resize(ptc + 1); //Inefficient but functional... + if (ptc >= int(chan_ipins_delays[itile].size())) { + chan_ipins_delays[itile].resize(ptc + 1); //Inefficient but functional... } //Keep costs of the best path to reach each wire type - chan_reachable_ipins[itile][ptc][seg_index].wire_rr_type = curr_rr_type; - chan_reachable_ipins[itile][ptc][seg_index].wire_seg_index = seg_index; - chan_reachable_ipins[itile][ptc][seg_index].delay = curr.delay; - chan_reachable_ipins[itile][ptc][seg_index].congestion = curr.congestion; + chan_ipins_delays[itile][ptc][seg_index].wire_rr_type = curr_rr_type; + chan_ipins_delays[itile][ptc][seg_index].wire_seg_index = seg_index; + chan_ipins_delays[itile][ptc][seg_index].delay = curr.delay; + chan_ipins_delays[itile][ptc][seg_index].congestion = curr.congestion; } else if (curr_rr_type == CHANX || curr_rr_type == CHANY) { if (curr.level >= MAX_EXPANSION_LEVEL) { continue; @@ -958,621 +1430,140 @@ void dijkstra_flood_to_ipins(RRNodeId node, t_chan_reachable_ipins& chan_reachab } } -/* returns index of a node from which to start routing */ -static RRNodeId get_start_node(int start_x, int start_y, int target_x, int target_y, t_rr_type rr_type, int seg_index, int track_offset) { +// get an expected minimum cost for routing from the current node to the target node +float MapLookahead::get_expected_cost( + int current_node, + int target_node, + const t_conn_cost_params& params, + float /*R_upstream*/) const { auto& device_ctx = g_vpr_ctx.device(); - auto& rr_graph = device_ctx.rr_nodes; - int result = UNDEFINED; + t_rr_type rr_type = device_ctx.rr_nodes[current_node].type(); - if (rr_type != CHANX && rr_type != CHANY) { - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Must start lookahead routing from CHANX or CHANY node\n"); + if (rr_type == CHANX || rr_type == CHANY) { + return get_map_cost( + current_node, target_node, params.criticality); + } else if (rr_type == IPIN) { /* Change if you're allowing route-throughs */ + return (device_ctx.rr_indexed_data[SINK_COST_INDEX].base_cost); + } else { /* Change this if you want to investigate route-throughs */ + return (0.); } +} - /* determine which direction the wire should go in based on the start & target coordinates */ - e_direction direction = INC_DIRECTION; - if ((rr_type == CHANX && target_x < start_x) || (rr_type == CHANY && target_y < start_y)) { - direction = DEC_DIRECTION; - } +#ifndef VTR_ENABLE_CAPNPROTO - int start_lookup_x = start_x; - int start_lookup_y = start_y; - if (rr_type == CHANX) { - //Bizarely, rr_node_indices stores CHANX with swapped x/y... - std::swap(start_lookup_x, start_lookup_y); - } +void MapLookahead::read(const std::string& file) { + VPR_THROW(VPR_ERROR_ROUTE, "MapLookahead::read not implemented"); +} +void MapLookahead::write(const std::string& file) const { + VPR_THROW(VPR_ERROR_ROUTE, "MapLookahead::write not implemented"); +} - const std::vector& channel_node_list = device_ctx.rr_node_indices[rr_type][start_lookup_x][start_lookup_y][0]; +#else - /* find first node in channel that has specified segment index and goes in the desired direction */ - for (unsigned itrack = 0; itrack < channel_node_list.size(); itrack++) { - int node_ind = channel_node_list[itrack]; - if (node_ind < 0) continue; +void MapLookahead::read(const std::string& file) { + cost_map_.read(file); - RRNodeId node_id(node_ind); - - VTR_ASSERT(rr_graph.node_type(node_id) == rr_type); - - e_direction node_direction = rr_graph.node_direction(node_id); - int node_cost_ind = rr_graph.node_cost_index(node_id); - int node_seg_ind = device_ctx.rr_indexed_data[node_cost_ind].seg_index; - - if ((node_direction == direction || node_direction == BI_DIRECTION) && node_seg_ind == seg_index) { - /* found first track that has the specified segment index and goes in the desired direction */ - result = node_ind; - if (track_offset == 0) { - break; - } - track_offset -= 2; - } - } - - return RRNodeId(result); + compute_router_src_opin_lookahead(); + compute_router_chan_ipin_lookahead(); } - -/* runs Dijkstra's algorithm from specified node until all nodes have been visited. Each time a pin is visited, the delay/congestion information - * to that pin is stored is added to an entry in the routing_cost_map */ -static void run_dijkstra(RRNodeId start_node, int start_x, int start_y, t_routing_cost_map& routing_cost_map, t_dijkstra_data* data) { - auto& device_ctx = g_vpr_ctx.device(); - auto& rr_graph = device_ctx.rr_nodes; - - auto& node_expanded = data->node_expanded; - node_expanded.resize(device_ctx.rr_nodes.size()); - std::fill(node_expanded.begin(), node_expanded.end(), false); - - auto& node_visited_costs = data->node_visited_costs; - node_visited_costs.resize(device_ctx.rr_nodes.size()); - std::fill(node_visited_costs.begin(), node_visited_costs.end(), -1.0); - - /* a priority queue for expansion */ - std::priority_queue& pq = data->pq; - - //Clear priority queue if non-empty - while (!pq.empty()) { - pq.pop(); - } - - /* first entry has no upstream delay or congestion */ - PQ_Entry first_entry(start_node, UNDEFINED, 0, 0, 0, true); - - pq.push(first_entry); - - /* now do routing */ - while (!pq.empty()) { - PQ_Entry current = pq.top(); - pq.pop(); - - RRNodeId curr_node = current.rr_node; - - /* check that we haven't already expanded from this node */ - if (node_expanded[curr_node]) { - continue; - } - - //VTR_LOG("Expanding with delay=%10.3g cong=%10.3g (%s)\n", current.delay, current.congestion_upstream, describe_rr_node(curr_node).c_str()); - - /* if this node is an ipin record its congestion/delay in the routing_cost_map */ - if (rr_graph.node_type(curr_node) == IPIN) { - int ipin_x = rr_graph.node_xlow(curr_node); - int ipin_y = rr_graph.node_ylow(curr_node); - - if (ipin_x >= start_x && ipin_y >= start_y) { - int delta_x, delta_y; - get_xy_deltas(start_node, curr_node, &delta_x, &delta_y); - delta_x = std::abs(delta_x); - delta_y = std::abs(delta_y); - - routing_cost_map[delta_x][delta_y].add_cost_entry(current.delay, current.congestion_upstream); - } - } - - expand_dijkstra_neighbours(current, node_visited_costs, node_expanded, pq); - node_expanded[curr_node] = true; - } +void MapLookahead::write(const std::string& file) const { + cost_map_.write(file); } -/* iterates over the children of the specified node and selectively pushes them onto the priority queue */ -static void expand_dijkstra_neighbours(PQ_Entry parent_entry, vtr::vector& node_visited_costs, vtr::vector& node_expanded, std::priority_queue& pq) { - auto& device_ctx = g_vpr_ctx.device(); - auto& rr_graph = device_ctx.rr_nodes; - - RRNodeId parent = parent_entry.rr_node; - - for (RREdgeId edge : rr_graph.edge_range(parent)) { - RRNodeId child_node = rr_graph.edge_sink_node(edge); - int switch_ind = rr_graph.edge_switch(edge); - - if (rr_graph.node_type(child_node) == SINK) return; - - /* skip this child if it has already been expanded from */ - if (node_expanded[child_node]) { - continue; - } - - PQ_Entry child_entry(child_node, switch_ind, parent_entry.delay, - parent_entry.R_upstream, parent_entry.congestion_upstream, false); - - //VTR_ASSERT(child_entry.cost >= 0); //Asertion fails in practise. TODO: debug - - /* skip this child if it has been visited with smaller cost */ - if (node_visited_costs[child_node] >= 0 && node_visited_costs[child_node] < child_entry.cost) { - continue; - } - - /* finally, record the cost with which the child was visited and put the child entry on the queue */ - node_visited_costs[child_node] = child_entry.cost; - pq.push(child_entry); - } +static void ToCostEntry(util::Cost_Entry* out, const VprCostEntry::Reader& in) { + out->delay = in.getDelay(); + out->congestion = in.getCongestion(); + out->fill = in.getFill(); } -/* sets the lookahead cost map entries based on representative cost entries from routing_cost_map */ -static void set_lookahead_map_costs(int segment_index, e_rr_type chan_type, t_routing_cost_map& routing_cost_map) { - int chan_index = 0; - if (chan_type == CHANY) { - chan_index = 1; - } - - /* set the lookahead cost map entries with a representative cost entry from routing_cost_map */ - for (unsigned ix = 0; ix < routing_cost_map.dim_size(0); ix++) { - for (unsigned iy = 0; iy < routing_cost_map.dim_size(1); iy++) { - Expansion_Cost_Entry& expansion_cost_entry = routing_cost_map[ix][iy]; - - f_wire_cost_map[chan_index][segment_index][ix][iy] = expansion_cost_entry.get_representative_cost_entry(REPRESENTATIVE_ENTRY_METHOD); - } - } +static void FromCostEntry(VprCostEntry::Builder* out, const util::Cost_Entry& in) { + out->setDelay(in.delay); + out->setCongestion(in.congestion); + out->setFill(in.fill); } -/* fills in missing lookahead map entries by copying the cost of the closest valid entry */ -static void fill_in_missing_lookahead_entries(int segment_index, e_rr_type chan_type) { - int chan_index = 0; - if (chan_type == CHANY) { - chan_index = 1; - } - - auto& device_ctx = g_vpr_ctx.device(); - - /* find missing cost entries and fill them in by copying a nearby cost entry */ - for (unsigned ix = 0; ix < device_ctx.grid.width(); ix++) { - for (unsigned iy = 0; iy < device_ctx.grid.height(); iy++) { - Cost_Entry cost_entry = f_wire_cost_map[chan_index][segment_index][ix][iy]; - - if (std::isnan(cost_entry.delay) && std::isnan(cost_entry.congestion)) { - Cost_Entry copied_entry = get_nearby_cost_entry(ix, iy, segment_index, chan_index); - f_wire_cost_map[chan_index][segment_index][ix][iy] = copied_entry; - } - } - } +static void ToVprVector2D(std::pair* out, const VprVector2D::Reader& in) { + *out = std::make_pair(in.getX(), in.getY()); } -/* returns a cost entry in the f_wire_cost_map that is near the specified coordinates (and preferably towards (0,0)) */ -static Cost_Entry get_nearby_cost_entry(int x, int y, int segment_index, int chan_index) { - /* compute the slope from x,y to 0,0 and then move towards 0,0 by one unit to get the coordinates - * of the cost entry to be copied */ - - //VTR_ASSERT(x > 0 || y > 0); //Asertion fails in practise. TODO: debug - - float slope; - if (x == 0) { - slope = 1e12; //just a really large number - } else if (y == 0) { - slope = 1e-12; //just a really small number - } else { - slope = (float)y / (float)x; - } - - int copy_x, copy_y; - if (slope >= 1.0) { - copy_y = y - 1; - copy_x = vtr::nint((float)y / slope); - } else { - copy_x = x - 1; - copy_y = vtr::nint((float)x * slope); - } - - copy_y = std::max(copy_y, 0); //Clip to zero - copy_x = std::max(copy_x, 0); //Clip to zero - - Cost_Entry copy_entry = f_wire_cost_map[chan_index][segment_index][copy_x][copy_y]; - - /* if the entry to be copied is also empty, recurse */ - if (std::isnan(copy_entry.delay) && std::isnan(copy_entry.congestion)) { - if (copy_x == 0 && copy_y == 0) { - copy_entry = Cost_Entry(0., 0.); //(0, 0) entry is invalid so set zero to terminate recursion - } else { - copy_entry = get_nearby_cost_entry(copy_x, copy_y, segment_index, chan_index); - } - } - - return copy_entry; +static void FromVprVector2D(VprVector2D::Builder* out, const std::pair& in) { + out->setX(in.first); + out->setY(in.second); } -/* returns cost entry with the smallest delay */ -Cost_Entry Expansion_Cost_Entry::get_smallest_entry() { - Cost_Entry smallest_entry; - - for (auto entry : this->cost_vector) { - if (std::isnan(smallest_entry.delay) || entry.delay < smallest_entry.delay) { - smallest_entry = entry; - } - } - - return smallest_entry; +static void ToMatrixCostEntry(vtr::NdMatrix* out, + const Matrix::Reader& in) { + ToNdMatrix<2, VprCostEntry, util::Cost_Entry>(out, in, ToCostEntry); } -/* returns a cost entry that represents the average of all the recorded entries */ -Cost_Entry Expansion_Cost_Entry::get_average_entry() { - float avg_delay = 0; - float avg_congestion = 0; - - for (auto cost_entry : this->cost_vector) { - avg_delay += cost_entry.delay; - avg_congestion += cost_entry.congestion; - } - - avg_delay /= (float)this->cost_vector.size(); - avg_congestion /= (float)this->cost_vector.size(); - - return Cost_Entry(avg_delay, avg_congestion); +static void FromMatrixCostEntry( + Matrix::Builder* out, + const vtr::NdMatrix& in) { + FromNdMatrix<2, VprCostEntry, util::Cost_Entry>( + out, in, FromCostEntry); } -/* returns a cost entry that represents the geomean of all the recorded entries */ -Cost_Entry Expansion_Cost_Entry::get_geomean_entry() { - float geomean_delay = 0; - float geomean_cong = 0; - for (auto cost_entry : this->cost_vector) { - geomean_delay += log(cost_entry.delay); - geomean_cong += log(cost_entry.congestion); - } - - geomean_delay = exp(geomean_delay / (float)this->cost_vector.size()); - geomean_cong = exp(geomean_cong / (float)this->cost_vector.size()); - - return Cost_Entry(geomean_delay, geomean_cong); +static void ToFloat(float* out, const VprFloatEntry::Reader& in) { + // Getting a scalar field is always "get()". + *out = in.getValue(); } -/* returns a cost entry that represents the medial of all recorded entries */ -Cost_Entry Expansion_Cost_Entry::get_median_entry() { - /* find median by binning the delays of all entries and then chosing the bin - * with the largest number of entries */ - - int num_bins = 10; - - /* find entries with smallest and largest delays */ - Cost_Entry min_del_entry; - Cost_Entry max_del_entry; - for (auto entry : this->cost_vector) { - if (std::isnan(min_del_entry.delay) || entry.delay < min_del_entry.delay) { - min_del_entry = entry; - } - if (std::isnan(max_del_entry.delay) || entry.delay > max_del_entry.delay) { - max_del_entry = entry; - } - } - - /* get the bin size */ - float delay_diff = max_del_entry.delay - min_del_entry.delay; - float bin_size = delay_diff / (float)num_bins; - - /* sort the cost entries into bins */ - std::vector> entry_bins(num_bins, std::vector()); - for (auto entry : this->cost_vector) { - float bin_num = floor((entry.delay - min_del_entry.delay) / bin_size); - - VTR_ASSERT(vtr::nint(bin_num) >= 0 && vtr::nint(bin_num) <= num_bins); - if (vtr::nint(bin_num) == num_bins) { - /* largest entry will otherwise have an out-of-bounds bin number */ - bin_num -= 1; - } - entry_bins[vtr::nint(bin_num)].push_back(entry); - } - - /* find the bin with the largest number of elements */ - int largest_bin = 0; - int largest_size = 0; - for (int ibin = 0; ibin < num_bins; ibin++) { - if (entry_bins[ibin].size() > (unsigned)largest_size) { - largest_bin = ibin; - largest_size = (unsigned)entry_bins[ibin].size(); - } - } - - /* get the representative delay of the largest bin */ - Cost_Entry representative_entry = entry_bins[largest_bin][0]; - - return representative_entry; +static void FromFloat(VprFloatEntry::Builder* out, const float& in) { + // Setting a scalar field is always "set(value)". + out->setValue(in); } -/* returns the absolute delta_x and delta_y offset required to reach to_node from from_node */ -static void get_xy_deltas(const RRNodeId from_node, const RRNodeId to_node, int* delta_x, int* delta_y) { - auto& device_ctx = g_vpr_ctx.device(); - auto& rr_graph = device_ctx.rr_nodes; - - e_rr_type from_type = rr_graph.node_type(from_node); - e_rr_type to_type = rr_graph.node_type(to_node); - - if (!is_chan(from_type) && !is_chan(to_type)) { - //Alternate formulation for non-channel types - int from_x = 0; - int from_y = 0; - adjust_rr_position(from_node, from_x, from_y); - - int to_x = 0; - int to_y = 0; - adjust_rr_position(to_node, to_x, to_y); - - *delta_x = to_x - from_x; - *delta_y = to_y - from_y; - } else { - //Traditional formulation - - /* get chan/seg coordinates of the from/to nodes. seg coordinate is along the wire, - * chan coordinate is orthogonal to the wire */ - int from_seg_low; - int from_seg_high; - int from_chan; - int to_seg; - int to_chan; - if (from_type == CHANY) { - from_seg_low = rr_graph.node_ylow(from_node); - from_seg_high = rr_graph.node_yhigh(from_node); - from_chan = rr_graph.node_xlow(from_node); - to_seg = rr_graph.node_ylow(to_node); - to_chan = rr_graph.node_xlow(to_node); - } else { - from_seg_low = rr_graph.node_xlow(from_node); - from_seg_high = rr_graph.node_xhigh(from_node); - from_chan = rr_graph.node_ylow(from_node); - to_seg = rr_graph.node_xlow(to_node); - to_chan = rr_graph.node_ylow(to_node); - } - - /* now we want to count the minimum number of *channel segments* between the from and to nodes */ - int delta_seg, delta_chan; - - /* orthogonal to wire */ - int no_need_to_pass_by_clb = 0; //if we need orthogonal wires then we don't need to pass by the target CLB along the current wire direction - if (to_chan > from_chan + 1) { - /* above */ - delta_chan = to_chan - from_chan; - no_need_to_pass_by_clb = 1; - } else if (to_chan < from_chan) { - /* below */ - delta_chan = from_chan - to_chan + 1; - no_need_to_pass_by_clb = 1; - } else { - /* adjacent to current channel */ - delta_chan = 0; - no_need_to_pass_by_clb = 0; - } - - /* along same direction as wire. */ - if (to_seg > from_seg_high) { - /* ahead */ - delta_seg = to_seg - from_seg_high - no_need_to_pass_by_clb; - } else if (to_seg < from_seg_low) { - /* behind */ - delta_seg = from_seg_low - to_seg - no_need_to_pass_by_clb; - } else { - /* along the span of the wire */ - delta_seg = 0; - } +void CostMap::read(const std::string& file) { + build_segment_map(); + MmapFile f(file); - /* account for wire direction. lookahead map was computed by looking up and to the right starting at INC wires. for targets - * that are opposite of the wire direction, let's add 1 to delta_seg */ - e_direction from_dir = rr_graph.node_direction(from_node); - if (is_chan(from_type) - && ((to_seg < from_seg_low && from_dir == INC_DIRECTION) || (to_seg > from_seg_high && from_dir == DEC_DIRECTION))) { - delta_seg++; - } + ::capnp::ReaderOptions opts = default_large_capnp_opts(); + ::capnp::FlatArrayMessageReader reader(f.getData(), opts); - if (from_type == CHANY) { - *delta_x = delta_chan; - *delta_y = delta_seg; - } else { - *delta_x = delta_seg; - *delta_y = delta_chan; - } + auto cost_map = reader.getRoot(); + { + const auto& offset = cost_map.getOffset(); + ToNdMatrix<2, VprVector2D, std::pair>( + &offset_, offset, ToVprVector2D); } - VTR_ASSERT_SAFE(std::abs(*delta_x) < (int)device_ctx.grid.width()); - VTR_ASSERT_SAFE(std::abs(*delta_y) < (int)device_ctx.grid.height()); -} - -static void adjust_rr_position(const RRNodeId rr, int& x, int& y) { - auto& device_ctx = g_vpr_ctx.device(); - auto& rr_graph = device_ctx.rr_nodes; - - e_rr_type rr_type = rr_graph.node_type(rr); - - if (is_chan(rr_type)) { - adjust_rr_wire_position(rr, x, y); - } else if (is_pin(rr_type)) { - adjust_rr_pin_position(rr, x, y); - } else { - VTR_ASSERT_SAFE(is_src_sink(rr_type)); - adjust_rr_src_sink_position(rr, x, y); + { + const auto& cost_maps = cost_map.getCostMap(); + ToNdMatrix<2, Matrix, vtr::NdMatrix>( + &cost_map_, cost_maps, ToMatrixCostEntry); } -} - -static void adjust_rr_pin_position(const RRNodeId rr, int& x, int& y) { - /* - * VPR uses a co-ordinate system where wires above and to the right of a block - * are at the same location as the block: - * - * - * <-----------C - * D - * | +---------+ ^ - * | | | | - * | | (1,1) | | - * | | | | - * V +---------+ | - * A - * B-----------> - * - * So wires are located as follows: - * - * A: (1, 1) CHANY - * B: (1, 0) CHANX - * C: (1, 1) CHANX - * D: (0, 1) CHANY - * - * But all pins incident on the surrounding channels - * would be at (1,1) along with a relevant side. - * - * In the following, we adjust the positions of pins to - * account for the channel they are incident too. - * - * Note that blocks at (0,*) such as IOs, may have (unconnected) - * pins on the left side, so we also clip the minimum x to zero. - * Similarly for blocks at (*,0) we clip the minimum y to zero. - */ - auto& device_ctx = g_vpr_ctx.device(); - auto& rr_graph = device_ctx.rr_nodes; - VTR_ASSERT_SAFE(is_pin(rr_graph.node_type(rr))); - VTR_ASSERT_SAFE(rr_graph.node_xlow(rr) == rr_graph.node_xhigh(rr)); - VTR_ASSERT_SAFE(rr_graph.node_ylow(rr) == rr_graph.node_yhigh(rr)); - - x = rr_graph.node_xlow(rr); - y = rr_graph.node_ylow(rr); - - e_side rr_side = rr_graph.node_side(rr); - - if (rr_side == LEFT) { - x -= 1; - x = std::max(x, 0); - } else if (rr_side == BOTTOM && y > 0) { - y -= 1; - y = std::max(y, 0); + { + const auto& penalty = cost_map.getPenalty(); + ToNdMatrix<2, VprFloatEntry, float>( + &penalty_, penalty, ToFloat); } } -static void adjust_rr_wire_position(const RRNodeId rr, int& x, int& y) { - auto& device_ctx = g_vpr_ctx.device(); - auto& rr_graph = device_ctx.rr_nodes; - - VTR_ASSERT_SAFE(is_chan(rr_graph.node_type(rr))); +void CostMap::write(const std::string& file) const { + ::capnp::MallocMessageBuilder builder; - e_direction rr_dir = rr_graph.node_direction(rr); + auto cost_map = builder.initRoot(); - if (rr_dir == DEC_DIRECTION) { - x = rr_graph.node_xhigh(rr); - y = rr_graph.node_yhigh(rr); - } else if (rr_dir == INC_DIRECTION) { - x = rr_graph.node_xlow(rr); - y = rr_graph.node_ylow(rr); - } else { - VTR_ASSERT_SAFE(rr_dir == BI_DIRECTION); - //Not sure what to do here... - //Try average for now. - x = vtr::nint((rr_graph.node_xlow(rr) + rr_graph.node_xhigh(rr)) / 2.); - y = vtr::nint((rr_graph.node_ylow(rr) + rr_graph.node_yhigh(rr)) / 2.); + { + auto offset = cost_map.initOffset(); + FromNdMatrix<2, VprVector2D, std::pair>( + &offset, offset_, FromVprVector2D); } -} - -static void adjust_rr_src_sink_position(const RRNodeId rr, int& x, int& y) { - //SOURCE/SINK nodes assume the full dimensions of their - //associated block - // - //Use the average position. - auto& device_ctx = g_vpr_ctx.device(); - auto& rr_graph = device_ctx.rr_nodes; - - VTR_ASSERT_SAFE(is_src_sink(rr_graph.node_type(rr))); - - x = vtr::nint((rr_graph.node_xlow(rr) + rr_graph.node_xhigh(rr)) / 2.); - y = vtr::nint((rr_graph.node_ylow(rr) + rr_graph.node_yhigh(rr)) / 2.); -} -static void print_wire_cost_map(const std::vector& segment_inf) { - auto& device_ctx = g_vpr_ctx.device(); - - for (size_t chan_index = 0; chan_index < f_wire_cost_map.dim_size(0); chan_index++) { - for (size_t iseg = 0; iseg < f_wire_cost_map.dim_size(1); iseg++) { - vtr::printf("Seg %d (%s, length %d) %d\n", - iseg, - segment_inf[iseg].name.c_str(), - segment_inf[iseg].length, - chan_index); - for (size_t iy = 0; iy < device_ctx.grid.height(); iy++) { - for (size_t ix = 0; ix < device_ctx.grid.width(); ix++) { - vtr::printf("%2d,%2d: %10.3g\t", ix, iy, f_wire_cost_map[chan_index][iseg][ix][iy].delay); - } - vtr::printf("\n"); - } - vtr::printf("\n\n"); - } + { + auto cost_maps = cost_map.initCostMap(); + FromNdMatrix<2, Matrix, vtr::NdMatrix>( + &cost_maps, cost_map_, FromMatrixCostEntry); } -} -static void print_router_cost_map(const t_routing_cost_map& router_cost_map) { - VTR_LOG("Djikstra Flood Costs:\n"); - for (size_t x = 0; x < router_cost_map.dim_size(0); x++) { - for (size_t y = 0; y < router_cost_map.dim_size(1); y++) { - VTR_LOG("(%zu,%zu):\n", x, y); - - for (size_t i = 0; i < router_cost_map[x][y].cost_vector.size(); ++i) { - Cost_Entry entry = router_cost_map[x][y].cost_vector[i]; - VTR_LOG(" %d: delay=%10.3g cong=%10.3g\n", i, entry.delay, entry.congestion); - } - } + { + auto penalty = cost_map.initPenalty(); + FromNdMatrix<2, VprFloatEntry, float>( + &penalty, penalty_, FromFloat); } -} - -// -// When writing capnp targetted serialization, always allow compilation when -// VTR_ENABLE_CAPNPROTO=OFF. Generally this means throwing an exception -// instead. -// -#ifndef VTR_ENABLE_CAPNPROTO - -# define DISABLE_ERROR \ - "is disabled because VTR_ENABLE_CAPNPROTO=OFF." \ - "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable." - -void read_router_lookahead(const std::string& /*file*/) { - VPR_THROW(VPR_ERROR_PLACE, "MapLookahead::read " DISABLE_ERROR); -} - -void DeltaDelayModel::write(const std::string& /*file*/) const { - VPR_THROW(VPR_ERROR_PLACE, "MapLookahead::write " DISABLE_ERROR); -} - -#else /* VTR_ENABLE_CAPNPROTO */ - -static void ToCostEntry(Cost_Entry* out, const VprMapCostEntry::Reader& in) { - out->delay = in.getDelay(); - out->congestion = in.getCongestion(); -} - -static void FromCostEntry(VprMapCostEntry::Builder* out, const Cost_Entry& in) { - out->setDelay(in.delay); - out->setCongestion(in.congestion); -} - -void read_router_lookahead(const std::string& file) { - vtr::ScopedStartFinishTimer timer("Loading router wire lookahead map"); - MmapFile f(file); - - /* Increase reader limit to 1G words to allow for large files. */ - ::capnp::ReaderOptions opts = default_large_capnp_opts(); - ::capnp::FlatArrayMessageReader reader(f.getData(), opts); - - auto map = reader.getRoot(); - - ToNdMatrix<4, VprMapCostEntry, Cost_Entry>(&f_wire_cost_map, map.getCostMap(), ToCostEntry); -} - -void write_router_lookahead(const std::string& file) { - ::capnp::MallocMessageBuilder builder; - - auto map = builder.initRoot(); - - auto cost_map = map.initCostMap(); - FromNdMatrix<4, VprMapCostEntry, Cost_Entry>(&cost_map, f_wire_cost_map, FromCostEntry); writeMessageToFile(file, &builder); } - #endif diff --git a/vpr/src/route/router_lookahead_map.h b/vpr/src/route/router_lookahead_map.h index e37c9967a00..4d8b043f0e6 100644 --- a/vpr/src/route/router_lookahead_map.h +++ b/vpr/src/route/router_lookahead_map.h @@ -1,48 +1,84 @@ -#pragma once +#ifndef CONNECTION_BOX_LOOKAHEAD_H_ +#define CONNECTION_BOX_LOOKAHEAD_H_ -#include -#include -#include "vtr_ndmatrix.h" +#include +#include "physical_types.h" #include "router_lookahead.h" +#include "router_lookahead_map_utils.h" +#include "vtr_geometry.h" -class MapLookahead : public RouterLookahead { - protected: - float get_expected_cost(int node, int target_node, const t_conn_cost_params& params, float R_upstream) const override; - void compute(const std::vector& segment_inf) override; - void read(const std::string& file) override; - void write(const std::string& file) const override; -}; +// Keys in the RoutingCosts map +struct RoutingCostKey { + // index of the channel (CHANX or CHANY) + int chan_index; -/* f_cost_map is an array of these cost entries that specifies delay/congestion estimates - * to travel relative x/y distances */ -class Cost_Entry { - public: - float delay; - float congestion; + // segment type index + int seg_index; + + // offset of the destination connection box from the starting segment + vtr::Point delta; + + RoutingCostKey() + : seg_index(-1) + , delta(0, 0) {} + RoutingCostKey(int chan_index_arg, int seg_index_arg, vtr::Point delta_arg) + : chan_index(chan_index_arg) + , seg_index(seg_index_arg) + , delta(delta_arg) {} - Cost_Entry() - : Cost_Entry(std::numeric_limits::quiet_NaN(), - std::numeric_limits::quiet_NaN()) { + bool operator==(const RoutingCostKey& other) const { + return seg_index == other.seg_index && chan_index == other.chan_index && delta == other.delta; } +}; - Cost_Entry(float set_delay, float set_congestion) { - delay = set_delay; - congestion = set_congestion; +// hash implementation for RoutingCostKey +struct HashRoutingCostKey { + std::size_t operator()(RoutingCostKey const& key) const noexcept { + std::size_t hash = std::hash{}(key.chan_index); + vtr::hash_combine(hash, key.seg_index); + vtr::hash_combine(hash, key.delta.x()); + vtr::hash_combine(hash, key.delta.y()); + return hash; } }; -/* provides delay/congestion estimates to travel specified distances - * in the x/y direction */ -typedef vtr::NdMatrix t_wire_cost_map; //[0..1][[0..num_seg_types-1]0..device_ctx.grid.width()-1][0..device_ctx.grid.height()-1] - //[0..1] entry distinguish between CHANX/CHANY start nodes respectively +// Map used to store intermediate routing costs +typedef std::unordered_map RoutingCosts; + +// Dense cost maps per source segment and destination connection box types +class CostMap { + public: + void set_counts(size_t seg_count); + void build_segment_map(); + int node_to_segment(int from_node_ind) const; + util::Cost_Entry find_cost(int from_seg_index, e_rr_type rr_type, int delta_x, int delta_y) const; + void set_cost_map(const RoutingCosts& delay_costs, const RoutingCosts& base_costs); + std::pair get_nearby_cost_entry(const vtr::NdMatrix& matrix, int cx, int cy, const vtr::Rect& bounds); + void read(const std::string& file); + void write(const std::string& file) const; + void print(int iseg) const; + std::vector> list_empty() const; + + private: + vtr::Matrix> cost_map_; + vtr::Matrix> offset_; + vtr::Matrix penalty_; + std::vector segment_map_; + size_t seg_count_; +}; + +// Implementation of RouterLookahead based on source segment and destination connection box types +class MapLookahead : public RouterLookahead { + public: + std::pair get_src_opin_delays(RRNodeId from_node, int delta_x, int delta_y, float criticality_fac) const; + float get_expected_cost(int node, int target_node, const t_conn_cost_params& params, float R_upstream) const override; + float get_map_cost(int from_node_ind, int to_node_ind, float criticality_fac) const; + void compute(const std::vector& segment_inf) override; -void read_router_lookahead(const std::string& file); -void write_router_lookahead(const std::string& file); + void read(const std::string& file) override; + void write(const std::string& file) const override; -/* Computes the lookahead map to be used by the router. If a map was computed prior to this, a new one will not be computed again. - * The rr graph must have been built before calling this function. */ -void compute_router_lookahead(const std::vector& segment_inf); + CostMap cost_map_; +}; -/* queries the lookahead_map (should have been computed prior to routing) to get the expected cost - * from the specified source to the specified target */ -float get_lookahead_map_cost(RRNodeId from_node, RRNodeId to_node, float criticality_fac); +#endif diff --git a/vpr/src/route/router_lookahead_sampling.cpp b/vpr/src/route/router_lookahead_sampling.cpp index 601f28680d7..2a7c9f9082a 100644 --- a/vpr/src/route/router_lookahead_sampling.cpp +++ b/vpr/src/route/router_lookahead_sampling.cpp @@ -21,13 +21,13 @@ static int manhattan_distance(const vtr::Point& a, const vtr::Point& b } // the smallest bounding box containing a node -static vtr::Rect bounding_box_for_node(const ConnectionBoxes& connection_boxes, int node_ind) { - const std::pair* loc = connection_boxes.find_canonical_loc(node_ind); - if (loc == nullptr) { - return vtr::Rect(); - } else { - return vtr::Rect(vtr::Point(loc->first, loc->second)); - } +static vtr::Rect bounding_box_for_node(int node_ind) { + auto& device_ctx = g_vpr_ctx.device(); + auto& rr_graph = device_ctx.rr_nodes; + int x = rr_graph.node_xlow(RRNodeId(node_ind)); + int y = rr_graph.node_ylow(RRNodeId(node_ind)); + + return vtr::Rect(vtr::Point(loc->first, loc->second)); } static vtr::Rect sample_window(const vtr::Rect& bounding_box, int sx, int sy, int n) { @@ -131,7 +131,7 @@ std::vector find_sample_regions(int num_segments) { VTR_ASSERT(seg_index != OPEN); VTR_ASSERT(seg_index < num_segments); - bounding_box_for_segment[seg_index].expand_bounding_box(bounding_box_for_node(device_ctx.connection_boxes, i)); + bounding_box_for_segment[seg_index].expand_bounding_box(bounding_box_for_node(i)); } // initialize counts diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index ea06d7c769e..eceb209227c 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -32,7 +32,6 @@ #include "rr_graph_writer.h" #include "rr_graph_reader.h" #include "router_lookahead_map.h" -#include "connection_box_lookahead_map.h" #include "rr_graph_clock.h" #include "rr_types.h" From 309a96db64c4110f7ad50d0d79dfaa4a8662ed78 Mon Sep 17 00:00:00 2001 From: Alessandro Comodi Date: Mon, 15 Jun 2020 12:56:23 +0200 Subject: [PATCH 40/41] Revert "Integrate upstream uxsdcxx parser with connection box lookahead" This reverts commit 8c4519c8723b7adf2c4571700202f06690d04278. --- .../gen/rr_graph_uxsdcxx.capnp | 33 +- vpr/src/base/read_options.cpp | 15 +- vpr/src/base/vpr_types.h | 5 +- vpr/src/route/gen/rr_graph_uxsdcxx.h | 615 +----------------- vpr/src/route/gen/rr_graph_uxsdcxx_capnp.h | 125 ---- .../route/gen/rr_graph_uxsdcxx_interface.h | 74 --- vpr/src/route/rr_graph.xsd | 29 - vpr/src/route/rr_graph_reader.cpp | 1 - vpr/src/route/rr_graph_uxsdcxx_serializer.h | 170 ----- vpr/src/route/rr_graph_writer.cpp | 1 - 10 files changed, 15 insertions(+), 1053 deletions(-) diff --git a/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp b/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp index 6ccf1055434..145245b3d49 100644 --- a/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp +++ b/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp @@ -137,18 +137,6 @@ struct BlockTypes { blockTypes @0 :List(BlockType); } -struct ConnectionBoxDeclaration { - id @0 :UInt32; - name @1 :Text; -} - -struct ConnectionBoxes { - numBoxes @0 :UInt32; - xDim @1 :UInt32; - yDim @2 :UInt32; - connectionBoxes @3 :List(ConnectionBoxDeclaration); -} - struct GridLoc { blockTypeId @0 :Int32; heightOffset @1 :Int32; @@ -188,18 +176,6 @@ struct Metadata { metas @0 :List(Meta); } -struct CanonicalLoc { - x @0 :UInt32; - y @1 :UInt32; -} - -struct ConnectionBoxAnnotation { - id @0 :UInt32; - sitePinDelay @1 :Float32; - x @2 :UInt32; - y @3 :UInt32; -} - struct Node { capacity @0 :UInt32; direction @1 :NodeDirection; @@ -209,8 +185,6 @@ struct Node { timing @5 :NodeTiming; segment @6 :NodeSegment; metadata @7 :Metadata; - canonicalLoc @8 :CanonicalLoc; - connectionBox @9 :ConnectionBoxAnnotation; } struct RrNodes { @@ -236,8 +210,7 @@ struct RrGraph { switches @4 :Switches; segments @5 :Segments; blockTypes @6 :BlockTypes; - connectionBoxes @7 :ConnectionBoxes; - grid @8 :GridLocs; - rrNodes @9 :RrNodes; - rrEdges @10 :RrEdges; + grid @7 :GridLocs; + rrNodes @8 :RrNodes; + rrEdges @9 :RrEdges; } diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index af12ec39311..2c54f90394b 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -692,8 +692,6 @@ struct ParseRouterLookahead { conv_value.set_value(e_router_lookahead::CLASSIC); else if (str == "map") conv_value.set_value(e_router_lookahead::MAP); - else if (str == "connection_box_map") - conv_value.set_value(e_router_lookahead::CONNECTION_BOX_MAP); else { std::stringstream msg; msg << "Invalid conversion from '" @@ -707,22 +705,17 @@ struct ParseRouterLookahead { ConvertedValue to_str(e_router_lookahead val) { ConvertedValue conv_value; - if (val == e_router_lookahead::CLASSIC) { + if (val == e_router_lookahead::CLASSIC) conv_value.set_value("classic"); - } else if (val == e_router_lookahead::MAP) { + else { + VTR_ASSERT(val == e_router_lookahead::MAP); conv_value.set_value("map"); - } else if (val == e_router_lookahead::CONNECTION_BOX_MAP) { - conv_value.set_value("connection_box_map"); - } else { - std::stringstream msg; - msg << "Unrecognized e_router_lookahead"; - conv_value.set_error(msg.str()); } return conv_value; } std::vector default_choices() { - return {"classic", "map", "connection_box_map"}; + return {"classic", "map"}; } }; diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index c282d9fde60..3b4b661eb07 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -108,10 +108,7 @@ constexpr const char* EMPTY_BLOCK_NAME = "EMPTY"; enum class e_router_lookahead { CLASSIC, //VPR's classic lookahead (assumes uniform wire types) MAP, //Lookahead considering different wire types (see Oleg Petelin's MASc Thesis) - NO_OP, //A no-operation lookahead which always returns zero - CONNECTION_BOX_MAP, - // Lookahead considering different wire types and IPIN - // connection box. + NO_OP //A no-operation lookahead which always returns zero }; enum class e_route_bb_update { diff --git a/vpr/src/route/gen/rr_graph_uxsdcxx.h b/vpr/src/route/gen/rr_graph_uxsdcxx.h index 53b2e63772c..78aade3b638 100644 --- a/vpr/src/route/gen/rr_graph_uxsdcxx.h +++ b/vpr/src/route/gen/rr_graph_uxsdcxx.h @@ -80,12 +80,6 @@ inline void load_block_type_required_attributes(const pugi::xml_node& root, int* template inline void load_block_types(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); template -inline void load_connection_box_declaration(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); -inline void load_connection_box_declaration_required_attributes(const pugi::xml_node& root, unsigned int* id, const std::function* report_error); -template -inline void load_connection_boxes(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); -inline void load_connection_boxes_required_attributes(const pugi::xml_node& root, unsigned int* num_boxes, unsigned int* x_dim, unsigned int* y_dim, const std::function* report_error); -template inline void load_grid_loc(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); inline void load_grid_loc_required_attributes(const pugi::xml_node& root, int* block_type_id, int* height_offset, int* width_offset, int* x, int* y, const std::function* report_error); template @@ -104,12 +98,6 @@ inline void load_meta(const pugi::xml_node& root, T& out, Context& context, cons template inline void load_metadata(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); template -inline void load_canonical_loc(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); -inline void load_canonical_loc_required_attributes(const pugi::xml_node& root, unsigned int* x, unsigned int* y, const std::function* report_error); -template -inline void load_connection_box_annotation(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); -inline void load_connection_box_annotation_required_attributes(const pugi::xml_node& root, unsigned int* id, float* site_pin_delay, unsigned int* x, unsigned int* y, const std::function* report_error); -template inline void load_node(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); inline void load_node_required_attributes(const pugi::xml_node& root, unsigned int* capacity, unsigned int* id, enum_node_type* type, const std::function* report_error); template @@ -142,8 +130,6 @@ inline void write_block_type(T& in, std::ostream& os, const void* data, void* it template inline void write_block_types(T& in, std::ostream& os, const void* data, void* iter); template -inline void write_connection_boxes(T& in, std::ostream& os, const void* data, void* iter); -template inline void write_grid_locs(T& in, std::ostream& os, const void* data, void* iter); template inline void write_meta(T& in, std::ostream& os, const void* data, void* iter); @@ -300,17 +286,6 @@ constexpr const char* atok_lookup_t_block_type[] = {"height", "id", "name", "wid enum class gtok_t_block_types { BLOCK_TYPE }; constexpr const char* gtok_lookup_t_block_types[] = {"block_type"}; -enum class atok_t_connection_box_declaration { ID, - NAME }; -constexpr const char* atok_lookup_t_connection_box_declaration[] = {"id", "name"}; - -enum class gtok_t_connection_boxes { CONNECTION_BOX }; -constexpr const char* gtok_lookup_t_connection_boxes[] = {"connection_box"}; -enum class atok_t_connection_boxes { NUM_BOXES, - X_DIM, - Y_DIM }; -constexpr const char* atok_lookup_t_connection_boxes[] = {"num_boxes", "x_dim", "y_dim"}; - enum class atok_t_grid_loc { BLOCK_TYPE_ID, HEIGHT_OFFSET, WIDTH_OFFSET, @@ -341,24 +316,11 @@ constexpr const char* atok_lookup_t_meta[] = {"name"}; enum class gtok_t_metadata { META }; constexpr const char* gtok_lookup_t_metadata[] = {"meta"}; - -enum class atok_t_canonical_loc { X, - Y }; -constexpr const char* atok_lookup_t_canonical_loc[] = {"x", "y"}; - -enum class atok_t_connection_box_annotation { ID, - SITE_PIN_DELAY, - X, - Y }; -constexpr const char* atok_lookup_t_connection_box_annotation[] = {"id", "site_pin_delay", "x", "y"}; - enum class gtok_t_node { LOC, TIMING, SEGMENT, - METADATA, - CANONICAL_LOC, - CONNECTION_BOX }; -constexpr const char* gtok_lookup_t_node[] = {"loc", "timing", "segment", "metadata", "canonical_loc", "connection_box"}; + METADATA }; +constexpr const char* gtok_lookup_t_node[] = {"loc", "timing", "segment", "metadata"}; enum class atok_t_node { CAPACITY, DIRECTION, ID, @@ -380,11 +342,10 @@ enum class gtok_t_rr_graph { CHANNELS, SWITCHES, SEGMENTS, BLOCK_TYPES, - CONNECTION_BOXES, GRID, RR_NODES, RR_EDGES }; -constexpr const char* gtok_lookup_t_rr_graph[] = {"channels", "switches", "segments", "block_types", "connection_boxes", "grid", "rr_nodes", "rr_edges"}; +constexpr const char* gtok_lookup_t_rr_graph[] = {"channels", "switches", "segments", "block_types", "grid", "rr_nodes", "rr_edges"}; enum class atok_t_rr_graph { TOOL_COMMENT, TOOL_NAME, TOOL_VERSION }; @@ -1182,122 +1143,6 @@ inline gtok_t_block_types lex_node_t_block_types(const char* in, const std::func noreturn_report(report_error, ("Found unrecognized child " + std::string(in) + " of .").c_str()); } -inline atok_t_connection_box_declaration lex_attr_t_connection_box_declaration(const char* in, const std::function* report_error) { - unsigned int len = strlen(in); - switch (len) { - case 2: - switch (in[0]) { - case onechar('i', 0, 8): - switch (in[1]) { - case onechar('d', 0, 8): - return atok_t_connection_box_declaration::ID; - break; - default: - break; - } - break; - default: - break; - } - break; - case 4: - switch (*((triehash_uu32*)&in[0])) { - case onechar('n', 0, 32) | onechar('a', 8, 32) | onechar('m', 16, 32) | onechar('e', 24, 32): - return atok_t_connection_box_declaration::NAME; - break; - default: - break; - } - break; - default: - break; - } - noreturn_report(report_error, ("Found unrecognized attribute " + std::string(in) + " of .").c_str()); -} - -inline gtok_t_connection_boxes lex_node_t_connection_boxes(const char* in, const std::function* report_error) { - unsigned int len = strlen(in); - switch (len) { - case 14: - switch (*((triehash_uu64*)&in[0])) { - case onechar('c', 0, 64) | onechar('o', 8, 64) | onechar('n', 16, 64) | onechar('n', 24, 64) | onechar('e', 32, 64) | onechar('c', 40, 64) | onechar('t', 48, 64) | onechar('i', 56, 64): - switch (*((triehash_uu32*)&in[8])) { - case onechar('o', 0, 32) | onechar('n', 8, 32) | onechar('_', 16, 32) | onechar('b', 24, 32): - switch (in[12]) { - case onechar('o', 0, 8): - switch (in[13]) { - case onechar('x', 0, 8): - return gtok_t_connection_boxes::CONNECTION_BOX; - break; - default: - break; - } - break; - default: - break; - } - break; - default: - break; - } - break; - default: - break; - } - break; - default: - break; - } - noreturn_report(report_error, ("Found unrecognized child " + std::string(in) + " of .").c_str()); -} -inline atok_t_connection_boxes lex_attr_t_connection_boxes(const char* in, const std::function* report_error) { - unsigned int len = strlen(in); - switch (len) { - case 5: - switch (*((triehash_uu32*)&in[0])) { - case onechar('x', 0, 32) | onechar('_', 8, 32) | onechar('d', 16, 32) | onechar('i', 24, 32): - switch (in[4]) { - case onechar('m', 0, 8): - return atok_t_connection_boxes::X_DIM; - break; - default: - break; - } - break; - case onechar('y', 0, 32) | onechar('_', 8, 32) | onechar('d', 16, 32) | onechar('i', 24, 32): - switch (in[4]) { - case onechar('m', 0, 8): - return atok_t_connection_boxes::Y_DIM; - break; - default: - break; - } - break; - default: - break; - } - break; - case 9: - switch (*((triehash_uu64*)&in[0])) { - case onechar('n', 0, 64) | onechar('u', 8, 64) | onechar('m', 16, 64) | onechar('_', 24, 64) | onechar('b', 32, 64) | onechar('o', 40, 64) | onechar('x', 48, 64) | onechar('e', 56, 64): - switch (in[8]) { - case onechar('s', 0, 8): - return atok_t_connection_boxes::NUM_BOXES; - break; - default: - break; - } - break; - default: - break; - } - break; - default: - break; - } - noreturn_report(report_error, ("Found unrecognized attribute " + std::string(in) + " of .").c_str()); -} - inline atok_t_grid_loc lex_attr_t_grid_loc(const char* in, const std::function* report_error) { unsigned int len = strlen(in); switch (len) { @@ -1544,90 +1389,6 @@ inline gtok_t_metadata lex_node_t_metadata(const char* in, const std::function.").c_str()); } -inline atok_t_canonical_loc lex_attr_t_canonical_loc(const char* in, const std::function* report_error) { - unsigned int len = strlen(in); - switch (len) { - case 1: - switch (in[0]) { - case onechar('x', 0, 8): - return atok_t_canonical_loc::X; - break; - case onechar('y', 0, 8): - return atok_t_canonical_loc::Y; - break; - default: - break; - } - break; - default: - break; - } - noreturn_report(report_error, ("Found unrecognized attribute " + std::string(in) + " of .").c_str()); -} - -inline atok_t_connection_box_annotation lex_attr_t_connection_box_annotation(const char* in, const std::function* report_error) { - unsigned int len = strlen(in); - switch (len) { - case 1: - switch (in[0]) { - case onechar('x', 0, 8): - return atok_t_connection_box_annotation::X; - break; - case onechar('y', 0, 8): - return atok_t_connection_box_annotation::Y; - break; - default: - break; - } - break; - case 2: - switch (in[0]) { - case onechar('i', 0, 8): - switch (in[1]) { - case onechar('d', 0, 8): - return atok_t_connection_box_annotation::ID; - break; - default: - break; - } - break; - default: - break; - } - break; - case 14: - switch (*((triehash_uu64*)&in[0])) { - case onechar('s', 0, 64) | onechar('i', 8, 64) | onechar('t', 16, 64) | onechar('e', 24, 64) | onechar('_', 32, 64) | onechar('p', 40, 64) | onechar('i', 48, 64) | onechar('n', 56, 64): - switch (*((triehash_uu32*)&in[8])) { - case onechar('_', 0, 32) | onechar('d', 8, 32) | onechar('e', 16, 32) | onechar('l', 24, 32): - switch (in[12]) { - case onechar('a', 0, 8): - switch (in[13]) { - case onechar('y', 0, 8): - return atok_t_connection_box_annotation::SITE_PIN_DELAY; - break; - default: - break; - } - break; - default: - break; - } - break; - default: - break; - } - break; - default: - break; - } - break; - default: - break; - } - noreturn_report(report_error, ("Found unrecognized attribute " + std::string(in) + " of .").c_str()); -} - inline gtok_t_node lex_node_t_node(const char* in, const std::function* report_error) { unsigned int len = strlen(in); switch (len) { @@ -1709,54 +1470,6 @@ inline gtok_t_node lex_node_t_node(const char* in, const std::function* report_error) { - std::bitset<2> astate = 0; - for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { - atok_t_connection_box_declaration in = lex_attr_t_connection_box_declaration(attr.name(), report_error); - if (astate[(int)in] == 0) - astate[(int)in] = 1; - else - noreturn_report(report_error, ("Duplicate attribute " + std::string(attr.name()) + " in .").c_str()); - switch (in) { - case atok_t_connection_box_declaration::ID: - *id = load_unsigned_int(attr.value(), report_error); - break; - case atok_t_connection_box_declaration::NAME: - /* Attribute name set after element init */ - break; - default: - break; /* Not possible. */ - } - } - std::bitset<2> test_astate = astate | std::bitset<2>(0b00); - if (!test_astate.all()) attr_error(test_astate, atok_lookup_t_connection_box_declaration, report_error); -} - -inline void load_connection_boxes_required_attributes(const pugi::xml_node& root, unsigned int* num_boxes, unsigned int* x_dim, unsigned int* y_dim, const std::function* report_error) { - std::bitset<3> astate = 0; - for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { - atok_t_connection_boxes in = lex_attr_t_connection_boxes(attr.name(), report_error); - if (astate[(int)in] == 0) - astate[(int)in] = 1; - else - noreturn_report(report_error, ("Duplicate attribute " + std::string(attr.name()) + " in .").c_str()); - switch (in) { - case atok_t_connection_boxes::NUM_BOXES: - *num_boxes = load_unsigned_int(attr.value(), report_error); - break; - case atok_t_connection_boxes::X_DIM: - *x_dim = load_unsigned_int(attr.value(), report_error); - break; - case atok_t_connection_boxes::Y_DIM: - *y_dim = load_unsigned_int(attr.value(), report_error); - break; - default: - break; /* Not possible. */ - } - } - std::bitset<3> test_astate = astate | std::bitset<3>(0b000); - if (!test_astate.all()) attr_error(test_astate, atok_lookup_t_connection_boxes, report_error); -} - inline void load_grid_loc_required_attributes(const pugi::xml_node& root, int* block_type_id, int* height_offset, int* width_offset, int* x, int* y, const std::function* report_error) { std::bitset<5> astate = 0; for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { @@ -2839,58 +2488,6 @@ inline void load_node_segment_required_attributes(const pugi::xml_node& root, in if (!test_astate.all()) attr_error(test_astate, atok_lookup_t_node_segment, report_error); } -inline void load_canonical_loc_required_attributes(const pugi::xml_node& root, unsigned int* x, unsigned int* y, const std::function* report_error) { - std::bitset<2> astate = 0; - for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { - atok_t_canonical_loc in = lex_attr_t_canonical_loc(attr.name(), report_error); - if (astate[(int)in] == 0) - astate[(int)in] = 1; - else - noreturn_report(report_error, ("Duplicate attribute " + std::string(attr.name()) + " in .").c_str()); - switch (in) { - case atok_t_canonical_loc::X: - *x = load_unsigned_int(attr.value(), report_error); - break; - case atok_t_canonical_loc::Y: - *y = load_unsigned_int(attr.value(), report_error); - break; - default: - break; /* Not possible. */ - } - } - std::bitset<2> test_astate = astate | std::bitset<2>(0b00); - if (!test_astate.all()) attr_error(test_astate, atok_lookup_t_canonical_loc, report_error); -} - -inline void load_connection_box_annotation_required_attributes(const pugi::xml_node& root, unsigned int* id, float* site_pin_delay, unsigned int* x, unsigned int* y, const std::function* report_error) { - std::bitset<4> astate = 0; - for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { - atok_t_connection_box_annotation in = lex_attr_t_connection_box_annotation(attr.name(), report_error); - if (astate[(int)in] == 0) - astate[(int)in] = 1; - else - noreturn_report(report_error, ("Duplicate attribute " + std::string(attr.name()) + " in .").c_str()); - switch (in) { - case atok_t_connection_box_annotation::ID: - *id = load_unsigned_int(attr.value(), report_error); - break; - case atok_t_connection_box_annotation::SITE_PIN_DELAY: - *site_pin_delay = load_float(attr.value(), report_error); - break; - case atok_t_connection_box_annotation::X: - *x = load_unsigned_int(attr.value(), report_error); - break; - case atok_t_connection_box_annotation::Y: - *y = load_unsigned_int(attr.value(), report_error); - break; - default: - break; /* Not possible. */ - } - } - std::bitset<4> test_astate = astate | std::bitset<4>(0b0000); - if (!test_astate.all()) attr_error(test_astate, atok_lookup_t_connection_box_annotation, report_error); -} - inline void load_node_required_attributes(const pugi::xml_node& root, unsigned int* capacity, unsigned int* id, enum_node_type* type, const std::function* report_error) { std::bitset<4> astate = 0; for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { @@ -3615,94 +3212,6 @@ inline void load_block_types(const pugi::xml_node& root, T& out, Context& contex if (state != 0) dfa_error("end of input", gstate_t_block_types[state], gtok_lookup_t_block_types, 1, report_error); } -template -inline void load_connection_box_declaration(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug) { - (void)root; - (void)out; - (void)context; - (void)report_error; - // Update current file offset in case an error is encountered. - *offset_debug = root.offset_debug(); - - for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { - atok_t_connection_box_declaration in = lex_attr_t_connection_box_declaration(attr.name(), report_error); - switch (in) { - case atok_t_connection_box_declaration::ID: - /* Attribute id is already set */ - break; - case atok_t_connection_box_declaration::NAME: - out.set_connection_box_declaration_name(attr.value(), context); - break; - default: - break; /* Not possible. */ - } - } - - if (root.first_child().type() == pugi::node_element) - noreturn_report(report_error, "Unexpected child element in ."); -} - -constexpr int NUM_T_CONNECTION_BOXES_STATES = 2; -constexpr const int NUM_T_CONNECTION_BOXES_INPUTS = 1; -constexpr int gstate_t_connection_boxes[NUM_T_CONNECTION_BOXES_STATES][NUM_T_CONNECTION_BOXES_INPUTS] = { - {0}, - {0}, -}; -template -inline void load_connection_boxes(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug) { - (void)root; - (void)out; - (void)context; - (void)report_error; - // Update current file offset in case an error is encountered. - *offset_debug = root.offset_debug(); - - // Preallocate arrays by counting child nodes (if any) - size_t connection_box_count = 0; - { - int next, state = 1; - for (pugi::xml_node node = root.first_child(); node; node = node.next_sibling()) { - *offset_debug = node.offset_debug(); - gtok_t_connection_boxes in = lex_node_t_connection_boxes(node.name(), report_error); - next = gstate_t_connection_boxes[state][(int)in]; - if (next == -1) - dfa_error(gtok_lookup_t_connection_boxes[(int)in], gstate_t_connection_boxes[state], gtok_lookup_t_connection_boxes, 1, report_error); - state = next; - switch (in) { - case gtok_t_connection_boxes::CONNECTION_BOX: - connection_box_count += 1; - break; - default: - break; /* Not possible. */ - } - } - - out.preallocate_connection_boxes_connection_box(context, connection_box_count); - } - int next, state = 1; - for (pugi::xml_node node = root.first_child(); node; node = node.next_sibling()) { - *offset_debug = node.offset_debug(); - gtok_t_connection_boxes in = lex_node_t_connection_boxes(node.name(), report_error); - next = gstate_t_connection_boxes[state][(int)in]; - if (next == -1) - dfa_error(gtok_lookup_t_connection_boxes[(int)in], gstate_t_connection_boxes[state], gtok_lookup_t_connection_boxes, 1, report_error); - state = next; - switch (in) { - case gtok_t_connection_boxes::CONNECTION_BOX: { - unsigned int connection_box_declaration_id; - memset(&connection_box_declaration_id, 0, sizeof(connection_box_declaration_id)); - load_connection_box_declaration_required_attributes(node, &connection_box_declaration_id, report_error); - auto child_context = out.add_connection_boxes_connection_box(context, connection_box_declaration_id); - load_connection_box_declaration(node, out, child_context, report_error, offset_debug); - out.finish_connection_boxes_connection_box(child_context); - } break; - default: - break; /* Not possible. */ - } - } - if (state != 0) dfa_error("end of input", gstate_t_connection_boxes[state], gtok_lookup_t_connection_boxes, 1, report_error); -} - template inline void load_grid_loc(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug) { (void)root; @@ -3939,32 +3448,6 @@ inline void load_metadata(const pugi::xml_node& root, T& out, Context& context, if (state != 0) dfa_error("end of input", gstate_t_metadata[state], gtok_lookup_t_metadata, 1, report_error); } -template -inline void load_canonical_loc(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug) { - (void)root; - (void)out; - (void)context; - (void)report_error; - // Update current file offset in case an error is encountered. - *offset_debug = root.offset_debug(); - - if (root.first_child().type() == pugi::node_element) - noreturn_report(report_error, "Unexpected child element in ."); -} - -template -inline void load_connection_box_annotation(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug) { - (void)root; - (void)out; - (void)context; - (void)report_error; - // Update current file offset in case an error is encountered. - *offset_debug = root.offset_debug(); - - if (root.first_child().type() == pugi::node_element) - noreturn_report(report_error, "Unexpected child element in ."); -} - template inline void load_node(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug) { (void)root; @@ -3994,7 +3477,7 @@ inline void load_node(const pugi::xml_node& root, T& out, Context& context, cons } } - std::bitset<6> gstate = 0; + std::bitset<4> gstate = 0; for (pugi::xml_node node = root.first_child(); node; node = node.next_sibling()) { *offset_debug = node.offset_debug(); gtok_t_node in = lex_node_t_node(node.name(), report_error); @@ -4042,35 +3525,11 @@ inline void load_node(const pugi::xml_node& root, T& out, Context& context, cons load_metadata(node, out, child_context, report_error, offset_debug); out.finish_node_metadata(child_context); } break; - case gtok_t_node::CANONICAL_LOC: { - unsigned int canonical_loc_x; - memset(&canonical_loc_x, 0, sizeof(canonical_loc_x)); - unsigned int canonical_loc_y; - memset(&canonical_loc_y, 0, sizeof(canonical_loc_y)); - load_canonical_loc_required_attributes(node, &canonical_loc_x, &canonical_loc_y, report_error); - auto child_context = out.init_node_canonical_loc(context, canonical_loc_x, canonical_loc_y); - load_canonical_loc(node, out, child_context, report_error, offset_debug); - out.finish_node_canonical_loc(child_context); - } break; - case gtok_t_node::CONNECTION_BOX: { - unsigned int connection_box_annotation_id; - memset(&connection_box_annotation_id, 0, sizeof(connection_box_annotation_id)); - float connection_box_annotation_site_pin_delay; - memset(&connection_box_annotation_site_pin_delay, 0, sizeof(connection_box_annotation_site_pin_delay)); - unsigned int connection_box_annotation_x; - memset(&connection_box_annotation_x, 0, sizeof(connection_box_annotation_x)); - unsigned int connection_box_annotation_y; - memset(&connection_box_annotation_y, 0, sizeof(connection_box_annotation_y)); - load_connection_box_annotation_required_attributes(node, &connection_box_annotation_id, &connection_box_annotation_site_pin_delay, &connection_box_annotation_x, &connection_box_annotation_y, report_error); - auto child_context = out.init_node_connection_box(context, connection_box_annotation_id, connection_box_annotation_site_pin_delay, connection_box_annotation_x, connection_box_annotation_y); - load_connection_box_annotation(node, out, child_context, report_error, offset_debug); - out.finish_node_connection_box(child_context); - } break; default: break; /* Not possible. */ } } - std::bitset<6> test_gstate = gstate | std::bitset<6>(0b111110); + std::bitset<4> test_gstate = gstate | std::bitset<4>(0b1110); if (!test_gstate.all()) all_error(test_gstate, gtok_lookup_t_node, report_error); } @@ -4267,7 +3726,7 @@ inline void load_rr_graph(const pugi::xml_node& root, T& out, Context& context, } } - std::bitset<8> gstate = 0; + std::bitset<7> gstate = 0; for (pugi::xml_node node = root.first_child(); node; node = node.next_sibling()) { *offset_debug = node.offset_debug(); gtok_t_rr_graph in = lex_node_t_rr_graph(node.name(), report_error); @@ -4296,18 +3755,6 @@ inline void load_rr_graph(const pugi::xml_node& root, T& out, Context& context, load_block_types(node, out, child_context, report_error, offset_debug); out.finish_rr_graph_block_types(child_context); } break; - case gtok_t_rr_graph::CONNECTION_BOXES: { - unsigned int connection_boxes_num_boxes; - memset(&connection_boxes_num_boxes, 0, sizeof(connection_boxes_num_boxes)); - unsigned int connection_boxes_x_dim; - memset(&connection_boxes_x_dim, 0, sizeof(connection_boxes_x_dim)); - unsigned int connection_boxes_y_dim; - memset(&connection_boxes_y_dim, 0, sizeof(connection_boxes_y_dim)); - load_connection_boxes_required_attributes(node, &connection_boxes_num_boxes, &connection_boxes_x_dim, &connection_boxes_y_dim, report_error); - auto child_context = out.init_rr_graph_connection_boxes(context, connection_boxes_num_boxes, connection_boxes_x_dim, connection_boxes_y_dim); - load_connection_boxes(node, out, child_context, report_error, offset_debug); - out.finish_rr_graph_connection_boxes(child_context); - } break; case gtok_t_rr_graph::GRID: { auto child_context = out.init_rr_graph_grid(context); load_grid_locs(node, out, child_context, report_error, offset_debug); @@ -4327,7 +3774,7 @@ inline void load_rr_graph(const pugi::xml_node& root, T& out, Context& context, break; /* Not possible. */ } } - std::bitset<8> test_gstate = gstate | std::bitset<8>(0b00010000); + std::bitset<7> test_gstate = gstate | std::bitset<7>(0b0000000); if (!test_gstate.all()) all_error(test_gstate, gtok_lookup_t_rr_graph, report_error); } @@ -4516,22 +3963,6 @@ inline void write_block_types(T& in, std::ostream& os, Context& context) { } } -template -inline void write_connection_boxes(T& in, std::ostream& os, Context& context) { - (void)in; - (void)os; - (void)context; - { - for (size_t i = 0, n = in.num_connection_boxes_connection_box(context); i < n; i++) { - auto child_context = in.get_connection_boxes_connection_box(i, context); - os << "\n"; - } - } -} - template inline void write_grid_locs(T& in, std::ostream& os, Context& context) { (void)in; @@ -4618,26 +4049,6 @@ inline void write_node(T& in, std::ostream& os, Context& context) { os << "\n"; } } - { - if (in.has_node_canonical_loc(context)) { - auto child_context = in.get_node_canonical_loc(context); - os << "\n"; - } - } - { - if (in.has_node_connection_box(context)) { - auto child_context = in.get_node_connection_box(context); - os << "\n"; - } - } } template @@ -4724,18 +4135,6 @@ inline void write_rr_graph(T& in, std::ostream& os, Context& context) { write_block_types(in, os, child_context); os << "\n"; } - { - if (in.has_rr_graph_connection_boxes(context)) { - auto child_context = in.get_rr_graph_connection_boxes(context); - os << ""; - write_connection_boxes(in, os, child_context); - os << "\n"; - } - } { auto child_context = in.get_rr_graph_grid(context); os << "\n"; diff --git a/vpr/src/route/gen/rr_graph_uxsdcxx_capnp.h b/vpr/src/route/gen/rr_graph_uxsdcxx_capnp.h index dca960a1bdb..4d78a7cf016 100644 --- a/vpr/src/route/gen/rr_graph_uxsdcxx_capnp.h +++ b/vpr/src/route/gen/rr_graph_uxsdcxx_capnp.h @@ -54,10 +54,6 @@ void load_block_type_capnp_type(const ucap::BlockType::Reader& root, T& out, Con template void load_block_types_capnp_type(const ucap::BlockTypes::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); template -void load_connection_box_declaration_capnp_type(const ucap::ConnectionBoxDeclaration::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); -template -void load_connection_boxes_capnp_type(const ucap::ConnectionBoxes::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); -template void load_grid_loc_capnp_type(const ucap::GridLoc::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); template void load_grid_locs_capnp_type(const ucap::GridLocs::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); @@ -72,10 +68,6 @@ void load_meta_capnp_type(const ucap::Meta::Reader& root, T& out, Context& conte template void load_metadata_capnp_type(const ucap::Metadata::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); template -void load_canonical_loc_capnp_type(const ucap::CanonicalLoc::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); -template -void load_connection_box_annotation_capnp_type(const ucap::ConnectionBoxAnnotation::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); -template void load_node_capnp_type(const ucap::Node::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); template void load_rr_nodes_capnp_type(const ucap::RrNodes::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); @@ -106,8 +98,6 @@ inline void write_block_type_capnp_type(T& in, ucap::BlockType::Builder& root, C template inline void write_block_types_capnp_type(T& in, ucap::BlockTypes::Builder& root, Context& context); template -inline void write_connection_boxes_capnp_type(T& in, ucap::ConnectionBoxes::Builder& root, Context& context); -template inline void write_grid_locs_capnp_type(T& in, ucap::GridLocs::Builder& root, Context& context); template inline void write_meta_capnp_type(T& in, ucap::Meta::Builder& root, Context& context); @@ -624,39 +614,6 @@ inline void load_block_types_capnp_type(const ucap::BlockTypes::Reader& root, T& stack->pop_back(); } -template -inline void load_connection_box_declaration_capnp_type(const ucap::ConnectionBoxDeclaration::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack) { - (void)root; - (void)out; - (void)context; - (void)report_error; - (void)stack; - - out.set_connection_box_declaration_name(root.getName().cStr(), context); -} - -template -inline void load_connection_boxes_capnp_type(const ucap::ConnectionBoxes::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack) { - (void)root; - (void)out; - (void)context; - (void)report_error; - (void)stack; - - stack->push_back(std::make_pair("getConnectionBox", 0)); - { - auto data = root.getConnectionBoxes(); - out.preallocate_connection_boxes_connection_box(context, data.size()); - for (const auto& el : data) { - auto child_context = out.add_connection_boxes_connection_box(context, el.getId()); - load_connection_box_declaration_capnp_type(el, out, child_context, report_error, stack); - out.finish_connection_boxes_connection_box(child_context); - stack->back().second += 1; - } - } - stack->pop_back(); -} - template inline void load_grid_loc_capnp_type(const ucap::GridLoc::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack) { (void)root; @@ -753,24 +710,6 @@ inline void load_metadata_capnp_type(const ucap::Metadata::Reader& root, T& out, stack->pop_back(); } -template -inline void load_canonical_loc_capnp_type(const ucap::CanonicalLoc::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack) { - (void)root; - (void)out; - (void)context; - (void)report_error; - (void)stack; -} - -template -inline void load_connection_box_annotation_capnp_type(const ucap::ConnectionBoxAnnotation::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack) { - (void)root; - (void)out; - (void)context; - (void)report_error; - (void)stack; -} - template inline void load_node_capnp_type(const ucap::Node::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack) { (void)root; @@ -812,22 +751,6 @@ inline void load_node_capnp_type(const ucap::Node::Reader& root, T& out, Context out.finish_node_metadata(child_context); } stack->pop_back(); - stack->push_back(std::make_pair("getCanonicalLoc", 0)); - if (root.hasCanonicalLoc()) { - auto child_el = root.getCanonicalLoc(); - auto child_context = out.init_node_canonical_loc(context, child_el.getX(), child_el.getY()); - load_canonical_loc_capnp_type(child_el, out, child_context, report_error, stack); - out.finish_node_canonical_loc(child_context); - } - stack->pop_back(); - stack->push_back(std::make_pair("getConnectionBox", 0)); - if (root.hasConnectionBox()) { - auto child_el = root.getConnectionBox(); - auto child_context = out.init_node_connection_box(context, child_el.getId(), child_el.getSitePinDelay(), child_el.getX(), child_el.getY()); - load_connection_box_annotation_capnp_type(child_el, out, child_context, report_error, stack); - out.finish_node_connection_box(child_context); - } - stack->pop_back(); } template @@ -935,14 +858,6 @@ inline void load_rr_graph_capnp_type(const ucap::RrGraph::Reader& root, T& out, out.finish_rr_graph_block_types(child_context); } stack->pop_back(); - stack->push_back(std::make_pair("getConnectionBoxes", 0)); - if (root.hasConnectionBoxes()) { - auto child_el = root.getConnectionBoxes(); - auto child_context = out.init_rr_graph_connection_boxes(context, child_el.getNumBoxes(), child_el.getXDim(), child_el.getYDim()); - load_connection_boxes_capnp_type(child_el, out, child_context, report_error, stack); - out.finish_rr_graph_connection_boxes(child_context); - } - stack->pop_back(); stack->push_back(std::make_pair("getGrid", 0)); if (root.hasGrid()) { auto child_el = root.getGrid(); @@ -1136,21 +1051,6 @@ inline void write_block_types_capnp_type(T& in, ucap::BlockTypes::Builder& root, } } -template -inline void write_connection_boxes_capnp_type(T& in, ucap::ConnectionBoxes::Builder& root, Context& context) { - (void)in; - (void)root; - - size_t num_connection_boxes_connection_boxes = in.num_connection_boxes_connection_box(context); - auto connection_boxes_connection_boxes = root.initConnectionBoxes(num_connection_boxes_connection_boxes); - for (size_t i = 0; i < num_connection_boxes_connection_boxes; i++) { - auto connection_boxes_connection_box = connection_boxes_connection_boxes[i]; - auto child_context = in.get_connection_boxes_connection_box(i, context); - connection_boxes_connection_box.setId(in.get_connection_box_declaration_id(child_context)); - connection_boxes_connection_box.setName(in.get_connection_box_declaration_name(child_context)); - } -} - template inline void write_grid_locs_capnp_type(T& in, ucap::GridLocs::Builder& root, Context& context) { (void)in; @@ -1226,22 +1126,6 @@ inline void write_node_capnp_type(T& in, ucap::Node::Builder& root, Context& con auto child_context = in.get_node_metadata(context); write_metadata_capnp_type(in, node_metadata, child_context); } - - if (in.has_node_canonical_loc(context)) { - auto node_canonical_loc = root.initCanonicalLoc(); - auto child_context = in.get_node_canonical_loc(context); - node_canonical_loc.setX(in.get_canonical_loc_x(child_context)); - node_canonical_loc.setY(in.get_canonical_loc_y(child_context)); - } - - if (in.has_node_connection_box(context)) { - auto node_connection_box = root.initConnectionBox(); - auto child_context = in.get_node_connection_box(context); - node_connection_box.setId(in.get_connection_box_annotation_id(child_context)); - node_connection_box.setSitePinDelay(in.get_connection_box_annotation_site_pin_delay(child_context)); - node_connection_box.setX(in.get_connection_box_annotation_x(child_context)); - node_connection_box.setY(in.get_connection_box_annotation_y(child_context)); - } } template @@ -1321,15 +1205,6 @@ inline void write_rr_graph_capnp_type(T& in, ucap::RrGraph::Builder& root, Conte write_block_types_capnp_type(in, rr_graph_block_types, child_context); } - if (in.has_rr_graph_connection_boxes(context)) { - auto rr_graph_connection_boxes = root.initConnectionBoxes(); - auto child_context = in.get_rr_graph_connection_boxes(context); - rr_graph_connection_boxes.setNumBoxes(in.get_connection_boxes_num_boxes(child_context)); - rr_graph_connection_boxes.setXDim(in.get_connection_boxes_x_dim(child_context)); - rr_graph_connection_boxes.setYDim(in.get_connection_boxes_y_dim(child_context)); - write_connection_boxes_capnp_type(in, rr_graph_connection_boxes, child_context); - } - { auto child_context = in.get_rr_graph_grid(context); auto rr_graph_grid = root.initGrid(); diff --git a/vpr/src/route/gen/rr_graph_uxsdcxx_interface.h b/vpr/src/route/gen/rr_graph_uxsdcxx_interface.h index 8c9085dc7d0..ed968c6ebcf 100644 --- a/vpr/src/route/gen/rr_graph_uxsdcxx_interface.h +++ b/vpr/src/route/gen/rr_graph_uxsdcxx_interface.h @@ -68,8 +68,6 @@ struct DefaultRrGraphContextTypes { using PinClassReadContext = void*; using BlockTypeReadContext = void*; using BlockTypesReadContext = void*; - using ConnectionBoxDeclarationReadContext = void*; - using ConnectionBoxesReadContext = void*; using GridLocReadContext = void*; using GridLocsReadContext = void*; using NodeLocReadContext = void*; @@ -77,8 +75,6 @@ struct DefaultRrGraphContextTypes { using NodeSegmentReadContext = void*; using MetaReadContext = void*; using MetadataReadContext = void*; - using CanonicalLocReadContext = void*; - using ConnectionBoxAnnotationReadContext = void*; using NodeReadContext = void*; using RrNodesReadContext = void*; using EdgeReadContext = void*; @@ -99,8 +95,6 @@ struct DefaultRrGraphContextTypes { using PinClassWriteContext = void*; using BlockTypeWriteContext = void*; using BlockTypesWriteContext = void*; - using ConnectionBoxDeclarationWriteContext = void*; - using ConnectionBoxesWriteContext = void*; using GridLocWriteContext = void*; using GridLocsWriteContext = void*; using NodeLocWriteContext = void*; @@ -108,8 +102,6 @@ struct DefaultRrGraphContextTypes { using NodeSegmentWriteContext = void*; using MetaWriteContext = void*; using MetadataWriteContext = void*; - using CanonicalLocWriteContext = void*; - using ConnectionBoxAnnotationWriteContext = void*; using NodeWriteContext = void*; using RrNodesWriteContext = void*; using EdgeWriteContext = void*; @@ -356,35 +348,6 @@ class RrGraphBase { virtual inline size_t num_block_types_block_type(typename ContextTypes::BlockTypesReadContext& ctx) = 0; virtual inline typename ContextTypes::BlockTypeReadContext get_block_types_block_type(int n, typename ContextTypes::BlockTypesReadContext& ctx) = 0; - /** Generated for complex type "connection_box_declaration": - * - * - * - * - */ - virtual inline unsigned int get_connection_box_declaration_id(typename ContextTypes::ConnectionBoxDeclarationReadContext& ctx) = 0; - virtual inline const char* get_connection_box_declaration_name(typename ContextTypes::ConnectionBoxDeclarationReadContext& ctx) = 0; - virtual inline void set_connection_box_declaration_name(const char* name, typename ContextTypes::ConnectionBoxDeclarationWriteContext& ctx) = 0; - - /** Generated for complex type "connection_boxes": - * - * - * - * - * - * - * - * - */ - virtual inline unsigned int get_connection_boxes_num_boxes(typename ContextTypes::ConnectionBoxesReadContext& ctx) = 0; - virtual inline unsigned int get_connection_boxes_x_dim(typename ContextTypes::ConnectionBoxesReadContext& ctx) = 0; - virtual inline unsigned int get_connection_boxes_y_dim(typename ContextTypes::ConnectionBoxesReadContext& ctx) = 0; - virtual inline void preallocate_connection_boxes_connection_box(typename ContextTypes::ConnectionBoxesWriteContext& ctx, size_t size) = 0; - virtual inline typename ContextTypes::ConnectionBoxDeclarationWriteContext add_connection_boxes_connection_box(typename ContextTypes::ConnectionBoxesWriteContext& ctx, unsigned int id) = 0; - virtual inline void finish_connection_boxes_connection_box(typename ContextTypes::ConnectionBoxDeclarationWriteContext& ctx) = 0; - virtual inline size_t num_connection_boxes_connection_box(typename ContextTypes::ConnectionBoxesReadContext& ctx) = 0; - virtual inline typename ContextTypes::ConnectionBoxDeclarationReadContext get_connection_boxes_connection_box(int n, typename ContextTypes::ConnectionBoxesReadContext& ctx) = 0; - /** Generated for complex type "grid_loc": * * @@ -474,28 +437,6 @@ class RrGraphBase { virtual inline size_t num_metadata_meta(typename ContextTypes::MetadataReadContext& ctx) = 0; virtual inline typename ContextTypes::MetaReadContext get_metadata_meta(int n, typename ContextTypes::MetadataReadContext& ctx) = 0; - /** Generated for complex type "canonical_loc": - * - * - * - * - */ - virtual inline unsigned int get_canonical_loc_x(typename ContextTypes::CanonicalLocReadContext& ctx) = 0; - virtual inline unsigned int get_canonical_loc_y(typename ContextTypes::CanonicalLocReadContext& ctx) = 0; - - /** Generated for complex type "connection_box_annotation": - * - * - * - * - * - * - */ - virtual inline unsigned int get_connection_box_annotation_id(typename ContextTypes::ConnectionBoxAnnotationReadContext& ctx) = 0; - virtual inline float get_connection_box_annotation_site_pin_delay(typename ContextTypes::ConnectionBoxAnnotationReadContext& ctx) = 0; - virtual inline unsigned int get_connection_box_annotation_x(typename ContextTypes::ConnectionBoxAnnotationReadContext& ctx) = 0; - virtual inline unsigned int get_connection_box_annotation_y(typename ContextTypes::ConnectionBoxAnnotationReadContext& ctx) = 0; - /** Generated for complex type "node": * * @@ -503,8 +444,6 @@ class RrGraphBase { * * * - * - * * * * @@ -532,14 +471,6 @@ class RrGraphBase { virtual inline void finish_node_metadata(typename ContextTypes::MetadataWriteContext& ctx) = 0; virtual inline typename ContextTypes::MetadataReadContext get_node_metadata(typename ContextTypes::NodeReadContext& ctx) = 0; virtual inline bool has_node_metadata(typename ContextTypes::NodeReadContext& ctx) = 0; - virtual inline typename ContextTypes::CanonicalLocWriteContext init_node_canonical_loc(typename ContextTypes::NodeWriteContext& ctx, unsigned int x, unsigned int y) = 0; - virtual inline void finish_node_canonical_loc(typename ContextTypes::CanonicalLocWriteContext& ctx) = 0; - virtual inline typename ContextTypes::CanonicalLocReadContext get_node_canonical_loc(typename ContextTypes::NodeReadContext& ctx) = 0; - virtual inline bool has_node_canonical_loc(typename ContextTypes::NodeReadContext& ctx) = 0; - virtual inline typename ContextTypes::ConnectionBoxAnnotationWriteContext init_node_connection_box(typename ContextTypes::NodeWriteContext& ctx, unsigned int id, float site_pin_delay, unsigned int x, unsigned int y) = 0; - virtual inline void finish_node_connection_box(typename ContextTypes::ConnectionBoxAnnotationWriteContext& ctx) = 0; - virtual inline typename ContextTypes::ConnectionBoxAnnotationReadContext get_node_connection_box(typename ContextTypes::NodeReadContext& ctx) = 0; - virtual inline bool has_node_connection_box(typename ContextTypes::NodeReadContext& ctx) = 0; /** Generated for complex type "rr_nodes": * @@ -592,7 +523,6 @@ class RrGraphBase { * * * - * * * * @@ -620,10 +550,6 @@ class RrGraphBase { virtual inline typename ContextTypes::BlockTypesWriteContext init_rr_graph_block_types(typename ContextTypes::RrGraphWriteContext& ctx) = 0; virtual inline void finish_rr_graph_block_types(typename ContextTypes::BlockTypesWriteContext& ctx) = 0; virtual inline typename ContextTypes::BlockTypesReadContext get_rr_graph_block_types(typename ContextTypes::RrGraphReadContext& ctx) = 0; - virtual inline typename ContextTypes::ConnectionBoxesWriteContext init_rr_graph_connection_boxes(typename ContextTypes::RrGraphWriteContext& ctx, unsigned int num_boxes, unsigned int x_dim, unsigned int y_dim) = 0; - virtual inline void finish_rr_graph_connection_boxes(typename ContextTypes::ConnectionBoxesWriteContext& ctx) = 0; - virtual inline typename ContextTypes::ConnectionBoxesReadContext get_rr_graph_connection_boxes(typename ContextTypes::RrGraphReadContext& ctx) = 0; - virtual inline bool has_rr_graph_connection_boxes(typename ContextTypes::RrGraphReadContext& ctx) = 0; virtual inline typename ContextTypes::GridLocsWriteContext init_rr_graph_grid(typename ContextTypes::RrGraphWriteContext& ctx) = 0; virtual inline void finish_rr_graph_grid(typename ContextTypes::GridLocsWriteContext& ctx) = 0; virtual inline typename ContextTypes::GridLocsReadContext get_rr_graph_grid(typename ContextTypes::RrGraphReadContext& ctx) = 0; diff --git a/vpr/src/route/rr_graph.xsd b/vpr/src/route/rr_graph.xsd index 2121b919928..728ea747e3c 100644 --- a/vpr/src/route/rr_graph.xsd +++ b/vpr/src/route/rr_graph.xsd @@ -261,26 +261,12 @@ - - - - - - - - - - - - - - @@ -297,20 +283,6 @@ - - - - - - - - - - - - - - @@ -358,7 +330,6 @@ - diff --git a/vpr/src/route/rr_graph_reader.cpp b/vpr/src/route/rr_graph_reader.cpp index 681e3520bb5..d76ca4fdcf9 100644 --- a/vpr/src/route/rr_graph_reader.cpp +++ b/vpr/src/route/rr_graph_reader.cpp @@ -62,7 +62,6 @@ void load_rr_file(const t_graph_type graph_type, &device_ctx.rr_switch_inf, &device_ctx.rr_indexed_data, &device_ctx.rr_node_indices, - &device_ctx.connection_boxes, device_ctx.num_arch_switches, device_ctx.arch_switch_inf, device_ctx.rr_segments, diff --git a/vpr/src/route/rr_graph_uxsdcxx_serializer.h b/vpr/src/route/rr_graph_uxsdcxx_serializer.h index 35c811b0c6b..b4c09cff670 100644 --- a/vpr/src/route/rr_graph_uxsdcxx_serializer.h +++ b/vpr/src/route/rr_graph_uxsdcxx_serializer.h @@ -230,9 +230,6 @@ struct RrGraphContextTypes : public uxsd::DefaultRrGraphContextTypes { using NodeReadContext = const t_rr_node; using EdgeReadContext = const EdgeWalker*; using RrEdgesReadContext = EdgeWalker; - using ConnectionBoxDeclarationReadContext = ConnectionBoxId; - using CanonicalLocReadContext = const std::pair*; - using ConnectionBoxAnnotationReadContext = const std::tuple, float>; using TimingWriteContext = t_rr_switch_inf*; using SizingWriteContext = t_rr_switch_inf*; using SwitchWriteContext = t_rr_switch_inf*; @@ -248,7 +245,6 @@ struct RrGraphContextTypes : public uxsd::DefaultRrGraphContextTypes { using MetadataWriteContext = MetadataBind; using NodeWriteContext = int; using EdgeWriteContext = MetadataBind; - using ConnectionBoxDeclarationWriteContext = ConnectionBox*; }; class RrGraphSerializer final : public uxsd::RrGraphBase { @@ -266,7 +262,6 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { std::vector* rr_switch_inf, std::vector* rr_indexed_data, t_rr_node_indices* rr_node_indices, - ConnectionBoxes* connection_boxes, const size_t num_arch_switches, const t_arch_switch_inf* arch_switch_inf, const std::vector& segment_inf, @@ -281,7 +276,6 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { , rr_switch_inf_(rr_switch_inf) , rr_indexed_data_(rr_indexed_data) , rr_node_indices_(rr_node_indices) - , connection_boxes_(connection_boxes) , read_rr_graph_filename_(read_rr_graph_filename) , graph_type_(graph_type) , base_cost_type_(base_cost_type) @@ -595,160 +589,6 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { } } - inline unsigned int get_connection_box_annotation_id(const std::tuple, float>& box_info) final { - ConnectionBoxId box_id; - std::pair box_location; - float site_pin_delay; - std::tie(box_id, box_location, site_pin_delay) = box_info; - - return (size_t)box_id; - } - inline float get_connection_box_annotation_site_pin_delay(const std::tuple, float>& box_info) final { - ConnectionBoxId box_id; - std::pair box_location; - float site_pin_delay; - std::tie(box_id, box_location, site_pin_delay) = box_info; - - return site_pin_delay; - } - inline unsigned int get_connection_box_annotation_x(const std::tuple, float>& box_info) final { - ConnectionBoxId box_id; - std::pair box_location; - float site_pin_delay; - std::tie(box_id, box_location, site_pin_delay) = box_info; - - return box_location.first; - } - inline unsigned int get_connection_box_annotation_y(const std::tuple, float>& box_info) final { - ConnectionBoxId box_id; - std::pair box_location; - float site_pin_delay; - std::tie(box_id, box_location, site_pin_delay) = box_info; - - return box_location.second; - } - - inline unsigned int get_canonical_loc_x(const std::pair*& data) final { - return data->first; - } - inline unsigned int get_canonical_loc_y(const std::pair*& data) final { - return data->second; - } - - inline const std::pair* get_node_canonical_loc(const t_rr_node& node) final { - return connection_boxes_->find_canonical_loc(get_node_id(node)); - } - inline bool has_node_canonical_loc(const t_rr_node& node) final { - return connection_boxes_->find_canonical_loc(get_node_id(node)) != nullptr; - } - inline const std::tuple, float> get_node_connection_box(const t_rr_node& node) final { - ConnectionBoxId box_id; - std::pair box_location; - float site_pin_delay; - - if (!connection_boxes_->find_connection_box(get_node_id(node), - &box_id, &box_location, &site_pin_delay)) { - VPR_FATAL_ERROR(VPR_ERROR_OTHER, - "No connection box for %d", node); - } - return std::make_tuple(box_id, box_location, site_pin_delay); - } - inline bool has_node_connection_box(const t_rr_node& node) final { - ConnectionBoxId box_id; - std::pair box_location; - float site_pin_delay; - - return connection_boxes_->find_connection_box(get_node_id(node), - &box_id, &box_location, &site_pin_delay); - } - inline void* init_node_canonical_loc(int& inode, unsigned int x, unsigned int y) final { - connection_boxes_->add_canonical_loc(inode, std::make_pair(x, y)); - return nullptr; - } - inline void finish_node_canonical_loc(void*& /*ctx*/) final {} - - inline void* init_node_connection_box(int& inode, unsigned int id, float site_pin_delay, unsigned int x, unsigned int y) final { - connection_boxes_->add_connection_box(inode, - ConnectionBoxId(id), - std::make_pair(x, y), - site_pin_delay); - return nullptr; - } - inline void finish_node_connection_box(void*& /*ctx*/) final {} - - /** Generated for complex type "connection_box_declaration": - * - * - * - * - */ - inline void set_connection_box_declaration_name(const char* name, ConnectionBox*& box) final { - box->name.assign(name); - } - - inline unsigned int get_connection_box_declaration_id(ConnectionBoxId& id) final { - return (size_t)id; - } - inline const char* get_connection_box_declaration_name(ConnectionBoxId& id) final { - return connection_boxes_->get_connection_box(id)->name.c_str(); - } - inline unsigned int get_connection_boxes_num_boxes(void*& /*ctx*/) final { - return connection_boxes_->num_connection_box_types(); - } - inline unsigned int get_connection_boxes_x_dim(void*& /*ctx*/) final { - return connection_boxes_->connection_box_grid_size().first; - } - inline unsigned int get_connection_boxes_y_dim(void*& /*ctx*/) final { - return connection_boxes_->connection_box_grid_size().second; - } - inline size_t num_connection_boxes_connection_box(void*& /*ctx*/) final { - return connection_boxes_->num_connection_box_types(); - } - inline ConnectionBoxId get_connection_boxes_connection_box(int n, void*& /*ctx*/) final { - return ConnectionBoxId(n); - } - - /** Generated for complex type "connection_boxes": - * - * - * - * - * - * - * - * - */ - inline void preallocate_connection_boxes_connection_box(void*& /*ctx*/, size_t size) final { - if (size != boxes_.size()) { - report_error("Number of connection_box %zu is greater than num_boxes %zu on ", - size, boxes_.size()); - } - } - inline ConnectionBox* add_connection_boxes_connection_box(void*& /*ctx*/, unsigned int id) final { - if (id >= boxes_.size()) { - report_error("ConnectionBox id %u is greater than num_boxes on ", - id); - } - return &boxes_[id]; - } - inline void finish_connection_boxes_connection_box(ConnectionBox*& /*ctx*/) final {} - - private: - int box_x_dim_; - int box_y_dim_; - std::vector boxes_; - - public: - inline void* init_rr_graph_connection_boxes(void*& /*ctx*/, unsigned int num_boxes, unsigned int x_dim, unsigned int y_dim) final { - box_x_dim_ = x_dim; - box_y_dim_ = y_dim; - boxes_.resize(num_boxes); - return nullptr; - } - inline void finish_rr_graph_connection_boxes(void*& /*ctx*/) final { - connection_boxes_->reset_boxes(std::make_pair(box_x_dim_, box_y_dim_), std::move(boxes_)); - } - /** Generated for complex type "node_timing": * * @@ -924,7 +764,6 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { // If make_room_in_vector was used for allocation, this ensures that // the final storage has no overhead. rr_nodes_->shrink_to_fit(); - connection_boxes_->resize_nodes(rr_nodes_->size()); } /** Generated for complex type "rr_edges": @@ -1611,12 +1450,6 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { inline void* get_rr_graph_block_types(void*& /*ctx*/) final { return nullptr; } - inline void* get_rr_graph_connection_boxes(void*& /*ctx*/) final { - return nullptr; - } - inline bool has_rr_graph_connection_boxes(void*& /*ctx*/) final { - return connection_boxes_->num_connection_box_types() > 0; - } inline void* get_rr_graph_grid(void*& /*ctx*/) final { return nullptr; } @@ -1643,8 +1476,6 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { (*rr_indexed_data_)[i].seg_index = seg_index_[i]; } - connection_boxes_->create_sink_back_ref(); - VTR_ASSERT(read_rr_graph_filename_ != nullptr); VTR_ASSERT(read_rr_graph_name_ != nullptr); read_rr_graph_filename_->assign(read_rr_graph_name_); @@ -2013,7 +1844,6 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { std::vector* rr_switch_inf_; std::vector* rr_indexed_data_; t_rr_node_indices* rr_node_indices_; - ConnectionBoxes* connection_boxes_; std::string* read_rr_graph_filename_; // Constant data for loads and writes. diff --git a/vpr/src/route/rr_graph_writer.cpp b/vpr/src/route/rr_graph_writer.cpp index 57d2bb9c0e8..86fed376c05 100644 --- a/vpr/src/route/rr_graph_writer.cpp +++ b/vpr/src/route/rr_graph_writer.cpp @@ -40,7 +40,6 @@ void write_rr_graph(const char* file_name) { &device_ctx.rr_switch_inf, &device_ctx.rr_indexed_data, &device_ctx.rr_node_indices, - &device_ctx.connection_boxes, device_ctx.num_arch_switches, device_ctx.arch_switch_inf, device_ctx.rr_segments, From 1334f91de907429abc576b3dbef72cfc0d3221aa Mon Sep 17 00:00:00 2001 From: Alessandro Comodi Date: Mon, 15 Jun 2020 16:21:40 +0200 Subject: [PATCH 41/41] WIP: Fixes to have Series7 place-delay and lookahead being computed Signed-off-by: Alessandro Comodi --- vpr/src/place/timing_place.h | 5 +- vpr/src/route/router_lookahead_map.cpp | 16 ++++-- vpr/src/route/router_lookahead_sampling.cpp | 15 ++--- vpr/test/test_map_lookahead_serdes.cpp | 62 --------------------- 4 files changed, 22 insertions(+), 76 deletions(-) delete mode 100644 vpr/test/test_map_lookahead_serdes.cpp diff --git a/vpr/src/place/timing_place.h b/vpr/src/place/timing_place.h index b2d0ff16bdb..68716fcce64 100644 --- a/vpr/src/place/timing_place.h +++ b/vpr/src/place/timing_place.h @@ -190,10 +190,11 @@ class PlacerTimingCosts { num_levels_ = ilevel + 1; size_t num_leaves = num_nodes_in_level(ilevel); - size_t num_level_before_leaves = num_nodes_in_level(ilevel - 1); + //size_t num_level_before_leaves = num_nodes_in_level(ilevel - 1); VTR_ASSERT_MSG(num_leaves >= num_connections, "Need at least as many leaves as connections"); - VTR_ASSERT_MSG(num_connections == 0 || num_level_before_leaves < num_connections, "Level before should have fewer nodes than connections (to ensure using the smallest binary tree)"); + // XXX: Temporarily disabling this assertion as num_level_befor_leaves + //VTR_ASSERT_MSG(num_connections == 0 || num_level_before_leaves < num_connections, "Level before should have fewer nodes than connections (to ensure using the smallest binary tree)"); //We don't need to store all possible leaves if we have fewer connections //(i.e. bottom-right of tree is empty) diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp index 02cf16a8cc1..6aad0be6eb5 100644 --- a/vpr/src/route/router_lookahead_map.cpp +++ b/vpr/src/route/router_lookahead_map.cpp @@ -497,8 +497,8 @@ std::pair MapLookahead::get_src_opin_delays(RRNodeId from_node, in // derive a cost from the map between two nodes float MapLookahead::get_map_cost(int from_node_ind, - int to_node_ind, - float criticality_fac) const { + int to_node_ind, + float criticality_fac) const { if (from_node_ind == to_node_ind) { return 0.f; } @@ -564,8 +564,14 @@ float MapLookahead::get_map_cost(int from_node_ind, VTR_LOGV_DEBUG(f_router_debug, "Lookahead cost: %g\n", expected_cost); VTR_LOGV_DEBUG(f_router_debug, "Site pin delay: %g\n", site_pin_delay); - if (!std::isfinite(expected_cost) || expected_cost < 0.f) { - VTR_LOG_ERROR("invalid cost for segment %d at (%d, %d)\n", from_seg_index, (int)dx, (int)dy); + /* XXX: temporarily disable this for further debugging as it appears that some costs are set to infinity + if (!std::isfinite(expected_cost) { + VTR_LOG_ERROR("infinite cost for segment %d at (%d, %d)\n", from_seg_index, (int)dx, (int)dy); + VTR_ASSERT(0); + } */ + + if (expected_cost < 0.f) { + VTR_LOG_ERROR("negative cost for segment %d at (%d, %d)\n", from_seg_index, (int)dx, (int)dy); VTR_ASSERT(0); } @@ -791,7 +797,7 @@ static void adjust_rr_pin_position(const RRNodeId rr, int& x, int& y) { * | | | | * V +---------+ | * A - * B-----------> + * B-----------> * * So wires are located as follows: * diff --git a/vpr/src/route/router_lookahead_sampling.cpp b/vpr/src/route/router_lookahead_sampling.cpp index 2a7c9f9082a..568d5134602 100644 --- a/vpr/src/route/router_lookahead_sampling.cpp +++ b/vpr/src/route/router_lookahead_sampling.cpp @@ -27,7 +27,7 @@ static vtr::Rect bounding_box_for_node(int node_ind) { int x = rr_graph.node_xlow(RRNodeId(node_ind)); int y = rr_graph.node_ylow(RRNodeId(node_ind)); - return vtr::Rect(vtr::Point(loc->first, loc->second)); + return vtr::Rect(vtr::Point(x, y)); } static vtr::Rect sample_window(const vtr::Rect& bounding_box, int sx, int sy, int n) { @@ -145,11 +145,11 @@ std::vector find_sample_regions(int num_segments) { auto& node = rr_nodes[i]; if (node.type() != CHANX && node.type() != CHANY) continue; if (node.capacity() == 0 || node.num_edges() == 0) continue; - const std::pair* loc = device_ctx.connection_boxes.find_canonical_loc(i); - if (loc == nullptr) continue; + int x = rr_nodes.node_xlow(RRNodeId(i)); + int y = rr_nodes.node_ylow(RRNodeId(i)); int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index; - segment_counts[seg_index][loc->first][loc->second] += 1; + segment_counts[seg_index][x][y] += 1; VTR_ASSERT(seg_index != OPEN); VTR_ASSERT(seg_index < num_segments); @@ -210,15 +210,16 @@ std::vector find_sample_regions(int num_segments) { auto& node = rr_nodes[i]; if (node.type() != CHANX && node.type() != CHANY) continue; if (node.capacity() == 0 || node.num_edges() == 0) continue; - const std::pair* loc = device_ctx.connection_boxes.find_canonical_loc(i); - if (loc == nullptr) continue; + + int x = rr_nodes.node_xlow(RRNodeId(i)); + int y = rr_nodes.node_ylow(RRNodeId(i)); int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index; VTR_ASSERT(seg_index != OPEN); VTR_ASSERT(seg_index < num_segments); - auto point = sample_point_index.find(std::make_tuple(seg_index, loc->first, loc->second)); + auto point = sample_point_index.find(std::make_tuple(seg_index, x, y)); if (point != sample_point_index.end()) { point->second->nodes.push_back(i); } diff --git a/vpr/test/test_map_lookahead_serdes.cpp b/vpr/test/test_map_lookahead_serdes.cpp deleted file mode 100644 index 270bc689a20..00000000000 --- a/vpr/test/test_map_lookahead_serdes.cpp +++ /dev/null @@ -1,62 +0,0 @@ -#include "catch.hpp" - -#include "router_lookahead_map.h" - -extern t_wire_cost_map f_wire_cost_map; - -namespace { - -#ifdef VTR_ENABLE_CAPNPROTO -static constexpr const char kMapLookaheadBin[] = "test_map_lookahead.bin"; - -TEST_CASE("round_trip_map_lookahead", "[vpr]") { - constexpr std::array kDim({10, 12, 15, 16}); - - f_wire_cost_map.resize(kDim); - for (size_t x = 0; x < kDim[0]; ++x) { - for (size_t y = 0; y < kDim[1]; ++y) { - for (size_t z = 0; z < kDim[2]; ++z) { - for (size_t w = 0; w < kDim[3]; ++w) { - f_wire_cost_map[x][y][z][w].delay = (x + 1) * (y + 1) * (z + 1) * (w + 1); - f_wire_cost_map[x][y][z][w].congestion = 2 * (x + 1) * (y + 1) * (z + 1) * (w + 1); - } - } - } - } - - write_router_lookahead(kMapLookaheadBin); - - for (size_t x = 0; x < kDim[0]; ++x) { - for (size_t y = 0; y < kDim[1]; ++y) { - for (size_t z = 0; z < kDim[2]; ++z) { - for (size_t w = 0; w < kDim[3]; ++w) { - f_wire_cost_map[x][y][z][w].delay = 0.f; - f_wire_cost_map[x][y][z][w].congestion = 0.f; - } - } - } - } - - f_wire_cost_map.resize({0, 0, 0, 0}); - - read_router_lookahead(kMapLookaheadBin); - - for (size_t i = 0; i < kDim.size(); ++i) { - REQUIRE(f_wire_cost_map.dim_size(i) == kDim[i]); - } - - for (size_t x = 0; x < kDim[0]; ++x) { - for (size_t y = 0; y < kDim[1]; ++y) { - for (size_t z = 0; z < kDim[2]; ++z) { - for (size_t w = 0; w < kDim[3]; ++w) { - REQUIRE(f_wire_cost_map[x][y][z][w].delay == (x + 1) * (y + 1) * (z + 1) * (w + 1)); - REQUIRE(f_wire_cost_map[x][y][z][w].congestion == 2 * (x + 1) * (y + 1) * (z + 1) * (w + 1)); - } - } - } - } -} - -#endif - -} // namespace