From 0dfeeef4b91d1d1e12995563119b351745dd8c2d Mon Sep 17 00:00:00 2001 From: Alessandro Comodi Date: Mon, 20 Jan 2020 17:05:35 +0100 Subject: [PATCH 01/41] route: added penalty cost to switches Signed-off-by: Alessandro Comodi --- libs/libarchfpga/src/physical_types.h | 2 ++ libs/libarchfpga/src/read_xml_arch_file.cpp | 11 ++++--- .../gen/rr_graph_uxsdcxx.capnp | 9 +++--- vpr/src/base/SetupVPR.cpp | 1 + vpr/src/route/gen/rr_graph_uxsdcxx.h | 31 ++++++++++++++++--- vpr/src/route/gen/rr_graph_uxsdcxx_capnp.h | 9 ++++-- .../route/gen/rr_graph_uxsdcxx_interface.h | 9 ++++-- vpr/src/route/router_lookahead.cpp | 7 ++++- vpr/src/route/rr_graph.cpp | 1 + vpr/src/route/rr_graph.xsd | 1 + vpr/src/route/rr_graph_indexed_data.cpp | 29 +++++++++++++++-- vpr/src/route/rr_graph_uxsdcxx_serializer.h | 7 +++++ vpr/src/route/rr_node.h | 1 + 13 files changed, 94 insertions(+), 24 deletions(-) diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h index 75c39a0ecc1..71c3771f35b 100644 --- a/libs/libarchfpga/src/physical_types.h +++ b/libs/libarchfpga/src/physical_types.h @@ -1457,6 +1457,7 @@ struct t_arch_switch_inf { float Cin = 0.; float Cout = 0.; float Cinternal = 0.; + float penalty_cost = 0.; float mux_trans_size = 1.; BufferSize buf_size_type = BufferSize::AUTO; float buf_size = 0.; @@ -1521,6 +1522,7 @@ struct t_rr_switch_inf { float Cout = 0.; float Cinternal = 0.; float Tdel = 0.; + float penalty_cost = 0.; float mux_trans_size = 0.; float buf_size = 0.; const char* name = nullptr; diff --git a/libs/libarchfpga/src/read_xml_arch_file.cpp b/libs/libarchfpga/src/read_xml_arch_file.cpp index d8351141b81..2e194e6e95d 100644 --- a/libs/libarchfpga/src/read_xml_arch_file.cpp +++ b/libs/libarchfpga/src/read_xml_arch_file.cpp @@ -3987,23 +3987,23 @@ static void ProcessSwitches(pugi::xml_node Parent, SwitchType type = SwitchType::MUX; if (0 == strcmp(type_name, "mux")) { type = SwitchType::MUX; - expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Cinternal", "Tdel", "buf_size", "power_buf_size", "mux_trans_size"}, " with type '"s + type_name + "'"s, loc_data); + expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Cinternal", "Tdel", "penalty_cost", "buf_size", "power_buf_size", "mux_trans_size"}, " with type '"s + type_name + "'"s, loc_data); } else if (0 == strcmp(type_name, "tristate")) { type = SwitchType::TRISTATE; - expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Cinternal", "Tdel", "buf_size", "power_buf_size"}, " with type '"s + type_name + "'"s, loc_data); + expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Cinternal", "Tdel", "penalty_cost", "buf_size", "power_buf_size"}, " with type '"s + type_name + "'"s, loc_data); } else if (0 == strcmp(type_name, "buffer")) { type = SwitchType::BUFFER; - expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel", "buf_size", "power_buf_size"}, " with type '"s + type_name + "'"s, loc_data); + expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel", "penalty_cost", "buf_size", "power_buf_size"}, " with type '"s + type_name + "'"s, loc_data); } else if (0 == strcmp(type_name, "pass_gate")) { type = SwitchType::PASS_GATE; - expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel"}, " with type '"s + type_name + "'"s, loc_data); + expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel", "penalty_cost"}, " with type '"s + type_name + "'"s, loc_data); } else if (0 == strcmp(type_name, "short")) { type = SwitchType::SHORT; - expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel"}, " with type "s + type_name + "'"s, loc_data); + expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel", "penalty_cost"}, " with type "s + type_name + "'"s, loc_data); } else { archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node), "Invalid switch type '%s'.\n", type_name); @@ -4026,6 +4026,7 @@ static void ProcessSwitches(pugi::xml_node Parent, arch_switch.Cin = get_attribute(Node, "Cin", loc_data, CIN_REQD).as_float(0); arch_switch.Cout = get_attribute(Node, "Cout", loc_data, COUT_REQD).as_float(0); arch_switch.Cinternal = get_attribute(Node, "Cinternal", loc_data, CINTERNAL_REQD).as_float(0); + arch_switch.penalty_cost = get_attribute(Node, "penalty_cost", loc_data, ReqOpt::OPTIONAL).as_float(0); if (arch_switch.type() == SwitchType::MUX) { //Only muxes have mux transistors diff --git a/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp b/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp index 145245b3d49..e883d3bdd9b 100644 --- a/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp +++ b/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp @@ -2,11 +2,11 @@ # https://github.com/duck2/uxsdcxx # Modify only if your build process doesn't involve regenerating this file. # -# Cmdline: uxsdcxx/uxsdcap.py /home/kmurray/trees/vtr/vpr/src/route/rr_graph.xsd -# Input file: /home/kmurray/trees/vtr/vpr/src/route/rr_graph.xsd -# md5sum of input file: 40e83d2ea6556761d4e29f21324b1871 +# Cmdline: uxsdcxx/uxsdcap.py rr_graph.xsd +# Input file: rr_graph.xsd +# md5sum of input file: c4f47394efd27f5819c943829c111204 -@0xb803100e76d3342d; +@0x9e90feddf132e6a8; using Cxx = import "/capnp/c++.capnp"; $Cxx.namespace("ucap"); @@ -81,6 +81,7 @@ struct Timing { cout @2 :Float32; r @3 :Float32; tdel @4 :Float32; + penaltyCost @5 :Float32; } struct Sizing { diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index 8bbdaee4989..1e86c28462a 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -287,6 +287,7 @@ static void SetupSwitches(const t_arch& Arch, device_ctx.arch_switch_inf[RoutingArch->delayless_switch].R = 0.; device_ctx.arch_switch_inf[RoutingArch->delayless_switch].Cin = 0.; device_ctx.arch_switch_inf[RoutingArch->delayless_switch].Cout = 0.; + device_ctx.arch_switch_inf[RoutingArch->delayless_switch].penalty_cost = 0.; device_ctx.arch_switch_inf[RoutingArch->delayless_switch].set_Tdel(t_arch_switch_inf::UNDEFINED_FANIN, 0.); device_ctx.arch_switch_inf[RoutingArch->delayless_switch].power_buffer_type = POWER_BUFFER_TYPE_NONE; device_ctx.arch_switch_inf[RoutingArch->delayless_switch].mux_trans_size = 0.; diff --git a/vpr/src/route/gen/rr_graph_uxsdcxx.h b/vpr/src/route/gen/rr_graph_uxsdcxx.h index 78aade3b638..1db3e015433 100644 --- a/vpr/src/route/gen/rr_graph_uxsdcxx.h +++ b/vpr/src/route/gen/rr_graph_uxsdcxx.h @@ -4,9 +4,9 @@ * https://github.com/duck2/uxsdcxx * Modify only if your build process doesn't involve regenerating this file. * - * Cmdline: uxsdcxx/uxsdcxx.py /home/kmurray/trees/vtr/vpr/src/route/rr_graph.xsd - * Input file: /home/kmurray/trees/vtr/vpr/src/route/rr_graph.xsd - * md5sum of input file: 40e83d2ea6556761d4e29f21324b1871 + * Cmdline: uxsdcxx/uxsdcxx.py rr_graph.xsd + * Input file: rr_graph.xsd + * md5sum of input file: c4f47394efd27f5819c943829c111204 */ #include @@ -236,8 +236,9 @@ enum class atok_t_timing { CIN, CINTERNAL, COUT, R, - TDEL }; -constexpr const char* atok_lookup_t_timing[] = {"Cin", "Cinternal", "Cout", "R", "Tdel"}; + TDEL, + PENALTY_COST }; +constexpr const char* atok_lookup_t_timing[] = {"Cin", "Cinternal", "Cout", "R", "Tdel", "penalty_cost"}; enum class atok_t_sizing { BUF_SIZE, MUX_TRANS_SIZE }; @@ -628,6 +629,21 @@ inline atok_t_timing lex_attr_t_timing(const char* in, const std::function\n"; } } diff --git a/vpr/src/route/gen/rr_graph_uxsdcxx_capnp.h b/vpr/src/route/gen/rr_graph_uxsdcxx_capnp.h index 4d78a7cf016..48e160866e6 100644 --- a/vpr/src/route/gen/rr_graph_uxsdcxx_capnp.h +++ b/vpr/src/route/gen/rr_graph_uxsdcxx_capnp.h @@ -4,9 +4,9 @@ * https://github.com/duck2/uxsdcxx * Modify only if your build process doesn't involve regenerating this file. * - * Cmdline: uxsdcxx/uxsdcap.py /home/kmurray/trees/vtr/vpr/src/route/rr_graph.xsd - * Input file: /home/kmurray/trees/vtr/vpr/src/route/rr_graph.xsd - * md5sum of input file: 40e83d2ea6556761d4e29f21324b1871 + * Cmdline: uxsdcxx/uxsdcap.py rr_graph.xsd + * Input file: rr_graph.xsd + * md5sum of input file: c4f47394efd27f5819c943829c111204 */ #include @@ -420,6 +420,7 @@ inline void load_timing_capnp_type(const ucap::Timing::Reader& root, T& out, Con out.set_timing_Cout(root.getCout(), context); out.set_timing_R(root.getR(), context); out.set_timing_Tdel(root.getTdel(), context); + out.set_timing_penalty_cost(root.getPenaltyCost(), context); } template @@ -937,6 +938,8 @@ inline void write_switch_capnp_type(T& in, ucap::Switch::Builder& root, Context& switch_timing.setR(in.get_timing_R(child_context)); if ((bool)in.get_timing_Tdel(child_context)) switch_timing.setTdel(in.get_timing_Tdel(child_context)); + if ((bool)in.get_timing_penalty_cost(child_context)) + switch_timing.setPenaltyCost(in.get_timing_penalty_cost(child_context)); } { diff --git a/vpr/src/route/gen/rr_graph_uxsdcxx_interface.h b/vpr/src/route/gen/rr_graph_uxsdcxx_interface.h index ed968c6ebcf..840f3cf6b45 100644 --- a/vpr/src/route/gen/rr_graph_uxsdcxx_interface.h +++ b/vpr/src/route/gen/rr_graph_uxsdcxx_interface.h @@ -4,9 +4,9 @@ * https://github.com/duck2/uxsdcxx * Modify only if your build process doesn't involve regenerating this file. * - * Cmdline: uxsdcxx/uxsdcxx.py /home/kmurray/trees/vtr/vpr/src/route/rr_graph.xsd - * Input file: /home/kmurray/trees/vtr/vpr/src/route/rr_graph.xsd - * md5sum of input file: 40e83d2ea6556761d4e29f21324b1871 + * Cmdline: uxsdcxx/uxsdcxx.py rr_graph.xsd + * Input file: rr_graph.xsd + * md5sum of input file: c4f47394efd27f5819c943829c111204 */ #include @@ -183,6 +183,7 @@ class RrGraphBase { * * * + * * */ virtual inline float get_timing_Cin(typename ContextTypes::TimingReadContext& ctx) = 0; @@ -195,6 +196,8 @@ class RrGraphBase { virtual inline void set_timing_R(float R, typename ContextTypes::TimingWriteContext& ctx) = 0; virtual inline float get_timing_Tdel(typename ContextTypes::TimingReadContext& ctx) = 0; virtual inline void set_timing_Tdel(float Tdel, typename ContextTypes::TimingWriteContext& ctx) = 0; + virtual inline float get_timing_penalty_cost(typename ContextTypes::TimingReadContext& ctx) = 0; + virtual inline void set_timing_penalty_cost(float penalty_cost, typename ContextTypes::TimingWriteContext& ctx) = 0; /** Generated for complex type "sizing": * diff --git a/vpr/src/route/router_lookahead.cpp b/vpr/src/route/router_lookahead.cpp index e38f48ee727..155f8d4a178 100644 --- a/vpr/src/route/router_lookahead.cpp +++ b/vpr/src/route/router_lookahead.cpp @@ -82,7 +82,12 @@ std::pair ClassicLookahead::get_expected_delay_and_cong(int node, + R_upstream * (num_segs_same_dir * same_data.C_load + num_segs_ortho_dir * ortho_data.C_load) + ipin_data.T_linear; - return std::make_pair(params.criticality * Tdel, (1 - params.criticality) * cong_cost); + float penalty_cost = num_segs_same_dir * same_data.penalty_cost + + num_segs_ortho_dir * ortho_data.penalty_cost + + ipin_data.penalty_cost + + sink_data.penalty_cost; + + return std::make_pair(penalty_cost + params.criticality * Tdel, (1 - params.criticality) * cong_cost); } else if (rr_type == IPIN) { /* Change if you're allowing route-throughs */ return std::make_pair(0., device_ctx.rr_indexed_data[SINK_COST_INDEX].base_cost); diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index 5ecba46ddfa..625cb518803 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -882,6 +882,7 @@ void load_rr_switch_from_arch_switch(int arch_switch_idx, device_ctx.rr_switch_inf[rr_switch_idx].Cin = device_ctx.arch_switch_inf[arch_switch_idx].Cin; device_ctx.rr_switch_inf[rr_switch_idx].Cinternal = device_ctx.arch_switch_inf[arch_switch_idx].Cinternal; device_ctx.rr_switch_inf[rr_switch_idx].Cout = device_ctx.arch_switch_inf[arch_switch_idx].Cout; + device_ctx.rr_switch_inf[rr_switch_idx].penalty_cost = device_ctx.arch_switch_inf[arch_switch_idx].penalty_cost; device_ctx.rr_switch_inf[rr_switch_idx].Tdel = rr_switch_Tdel; device_ctx.rr_switch_inf[rr_switch_idx].mux_trans_size = device_ctx.arch_switch_inf[arch_switch_idx].mux_trans_size; if (device_ctx.arch_switch_inf[arch_switch_idx].buf_size_type == BufferSize::AUTO) { diff --git a/vpr/src/route/rr_graph.xsd b/vpr/src/route/rr_graph.xsd index 728ea747e3c..76279182408 100644 --- a/vpr/src/route/rr_graph.xsd +++ b/vpr/src/route/rr_graph.xsd @@ -101,6 +101,7 @@ + diff --git a/vpr/src/route/rr_graph_indexed_data.cpp b/vpr/src/route/rr_graph_indexed_data.cpp index d5f0c95df2d..a343a310643 100644 --- a/vpr/src/route/rr_graph_indexed_data.cpp +++ b/vpr/src/route/rr_graph_indexed_data.cpp @@ -77,9 +77,13 @@ void alloc_and_load_rr_indexed_data(const std::vector& segment_in device_ctx.rr_indexed_data[i].T_linear = 0.; device_ctx.rr_indexed_data[i].T_quadratic = 0.; device_ctx.rr_indexed_data[i].C_load = 0.; + device_ctx.rr_indexed_data[i].penalty_cost = 0.; } + device_ctx.rr_indexed_data[IPIN_COST_INDEX].T_linear = device_ctx.rr_switch_inf[wire_to_ipin_switch].Tdel; + device_ctx.rr_indexed_data[IPIN_COST_INDEX].penalty_cost = device_ctx.rr_switch_inf[wire_to_ipin_switch].penalty_cost; + /* X-directed segments. */ for (iseg = 0; iseg < num_segment; iseg++) { index = CHANX_COST_INDEX_START + iseg; @@ -319,8 +323,8 @@ static void load_rr_indexed_data_T_values(int index_start, * segment. */ int itrack, cost_index; - float *C_total, *R_total; /* [0..device_ctx.rr_indexed_data.size() - 1] */ - double *switch_R_total, *switch_T_total, *switch_Cinternal_total; /* [0..device_ctx.rr_indexed_data.size() - 1] */ + float *C_total, *R_total; /* [0..device_ctx.rr_indexed_data.size() - 1] */ + double *switch_R_total, *switch_T_total, *switch_Cinternal_total, *switch_penalty_cost_total; /* [0..device_ctx.rr_indexed_data.size() - 1] */ short* switches_buffered; int* num_nodes_of_index; /* [0..device_ctx.rr_indexed_data.size() - 1] */ float Rnode, Cnode, Rsw, Tsw, Cinternalsw; @@ -340,6 +344,7 @@ static void load_rr_indexed_data_T_values(int index_start, switch_R_total = (double*)vtr::calloc(device_ctx.rr_indexed_data.size(), sizeof(double)); switch_T_total = (double*)vtr::calloc(device_ctx.rr_indexed_data.size(), sizeof(double)); switch_Cinternal_total = (double*)vtr::calloc(device_ctx.rr_indexed_data.size(), sizeof(double)); + switch_penalty_cost_total = (double*)vtr::calloc(device_ctx.rr_indexed_data.size(), sizeof(double)); switches_buffered = (short*)vtr::calloc(device_ctx.rr_indexed_data.size(), sizeof(short)); /* initialize switches_buffered array */ @@ -364,9 +369,23 @@ static void load_rr_indexed_data_T_values(int index_start, double avg_switch_R = 0; double avg_switch_T = 0; double avg_switch_Cinternal = 0; + double avg_switch_penalty_cost = 0; + int num_edges = device_ctx.rr_nodes[inode].num_edges(); int num_switches = 0; short buffered = UNDEFINED; - calculate_average_switch(inode, avg_switch_R, avg_switch_T, avg_switch_Cinternal, num_switches, buffered); + for (int iedge = 0; iedge < num_edges; iedge++) { + int to_node_index = device_ctx.rr_nodes[inode].edge_sink_node(iedge); + /* want to get C/R/Tdel/Cinternal of switches that connect this track segment to other track segments */ + if (device_ctx.rr_nodes[to_node_index].type() == CHANX || device_ctx.rr_nodes[to_node_index].type() == CHANY) { + int switch_index = device_ctx.rr_nodes[inode].edge_switch(iedge); + avg_switch_R += device_ctx.rr_switch_inf[switch_index].R; + avg_switch_T += device_ctx.rr_switch_inf[switch_index].Tdel; + avg_switch_Cinternal += device_ctx.rr_switch_inf[switch_index].Cinternal; + avg_switch_penalty_cost += device_ctx.rr_switch_inf[switch_index].penalty_cost; + + num_switches++; + } + } if (num_switches == 0) { VTR_LOG_WARN("Track %d had no out-going switches\n", itrack); @@ -377,6 +396,7 @@ static void load_rr_indexed_data_T_values(int index_start, switch_R_total[cost_index] += avg_switch_R; switch_T_total[cost_index] += avg_switch_T; switch_Cinternal_total[cost_index] += avg_switch_Cinternal; + switch_penalty_cost_total[cost_index] += avg_switch_penalty_cost; if (buffered == UNDEFINED) { /* this segment does not have any outgoing edges to other general routing wires */ continue; @@ -461,6 +481,8 @@ static void load_rr_indexed_data_T_values(int index_start, device_ctx.rr_indexed_data[cost_index].T_quadratic = (Rsw + Rnode) * 0.5 * Cnode; } + + device_ctx.rr_indexed_data[cost_index].penalty_cost = (float)switch_penalty_cost_total[cost_index] / num_nodes_of_index[cost_index]; } } @@ -470,6 +492,7 @@ static void load_rr_indexed_data_T_values(int index_start, free(switch_R_total); free(switch_T_total); free(switch_Cinternal_total); + free(switch_penalty_cost_total); free(switches_buffered); } diff --git a/vpr/src/route/rr_graph_uxsdcxx_serializer.h b/vpr/src/route/rr_graph_uxsdcxx_serializer.h index b4c09cff670..61da19a0932 100644 --- a/vpr/src/route/rr_graph_uxsdcxx_serializer.h +++ b/vpr/src/route/rr_graph_uxsdcxx_serializer.h @@ -355,6 +355,13 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { return sw->Tdel; } + inline void set_timing_penalty_cost(float penalty_cost, t_rr_switch_inf*& sw) final { + sw->penalty_cost = penalty_cost; + } + inline float get_timing_penalty_cost(const t_rr_switch_inf*& sw) final { + return sw->penalty_cost; + } + /** Generated for complex type "switch": * * diff --git a/vpr/src/route/rr_node.h b/vpr/src/route/rr_node.h index 99923c1acd0..41748eedb5d 100644 --- a/vpr/src/route/rr_node.h +++ b/vpr/src/route/rr_node.h @@ -185,6 +185,7 @@ struct t_rr_indexed_data { float T_linear = std::numeric_limits::quiet_NaN(); float T_quadratic = std::numeric_limits::quiet_NaN(); float C_load = std::numeric_limits::quiet_NaN(); + float penalty_cost; }; #include "rr_node_impl.h" From 44b8830ed111bf820687e546b1c44c5165a0980e Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Mon, 8 Jul 2019 13:49:21 -0700 Subject: [PATCH 02/41] Integrate upstream uxsdcxx parser with connection box lookahead. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- libs/libvtrcapnproto/CMakeLists.txt | 1 + libs/libvtrcapnproto/connection_map.capnp | 19 + .../gen/rr_graph_uxsdcxx.capnp | 37 +- vpr/src/base/read_options.cpp | 10 +- vpr/src/base/vpr_context.h | 3 + vpr/src/base/vpr_types.h | 1 + vpr/src/route/connection_box.cpp | 145 ++++ vpr/src/route/connection_box.h | 80 +++ .../route/connection_box_lookahead_map.cpp | 583 +++++++++++++++++ vpr/src/route/connection_box_lookahead_map.h | 17 + vpr/src/route/gen/rr_graph_uxsdcxx.h | 617 +++++++++++++++++- vpr/src/route/gen/rr_graph_uxsdcxx_capnp.h | 127 +++- .../route/gen/rr_graph_uxsdcxx_interface.h | 76 ++- vpr/src/route/router_lookahead.cpp | 3 + vpr/src/route/rr_graph.cpp | 1 + vpr/src/route/rr_graph.xsd | 29 + vpr/src/route/rr_graph_reader.cpp | 1 + vpr/src/route/rr_graph_uxsdcxx_serializer.h | 170 +++++ vpr/src/route/rr_graph_writer.cpp | 1 + 19 files changed, 1903 insertions(+), 18 deletions(-) create mode 100644 libs/libvtrcapnproto/connection_map.capnp create mode 100644 vpr/src/route/connection_box.cpp create mode 100644 vpr/src/route/connection_box.h create mode 100644 vpr/src/route/connection_box_lookahead_map.cpp create mode 100644 vpr/src/route/connection_box_lookahead_map.h diff --git a/libs/libvtrcapnproto/CMakeLists.txt b/libs/libvtrcapnproto/CMakeLists.txt index 8067ab14c65..8407ee9e676 100644 --- a/libs/libvtrcapnproto/CMakeLists.txt +++ b/libs/libvtrcapnproto/CMakeLists.txt @@ -19,6 +19,7 @@ endif() # Each schema used should appear here. set(CAPNP_DEFS place_delay_model.capnp + connection_map.capnp matrix.capnp gen/rr_graph_uxsdcxx.capnp map_lookahead.capnp diff --git a/libs/libvtrcapnproto/connection_map.capnp b/libs/libvtrcapnproto/connection_map.capnp new file mode 100644 index 00000000000..ac03ddfc9c0 --- /dev/null +++ b/libs/libvtrcapnproto/connection_map.capnp @@ -0,0 +1,19 @@ +@0x876ec83c2fea5a18; + +using Matrix = import "matrix.capnp"; + +struct VprCostEntry { + delay @0 :Float32; + congestion @1 :Float32; +} + +struct VprVector2D { + x @0 :Int64; + y @1 :Int64; +} + +struct VprCostMap { + costMap @0 :List(Matrix.Matrix(VprCostEntry)); + offset @1 :List(VprVector2D); + segmentMap @2 :List(Int64); +} diff --git a/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp b/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp index e883d3bdd9b..519512f25e9 100644 --- a/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp +++ b/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp @@ -4,9 +4,9 @@ # # Cmdline: uxsdcxx/uxsdcap.py rr_graph.xsd # Input file: rr_graph.xsd -# md5sum of input file: c4f47394efd27f5819c943829c111204 +# md5sum of input file: d9e439fa173fdf56b51feeed0ac48272 -@0x9e90feddf132e6a8; +@0xa90e1cca7f71265c; using Cxx = import "/capnp/c++.capnp"; $Cxx.namespace("ucap"); @@ -138,6 +138,18 @@ struct BlockTypes { blockTypes @0 :List(BlockType); } +struct ConnectionBoxDeclaration { + id @0 :UInt32; + name @1 :Text; +} + +struct ConnectionBoxes { + numBoxes @0 :UInt32; + xDim @1 :UInt32; + yDim @2 :UInt32; + connectionBoxes @3 :List(ConnectionBoxDeclaration); +} + struct GridLoc { blockTypeId @0 :Int32; heightOffset @1 :Int32; @@ -177,6 +189,18 @@ struct Metadata { metas @0 :List(Meta); } +struct CanonicalLoc { + x @0 :UInt32; + y @1 :UInt32; +} + +struct ConnectionBoxAnnotation { + id @0 :UInt32; + sitePinDelay @1 :Float32; + x @2 :UInt32; + y @3 :UInt32; +} + struct Node { capacity @0 :UInt32; direction @1 :NodeDirection; @@ -186,6 +210,8 @@ struct Node { timing @5 :NodeTiming; segment @6 :NodeSegment; metadata @7 :Metadata; + canonicalLoc @8 :CanonicalLoc; + connectionBox @9 :ConnectionBoxAnnotation; } struct RrNodes { @@ -211,7 +237,8 @@ struct RrGraph { switches @4 :Switches; segments @5 :Segments; blockTypes @6 :BlockTypes; - grid @7 :GridLocs; - rrNodes @8 :RrNodes; - rrEdges @9 :RrEdges; + connectionBoxes @7 :ConnectionBoxes; + grid @8 :GridLocs; + rrNodes @9 :RrNodes; + rrEdges @10 :RrEdges; } diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 7c9e23f29ee..bed702d31a0 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -775,6 +775,8 @@ struct ParseRouterLookahead { conv_value.set_value(e_router_lookahead::MAP); else if (str == "extended_map") conv_value.set_value(e_router_lookahead::EXTENDED_MAP); + else if (str == "connection_box_map") + conv_value.set_value(e_router_lookahead::CONNECTION_BOX_MAP); else { std::stringstream msg; msg << "Invalid conversion from '" @@ -788,10 +790,12 @@ struct ParseRouterLookahead { ConvertedValue to_str(e_router_lookahead val) { ConvertedValue conv_value; - if (val == e_router_lookahead::CLASSIC) + if (val == e_router_lookahead::CLASSIC) { conv_value.set_value("classic"); - else if (val == e_router_lookahead::MAP) { + } else if (val == e_router_lookahead::MAP) { conv_value.set_value("map"); + } else if (val == e_router_lookahead::CONNECTION_BOX_MAP) { + conv_value.set_value("connection_box_map"); } else { VTR_ASSERT(val == e_router_lookahead::EXTENDED_MAP); conv_value.set_value("extended_map"); @@ -800,7 +804,7 @@ struct ParseRouterLookahead { } std::vector default_choices() { - return {"classic", "map", "extended_map"}; + return {"classic", "map", "extended_map", "connection_box_map"}; } }; diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 752c96cc331..5f328290016 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -24,6 +24,7 @@ #include "place_macro.h" #include "compressed_grid.h" #include "metadata_storage.h" +#include "connection_box.h" /** * @brief A Context is collection of state relating to a particular part of VPR @@ -231,6 +232,8 @@ struct DeviceContext : public Context { * Used to determine when reading rrgraph if file is already loaded. */ std::string read_rr_graph_filename; + + ConnectionBoxes connection_boxes; }; /** diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 00a215ee681..b17036638de 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -113,6 +113,7 @@ enum class e_router_lookahead { CLASSIC, /// ConnectionBoxes::connection_box_grid_size() const { + return size_; +} + +const ConnectionBox* ConnectionBoxes::get_connection_box(ConnectionBoxId box) const { + if (!bool(box)) { + return nullptr; + } + + size_t index = size_t(box); + if (index >= boxes_.size()) { + return nullptr; + } + + return &boxes_.at(index); +} + +bool ConnectionBoxes::find_connection_box(int inode, + ConnectionBoxId* box_id, + std::pair* box_location, + float* site_pin_delay) const { + VTR_ASSERT(box_id != nullptr); + VTR_ASSERT(box_location != nullptr); + + if (inode >= (ssize_t)ipin_map_.size()) { + return false; + } + + const auto& conn_box_loc = ipin_map_[inode]; + if (conn_box_loc.box_id == ConnectionBoxId::INVALID()) { + return false; + } + + *box_id = conn_box_loc.box_id; + *box_location = conn_box_loc.box_location; + *site_pin_delay = conn_box_loc.site_pin_delay; + return true; +} + +// Clear IPIN map and set connection box grid size and box ids. +void ConnectionBoxes::reset_boxes(std::pair size, + const std::vector boxes) { + clear(); + + size_ = size; + boxes_ = boxes; +} + +void ConnectionBoxes::resize_nodes(size_t rr_node_size) { + ipin_map_.resize(rr_node_size); + ipin_map_.shrink_to_fit(); + canonical_loc_map_.resize(rr_node_size, + std::make_pair(-1, -1)); + canonical_loc_map_.shrink_to_fit(); +} + +void ConnectionBoxes::clear() { + ipin_map_.clear(); + size_ = std::make_pair(0, 0); + boxes_.clear(); + canonical_loc_map_.clear(); + sink_to_ipin_.clear(); +} + +void ConnectionBoxes::add_connection_box(int inode, ConnectionBoxId box_id, std::pair box_location, float site_pin_delay) { + // Ensure that box location is in bounds + VTR_ASSERT(box_location.first < size_.first); + VTR_ASSERT(box_location.second < size_.second); + + // Bounds check box_id + VTR_ASSERT(bool(box_id)); + VTR_ASSERT(size_t(box_id) < boxes_.size()); + + // Make sure sink map will not be invalidated upon insertion. + VTR_ASSERT(sink_to_ipin_.size() == 0); + + if (inode >= (ssize_t)(ipin_map_.size())) { + ipin_map_.resize(inode + 1); + } + ipin_map_[inode] = ConnBoxLoc(box_location, site_pin_delay, box_id); +} + +void ConnectionBoxes::add_canonical_loc(int inode, std::pair loc) { + VTR_ASSERT(loc.first < size_.first); + VTR_ASSERT(loc.second < size_.second); + if (inode >= (ssize_t)(canonical_loc_map_.size())) { + canonical_loc_map_.resize(inode + 1); + } + canonical_loc_map_[inode] = loc; +} + +const std::pair* ConnectionBoxes::find_canonical_loc(int inode) const { + if (inode >= (ssize_t)canonical_loc_map_.size()) { + return nullptr; + } + + const auto& canon_loc = canonical_loc_map_[inode]; + if (canon_loc.first == size_t(-1)) { + return nullptr; + } + + return &canon_loc; +} + +void ConnectionBoxes::create_sink_back_ref() { + const auto& device_ctx = g_vpr_ctx.device(); + + sink_to_ipin_.resize(device_ctx.rr_nodes.size(), {{0, 0, 0, 0}, 0}); + + for (size_t i = 0; i < device_ctx.rr_nodes.size(); ++i) { + const auto& ipin_node = device_ctx.rr_nodes[i]; + if (ipin_node.type() != IPIN) { + continue; + } + + if (ipin_map_[i].box_id == ConnectionBoxId::INVALID()) { + continue; + } + + for (auto edge : ipin_node.edges()) { + int sink_inode = ipin_node.edge_sink_node(edge); + VTR_ASSERT(device_ctx.rr_nodes[sink_inode].type() == SINK); + VTR_ASSERT(sink_to_ipin_[sink_inode].ipin_count < 4); + auto& sink_to_ipin = sink_to_ipin_[sink_inode]; + sink_to_ipin.ipin_nodes[sink_to_ipin.ipin_count++] = i; + } + } +} + +const SinkToIpin& ConnectionBoxes::find_sink_connection_boxes( + int inode) const { + return sink_to_ipin_[inode]; +} diff --git a/vpr/src/route/connection_box.h b/vpr/src/route/connection_box.h new file mode 100644 index 00000000000..25fd0ea422c --- /dev/null +++ b/vpr/src/route/connection_box.h @@ -0,0 +1,80 @@ +#ifndef CONNECTION_BOX_H +#define CONNECTION_BOX_H +// Some routing graphs have connectivity driven by types of connection boxes. +// This class relates IPIN rr nodes with connection box type and locations, used +// for connection box driven map lookahead. + +#include +#include "vtr_strong_id.h" +#include "vtr_flat_map.h" +#include "vtr_range.h" +#include + +struct connection_box_tag {}; +typedef vtr::StrongId ConnectionBoxId; + +struct ConnectionBox { + std::string name; +}; + +struct ConnBoxLoc { + ConnBoxLoc() + : box_location(std::make_pair(-1, -1)) {} + ConnBoxLoc( + const std::pair& a_box_location, + float a_site_pin_delay, + ConnectionBoxId a_box_id) + : box_location(a_box_location) + , box_id(a_box_id) + , site_pin_delay(a_site_pin_delay) {} + + std::pair box_location; + ConnectionBoxId box_id; + float site_pin_delay; +}; + +struct SinkToIpin { + int ipin_nodes[4]; + int ipin_count; +}; + +class ConnectionBoxes { + public: + ConnectionBoxes(); + + size_t num_connection_box_types() const; + std::pair connection_box_grid_size() const; + const ConnectionBox* get_connection_box(ConnectionBoxId box) const; + + bool find_connection_box(int inode, + ConnectionBoxId* box_id, + std::pair* box_location, + float* site_pin_delay) const; + const std::pair* find_canonical_loc(int inode) const; + + // Clear IPIN map and set connection box grid size and box ids. + void clear(); + void reset_boxes(std::pair size, + const std::vector boxes); + void resize_nodes(size_t rr_node_size); + + void add_connection_box(int inode, ConnectionBoxId box_id, std::pair box_location, float site_pin_delay); + void add_canonical_loc(int inode, std::pair loc); + + // Create map from SINK's back to IPIN's + // + // This must be called after all connection boxes have been added. + void create_sink_back_ref(); + const SinkToIpin& find_sink_connection_boxes( + int inode) const; + + private: + std::pair size_; + std::vector boxes_; + std::vector ipin_map_; + std::vector sink_to_ipin_; + std::vector> + canonical_loc_map_; +}; + +#endif diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp new file mode 100644 index 00000000000..2b1b3b48c34 --- /dev/null +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -0,0 +1,583 @@ +#include "connection_box_lookahead_map.h" + +#include +#include + +#include "connection_box.h" +#include "rr_node.h" +#include "router_lookahead_map_utils.h" +#include "globals.h" +#include "vtr_math.h" +#include "vtr_time.h" +#include "echo_files.h" + +#include "route_timing.h" + +#include "capnp/serialize.h" +#include "connection_map.capnp.h" +#include "ndmatrix_serdes.h" +#include "mmap_file.h" +#include "serdes_utils.h" +#include "route_common.h" + +/* we're profiling routing cost over many tracks for each wire type, so we'll + * have many cost entries at each |dx|,|dy| offset. There are many ways to + * "boil down" the many costs at each offset to a single entry for a given + * (wire type, chan_type) combination we can take the smallest cost, the + * average, median, etc. This define selects the method we use. + * + * See e_representative_entry_method */ +#define REPRESENTATIVE_ENTRY_METHOD SMALLEST + +#define REF_X 25 +#define REF_Y 23 + +static int signum(int x) { + if (x > 0) return 1; + if (x < 0) + return -1; + else + return 0; +} + +typedef std::vector, Cost_Entry>> t_routing_cost_map; +static void run_dijkstra(int start_node_ind, + t_routing_cost_map* cost_map); + +class CostMap { + public: + void set_segment_count(size_t seg_count) { + cost_map_.clear(); + offset_.clear(); + cost_map_.resize(seg_count); + offset_.resize(seg_count); + + const auto& device_ctx = g_vpr_ctx.device(); + segment_map_.resize(device_ctx.rr_nodes.size()); + for (size_t i = 0; i < segment_map_.size(); ++i) { + auto& from_node = device_ctx.rr_nodes[i]; + + int from_cost_index = from_node.cost_index(); + int from_seg_index = device_ctx.rr_indexed_data[from_cost_index].seg_index; + + segment_map_[i] = from_seg_index; + } + } + + int node_to_segment(int from_node_ind) { + return segment_map_[from_node_ind]; + } + + Cost_Entry find_cost(int from_seg_index, int delta_x, int delta_y) const { + VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); + int dx = delta_x - offset_[from_seg_index].first; + int dy = delta_y - offset_[from_seg_index].second; + const auto& cost_map = cost_map_[from_seg_index]; + + if (dx < 0) { + dx = 0; + } + if (dy < 0) { + dy = 0; + } + + if (dx >= (ssize_t)cost_map.dim_size(0)) { + dx = cost_map.dim_size(0) - 1; + } + if (dy >= (ssize_t)cost_map.dim_size(1)) { + dy = cost_map.dim_size(1) - 1; + } + + return cost_map_[from_seg_index][dx][dy]; + } + + void set_cost_map(int from_seg_index, + const t_routing_cost_map& cost_map, + e_representative_entry_method method) { + VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); + + // Find coordinate offset for this segment. + int min_dx = 0; + int min_dy = 0; + int max_dx = 0; + int max_dy = 0; + for (const auto& entry : cost_map) { + min_dx = std::min(entry.first.first, min_dx); + min_dy = std::min(entry.first.second, min_dy); + + max_dx = std::max(entry.first.first, max_dx); + max_dy = std::max(entry.first.second, max_dy); + } + + offset_[from_seg_index].first = min_dx; + offset_[from_seg_index].second = min_dy; + size_t dim_x = max_dx - min_dx + 1; + size_t dim_y = max_dy - min_dy + 1; + + vtr::NdMatrix expansion_cost_map( + {dim_x, dim_y}); + + for (const auto& entry : cost_map) { + int x = entry.first.first - min_dx; + int y = entry.first.second - min_dy; + expansion_cost_map[x][y].add_cost_entry( + method, entry.second.delay, + entry.second.congestion); + } + + cost_map_[from_seg_index] = vtr::NdMatrix( + {dim_x, dim_y}); + + /* set the lookahead cost map entries with a representative cost + * entry from routing_cost_map */ + for (unsigned ix = 0; ix < expansion_cost_map.dim_size(0); ix++) { + for (unsigned iy = 0; iy < expansion_cost_map.dim_size(1); iy++) { + cost_map_[from_seg_index][ix][iy] = expansion_cost_map[ix][iy].get_representative_cost_entry(method); + } + } + + /* find missing cost entries and fill them in by copying a nearby cost entry */ + for (unsigned ix = 0; ix < expansion_cost_map.dim_size(0); ix++) { + for (unsigned iy = 0; iy < expansion_cost_map.dim_size(1); iy++) { + Cost_Entry cost_entry = cost_map_[from_seg_index][ix][iy]; + + if (!cost_entry.valid()) { + Cost_Entry copied_entry = get_nearby_cost_entry( + from_seg_index, + offset_[from_seg_index].first + ix, + offset_[from_seg_index].second + iy); + cost_map_[from_seg_index][ix][iy] = copied_entry; + } + } + } + } + + Cost_Entry get_nearby_cost_entry(int segment_index, int x, int y) { + /* compute the slope from x,y to 0,0 and then move towards 0,0 by one + * unit to get the coordinates of the cost entry to be copied */ + + float slope; + int copy_x, copy_y; + if (x == 0 || y == 0) { + slope = std::numeric_limits::infinity(); + copy_x = x - signum(x); + copy_y = y - signum(y); + } else { + slope = (float)y / (float)x; + if (slope >= 1.0) { + copy_y = y - signum(y); + copy_x = vtr::nint((float)y / slope); + } else { + copy_x = x - signum(x); + copy_y = vtr::nint((float)x * slope); + } + } + + Cost_Entry copy_entry = find_cost(segment_index, copy_x, copy_y); + + /* if the entry to be copied is also empty, recurse */ + if (copy_entry.valid()) { + return copy_entry; + } else if (copy_x == 0 && copy_y == 0) { + return Cost_Entry(); + } + + return get_nearby_cost_entry(segment_index, copy_x, copy_y); + } + + void print_cost_map(const std::vector& segment_inf, + const char* fname) { + FILE* fp = vtr::fopen(fname, "w"); + for (size_t iseg = 0; iseg < cost_map_.size(); iseg++) { + fprintf(fp, "Seg %s(%zu) (%d, %d)\n", segment_inf.at(iseg).name.c_str(), + iseg, + offset_[iseg].first, + offset_[iseg].second); + for (size_t iy = 0; iy < cost_map_[iseg].dim_size(1); iy++) { + for (size_t ix = 0; ix < cost_map_[iseg].dim_size(0); ix++) { + fprintf(fp, "%.4g,\t", + cost_map_[iseg][ix][iy].delay); + } + fprintf(fp, "\n"); + } + fprintf(fp, "\n\n"); + } + + fclose(fp); + } + + void read(const std::string& file); + void write(const std::string& file) const; + + private: + std::vector> cost_map_; + std::vector> offset_; + std::vector segment_map_; +}; + +static CostMap g_cost_map; + +class StartNode { + public: + StartNode(int start_x, int start_y, t_rr_type rr_type, int seg_index) + : start_x_(start_x) + , start_y_(start_y) + , rr_type_(rr_type) + , seg_index_(seg_index) + , index_(0) {} + int get_next_node() { + const auto& device_ctx = g_vpr_ctx.device(); + const std::vector& channel_node_list = device_ctx.rr_node_indices[rr_type_][start_x_][start_y_][0]; + + for (; index_ < channel_node_list.size(); index_++) { + int node_ind = channel_node_list[index_]; + + if (node_ind == OPEN || device_ctx.rr_nodes[node_ind].capacity() == 0) { + continue; + } + + const std::pair* loc = device_ctx.connection_boxes.find_canonical_loc(node_ind); + if (loc == nullptr) { + continue; + } + + int node_cost_ind = device_ctx.rr_nodes[node_ind].cost_index(); + int node_seg_ind = device_ctx.rr_indexed_data[node_cost_ind].seg_index; + if (node_seg_ind == seg_index_) { + index_ += 1; + return node_ind; + } + } + + return UNDEFINED; + } + + private: + int start_x_; + int start_y_; + t_rr_type rr_type_; + int seg_index_; + size_t index_; +}; + +// Minimum size of search for channels to profile. kMinProfile results +// in searching x = [0, kMinProfile], and y = [0, kMinProfile[. +// +// Making this value larger will increase the sample size, but also the runtime +// to produce the lookahead. +static constexpr int kMinProfile = 1; + +// Maximum size of search for channels to profile. Once search is outside of +// kMinProfile distance, lookahead will stop searching once: +// - At least one channel has been profiled +// - kMaxProfile is exceeded. +static constexpr int kMaxProfile = 7; + +static void compute_connection_box_lookahead( + const std::vector& segment_inf) { + size_t num_segments = segment_inf.size(); + vtr::ScopedStartFinishTimer timer("Computing connection box lookahead map"); + + /* free previous delay map and allocate new one */ + g_cost_map.set_segment_count(segment_inf.size()); + + /* run Dijkstra's algorithm for each segment type & channel type combination */ + for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { + VTR_LOG("Creating cost map for %s(%d)\n", + segment_inf[iseg].name.c_str(), iseg); + /* allocate the cost map for this iseg/chan_type */ + t_routing_cost_map cost_map; + + int count = 0; + + int dx = 0; + int dy = 0; + //int start_x = vtr::nint(device_ctx.grid.width()/2); + //int start_y = vtr::nint(device_ctx.grid.height()/2); + int start_x = REF_X; + int start_y = REF_Y; + while ((count == 0 && dx < kMaxProfile) || dy <= kMinProfile) { + for (e_rr_type chan_type : {CHANX, CHANY}) { + StartNode start_node(start_x + dx, start_y + dy, chan_type, iseg); + + for (int start_node_ind = start_node.get_next_node(); + start_node_ind != UNDEFINED; + start_node_ind = start_node.get_next_node()) { + count += 1; + + /* run Dijkstra's algorithm */ + run_dijkstra(start_node_ind, &cost_map); + } + } + + if (dy < dx) { + dy += 1; + } else { + dx += 1; + } + } + + if (count == 0) { + VTR_LOG_WARN("Segment %s(%d) found no start_node_ind\n", + segment_inf[iseg].name.c_str(), iseg); + } + + /* boil down the cost list in routing_cost_map at each coordinate to a + * representative cost entry and store it in the lookahead cost map */ + g_cost_map.set_cost_map(iseg, cost_map, + REPRESENTATIVE_ENTRY_METHOD); + } + + if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_LOOKAHEAD_MAP)) { + g_cost_map.print_cost_map(segment_inf, getEchoFileName(E_ECHO_LOOKAHEAD_MAP)); + } +} + +static float get_connection_box_lookahead_map_cost(int from_node_ind, + int to_node_ind, + float criticality_fac) { + if (from_node_ind == to_node_ind) { + return 0.f; + } + + auto& device_ctx = g_vpr_ctx.device(); + + std::pair from_location; + std::pair to_location; + auto to_node_type = device_ctx.rr_nodes[to_node_ind].type(); + + if (to_node_type == SINK) { + const auto& sink_to_ipin = device_ctx.connection_boxes.find_sink_connection_boxes(to_node_ind); + if (sink_to_ipin.ipin_count > 1) { + float cost = std::numeric_limits::infinity(); + // Find cheapest cost from from_node_ind to IPINs for this SINK. + for (int i = 0; i < sink_to_ipin.ipin_count; ++i) { + cost = std::min(cost, + get_connection_box_lookahead_map_cost( + from_node_ind, + sink_to_ipin.ipin_nodes[i], criticality_fac)); + } + + return cost; + } else if (sink_to_ipin.ipin_count == 1) { + to_node_ind = sink_to_ipin.ipin_nodes[0]; + if (from_node_ind == to_node_ind) { + return 0.f; + } + } else { + return std::numeric_limits::infinity(); + } + } + + if (device_ctx.rr_nodes[to_node_ind].type() == IPIN) { + ConnectionBoxId box_id; + std::pair box_location; + float site_pin_delay; + bool found = device_ctx.connection_boxes.find_connection_box( + to_node_ind, &box_id, &box_location, &site_pin_delay); + if (!found) { + VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", to_node_ind); + } + + to_location = box_location; + } else { + const std::pair* to_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(to_node_ind); + if (!to_canonical_loc) { + VPR_THROW(VPR_ERROR_ROUTE, "No canonical loc for %d", to_node_ind); + } + + to_location = *to_canonical_loc; + } + + const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(from_node_ind); + if (from_canonical_loc == nullptr) { + VPR_THROW(VPR_ERROR_ROUTE, "No canonical loc for %d (to %d)", + from_node_ind, to_node_ind); + } + + ssize_t dx = ssize_t(from_canonical_loc->first) - ssize_t(to_location.first); + ssize_t dy = ssize_t(from_canonical_loc->second) - ssize_t(to_location.second); + + int from_seg_index = g_cost_map.node_to_segment(from_node_ind); + Cost_Entry cost_entry = g_cost_map.find_cost(from_seg_index, dx, dy); + float expected_delay = cost_entry.delay; + float expected_congestion = cost_entry.congestion; + + float expected_cost = criticality_fac * expected_delay + (1.0 - criticality_fac) * expected_congestion; + return expected_cost; +} + +/* runs Dijkstra's algorithm from specified node until all nodes have been + * visited. Each time a pin is visited, the delay/congestion information + * to that pin is stored to an entry in the routing_cost_map */ +static void run_dijkstra(int start_node_ind, + t_routing_cost_map* routing_cost_map) { + auto& device_ctx = g_vpr_ctx.device(); + + /* a list of boolean flags (one for each rr node) to figure out if a + * certain node has already been expanded */ + std::vector node_expanded(device_ctx.rr_nodes.size(), false); + /* for each node keep a list of the cost with which that node has been + * visited (used to determine whether to push a candidate node onto the + * expansion queue */ + std::vector node_visited_costs(device_ctx.rr_nodes.size(), -1.0); + /* a priority queue for expansion */ + std::priority_queue pq; + + /* first entry has no upstream delay or congestion */ + util::PQ_Entry first_entry(start_node_ind, UNDEFINED, 0, 0, 0, true); + + pq.push(first_entry); + + const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(start_node_ind); + if (from_canonical_loc == nullptr) { + VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d", + start_node_ind); + } + + /* now do routing */ + while (!pq.empty()) { + util::PQ_Entry current = pq.top(); + pq.pop(); + + int node_ind = current.rr_node_ind; + + /* check that we haven't already expanded from this node */ + if (node_expanded[node_ind]) { + continue; + } + + /* if this node is an ipin record its congestion/delay in the routing_cost_map */ + if (device_ctx.rr_nodes[node_ind].type() == IPIN) { + ConnectionBoxId box_id; + std::pair box_location; + float site_pin_delay; + bool found = device_ctx.connection_boxes.find_connection_box( + node_ind, &box_id, &box_location, &site_pin_delay); + if (!found) { + VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", node_ind); + } + + int delta_x = ssize_t(from_canonical_loc->first) - ssize_t(box_location.first); + int delta_y = ssize_t(from_canonical_loc->second) - ssize_t(box_location.second); + + routing_cost_map->push_back(std::make_pair( + std::make_pair(delta_x, delta_y), + Cost_Entry( + current.delay, + current.congestion_upstream))); + } + + expand_dijkstra_neighbours(current, node_visited_costs, node_expanded, pq); + node_expanded[node_ind] = true; + } +} + +void ConnectionBoxMapLookahead::compute(const std::vector& segment_inf) { + compute_connection_box_lookahead(segment_inf); +} + +float ConnectionBoxMapLookahead::get_expected_cost( + int current_node, + int target_node, + const t_conn_cost_params& params, + float /*R_upstream*/) const { + auto& device_ctx = g_vpr_ctx.device(); + + t_rr_type rr_type = device_ctx.rr_nodes[current_node].type(); + + if (rr_type == CHANX || rr_type == CHANY) { + return get_connection_box_lookahead_map_cost( + current_node, target_node, params.criticality); + } else if (rr_type == IPIN) { /* Change if you're allowing route-throughs */ + return (device_ctx.rr_indexed_data[SINK_COST_INDEX].base_cost); + } else { /* Change this if you want to investigate route-throughs */ + return (0.); + } +} + +void ConnectionBoxMapLookahead::read(const std::string& file) { + g_cost_map.read(file); +} +void ConnectionBoxMapLookahead::write(const std::string& file) const { + g_cost_map.write(file); +} + +static void ToCostEntry(Cost_Entry* out, const VprCostEntry::Reader& in) { + out->delay = in.getDelay(); + out->congestion = in.getCongestion(); +} + +static void FromCostEntry(VprCostEntry::Builder* out, const Cost_Entry& in) { + out->setDelay(in.delay); + out->setCongestion(in.congestion); +} + +void CostMap::read(const std::string& file) { + MmapFile f(file); + + ::capnp::ReaderOptions opts = default_large_capnp_opts(); + ::capnp::FlatArrayMessageReader reader(f.getData(), opts); + + auto cost_map = reader.getRoot(); + + { + const auto& segment_map = cost_map.getSegmentMap(); + segment_map_.resize(segment_map.size()); + auto dst_iter = segment_map_.begin(); + for (const auto& src : segment_map) { + *dst_iter++ = src; + } + } + + { + const auto& offset = cost_map.getOffset(); + offset_.resize(offset.size()); + auto dst_iter = offset_.begin(); + for (const auto& src : offset) { + *dst_iter++ = std::make_pair(src.getX(), src.getY()); + } + } + + { + const auto& cost_maps = cost_map.getCostMap(); + cost_map_.resize(cost_maps.size()); + auto dst_iter = cost_map_.begin(); + for (const auto& src : cost_maps) { + ToNdMatrix<2, VprCostEntry, Cost_Entry>(&(*dst_iter++), src, ToCostEntry); + } + } +} + +void CostMap::write(const std::string& file) const { + ::capnp::MallocMessageBuilder builder; + + auto cost_map = builder.initRoot(); + + { + auto segment_map = cost_map.initSegmentMap(segment_map_.size()); + for (size_t i = 0; i < segment_map_.size(); ++i) { + segment_map.set(i, segment_map_[i]); + } + } + + { + auto offset = cost_map.initOffset(offset_.size()); + for (size_t i = 0; i < offset_.size(); ++i) { + auto elem = offset[i]; + elem.setX(offset_[i].first); + elem.setY(offset_[i].second); + } + } + + { + auto cost_maps = cost_map.initCostMap(cost_map_.size()); + for (size_t i = 0; i < cost_map_.size(); ++i) { + Matrix::Builder elem = cost_maps[i]; + FromNdMatrix<2, VprCostEntry, Cost_Entry>( + &elem, cost_map_[i], FromCostEntry); + } + } + + writeMessageToFile(file, &builder); +} diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h new file mode 100644 index 00000000000..03e1a140f0b --- /dev/null +++ b/vpr/src/route/connection_box_lookahead_map.h @@ -0,0 +1,17 @@ +#ifndef CONNECTION_BOX_LOOKAHEAD_H_ +#define CONNECTION_BOX_LOOKAHEAD_H_ + +#include +#include "physical_types.h" +#include "router_lookahead.h" + +class ConnectionBoxMapLookahead : public RouterLookahead { + public: + float get_expected_cost(int node, int target_node, const t_conn_cost_params& params, float R_upstream) const override; + void compute(const std::vector& segment_inf) override; + + void read(const std::string& file) override; + void write(const std::string& file) const override; +}; + +#endif diff --git a/vpr/src/route/gen/rr_graph_uxsdcxx.h b/vpr/src/route/gen/rr_graph_uxsdcxx.h index 1db3e015433..e417e8fc578 100644 --- a/vpr/src/route/gen/rr_graph_uxsdcxx.h +++ b/vpr/src/route/gen/rr_graph_uxsdcxx.h @@ -6,7 +6,7 @@ * * Cmdline: uxsdcxx/uxsdcxx.py rr_graph.xsd * Input file: rr_graph.xsd - * md5sum of input file: c4f47394efd27f5819c943829c111204 + * md5sum of input file: d9e439fa173fdf56b51feeed0ac48272 */ #include @@ -80,6 +80,12 @@ inline void load_block_type_required_attributes(const pugi::xml_node& root, int* template inline void load_block_types(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); template +inline void load_connection_box_declaration(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); +inline void load_connection_box_declaration_required_attributes(const pugi::xml_node& root, unsigned int* id, const std::function* report_error); +template +inline void load_connection_boxes(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); +inline void load_connection_boxes_required_attributes(const pugi::xml_node& root, unsigned int* num_boxes, unsigned int* x_dim, unsigned int* y_dim, const std::function* report_error); +template inline void load_grid_loc(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); inline void load_grid_loc_required_attributes(const pugi::xml_node& root, int* block_type_id, int* height_offset, int* width_offset, int* x, int* y, const std::function* report_error); template @@ -98,6 +104,12 @@ inline void load_meta(const pugi::xml_node& root, T& out, Context& context, cons template inline void load_metadata(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); template +inline void load_canonical_loc(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); +inline void load_canonical_loc_required_attributes(const pugi::xml_node& root, unsigned int* x, unsigned int* y, const std::function* report_error); +template +inline void load_connection_box_annotation(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); +inline void load_connection_box_annotation_required_attributes(const pugi::xml_node& root, unsigned int* id, float* site_pin_delay, unsigned int* x, unsigned int* y, const std::function* report_error); +template inline void load_node(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); inline void load_node_required_attributes(const pugi::xml_node& root, unsigned int* capacity, unsigned int* id, enum_node_type* type, const std::function* report_error); template @@ -130,6 +142,8 @@ inline void write_block_type(T& in, std::ostream& os, const void* data, void* it template inline void write_block_types(T& in, std::ostream& os, const void* data, void* iter); template +inline void write_connection_boxes(T& in, std::ostream& os, const void* data, void* iter); +template inline void write_grid_locs(T& in, std::ostream& os, const void* data, void* iter); template inline void write_meta(T& in, std::ostream& os, const void* data, void* iter); @@ -287,6 +301,17 @@ constexpr const char* atok_lookup_t_block_type[] = {"height", "id", "name", "wid enum class gtok_t_block_types { BLOCK_TYPE }; constexpr const char* gtok_lookup_t_block_types[] = {"block_type"}; +enum class atok_t_connection_box_declaration { ID, + NAME }; +constexpr const char* atok_lookup_t_connection_box_declaration[] = {"id", "name"}; + +enum class gtok_t_connection_boxes { CONNECTION_BOX }; +constexpr const char* gtok_lookup_t_connection_boxes[] = {"connection_box"}; +enum class atok_t_connection_boxes { NUM_BOXES, + X_DIM, + Y_DIM }; +constexpr const char* atok_lookup_t_connection_boxes[] = {"num_boxes", "x_dim", "y_dim"}; + enum class atok_t_grid_loc { BLOCK_TYPE_ID, HEIGHT_OFFSET, WIDTH_OFFSET, @@ -317,11 +342,24 @@ constexpr const char* atok_lookup_t_meta[] = {"name"}; enum class gtok_t_metadata { META }; constexpr const char* gtok_lookup_t_metadata[] = {"meta"}; + +enum class atok_t_canonical_loc { X, + Y }; +constexpr const char* atok_lookup_t_canonical_loc[] = {"x", "y"}; + +enum class atok_t_connection_box_annotation { ID, + SITE_PIN_DELAY, + X, + Y }; +constexpr const char* atok_lookup_t_connection_box_annotation[] = {"id", "site_pin_delay", "x", "y"}; + enum class gtok_t_node { LOC, TIMING, SEGMENT, - METADATA }; -constexpr const char* gtok_lookup_t_node[] = {"loc", "timing", "segment", "metadata"}; + METADATA, + CANONICAL_LOC, + CONNECTION_BOX }; +constexpr const char* gtok_lookup_t_node[] = {"loc", "timing", "segment", "metadata", "canonical_loc", "connection_box"}; enum class atok_t_node { CAPACITY, DIRECTION, ID, @@ -343,10 +381,11 @@ enum class gtok_t_rr_graph { CHANNELS, SWITCHES, SEGMENTS, BLOCK_TYPES, + CONNECTION_BOXES, GRID, RR_NODES, RR_EDGES }; -constexpr const char* gtok_lookup_t_rr_graph[] = {"channels", "switches", "segments", "block_types", "grid", "rr_nodes", "rr_edges"}; +constexpr const char* gtok_lookup_t_rr_graph[] = {"channels", "switches", "segments", "block_types", "connection_boxes", "grid", "rr_nodes", "rr_edges"}; enum class atok_t_rr_graph { TOOL_COMMENT, TOOL_NAME, TOOL_VERSION }; @@ -1159,6 +1198,122 @@ inline gtok_t_block_types lex_node_t_block_types(const char* in, const std::func noreturn_report(report_error, ("Found unrecognized child " + std::string(in) + " of .").c_str()); } +inline atok_t_connection_box_declaration lex_attr_t_connection_box_declaration(const char* in, const std::function* report_error) { + unsigned int len = strlen(in); + switch (len) { + case 2: + switch (in[0]) { + case onechar('i', 0, 8): + switch (in[1]) { + case onechar('d', 0, 8): + return atok_t_connection_box_declaration::ID; + break; + default: + break; + } + break; + default: + break; + } + break; + case 4: + switch (*((triehash_uu32*)&in[0])) { + case onechar('n', 0, 32) | onechar('a', 8, 32) | onechar('m', 16, 32) | onechar('e', 24, 32): + return atok_t_connection_box_declaration::NAME; + break; + default: + break; + } + break; + default: + break; + } + noreturn_report(report_error, ("Found unrecognized attribute " + std::string(in) + " of .").c_str()); +} + +inline gtok_t_connection_boxes lex_node_t_connection_boxes(const char* in, const std::function* report_error) { + unsigned int len = strlen(in); + switch (len) { + case 14: + switch (*((triehash_uu64*)&in[0])) { + case onechar('c', 0, 64) | onechar('o', 8, 64) | onechar('n', 16, 64) | onechar('n', 24, 64) | onechar('e', 32, 64) | onechar('c', 40, 64) | onechar('t', 48, 64) | onechar('i', 56, 64): + switch (*((triehash_uu32*)&in[8])) { + case onechar('o', 0, 32) | onechar('n', 8, 32) | onechar('_', 16, 32) | onechar('b', 24, 32): + switch (in[12]) { + case onechar('o', 0, 8): + switch (in[13]) { + case onechar('x', 0, 8): + return gtok_t_connection_boxes::CONNECTION_BOX; + break; + default: + break; + } + break; + default: + break; + } + break; + default: + break; + } + break; + default: + break; + } + break; + default: + break; + } + noreturn_report(report_error, ("Found unrecognized child " + std::string(in) + " of .").c_str()); +} +inline atok_t_connection_boxes lex_attr_t_connection_boxes(const char* in, const std::function* report_error) { + unsigned int len = strlen(in); + switch (len) { + case 5: + switch (*((triehash_uu32*)&in[0])) { + case onechar('x', 0, 32) | onechar('_', 8, 32) | onechar('d', 16, 32) | onechar('i', 24, 32): + switch (in[4]) { + case onechar('m', 0, 8): + return atok_t_connection_boxes::X_DIM; + break; + default: + break; + } + break; + case onechar('y', 0, 32) | onechar('_', 8, 32) | onechar('d', 16, 32) | onechar('i', 24, 32): + switch (in[4]) { + case onechar('m', 0, 8): + return atok_t_connection_boxes::Y_DIM; + break; + default: + break; + } + break; + default: + break; + } + break; + case 9: + switch (*((triehash_uu64*)&in[0])) { + case onechar('n', 0, 64) | onechar('u', 8, 64) | onechar('m', 16, 64) | onechar('_', 24, 64) | onechar('b', 32, 64) | onechar('o', 40, 64) | onechar('x', 48, 64) | onechar('e', 56, 64): + switch (in[8]) { + case onechar('s', 0, 8): + return atok_t_connection_boxes::NUM_BOXES; + break; + default: + break; + } + break; + default: + break; + } + break; + default: + break; + } + noreturn_report(report_error, ("Found unrecognized attribute " + std::string(in) + " of .").c_str()); +} + inline atok_t_grid_loc lex_attr_t_grid_loc(const char* in, const std::function* report_error) { unsigned int len = strlen(in); switch (len) { @@ -1405,6 +1560,90 @@ inline gtok_t_metadata lex_node_t_metadata(const char* in, const std::function.").c_str()); } +inline atok_t_canonical_loc lex_attr_t_canonical_loc(const char* in, const std::function* report_error) { + unsigned int len = strlen(in); + switch (len) { + case 1: + switch (in[0]) { + case onechar('x', 0, 8): + return atok_t_canonical_loc::X; + break; + case onechar('y', 0, 8): + return atok_t_canonical_loc::Y; + break; + default: + break; + } + break; + default: + break; + } + noreturn_report(report_error, ("Found unrecognized attribute " + std::string(in) + " of .").c_str()); +} + +inline atok_t_connection_box_annotation lex_attr_t_connection_box_annotation(const char* in, const std::function* report_error) { + unsigned int len = strlen(in); + switch (len) { + case 1: + switch (in[0]) { + case onechar('x', 0, 8): + return atok_t_connection_box_annotation::X; + break; + case onechar('y', 0, 8): + return atok_t_connection_box_annotation::Y; + break; + default: + break; + } + break; + case 2: + switch (in[0]) { + case onechar('i', 0, 8): + switch (in[1]) { + case onechar('d', 0, 8): + return atok_t_connection_box_annotation::ID; + break; + default: + break; + } + break; + default: + break; + } + break; + case 14: + switch (*((triehash_uu64*)&in[0])) { + case onechar('s', 0, 64) | onechar('i', 8, 64) | onechar('t', 16, 64) | onechar('e', 24, 64) | onechar('_', 32, 64) | onechar('p', 40, 64) | onechar('i', 48, 64) | onechar('n', 56, 64): + switch (*((triehash_uu32*)&in[8])) { + case onechar('_', 0, 32) | onechar('d', 8, 32) | onechar('e', 16, 32) | onechar('l', 24, 32): + switch (in[12]) { + case onechar('a', 0, 8): + switch (in[13]) { + case onechar('y', 0, 8): + return atok_t_connection_box_annotation::SITE_PIN_DELAY; + break; + default: + break; + } + break; + default: + break; + } + break; + default: + break; + } + break; + default: + break; + } + break; + default: + break; + } + noreturn_report(report_error, ("Found unrecognized attribute " + std::string(in) + " of .").c_str()); +} + inline gtok_t_node lex_node_t_node(const char* in, const std::function* report_error) { unsigned int len = strlen(in); switch (len) { @@ -1486,6 +1725,54 @@ inline gtok_t_node lex_node_t_node(const char* in, const std::function* report_error) { + std::bitset<2> astate = 0; + for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { + atok_t_connection_box_declaration in = lex_attr_t_connection_box_declaration(attr.name(), report_error); + if (astate[(int)in] == 0) + astate[(int)in] = 1; + else + noreturn_report(report_error, ("Duplicate attribute " + std::string(attr.name()) + " in .").c_str()); + switch (in) { + case atok_t_connection_box_declaration::ID: + *id = load_unsigned_int(attr.value(), report_error); + break; + case atok_t_connection_box_declaration::NAME: + /* Attribute name set after element init */ + break; + default: + break; /* Not possible. */ + } + } + std::bitset<2> test_astate = astate | std::bitset<2>(0b00); + if (!test_astate.all()) attr_error(test_astate, atok_lookup_t_connection_box_declaration, report_error); +} + +inline void load_connection_boxes_required_attributes(const pugi::xml_node& root, unsigned int* num_boxes, unsigned int* x_dim, unsigned int* y_dim, const std::function* report_error) { + std::bitset<3> astate = 0; + for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { + atok_t_connection_boxes in = lex_attr_t_connection_boxes(attr.name(), report_error); + if (astate[(int)in] == 0) + astate[(int)in] = 1; + else + noreturn_report(report_error, ("Duplicate attribute " + std::string(attr.name()) + " in .").c_str()); + switch (in) { + case atok_t_connection_boxes::NUM_BOXES: + *num_boxes = load_unsigned_int(attr.value(), report_error); + break; + case atok_t_connection_boxes::X_DIM: + *x_dim = load_unsigned_int(attr.value(), report_error); + break; + case atok_t_connection_boxes::Y_DIM: + *y_dim = load_unsigned_int(attr.value(), report_error); + break; + default: + break; /* Not possible. */ + } + } + std::bitset<3> test_astate = astate | std::bitset<3>(0b000); + if (!test_astate.all()) attr_error(test_astate, atok_lookup_t_connection_boxes, report_error); +} + inline void load_grid_loc_required_attributes(const pugi::xml_node& root, int* block_type_id, int* height_offset, int* width_offset, int* x, int* y, const std::function* report_error) { std::bitset<5> astate = 0; for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { @@ -2504,6 +2855,58 @@ inline void load_node_segment_required_attributes(const pugi::xml_node& root, in if (!test_astate.all()) attr_error(test_astate, atok_lookup_t_node_segment, report_error); } +inline void load_canonical_loc_required_attributes(const pugi::xml_node& root, unsigned int* x, unsigned int* y, const std::function* report_error) { + std::bitset<2> astate = 0; + for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { + atok_t_canonical_loc in = lex_attr_t_canonical_loc(attr.name(), report_error); + if (astate[(int)in] == 0) + astate[(int)in] = 1; + else + noreturn_report(report_error, ("Duplicate attribute " + std::string(attr.name()) + " in .").c_str()); + switch (in) { + case atok_t_canonical_loc::X: + *x = load_unsigned_int(attr.value(), report_error); + break; + case atok_t_canonical_loc::Y: + *y = load_unsigned_int(attr.value(), report_error); + break; + default: + break; /* Not possible. */ + } + } + std::bitset<2> test_astate = astate | std::bitset<2>(0b00); + if (!test_astate.all()) attr_error(test_astate, atok_lookup_t_canonical_loc, report_error); +} + +inline void load_connection_box_annotation_required_attributes(const pugi::xml_node& root, unsigned int* id, float* site_pin_delay, unsigned int* x, unsigned int* y, const std::function* report_error) { + std::bitset<4> astate = 0; + for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { + atok_t_connection_box_annotation in = lex_attr_t_connection_box_annotation(attr.name(), report_error); + if (astate[(int)in] == 0) + astate[(int)in] = 1; + else + noreturn_report(report_error, ("Duplicate attribute " + std::string(attr.name()) + " in .").c_str()); + switch (in) { + case atok_t_connection_box_annotation::ID: + *id = load_unsigned_int(attr.value(), report_error); + break; + case atok_t_connection_box_annotation::SITE_PIN_DELAY: + *site_pin_delay = load_float(attr.value(), report_error); + break; + case atok_t_connection_box_annotation::X: + *x = load_unsigned_int(attr.value(), report_error); + break; + case atok_t_connection_box_annotation::Y: + *y = load_unsigned_int(attr.value(), report_error); + break; + default: + break; /* Not possible. */ + } + } + std::bitset<4> test_astate = astate | std::bitset<4>(0b0000); + if (!test_astate.all()) attr_error(test_astate, atok_lookup_t_connection_box_annotation, report_error); +} + inline void load_node_required_attributes(const pugi::xml_node& root, unsigned int* capacity, unsigned int* id, enum_node_type* type, const std::function* report_error) { std::bitset<4> astate = 0; for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { @@ -3231,6 +3634,94 @@ inline void load_block_types(const pugi::xml_node& root, T& out, Context& contex if (state != 0) dfa_error("end of input", gstate_t_block_types[state], gtok_lookup_t_block_types, 1, report_error); } +template +inline void load_connection_box_declaration(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug) { + (void)root; + (void)out; + (void)context; + (void)report_error; + // Update current file offset in case an error is encountered. + *offset_debug = root.offset_debug(); + + for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { + atok_t_connection_box_declaration in = lex_attr_t_connection_box_declaration(attr.name(), report_error); + switch (in) { + case atok_t_connection_box_declaration::ID: + /* Attribute id is already set */ + break; + case atok_t_connection_box_declaration::NAME: + out.set_connection_box_declaration_name(attr.value(), context); + break; + default: + break; /* Not possible. */ + } + } + + if (root.first_child().type() == pugi::node_element) + noreturn_report(report_error, "Unexpected child element in ."); +} + +constexpr int NUM_T_CONNECTION_BOXES_STATES = 2; +constexpr const int NUM_T_CONNECTION_BOXES_INPUTS = 1; +constexpr int gstate_t_connection_boxes[NUM_T_CONNECTION_BOXES_STATES][NUM_T_CONNECTION_BOXES_INPUTS] = { + {0}, + {0}, +}; +template +inline void load_connection_boxes(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug) { + (void)root; + (void)out; + (void)context; + (void)report_error; + // Update current file offset in case an error is encountered. + *offset_debug = root.offset_debug(); + + // Preallocate arrays by counting child nodes (if any) + size_t connection_box_count = 0; + { + int next, state = 1; + for (pugi::xml_node node = root.first_child(); node; node = node.next_sibling()) { + *offset_debug = node.offset_debug(); + gtok_t_connection_boxes in = lex_node_t_connection_boxes(node.name(), report_error); + next = gstate_t_connection_boxes[state][(int)in]; + if (next == -1) + dfa_error(gtok_lookup_t_connection_boxes[(int)in], gstate_t_connection_boxes[state], gtok_lookup_t_connection_boxes, 1, report_error); + state = next; + switch (in) { + case gtok_t_connection_boxes::CONNECTION_BOX: + connection_box_count += 1; + break; + default: + break; /* Not possible. */ + } + } + + out.preallocate_connection_boxes_connection_box(context, connection_box_count); + } + int next, state = 1; + for (pugi::xml_node node = root.first_child(); node; node = node.next_sibling()) { + *offset_debug = node.offset_debug(); + gtok_t_connection_boxes in = lex_node_t_connection_boxes(node.name(), report_error); + next = gstate_t_connection_boxes[state][(int)in]; + if (next == -1) + dfa_error(gtok_lookup_t_connection_boxes[(int)in], gstate_t_connection_boxes[state], gtok_lookup_t_connection_boxes, 1, report_error); + state = next; + switch (in) { + case gtok_t_connection_boxes::CONNECTION_BOX: { + unsigned int connection_box_declaration_id; + memset(&connection_box_declaration_id, 0, sizeof(connection_box_declaration_id)); + load_connection_box_declaration_required_attributes(node, &connection_box_declaration_id, report_error); + auto child_context = out.add_connection_boxes_connection_box(context, connection_box_declaration_id); + load_connection_box_declaration(node, out, child_context, report_error, offset_debug); + out.finish_connection_boxes_connection_box(child_context); + } break; + default: + break; /* Not possible. */ + } + } + if (state != 0) dfa_error("end of input", gstate_t_connection_boxes[state], gtok_lookup_t_connection_boxes, 1, report_error); +} + template inline void load_grid_loc(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug) { (void)root; @@ -3467,6 +3958,32 @@ inline void load_metadata(const pugi::xml_node& root, T& out, Context& context, if (state != 0) dfa_error("end of input", gstate_t_metadata[state], gtok_lookup_t_metadata, 1, report_error); } +template +inline void load_canonical_loc(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug) { + (void)root; + (void)out; + (void)context; + (void)report_error; + // Update current file offset in case an error is encountered. + *offset_debug = root.offset_debug(); + + if (root.first_child().type() == pugi::node_element) + noreturn_report(report_error, "Unexpected child element in ."); +} + +template +inline void load_connection_box_annotation(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug) { + (void)root; + (void)out; + (void)context; + (void)report_error; + // Update current file offset in case an error is encountered. + *offset_debug = root.offset_debug(); + + if (root.first_child().type() == pugi::node_element) + noreturn_report(report_error, "Unexpected child element in ."); +} + template inline void load_node(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug) { (void)root; @@ -3496,7 +4013,7 @@ inline void load_node(const pugi::xml_node& root, T& out, Context& context, cons } } - std::bitset<4> gstate = 0; + std::bitset<6> gstate = 0; for (pugi::xml_node node = root.first_child(); node; node = node.next_sibling()) { *offset_debug = node.offset_debug(); gtok_t_node in = lex_node_t_node(node.name(), report_error); @@ -3544,11 +4061,35 @@ inline void load_node(const pugi::xml_node& root, T& out, Context& context, cons load_metadata(node, out, child_context, report_error, offset_debug); out.finish_node_metadata(child_context); } break; + case gtok_t_node::CANONICAL_LOC: { + unsigned int canonical_loc_x; + memset(&canonical_loc_x, 0, sizeof(canonical_loc_x)); + unsigned int canonical_loc_y; + memset(&canonical_loc_y, 0, sizeof(canonical_loc_y)); + load_canonical_loc_required_attributes(node, &canonical_loc_x, &canonical_loc_y, report_error); + auto child_context = out.init_node_canonical_loc(context, canonical_loc_x, canonical_loc_y); + load_canonical_loc(node, out, child_context, report_error, offset_debug); + out.finish_node_canonical_loc(child_context); + } break; + case gtok_t_node::CONNECTION_BOX: { + unsigned int connection_box_annotation_id; + memset(&connection_box_annotation_id, 0, sizeof(connection_box_annotation_id)); + float connection_box_annotation_site_pin_delay; + memset(&connection_box_annotation_site_pin_delay, 0, sizeof(connection_box_annotation_site_pin_delay)); + unsigned int connection_box_annotation_x; + memset(&connection_box_annotation_x, 0, sizeof(connection_box_annotation_x)); + unsigned int connection_box_annotation_y; + memset(&connection_box_annotation_y, 0, sizeof(connection_box_annotation_y)); + load_connection_box_annotation_required_attributes(node, &connection_box_annotation_id, &connection_box_annotation_site_pin_delay, &connection_box_annotation_x, &connection_box_annotation_y, report_error); + auto child_context = out.init_node_connection_box(context, connection_box_annotation_id, connection_box_annotation_site_pin_delay, connection_box_annotation_x, connection_box_annotation_y); + load_connection_box_annotation(node, out, child_context, report_error, offset_debug); + out.finish_node_connection_box(child_context); + } break; default: break; /* Not possible. */ } } - std::bitset<4> test_gstate = gstate | std::bitset<4>(0b1110); + std::bitset<6> test_gstate = gstate | std::bitset<6>(0b111110); if (!test_gstate.all()) all_error(test_gstate, gtok_lookup_t_node, report_error); } @@ -3745,7 +4286,7 @@ inline void load_rr_graph(const pugi::xml_node& root, T& out, Context& context, } } - std::bitset<7> gstate = 0; + std::bitset<8> gstate = 0; for (pugi::xml_node node = root.first_child(); node; node = node.next_sibling()) { *offset_debug = node.offset_debug(); gtok_t_rr_graph in = lex_node_t_rr_graph(node.name(), report_error); @@ -3774,6 +4315,18 @@ inline void load_rr_graph(const pugi::xml_node& root, T& out, Context& context, load_block_types(node, out, child_context, report_error, offset_debug); out.finish_rr_graph_block_types(child_context); } break; + case gtok_t_rr_graph::CONNECTION_BOXES: { + unsigned int connection_boxes_num_boxes; + memset(&connection_boxes_num_boxes, 0, sizeof(connection_boxes_num_boxes)); + unsigned int connection_boxes_x_dim; + memset(&connection_boxes_x_dim, 0, sizeof(connection_boxes_x_dim)); + unsigned int connection_boxes_y_dim; + memset(&connection_boxes_y_dim, 0, sizeof(connection_boxes_y_dim)); + load_connection_boxes_required_attributes(node, &connection_boxes_num_boxes, &connection_boxes_x_dim, &connection_boxes_y_dim, report_error); + auto child_context = out.init_rr_graph_connection_boxes(context, connection_boxes_num_boxes, connection_boxes_x_dim, connection_boxes_y_dim); + load_connection_boxes(node, out, child_context, report_error, offset_debug); + out.finish_rr_graph_connection_boxes(child_context); + } break; case gtok_t_rr_graph::GRID: { auto child_context = out.init_rr_graph_grid(context); load_grid_locs(node, out, child_context, report_error, offset_debug); @@ -3793,7 +4346,7 @@ inline void load_rr_graph(const pugi::xml_node& root, T& out, Context& context, break; /* Not possible. */ } } - std::bitset<7> test_gstate = gstate | std::bitset<7>(0b0000000); + std::bitset<8> test_gstate = gstate | std::bitset<8>(0b00010000); if (!test_gstate.all()) all_error(test_gstate, gtok_lookup_t_rr_graph, report_error); } @@ -3984,6 +4537,22 @@ inline void write_block_types(T& in, std::ostream& os, Context& context) { } } +template +inline void write_connection_boxes(T& in, std::ostream& os, Context& context) { + (void)in; + (void)os; + (void)context; + { + for (size_t i = 0, n = in.num_connection_boxes_connection_box(context); i < n; i++) { + auto child_context = in.get_connection_boxes_connection_box(i, context); + os << "\n"; + } + } +} + template inline void write_grid_locs(T& in, std::ostream& os, Context& context) { (void)in; @@ -4070,6 +4639,26 @@ inline void write_node(T& in, std::ostream& os, Context& context) { os << "\n"; } } + { + if (in.has_node_canonical_loc(context)) { + auto child_context = in.get_node_canonical_loc(context); + os << "\n"; + } + } + { + if (in.has_node_connection_box(context)) { + auto child_context = in.get_node_connection_box(context); + os << "\n"; + } + } } template @@ -4156,6 +4745,18 @@ inline void write_rr_graph(T& in, std::ostream& os, Context& context) { write_block_types(in, os, child_context); os << "\n"; } + { + if (in.has_rr_graph_connection_boxes(context)) { + auto child_context = in.get_rr_graph_connection_boxes(context); + os << ""; + write_connection_boxes(in, os, child_context); + os << "\n"; + } + } { auto child_context = in.get_rr_graph_grid(context); os << "\n"; diff --git a/vpr/src/route/gen/rr_graph_uxsdcxx_capnp.h b/vpr/src/route/gen/rr_graph_uxsdcxx_capnp.h index 48e160866e6..f6ad76c9cf8 100644 --- a/vpr/src/route/gen/rr_graph_uxsdcxx_capnp.h +++ b/vpr/src/route/gen/rr_graph_uxsdcxx_capnp.h @@ -6,7 +6,7 @@ * * Cmdline: uxsdcxx/uxsdcap.py rr_graph.xsd * Input file: rr_graph.xsd - * md5sum of input file: c4f47394efd27f5819c943829c111204 + * md5sum of input file: d9e439fa173fdf56b51feeed0ac48272 */ #include @@ -54,6 +54,10 @@ void load_block_type_capnp_type(const ucap::BlockType::Reader& root, T& out, Con template void load_block_types_capnp_type(const ucap::BlockTypes::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); template +void load_connection_box_declaration_capnp_type(const ucap::ConnectionBoxDeclaration::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); +template +void load_connection_boxes_capnp_type(const ucap::ConnectionBoxes::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); +template void load_grid_loc_capnp_type(const ucap::GridLoc::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); template void load_grid_locs_capnp_type(const ucap::GridLocs::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); @@ -68,6 +72,10 @@ void load_meta_capnp_type(const ucap::Meta::Reader& root, T& out, Context& conte template void load_metadata_capnp_type(const ucap::Metadata::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); template +void load_canonical_loc_capnp_type(const ucap::CanonicalLoc::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); +template +void load_connection_box_annotation_capnp_type(const ucap::ConnectionBoxAnnotation::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); +template void load_node_capnp_type(const ucap::Node::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); template void load_rr_nodes_capnp_type(const ucap::RrNodes::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); @@ -98,6 +106,8 @@ inline void write_block_type_capnp_type(T& in, ucap::BlockType::Builder& root, C template inline void write_block_types_capnp_type(T& in, ucap::BlockTypes::Builder& root, Context& context); template +inline void write_connection_boxes_capnp_type(T& in, ucap::ConnectionBoxes::Builder& root, Context& context); +template inline void write_grid_locs_capnp_type(T& in, ucap::GridLocs::Builder& root, Context& context); template inline void write_meta_capnp_type(T& in, ucap::Meta::Builder& root, Context& context); @@ -615,6 +625,39 @@ inline void load_block_types_capnp_type(const ucap::BlockTypes::Reader& root, T& stack->pop_back(); } +template +inline void load_connection_box_declaration_capnp_type(const ucap::ConnectionBoxDeclaration::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack) { + (void)root; + (void)out; + (void)context; + (void)report_error; + (void)stack; + + out.set_connection_box_declaration_name(root.getName().cStr(), context); +} + +template +inline void load_connection_boxes_capnp_type(const ucap::ConnectionBoxes::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack) { + (void)root; + (void)out; + (void)context; + (void)report_error; + (void)stack; + + stack->push_back(std::make_pair("getConnectionBox", 0)); + { + auto data = root.getConnectionBoxes(); + out.preallocate_connection_boxes_connection_box(context, data.size()); + for (const auto& el : data) { + auto child_context = out.add_connection_boxes_connection_box(context, el.getId()); + load_connection_box_declaration_capnp_type(el, out, child_context, report_error, stack); + out.finish_connection_boxes_connection_box(child_context); + stack->back().second += 1; + } + } + stack->pop_back(); +} + template inline void load_grid_loc_capnp_type(const ucap::GridLoc::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack) { (void)root; @@ -711,6 +754,24 @@ inline void load_metadata_capnp_type(const ucap::Metadata::Reader& root, T& out, stack->pop_back(); } +template +inline void load_canonical_loc_capnp_type(const ucap::CanonicalLoc::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack) { + (void)root; + (void)out; + (void)context; + (void)report_error; + (void)stack; +} + +template +inline void load_connection_box_annotation_capnp_type(const ucap::ConnectionBoxAnnotation::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack) { + (void)root; + (void)out; + (void)context; + (void)report_error; + (void)stack; +} + template inline void load_node_capnp_type(const ucap::Node::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack) { (void)root; @@ -752,6 +813,22 @@ inline void load_node_capnp_type(const ucap::Node::Reader& root, T& out, Context out.finish_node_metadata(child_context); } stack->pop_back(); + stack->push_back(std::make_pair("getCanonicalLoc", 0)); + if (root.hasCanonicalLoc()) { + auto child_el = root.getCanonicalLoc(); + auto child_context = out.init_node_canonical_loc(context, child_el.getX(), child_el.getY()); + load_canonical_loc_capnp_type(child_el, out, child_context, report_error, stack); + out.finish_node_canonical_loc(child_context); + } + stack->pop_back(); + stack->push_back(std::make_pair("getConnectionBox", 0)); + if (root.hasConnectionBox()) { + auto child_el = root.getConnectionBox(); + auto child_context = out.init_node_connection_box(context, child_el.getId(), child_el.getSitePinDelay(), child_el.getX(), child_el.getY()); + load_connection_box_annotation_capnp_type(child_el, out, child_context, report_error, stack); + out.finish_node_connection_box(child_context); + } + stack->pop_back(); } template @@ -859,6 +936,14 @@ inline void load_rr_graph_capnp_type(const ucap::RrGraph::Reader& root, T& out, out.finish_rr_graph_block_types(child_context); } stack->pop_back(); + stack->push_back(std::make_pair("getConnectionBoxes", 0)); + if (root.hasConnectionBoxes()) { + auto child_el = root.getConnectionBoxes(); + auto child_context = out.init_rr_graph_connection_boxes(context, child_el.getNumBoxes(), child_el.getXDim(), child_el.getYDim()); + load_connection_boxes_capnp_type(child_el, out, child_context, report_error, stack); + out.finish_rr_graph_connection_boxes(child_context); + } + stack->pop_back(); stack->push_back(std::make_pair("getGrid", 0)); if (root.hasGrid()) { auto child_el = root.getGrid(); @@ -1054,6 +1139,21 @@ inline void write_block_types_capnp_type(T& in, ucap::BlockTypes::Builder& root, } } +template +inline void write_connection_boxes_capnp_type(T& in, ucap::ConnectionBoxes::Builder& root, Context& context) { + (void)in; + (void)root; + + size_t num_connection_boxes_connection_boxes = in.num_connection_boxes_connection_box(context); + auto connection_boxes_connection_boxes = root.initConnectionBoxes(num_connection_boxes_connection_boxes); + for (size_t i = 0; i < num_connection_boxes_connection_boxes; i++) { + auto connection_boxes_connection_box = connection_boxes_connection_boxes[i]; + auto child_context = in.get_connection_boxes_connection_box(i, context); + connection_boxes_connection_box.setId(in.get_connection_box_declaration_id(child_context)); + connection_boxes_connection_box.setName(in.get_connection_box_declaration_name(child_context)); + } +} + template inline void write_grid_locs_capnp_type(T& in, ucap::GridLocs::Builder& root, Context& context) { (void)in; @@ -1129,6 +1229,22 @@ inline void write_node_capnp_type(T& in, ucap::Node::Builder& root, Context& con auto child_context = in.get_node_metadata(context); write_metadata_capnp_type(in, node_metadata, child_context); } + + if (in.has_node_canonical_loc(context)) { + auto node_canonical_loc = root.initCanonicalLoc(); + auto child_context = in.get_node_canonical_loc(context); + node_canonical_loc.setX(in.get_canonical_loc_x(child_context)); + node_canonical_loc.setY(in.get_canonical_loc_y(child_context)); + } + + if (in.has_node_connection_box(context)) { + auto node_connection_box = root.initConnectionBox(); + auto child_context = in.get_node_connection_box(context); + node_connection_box.setId(in.get_connection_box_annotation_id(child_context)); + node_connection_box.setSitePinDelay(in.get_connection_box_annotation_site_pin_delay(child_context)); + node_connection_box.setX(in.get_connection_box_annotation_x(child_context)); + node_connection_box.setY(in.get_connection_box_annotation_y(child_context)); + } } template @@ -1208,6 +1324,15 @@ inline void write_rr_graph_capnp_type(T& in, ucap::RrGraph::Builder& root, Conte write_block_types_capnp_type(in, rr_graph_block_types, child_context); } + if (in.has_rr_graph_connection_boxes(context)) { + auto rr_graph_connection_boxes = root.initConnectionBoxes(); + auto child_context = in.get_rr_graph_connection_boxes(context); + rr_graph_connection_boxes.setNumBoxes(in.get_connection_boxes_num_boxes(child_context)); + rr_graph_connection_boxes.setXDim(in.get_connection_boxes_x_dim(child_context)); + rr_graph_connection_boxes.setYDim(in.get_connection_boxes_y_dim(child_context)); + write_connection_boxes_capnp_type(in, rr_graph_connection_boxes, child_context); + } + { auto child_context = in.get_rr_graph_grid(context); auto rr_graph_grid = root.initGrid(); diff --git a/vpr/src/route/gen/rr_graph_uxsdcxx_interface.h b/vpr/src/route/gen/rr_graph_uxsdcxx_interface.h index 840f3cf6b45..4406f7531bb 100644 --- a/vpr/src/route/gen/rr_graph_uxsdcxx_interface.h +++ b/vpr/src/route/gen/rr_graph_uxsdcxx_interface.h @@ -6,7 +6,7 @@ * * Cmdline: uxsdcxx/uxsdcxx.py rr_graph.xsd * Input file: rr_graph.xsd - * md5sum of input file: c4f47394efd27f5819c943829c111204 + * md5sum of input file: d9e439fa173fdf56b51feeed0ac48272 */ #include @@ -68,6 +68,8 @@ struct DefaultRrGraphContextTypes { using PinClassReadContext = void*; using BlockTypeReadContext = void*; using BlockTypesReadContext = void*; + using ConnectionBoxDeclarationReadContext = void*; + using ConnectionBoxesReadContext = void*; using GridLocReadContext = void*; using GridLocsReadContext = void*; using NodeLocReadContext = void*; @@ -75,6 +77,8 @@ struct DefaultRrGraphContextTypes { using NodeSegmentReadContext = void*; using MetaReadContext = void*; using MetadataReadContext = void*; + using CanonicalLocReadContext = void*; + using ConnectionBoxAnnotationReadContext = void*; using NodeReadContext = void*; using RrNodesReadContext = void*; using EdgeReadContext = void*; @@ -95,6 +99,8 @@ struct DefaultRrGraphContextTypes { using PinClassWriteContext = void*; using BlockTypeWriteContext = void*; using BlockTypesWriteContext = void*; + using ConnectionBoxDeclarationWriteContext = void*; + using ConnectionBoxesWriteContext = void*; using GridLocWriteContext = void*; using GridLocsWriteContext = void*; using NodeLocWriteContext = void*; @@ -102,6 +108,8 @@ struct DefaultRrGraphContextTypes { using NodeSegmentWriteContext = void*; using MetaWriteContext = void*; using MetadataWriteContext = void*; + using CanonicalLocWriteContext = void*; + using ConnectionBoxAnnotationWriteContext = void*; using NodeWriteContext = void*; using RrNodesWriteContext = void*; using EdgeWriteContext = void*; @@ -351,6 +359,35 @@ class RrGraphBase { virtual inline size_t num_block_types_block_type(typename ContextTypes::BlockTypesReadContext& ctx) = 0; virtual inline typename ContextTypes::BlockTypeReadContext get_block_types_block_type(int n, typename ContextTypes::BlockTypesReadContext& ctx) = 0; + /** Generated for complex type "connection_box_declaration": + * + * + * + * + */ + virtual inline unsigned int get_connection_box_declaration_id(typename ContextTypes::ConnectionBoxDeclarationReadContext& ctx) = 0; + virtual inline const char* get_connection_box_declaration_name(typename ContextTypes::ConnectionBoxDeclarationReadContext& ctx) = 0; + virtual inline void set_connection_box_declaration_name(const char* name, typename ContextTypes::ConnectionBoxDeclarationWriteContext& ctx) = 0; + + /** Generated for complex type "connection_boxes": + * + * + * + * + * + * + * + * + */ + virtual inline unsigned int get_connection_boxes_num_boxes(typename ContextTypes::ConnectionBoxesReadContext& ctx) = 0; + virtual inline unsigned int get_connection_boxes_x_dim(typename ContextTypes::ConnectionBoxesReadContext& ctx) = 0; + virtual inline unsigned int get_connection_boxes_y_dim(typename ContextTypes::ConnectionBoxesReadContext& ctx) = 0; + virtual inline void preallocate_connection_boxes_connection_box(typename ContextTypes::ConnectionBoxesWriteContext& ctx, size_t size) = 0; + virtual inline typename ContextTypes::ConnectionBoxDeclarationWriteContext add_connection_boxes_connection_box(typename ContextTypes::ConnectionBoxesWriteContext& ctx, unsigned int id) = 0; + virtual inline void finish_connection_boxes_connection_box(typename ContextTypes::ConnectionBoxDeclarationWriteContext& ctx) = 0; + virtual inline size_t num_connection_boxes_connection_box(typename ContextTypes::ConnectionBoxesReadContext& ctx) = 0; + virtual inline typename ContextTypes::ConnectionBoxDeclarationReadContext get_connection_boxes_connection_box(int n, typename ContextTypes::ConnectionBoxesReadContext& ctx) = 0; + /** Generated for complex type "grid_loc": * * @@ -440,6 +477,28 @@ class RrGraphBase { virtual inline size_t num_metadata_meta(typename ContextTypes::MetadataReadContext& ctx) = 0; virtual inline typename ContextTypes::MetaReadContext get_metadata_meta(int n, typename ContextTypes::MetadataReadContext& ctx) = 0; + /** Generated for complex type "canonical_loc": + * + * + * + * + */ + virtual inline unsigned int get_canonical_loc_x(typename ContextTypes::CanonicalLocReadContext& ctx) = 0; + virtual inline unsigned int get_canonical_loc_y(typename ContextTypes::CanonicalLocReadContext& ctx) = 0; + + /** Generated for complex type "connection_box_annotation": + * + * + * + * + * + * + */ + virtual inline unsigned int get_connection_box_annotation_id(typename ContextTypes::ConnectionBoxAnnotationReadContext& ctx) = 0; + virtual inline float get_connection_box_annotation_site_pin_delay(typename ContextTypes::ConnectionBoxAnnotationReadContext& ctx) = 0; + virtual inline unsigned int get_connection_box_annotation_x(typename ContextTypes::ConnectionBoxAnnotationReadContext& ctx) = 0; + virtual inline unsigned int get_connection_box_annotation_y(typename ContextTypes::ConnectionBoxAnnotationReadContext& ctx) = 0; + /** Generated for complex type "node": * * @@ -447,6 +506,8 @@ class RrGraphBase { * * * + * + * * * * @@ -474,6 +535,14 @@ class RrGraphBase { virtual inline void finish_node_metadata(typename ContextTypes::MetadataWriteContext& ctx) = 0; virtual inline typename ContextTypes::MetadataReadContext get_node_metadata(typename ContextTypes::NodeReadContext& ctx) = 0; virtual inline bool has_node_metadata(typename ContextTypes::NodeReadContext& ctx) = 0; + virtual inline typename ContextTypes::CanonicalLocWriteContext init_node_canonical_loc(typename ContextTypes::NodeWriteContext& ctx, unsigned int x, unsigned int y) = 0; + virtual inline void finish_node_canonical_loc(typename ContextTypes::CanonicalLocWriteContext& ctx) = 0; + virtual inline typename ContextTypes::CanonicalLocReadContext get_node_canonical_loc(typename ContextTypes::NodeReadContext& ctx) = 0; + virtual inline bool has_node_canonical_loc(typename ContextTypes::NodeReadContext& ctx) = 0; + virtual inline typename ContextTypes::ConnectionBoxAnnotationWriteContext init_node_connection_box(typename ContextTypes::NodeWriteContext& ctx, unsigned int id, float site_pin_delay, unsigned int x, unsigned int y) = 0; + virtual inline void finish_node_connection_box(typename ContextTypes::ConnectionBoxAnnotationWriteContext& ctx) = 0; + virtual inline typename ContextTypes::ConnectionBoxAnnotationReadContext get_node_connection_box(typename ContextTypes::NodeReadContext& ctx) = 0; + virtual inline bool has_node_connection_box(typename ContextTypes::NodeReadContext& ctx) = 0; /** Generated for complex type "rr_nodes": * @@ -526,6 +595,7 @@ class RrGraphBase { * * * + * * * * @@ -553,6 +623,10 @@ class RrGraphBase { virtual inline typename ContextTypes::BlockTypesWriteContext init_rr_graph_block_types(typename ContextTypes::RrGraphWriteContext& ctx) = 0; virtual inline void finish_rr_graph_block_types(typename ContextTypes::BlockTypesWriteContext& ctx) = 0; virtual inline typename ContextTypes::BlockTypesReadContext get_rr_graph_block_types(typename ContextTypes::RrGraphReadContext& ctx) = 0; + virtual inline typename ContextTypes::ConnectionBoxesWriteContext init_rr_graph_connection_boxes(typename ContextTypes::RrGraphWriteContext& ctx, unsigned int num_boxes, unsigned int x_dim, unsigned int y_dim) = 0; + virtual inline void finish_rr_graph_connection_boxes(typename ContextTypes::ConnectionBoxesWriteContext& ctx) = 0; + virtual inline typename ContextTypes::ConnectionBoxesReadContext get_rr_graph_connection_boxes(typename ContextTypes::RrGraphReadContext& ctx) = 0; + virtual inline bool has_rr_graph_connection_boxes(typename ContextTypes::RrGraphReadContext& ctx) = 0; virtual inline typename ContextTypes::GridLocsWriteContext init_rr_graph_grid(typename ContextTypes::RrGraphWriteContext& ctx) = 0; virtual inline void finish_rr_graph_grid(typename ContextTypes::GridLocsWriteContext& ctx) = 0; virtual inline typename ContextTypes::GridLocsReadContext get_rr_graph_grid(typename ContextTypes::RrGraphReadContext& ctx) = 0; diff --git a/vpr/src/route/router_lookahead.cpp b/vpr/src/route/router_lookahead.cpp index 155f8d4a178..3d9e2612619 100644 --- a/vpr/src/route/router_lookahead.cpp +++ b/vpr/src/route/router_lookahead.cpp @@ -2,6 +2,7 @@ #include "router_lookahead_map.h" #include "router_lookahead_extended_map.h" +#include "connection_box_lookahead_map.h" #include "vpr_error.h" #include "globals.h" #include "route_timing.h" @@ -16,6 +17,8 @@ static std::unique_ptr make_router_lookahead_object(e_router_lo return std::make_unique(); } else if (router_lookahead_type == e_router_lookahead::EXTENDED_MAP) { return std::make_unique(); + } else if (router_lookahead_type == e_router_lookahead::CONNECTION_BOX_MAP) { + return std::make_unique(); } else if (router_lookahead_type == e_router_lookahead::NO_OP) { return std::make_unique(); } diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index 625cb518803..3afa14c6462 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -32,6 +32,7 @@ #include "rr_graph_writer.h" #include "rr_graph_reader.h" #include "router_lookahead_map.h" +#include "connection_box_lookahead_map.h" #include "rr_graph_clock.h" #include "edge_groups.h" diff --git a/vpr/src/route/rr_graph.xsd b/vpr/src/route/rr_graph.xsd index 76279182408..464ccf9677f 100644 --- a/vpr/src/route/rr_graph.xsd +++ b/vpr/src/route/rr_graph.xsd @@ -262,12 +262,26 @@ + + + + + + + + + + + + + + @@ -284,6 +298,20 @@ + + + + + + + + + + + + + + @@ -331,6 +359,7 @@ + diff --git a/vpr/src/route/rr_graph_reader.cpp b/vpr/src/route/rr_graph_reader.cpp index d76ca4fdcf9..681e3520bb5 100644 --- a/vpr/src/route/rr_graph_reader.cpp +++ b/vpr/src/route/rr_graph_reader.cpp @@ -62,6 +62,7 @@ void load_rr_file(const t_graph_type graph_type, &device_ctx.rr_switch_inf, &device_ctx.rr_indexed_data, &device_ctx.rr_node_indices, + &device_ctx.connection_boxes, device_ctx.num_arch_switches, device_ctx.arch_switch_inf, device_ctx.rr_segments, diff --git a/vpr/src/route/rr_graph_uxsdcxx_serializer.h b/vpr/src/route/rr_graph_uxsdcxx_serializer.h index 61da19a0932..0a1f5168938 100644 --- a/vpr/src/route/rr_graph_uxsdcxx_serializer.h +++ b/vpr/src/route/rr_graph_uxsdcxx_serializer.h @@ -230,6 +230,9 @@ struct RrGraphContextTypes : public uxsd::DefaultRrGraphContextTypes { using NodeReadContext = const t_rr_node; using EdgeReadContext = const EdgeWalker*; using RrEdgesReadContext = EdgeWalker; + using ConnectionBoxDeclarationReadContext = ConnectionBoxId; + using CanonicalLocReadContext = const std::pair*; + using ConnectionBoxAnnotationReadContext = const std::tuple, float>; using TimingWriteContext = t_rr_switch_inf*; using SizingWriteContext = t_rr_switch_inf*; using SwitchWriteContext = t_rr_switch_inf*; @@ -245,6 +248,7 @@ struct RrGraphContextTypes : public uxsd::DefaultRrGraphContextTypes { using MetadataWriteContext = MetadataBind; using NodeWriteContext = int; using EdgeWriteContext = MetadataBind; + using ConnectionBoxDeclarationWriteContext = ConnectionBox*; }; class RrGraphSerializer final : public uxsd::RrGraphBase { @@ -262,6 +266,7 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { std::vector* rr_switch_inf, std::vector* rr_indexed_data, t_rr_node_indices* rr_node_indices, + ConnectionBoxes* connection_boxes, const size_t num_arch_switches, const t_arch_switch_inf* arch_switch_inf, const std::vector& segment_inf, @@ -276,6 +281,7 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { , rr_switch_inf_(rr_switch_inf) , rr_indexed_data_(rr_indexed_data) , rr_node_indices_(rr_node_indices) + , connection_boxes_(connection_boxes) , read_rr_graph_filename_(read_rr_graph_filename) , graph_type_(graph_type) , base_cost_type_(base_cost_type) @@ -596,6 +602,160 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { } } + inline unsigned int get_connection_box_annotation_id(const std::tuple, float>& box_info) final { + ConnectionBoxId box_id; + std::pair box_location; + float site_pin_delay; + std::tie(box_id, box_location, site_pin_delay) = box_info; + + return (size_t)box_id; + } + inline float get_connection_box_annotation_site_pin_delay(const std::tuple, float>& box_info) final { + ConnectionBoxId box_id; + std::pair box_location; + float site_pin_delay; + std::tie(box_id, box_location, site_pin_delay) = box_info; + + return site_pin_delay; + } + inline unsigned int get_connection_box_annotation_x(const std::tuple, float>& box_info) final { + ConnectionBoxId box_id; + std::pair box_location; + float site_pin_delay; + std::tie(box_id, box_location, site_pin_delay) = box_info; + + return box_location.first; + } + inline unsigned int get_connection_box_annotation_y(const std::tuple, float>& box_info) final { + ConnectionBoxId box_id; + std::pair box_location; + float site_pin_delay; + std::tie(box_id, box_location, site_pin_delay) = box_info; + + return box_location.second; + } + + inline unsigned int get_canonical_loc_x(const std::pair*& data) final { + return data->first; + } + inline unsigned int get_canonical_loc_y(const std::pair*& data) final { + return data->second; + } + + inline const std::pair* get_node_canonical_loc(const t_rr_node& node) final { + return connection_boxes_->find_canonical_loc(get_node_id(node)); + } + inline bool has_node_canonical_loc(const t_rr_node& node) final { + return connection_boxes_->find_canonical_loc(get_node_id(node)) != nullptr; + } + inline const std::tuple, float> get_node_connection_box(const t_rr_node& node) final { + ConnectionBoxId box_id; + std::pair box_location; + float site_pin_delay; + + if (!connection_boxes_->find_connection_box(get_node_id(node), + &box_id, &box_location, &site_pin_delay)) { + VPR_FATAL_ERROR(VPR_ERROR_OTHER, + "No connection box for %d", node); + } + return std::make_tuple(box_id, box_location, site_pin_delay); + } + inline bool has_node_connection_box(const t_rr_node& node) final { + ConnectionBoxId box_id; + std::pair box_location; + float site_pin_delay; + + return connection_boxes_->find_connection_box(get_node_id(node), + &box_id, &box_location, &site_pin_delay); + } + inline void* init_node_canonical_loc(int& inode, unsigned int x, unsigned int y) final { + connection_boxes_->add_canonical_loc(inode, std::make_pair(x, y)); + return nullptr; + } + inline void finish_node_canonical_loc(void*& /*ctx*/) final {} + + inline void* init_node_connection_box(int& inode, unsigned int id, float site_pin_delay, unsigned int x, unsigned int y) final { + connection_boxes_->add_connection_box(inode, + ConnectionBoxId(id), + std::make_pair(x, y), + site_pin_delay); + return nullptr; + } + inline void finish_node_connection_box(void*& /*ctx*/) final {} + + /** Generated for complex type "connection_box_declaration": + * + * + * + * + */ + inline void set_connection_box_declaration_name(const char* name, ConnectionBox*& box) final { + box->name.assign(name); + } + + inline unsigned int get_connection_box_declaration_id(ConnectionBoxId& id) final { + return (size_t)id; + } + inline const char* get_connection_box_declaration_name(ConnectionBoxId& id) final { + return connection_boxes_->get_connection_box(id)->name.c_str(); + } + inline unsigned int get_connection_boxes_num_boxes(void*& /*ctx*/) final { + return connection_boxes_->num_connection_box_types(); + } + inline unsigned int get_connection_boxes_x_dim(void*& /*ctx*/) final { + return connection_boxes_->connection_box_grid_size().first; + } + inline unsigned int get_connection_boxes_y_dim(void*& /*ctx*/) final { + return connection_boxes_->connection_box_grid_size().second; + } + inline size_t num_connection_boxes_connection_box(void*& /*ctx*/) final { + return connection_boxes_->num_connection_box_types(); + } + inline ConnectionBoxId get_connection_boxes_connection_box(int n, void*& /*ctx*/) final { + return ConnectionBoxId(n); + } + + /** Generated for complex type "connection_boxes": + * + * + * + * + * + * + * + * + */ + inline void preallocate_connection_boxes_connection_box(void*& /*ctx*/, size_t size) final { + if (size != boxes_.size()) { + report_error("Number of connection_box %zu is greater than num_boxes %zu on ", + size, boxes_.size()); + } + } + inline ConnectionBox* add_connection_boxes_connection_box(void*& /*ctx*/, unsigned int id) final { + if (id >= boxes_.size()) { + report_error("ConnectionBox id %u is greater than num_boxes on ", + id); + } + return &boxes_[id]; + } + inline void finish_connection_boxes_connection_box(ConnectionBox*& /*ctx*/) final {} + + private: + int box_x_dim_; + int box_y_dim_; + std::vector boxes_; + + public: + inline void* init_rr_graph_connection_boxes(void*& /*ctx*/, unsigned int num_boxes, unsigned int x_dim, unsigned int y_dim) final { + box_x_dim_ = x_dim; + box_y_dim_ = y_dim; + boxes_.resize(num_boxes); + return nullptr; + } + inline void finish_rr_graph_connection_boxes(void*& /*ctx*/) final { + connection_boxes_->reset_boxes(std::make_pair(box_x_dim_, box_y_dim_), std::move(boxes_)); + } + /** Generated for complex type "node_timing": * * @@ -771,6 +931,7 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { // If make_room_in_vector was used for allocation, this ensures that // the final storage has no overhead. rr_nodes_->shrink_to_fit(); + connection_boxes_->resize_nodes(rr_nodes_->size()); } /** Generated for complex type "rr_edges": @@ -1457,6 +1618,12 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { inline void* get_rr_graph_block_types(void*& /*ctx*/) final { return nullptr; } + inline void* get_rr_graph_connection_boxes(void*& /*ctx*/) final { + return nullptr; + } + inline bool has_rr_graph_connection_boxes(void*& /*ctx*/) final { + return connection_boxes_->num_connection_box_types() > 0; + } inline void* get_rr_graph_grid(void*& /*ctx*/) final { return nullptr; } @@ -1483,6 +1650,8 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { (*rr_indexed_data_)[i].seg_index = seg_index_[i]; } + connection_boxes_->create_sink_back_ref(); + VTR_ASSERT(read_rr_graph_filename_ != nullptr); VTR_ASSERT(read_rr_graph_name_ != nullptr); read_rr_graph_filename_->assign(read_rr_graph_name_); @@ -1851,6 +2020,7 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { std::vector* rr_switch_inf_; std::vector* rr_indexed_data_; t_rr_node_indices* rr_node_indices_; + ConnectionBoxes* connection_boxes_; std::string* read_rr_graph_filename_; // Constant data for loads and writes. diff --git a/vpr/src/route/rr_graph_writer.cpp b/vpr/src/route/rr_graph_writer.cpp index 86fed376c05..57d2bb9c0e8 100644 --- a/vpr/src/route/rr_graph_writer.cpp +++ b/vpr/src/route/rr_graph_writer.cpp @@ -40,6 +40,7 @@ void write_rr_graph(const char* file_name) { &device_ctx.rr_switch_inf, &device_ctx.rr_indexed_data, &device_ctx.rr_node_indices, + &device_ctx.connection_boxes, device_ctx.num_arch_switches, device_ctx.arch_switch_inf, device_ctx.rr_segments, From ba534461c0231299ae115251b31069af9e9f597e Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Thu, 15 Aug 2019 18:09:05 -0700 Subject: [PATCH 03/41] Complete connection box lookahead to be aware of connection box. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- libs/libvtrcapnproto/connection_map.capnp | 4 +- .../route/connection_box_lookahead_map.cpp | 313 ++++++++++-------- 2 files changed, 179 insertions(+), 138 deletions(-) diff --git a/libs/libvtrcapnproto/connection_map.capnp b/libs/libvtrcapnproto/connection_map.capnp index ac03ddfc9c0..1ca672108c8 100644 --- a/libs/libvtrcapnproto/connection_map.capnp +++ b/libs/libvtrcapnproto/connection_map.capnp @@ -13,7 +13,7 @@ struct VprVector2D { } struct VprCostMap { - costMap @0 :List(Matrix.Matrix(VprCostEntry)); - offset @1 :List(VprVector2D); + costMap @0 :Matrix.Matrix((Matrix.Matrix(VprCostEntry))); + offset @1 :Matrix.Matrix(VprVector2D); segmentMap @2 :List(Int64); } diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 2b1b3b48c34..a95fa385893 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -12,14 +12,16 @@ #include "echo_files.h" #include "route_timing.h" - -#include "capnp/serialize.h" -#include "connection_map.capnp.h" -#include "ndmatrix_serdes.h" -#include "mmap_file.h" -#include "serdes_utils.h" #include "route_common.h" +#ifdef VTR_ENABLE_CAPNPROTO +# include "capnp/serialize.h" +# include "connection_map.capnp.h" +# include "ndmatrix_serdes.h" +# include "mmap_file.h" +# include "serdes_utils.h" +#endif + /* we're profiling routing cost over many tracks for each wire type, so we'll * have many cost entries at each |dx|,|dy| offset. There are many ways to * "boil down" the many costs at each offset to a single entry for a given @@ -40,17 +42,17 @@ static int signum(int x) { return 0; } -typedef std::vector, Cost_Entry>> t_routing_cost_map; +typedef std::vector, std::pair, Cost_Entry, ConnectionBoxId>> t_routing_cost_map; static void run_dijkstra(int start_node_ind, - t_routing_cost_map* cost_map); + t_routing_cost_map* routing_cost_map); class CostMap { public: - void set_segment_count(size_t seg_count) { + void set_counts(size_t seg_count, size_t box_count) { cost_map_.clear(); offset_.clear(); - cost_map_.resize(seg_count); - offset_.resize(seg_count); + cost_map_.resize({seg_count, box_count}); + offset_.resize({seg_count, box_count}); const auto& device_ctx = g_vpr_ctx.device(); segment_map_.resize(device_ctx.rr_nodes.size()); @@ -68,11 +70,11 @@ class CostMap { return segment_map_[from_node_ind]; } - Cost_Entry find_cost(int from_seg_index, int delta_x, int delta_y) const { + Cost_Entry find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const { VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); - int dx = delta_x - offset_[from_seg_index].first; - int dy = delta_y - offset_[from_seg_index].second; - const auto& cost_map = cost_map_[from_seg_index]; + int dx = delta_x - offset_[from_seg_index][size_t(box_id)].first; + int dy = delta_y - offset_[from_seg_index][size_t(box_id)].second; + const auto& cost_map = cost_map_[from_seg_index][size_t(box_id)]; if (dx < 0) { dx = 0; @@ -88,12 +90,21 @@ class CostMap { dy = cost_map.dim_size(1) - 1; } - return cost_map_[from_seg_index][dx][dy]; + return cost_map_[from_seg_index][size_t(box_id)][dx][dy]; } void set_cost_map(int from_seg_index, const t_routing_cost_map& cost_map, e_representative_entry_method method) { + const auto& device_ctx = g_vpr_ctx.device(); + for (size_t box_id = 0; + box_id < device_ctx.connection_boxes.num_connection_box_types(); + ++box_id) { + set_cost_map(from_seg_index, ConnectionBoxId(box_id), cost_map, method); + } + } + + void set_cost_map(int from_seg_index, ConnectionBoxId box_id, const t_routing_cost_map& cost_map, e_representative_entry_method method) { VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); // Find coordinate offset for this segment. @@ -102,15 +113,18 @@ class CostMap { int max_dx = 0; int max_dy = 0; for (const auto& entry : cost_map) { - min_dx = std::min(entry.first.first, min_dx); - min_dy = std::min(entry.first.second, min_dy); + if (std::get<3>(entry) != box_id) { + continue; + } + min_dx = std::min(std::get<1>(entry).first, min_dx); + min_dy = std::min(std::get<1>(entry).second, min_dy); - max_dx = std::max(entry.first.first, max_dx); - max_dy = std::max(entry.first.second, max_dy); + max_dx = std::max(std::get<1>(entry).first, max_dx); + max_dy = std::max(std::get<1>(entry).second, max_dy); } - offset_[from_seg_index].first = min_dx; - offset_[from_seg_index].second = min_dy; + offset_[from_seg_index][size_t(box_id)].first = min_dx; + offset_[from_seg_index][size_t(box_id)].second = min_dy; size_t dim_x = max_dx - min_dx + 1; size_t dim_y = max_dy - min_dy + 1; @@ -118,41 +132,45 @@ class CostMap { {dim_x, dim_y}); for (const auto& entry : cost_map) { - int x = entry.first.first - min_dx; - int y = entry.first.second - min_dy; + if (std::get<3>(entry) != box_id) { + continue; + } + int x = std::get<1>(entry).first - min_dx; + int y = std::get<1>(entry).second - min_dy; expansion_cost_map[x][y].add_cost_entry( - method, entry.second.delay, - entry.second.congestion); + method, std::get<2>(entry).delay, + std::get<2>(entry).congestion); } - cost_map_[from_seg_index] = vtr::NdMatrix( + cost_map_[from_seg_index][size_t(box_id)] = vtr::NdMatrix( {dim_x, dim_y}); /* set the lookahead cost map entries with a representative cost * entry from routing_cost_map */ for (unsigned ix = 0; ix < expansion_cost_map.dim_size(0); ix++) { for (unsigned iy = 0; iy < expansion_cost_map.dim_size(1); iy++) { - cost_map_[from_seg_index][ix][iy] = expansion_cost_map[ix][iy].get_representative_cost_entry(method); + cost_map_[from_seg_index][size_t(box_id)][ix][iy] = expansion_cost_map[ix][iy].get_representative_cost_entry(method); } } /* find missing cost entries and fill them in by copying a nearby cost entry */ for (unsigned ix = 0; ix < expansion_cost_map.dim_size(0); ix++) { for (unsigned iy = 0; iy < expansion_cost_map.dim_size(1); iy++) { - Cost_Entry cost_entry = cost_map_[from_seg_index][ix][iy]; + Cost_Entry cost_entry = cost_map_[from_seg_index][size_t(box_id)][ix][iy]; if (!cost_entry.valid()) { Cost_Entry copied_entry = get_nearby_cost_entry( from_seg_index, - offset_[from_seg_index].first + ix, - offset_[from_seg_index].second + iy); - cost_map_[from_seg_index][ix][iy] = copied_entry; + box_id, + offset_[from_seg_index][size_t(box_id)].first + ix, + offset_[from_seg_index][size_t(box_id)].second + iy); + cost_map_[from_seg_index][size_t(box_id)][ix][iy] = copied_entry; } } } } - Cost_Entry get_nearby_cost_entry(int segment_index, int x, int y) { + Cost_Entry get_nearby_cost_entry(int segment_index, ConnectionBoxId box_id, int x, int y) { /* compute the slope from x,y to 0,0 and then move towards 0,0 by one * unit to get the coordinates of the cost entry to be copied */ @@ -173,7 +191,7 @@ class CostMap { } } - Cost_Entry copy_entry = find_cost(segment_index, copy_x, copy_y); + Cost_Entry copy_entry = find_cost(segment_index, box_id, copy_x, copy_y); /* if the entry to be copied is also empty, recurse */ if (copy_entry.valid()) { @@ -182,41 +200,31 @@ class CostMap { return Cost_Entry(); } - return get_nearby_cost_entry(segment_index, copy_x, copy_y); - } - - void print_cost_map(const std::vector& segment_inf, - const char* fname) { - FILE* fp = vtr::fopen(fname, "w"); - for (size_t iseg = 0; iseg < cost_map_.size(); iseg++) { - fprintf(fp, "Seg %s(%zu) (%d, %d)\n", segment_inf.at(iseg).name.c_str(), - iseg, - offset_[iseg].first, - offset_[iseg].second); - for (size_t iy = 0; iy < cost_map_[iseg].dim_size(1); iy++) { - for (size_t ix = 0; ix < cost_map_[iseg].dim_size(0); ix++) { - fprintf(fp, "%.4g,\t", - cost_map_[iseg][ix][iy].delay); - } - fprintf(fp, "\n"); - } - fprintf(fp, "\n\n"); - } - - fclose(fp); + return get_nearby_cost_entry(segment_index, box_id, copy_x, copy_y); } void read(const std::string& file); void write(const std::string& file) const; private: - std::vector> cost_map_; - std::vector> offset_; + vtr::NdMatrix, 2> cost_map_; + vtr::NdMatrix, 2> offset_; std::vector segment_map_; }; static CostMap g_cost_map; +const std::vector& get_rr_node_indcies(t_rr_type rr_type, int start_x, int start_y) { + const auto& device_ctx = g_vpr_ctx.device(); + if (rr_type == CHANX) { + return device_ctx.rr_node_indices[rr_type][start_y][start_x][0]; + } else if (rr_type == CHANY) { + return device_ctx.rr_node_indices[rr_type][start_x][start_y][0]; + } else { + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Unknown channel type %d", rr_type); + } +} + class StartNode { public: StartNode(int start_x, int start_y, t_rr_type rr_type, int seg_index) @@ -227,7 +235,8 @@ class StartNode { , index_(0) {} int get_next_node() { const auto& device_ctx = g_vpr_ctx.device(); - const std::vector& channel_node_list = device_ctx.rr_node_indices[rr_type_][start_x_][start_y_][0]; + const std::vector& channel_node_list = get_rr_node_indcies( + rr_type_, start_x_, start_y_); for (; index_ < channel_node_list.size(); index_++) { int node_ind = channel_node_list[index_]; @@ -273,13 +282,45 @@ static constexpr int kMinProfile = 1; // - kMaxProfile is exceeded. static constexpr int kMaxProfile = 7; +static int search_at(int iseg, int start_x, int start_y, t_routing_cost_map* cost_map) { + int count = 0; + int dx = 0; + int dy = 0; + + while ((count == 0 && dx < kMaxProfile) || dy <= kMinProfile) { + for (e_rr_type chan_type : {CHANX, CHANY}) { + StartNode start_node(start_x + dx, start_y + dy, chan_type, iseg); + VTR_LOG("Searching for %d at (%d, %d)\n", iseg, start_x + dx, start_y + dy); + + for (int start_node_ind = start_node.get_next_node(); + start_node_ind != UNDEFINED; + start_node_ind = start_node.get_next_node()) { + count += 1; + + /* run Dijkstra's algorithm */ + run_dijkstra(start_node_ind, cost_map); + } + } + + if (dy < dx) { + dy += 1; + } else { + dx += 1; + } + } + + return count; +} + static void compute_connection_box_lookahead( const std::vector& segment_inf) { size_t num_segments = segment_inf.size(); vtr::ScopedStartFinishTimer timer("Computing connection box lookahead map"); /* free previous delay map and allocate new one */ - g_cost_map.set_segment_count(segment_inf.size()); + auto& device_ctx = g_vpr_ctx.device(); + g_cost_map.set_counts(segment_inf.size(), + device_ctx.connection_boxes.num_connection_box_types()); /* run Dijkstra's algorithm for each segment type & channel type combination */ for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { @@ -289,48 +330,33 @@ static void compute_connection_box_lookahead( t_routing_cost_map cost_map; int count = 0; - - int dx = 0; - int dy = 0; - //int start_x = vtr::nint(device_ctx.grid.width()/2); - //int start_y = vtr::nint(device_ctx.grid.height()/2); - int start_x = REF_X; - int start_y = REF_Y; - while ((count == 0 && dx < kMaxProfile) || dy <= kMinProfile) { - for (e_rr_type chan_type : {CHANX, CHANY}) { - StartNode start_node(start_x + dx, start_y + dy, chan_type, iseg); - - for (int start_node_ind = start_node.get_next_node(); - start_node_ind != UNDEFINED; - start_node_ind = start_node.get_next_node()) { - count += 1; - - /* run Dijkstra's algorithm */ - run_dijkstra(start_node_ind, &cost_map); - } - } - - if (dy < dx) { - dy += 1; - } else { - dx += 1; - } - } + count += search_at(iseg, REF_X, REF_Y, &cost_map); + count += search_at(iseg, REF_Y, REF_X, &cost_map); + count += search_at(iseg, 1, 1, &cost_map); + count += search_at(iseg, 76, 1, &cost_map); + count += search_at(iseg, 25, 25, &cost_map); + count += search_at(iseg, 25, 27, &cost_map); + count += search_at(iseg, 75, 26, &cost_map); if (count == 0) { VTR_LOG_WARN("Segment %s(%d) found no start_node_ind\n", segment_inf[iseg].name.c_str(), iseg); } +#if 0 + for(const auto & e : cost_map) { + VTR_LOG("%d -> %d (%d, %d): %g, %g\n", + std::get<0>(e).first, std::get<0>(e).second, + std::get<1>(e).first, std::get<1>(e).second, + std::get<2>(e).delay, std::get<2>(e).congestion); + } +#endif + /* boil down the cost list in routing_cost_map at each coordinate to a * representative cost entry and store it in the lookahead cost map */ g_cost_map.set_cost_map(iseg, cost_map, REPRESENTATIVE_ENTRY_METHOD); } - - if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_LOOKAHEAD_MAP)) { - g_cost_map.print_cost_map(segment_inf, getEchoFileName(E_ECHO_LOOKAHEAD_MAP)); - } } static float get_connection_box_lookahead_map_cost(int from_node_ind, @@ -369,24 +395,16 @@ static float get_connection_box_lookahead_map_cost(int from_node_ind, } } - if (device_ctx.rr_nodes[to_node_ind].type() == IPIN) { - ConnectionBoxId box_id; - std::pair box_location; - float site_pin_delay; - bool found = device_ctx.connection_boxes.find_connection_box( - to_node_ind, &box_id, &box_location, &site_pin_delay); - if (!found) { - VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", to_node_ind); - } - - to_location = box_location; - } else { - const std::pair* to_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(to_node_ind); - if (!to_canonical_loc) { - VPR_THROW(VPR_ERROR_ROUTE, "No canonical loc for %d", to_node_ind); - } - - to_location = *to_canonical_loc; + if (device_ctx.rr_nodes[to_node_ind].type() != IPIN) { + VPR_THROW(VPR_ERROR_ROUTE, "Not an IPIN/SINK, is %d", to_node_ind); + } + ConnectionBoxId box_id; + std::pair box_location; + float site_pin_delay; + bool found = device_ctx.connection_boxes.find_connection_box( + to_node_ind, &box_id, &box_location, &site_pin_delay); + if (!found) { + VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", to_node_ind); } const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(from_node_ind); @@ -395,11 +413,11 @@ static float get_connection_box_lookahead_map_cost(int from_node_ind, from_node_ind, to_node_ind); } - ssize_t dx = ssize_t(from_canonical_loc->first) - ssize_t(to_location.first); - ssize_t dy = ssize_t(from_canonical_loc->second) - ssize_t(to_location.second); + ssize_t dx = ssize_t(from_canonical_loc->first) - ssize_t(box_location.first); + ssize_t dy = ssize_t(from_canonical_loc->second) - ssize_t(box_location.second); int from_seg_index = g_cost_map.node_to_segment(from_node_ind); - Cost_Entry cost_entry = g_cost_map.find_cost(from_seg_index, dx, dy); + Cost_Entry cost_entry = g_cost_map.find_cost(from_seg_index, box_id, dx, dy); float expected_delay = cost_entry.delay; float expected_congestion = cost_entry.congestion; @@ -461,11 +479,13 @@ static void run_dijkstra(int start_node_ind, int delta_x = ssize_t(from_canonical_loc->first) - ssize_t(box_location.first); int delta_y = ssize_t(from_canonical_loc->second) - ssize_t(box_location.second); - routing_cost_map->push_back(std::make_pair( + routing_cost_map->push_back(std::make_tuple( + std::make_pair(start_node_ind, node_ind), std::make_pair(delta_x, delta_y), Cost_Entry( current.delay, - current.congestion_upstream))); + current.congestion_upstream), + box_id)); } expand_dijkstra_neighbours(current, node_visited_costs, node_expanded, pq); @@ -496,6 +516,17 @@ float ConnectionBoxMapLookahead::get_expected_cost( } } +#ifndef VTR_ENABLE_CAPNPROTO + +void ConnectionBoxMapLookahead::read(const std::string& file) { + VPR_THROW(VPR_ERROR_ROUTE, "ConnectionBoxMapLookahead::read not implemented"); +} +void ConnectionBoxMapLookahead::write(const std::string& file) const { + VPR_THROW(VPR_ERROR_ROUTE, "ConnectionBoxMapLookahead::write not implemented"); +} + +#else + void ConnectionBoxMapLookahead::read(const std::string& file) { g_cost_map.read(file); } @@ -513,6 +544,27 @@ static void FromCostEntry(VprCostEntry::Builder* out, const Cost_Entry& in) { out->setCongestion(in.congestion); } +static void ToVprVector2D(std::pair* out, const VprVector2D::Reader& in) { + *out = std::make_pair(in.getX(), in.getY()); +} + +static void FromVprVector2D(VprVector2D::Builder* out, const std::pair& in) { + out->setX(in.first); + out->setY(in.second); +} + +static void ToMatrixCostEntry(vtr::NdMatrix* out, + const Matrix::Reader& in) { + ToNdMatrix<2, VprCostEntry, Cost_Entry>(out, in, ToCostEntry); +} + +static void FromMatrixCostEntry( + Matrix::Builder* out, + const vtr::NdMatrix& in) { + FromNdMatrix<2, VprCostEntry, Cost_Entry>( + out, in, FromCostEntry); +} + void CostMap::read(const std::string& file) { MmapFile f(file); @@ -532,20 +584,14 @@ void CostMap::read(const std::string& file) { { const auto& offset = cost_map.getOffset(); - offset_.resize(offset.size()); - auto dst_iter = offset_.begin(); - for (const auto& src : offset) { - *dst_iter++ = std::make_pair(src.getX(), src.getY()); - } + ToNdMatrix<2, VprVector2D, std::pair>( + &offset_, offset, ToVprVector2D); } { const auto& cost_maps = cost_map.getCostMap(); - cost_map_.resize(cost_maps.size()); - auto dst_iter = cost_map_.begin(); - for (const auto& src : cost_maps) { - ToNdMatrix<2, VprCostEntry, Cost_Entry>(&(*dst_iter++), src, ToCostEntry); - } + ToNdMatrix<2, Matrix, vtr::NdMatrix>( + &cost_map_, cost_maps, ToMatrixCostEntry); } } @@ -562,22 +608,17 @@ void CostMap::write(const std::string& file) const { } { - auto offset = cost_map.initOffset(offset_.size()); - for (size_t i = 0; i < offset_.size(); ++i) { - auto elem = offset[i]; - elem.setX(offset_[i].first); - elem.setY(offset_[i].second); - } + auto offset = cost_map.initOffset(); + FromNdMatrix<2, VprVector2D, std::pair>( + &offset, offset_, FromVprVector2D); } { - auto cost_maps = cost_map.initCostMap(cost_map_.size()); - for (size_t i = 0; i < cost_map_.size(); ++i) { - Matrix::Builder elem = cost_maps[i]; - FromNdMatrix<2, VprCostEntry, Cost_Entry>( - &elem, cost_map_[i], FromCostEntry); - } + auto cost_maps = cost_map.initCostMap(); + FromNdMatrix<2, Matrix, vtr::NdMatrix>( + &cost_maps, cost_map_, FromMatrixCostEntry); } writeMessageToFile(file, &builder); } +#endif From 202b6924bffc9a9353d43aed06fd6cdd15cf2d2c Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Fri, 27 Sep 2019 11:08:58 -0700 Subject: [PATCH 04/41] Mark file location function as static. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/route/connection_box_lookahead_map.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index a95fa385893..b82efe419ed 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -214,7 +214,7 @@ class CostMap { static CostMap g_cost_map; -const std::vector& get_rr_node_indcies(t_rr_type rr_type, int start_x, int start_y) { +static const std::vector& get_rr_node_indcies(t_rr_type rr_type, int start_x, int start_y) { const auto& device_ctx = g_vpr_ctx.device(); if (rr_type == CHANX) { return device_ctx.rr_node_indices[rr_type][start_y][start_x][0]; From a29f76c577d5593917df36c4595d260097f20c10 Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Fri, 11 Oct 2019 16:43:37 -0700 Subject: [PATCH 05/41] Add bounds checking when search grid. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/route/connection_box_lookahead_map.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index b82efe419ed..33ac7e3aa57 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -283,11 +283,20 @@ static constexpr int kMinProfile = 1; static constexpr int kMaxProfile = 7; static int search_at(int iseg, int start_x, int start_y, t_routing_cost_map* cost_map) { + const auto& device_ctx = g_vpr_ctx.device(); + int count = 0; int dx = 0; int dy = 0; while ((count == 0 && dx < kMaxProfile) || dy <= kMinProfile) { + if (start_x + dx >= device_ctx.grid.width()) { + break; + } + if (start_y + dy >= device_ctx.grid.height()) { + break; + } + for (e_rr_type chan_type : {CHANX, CHANY}) { StartNode start_node(start_x + dx, start_y + dy, chan_type, iseg); VTR_LOG("Searching for %d at (%d, %d)\n", iseg, start_x + dx, start_y + dy); From bc730a47caaf249359801ab217f1720f8ab53b67 Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Fri, 11 Oct 2019 16:28:47 -0700 Subject: [PATCH 06/41] Updates to connection box lookahead map. - New sampling method for connection box lookahead map. - New spiral hole filling algorithm - Use a lightweight version of PQ_Entry (PQ_Entry_Lite) - Use maps in run_dijkstra() - Move context of router_lookahead_map_utils inside of util namespace. Signed-off-by: Dusty DeWeese Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- .../route/connection_box_lookahead_map.cpp | 812 ++++++++++-------- vpr/src/route/connection_box_lookahead_map.h | 57 ++ 2 files changed, 524 insertions(+), 345 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 33ac7e3aa57..cff9fe2971b 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -9,6 +9,7 @@ #include "globals.h" #include "vtr_math.h" #include "vtr_time.h" +#include "vtr_geometry.h" #include "echo_files.h" #include "route_timing.h" @@ -29,348 +30,261 @@ * average, median, etc. This define selects the method we use. * * See e_representative_entry_method */ -#define REPRESENTATIVE_ENTRY_METHOD SMALLEST +#define REPRESENTATIVE_ENTRY_METHOD util::SMALLEST -#define REF_X 25 -#define REF_Y 23 +static constexpr int SAMPLE_GRID_SIZE = 4; -static int signum(int x) { - if (x > 0) return 1; - if (x < 0) - return -1; - else - return 0; -} +typedef std::array, SAMPLE_GRID_SIZE>, SAMPLE_GRID_SIZE> SampleGrid; -typedef std::vector, std::pair, Cost_Entry, ConnectionBoxId>> t_routing_cost_map; static void run_dijkstra(int start_node_ind, - t_routing_cost_map* routing_cost_map); - -class CostMap { - public: - void set_counts(size_t seg_count, size_t box_count) { - cost_map_.clear(); - offset_.clear(); - cost_map_.resize({seg_count, box_count}); - offset_.resize({seg_count, box_count}); - - const auto& device_ctx = g_vpr_ctx.device(); - segment_map_.resize(device_ctx.rr_nodes.size()); - for (size_t i = 0; i < segment_map_.size(); ++i) { - auto& from_node = device_ctx.rr_nodes[i]; + std::vector* routing_costs); +static void find_inodes_for_segment_types(std::vector* inodes_for_segment); - int from_cost_index = from_node.cost_index(); - int from_seg_index = device_ctx.rr_indexed_data[from_cost_index].seg_index; +void CostMap::set_counts(size_t seg_count, size_t box_count) { + cost_map_.clear(); + offset_.clear(); + cost_map_.resize({seg_count, box_count}); + offset_.resize({seg_count, box_count}); - segment_map_[i] = from_seg_index; - } - } + const auto& device_ctx = g_vpr_ctx.device(); + segment_map_.resize(device_ctx.rr_nodes.size()); + for (size_t i = 0; i < segment_map_.size(); ++i) { + auto& from_node = device_ctx.rr_nodes[i]; + + int from_cost_index = from_node.cost_index(); + int from_seg_index = device_ctx.rr_indexed_data[from_cost_index].seg_index; - int node_to_segment(int from_node_ind) { - return segment_map_[from_node_ind]; + segment_map_[i] = from_seg_index; } +} - Cost_Entry find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const { - VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); - int dx = delta_x - offset_[from_seg_index][size_t(box_id)].first; - int dy = delta_y - offset_[from_seg_index][size_t(box_id)].second; - const auto& cost_map = cost_map_[from_seg_index][size_t(box_id)]; +int CostMap::node_to_segment(int from_node_ind) const { + return segment_map_[from_node_ind]; +} - if (dx < 0) { - dx = 0; - } - if (dy < 0) { - dy = 0; - } +util::Cost_Entry CostMap::find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const { + VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); + const auto& cost_map = cost_map_[from_seg_index][size_t(box_id)]; + if (cost_map.dim_size(0) == 0 || cost_map.dim_size(1) == 0) { + return util::Cost_Entry(); + } - if (dx >= (ssize_t)cost_map.dim_size(0)) { - dx = cost_map.dim_size(0) - 1; - } - if (dy >= (ssize_t)cost_map.dim_size(1)) { - dy = cost_map.dim_size(1) - 1; - } + int dx = delta_x - offset_[from_seg_index][size_t(box_id)].first; + int dy = delta_y - offset_[from_seg_index][size_t(box_id)].second; - return cost_map_[from_seg_index][size_t(box_id)][dx][dy]; + if (dx < 0) { + dx = 0; } - - void set_cost_map(int from_seg_index, - const t_routing_cost_map& cost_map, - e_representative_entry_method method) { - const auto& device_ctx = g_vpr_ctx.device(); - for (size_t box_id = 0; - box_id < device_ctx.connection_boxes.num_connection_box_types(); - ++box_id) { - set_cost_map(from_seg_index, ConnectionBoxId(box_id), cost_map, method); - } + if (dy < 0) { + dy = 0; } - void set_cost_map(int from_seg_index, ConnectionBoxId box_id, const t_routing_cost_map& cost_map, e_representative_entry_method method) { - VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); - - // Find coordinate offset for this segment. - int min_dx = 0; - int min_dy = 0; - int max_dx = 0; - int max_dy = 0; - for (const auto& entry : cost_map) { - if (std::get<3>(entry) != box_id) { - continue; - } - min_dx = std::min(std::get<1>(entry).first, min_dx); - min_dy = std::min(std::get<1>(entry).second, min_dy); + if (dx >= (ssize_t)cost_map.dim_size(0)) { + dx = cost_map.dim_size(0) - 1; + } + if (dy >= (ssize_t)cost_map.dim_size(1)) { + dy = cost_map.dim_size(1) - 1; + } - max_dx = std::max(std::get<1>(entry).first, max_dx); - max_dy = std::max(std::get<1>(entry).second, max_dy); - } + return cost_map_[from_seg_index][size_t(box_id)][dx][dy]; +} - offset_[from_seg_index][size_t(box_id)].first = min_dx; - offset_[from_seg_index][size_t(box_id)].second = min_dy; - size_t dim_x = max_dx - min_dx + 1; - size_t dim_y = max_dy - min_dy + 1; +void CostMap::set_cost_map(int from_seg_index, + const RoutingCosts& costs, + util::e_representative_entry_method method) { + // sort the entries + const auto& device_ctx = g_vpr_ctx.device(); + for (size_t box_id = 0; + box_id < device_ctx.connection_boxes.num_connection_box_types(); + ++box_id) { + set_cost_map(from_seg_index, ConnectionBoxId(box_id), costs, method); + } +} - vtr::NdMatrix expansion_cost_map( - {dim_x, dim_y}); +void CostMap::set_cost_map(int from_seg_index, ConnectionBoxId box_id, const RoutingCosts& costs, util::e_representative_entry_method method) { + VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); - for (const auto& entry : cost_map) { - if (std::get<3>(entry) != box_id) { - continue; - } - int x = std::get<1>(entry).first - min_dx; - int y = std::get<1>(entry).second - min_dy; - expansion_cost_map[x][y].add_cost_entry( - method, std::get<2>(entry).delay, - std::get<2>(entry).congestion); + // calculate the bounding box + vtr::Rect bounds; + for (const auto& entry : costs) { + if (entry.first.box_id == box_id) { + bounds.expand_bounding_box(vtr::Rect(entry.first.delta)); } + } - cost_map_[from_seg_index][size_t(box_id)] = vtr::NdMatrix( - {dim_x, dim_y}); + if (bounds.empty()) { + offset_[from_seg_index][size_t(box_id)] = std::make_pair(0, 0); + cost_map_[from_seg_index][size_t(box_id)] = vtr::NdMatrix( + {size_t(0), size_t(0)}); + return; + } - /* set the lookahead cost map entries with a representative cost - * entry from routing_cost_map */ - for (unsigned ix = 0; ix < expansion_cost_map.dim_size(0); ix++) { - for (unsigned iy = 0; iy < expansion_cost_map.dim_size(1); iy++) { - cost_map_[from_seg_index][size_t(box_id)][ix][iy] = expansion_cost_map[ix][iy].get_representative_cost_entry(method); - } - } + offset_[from_seg_index][size_t(box_id)] = std::make_pair(bounds.xmin(), bounds.ymin()); - /* find missing cost entries and fill them in by copying a nearby cost entry */ - for (unsigned ix = 0; ix < expansion_cost_map.dim_size(0); ix++) { - for (unsigned iy = 0; iy < expansion_cost_map.dim_size(1); iy++) { - Cost_Entry cost_entry = cost_map_[from_seg_index][size_t(box_id)][ix][iy]; - - if (!cost_entry.valid()) { - Cost_Entry copied_entry = get_nearby_cost_entry( - from_seg_index, - box_id, - offset_[from_seg_index][size_t(box_id)].first + ix, - offset_[from_seg_index][size_t(box_id)].second + iy); - cost_map_[from_seg_index][size_t(box_id)][ix][iy] = copied_entry; - } - } + cost_map_[from_seg_index][size_t(box_id)] = vtr::NdMatrix( + {size_t(bounds.width()), size_t(bounds.height())}); + auto& matrix = cost_map_[from_seg_index][size_t(box_id)]; + + for (const auto& entry : costs) { + if (entry.first.box_id == box_id) { + int x = entry.first.delta.x() - bounds.xmin(); + int y = entry.first.delta.y() - bounds.ymin(); + matrix[x][y] = entry.second.cost_entry; } } - Cost_Entry get_nearby_cost_entry(int segment_index, ConnectionBoxId box_id, int x, int y) { - /* compute the slope from x,y to 0,0 and then move towards 0,0 by one - * unit to get the coordinates of the cost entry to be copied */ - - float slope; - int copy_x, copy_y; - if (x == 0 || y == 0) { - slope = std::numeric_limits::infinity(); - copy_x = x - signum(x); - copy_y = y - signum(y); - } else { - slope = (float)y / (float)x; - if (slope >= 1.0) { - copy_y = y - signum(y); - copy_x = vtr::nint((float)y / slope); - } else { - copy_x = x - signum(x); - copy_y = vtr::nint((float)x * slope); + std::list> missing; + bool couldnt_fill = false; + auto shifted_bounds = vtr::Rect(0, 0, bounds.width(), bounds.height()); + /* find missing cost entries and fill them in by copying a nearby cost entry */ + for (unsigned ix = 0; ix < matrix.dim_size(0) && !couldnt_fill; ix++) { + for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { + util::Cost_Entry& cost_entry = matrix[ix][iy]; + if (!cost_entry.valid()) { + // maximum search radius + util::Cost_Entry filler = get_nearby_cost_entry(matrix, ix, iy, shifted_bounds); + if (filler.valid()) { + missing.push_back(std::make_tuple(ix, iy, filler)); + } else { + couldnt_fill = true; + } } } - - Cost_Entry copy_entry = find_cost(segment_index, box_id, copy_x, copy_y); - - /* if the entry to be copied is also empty, recurse */ - if (copy_entry.valid()) { - return copy_entry; - } else if (copy_x == 0 && copy_y == 0) { - return Cost_Entry(); - } - - return get_nearby_cost_entry(segment_index, box_id, copy_x, copy_y); } - void read(const std::string& file); - void write(const std::string& file) const; - - private: - vtr::NdMatrix, 2> cost_map_; - vtr::NdMatrix, 2> offset_; - std::vector segment_map_; -}; - -static CostMap g_cost_map; - -static const std::vector& get_rr_node_indcies(t_rr_type rr_type, int start_x, int start_y) { - const auto& device_ctx = g_vpr_ctx.device(); - if (rr_type == CHANX) { - return device_ctx.rr_node_indices[rr_type][start_y][start_x][0]; - } else if (rr_type == CHANY) { - return device_ctx.rr_node_indices[rr_type][start_x][start_y][0]; - } else { - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Unknown channel type %d", rr_type); + // write back the missing entries + for (auto& xy_entry : missing) { + matrix[std::get<0>(xy_entry)][std::get<1>(xy_entry)] = std::get<2>(xy_entry); } -} -class StartNode { - public: - StartNode(int start_x, int start_y, t_rr_type rr_type, int seg_index) - : start_x_(start_x) - , start_y_(start_y) - , rr_type_(rr_type) - , seg_index_(seg_index) - , index_(0) {} - int get_next_node() { - const auto& device_ctx = g_vpr_ctx.device(); - const std::vector& channel_node_list = get_rr_node_indcies( - rr_type_, start_x_, start_y_); - - for (; index_ < channel_node_list.size(); index_++) { - int node_ind = channel_node_list[index_]; - - if (node_ind == OPEN || device_ctx.rr_nodes[node_ind].capacity() == 0) { - continue; + if (couldnt_fill) { + VTR_LOG_WARN("Couldn't fill holes in the cost matrix for %d -> %ld\n", + from_seg_index, size_t(box_id)); + for (unsigned y = 0; y < matrix.dim_size(1); y++) { + for (unsigned x = 0; x < matrix.dim_size(0); x++) { + VTR_ASSERT(!matrix[x][y].valid()); } - - const std::pair* loc = device_ctx.connection_boxes.find_canonical_loc(node_ind); - if (loc == nullptr) { - continue; - } - - int node_cost_ind = device_ctx.rr_nodes[node_ind].cost_index(); - int node_seg_ind = device_ctx.rr_indexed_data[node_cost_ind].seg_index; - if (node_seg_ind == seg_index_) { - index_ += 1; - return node_ind; + } +#if 0 + for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { + for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { + if(matrix[ix][iy].valid()) { + printf("O"); + } else { + printf("."); + } } + printf("\n"); } - - return UNDEFINED; +#endif } +} - private: - int start_x_; - int start_y_; - t_rr_type rr_type_; - int seg_index_; - size_t index_; -}; - -// Minimum size of search for channels to profile. kMinProfile results -// in searching x = [0, kMinProfile], and y = [0, kMinProfile[. -// -// Making this value larger will increase the sample size, but also the runtime -// to produce the lookahead. -static constexpr int kMinProfile = 1; - -// Maximum size of search for channels to profile. Once search is outside of -// kMinProfile distance, lookahead will stop searching once: -// - At least one channel has been profiled -// - kMaxProfile is exceeded. -static constexpr int kMaxProfile = 7; - -static int search_at(int iseg, int start_x, int start_y, t_routing_cost_map* cost_map) { +void CostMap::print(int iseg) const { const auto& device_ctx = g_vpr_ctx.device(); - - int count = 0; - int dx = 0; - int dy = 0; - - while ((count == 0 && dx < kMaxProfile) || dy <= kMinProfile) { - if (start_x + dx >= device_ctx.grid.width()) { - break; + for (size_t box_id = 0; + box_id < device_ctx.connection_boxes.num_connection_box_types(); + box_id++) { + auto& matrix = cost_map_[iseg][box_id]; + if (matrix.dim_size(0) == 0 || matrix.dim_size(1) == 0) { + printf("cost EMPTY for box_id = %lu\n", box_id); + continue; } - if (start_y + dy >= device_ctx.grid.height()) { - break; + printf("cost for box_id = %lu\n", box_id); + double sum = 0.0; + for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { + for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { + const auto& entry = matrix[ix][iy]; + if (entry.valid()) { + sum += entry.delay; + } + } } - - for (e_rr_type chan_type : {CHANX, CHANY}) { - StartNode start_node(start_x + dx, start_y + dy, chan_type, iseg); - VTR_LOG("Searching for %d at (%d, %d)\n", iseg, start_x + dx, start_y + dy); - - for (int start_node_ind = start_node.get_next_node(); - start_node_ind != UNDEFINED; - start_node_ind = start_node.get_next_node()) { - count += 1; - - /* run Dijkstra's algorithm */ - run_dijkstra(start_node_ind, cost_map); + double avg = sum / ((double)matrix.dim_size(0) * (double)matrix.dim_size(1)); + for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { + for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { + const auto& entry = matrix[ix][iy]; + if (!entry.valid()) { + printf("*"); + } else if (entry.delay > avg) { + printf("o"); + } else { + printf("."); + } } + printf("\n"); } + } +} - if (dy < dx) { - dy += 1; - } else { - dx += 1; +std::list> CostMap::list_empty() const { + std::list> results; + for (int iseg = 0; iseg < (int)cost_map_.dim_size(0); iseg++) { + for (int box_id = 0; box_id < (int)cost_map_.dim_size(1); box_id++) { + auto& matrix = cost_map_[iseg][box_id]; + if (matrix.dim_size(0) == 0 || matrix.dim_size(1) == 0) results.push_back(std::make_pair(iseg, box_id)); } } - - return count; + return results; } -static void compute_connection_box_lookahead( - const std::vector& segment_inf) { - size_t num_segments = segment_inf.size(); - vtr::ScopedStartFinishTimer timer("Computing connection box lookahead map"); - - /* free previous delay map and allocate new one */ - auto& device_ctx = g_vpr_ctx.device(); - g_cost_map.set_counts(segment_inf.size(), - device_ctx.connection_boxes.num_connection_box_types()); +static void assign_min_entry(util::Cost_Entry& dst, const util::Cost_Entry& src) { + if (src.delay < dst.delay) dst.delay = src.delay; + if (src.congestion < dst.congestion) dst.congestion = src.congestion; +} - /* run Dijkstra's algorithm for each segment type & channel type combination */ - for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { - VTR_LOG("Creating cost map for %s(%d)\n", - segment_inf[iseg].name.c_str(), iseg); - /* allocate the cost map for this iseg/chan_type */ - t_routing_cost_map cost_map; - - int count = 0; - count += search_at(iseg, REF_X, REF_Y, &cost_map); - count += search_at(iseg, REF_Y, REF_X, &cost_map); - count += search_at(iseg, 1, 1, &cost_map); - count += search_at(iseg, 76, 1, &cost_map); - count += search_at(iseg, 25, 25, &cost_map); - count += search_at(iseg, 25, 27, &cost_map); - count += search_at(iseg, 75, 26, &cost_map); - - if (count == 0) { - VTR_LOG_WARN("Segment %s(%d) found no start_node_ind\n", - segment_inf[iseg].name.c_str(), iseg); +util::Cost_Entry CostMap::get_nearby_cost_entry(const vtr::NdMatrix& matrix, + int cx, + int cy, + const vtr::Rect& bounds) { + // spiral around (cx, cy) looking for a nearby entry + int n = 1, x, y; + bool in_bounds; + util::Cost_Entry entry; + + do { + in_bounds = false; + y = cy - n; // top + // left -> right + for (x = cx - n; x < cx + n; x++) { + if (bounds.contains(vtr::Point(x, y))) { + assign_min_entry(entry, matrix[x][y]); + in_bounds = true; + } } - -#if 0 - for(const auto & e : cost_map) { - VTR_LOG("%d -> %d (%d, %d): %g, %g\n", - std::get<0>(e).first, std::get<0>(e).second, - std::get<1>(e).first, std::get<1>(e).second, - std::get<2>(e).delay, std::get<2>(e).congestion); + x = cx + n; // right + // top -> bottom + for (; y < cy + n; y++) { + if (bounds.contains(vtr::Point(x, y))) { + assign_min_entry(entry, matrix[x][y]); + in_bounds = true; + } } -#endif - - /* boil down the cost list in routing_cost_map at each coordinate to a - * representative cost entry and store it in the lookahead cost map */ - g_cost_map.set_cost_map(iseg, cost_map, - REPRESENTATIVE_ENTRY_METHOD); - } + y = cy + n; // bottom + // right -> left + for (; x > cx - n; x--) { + if (bounds.contains(vtr::Point(x, y))) { + assign_min_entry(entry, matrix[x][y]); + in_bounds = true; + } + } + x = cx - n; // left + // bottom -> top + for (; y > cy - n; y--) { + if (bounds.contains(vtr::Point(x, y))) { + assign_min_entry(entry, matrix[x][y]); + in_bounds = true; + } + } + if (entry.valid()) return entry; + n++; + } while (in_bounds); + return util::Cost_Entry(); } -static float get_connection_box_lookahead_map_cost(int from_node_ind, - int to_node_ind, - float criticality_fac) { +float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, + int to_node_ind, + float criticality_fac) const { if (from_node_ind == to_node_ind) { return 0.f; } @@ -383,25 +297,18 @@ static float get_connection_box_lookahead_map_cost(int from_node_ind, if (to_node_type == SINK) { const auto& sink_to_ipin = device_ctx.connection_boxes.find_sink_connection_boxes(to_node_ind); - if (sink_to_ipin.ipin_count > 1) { - float cost = std::numeric_limits::infinity(); - // Find cheapest cost from from_node_ind to IPINs for this SINK. - for (int i = 0; i < sink_to_ipin.ipin_count; ++i) { - cost = std::min(cost, - get_connection_box_lookahead_map_cost( - from_node_ind, - sink_to_ipin.ipin_nodes[i], criticality_fac)); - } - - return cost; - } else if (sink_to_ipin.ipin_count == 1) { - to_node_ind = sink_to_ipin.ipin_nodes[0]; - if (from_node_ind == to_node_ind) { - return 0.f; - } - } else { - return std::numeric_limits::infinity(); + float cost = std::numeric_limits::infinity(); + + // Find cheapest cost from from_node_ind to IPINs for this SINK. + for (int i = 0; i < sink_to_ipin.ipin_count; ++i) { + cost = std::min(cost, + get_map_cost( + from_node_ind, + sink_to_ipin.ipin_nodes[i], criticality_fac)); + if (cost <= 0.f) break; } + + return cost; } if (device_ctx.rr_nodes[to_node_ind].type() != IPIN) { @@ -425,12 +332,23 @@ static float get_connection_box_lookahead_map_cost(int from_node_ind, ssize_t dx = ssize_t(from_canonical_loc->first) - ssize_t(box_location.first); ssize_t dy = ssize_t(from_canonical_loc->second) - ssize_t(box_location.second); - int from_seg_index = g_cost_map.node_to_segment(from_node_ind); - Cost_Entry cost_entry = g_cost_map.find_cost(from_seg_index, box_id, dx, dy); + int from_seg_index = cost_map_.node_to_segment(from_node_ind); + util::Cost_Entry cost_entry = cost_map_.find_cost(from_seg_index, box_id, dx, dy); + + if (!cost_entry.valid()) { + // there is no route + // VTR_LOG_WARN("Not connected %d (%s, %d) -> %d (%s, %d, (%d, %d))\n", + // from_node_ind, device_ctx.rr_nodes[from_node_ind].type_string(), from_seg_index, + // to_node_ind, device_ctx.rr_nodes[to_node_ind].type_string(), + // (int)size_t(box_id), (int)box_location.first, (int)box_location.second); + return std::numeric_limits::infinity(); + } + float expected_delay = cost_entry.delay; float expected_congestion = cost_entry.congestion; float expected_cost = criticality_fac * expected_delay + (1.0 - criticality_fac) * expected_congestion; + VTR_ASSERT(std::isfinite(expected_cost) && expected_cost >= 0.f); return expected_cost; } @@ -438,7 +356,7 @@ static float get_connection_box_lookahead_map_cost(int from_node_ind, * visited. Each time a pin is visited, the delay/congestion information * to that pin is stored to an entry in the routing_cost_map */ static void run_dijkstra(int start_node_ind, - t_routing_cost_map* routing_cost_map) { + std::vector* routing_costs) { auto& device_ctx = g_vpr_ctx.device(); /* a list of boolean flags (one for each rr node) to figure out if a @@ -447,24 +365,18 @@ static void run_dijkstra(int start_node_ind, /* for each node keep a list of the cost with which that node has been * visited (used to determine whether to push a candidate node onto the * expansion queue */ - std::vector node_visited_costs(device_ctx.rr_nodes.size(), -1.0); + std::unordered_map paths; /* a priority queue for expansion */ - std::priority_queue pq; + std::priority_queue, std::greater> pq; /* first entry has no upstream delay or congestion */ - util::PQ_Entry first_entry(start_node_ind, UNDEFINED, 0, 0, 0, true); + util::PQ_Entry_Lite first_entry(start_node_ind, UNDEFINED, 0, true); pq.push(first_entry); - const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(start_node_ind); - if (from_canonical_loc == nullptr) { - VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d", - start_node_ind); - } - /* now do routing */ while (!pq.empty()) { - util::PQ_Entry current = pq.top(); + auto current = pq.top(); pq.pop(); int node_ind = current.rr_node_ind; @@ -485,25 +397,155 @@ static void run_dijkstra(int start_node_ind, VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", node_ind); } - int delta_x = ssize_t(from_canonical_loc->first) - ssize_t(box_location.first); - int delta_y = ssize_t(from_canonical_loc->second) - ssize_t(box_location.second); + // reconstruct the path + std::vector path; + for (int i = node_ind; i != start_node_ind; path.push_back(i = paths[i].parent)) + ; + util::PQ_Entry parent_entry(start_node_ind, UNDEFINED, 0, 0, 0, true); + + // recalculate the path with congestion + util::PQ_Entry current_full = parent_entry; + int parent = start_node_ind; + for (auto it = path.rbegin(); it != path.rend(); it++) { + auto& parent_node = device_ctx.rr_nodes[parent]; + current_full = util::PQ_Entry(*it, parent_node.edge_switch(paths[*it].edge), current_full.delay, + current_full.R_upstream, current_full.congestion_upstream, false); + parent = *it; + } - routing_cost_map->push_back(std::make_tuple( - std::make_pair(start_node_ind, node_ind), - std::make_pair(delta_x, delta_y), - Cost_Entry( - current.delay, - current.congestion_upstream), - box_id)); - } + // add each node along the path subtracting the incremental costs from the current costs + parent = start_node_ind; + for (auto it = path.rbegin(); it != path.rend(); it++) { + auto& parent_node = device_ctx.rr_nodes[parent]; + int seg_index = device_ctx.rr_indexed_data[parent_node.cost_index()].seg_index; + const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(parent); + if (from_canonical_loc == nullptr) { + VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d", + parent); + } + + vtr::Point delta(ssize_t(from_canonical_loc->first) - ssize_t(box_location.first), + ssize_t(from_canonical_loc->second) - ssize_t(box_location.second)); + RoutingCostKey key = { + delta, + box_id}; + RoutingCost val = { + parent, + node_ind, + util::Cost_Entry( + current_full.delay - parent_entry.delay, + current_full.congestion_upstream - parent_entry.congestion_upstream)}; + + const auto& x = (*routing_costs)[seg_index].find(key); + + // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST + if (x == (*routing_costs)[seg_index].end() || x->second.cost_entry.delay > val.cost_entry.delay) { + (*routing_costs)[seg_index][key] = val; + } - expand_dijkstra_neighbours(current, node_visited_costs, node_expanded, pq); - node_expanded[node_ind] = true; + parent_entry = util::PQ_Entry(*it, parent_node.edge_switch(paths[*it].edge), parent_entry.delay, + parent_entry.R_upstream, parent_entry.congestion_upstream, false); + parent = *it; + } + } else { + expand_dijkstra_neighbours(current, paths, node_expanded, pq); + node_expanded[node_ind] = true; + } } } void ConnectionBoxMapLookahead::compute(const std::vector& segment_inf) { - compute_connection_box_lookahead(segment_inf); + vtr::ScopedStartFinishTimer timer("Computing connection box lookahead map"); + + size_t num_segments = segment_inf.size(); + std::vector inodes_for_segment(num_segments); + find_inodes_for_segment_types(&inodes_for_segment); + + /* free previous delay map and allocate new one */ + auto& device_ctx = g_vpr_ctx.device(); + cost_map_.set_counts(segment_inf.size(), + device_ctx.connection_boxes.num_connection_box_types()); + + VTR_ASSERT(REPRESENTATIVE_ENTRY_METHOD == util::SMALLEST); + std::vector all_costs(num_segments); + + /* run Dijkstra's algorithm for each segment type & channel type combination */ + for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { + VTR_LOG("Creating cost map for %s(%d)\n", + segment_inf[iseg].name.c_str(), iseg); + /* allocate the cost map for this iseg/chan_type */ + std::vector costs(num_segments); + for (const auto& row : inodes_for_segment[iseg]) { + for (auto cell : row) { + for (auto node_ind : cell) { + run_dijkstra(node_ind, &costs); + } + } + } + + for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { + for (const auto& cost : costs[iseg]) { + const auto& key = cost.first; + const auto& val = cost.second; + const auto& x = all_costs[iseg].find(key); + + // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST + if (x == all_costs[iseg].end() || x->second.cost_entry.delay > val.cost_entry.delay) { + all_costs[iseg][key] = val; + } + } + } + } + + VTR_LOG("Combining results\n"); + for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { +#if 0 + for (auto &e : cost_map_per_segment[iseg]) { + VTR_LOG("%d -> %d (%d, %d): %g, %g\n", + std::get<0>(e).first, std::get<0>(e).second, + std::get<1>(e).first, std::get<1>(e).second, + std::get<2>(e).delay, std::get<2>(e).congestion); + } +#endif + const auto& costs = all_costs[iseg]; + if (costs.empty()) { + // check that there were no start nodes + bool empty = true; + for (const auto& row : inodes_for_segment[iseg]) { + for (auto cell : row) { + if (!cell.empty()) { + empty = false; + break; + } + } + if (!empty) break; + } + if (empty) { + VTR_LOG_WARN("Segment %s(%d) found no routes\n", + segment_inf[iseg].name.c_str(), iseg); + } else { + VTR_LOG_WARN("Segment %s(%d) found no routes, even though there are some matching nodes\n", + segment_inf[iseg].name.c_str(), iseg); + } + } else { + /* boil down the cost list in routing_cost_map at each coordinate to a + * representative cost entry and store it in the lookahead cost map */ + cost_map_.set_cost_map(iseg, costs, + REPRESENTATIVE_ENTRY_METHOD); + } + /* + * VTR_LOG("cost map for %s(%d)\n", + * segment_inf[iseg].name.c_str(), iseg); + * cost_map_.print(iseg); + */ + } + + for (std::pair p : cost_map_.list_empty()) { + int iseg, box_id; + std::tie(iseg, box_id) = p; + VTR_LOG("cost map for %s(%d), connection box %d EMPTY\n", + segment_inf[iseg].name.c_str(), iseg, box_id); + } } float ConnectionBoxMapLookahead::get_expected_cost( @@ -516,7 +558,7 @@ float ConnectionBoxMapLookahead::get_expected_cost( t_rr_type rr_type = device_ctx.rr_nodes[current_node].type(); if (rr_type == CHANX || rr_type == CHANY) { - return get_connection_box_lookahead_map_cost( + return get_map_cost( current_node, target_node, params.criticality); } else if (rr_type == IPIN) { /* Change if you're allowing route-throughs */ return (device_ctx.rr_indexed_data[SINK_COST_INDEX].base_cost); @@ -525,6 +567,86 @@ float ConnectionBoxMapLookahead::get_expected_cost( } } +static int manhattan_distance(const t_rr_node& node, int x, int y) { + int node_center_x = (node.xhigh() + node.xlow()) / 2; + int node_center_y = (node.yhigh() + node.ylow()) / 2; + return abs(node_center_x - x) + abs(node_center_y - y); +} + +static vtr::Rect bounding_box_for_node(const t_rr_node& node) { + return vtr::Rect(node.xlow(), node.ylow(), + node.xhigh() + 1, node.yhigh() + 1); +} + +static void find_inodes_for_segment_types(std::vector* inodes_for_segment) { + auto& device_ctx = g_vpr_ctx.device(); + auto& rr_nodes = device_ctx.rr_nodes; + const int num_segments = inodes_for_segment->size(); + + // compute bounding boxes for each segment type + std::vector> bounding_box_for_segment(num_segments, vtr::Rect()); + for (size_t i = 0; i < rr_nodes.size(); i++) { + auto& node = rr_nodes[i]; + if (node.type() != CHANX && node.type() != CHANY) continue; + int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index; + + VTR_ASSERT(seg_index != OPEN); + VTR_ASSERT(seg_index < num_segments); + + bounding_box_for_segment[seg_index].expand_bounding_box(bounding_box_for_node(node)); + } + + // select an inode near the center of the bounding box for each segment type + inodes_for_segment->clear(); + inodes_for_segment->resize(num_segments); + for (auto& grid : *inodes_for_segment) { + for (auto& row : grid) { + for (auto& cell : row) { + cell = std::vector(); + } + } + } + + for (size_t i = 0; i < rr_nodes.size(); i++) { + auto& node = rr_nodes[i]; + if (node.type() != CHANX && node.type() != CHANY) continue; + if (node.capacity() == 0 || device_ctx.connection_boxes.find_canonical_loc(i) == nullptr) continue; + + int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index; + + VTR_ASSERT(seg_index != OPEN); + VTR_ASSERT(seg_index < num_segments); + + auto& grid = (*inodes_for_segment)[seg_index]; + for (int sy = 0; sy < SAMPLE_GRID_SIZE; sy++) { + for (int sx = 0; sx < SAMPLE_GRID_SIZE; sx++) { + auto& stored_inodes = grid[sy][sx]; + if (stored_inodes.empty()) { + stored_inodes.push_back(i); + goto next_rr_node; + } + + auto& first_stored_node = rr_nodes[stored_inodes.front()]; + if (first_stored_node.xhigh() >= node.xhigh() && first_stored_node.xlow() <= node.xlow() && first_stored_node.yhigh() >= node.yhigh() && first_stored_node.ylow() <= node.ylow()) { + stored_inodes.push_back(i); + goto next_rr_node; + } + + vtr::Point target = sample(bounding_box_for_segment[seg_index], sx + 1, sy + 1, SAMPLE_GRID_SIZE + 1); + int distance_new = manhattan_distance(node, target.x(), target.y()); + int distance_stored = manhattan_distance(first_stored_node, target.x(), target.y()); + if (distance_new < distance_stored) { + stored_inodes.clear(); + stored_inodes.push_back(i); + goto next_rr_node; + } + } + } + next_rr_node: + continue; + } +} + #ifndef VTR_ENABLE_CAPNPROTO void ConnectionBoxMapLookahead::read(const std::string& file) { @@ -537,18 +659,18 @@ void ConnectionBoxMapLookahead::write(const std::string& file) const { #else void ConnectionBoxMapLookahead::read(const std::string& file) { - g_cost_map.read(file); + cost_map_.read(file); } void ConnectionBoxMapLookahead::write(const std::string& file) const { - g_cost_map.write(file); + cost_map_.write(file); } -static void ToCostEntry(Cost_Entry* out, const VprCostEntry::Reader& in) { +static void ToCostEntry(util::Cost_Entry* out, const VprCostEntry::Reader& in) { out->delay = in.getDelay(); out->congestion = in.getCongestion(); } -static void FromCostEntry(VprCostEntry::Builder* out, const Cost_Entry& in) { +static void FromCostEntry(VprCostEntry::Builder* out, const util::Cost_Entry& in) { out->setDelay(in.delay); out->setCongestion(in.congestion); } @@ -562,15 +684,15 @@ static void FromVprVector2D(VprVector2D::Builder* out, const std::pair out->setY(in.second); } -static void ToMatrixCostEntry(vtr::NdMatrix* out, +static void ToMatrixCostEntry(vtr::NdMatrix* out, const Matrix::Reader& in) { - ToNdMatrix<2, VprCostEntry, Cost_Entry>(out, in, ToCostEntry); + ToNdMatrix<2, VprCostEntry, util::Cost_Entry>(out, in, ToCostEntry); } static void FromMatrixCostEntry( Matrix::Builder* out, - const vtr::NdMatrix& in) { - FromNdMatrix<2, VprCostEntry, Cost_Entry>( + const vtr::NdMatrix& in) { + FromNdMatrix<2, VprCostEntry, util::Cost_Entry>( out, in, FromCostEntry); } @@ -599,7 +721,7 @@ void CostMap::read(const std::string& file) { { const auto& cost_maps = cost_map.getCostMap(); - ToNdMatrix<2, Matrix, vtr::NdMatrix>( + ToNdMatrix<2, Matrix, vtr::NdMatrix>( &cost_map_, cost_maps, ToMatrixCostEntry); } } @@ -624,7 +746,7 @@ void CostMap::write(const std::string& file) const { { auto cost_maps = cost_map.initCostMap(); - FromNdMatrix<2, Matrix, vtr::NdMatrix>( + FromNdMatrix<2, Matrix, vtr::NdMatrix>( &cost_maps, cost_map_, FromMatrixCostEntry); } diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h index 03e1a140f0b..814a57b4d49 100644 --- a/vpr/src/route/connection_box_lookahead_map.h +++ b/vpr/src/route/connection_box_lookahead_map.h @@ -2,16 +2,73 @@ #define CONNECTION_BOX_LOOKAHEAD_H_ #include +#include #include "physical_types.h" #include "router_lookahead.h" +#include "router_lookahead_map_utils.h" +#include "connection_box.h" +#include "vtr_geometry.h" + +struct RoutingCostKey { + vtr::Point delta; + ConnectionBoxId box_id; + bool operator==(const RoutingCostKey& other) const { + return delta == other.delta && box_id == other.box_id; + } +}; +struct RoutingCost { + int from_node, to_node; + util::Cost_Entry cost_entry; +}; + +// specialization of std::hash for RoutingCostKey +namespace std { +template<> +struct hash { + typedef RoutingCostKey argument_type; + typedef std::size_t result_type; + result_type operator()(argument_type const& s) const noexcept { + result_type const h1(std::hash{}(s.delta.x())); + result_type const h2(std::hash{}(s.delta.y())); + result_type const h3(std::hash{}(size_t(s.box_id))); + return h1 ^ ((h2 ^ (h3 << 1)) << 1); + } +}; +} // namespace std + +typedef std::unordered_map RoutingCosts; + +class CostMap { + public: + void set_counts(size_t seg_count, size_t box_count); + int node_to_segment(int from_node_ind) const; + util::Cost_Entry find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const; + void set_cost_map(int from_seg_index, + const RoutingCosts& costs, + util::e_representative_entry_method method); + void set_cost_map(int from_seg_index, ConnectionBoxId box_id, const RoutingCosts& costs, util::e_representative_entry_method method); + util::Cost_Entry get_nearby_cost_entry(const vtr::NdMatrix& matrix, int cx, int cy, const vtr::Rect& bounds); + void read(const std::string& file); + void write(const std::string& file) const; + void print(int iseg) const; + std::list> list_empty() const; + + private: + vtr::NdMatrix, 2> cost_map_; + vtr::NdMatrix, 2> offset_; + std::vector segment_map_; +}; class ConnectionBoxMapLookahead : public RouterLookahead { public: float get_expected_cost(int node, int target_node, const t_conn_cost_params& params, float R_upstream) const override; + float get_map_cost(int from_node_ind, int to_node_ind, float criticality_fac) const; void compute(const std::vector& segment_inf) override; void read(const std::string& file) override; void write(const std::string& file) const override; + + CostMap cost_map_; }; #endif From 1fd64341ea6994c893d27d68901ec4418392f714 Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Wed, 23 Oct 2019 13:25:41 -0700 Subject: [PATCH 07/41] add documentation Signed-off-by: Dusty DeWeese --- .../route/connection_box_lookahead_map.cpp | 79 ++++++++++++------- vpr/src/route/connection_box_lookahead_map.h | 15 +++- 2 files changed, 64 insertions(+), 30 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index cff9fe2971b..fc769412c3e 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -29,9 +29,12 @@ * (wire type, chan_type) combination we can take the smallest cost, the * average, median, etc. This define selects the method we use. * + * NOTE: Currently, only SMALLEST is supported. + * * See e_representative_entry_method */ #define REPRESENTATIVE_ENTRY_METHOD util::SMALLEST +/* Sample based an NxN grid of starting segments, where N = SAMPLE_GRID_SIZE */ static constexpr int SAMPLE_GRID_SIZE = 4; typedef std::array, SAMPLE_GRID_SIZE>, SAMPLE_GRID_SIZE> SampleGrid; @@ -40,6 +43,7 @@ static void run_dijkstra(int start_node_ind, std::vector* routing_costs); static void find_inodes_for_segment_types(std::vector* inodes_for_segment); +// resize internal data structures void CostMap::set_counts(size_t seg_count, size_t box_count) { cost_map_.clear(); offset_.clear(); @@ -58,10 +62,12 @@ void CostMap::set_counts(size_t seg_count, size_t box_count) { } } +// cached node -> segment map int CostMap::node_to_segment(int from_node_ind) const { return segment_map_[from_node_ind]; } +// get a cost entry for a segment type, connection box type, and offset util::Cost_Entry CostMap::find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const { VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); const auto& cost_map = cost_map_[from_seg_index][size_t(box_id)]; @@ -89,6 +95,7 @@ util::Cost_Entry CostMap::find_cost(int from_seg_index, ConnectionBoxId box_id, return cost_map_[from_seg_index][size_t(box_id)][dx][dy]; } +// set all cost maps for the segment type void CostMap::set_cost_map(int from_seg_index, const RoutingCosts& costs, util::e_representative_entry_method method) { @@ -101,6 +108,7 @@ void CostMap::set_cost_map(int from_seg_index, } } +// set the cost map for a segment type and connection box type, filling holes void CostMap::set_cost_map(int from_seg_index, ConnectionBoxId box_id, const RoutingCosts& costs, util::e_representative_entry_method method) { VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); @@ -113,6 +121,7 @@ void CostMap::set_cost_map(int from_seg_index, ConnectionBoxId box_id, const Rou } if (bounds.empty()) { + // Didn't find any sample routes, so routing isn't possible between these segment/connection box types. offset_[from_seg_index][size_t(box_id)] = std::make_pair(0, 0); cost_map_[from_seg_index][size_t(box_id)] = vtr::NdMatrix( {size_t(0), size_t(0)}); @@ -133,10 +142,10 @@ void CostMap::set_cost_map(int from_seg_index, ConnectionBoxId box_id, const Rou } } + // find missing cost entries and fill them in by copying a nearby cost entry std::list> missing; bool couldnt_fill = false; auto shifted_bounds = vtr::Rect(0, 0, bounds.width(), bounds.height()); - /* find missing cost entries and fill them in by copying a nearby cost entry */ for (unsigned ix = 0; ix < matrix.dim_size(0) && !couldnt_fill; ix++) { for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { util::Cost_Entry& cost_entry = matrix[ix][iy]; @@ -165,21 +174,13 @@ void CostMap::set_cost_map(int from_seg_index, ConnectionBoxId box_id, const Rou VTR_ASSERT(!matrix[x][y].valid()); } } -#if 0 - for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { - for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { - if(matrix[ix][iy].valid()) { - printf("O"); - } else { - printf("."); - } - } - printf("\n"); - } -#endif } } +// prints an ASCII diagram of each cost map for a segment type (debug) +// o => above average +// . => at or below average +// * => invalid (missing) void CostMap::print(int iseg) const { const auto& device_ctx = g_vpr_ctx.device(); for (size_t box_id = 0; @@ -217,6 +218,7 @@ void CostMap::print(int iseg) const { } } +// list segment type and connection box type pairs that have empty cost maps (debug) std::list> CostMap::list_empty() const { std::list> results; for (int iseg = 0; iseg < (int)cost_map_.dim_size(0); iseg++) { @@ -233,6 +235,7 @@ static void assign_min_entry(util::Cost_Entry& dst, const util::Cost_Entry& src) if (src.congestion < dst.congestion) dst.congestion = src.congestion; } +// find the minimum cost entry from the nearest manhattan distance neighbor util::Cost_Entry CostMap::get_nearby_cost_entry(const vtr::NdMatrix& matrix, int cx, int cy, @@ -282,6 +285,7 @@ util::Cost_Entry CostMap::get_nearby_cost_entry(const vtr::NdMatrix %d (%s, %d, (%d, %d))\n", - // from_node_ind, device_ctx.rr_nodes[from_node_ind].type_string(), from_seg_index, - // to_node_ind, device_ctx.rr_nodes[to_node_ind].type_string(), - // (int)size_t(box_id), (int)box_location.first, (int)box_location.second); +#if 0 + // useful for debugging but can be really noisy + VTR_LOG_WARN("Not connected %d (%s, %d) -> %d (%s, %d, (%d, %d))\n", + from_node_ind, device_ctx.rr_nodes[from_node_ind].type_string(), from_seg_index, + to_node_ind, device_ctx.rr_nodes[to_node_ind].type_string(), + (int)size_t(box_id), (int)box_location.first, (int)box_location.second); +#endif return std::numeric_limits::infinity(); } @@ -362,9 +369,10 @@ static void run_dijkstra(int start_node_ind, /* a list of boolean flags (one for each rr node) to figure out if a * certain node has already been expanded */ std::vector node_expanded(device_ctx.rr_nodes.size(), false); - /* for each node keep a list of the cost with which that node has been + /* For each node keep a list of the cost with which that node has been * visited (used to determine whether to push a candidate node onto the - * expansion queue */ + * expansion queue. + * Also store the parent node so we can reconstruct a specific path. */ std::unordered_map paths; /* a priority queue for expansion */ std::priority_queue, std::greater> pq; @@ -454,6 +462,7 @@ static void run_dijkstra(int start_node_ind, } } +// compute the cost maps for lookahead void ConnectionBoxMapLookahead::compute(const std::vector& segment_inf) { vtr::ScopedStartFinishTimer timer("Computing connection box lookahead map"); @@ -473,7 +482,7 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { VTR_LOG("Creating cost map for %s(%d)\n", segment_inf[iseg].name.c_str(), iseg); - /* allocate the cost map for this iseg/chan_type */ + std::vector costs(num_segments); for (const auto& row : inodes_for_segment[iseg]) { for (auto cell : row) { @@ -483,6 +492,7 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen } } + // combine the cost map from this run with the final cost maps for each segment for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { for (const auto& cost : costs[iseg]) { const auto& key = cost.first; @@ -500,11 +510,13 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen VTR_LOG("Combining results\n"); for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { #if 0 - for (auto &e : cost_map_per_segment[iseg]) { + for (auto &cost : all_costs[iseg]) { + const auto& key = cost.first; + const auto& val = cost.second; VTR_LOG("%d -> %d (%d, %d): %g, %g\n", - std::get<0>(e).first, std::get<0>(e).second, - std::get<1>(e).first, std::get<1>(e).second, - std::get<2>(e).delay, std::get<2>(e).congestion); + val.from_node, val.to_node, + key.delta.x(), key.delta.y(), + val.cost_entry.delay, val.cost_entry.congestion); } #endif const auto& costs = all_costs[iseg]; @@ -533,21 +545,25 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen cost_map_.set_cost_map(iseg, costs, REPRESENTATIVE_ENTRY_METHOD); } - /* - * VTR_LOG("cost map for %s(%d)\n", - * segment_inf[iseg].name.c_str(), iseg); - * cost_map_.print(iseg); - */ + +#if 0 + VTR_LOG("cost map for %s(%d)\n", + segment_inf[iseg].name.c_str(), iseg); + cost_map_.print(iseg); +#endif } +#if 0 for (std::pair p : cost_map_.list_empty()) { int iseg, box_id; std::tie(iseg, box_id) = p; VTR_LOG("cost map for %s(%d), connection box %d EMPTY\n", segment_inf[iseg].name.c_str(), iseg, box_id); } +#endif } +// get an expected minimum cost for routing from the current node to the target node float ConnectionBoxMapLookahead::get_expected_cost( int current_node, int target_node, @@ -567,6 +583,7 @@ float ConnectionBoxMapLookahead::get_expected_cost( } } +// also known as the L1 norm static int manhattan_distance(const t_rr_node& node, int x, int y) { int node_center_x = (node.xhigh() + node.xlow()) / 2; int node_center_y = (node.yhigh() + node.ylow()) / 2; @@ -578,6 +595,8 @@ static vtr::Rect bounding_box_for_node(const t_rr_node& node) { node.xhigh() + 1, node.yhigh() + 1); } +// for each segment type, find the nearest nodes to an equally spaced grid of points +// within the bounding box for that segment type static void find_inodes_for_segment_types(std::vector* inodes_for_segment) { auto& device_ctx = g_vpr_ctx.device(); auto& rr_nodes = device_ctx.rr_nodes; @@ -642,6 +661,8 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se } } } + + // to break out from the inner loop next_rr_node: continue; } diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h index 814a57b4d49..200e7d79980 100644 --- a/vpr/src/route/connection_box_lookahead_map.h +++ b/vpr/src/route/connection_box_lookahead_map.h @@ -9,19 +9,29 @@ #include "connection_box.h" #include "vtr_geometry.h" +// Keys in the RoutingCosts map struct RoutingCostKey { + // offset of the destination connection box from the starting segment vtr::Point delta; + + // type of the destination connection box ConnectionBoxId box_id; + bool operator==(const RoutingCostKey& other) const { return delta == other.delta && box_id == other.box_id; } }; + +// Data in the RoutingCosts map struct RoutingCost { + // source and destination node indices int from_node, to_node; + + // cost entry for the route util::Cost_Entry cost_entry; }; -// specialization of std::hash for RoutingCostKey +// Specialization of std::hash for RoutingCostKey namespace std { template<> struct hash { @@ -36,8 +46,10 @@ struct hash { }; } // namespace std +// Map used to store intermediate routing costs typedef std::unordered_map RoutingCosts; +// Dense cost maps per source segment and destination connection box types class CostMap { public: void set_counts(size_t seg_count, size_t box_count); @@ -59,6 +71,7 @@ class CostMap { std::vector segment_map_; }; +// Implementation of RouterLookahead based on source segment and destination connection box types class ConnectionBoxMapLookahead : public RouterLookahead { public: float get_expected_cost(int node, int target_node, const t_conn_cost_params& params, float R_upstream) const override; From 4e908f4ee91987192524776d88c1a2a61e7962b2 Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Wed, 23 Oct 2019 15:04:39 -0700 Subject: [PATCH 08/41] un-shadow a variable Signed-off-by: Dusty DeWeese --- vpr/src/route/connection_box_lookahead_map.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index fc769412c3e..1448f17fbc6 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -493,15 +493,15 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen } // combine the cost map from this run with the final cost maps for each segment - for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { - for (const auto& cost : costs[iseg]) { + for (int i = 0; i < (ssize_t)num_segments; i++) { + for (const auto& cost : costs[i]) { const auto& key = cost.first; const auto& val = cost.second; - const auto& x = all_costs[iseg].find(key); + const auto& x = all_costs[i].find(key); // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST - if (x == all_costs[iseg].end() || x->second.cost_entry.delay > val.cost_entry.delay) { - all_costs[iseg][key] = val; + if (x == all_costs[i].end() || x->second.cost_entry.delay > val.cost_entry.delay) { + all_costs[i][key] = val; } } } From c722f2ab7ae760531f6889717693db8181c8c74b Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Wed, 23 Oct 2019 17:00:44 -0700 Subject: [PATCH 09/41] remove method from set_cost_map Signed-off-by: Dusty DeWeese --- vpr/src/route/connection_box_lookahead_map.cpp | 10 ++++------ vpr/src/route/connection_box_lookahead_map.h | 5 ++--- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 1448f17fbc6..d6aa98ca656 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -97,19 +97,18 @@ util::Cost_Entry CostMap::find_cost(int from_seg_index, ConnectionBoxId box_id, // set all cost maps for the segment type void CostMap::set_cost_map(int from_seg_index, - const RoutingCosts& costs, - util::e_representative_entry_method method) { + const RoutingCosts& costs) { // sort the entries const auto& device_ctx = g_vpr_ctx.device(); for (size_t box_id = 0; box_id < device_ctx.connection_boxes.num_connection_box_types(); ++box_id) { - set_cost_map(from_seg_index, ConnectionBoxId(box_id), costs, method); + set_cost_map(from_seg_index, ConnectionBoxId(box_id), costs); } } // set the cost map for a segment type and connection box type, filling holes -void CostMap::set_cost_map(int from_seg_index, ConnectionBoxId box_id, const RoutingCosts& costs, util::e_representative_entry_method method) { +void CostMap::set_cost_map(int from_seg_index, ConnectionBoxId box_id, const RoutingCosts& costs) { VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); // calculate the bounding box @@ -542,8 +541,7 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen } else { /* boil down the cost list in routing_cost_map at each coordinate to a * representative cost entry and store it in the lookahead cost map */ - cost_map_.set_cost_map(iseg, costs, - REPRESENTATIVE_ENTRY_METHOD); + cost_map_.set_cost_map(iseg, costs); } #if 0 diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h index 200e7d79980..eb1c0c0756f 100644 --- a/vpr/src/route/connection_box_lookahead_map.h +++ b/vpr/src/route/connection_box_lookahead_map.h @@ -56,9 +56,8 @@ class CostMap { int node_to_segment(int from_node_ind) const; util::Cost_Entry find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const; void set_cost_map(int from_seg_index, - const RoutingCosts& costs, - util::e_representative_entry_method method); - void set_cost_map(int from_seg_index, ConnectionBoxId box_id, const RoutingCosts& costs, util::e_representative_entry_method method); + const RoutingCosts& costs); + void set_cost_map(int from_seg_index, ConnectionBoxId box_id, const RoutingCosts& costs); util::Cost_Entry get_nearby_cost_entry(const vtr::NdMatrix& matrix, int cx, int cy, const vtr::Rect& bounds); void read(const std::string& file); void write(const std::string& file) const; From 712eb80959b2ed1eff93f808d9d4fcadd55138fe Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Thu, 24 Oct 2019 12:51:01 -0700 Subject: [PATCH 10/41] replace std::list with std::vector Signed-off-by: Dusty DeWeese --- vpr/src/route/connection_box_lookahead_map.cpp | 6 +++--- vpr/src/route/connection_box_lookahead_map.h | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index d6aa98ca656..99b294de6ca 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -142,7 +142,7 @@ void CostMap::set_cost_map(int from_seg_index, ConnectionBoxId box_id, const Rou } // find missing cost entries and fill them in by copying a nearby cost entry - std::list> missing; + std::vector> missing; bool couldnt_fill = false; auto shifted_bounds = vtr::Rect(0, 0, bounds.width(), bounds.height()); for (unsigned ix = 0; ix < matrix.dim_size(0) && !couldnt_fill; ix++) { @@ -218,8 +218,8 @@ void CostMap::print(int iseg) const { } // list segment type and connection box type pairs that have empty cost maps (debug) -std::list> CostMap::list_empty() const { - std::list> results; +std::vector> CostMap::list_empty() const { + std::vector> results; for (int iseg = 0; iseg < (int)cost_map_.dim_size(0); iseg++) { for (int box_id = 0; box_id < (int)cost_map_.dim_size(1); box_id++) { auto& matrix = cost_map_[iseg][box_id]; diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h index eb1c0c0756f..411394c89d5 100644 --- a/vpr/src/route/connection_box_lookahead_map.h +++ b/vpr/src/route/connection_box_lookahead_map.h @@ -2,7 +2,6 @@ #define CONNECTION_BOX_LOOKAHEAD_H_ #include -#include #include "physical_types.h" #include "router_lookahead.h" #include "router_lookahead_map_utils.h" @@ -62,7 +61,7 @@ class CostMap { void read(const std::string& file); void write(const std::string& file) const; void print(int iseg) const; - std::list> list_empty() const; + std::vector> list_empty() const; private: vtr::NdMatrix, 2> cost_map_; From 81f9d5898e77548c5618c91f0b85d8c2b6c85098 Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Wed, 23 Oct 2019 09:25:19 -0700 Subject: [PATCH 11/41] parallelize connection box lookahead Signed-off-by: Dusty DeWeese --- .../route/connection_box_lookahead_map.cpp | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 99b294de6ca..717db0771b6 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -23,6 +23,11 @@ # include "serdes_utils.h" #endif +#if defined(VPR_USE_TBB) +# include +# include +#endif + /* we're profiling routing cost over many tracks for each wire type, so we'll * have many cost entries at each |dx|,|dy| offset. There are many ways to * "boil down" the many costs at each offset to a single entry for a given @@ -478,7 +483,12 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen std::vector all_costs(num_segments); /* run Dijkstra's algorithm for each segment type & channel type combination */ +#if defined(VPR_USE_TBB) + tbb::mutex all_costs_mutex; + tbb::parallel_for(size_t(0), size_t(num_segments), [&](int iseg) { +#else for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { +#endif VTR_LOG("Creating cost map for %s(%d)\n", segment_inf[iseg].name.c_str(), iseg); @@ -491,6 +501,10 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen } } +#if defined(VPR_USE_TBB) + all_costs_mutex.lock(); +#endif + // combine the cost map from this run with the final cost maps for each segment for (int i = 0; i < (ssize_t)num_segments; i++) { for (const auto& cost : costs[i]) { @@ -504,7 +518,15 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen } } } + +#if defined(VPR_USE_TBB) + all_costs_mutex.unlock(); +#endif +#if !defined(VPR_USE_TBB) } +#else + }); +#endif VTR_LOG("Combining results\n"); for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { From 48fc62cba8f163fd78604da4e4ff05ab2cc6e856 Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Thu, 24 Oct 2019 18:46:28 -0700 Subject: [PATCH 12/41] performance improvements Signed-off-by: Dusty DeWeese --- .../route/connection_box_lookahead_map.cpp | 528 +++++++++++------- vpr/src/route/connection_box_lookahead_map.h | 74 ++- 2 files changed, 386 insertions(+), 216 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 717db0771b6..fa34c205b43 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -24,7 +24,7 @@ #endif #if defined(VPR_USE_TBB) -# include +# include # include #endif @@ -38,22 +38,93 @@ * * See e_representative_entry_method */ #define REPRESENTATIVE_ENTRY_METHOD util::SMALLEST +// #define FILL_LIMIT 30 /* Sample based an NxN grid of starting segments, where N = SAMPLE_GRID_SIZE */ -static constexpr int SAMPLE_GRID_SIZE = 4; +static constexpr int SAMPLE_GRID_SIZE = 3; +static constexpr float COST_LIMIT = std::numeric_limits::infinity(); +static constexpr int DIJKSTRA_CACHE_SIZE = 64; +static constexpr int DIJKSTRA_CACHE_WINDOW = 3; +static constexpr bool BREAK_ON_MISS = false; +static constexpr float PENALTY_FACTOR = 1.f; + +struct SamplePoint { + uint64_t order; + vtr::Point location; + std::vector samples; + SamplePoint() + : location(0, 0) {} +}; + +struct SampleGrid { + SamplePoint point[SAMPLE_GRID_SIZE][SAMPLE_GRID_SIZE]; +}; + +template +class SimpleCache { + public: + SimpleCache() + : pos(0) + , hits(0) + , misses(0) {} + bool get(K key, V* value) { + for (int i = 0; i < N; i++) { + auto& k = keys[i]; + if (k == key) { + auto& v = values[i]; +#if 0 + // preserve the found key by pushing it back + int last = (pos + N - 1) % N; + std::swap(k, keys[last]); + std::swap(v, values[last]); +#endif + *value = v; + hits++; + return true; + } + } + misses++; + return false; + } + void insert(K key, V val) { + keys[pos] = key; + values[pos] = val; + pos = (pos + 1) % N; + } + float hit_ratio() { + return hits ? static_cast(hits) / static_cast(hits + misses) : 0.f; + } + float miss_ratio() { + return misses ? static_cast(misses) / static_cast(hits + misses) : 0.f; + } -typedef std::array, SAMPLE_GRID_SIZE>, SAMPLE_GRID_SIZE> SampleGrid; + private: + std::array keys; // keep keys together for faster scanning + std::array values; + size_t pos; + uint64_t hits; + uint64_t misses; +}; -static void run_dijkstra(int start_node_ind, - std::vector* routing_costs); +static float run_dijkstra(int start_node_ind, + RoutingCosts* routing_costs, + SimpleCache* cache, + float max_cost); static void find_inodes_for_segment_types(std::vector* inodes_for_segment); +// also known as the L1 norm +static int manhattan_distance(const vtr::Point& a, const vtr::Point& b) { + return abs(b.x() - a.x()) + abs(b.y() - a.y()); +} + // resize internal data structures void CostMap::set_counts(size_t seg_count, size_t box_count) { cost_map_.clear(); offset_.clear(); cost_map_.resize({seg_count, box_count}); offset_.resize({seg_count, box_count}); + seg_count_ = seg_count; + box_count_ = box_count; const auto& device_ctx = g_vpr_ctx.device(); segment_map_.resize(device_ctx.rr_nodes.size()); @@ -100,82 +171,118 @@ util::Cost_Entry CostMap::find_cost(int from_seg_index, ConnectionBoxId box_id, return cost_map_[from_seg_index][size_t(box_id)][dx][dy]; } -// set all cost maps for the segment type -void CostMap::set_cost_map(int from_seg_index, - const RoutingCosts& costs) { - // sort the entries - const auto& device_ctx = g_vpr_ctx.device(); - for (size_t box_id = 0; - box_id < device_ctx.connection_boxes.num_connection_box_types(); - ++box_id) { - set_cost_map(from_seg_index, ConnectionBoxId(box_id), costs); - } +static util::Cost_Entry penalize(const util::Cost_Entry& entry, int distance, float penalty) { + return util::Cost_Entry(entry.delay + distance * penalty * PENALTY_FACTOR, + entry.congestion); } // set the cost map for a segment type and connection box type, filling holes -void CostMap::set_cost_map(int from_seg_index, ConnectionBoxId box_id, const RoutingCosts& costs) { - VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); - - // calculate the bounding box - vtr::Rect bounds; +void CostMap::set_cost_map(const RoutingCosts& costs) { + // calculate the bounding boxes + vtr::Matrix> bounds({seg_count_, box_count_}); for (const auto& entry : costs) { - if (entry.first.box_id == box_id) { - bounds.expand_bounding_box(vtr::Rect(entry.first.delta)); + bounds[entry.first.seg_index][size_t(entry.first.box_id)].expand_bounding_box(vtr::Rect(entry.first.delta)); + } + + // store bounds + for (size_t seg = 0; seg < seg_count_; seg++) { + for (size_t box = 0; box < box_count_; box++) { + const auto& seg_box_bounds = bounds[seg][box]; + if (seg_box_bounds.empty()) { + // Didn't find any sample routes, so routing isn't possible between these segment/connection box types. + offset_[seg][box] = std::make_pair(0, 0); + cost_map_[seg][box] = vtr::NdMatrix( + {size_t(0), size_t(0)}); + continue; + } else { + offset_[seg][box] = std::make_pair(seg_box_bounds.xmin(), seg_box_bounds.ymin()); + cost_map_[seg][box] = vtr::NdMatrix( + {size_t(seg_box_bounds.width()), size_t(seg_box_bounds.height())}); + } } } - if (bounds.empty()) { - // Didn't find any sample routes, so routing isn't possible between these segment/connection box types. - offset_[from_seg_index][size_t(box_id)] = std::make_pair(0, 0); - cost_map_[from_seg_index][size_t(box_id)] = vtr::NdMatrix( - {size_t(0), size_t(0)}); - return; - } - - offset_[from_seg_index][size_t(box_id)] = std::make_pair(bounds.xmin(), bounds.ymin()); - - cost_map_[from_seg_index][size_t(box_id)] = vtr::NdMatrix( - {size_t(bounds.width()), size_t(bounds.height())}); - auto& matrix = cost_map_[from_seg_index][size_t(box_id)]; - + // store entries into the matrices for (const auto& entry : costs) { - if (entry.first.box_id == box_id) { - int x = entry.first.delta.x() - bounds.xmin(); - int y = entry.first.delta.y() - bounds.ymin(); - matrix[x][y] = entry.second.cost_entry; - } - } + int seg = entry.first.seg_index; + int box = size_t(entry.first.box_id); + const auto& seg_box_bounds = bounds[seg][box]; + int x = entry.first.delta.x() - seg_box_bounds.xmin(); + int y = entry.first.delta.y() - seg_box_bounds.ymin(); + cost_map_[seg][box][x][y] = entry.second.cost_entry; + } + + // fill the holes + for (size_t seg = 0; seg < seg_count_; seg++) { + for (size_t box = 0; box < box_count_; box++) { + const auto& seg_box_bounds = bounds[seg][box]; + if (seg_box_bounds.empty()) { + continue; + } + auto& matrix = cost_map_[seg][box]; - // find missing cost entries and fill them in by copying a nearby cost entry - std::vector> missing; - bool couldnt_fill = false; - auto shifted_bounds = vtr::Rect(0, 0, bounds.width(), bounds.height()); - for (unsigned ix = 0; ix < matrix.dim_size(0) && !couldnt_fill; ix++) { - for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { - util::Cost_Entry& cost_entry = matrix[ix][iy]; - if (!cost_entry.valid()) { - // maximum search radius - util::Cost_Entry filler = get_nearby_cost_entry(matrix, ix, iy, shifted_bounds); - if (filler.valid()) { - missing.push_back(std::make_tuple(ix, iy, filler)); - } else { - couldnt_fill = true; + // calculate delay penalty + float min_delay = 0.f, max_delay = 0.f; + vtr::Point min_location(0, 0), max_location(0, 0); + for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { + for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { + util::Cost_Entry& cost_entry = matrix[ix][iy]; + if (cost_entry.valid()) { + if (cost_entry.delay < min_delay) { + min_delay = cost_entry.delay; + min_location = vtr::Point(ix, iy); + } + if (cost_entry.delay > max_delay) { + max_delay = cost_entry.delay; + max_location = vtr::Point(ix, iy); + } + } + } + } + float delay_penalty = (max_delay - min_delay) / static_cast(std::max(1, manhattan_distance(max_location, min_location))); + + // find missing cost entries and fill them in by copying a nearby cost entry + std::vector> missing; + bool couldnt_fill = false; + auto shifted_bounds = vtr::Rect(0, 0, seg_box_bounds.width(), seg_box_bounds.height()); + int max_fill = 0; + for (unsigned ix = 0; ix < matrix.dim_size(0) && !couldnt_fill; ix++) { + for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { + util::Cost_Entry& cost_entry = matrix[ix][iy]; + if (!cost_entry.valid()) { + // maximum search radius + util::Cost_Entry filler; + int distance; + std::tie(filler, distance) = get_nearby_cost_entry(matrix, ix, iy, shifted_bounds); + if (filler.valid()) { + missing.push_back(std::make_tuple(ix, iy, penalize(filler, distance, delay_penalty))); + max_fill = std::max(max_fill, distance); + } else { + couldnt_fill = true; + } + } } } - } - } - // write back the missing entries - for (auto& xy_entry : missing) { - matrix[std::get<0>(xy_entry)][std::get<1>(xy_entry)] = std::get<2>(xy_entry); - } + if (!couldnt_fill) { + VTR_LOG("At %d -> %d: max_fill = %d, delay_penalty = %e\n", seg, box, max_fill, delay_penalty); + } - if (couldnt_fill) { - VTR_LOG_WARN("Couldn't fill holes in the cost matrix for %d -> %ld\n", - from_seg_index, size_t(box_id)); - for (unsigned y = 0; y < matrix.dim_size(1); y++) { - for (unsigned x = 0; x < matrix.dim_size(0); x++) { - VTR_ASSERT(!matrix[x][y].valid()); + // write back the missing entries + for (auto& xy_entry : missing) { + matrix[std::get<0>(xy_entry)][std::get<1>(xy_entry)] = std::get<2>(xy_entry); + } + + if (couldnt_fill) { + VTR_LOG_WARN("Couldn't fill holes in the cost matrix for %d -> %ld, %d x %d bounding box\n", + seg, box, seg_box_bounds.width(), seg_box_bounds.height()); +#if !defined(FILL_LIMIT) + for (unsigned y = 0; y < matrix.dim_size(1); y++) { + for (unsigned x = 0; x < matrix.dim_size(0); x++) { + VTR_ASSERT(!matrix[x][y].valid()); + } + } +#endif } } } @@ -211,10 +318,14 @@ void CostMap::print(int iseg) const { const auto& entry = matrix[ix][iy]; if (!entry.valid()) { printf("*"); + } else if (entry.delay * 4 > avg * 5) { + printf("O"); } else if (entry.delay > avg) { printf("o"); - } else { + } else if (entry.delay * 4 > avg * 3) { printf("."); + } else { + printf(" "); } } printf("\n"); @@ -240,53 +351,40 @@ static void assign_min_entry(util::Cost_Entry& dst, const util::Cost_Entry& src) } // find the minimum cost entry from the nearest manhattan distance neighbor -util::Cost_Entry CostMap::get_nearby_cost_entry(const vtr::NdMatrix& matrix, - int cx, - int cy, - const vtr::Rect& bounds) { +std::pair CostMap::get_nearby_cost_entry(const vtr::NdMatrix& matrix, + int cx, + int cy, + const vtr::Rect& bounds) { // spiral around (cx, cy) looking for a nearby entry - int n = 1, x, y; + int n = 1; bool in_bounds; util::Cost_Entry entry; do { in_bounds = false; - y = cy - n; // top - // left -> right - for (x = cx - n; x < cx + n; x++) { - if (bounds.contains(vtr::Point(x, y))) { - assign_min_entry(entry, matrix[x][y]); + for (int ox = -n; ox <= n; ox++) { + int x = cx + ox; + int oy = n - abs(ox); + int yp = cy + oy; + int yn = cy - oy; + if (bounds.contains(vtr::Point(x, yp))) { + assign_min_entry(entry, matrix[x][yp]); in_bounds = true; } - } - x = cx + n; // right - // top -> bottom - for (; y < cy + n; y++) { - if (bounds.contains(vtr::Point(x, y))) { - assign_min_entry(entry, matrix[x][y]); - in_bounds = true; - } - } - y = cy + n; // bottom - // right -> left - for (; x > cx - n; x--) { - if (bounds.contains(vtr::Point(x, y))) { - assign_min_entry(entry, matrix[x][y]); + if (bounds.contains(vtr::Point(x, yn))) { + assign_min_entry(entry, matrix[x][yn]); in_bounds = true; } } - x = cx - n; // left - // bottom -> top - for (; y > cy - n; y--) { - if (bounds.contains(vtr::Point(x, y))) { - assign_min_entry(entry, matrix[x][y]); - in_bounds = true; - } - } - if (entry.valid()) return entry; + if (entry.valid()) return std::make_pair(entry, n); n++; +#if defined(FILL_LIMIT) + if (n > FILL_LIMIT) { + break; + } +#endif } while (in_bounds); - return util::Cost_Entry(); + return std::make_pair(util::Cost_Entry(), n); } // derive a cost from the map between two nodes @@ -366,8 +464,10 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, /* runs Dijkstra's algorithm from specified node until all nodes have been * visited. Each time a pin is visited, the delay/congestion information * to that pin is stored to an entry in the routing_cost_map */ -static void run_dijkstra(int start_node_ind, - std::vector* routing_costs) { +static float run_dijkstra(int start_node_ind, + RoutingCosts* routing_costs, + SimpleCache* cache, + float cost_limit) { auto& device_ctx = g_vpr_ctx.device(); /* a list of boolean flags (one for each rr node) to figure out if a @@ -384,6 +484,8 @@ static void run_dijkstra(int start_node_ind, /* first entry has no upstream delay or congestion */ util::PQ_Entry_Lite first_entry(start_node_ind, UNDEFINED, 0, true); + float max_cost = 0.f; + pq.push(first_entry); /* now do routing */ @@ -439,8 +541,10 @@ static void run_dijkstra(int start_node_ind, vtr::Point delta(ssize_t(from_canonical_loc->first) - ssize_t(box_location.first), ssize_t(from_canonical_loc->second) - ssize_t(box_location.second)); RoutingCostKey key = { - delta, - box_id}; + seg_index, + box_id, + delta}; + CompressedRoutingCostKey compressed_key(key); RoutingCost val = { parent, node_ind, @@ -448,11 +552,41 @@ static void run_dijkstra(int start_node_ind, current_full.delay - parent_entry.delay, current_full.congestion_upstream - parent_entry.congestion_upstream)}; - const auto& x = (*routing_costs)[seg_index].find(key); - // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST - if (x == (*routing_costs)[seg_index].end() || x->second.cost_entry.delay > val.cost_entry.delay) { - (*routing_costs)[seg_index][key] = val; + float cost = 0.f; + bool in_window = abs(delta.x()) <= DIJKSTRA_CACHE_WINDOW && abs(delta.y()) <= DIJKSTRA_CACHE_WINDOW; + if (in_window && cache->get(compressed_key, &cost) && cost <= val.cost_entry.delay) { + // the sample was not cheaper than the cached sample + if (BREAK_ON_MISS) { + // don't store the rest of the path + break; + } + } else { + const auto& x = routing_costs->find(key); + if (x != routing_costs->end()) { + if (x->second.cost_entry.delay > val.cost_entry.delay) { + // this sample is cheaper + (*routing_costs)[key] = val; + if (in_window) { + cache->insert(compressed_key, val.cost_entry.delay); + } + } else { + // this sample is not cheaper + if (BREAK_ON_MISS) { + // don't store the rest of the path + break; + } + if (in_window) { + cache->insert(compressed_key, x->second.cost_entry.delay); + } + } + } else { + // this sample is new + (*routing_costs)[key] = val; + if (in_window) { + cache->insert(compressed_key, val.cost_entry.delay); + } + } } parent_entry = util::PQ_Entry(*it, parent_node.edge_switch(paths[*it].edge), parent_entry.delay, @@ -463,7 +597,13 @@ static void run_dijkstra(int start_node_ind, expand_dijkstra_neighbours(current, paths, node_expanded, pq); node_expanded[node_ind] = true; } + + max_cost = std::max(max_cost, current.delay_cost); + if (max_cost > cost_limit) { + break; + } } + return max_cost; } // compute the cost maps for lookahead @@ -471,8 +611,30 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen vtr::ScopedStartFinishTimer timer("Computing connection box lookahead map"); size_t num_segments = segment_inf.size(); - std::vector inodes_for_segment(num_segments); - find_inodes_for_segment_types(&inodes_for_segment); + std::vector sample_points; + { + std::vector inodes_for_segment(num_segments); + find_inodes_for_segment_types(&inodes_for_segment); + + // collapse into a vector + for (auto& grid : inodes_for_segment) { + for (int y = 0; y < SAMPLE_GRID_SIZE; y++) { + for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { + auto& point = grid.point[y][x]; + if (!point.samples.empty()) { + point.order = point.samples[0]; + sample_points.push_back(point); + } + } + } + } + } + + // sort by VPR coordinate + std::sort(sample_points.begin(), sample_points.end(), + [](const SamplePoint& a, const SamplePoint& b) { + return a.order < b.order; + }); /* free previous delay map and allocate new one */ auto& device_ctx = g_vpr_ctx.device(); @@ -480,98 +642,72 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen device_ctx.connection_boxes.num_connection_box_types()); VTR_ASSERT(REPRESENTATIVE_ENTRY_METHOD == util::SMALLEST); - std::vector all_costs(num_segments); + RoutingCosts all_costs; /* run Dijkstra's algorithm for each segment type & channel type combination */ #if defined(VPR_USE_TBB) tbb::mutex all_costs_mutex; - tbb::parallel_for(size_t(0), size_t(num_segments), [&](int iseg) { + tbb::parallel_for_each(sample_points, [&](const SamplePoint& point) { #else - for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { + for (const auto& point : sample_points) { #endif - VTR_LOG("Creating cost map for %s(%d)\n", - segment_inf[iseg].name.c_str(), iseg); - - std::vector costs(num_segments); - for (const auto& row : inodes_for_segment[iseg]) { - for (auto cell : row) { - for (auto node_ind : cell) { - run_dijkstra(node_ind, &costs); - } - } + float max_cost = 0.f; + RoutingCosts costs; + SimpleCache cache; + for (auto node_ind : point.samples) { + max_cost = std::max(max_cost, run_dijkstra(node_ind, &costs, &cache, COST_LIMIT)); } #if defined(VPR_USE_TBB) all_costs_mutex.lock(); #endif + VTR_LOG("Expanded sample point (%d, %d) %e miss %g\n", + point.location.x(), point.location.y(), max_cost, cache.miss_ratio()); + // combine the cost map from this run with the final cost maps for each segment - for (int i = 0; i < (ssize_t)num_segments; i++) { - for (const auto& cost : costs[i]) { - const auto& key = cost.first; - const auto& val = cost.second; - const auto& x = all_costs[i].find(key); + for (const auto& cost : costs) { + const auto& key = cost.first; + const auto& val = cost.second; + const auto& x = all_costs.find(key); - // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST - if (x == all_costs[i].end() || x->second.cost_entry.delay > val.cost_entry.delay) { - all_costs[i][key] = val; - } + // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST + if (x == all_costs.end() || x->second.cost_entry.delay > val.cost_entry.delay) { + all_costs[key] = val; } } #if defined(VPR_USE_TBB) all_costs_mutex.unlock(); -#endif -#if !defined(VPR_USE_TBB) - } -#else }); +#else + } #endif VTR_LOG("Combining results\n"); - for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { + /* boil down the cost list in routing_cost_map at each coordinate to a + * representative cost entry and store it in the lookahead cost map */ + cost_map_.set_cost_map(all_costs); + +// diagnostics #if 0 - for (auto &cost : all_costs[iseg]) { - const auto& key = cost.first; - const auto& val = cost.second; - VTR_LOG("%d -> %d (%d, %d): %g, %g\n", - val.from_node, val.to_node, - key.delta.x(), key.delta.y(), - val.cost_entry.delay, val.cost_entry.congestion); - } + for (auto &cost : all_costs) { + const auto& key = cost.first; + const auto& val = cost.second; + VTR_LOG("%d -> %d (%d, %d): %g, %g\n", + val.from_node, val.to_node, + key.delta.x(), key.delta.y(), + val.cost_entry.delay, val.cost_entry.congestion); + } #endif - const auto& costs = all_costs[iseg]; - if (costs.empty()) { - // check that there were no start nodes - bool empty = true; - for (const auto& row : inodes_for_segment[iseg]) { - for (auto cell : row) { - if (!cell.empty()) { - empty = false; - break; - } - } - if (!empty) break; - } - if (empty) { - VTR_LOG_WARN("Segment %s(%d) found no routes\n", - segment_inf[iseg].name.c_str(), iseg); - } else { - VTR_LOG_WARN("Segment %s(%d) found no routes, even though there are some matching nodes\n", - segment_inf[iseg].name.c_str(), iseg); - } - } else { - /* boil down the cost list in routing_cost_map at each coordinate to a - * representative cost entry and store it in the lookahead cost map */ - cost_map_.set_cost_map(iseg, costs); - } -#if 0 +#if 1 + for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { VTR_LOG("cost map for %s(%d)\n", - segment_inf[iseg].name.c_str(), iseg); + segment_inf[iseg].name.c_str(), iseg); cost_map_.print(iseg); -#endif } +#endif #if 0 for (std::pair p : cost_map_.list_empty()) { @@ -603,18 +739,17 @@ float ConnectionBoxMapLookahead::get_expected_cost( } } -// also known as the L1 norm -static int manhattan_distance(const t_rr_node& node, int x, int y) { - int node_center_x = (node.xhigh() + node.xlow()) / 2; - int node_center_y = (node.yhigh() + node.ylow()) / 2; - return abs(node_center_x - x) + abs(node_center_y - y); -} - static vtr::Rect bounding_box_for_node(const t_rr_node& node) { return vtr::Rect(node.xlow(), node.ylow(), node.xhigh() + 1, node.yhigh() + 1); } +static vtr::Point point_for_node(const t_rr_node& node) { + int x = (node.xhigh() + node.xlow()) / 2; + int y = (node.yhigh() + node.ylow()) / 2; + return vtr::Point(x, y); +} + // for each segment type, find the nearest nodes to an equally spaced grid of points // within the bounding box for that segment type static void find_inodes_for_segment_types(std::vector* inodes_for_segment) { @@ -639,15 +774,16 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se inodes_for_segment->clear(); inodes_for_segment->resize(num_segments); for (auto& grid : *inodes_for_segment) { - for (auto& row : grid) { - for (auto& cell : row) { - cell = std::vector(); + for (int y = 0; y < SAMPLE_GRID_SIZE; y++) { + for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { + grid.point[y][x].samples = std::vector(); } } } for (size_t i = 0; i < rr_nodes.size(); i++) { auto& node = rr_nodes[i]; + vtr::Rect node_bounds = bounding_box_for_node(node); if (node.type() != CHANX && node.type() != CHANY) continue; if (node.capacity() == 0 || device_ctx.connection_boxes.find_canonical_loc(i) == nullptr) continue; @@ -659,24 +795,24 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se auto& grid = (*inodes_for_segment)[seg_index]; for (int sy = 0; sy < SAMPLE_GRID_SIZE; sy++) { for (int sx = 0; sx < SAMPLE_GRID_SIZE; sx++) { - auto& stored_inodes = grid[sy][sx]; - if (stored_inodes.empty()) { - stored_inodes.push_back(i); + auto& point = grid.point[sy][sx]; + if (point.samples.empty()) { + point.samples.push_back(i); + point.location = vtr::Point(node.xlow(), node.ylow()); goto next_rr_node; } - auto& first_stored_node = rr_nodes[stored_inodes.front()]; - if (first_stored_node.xhigh() >= node.xhigh() && first_stored_node.xlow() <= node.xlow() && first_stored_node.yhigh() >= node.yhigh() && first_stored_node.ylow() <= node.ylow()) { - stored_inodes.push_back(i); + if (node_bounds.contains(point.location)) { + point.samples.push_back(i); goto next_rr_node; } vtr::Point target = sample(bounding_box_for_segment[seg_index], sx + 1, sy + 1, SAMPLE_GRID_SIZE + 1); - int distance_new = manhattan_distance(node, target.x(), target.y()); - int distance_stored = manhattan_distance(first_stored_node, target.x(), target.y()); + int distance_new = manhattan_distance(point_for_node(node), target); + int distance_stored = manhattan_distance(point.location, target); if (distance_new < distance_stored) { - stored_inodes.clear(); - stored_inodes.push_back(i); + point.samples.clear(); + point.samples.push_back(i); goto next_rr_node; } } diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h index 411394c89d5..3f9ed424924 100644 --- a/vpr/src/route/connection_box_lookahead_map.h +++ b/vpr/src/route/connection_box_lookahead_map.h @@ -10,14 +10,48 @@ // Keys in the RoutingCosts map struct RoutingCostKey { - // offset of the destination connection box from the starting segment - vtr::Point delta; + // segment type index + int seg_index; // type of the destination connection box ConnectionBoxId box_id; + // offset of the destination connection box from the starting segment + vtr::Point delta; + + RoutingCostKey() + : seg_index(-1) + , delta(0, 0) {} + RoutingCostKey(int seg_index_arg, ConnectionBoxId box_id_arg, vtr::Point delta_arg) + : seg_index(seg_index_arg) + , box_id(box_id_arg) + , delta(delta_arg) {} + bool operator==(const RoutingCostKey& other) const { - return delta == other.delta && box_id == other.box_id; + return seg_index == other.seg_index && box_id == other.box_id && delta == other.delta; + } +}; + +// compressed version of RoutingCostKey +// TODO add bounds checks +struct CompressedRoutingCostKey { + uint32_t data; + + CompressedRoutingCostKey() { + data = -1; + } + CompressedRoutingCostKey(const RoutingCostKey& key) { + data = key.seg_index & 0xff; + data <<= 8; + data |= size_t(key.box_id) & 0xff; + data <<= 8; + data |= key.delta.x() & 0xff; + data <<= 8; + data |= key.delta.y() & 0xff; + } + + bool operator==(CompressedRoutingCostKey other) const { + return data == other.data; } }; @@ -30,23 +64,23 @@ struct RoutingCost { util::Cost_Entry cost_entry; }; -// Specialization of std::hash for RoutingCostKey -namespace std { -template<> -struct hash { - typedef RoutingCostKey argument_type; - typedef std::size_t result_type; - result_type operator()(argument_type const& s) const noexcept { - result_type const h1(std::hash{}(s.delta.x())); - result_type const h2(std::hash{}(s.delta.y())); - result_type const h3(std::hash{}(size_t(s.box_id))); - return h1 ^ ((h2 ^ (h3 << 1)) << 1); +// hash implementation for RoutingCostKey +struct HashRoutingCostKey { + std::size_t operator()(RoutingCostKey const& key) const noexcept { + uint64_t data; + data = key.seg_index & 0xffff; + data <<= 16; + data |= size_t(key.box_id) & 0xffff; + data <<= 16; + data |= key.delta.x() & 0xffff; + data <<= 16; + data |= key.delta.y() & 0xffff; + return std::hash{}(data); } }; -} // namespace std // Map used to store intermediate routing costs -typedef std::unordered_map RoutingCosts; +typedef std::unordered_map RoutingCosts; // Dense cost maps per source segment and destination connection box types class CostMap { @@ -54,10 +88,8 @@ class CostMap { void set_counts(size_t seg_count, size_t box_count); int node_to_segment(int from_node_ind) const; util::Cost_Entry find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const; - void set_cost_map(int from_seg_index, - const RoutingCosts& costs); - void set_cost_map(int from_seg_index, ConnectionBoxId box_id, const RoutingCosts& costs); - util::Cost_Entry get_nearby_cost_entry(const vtr::NdMatrix& matrix, int cx, int cy, const vtr::Rect& bounds); + void set_cost_map(const RoutingCosts& costs); + std::pair get_nearby_cost_entry(const vtr::NdMatrix& matrix, int cx, int cy, const vtr::Rect& bounds); void read(const std::string& file); void write(const std::string& file) const; void print(int iseg) const; @@ -67,6 +99,8 @@ class CostMap { vtr::NdMatrix, 2> cost_map_; vtr::NdMatrix, 2> offset_; std::vector segment_map_; + size_t seg_count_; + size_t box_count_; }; // Implementation of RouterLookahead based on source segment and destination connection box types From 768d06afa3de612ce25368d800a62192721a97f8 Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Wed, 30 Oct 2019 16:25:13 -0700 Subject: [PATCH 13/41] suggested changes Signed-off-by: Dusty DeWeese --- vpr/src/route/connection_box_lookahead_map.cpp | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index fa34c205b43..f00dddfb36e 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -345,9 +345,11 @@ std::vector> CostMap::list_empty() const { return results; } -static void assign_min_entry(util::Cost_Entry& dst, const util::Cost_Entry& src) { - if (src.delay < dst.delay) dst.delay = src.delay; - if (src.congestion < dst.congestion) dst.congestion = src.congestion; +static void assign_min_entry(util::Cost_Entry* dst, const util::Cost_Entry& src) { + if (src.delay < dst->delay) { + dst->delay = src.delay; + dst->congestion = src.congestion; + } } // find the minimum cost entry from the nearest manhattan distance neighbor @@ -368,11 +370,11 @@ std::pair CostMap::get_nearby_cost_entry(const vtr::NdMat int yp = cy + oy; int yn = cy - oy; if (bounds.contains(vtr::Point(x, yp))) { - assign_min_entry(entry, matrix[x][yp]); + assign_min_entry(&entry, matrix[x][yp]); in_bounds = true; } if (bounds.contains(vtr::Point(x, yn))) { - assign_min_entry(entry, matrix[x][yn]); + assign_min_entry(&entry, matrix[x][yn]); in_bounds = true; } } @@ -457,7 +459,10 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, float expected_congestion = cost_entry.congestion; float expected_cost = criticality_fac * expected_delay + (1.0 - criticality_fac) * expected_congestion; - VTR_ASSERT(std::isfinite(expected_cost) && expected_cost >= 0.f); + if (!std::isfinite(expected_cost) || expected_cost < 0.f) { + VTR_LOG_ERROR("invalid cost for segment %d to connection box %d at (%d, %d)\n", from_seg_index, (int)size_t(box_id), (int)dx, (int)dy); + VTR_ASSERT(0); + } return expected_cost; } From 620903db4f1e2b029742026bc0e8aa553bc87a00 Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Wed, 30 Oct 2019 18:18:10 -0700 Subject: [PATCH 14/41] changes suggested in PR Signed-off-by: Dusty DeWeese --- .../route/connection_box_lookahead_map.cpp | 263 +++++++++++------- vpr/src/route/route_timing.h | 2 + 2 files changed, 159 insertions(+), 106 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index f00dddfb36e..824b8422179 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -40,26 +40,41 @@ #define REPRESENTATIVE_ENTRY_METHOD util::SMALLEST // #define FILL_LIMIT 30 -/* Sample based an NxN grid of starting segments, where N = SAMPLE_GRID_SIZE */ +#define CONNECTION_BOX_LOOKAHEAD_MAP_PRINT_COST_MAPS + +// Sample based an NxN grid of starting segments, where N = SAMPLE_GRID_SIZE static constexpr int SAMPLE_GRID_SIZE = 3; + +// Stop Dijkstra expansion after reaching COST_LIMIT static constexpr float COST_LIMIT = std::numeric_limits::infinity(); + +// Number of entries in the routing cost cache static constexpr int DIJKSTRA_CACHE_SIZE = 64; + +// Only entries with a delta inside the window (+- DIJKSTRA_CACHE_WINDOW x/y) are cached static constexpr int DIJKSTRA_CACHE_WINDOW = 3; + +// Don't continue storing a path after hitting a lower-or-same cost entry. static constexpr bool BREAK_ON_MISS = false; + +// Distance penalties filling are calculated based on available samples, but can be adjusted with this factor. static constexpr float PENALTY_FACTOR = 1.f; +// a sample point for a segment type, contains all segments at the VPR location struct SamplePoint { - uint64_t order; + uint64_t order; // used to order sample points vtr::Point location; std::vector samples; SamplePoint() : location(0, 0) {} }; +// a grid of sample points struct SampleGrid { SamplePoint point[SAMPLE_GRID_SIZE][SAMPLE_GRID_SIZE]; }; +// implements a simple cache of key(K)/value(V) pairs of N entries template class SimpleCache { public: @@ -67,12 +82,14 @@ class SimpleCache { : pos(0) , hits(0) , misses(0) {} + + // O(N) lookup bool get(K key, V* value) { for (int i = 0; i < N; i++) { auto& k = keys[i]; if (k == key) { auto& v = values[i]; -#if 0 +#if defined(CONNECTION_BOX_LOOKAHEAD_PUSH_BACK_HITS) // preserve the found key by pushing it back int last = (pos + N - 1) % N; std::swap(k, keys[last]); @@ -86,14 +103,20 @@ class SimpleCache { misses++; return false; } + + // O(1) insertion (overwriting an older entry) void insert(K key, V val) { keys[pos] = key; values[pos] = val; pos = (pos + 1) % N; } + + // ratio of successful lookups float hit_ratio() { return hits ? static_cast(hits) / static_cast(hits + misses) : 0.f; } + + // ratio of unsuccessful lookups float miss_ratio() { return misses ? static_cast(misses) / static_cast(hits + misses) : 0.f; } @@ -246,7 +269,7 @@ void CostMap::set_cost_map(const RoutingCosts& costs) { bool couldnt_fill = false; auto shifted_bounds = vtr::Rect(0, 0, seg_box_bounds.width(), seg_box_bounds.height()); int max_fill = 0; - for (unsigned ix = 0; ix < matrix.dim_size(0) && !couldnt_fill; ix++) { + for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { util::Cost_Entry& cost_entry = matrix[ix][iy]; if (!cost_entry.valid()) { @@ -262,6 +285,11 @@ void CostMap::set_cost_map(const RoutingCosts& costs) { } } } +#if !defined(FILL_LIMIT) + if (couldnt_fill) { + break; + } +#endif } if (!couldnt_fill) { @@ -445,13 +473,11 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, if (!cost_entry.valid()) { // there is no route -#if 0 - // useful for debugging but can be really noisy - VTR_LOG_WARN("Not connected %d (%s, %d) -> %d (%s, %d, (%d, %d))\n", - from_node_ind, device_ctx.rr_nodes[from_node_ind].type_string(), from_seg_index, - to_node_ind, device_ctx.rr_nodes[to_node_ind].type_string(), - (int)size_t(box_id), (int)box_location.first, (int)box_location.second); -#endif + VTR_LOGV_DEBUG(f_router_debug, + "Not connected %d (%s, %d) -> %d (%s, %d, (%d, %d))\n", + from_node_ind, device_ctx.rr_nodes[from_node_ind].type_string(), from_seg_index, + to_node_ind, device_ctx.rr_nodes[to_node_ind].type_string(), + (int)size_t(box_id), (int)box_location.first, (int)box_location.second); return std::numeric_limits::infinity(); } @@ -466,6 +492,111 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, return expected_cost; } +// add a best cost routing path from start_node_ind to node_ind to routing costs +static void add_paths(int start_node_ind, + int node_ind, + std::unordered_map* paths, + RoutingCosts* routing_costs, + SimpleCache* cache) { + auto& device_ctx = g_vpr_ctx.device(); + ConnectionBoxId box_id; + std::pair box_location; + float site_pin_delay; + bool found = device_ctx.connection_boxes.find_connection_box( + node_ind, &box_id, &box_location, &site_pin_delay); + if (!found) { + VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", node_ind); + } + + // reconstruct the path + std::vector path; + for (int i = node_ind; i != start_node_ind; path.push_back(i = (*paths)[i].parent)) + ; + util::PQ_Entry parent_entry(start_node_ind, UNDEFINED, 0, 0, 0, true); + + // recalculate the path with congestion + util::PQ_Entry current_full = parent_entry; + int parent = start_node_ind; + for (auto it = path.rbegin(); it != path.rend(); it++) { + auto& parent_node = device_ctx.rr_nodes[parent]; + current_full = util::PQ_Entry(*it, parent_node.edge_switch((*paths)[*it].edge), current_full.delay, + current_full.R_upstream, current_full.congestion_upstream, false); + parent = *it; + } + + // add each node along the path subtracting the incremental costs from the current costs + parent = start_node_ind; + for (auto it = path.rbegin(); it != path.rend(); it++) { + auto& parent_node = device_ctx.rr_nodes[parent]; + int seg_index = device_ctx.rr_indexed_data[parent_node.cost_index()].seg_index; + const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(parent); + if (from_canonical_loc == nullptr) { + VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d", + parent); + } + + vtr::Point delta(ssize_t(from_canonical_loc->first) - ssize_t(box_location.first), + ssize_t(from_canonical_loc->second) - ssize_t(box_location.second)); + RoutingCostKey key = { + seg_index, + box_id, + delta}; + CompressedRoutingCostKey compressed_key(key); + RoutingCost val = { + parent, + node_ind, + util::Cost_Entry( + current_full.delay - parent_entry.delay, + current_full.congestion_upstream - parent_entry.congestion_upstream)}; + + // NOTE: implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST + + // use a cache for a small window around a delta of (0, 0) + float cost = 0.f; + bool in_window = abs(delta.x()) <= DIJKSTRA_CACHE_WINDOW && abs(delta.y()) <= DIJKSTRA_CACHE_WINDOW; + if (in_window && cache->get(compressed_key, &cost) && cost <= val.cost_entry.delay) { + // the sample was not cheaper than the cached sample + const auto& x = routing_costs->find(key); + VTR_ASSERT(x != routing_costs->end()); + if (BREAK_ON_MISS) { + // don't store the rest of the path + break; + } + } else { + const auto& x = routing_costs->find(key); + if (x != routing_costs->end()) { + if (x->second.cost_entry.delay > val.cost_entry.delay) { + // this sample is cheaper + (*routing_costs)[key] = val; + if (in_window) { + cache->insert(compressed_key, val.cost_entry.delay); + } + } else { + // this sample is not cheaper + if (BREAK_ON_MISS) { + // don't store the rest of the path + break; + } + if (in_window) { + cache->insert(compressed_key, x->second.cost_entry.delay); + } + } + } else { + // this sample is new + (*routing_costs)[key] = val; + if (in_window) { + cache->insert(compressed_key, val.cost_entry.delay); + } + } + } + + // update parent data + parent_entry = util::PQ_Entry(*it, parent_node.edge_switch((*paths)[*it].edge), parent_entry.delay, + parent_entry.R_upstream, parent_entry.congestion_upstream, false); + parent = *it; + } +} + /* runs Dijkstra's algorithm from specified node until all nodes have been * visited. Each time a pin is visited, the delay/congestion information * to that pin is stored to an entry in the routing_cost_map */ @@ -507,97 +638,7 @@ static float run_dijkstra(int start_node_ind, /* if this node is an ipin record its congestion/delay in the routing_cost_map */ if (device_ctx.rr_nodes[node_ind].type() == IPIN) { - ConnectionBoxId box_id; - std::pair box_location; - float site_pin_delay; - bool found = device_ctx.connection_boxes.find_connection_box( - node_ind, &box_id, &box_location, &site_pin_delay); - if (!found) { - VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", node_ind); - } - - // reconstruct the path - std::vector path; - for (int i = node_ind; i != start_node_ind; path.push_back(i = paths[i].parent)) - ; - util::PQ_Entry parent_entry(start_node_ind, UNDEFINED, 0, 0, 0, true); - - // recalculate the path with congestion - util::PQ_Entry current_full = parent_entry; - int parent = start_node_ind; - for (auto it = path.rbegin(); it != path.rend(); it++) { - auto& parent_node = device_ctx.rr_nodes[parent]; - current_full = util::PQ_Entry(*it, parent_node.edge_switch(paths[*it].edge), current_full.delay, - current_full.R_upstream, current_full.congestion_upstream, false); - parent = *it; - } - - // add each node along the path subtracting the incremental costs from the current costs - parent = start_node_ind; - for (auto it = path.rbegin(); it != path.rend(); it++) { - auto& parent_node = device_ctx.rr_nodes[parent]; - int seg_index = device_ctx.rr_indexed_data[parent_node.cost_index()].seg_index; - const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(parent); - if (from_canonical_loc == nullptr) { - VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d", - parent); - } - - vtr::Point delta(ssize_t(from_canonical_loc->first) - ssize_t(box_location.first), - ssize_t(from_canonical_loc->second) - ssize_t(box_location.second)); - RoutingCostKey key = { - seg_index, - box_id, - delta}; - CompressedRoutingCostKey compressed_key(key); - RoutingCost val = { - parent, - node_ind, - util::Cost_Entry( - current_full.delay - parent_entry.delay, - current_full.congestion_upstream - parent_entry.congestion_upstream)}; - - // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST - float cost = 0.f; - bool in_window = abs(delta.x()) <= DIJKSTRA_CACHE_WINDOW && abs(delta.y()) <= DIJKSTRA_CACHE_WINDOW; - if (in_window && cache->get(compressed_key, &cost) && cost <= val.cost_entry.delay) { - // the sample was not cheaper than the cached sample - if (BREAK_ON_MISS) { - // don't store the rest of the path - break; - } - } else { - const auto& x = routing_costs->find(key); - if (x != routing_costs->end()) { - if (x->second.cost_entry.delay > val.cost_entry.delay) { - // this sample is cheaper - (*routing_costs)[key] = val; - if (in_window) { - cache->insert(compressed_key, val.cost_entry.delay); - } - } else { - // this sample is not cheaper - if (BREAK_ON_MISS) { - // don't store the rest of the path - break; - } - if (in_window) { - cache->insert(compressed_key, x->second.cost_entry.delay); - } - } - } else { - // this sample is new - (*routing_costs)[key] = val; - if (in_window) { - cache->insert(compressed_key, val.cost_entry.delay); - } - } - } - - parent_entry = util::PQ_Entry(*it, parent_node.edge_switch(paths[*it].edge), parent_entry.delay, - parent_entry.R_upstream, parent_entry.congestion_upstream, false); - parent = *it; - } + add_paths(start_node_ind, node_ind, &paths, routing_costs, cache); } else { expand_dijkstra_neighbours(current, paths, node_expanded, pq); node_expanded[node_ind] = true; @@ -657,8 +698,15 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen for (const auto& point : sample_points) { #endif float max_cost = 0.f; + + // holds the cost entries for a run RoutingCosts costs; + + // a cache to avoid hammering the RoutingCosts map, since lookups will be dominated by a few keys + // must be consistent with `costs` i.e. any entry in the cache should also be in `costs` + // NOTE: this is used as a write-through cache, maybe try write-back SimpleCache cache; + for (auto node_ind : point.samples) { max_cost = std::max(max_cost, run_dijkstra(node_ind, &costs, &cache, COST_LIMIT)); } @@ -695,8 +743,8 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen cost_map_.set_cost_map(all_costs); // diagnostics -#if 0 - for (auto &cost : all_costs) { +#if defined(CONNECTION_BOX_LOOKAHEAD_MAP_PRINT_COST_ENTRIES) + for (auto& cost : all_costs) { const auto& key = cost.first; const auto& val = cost.second; VTR_LOG("%d -> %d (%d, %d): %g, %g\n", @@ -706,7 +754,7 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen } #endif -#if 1 +#if defined(CONNECTION_BOX_LOOKAHEAD_MAP_PRINT_COST_MAPS) for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { VTR_LOG("cost map for %s(%d)\n", segment_inf[iseg].name.c_str(), iseg); @@ -714,7 +762,7 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen } #endif -#if 0 +#if defined(CONNECTION_BOX_LOOKAHEAD_MAP_PRINT_EMPTY_MAPS) for (std::pair p : cost_map_.list_empty()) { int iseg, box_id; std::tie(iseg, box_id) = p; @@ -744,11 +792,14 @@ float ConnectionBoxMapLookahead::get_expected_cost( } } +// the smallest bounding box containing a node static vtr::Rect bounding_box_for_node(const t_rr_node& node) { return vtr::Rect(node.xlow(), node.ylow(), node.xhigh() + 1, node.yhigh() + 1); } +// the center point for a node +// it is unknown where the the node starts, so use the average static vtr::Point point_for_node(const t_rr_node& node) { int x = (node.xhigh() + node.xlow()) / 2; int y = (node.yhigh() + node.ylow()) / 2; diff --git a/vpr/src/route/route_timing.h b/vpr/src/route/route_timing.h index 90756a02e19..98ab811cf46 100644 --- a/vpr/src/route/route_timing.h +++ b/vpr/src/route/route_timing.h @@ -16,6 +16,8 @@ extern bool f_router_debug; +extern bool f_router_debug; + int get_max_pins_per_net(); bool try_timing_driven_route(const t_router_opts& router_opts, From 4015a513891e704da0603042b8d0e831f6efffc9 Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Thu, 31 Oct 2019 18:19:10 -0700 Subject: [PATCH 15/41] add penalty to Cost_Map::find_cost for points outside of the map Signed-off-by: Dusty DeWeese --- .../route/connection_box_lookahead_map.cpp | 54 +++++++++++-------- vpr/src/route/connection_box_lookahead_map.h | 5 +- 2 files changed, 34 insertions(+), 25 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 824b8422179..d1ee992583d 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -140,12 +140,29 @@ static int manhattan_distance(const vtr::Point& a, const vtr::Point& b return abs(b.x() - a.x()) + abs(b.y() - a.y()); } +template +constexpr const T& clamp(const T& v, const T& lo, const T& hi) { + return std::min(std::max(v, lo), hi); +} + +template +static vtr::Point closest_point_in_rect(const vtr::Rect& r, const vtr::Point& p) { + if (r.empty()) { + return vtr::Point(0, 0); + } else { + return vtr::Point(clamp(p.x(), r.xmin(), r.xmax() - 1), + clamp(p.y(), r.ymin(), r.ymax() - 1)); + } +} + // resize internal data structures void CostMap::set_counts(size_t seg_count, size_t box_count) { cost_map_.clear(); offset_.clear(); + penalty_.clear(); cost_map_.resize({seg_count, box_count}); offset_.resize({seg_count, box_count}); + penalty_.resize({seg_count, box_count}); seg_count_ = seg_count; box_count_ = box_count; @@ -166,6 +183,11 @@ int CostMap::node_to_segment(int from_node_ind) const { return segment_map_[from_node_ind]; } +static util::Cost_Entry penalize(const util::Cost_Entry& entry, int distance, float penalty) { + return util::Cost_Entry(entry.delay + distance * penalty * PENALTY_FACTOR, + entry.congestion); +} + // get a cost entry for a segment type, connection box type, and offset util::Cost_Entry CostMap::find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const { VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); @@ -174,29 +196,14 @@ util::Cost_Entry CostMap::find_cost(int from_seg_index, ConnectionBoxId box_id, return util::Cost_Entry(); } - int dx = delta_x - offset_[from_seg_index][size_t(box_id)].first; - int dy = delta_y - offset_[from_seg_index][size_t(box_id)].second; - - if (dx < 0) { - dx = 0; - } - if (dy < 0) { - dy = 0; - } - - if (dx >= (ssize_t)cost_map.dim_size(0)) { - dx = cost_map.dim_size(0) - 1; - } - if (dy >= (ssize_t)cost_map.dim_size(1)) { - dy = cost_map.dim_size(1) - 1; - } - - return cost_map_[from_seg_index][size_t(box_id)][dx][dy]; -} - -static util::Cost_Entry penalize(const util::Cost_Entry& entry, int distance, float penalty) { - return util::Cost_Entry(entry.delay + distance * penalty * PENALTY_FACTOR, - entry.congestion); + vtr::Point coord(delta_x - offset_[from_seg_index][size_t(box_id)].first, + delta_y - offset_[from_seg_index][size_t(box_id)].second); + vtr::Rect bounds(0, 0, cost_map.dim_size(0), cost_map.dim_size(1)); + auto closest = closest_point_in_rect(bounds, coord); + auto cost = cost_map_[from_seg_index][size_t(box_id)][closest.x()][closest.y()]; + float penalty = penalty_[from_seg_index][size_t(box_id)]; + auto distance = manhattan_distance(closest, coord); + return penalize(cost, distance, penalty); } // set the cost map for a segment type and connection box type, filling holes @@ -263,6 +270,7 @@ void CostMap::set_cost_map(const RoutingCosts& costs) { } } float delay_penalty = (max_delay - min_delay) / static_cast(std::max(1, manhattan_distance(max_location, min_location))); + penalty_[seg][box] = delay_penalty; // find missing cost entries and fill them in by copying a nearby cost entry std::vector> missing; diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h index 3f9ed424924..9b5bdb0afea 100644 --- a/vpr/src/route/connection_box_lookahead_map.h +++ b/vpr/src/route/connection_box_lookahead_map.h @@ -96,8 +96,9 @@ class CostMap { std::vector> list_empty() const; private: - vtr::NdMatrix, 2> cost_map_; - vtr::NdMatrix, 2> offset_; + vtr::Matrix> cost_map_; + vtr::Matrix> offset_; + vtr::Matrix penalty_; std::vector segment_map_; size_t seg_count_; size_t box_count_; From af96b4c4cbe73f367da84464bbb7e913345eb54c Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Fri, 1 Nov 2019 18:07:41 -0700 Subject: [PATCH 16/41] store penalties Signed-off-by: Dusty DeWeese --- libs/libvtrcapnproto/connection_map.capnp | 5 ++++ .../route/connection_box_lookahead_map.cpp | 25 ++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/libs/libvtrcapnproto/connection_map.capnp b/libs/libvtrcapnproto/connection_map.capnp index 1ca672108c8..bc445a30e70 100644 --- a/libs/libvtrcapnproto/connection_map.capnp +++ b/libs/libvtrcapnproto/connection_map.capnp @@ -12,8 +12,13 @@ struct VprVector2D { y @1 :Int64; } +struct VprFloatEntry { + value @0 :Float32; +} + struct VprCostMap { costMap @0 :Matrix.Matrix((Matrix.Matrix(VprCostEntry))); offset @1 :Matrix.Matrix(VprVector2D); segmentMap @2 :List(Int64); + penalty @3 :Matrix.Matrix(VprFloatEntry); } diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index d1ee992583d..fc852adc9ea 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -245,6 +245,7 @@ void CostMap::set_cost_map(const RoutingCosts& costs) { // fill the holes for (size_t seg = 0; seg < seg_count_; seg++) { for (size_t box = 0; box < box_count_; box++) { + penalty_[seg][box] = std::numeric_limits::infinity(); const auto& seg_box_bounds = bounds[seg][box]; if (seg_box_bounds.empty()) { continue; @@ -252,7 +253,7 @@ void CostMap::set_cost_map(const RoutingCosts& costs) { auto& matrix = cost_map_[seg][box]; // calculate delay penalty - float min_delay = 0.f, max_delay = 0.f; + float min_delay = std::numeric_limits::infinity(), max_delay = 0.f; vtr::Point min_location(0, 0), max_location(0, 0); for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { @@ -937,6 +938,16 @@ static void FromMatrixCostEntry( out, in, FromCostEntry); } +static void ToFloat(float* out, const VprFloatEntry::Reader& in) { + // Getting a scalar field is always "get()". + *out = in.getValue(); +} + +static void FromFloat(VprFloatEntry::Builder* out, const float& in) { + // Setting a scalar field is always "set(value)". + out->setValue(in); +} + void CostMap::read(const std::string& file) { MmapFile f(file); @@ -965,6 +976,12 @@ void CostMap::read(const std::string& file) { ToNdMatrix<2, Matrix, vtr::NdMatrix>( &cost_map_, cost_maps, ToMatrixCostEntry); } + + { + const auto& penalty = cost_map.getPenalty(); + ToNdMatrix<2, VprFloatEntry, float>( + &penalty_, penalty, ToFloat); + } } void CostMap::write(const std::string& file) const { @@ -991,6 +1008,12 @@ void CostMap::write(const std::string& file) const { &cost_maps, cost_map_, FromMatrixCostEntry); } + { + auto penalty = cost_map.initPenalty(); + FromNdMatrix<2, VprFloatEntry, float>( + &penalty, penalty_, FromFloat); + } + writeMessageToFile(file, &builder); } #endif From 58f58b2a0d1d79b1b67c2d7b7df2502eb704da59 Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Mon, 4 Nov 2019 12:33:36 -0800 Subject: [PATCH 17/41] printf -> VTR_LOG Signed-off-by: Dusty DeWeese --- vpr/src/route/connection_box_lookahead_map.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index fc852adc9ea..824a344fb7a 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -336,10 +336,10 @@ void CostMap::print(int iseg) const { box_id++) { auto& matrix = cost_map_[iseg][box_id]; if (matrix.dim_size(0) == 0 || matrix.dim_size(1) == 0) { - printf("cost EMPTY for box_id = %lu\n", box_id); + VTR_LOG("cost EMPTY for box_id = %lu\n", box_id); continue; } - printf("cost for box_id = %lu\n", box_id); + VTR_LOG("cost for box_id = %lu\n", box_id); double sum = 0.0; for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { @@ -354,18 +354,18 @@ void CostMap::print(int iseg) const { for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { const auto& entry = matrix[ix][iy]; if (!entry.valid()) { - printf("*"); + VTR_LOG("*"); } else if (entry.delay * 4 > avg * 5) { - printf("O"); + VTR_LOG("O"); } else if (entry.delay > avg) { - printf("o"); + VTR_LOG("o"); } else if (entry.delay * 4 > avg * 3) { - printf("."); + VTR_LOG("."); } else { - printf(" "); + VTR_LOG(" "); } } - printf("\n"); + VTR_LOG("\n"); } } } From f6aa3c4cb646a81e8ff459bb516ce10f491925c2 Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Wed, 6 Nov 2019 12:01:55 -0800 Subject: [PATCH 18/41] change sampling method Signed-off-by: Dusty DeWeese --- .../route/connection_box_lookahead_map.cpp | 121 ++++++++++++------ 1 file changed, 82 insertions(+), 39 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 824a344fb7a..410918a8b6e 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -802,17 +802,48 @@ float ConnectionBoxMapLookahead::get_expected_cost( } // the smallest bounding box containing a node -static vtr::Rect bounding_box_for_node(const t_rr_node& node) { - return vtr::Rect(node.xlow(), node.ylow(), - node.xhigh() + 1, node.yhigh() + 1); +static vtr::Rect bounding_box_for_node(const ConnectionBoxes& connection_boxes, int node_ind) { + const std::pair* loc = connection_boxes.find_canonical_loc(node_ind); + if (loc == nullptr) { + return vtr::Rect(); + } else { + return vtr::Rect(vtr::Point(loc->first, loc->second)); + } } -// the center point for a node -// it is unknown where the the node starts, so use the average -static vtr::Point point_for_node(const t_rr_node& node) { - int x = (node.xhigh() + node.xlow()) / 2; - int y = (node.yhigh() + node.ylow()) / 2; - return vtr::Point(x, y); +static vtr::Point choose_point(const vtr::Matrix& counts, const vtr::Rect& bounding_box, int sx, int sy, int n) { + vtr::Rect window(sample(bounding_box, sx, sy, n), + sample(bounding_box, sx + 1, sy + 1, n)); + vtr::Point center = sample(window, 1, 1, 2); + vtr::Point sample_point = center; + int sample_distance = 0; + int sample_count = counts[sample_point.x()][sample_point.y()]; + for (int y = window.ymin(); y < window.ymax(); y++) { + for (int x = window.xmin(); x < window.xmax(); x++) { + vtr::Point here(x, y); + int count = counts[x][y]; + if (count < sample_count) continue; + int distance = manhattan_distance(center, here); + if (count > sample_count || (count == sample_count && distance < sample_distance)) { + sample_point = here; + sample_count = count; + sample_distance = distance; + } + } + } + return sample_point; +} + +// linear lookup, so consider something more sophisticated for large SAMPLE_GRID_SIZEs +static std::pair grid_lookup(const SampleGrid& grid, vtr::Point point) { + for (int sy = 0; sy < SAMPLE_GRID_SIZE; sy++) { + for (int sx = 0; sx < SAMPLE_GRID_SIZE; sx++) { + if (grid.point[sy][sx].location == point) { + return std::make_pair(sx, sy); + } + } + } + return std::make_pair(-1, -1); } // for each segment type, find the nearest nodes to an equally spaced grid of points @@ -821,6 +852,7 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se auto& device_ctx = g_vpr_ctx.device(); auto& rr_nodes = device_ctx.rr_nodes; const int num_segments = inodes_for_segment->size(); + std::vector> segment_counts(num_segments); // compute bounding boxes for each segment type std::vector> bounding_box_for_segment(num_segments, vtr::Rect()); @@ -832,10 +864,16 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se VTR_ASSERT(seg_index != OPEN); VTR_ASSERT(seg_index < num_segments); - bounding_box_for_segment[seg_index].expand_bounding_box(bounding_box_for_node(node)); + bounding_box_for_segment[seg_index].expand_bounding_box(bounding_box_for_node(device_ctx.connection_boxes, i)); } - // select an inode near the center of the bounding box for each segment type + // initialize counts + for (int seg = 0; seg < num_segments; seg++) { + const auto& box = bounding_box_for_segment[seg]; + segment_counts[seg] = vtr::Matrix({size_t(box.width()), size_t(box.height())}, 0); + } + + // initialize the samples vector for each sample point inodes_for_segment->clear(); inodes_for_segment->resize(num_segments); for (auto& grid : *inodes_for_segment) { @@ -846,46 +884,51 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se } } + // count sample points for (size_t i = 0; i < rr_nodes.size(); i++) { auto& node = rr_nodes[i]; - vtr::Rect node_bounds = bounding_box_for_node(node); if (node.type() != CHANX && node.type() != CHANY) continue; - if (node.capacity() == 0 || device_ctx.connection_boxes.find_canonical_loc(i) == nullptr) continue; + if (node.capacity() == 0) continue; + const std::pair* loc = device_ctx.connection_boxes.find_canonical_loc(i); + if (loc == nullptr) continue; int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index; + segment_counts[seg_index][loc->first][loc->second] += 1; VTR_ASSERT(seg_index != OPEN); VTR_ASSERT(seg_index < num_segments); + } - auto& grid = (*inodes_for_segment)[seg_index]; - for (int sy = 0; sy < SAMPLE_GRID_SIZE; sy++) { - for (int sx = 0; sx < SAMPLE_GRID_SIZE; sx++) { - auto& point = grid.point[sy][sx]; - if (point.samples.empty()) { - point.samples.push_back(i); - point.location = vtr::Point(node.xlow(), node.ylow()); - goto next_rr_node; - } - - if (node_bounds.contains(point.location)) { - point.samples.push_back(i); - goto next_rr_node; - } - - vtr::Point target = sample(bounding_box_for_segment[seg_index], sx + 1, sy + 1, SAMPLE_GRID_SIZE + 1); - int distance_new = manhattan_distance(point_for_node(node), target); - int distance_stored = manhattan_distance(point.location, target); - if (distance_new < distance_stored) { - point.samples.clear(); - point.samples.push_back(i); - goto next_rr_node; - } + // select sample points + for (int i = 0; i < num_segments; i++) { + const auto& counts = segment_counts[i]; + const auto& bounding_box = bounding_box_for_segment[i]; + auto& grid = (*inodes_for_segment)[i]; + for (int y = 0; y < SAMPLE_GRID_SIZE; y++) { + for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { + grid.point[y][x].location = choose_point(counts, bounding_box, x, y, SAMPLE_GRID_SIZE); } } + } - // to break out from the inner loop - next_rr_node: - continue; + // select an inode near the center of the bounding box for each segment type + for (size_t i = 0; i < rr_nodes.size(); i++) { + auto& node = rr_nodes[i]; + if (node.type() != CHANX && node.type() != CHANY) continue; + if (node.capacity() == 0) continue; + const std::pair* loc = device_ctx.connection_boxes.find_canonical_loc(i); + if (loc == nullptr) continue; + + int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index; + + VTR_ASSERT(seg_index != OPEN); + VTR_ASSERT(seg_index < num_segments); + + auto& grid = (*inodes_for_segment)[seg_index]; + auto grid_loc = grid_lookup(grid, vtr::Point(loc->first, loc->second)); + if (grid_loc.first >= 0) { + grid.point[grid_loc.first][grid_loc.second].samples.push_back(i); + } } } From 988e3e5e0f6b89b70a30422fd37b2748185c019a Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Wed, 6 Nov 2019 13:40:27 -0800 Subject: [PATCH 19/41] remove unnecessary lookups Signed-off-by: Dusty DeWeese --- .../route/connection_box_lookahead_map.cpp | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 410918a8b6e..57b1c691e35 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -565,18 +565,17 @@ static void add_paths(int start_node_ind, bool in_window = abs(delta.x()) <= DIJKSTRA_CACHE_WINDOW && abs(delta.y()) <= DIJKSTRA_CACHE_WINDOW; if (in_window && cache->get(compressed_key, &cost) && cost <= val.cost_entry.delay) { // the sample was not cheaper than the cached sample - const auto& x = routing_costs->find(key); - VTR_ASSERT(x != routing_costs->end()); if (BREAK_ON_MISS) { // don't store the rest of the path break; } } else { - const auto& x = routing_costs->find(key); - if (x != routing_costs->end()) { - if (x->second.cost_entry.delay > val.cost_entry.delay) { + auto result = routing_costs->insert(std::make_pair(key, val)); + if (!result.second) { + auto& existing = result.first->second; + if (existing.cost_entry.delay > val.cost_entry.delay) { // this sample is cheaper - (*routing_costs)[key] = val; + existing = val; if (in_window) { cache->insert(compressed_key, val.cost_entry.delay); } @@ -587,7 +586,7 @@ static void add_paths(int start_node_ind, break; } if (in_window) { - cache->insert(compressed_key, x->second.cost_entry.delay); + cache->insert(compressed_key, existing.cost_entry.delay); } } } else { @@ -731,11 +730,12 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen for (const auto& cost : costs) { const auto& key = cost.first; const auto& val = cost.second; - const auto& x = all_costs.find(key); + const auto& result = all_costs.insert(std::make_pair(key, val)); + auto& existing = result.first->second; // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST - if (x == all_costs.end() || x->second.cost_entry.delay > val.cost_entry.delay) { - all_costs[key] = val; + if (!result.second || existing.cost_entry.delay > val.cost_entry.delay) { + existing = val; } } From 1ce27628c40625e09f422b4d0dd07975117abb8f Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Wed, 6 Nov 2019 17:39:47 -0800 Subject: [PATCH 20/41] remove SimpleCache, use vtr::hash_combine, add timing stats Signed-off-by: Dusty DeWeese --- .../route/connection_box_lookahead_map.cpp | 164 +++++------------- vpr/src/route/connection_box_lookahead_map.h | 37 +--- 2 files changed, 52 insertions(+), 149 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 57b1c691e35..35e3b224844 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -48,14 +48,8 @@ static constexpr int SAMPLE_GRID_SIZE = 3; // Stop Dijkstra expansion after reaching COST_LIMIT static constexpr float COST_LIMIT = std::numeric_limits::infinity(); -// Number of entries in the routing cost cache -static constexpr int DIJKSTRA_CACHE_SIZE = 64; - -// Only entries with a delta inside the window (+- DIJKSTRA_CACHE_WINDOW x/y) are cached -static constexpr int DIJKSTRA_CACHE_WINDOW = 3; - // Don't continue storing a path after hitting a lower-or-same cost entry. -static constexpr bool BREAK_ON_MISS = false; +static constexpr bool BREAK_ON_MISS = true; // Distance penalties filling are calculated based on available samples, but can be adjusted with this factor. static constexpr float PENALTY_FACTOR = 1.f; @@ -74,65 +68,9 @@ struct SampleGrid { SamplePoint point[SAMPLE_GRID_SIZE][SAMPLE_GRID_SIZE]; }; -// implements a simple cache of key(K)/value(V) pairs of N entries -template -class SimpleCache { - public: - SimpleCache() - : pos(0) - , hits(0) - , misses(0) {} - - // O(N) lookup - bool get(K key, V* value) { - for (int i = 0; i < N; i++) { - auto& k = keys[i]; - if (k == key) { - auto& v = values[i]; -#if defined(CONNECTION_BOX_LOOKAHEAD_PUSH_BACK_HITS) - // preserve the found key by pushing it back - int last = (pos + N - 1) % N; - std::swap(k, keys[last]); - std::swap(v, values[last]); -#endif - *value = v; - hits++; - return true; - } - } - misses++; - return false; - } - - // O(1) insertion (overwriting an older entry) - void insert(K key, V val) { - keys[pos] = key; - values[pos] = val; - pos = (pos + 1) % N; - } - - // ratio of successful lookups - float hit_ratio() { - return hits ? static_cast(hits) / static_cast(hits + misses) : 0.f; - } - - // ratio of unsuccessful lookups - float miss_ratio() { - return misses ? static_cast(misses) / static_cast(hits + misses) : 0.f; - } - - private: - std::array keys; // keep keys together for faster scanning - std::array values; - size_t pos; - uint64_t hits; - uint64_t misses; -}; - -static float run_dijkstra(int start_node_ind, - RoutingCosts* routing_costs, - SimpleCache* cache, - float max_cost); +static std::pair run_dijkstra(int start_node_ind, + RoutingCosts* routing_costs, + float max_cost); static void find_inodes_for_segment_types(std::vector* inodes_for_segment); // also known as the L1 norm @@ -505,8 +443,7 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, static void add_paths(int start_node_ind, int node_ind, std::unordered_map* paths, - RoutingCosts* routing_costs, - SimpleCache* cache) { + RoutingCosts* routing_costs) { auto& device_ctx = g_vpr_ctx.device(); ConnectionBoxId box_id; std::pair box_location; @@ -550,7 +487,6 @@ static void add_paths(int start_node_ind, seg_index, box_id, delta}; - CompressedRoutingCostKey compressed_key(key); RoutingCost val = { parent, node_ind, @@ -560,42 +496,22 @@ static void add_paths(int start_node_ind, // NOTE: implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST - // use a cache for a small window around a delta of (0, 0) - float cost = 0.f; - bool in_window = abs(delta.x()) <= DIJKSTRA_CACHE_WINDOW && abs(delta.y()) <= DIJKSTRA_CACHE_WINDOW; - if (in_window && cache->get(compressed_key, &cost) && cost <= val.cost_entry.delay) { - // the sample was not cheaper than the cached sample - if (BREAK_ON_MISS) { - // don't store the rest of the path - break; - } - } else { - auto result = routing_costs->insert(std::make_pair(key, val)); - if (!result.second) { - auto& existing = result.first->second; - if (existing.cost_entry.delay > val.cost_entry.delay) { - // this sample is cheaper - existing = val; - if (in_window) { - cache->insert(compressed_key, val.cost_entry.delay); - } - } else { - // this sample is not cheaper - if (BREAK_ON_MISS) { - // don't store the rest of the path - break; - } - if (in_window) { - cache->insert(compressed_key, existing.cost_entry.delay); - } - } + auto result = routing_costs->insert(std::make_pair(key, val)); + if (!result.second) { + auto& existing = result.first->second; + if (existing.cost_entry.delay > val.cost_entry.delay) { + // this sample is cheaper + existing = val; } else { - // this sample is new - (*routing_costs)[key] = val; - if (in_window) { - cache->insert(compressed_key, val.cost_entry.delay); + // this sample is not cheaper + if (BREAK_ON_MISS) { + // don't store the rest of the path + break; } } + } else { + // this sample is new + (*routing_costs)[key] = val; } // update parent data @@ -608,11 +524,11 @@ static void add_paths(int start_node_ind, /* runs Dijkstra's algorithm from specified node until all nodes have been * visited. Each time a pin is visited, the delay/congestion information * to that pin is stored to an entry in the routing_cost_map */ -static float run_dijkstra(int start_node_ind, - RoutingCosts* routing_costs, - SimpleCache* cache, - float cost_limit) { +static std::pair run_dijkstra(int start_node_ind, + RoutingCosts* routing_costs, + float cost_limit) { auto& device_ctx = g_vpr_ctx.device(); + int path_count = 0; /* a list of boolean flags (one for each rr node) to figure out if a * certain node has already been expanded */ @@ -646,7 +562,8 @@ static float run_dijkstra(int start_node_ind, /* if this node is an ipin record its congestion/delay in the routing_cost_map */ if (device_ctx.rr_nodes[node_ind].type() == IPIN) { - add_paths(start_node_ind, node_ind, &paths, routing_costs, cache); + path_count++; + add_paths(start_node_ind, node_ind, &paths, routing_costs); } else { expand_dijkstra_neighbours(current, paths, node_expanded, pq); node_expanded[node_ind] = true; @@ -657,7 +574,17 @@ static float run_dijkstra(int start_node_ind, break; } } - return max_cost; + return std::make_pair(max_cost, path_count); +} + +static uint64_t interleave(uint32_t x) { + uint64_t i = x; + i = (i ^ (i << 16)) & 0x0000ffff0000ffff; + i = (i ^ (i << 8)) & 0x00ff00ff00ff00ff; + i = (i ^ (i << 4)) & 0x0f0f0f0f0f0f0f0f; + i = (i ^ (i << 2)) & 0x3333333333333333; + i = (i ^ (i << 1)) & 0x5555555555555555; + return i; } // compute the cost maps for lookahead @@ -676,7 +603,7 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { auto& point = grid.point[y][x]; if (!point.samples.empty()) { - point.order = point.samples[0]; + point.order = interleave(point.location.x()) | (interleave(point.location.y()) << 1); sample_points.push_back(point); } } @@ -710,21 +637,23 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen // holds the cost entries for a run RoutingCosts costs; - // a cache to avoid hammering the RoutingCosts map, since lookups will be dominated by a few keys - // must be consistent with `costs` i.e. any entry in the cache should also be in `costs` - // NOTE: this is used as a write-through cache, maybe try write-back - SimpleCache cache; - + vtr::Timer run_timer; + int path_count = 0; for (auto node_ind : point.samples) { - max_cost = std::max(max_cost, run_dijkstra(node_ind, &costs, &cache, COST_LIMIT)); + auto result = run_dijkstra(node_ind, &costs, COST_LIMIT); + max_cost = std::max(max_cost, result.first); + path_count += result.second; } #if defined(VPR_USE_TBB) all_costs_mutex.lock(); #endif - VTR_LOG("Expanded sample point (%d, %d) %e miss %g\n", - point.location.x(), point.location.y(), max_cost, cache.miss_ratio()); + VTR_LOG("Expanded %d paths starting at (%d, %d) max_cost %e (%g paths/sec)\n", + path_count, + point.location.x(), point.location.y(), + max_cost, + path_count / run_timer.elapsed_sec()); // combine the cost map from this run with the final cost maps for each segment for (const auto& cost : costs) { @@ -903,6 +832,7 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se for (int i = 0; i < num_segments; i++) { const auto& counts = segment_counts[i]; const auto& bounding_box = bounding_box_for_segment[i]; + if (bounding_box.empty()) continue; auto& grid = (*inodes_for_segment)[i]; for (int y = 0; y < SAMPLE_GRID_SIZE; y++) { for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h index 9b5bdb0afea..b04c8131800 100644 --- a/vpr/src/route/connection_box_lookahead_map.h +++ b/vpr/src/route/connection_box_lookahead_map.h @@ -32,29 +32,6 @@ struct RoutingCostKey { } }; -// compressed version of RoutingCostKey -// TODO add bounds checks -struct CompressedRoutingCostKey { - uint32_t data; - - CompressedRoutingCostKey() { - data = -1; - } - CompressedRoutingCostKey(const RoutingCostKey& key) { - data = key.seg_index & 0xff; - data <<= 8; - data |= size_t(key.box_id) & 0xff; - data <<= 8; - data |= key.delta.x() & 0xff; - data <<= 8; - data |= key.delta.y() & 0xff; - } - - bool operator==(CompressedRoutingCostKey other) const { - return data == other.data; - } -}; - // Data in the RoutingCosts map struct RoutingCost { // source and destination node indices @@ -67,15 +44,11 @@ struct RoutingCost { // hash implementation for RoutingCostKey struct HashRoutingCostKey { std::size_t operator()(RoutingCostKey const& key) const noexcept { - uint64_t data; - data = key.seg_index & 0xffff; - data <<= 16; - data |= size_t(key.box_id) & 0xffff; - data <<= 16; - data |= key.delta.x() & 0xffff; - data <<= 16; - data |= key.delta.y() & 0xffff; - return std::hash{}(data); + std::size_t hash = std::hash{}(key.seg_index); + vtr::hash_combine(hash, key.box_id); + vtr::hash_combine(hash, key.delta.x()); + vtr::hash_combine(hash, key.delta.y()); + return hash; } }; From c4bb0f6858cda999ddfd5d85c310e7ef57ba8461 Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Fri, 8 Nov 2019 12:12:06 -0800 Subject: [PATCH 21/41] integrate lighost's patches Signed-off-by: Dusty DeWeese --- vpr/src/route/connection_box.cpp | 1 + vpr/src/route/connection_box.h | 6 ++-- .../route/connection_box_lookahead_map.cpp | 34 +++++++++++++++---- 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/vpr/src/route/connection_box.cpp b/vpr/src/route/connection_box.cpp index 1bee129f0d6..181fe171989 100644 --- a/vpr/src/route/connection_box.cpp +++ b/vpr/src/route/connection_box.cpp @@ -33,6 +33,7 @@ bool ConnectionBoxes::find_connection_box(int inode, float* site_pin_delay) const { VTR_ASSERT(box_id != nullptr); VTR_ASSERT(box_location != nullptr); + VTR_ASSERT(site_pin_delay != nullptr); if (inode >= (ssize_t)ipin_map_.size()) { return false; diff --git a/vpr/src/route/connection_box.h b/vpr/src/route/connection_box.h index 25fd0ea422c..1757c6c726b 100644 --- a/vpr/src/route/connection_box.h +++ b/vpr/src/route/connection_box.h @@ -25,12 +25,12 @@ struct ConnBoxLoc { float a_site_pin_delay, ConnectionBoxId a_box_id) : box_location(a_box_location) - , box_id(a_box_id) - , site_pin_delay(a_site_pin_delay) {} + , site_pin_delay(a_site_pin_delay) + , box_id(a_box_id) {} std::pair box_location; - ConnectionBoxId box_id; float site_pin_delay; + ConnectionBoxId box_id; }; struct SinkToIpin { diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 35e3b224844..84ac3ffd71f 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -277,7 +277,7 @@ void CostMap::print(int iseg) const { VTR_LOG("cost EMPTY for box_id = %lu\n", box_id); continue; } - VTR_LOG("cost for box_id = %lu\n", box_id); + VTR_LOG("cost for box_id = %lu (%zu, %zu)\n", box_id, matrix.dim_size(0), matrix.dim_size(1)); double sum = 0.0; for (unsigned iy = 0; iy < matrix.dim_size(1); iy++) { for (unsigned ix = 0; ix < matrix.dim_size(0); ix++) { @@ -431,7 +431,16 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, float expected_delay = cost_entry.delay; float expected_congestion = cost_entry.congestion; + expected_delay += site_pin_delay; + expected_congestion += site_pin_delay; + float expected_cost = criticality_fac * expected_delay + (1.0 - criticality_fac) * expected_congestion; + + VTR_LOGV_DEBUG(f_router_debug, "Lookahead congestion: %g\n", expected_congestion); + VTR_LOGV_DEBUG(f_router_debug, "Criticality: %g\n", criticality_fac); + VTR_LOGV_DEBUG(f_router_debug, "Lookahead cost: %g\n", expected_cost); + VTR_LOGV_DEBUG(f_router_debug, "Site pin delay: %g\n", site_pin_delay); + if (!std::isfinite(expected_cost) || expected_cost < 0.f) { VTR_LOG_ERROR("invalid cost for segment %d to connection box %d at (%d, %d)\n", from_seg_index, (int)size_t(box_id), (int)dx, (int)dy); VTR_ASSERT(0); @@ -458,15 +467,21 @@ static void add_paths(int start_node_ind, std::vector path; for (int i = node_ind; i != start_node_ind; path.push_back(i = (*paths)[i].parent)) ; - util::PQ_Entry parent_entry(start_node_ind, UNDEFINED, 0, 0, 0, true); + util::PQ_Entry parent_entry(start_node_ind, UNDEFINED, 0, 0, 0, true, /*Tsw_adjust=*/0.f); // recalculate the path with congestion util::PQ_Entry current_full = parent_entry; int parent = start_node_ind; for (auto it = path.rbegin(); it != path.rend(); it++) { auto& parent_node = device_ctx.rr_nodes[parent]; + float Tsw_adjust = 0.f; + + // Remove site pin delay when taking edge from last channel to IPIN. + if (*it == node_ind) { + Tsw_adjust = -site_pin_delay; + } current_full = util::PQ_Entry(*it, parent_node.edge_switch((*paths)[*it].edge), current_full.delay, - current_full.R_upstream, current_full.congestion_upstream, false); + current_full.R_upstream, current_full.congestion_upstream, false, Tsw_adjust); parent = *it; } @@ -487,12 +502,17 @@ static void add_paths(int start_node_ind, seg_index, box_id, delta}; + + float new_delay = current_full.delay - parent_entry.delay; + float new_congestion = current_full.congestion_upstream - parent_entry.congestion_upstream; + + VTR_ASSERT(new_delay >= 0.f); + VTR_ASSERT(new_congestion >= 0.f); + RoutingCost val = { parent, node_ind, - util::Cost_Entry( - current_full.delay - parent_entry.delay, - current_full.congestion_upstream - parent_entry.congestion_upstream)}; + util::Cost_Entry(new_delay, new_congestion)}; // NOTE: implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST @@ -516,7 +536,7 @@ static void add_paths(int start_node_ind, // update parent data parent_entry = util::PQ_Entry(*it, parent_node.edge_switch((*paths)[*it].edge), parent_entry.delay, - parent_entry.R_upstream, parent_entry.congestion_upstream, false); + parent_entry.R_upstream, parent_entry.congestion_upstream, false, /*Tsw_adjust=*/0.f); parent = *it; } } From c9cd48a387cd8407a5eb8495abe5f3bdb4d555d9 Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Mon, 11 Nov 2019 11:22:23 -0800 Subject: [PATCH 22/41] independently minimize delay and base cost (congestion) --- vpr/src/route/connection_box_lookahead_map.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 84ac3ffd71f..a719f1f243d 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -323,6 +323,8 @@ std::vector> CostMap::list_empty() const { static void assign_min_entry(util::Cost_Entry* dst, const util::Cost_Entry& src) { if (src.delay < dst->delay) { dst->delay = src.delay; + } + if (src.congestion < dst->congestion) { dst->congestion = src.congestion; } } From 629ca514ebc53263493a9ee1eeaecd8479c46369 Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Thu, 14 Nov 2019 15:13:48 -0800 Subject: [PATCH 23/41] Expand delay and base cost independently Expand base_cost too. The delay and base cost matricies are independent, so should be expanded and filled independently. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> Signed-off-by: Dusty DeWeese --- .../route/connection_box_lookahead_map.cpp | 231 +++++++++--------- vpr/src/route/connection_box_lookahead_map.h | 4 +- 2 files changed, 119 insertions(+), 116 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index a719f1f243d..a65bb04d2d0 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -38,16 +38,12 @@ * * See e_representative_entry_method */ #define REPRESENTATIVE_ENTRY_METHOD util::SMALLEST -// #define FILL_LIMIT 30 #define CONNECTION_BOX_LOOKAHEAD_MAP_PRINT_COST_MAPS // Sample based an NxN grid of starting segments, where N = SAMPLE_GRID_SIZE static constexpr int SAMPLE_GRID_SIZE = 3; -// Stop Dijkstra expansion after reaching COST_LIMIT -static constexpr float COST_LIMIT = std::numeric_limits::infinity(); - // Don't continue storing a path after hitting a lower-or-same cost entry. static constexpr bool BREAK_ON_MISS = true; @@ -68,9 +64,10 @@ struct SampleGrid { SamplePoint point[SAMPLE_GRID_SIZE][SAMPLE_GRID_SIZE]; }; +template static std::pair run_dijkstra(int start_node_ind, - RoutingCosts* routing_costs, - float max_cost); + RoutingCosts* routing_costs); + static void find_inodes_for_segment_types(std::vector* inodes_for_segment); // also known as the L1 norm @@ -145,10 +142,13 @@ util::Cost_Entry CostMap::find_cost(int from_seg_index, ConnectionBoxId box_id, } // set the cost map for a segment type and connection box type, filling holes -void CostMap::set_cost_map(const RoutingCosts& costs) { +void CostMap::set_cost_map(const RoutingCosts& delay_costs, const RoutingCosts& base_costs) { // calculate the bounding boxes vtr::Matrix> bounds({seg_count_, box_count_}); - for (const auto& entry : costs) { + for (const auto& entry : delay_costs) { + bounds[entry.first.seg_index][size_t(entry.first.box_id)].expand_bounding_box(vtr::Rect(entry.first.delta)); + } + for (const auto& entry : base_costs) { bounds[entry.first.seg_index][size_t(entry.first.box_id)].expand_bounding_box(vtr::Rect(entry.first.delta)); } @@ -171,13 +171,21 @@ void CostMap::set_cost_map(const RoutingCosts& costs) { } // store entries into the matrices - for (const auto& entry : costs) { + for (const auto& entry : delay_costs) { int seg = entry.first.seg_index; int box = size_t(entry.first.box_id); const auto& seg_box_bounds = bounds[seg][box]; int x = entry.first.delta.x() - seg_box_bounds.xmin(); int y = entry.first.delta.y() - seg_box_bounds.ymin(); - cost_map_[seg][box][x][y] = entry.second.cost_entry; + cost_map_[seg][box][x][y].delay = entry.second; + } + for (const auto& entry : base_costs) { + int seg = entry.first.seg_index; + int box = size_t(entry.first.box_id); + const auto& seg_box_bounds = bounds[seg][box]; + int x = entry.first.delta.x() - seg_box_bounds.xmin(); + int y = entry.first.delta.y() - seg_box_bounds.ymin(); + cost_map_[seg][box][x][y].congestion = entry.second; } // fill the holes @@ -232,11 +240,10 @@ void CostMap::set_cost_map(const RoutingCosts& costs) { } } } -#if !defined(FILL_LIMIT) if (couldnt_fill) { + // give up trying to fill an empty matrix break; } -#endif } if (!couldnt_fill) { @@ -251,13 +258,11 @@ void CostMap::set_cost_map(const RoutingCosts& costs) { if (couldnt_fill) { VTR_LOG_WARN("Couldn't fill holes in the cost matrix for %d -> %ld, %d x %d bounding box\n", seg, box, seg_box_bounds.width(), seg_box_bounds.height()); -#if !defined(FILL_LIMIT) for (unsigned y = 0; y < matrix.dim_size(1); y++) { for (unsigned x = 0; x < matrix.dim_size(0); x++) { VTR_ASSERT(!matrix[x][y].valid()); } } -#endif } } } @@ -335,35 +340,39 @@ std::pair CostMap::get_nearby_cost_entry(const vtr::NdMat int cy, const vtr::Rect& bounds) { // spiral around (cx, cy) looking for a nearby entry - int n = 1; - bool in_bounds; - util::Cost_Entry entry; + bool in_bounds = bounds.contains(vtr::Point(cx, cy)); + if (!in_bounds) { + return std::make_pair(util::Cost_Entry(), 0); + } + int n = 0; + util::Cost_Entry fill(matrix[cx][cy]); - do { + while (in_bounds && !fill.valid()) { + n++; in_bounds = false; + util::Cost_Entry min_entry; for (int ox = -n; ox <= n; ox++) { int x = cx + ox; int oy = n - abs(ox); int yp = cy + oy; int yn = cy - oy; if (bounds.contains(vtr::Point(x, yp))) { - assign_min_entry(&entry, matrix[x][yp]); + assign_min_entry(&min_entry, matrix[x][yp]); in_bounds = true; } if (bounds.contains(vtr::Point(x, yn))) { - assign_min_entry(&entry, matrix[x][yn]); + assign_min_entry(&min_entry, matrix[x][yn]); in_bounds = true; } + if (!std::isfinite(fill.delay)) { + fill.delay = min_entry.delay; + } + if (!std::isfinite(fill.congestion)) { + fill.congestion = min_entry.congestion; + } } - if (entry.valid()) return std::make_pair(entry, n); - n++; -#if defined(FILL_LIMIT) - if (n > FILL_LIMIT) { - break; - } -#endif - } while (in_bounds); - return std::make_pair(util::Cost_Entry(), n); + } + return std::make_pair(fill, n); } // derive a cost from the map between two nodes @@ -376,8 +385,6 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, auto& device_ctx = g_vpr_ctx.device(); - std::pair from_location; - std::pair to_location; auto to_node_type = device_ctx.rr_nodes[to_node_ind].type(); if (to_node_type == SINK) { @@ -447,18 +454,21 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, VTR_LOG_ERROR("invalid cost for segment %d to connection box %d at (%d, %d)\n", from_seg_index, (int)size_t(box_id), (int)dx, (int)dy); VTR_ASSERT(0); } + return expected_cost; } // add a best cost routing path from start_node_ind to node_ind to routing costs +template static void add_paths(int start_node_ind, - int node_ind, + Entry current, std::unordered_map* paths, RoutingCosts* routing_costs) { auto& device_ctx = g_vpr_ctx.device(); ConnectionBoxId box_id; std::pair box_location; float site_pin_delay; + int node_ind = current.rr_node_ind; bool found = device_ctx.connection_boxes.find_connection_box( node_ind, &box_id, &box_location, &site_pin_delay); if (!found) { @@ -467,32 +477,20 @@ static void add_paths(int start_node_ind, // reconstruct the path std::vector path; - for (int i = node_ind; i != start_node_ind; path.push_back(i = (*paths)[i].parent)) - ; - util::PQ_Entry parent_entry(start_node_ind, UNDEFINED, 0, 0, 0, true, /*Tsw_adjust=*/0.f); - - // recalculate the path with congestion - util::PQ_Entry current_full = parent_entry; - int parent = start_node_ind; - for (auto it = path.rbegin(); it != path.rend(); it++) { - auto& parent_node = device_ctx.rr_nodes[parent]; - float Tsw_adjust = 0.f; - - // Remove site pin delay when taking edge from last channel to IPIN. - if (*it == node_ind) { - Tsw_adjust = -site_pin_delay; - } - current_full = util::PQ_Entry(*it, parent_node.edge_switch((*paths)[*it].edge), current_full.delay, - current_full.R_upstream, current_full.congestion_upstream, false, Tsw_adjust); - parent = *it; + for (int i = (*paths)[node_ind].parent; i != start_node_ind; i = (*paths)[i].parent) { + path.push_back(i); } + path.push_back(start_node_ind); + + current.adjust_Tsw(-site_pin_delay); // add each node along the path subtracting the incremental costs from the current costs - parent = start_node_ind; + Entry start_to_here(start_node_ind, UNDEFINED, nullptr); + int parent = start_node_ind; for (auto it = path.rbegin(); it != path.rend(); it++) { - auto& parent_node = device_ctx.rr_nodes[parent]; - int seg_index = device_ctx.rr_indexed_data[parent_node.cost_index()].seg_index; - const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(parent); + auto& here = device_ctx.rr_nodes[*it]; + int seg_index = device_ctx.rr_indexed_data[here.cost_index()].seg_index; + const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(*it); if (from_canonical_loc == nullptr) { VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d", parent); @@ -505,50 +503,37 @@ static void add_paths(int start_node_ind, box_id, delta}; - float new_delay = current_full.delay - parent_entry.delay; - float new_congestion = current_full.congestion_upstream - parent_entry.congestion_upstream; + if (*it != start_node_ind) { + auto& parent_node = device_ctx.rr_nodes[parent]; + start_to_here = Entry(*it, parent_node.edge_switch((*paths)[*it].edge), &start_to_here); + parent = *it; + } - VTR_ASSERT(new_delay >= 0.f); - VTR_ASSERT(new_congestion >= 0.f); + float cost = current.cost() - start_to_here.cost(); + if (cost < 0.f && cost > -1e-15 /* 1 femtosecond */) { + cost = 0.f; + } - RoutingCost val = { - parent, - node_ind, - util::Cost_Entry(new_delay, new_congestion)}; + VTR_ASSERT(cost >= 0.f); // NOTE: implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST - - auto result = routing_costs->insert(std::make_pair(key, val)); + auto result = routing_costs->insert(std::make_pair(key, cost)); if (!result.second) { - auto& existing = result.first->second; - if (existing.cost_entry.delay > val.cost_entry.delay) { - // this sample is cheaper - existing = val; - } else { - // this sample is not cheaper - if (BREAK_ON_MISS) { - // don't store the rest of the path - break; - } + if (cost < result.first->second) { + result.first->second = cost; + } else if (BREAK_ON_MISS) { + break; } - } else { - // this sample is new - (*routing_costs)[key] = val; } - - // update parent data - parent_entry = util::PQ_Entry(*it, parent_node.edge_switch((*paths)[*it].edge), parent_entry.delay, - parent_entry.R_upstream, parent_entry.congestion_upstream, false, /*Tsw_adjust=*/0.f); - parent = *it; } } /* runs Dijkstra's algorithm from specified node until all nodes have been * visited. Each time a pin is visited, the delay/congestion information * to that pin is stored to an entry in the routing_cost_map */ +template static std::pair run_dijkstra(int start_node_ind, - RoutingCosts* routing_costs, - float cost_limit) { + RoutingCosts* routing_costs) { auto& device_ctx = g_vpr_ctx.device(); int path_count = 0; @@ -561,12 +546,11 @@ static std::pair run_dijkstra(int start_node_ind, * Also store the parent node so we can reconstruct a specific path. */ std::unordered_map paths; /* a priority queue for expansion */ - std::priority_queue, std::greater> pq; + std::priority_queue, std::greater> pq; /* first entry has no upstream delay or congestion */ - util::PQ_Entry_Lite first_entry(start_node_ind, UNDEFINED, 0, true); - - float max_cost = 0.f; + Entry first_entry(start_node_ind, UNDEFINED, nullptr); + float max_cost = first_entry.cost(); pq.push(first_entry); @@ -577,6 +561,9 @@ static std::pair run_dijkstra(int start_node_ind, int node_ind = current.rr_node_ind; + // the last cost should be the highest + max_cost = current.cost(); + /* check that we haven't already expanded from this node */ if (node_expanded[node_ind]) { continue; @@ -585,16 +572,12 @@ static std::pair run_dijkstra(int start_node_ind, /* if this node is an ipin record its congestion/delay in the routing_cost_map */ if (device_ctx.rr_nodes[node_ind].type() == IPIN) { path_count++; - add_paths(start_node_ind, node_ind, &paths, routing_costs); + add_paths(start_node_ind, current, &paths, routing_costs); } else { - expand_dijkstra_neighbours(current, paths, node_expanded, pq); + util::expand_dijkstra_neighbours(device_ctx.rr_nodes, + current, paths, node_expanded, pq); node_expanded[node_ind] = true; } - - max_cost = std::max(max_cost, current.delay_cost); - if (max_cost > cost_limit) { - break; - } } return std::make_pair(max_cost, path_count); } @@ -645,7 +628,8 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen device_ctx.connection_boxes.num_connection_box_types()); VTR_ASSERT(REPRESENTATIVE_ENTRY_METHOD == util::SMALLEST); - RoutingCosts all_costs; + RoutingCosts all_delay_costs; + RoutingCosts all_base_costs; /* run Dijkstra's algorithm for each segment type & channel type combination */ #if defined(VPR_USE_TBB) @@ -654,39 +638,58 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen #else for (const auto& point : sample_points) { #endif - float max_cost = 0.f; - // holds the cost entries for a run - RoutingCosts costs; + RoutingCosts delay_costs; + RoutingCosts base_costs; + // statistics vtr::Timer run_timer; int path_count = 0; + float max_delay_cost = 0.f; + float max_base_cost = 0.f; + for (auto node_ind : point.samples) { - auto result = run_dijkstra(node_ind, &costs, COST_LIMIT); - max_cost = std::max(max_cost, result.first); - path_count += result.second; + { + auto result = run_dijkstra(node_ind, &delay_costs); + max_delay_cost = std::max(max_delay_cost, result.first); + path_count += result.second; + } + { + auto result = run_dijkstra(node_ind, &base_costs); + max_base_cost = std::max(max_base_cost, result.first); + path_count += result.second; + } } #if defined(VPR_USE_TBB) all_costs_mutex.lock(); #endif - - VTR_LOG("Expanded %d paths starting at (%d, %d) max_cost %e (%g paths/sec)\n", + /* + * for (auto node_ind : point.samples) { + * VTR_LOG("Expanded node %s\n", describe_rr_node(node_ind).c_str()); + * } + */ + VTR_LOG("Expanded %d paths starting at (%d, %d) max_cost %e %e (%g paths/sec)\n", path_count, point.location.x(), point.location.y(), - max_cost, + max_delay_cost, max_base_cost, path_count / run_timer.elapsed_sec()); // combine the cost map from this run with the final cost maps for each segment - for (const auto& cost : costs) { - const auto& key = cost.first; + for (const auto& cost : delay_costs) { const auto& val = cost.second; - const auto& result = all_costs.insert(std::make_pair(key, val)); - auto& existing = result.first->second; - - // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST - if (!result.second || existing.cost_entry.delay > val.cost_entry.delay) { - existing = val; + auto result = all_delay_costs.insert(std::make_pair(cost.first, val)); + if (!result.second) { + // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST + result.first->second = std::min(result.first->second, val); + } + } + for (const auto& cost : base_costs) { + const auto& val = cost.second; + auto result = all_base_costs.insert(std::make_pair(cost.first, val)); + if (!result.second) { + // implements REPRESENTATIVE_ENTRY_METHOD == SMALLEST + result.first->second = std::min(result.first->second, val); } } @@ -700,7 +703,7 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen VTR_LOG("Combining results\n"); /* boil down the cost list in routing_cost_map at each coordinate to a * representative cost entry and store it in the lookahead cost map */ - cost_map_.set_cost_map(all_costs); + cost_map_.set_cost_map(all_delay_costs, all_base_costs); // diagnostics #if defined(CONNECTION_BOX_LOOKAHEAD_MAP_PRINT_COST_ENTRIES) diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h index b04c8131800..810dec59135 100644 --- a/vpr/src/route/connection_box_lookahead_map.h +++ b/vpr/src/route/connection_box_lookahead_map.h @@ -53,7 +53,7 @@ struct HashRoutingCostKey { }; // Map used to store intermediate routing costs -typedef std::unordered_map RoutingCosts; +typedef std::unordered_map RoutingCosts; // Dense cost maps per source segment and destination connection box types class CostMap { @@ -61,7 +61,7 @@ class CostMap { void set_counts(size_t seg_count, size_t box_count); int node_to_segment(int from_node_ind) const; util::Cost_Entry find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const; - void set_cost_map(const RoutingCosts& costs); + void set_cost_map(const RoutingCosts& delay_costs, const RoutingCosts& base_costs); std::pair get_nearby_cost_entry(const vtr::NdMatrix& matrix, int cx, int cy, const vtr::Rect& bounds); void read(const std::string& file); void write(const std::string& file) const; From 62f927c3e341b3a2a37463f790eb3579cb5707e2 Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Thu, 14 Nov 2019 15:16:26 -0800 Subject: [PATCH 24/41] site_pin_delay shouldn't affect congestion Signed-off-by: Dusty DeWeese --- vpr/src/route/connection_box_lookahead_map.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index a65bb04d2d0..e360f7791bd 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -441,7 +441,6 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, float expected_congestion = cost_entry.congestion; expected_delay += site_pin_delay; - expected_congestion += site_pin_delay; float expected_cost = criticality_fac * expected_delay + (1.0 - criticality_fac) * expected_congestion; From a71492b3b2579f30caa7f385c558266d0438d4ea Mon Sep 17 00:00:00 2001 From: Dustin DeWeese Date: Mon, 18 Nov 2019 18:29:55 -0800 Subject: [PATCH 25/41] use histogram to choose sample sizes, set minimum penalty Signed-off-by: Dustin DeWeese --- .../route/connection_box_lookahead_map.cpp | 137 +++++++++++++----- 1 file changed, 99 insertions(+), 38 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index e360f7791bd..d770eaa2d5b 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -45,16 +45,18 @@ static constexpr int SAMPLE_GRID_SIZE = 3; // Don't continue storing a path after hitting a lower-or-same cost entry. -static constexpr bool BREAK_ON_MISS = true; +static constexpr bool BREAK_ON_MISS = false; // Distance penalties filling are calculated based on available samples, but can be adjusted with this factor. static constexpr float PENALTY_FACTOR = 1.f; +static constexpr float PENALTY_MIN = 1e-12f; // a sample point for a segment type, contains all segments at the VPR location struct SamplePoint { uint64_t order; // used to order sample points vtr::Point location; std::vector samples; + int segment_type; SamplePoint() : location(0, 0) {} }; @@ -119,6 +121,7 @@ int CostMap::node_to_segment(int from_node_ind) const { } static util::Cost_Entry penalize(const util::Cost_Entry& entry, int distance, float penalty) { + penalty = std::max(penalty, PENALTY_MIN); return util::Cost_Entry(entry.delay + distance * penalty * PENALTY_FACTOR, entry.congestion); } @@ -364,12 +367,12 @@ std::pair CostMap::get_nearby_cost_entry(const vtr::NdMat assign_min_entry(&min_entry, matrix[x][yn]); in_bounds = true; } - if (!std::isfinite(fill.delay)) { - fill.delay = min_entry.delay; - } - if (!std::isfinite(fill.congestion)) { - fill.congestion = min_entry.congestion; - } + } + if (!std::isfinite(fill.delay)) { + fill.delay = min_entry.delay; + } + if (!std::isfinite(fill.congestion)) { + fill.congestion = min_entry.congestion; } } return std::make_pair(fill, n); @@ -459,7 +462,7 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, // add a best cost routing path from start_node_ind to node_ind to routing costs template -static void add_paths(int start_node_ind, +static bool add_paths(int start_node_ind, Entry current, std::unordered_map* paths, RoutingCosts* routing_costs) { @@ -470,6 +473,7 @@ static void add_paths(int start_node_ind, int node_ind = current.rr_node_ind; bool found = device_ctx.connection_boxes.find_connection_box( node_ind, &box_id, &box_location, &site_pin_delay); + bool new_sample_found = false; if (!found) { VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", node_ind); } @@ -491,8 +495,7 @@ static void add_paths(int start_node_ind, int seg_index = device_ctx.rr_indexed_data[here.cost_index()].seg_index; const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(*it); if (from_canonical_loc == nullptr) { - VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d", - parent); + VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d", *it); } vtr::Point delta(ssize_t(from_canonical_loc->first) - ssize_t(box_location.first), @@ -509,7 +512,7 @@ static void add_paths(int start_node_ind, } float cost = current.cost() - start_to_here.cost(); - if (cost < 0.f && cost > -1e-15 /* 1 femtosecond */) { + if (cost < 0.f && cost > -10e-15 /* 10 femtosecond */) { cost = 0.f; } @@ -520,11 +523,15 @@ static void add_paths(int start_node_ind, if (!result.second) { if (cost < result.first->second) { result.first->second = cost; + new_sample_found = true; } else if (BREAK_ON_MISS) { break; } + } else { + new_sample_found = true; } } + return new_sample_found; } /* runs Dijkstra's algorithm from specified node until all nodes have been @@ -535,6 +542,11 @@ static std::pair run_dijkstra(int start_node_ind, RoutingCosts* routing_costs) { auto& device_ctx = g_vpr_ctx.device(); int path_count = 0; + const std::pair* start_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(start_node_ind); + if (start_canonical_loc == nullptr) { + VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d", + start_node_ind); + } /* a list of boolean flags (one for each rr node) to figure out if a * certain node has already been expanded */ @@ -646,7 +658,6 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen int path_count = 0; float max_delay_cost = 0.f; float max_base_cost = 0.f; - for (auto node_ind : point.samples) { { auto result = run_dijkstra(node_ind, &delay_costs); @@ -663,14 +674,10 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen #if defined(VPR_USE_TBB) all_costs_mutex.lock(); #endif - /* - * for (auto node_ind : point.samples) { - * VTR_LOG("Expanded node %s\n", describe_rr_node(node_ind).c_str()); - * } - */ - VTR_LOG("Expanded %d paths starting at (%d, %d) max_cost %e %e (%g paths/sec)\n", - path_count, + VTR_LOG("Expanded %d paths of segment type %s(%d) starting at (%d, %d) from %d segments, max_cost %e %e (%g paths/sec)\n", + path_count, segment_inf[point.segment_type].name.c_str(), point.segment_type, point.location.x(), point.location.y(), + (int)point.samples.size(), max_delay_cost, max_base_cost, path_count / run_timer.elapsed_sec()); @@ -764,25 +771,29 @@ static vtr::Rect bounding_box_for_node(const ConnectionBoxes& connection_bo } } -static vtr::Point choose_point(const vtr::Matrix& counts, const vtr::Rect& bounding_box, int sx, int sy, int n) { - vtr::Rect window(sample(bounding_box, sx, sy, n), +static vtr::Rect sample_window(const vtr::Rect& bounding_box, int sx, int sy, int n) { + return vtr::Rect(sample(bounding_box, sx, sy, n), sample(bounding_box, sx + 1, sy + 1, n)); +} + +static vtr::Point choose_point(const vtr::Matrix& counts, const vtr::Rect& window, int with_count) { vtr::Point center = sample(window, 1, 1, 2); vtr::Point sample_point = center; - int sample_distance = 0; - int sample_count = counts[sample_point.x()][sample_point.y()]; - for (int y = window.ymin(); y < window.ymax(); y++) { - for (int x = window.xmin(); x < window.xmax(); x++) { - vtr::Point here(x, y); - int count = counts[x][y]; - if (count < sample_count) continue; - int distance = manhattan_distance(center, here); - if (count > sample_count || (count == sample_count && distance < sample_distance)) { - sample_point = here; - sample_count = count; - sample_distance = distance; + if (with_count > 0) { + int sample_distance = std::numeric_limits::max(); + for (int y = window.ymin(); y < window.ymax(); y++) { + for (int x = window.xmin(); x < window.xmax(); x++) { + vtr::Point here(x, y); + if (counts[x][y] == with_count) { + int distance = manhattan_distance(center, here); + if (distance < sample_distance) { + sample_point = here; + sample_distance = distance; + } + } } } + VTR_ASSERT(counts[sample_point.x()][sample_point.y()] > 0); } return sample_point; } @@ -799,6 +810,51 @@ static std::pair grid_lookup(const SampleGrid& grid, vtr::Point p return std::make_pair(-1, -1); } +// histogram is a map from segment count to number of locations having that count +static int max_count_within_quantiles(const std::map& histogram, float lower, float upper) { + if (histogram.empty()) { + return 0; + } + int sum = 0; + for (const auto& entry : histogram) { + sum += entry.second; + } + int lower_limit = std::ceil(sum * lower); + int upper_limit = std::ceil(sum * upper); + std::pair max(0, 0); + for (const auto& entry : histogram) { + upper_limit -= entry.second; + if (lower_limit > 0) { + lower_limit -= entry.second; + if (lower_limit <= 0) { + max = entry; + } + } else { + if (entry.second >= max.second) { + max = entry; + } + if (upper_limit <= 0) { + break; + } + } + } + return max.first; +} + +// select a good number of segments to find +static int select_size(const vtr::Rect& box, const vtr::Matrix& counts) { + std::map histogram; + for (int y = box.ymin(); y < box.ymax(); y++) { + for (int x = box.xmin(); x < box.xmax(); x++) { + int count = counts[x][y]; + if (count > 0) { + histogram[count]++; + } + } + } + return max_count_within_quantiles(histogram, 0.75, 0.9); +} + // for each segment type, find the nearest nodes to an equally spaced grid of points // within the bounding box for that segment type static void find_inodes_for_segment_types(std::vector* inodes_for_segment) { @@ -812,6 +868,7 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se for (size_t i = 0; i < rr_nodes.size(); i++) { auto& node = rr_nodes[i]; if (node.type() != CHANX && node.type() != CHANY) continue; + if (node.capacity() == 0 || node.num_edges() == 0) continue; int seg_index = device_ctx.rr_indexed_data[node.cost_index()].seg_index; VTR_ASSERT(seg_index != OPEN); @@ -829,10 +886,12 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se // initialize the samples vector for each sample point inodes_for_segment->clear(); inodes_for_segment->resize(num_segments); - for (auto& grid : *inodes_for_segment) { + for (int i = 0; i < num_segments; i++) { + auto& grid = (*inodes_for_segment)[i]; for (int y = 0; y < SAMPLE_GRID_SIZE; y++) { for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { grid.point[y][x].samples = std::vector(); + grid.point[y][x].segment_type = i; } } } @@ -841,7 +900,7 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se for (size_t i = 0; i < rr_nodes.size(); i++) { auto& node = rr_nodes[i]; if (node.type() != CHANX && node.type() != CHANY) continue; - if (node.capacity() == 0) continue; + if (node.capacity() == 0 || node.num_edges() == 0) continue; const std::pair* loc = device_ctx.connection_boxes.find_canonical_loc(i); if (loc == nullptr) continue; @@ -860,16 +919,18 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se auto& grid = (*inodes_for_segment)[i]; for (int y = 0; y < SAMPLE_GRID_SIZE; y++) { for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { - grid.point[y][x].location = choose_point(counts, bounding_box, x, y, SAMPLE_GRID_SIZE); + vtr::Rect window = sample_window(bounding_box, x, y, SAMPLE_GRID_SIZE); + int selected_size = select_size(window, segment_counts[i]); + grid.point[y][x].location = choose_point(counts, window, selected_size); } } } - // select an inode near the center of the bounding box for each segment type + // collect the node indices for each segment type at the selected sample points for (size_t i = 0; i < rr_nodes.size(); i++) { auto& node = rr_nodes[i]; if (node.type() != CHANX && node.type() != CHANY) continue; - if (node.capacity() == 0) continue; + if (node.capacity() == 0 || node.num_edges() == 0) continue; const std::pair* loc = device_ctx.connection_boxes.find_canonical_loc(i); if (loc == nullptr) continue; From ba189e56571425b0701e078ddbd7ca571a72cf78 Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Thu, 21 Nov 2019 09:28:01 -0800 Subject: [PATCH 26/41] Use get_rr_cong_cost to compute base_cost's. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/route/connection_box_lookahead_map.cpp | 3 +++ vpr/src/route/router_lookahead_map_utils.cpp | 3 +++ 2 files changed, 6 insertions(+) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index d770eaa2d5b..b4c884d1942 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -607,6 +607,9 @@ static uint64_t interleave(uint32_t x) { void ConnectionBoxMapLookahead::compute(const std::vector& segment_inf) { vtr::ScopedStartFinishTimer timer("Computing connection box lookahead map"); + // Initialize rr_node_route_inf if not already + alloc_and_load_rr_node_route_structs(); + size_t num_segments = segment_inf.size(); std::vector sample_points; { diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp index 6346ba00006..8a8ed1276d2 100644 --- a/vpr/src/route/router_lookahead_map_utils.cpp +++ b/vpr/src/route/router_lookahead_map_utils.cpp @@ -56,6 +56,9 @@ static vtr::Point pick_sample_tile(t_physical_tile_type_ptr tile_type, vtr: // This is used when building the SROURCE/OPIN --> CHAN lookup table that contains additional delay and // congestion data to reach the CHANX/CHANY nodes which is not present in the lookahead cost map. #define DIRECT_CONNECT_SPECIAL_SEG_TYPE -1; +======= +#include "route_common.h" +>>>>>>> Use get_rr_cong_cost to compute base_cost's. namespace util { From 800e81b64330a8b531bc5f77199707eb57fc4c87 Mon Sep 17 00:00:00 2001 From: Dustin DeWeese Date: Mon, 25 Nov 2019 18:01:52 -0800 Subject: [PATCH 27/41] run expansions in a region until a number of paths are found Signed-off-by: Dustin DeWeese --- .../route/connection_box_lookahead_map.cpp | 263 +++++++++--------- 1 file changed, 127 insertions(+), 136 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index b4c884d1942..cc8e7bf112e 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -11,6 +11,7 @@ #include "vtr_time.h" #include "vtr_geometry.h" #include "echo_files.h" +#include "rr_graph.h" #include "route_timing.h" #include "route_common.h" @@ -51,26 +52,26 @@ static constexpr bool BREAK_ON_MISS = false; static constexpr float PENALTY_FACTOR = 1.f; static constexpr float PENALTY_MIN = 1e-12f; +static constexpr int MIN_PATH_COUNT = 1000; + // a sample point for a segment type, contains all segments at the VPR location struct SamplePoint { - uint64_t order; // used to order sample points vtr::Point location; - std::vector samples; - int segment_type; - SamplePoint() - : location(0, 0) {} + std::vector nodes; }; -// a grid of sample points -struct SampleGrid { - SamplePoint point[SAMPLE_GRID_SIZE][SAMPLE_GRID_SIZE]; +struct SampleRegion { + int segment_type; + vtr::Point grid_location; + std::vector points; + uint64_t order; // for sorting }; template static std::pair run_dijkstra(int start_node_ind, RoutingCosts* routing_costs); -static void find_inodes_for_segment_types(std::vector* inodes_for_segment); +static std::vector find_sample_regions(int num_segments); // also known as the L1 norm static int manhattan_distance(const vtr::Point& a, const vtr::Point& b) { @@ -593,16 +594,6 @@ static std::pair run_dijkstra(int start_node_ind, return std::make_pair(max_cost, path_count); } -static uint64_t interleave(uint32_t x) { - uint64_t i = x; - i = (i ^ (i << 16)) & 0x0000ffff0000ffff; - i = (i ^ (i << 8)) & 0x00ff00ff00ff00ff; - i = (i ^ (i << 4)) & 0x0f0f0f0f0f0f0f0f; - i = (i ^ (i << 2)) & 0x3333333333333333; - i = (i ^ (i << 1)) & 0x5555555555555555; - return i; -} - // compute the cost maps for lookahead void ConnectionBoxMapLookahead::compute(const std::vector& segment_inf) { vtr::ScopedStartFinishTimer timer("Computing connection box lookahead map"); @@ -611,30 +602,7 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen alloc_and_load_rr_node_route_structs(); size_t num_segments = segment_inf.size(); - std::vector sample_points; - { - std::vector inodes_for_segment(num_segments); - find_inodes_for_segment_types(&inodes_for_segment); - - // collapse into a vector - for (auto& grid : inodes_for_segment) { - for (int y = 0; y < SAMPLE_GRID_SIZE; y++) { - for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { - auto& point = grid.point[y][x]; - if (!point.samples.empty()) { - point.order = interleave(point.location.x()) | (interleave(point.location.y()) << 1); - sample_points.push_back(point); - } - } - } - } - } - - // sort by VPR coordinate - std::sort(sample_points.begin(), sample_points.end(), - [](const SamplePoint& a, const SamplePoint& b) { - return a.order < b.order; - }); + std::vector sample_regions = find_sample_regions(num_segments); /* free previous delay map and allocate new one */ auto& device_ctx = g_vpr_ctx.device(); @@ -648,41 +616,65 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen /* run Dijkstra's algorithm for each segment type & channel type combination */ #if defined(VPR_USE_TBB) tbb::mutex all_costs_mutex; - tbb::parallel_for_each(sample_points, [&](const SamplePoint& point) { + tbb::parallel_for_each(sample_regions, [&](const SampleRegion& region) { #else - for (const auto& point : sample_points) { + for (const auto& region : sample_regions) { #endif // holds the cost entries for a run RoutingCosts delay_costs; RoutingCosts base_costs; + int total_path_count = 0; + + for (auto& point : region.points) { + // statistics + vtr::Timer run_timer; + float max_delay_cost = 0.f; + float max_base_cost = 0.f; + int path_count = 0; + for (auto node_ind : point.nodes) { + { + auto result = run_dijkstra(node_ind, &delay_costs); + max_delay_cost = std::max(max_delay_cost, result.first); + path_count += result.second; + } + { + auto result = run_dijkstra(node_ind, &base_costs); + max_base_cost = std::max(max_base_cost, result.first); + path_count += result.second; + } + } - // statistics - vtr::Timer run_timer; - int path_count = 0; - float max_delay_cost = 0.f; - float max_base_cost = 0.f; - for (auto node_ind : point.samples) { - { - auto result = run_dijkstra(node_ind, &delay_costs); - max_delay_cost = std::max(max_delay_cost, result.first); - path_count += result.second; + if (path_count > 0) { + VTR_LOG("Expanded %d paths of segment type %s(%d) starting at (%d, %d) from %d segments, max_cost %e %e (%g paths/sec)\n", + path_count, segment_inf[region.segment_type].name.c_str(), region.segment_type, + point.location.x(), point.location.y(), + (int)point.nodes.size(), + max_delay_cost, max_base_cost, + path_count / run_timer.elapsed_sec()); } - { - auto result = run_dijkstra(node_ind, &base_costs); - max_base_cost = std::max(max_base_cost, result.first); - path_count += result.second; + + /* + * if (path_count == 0) { + * for (auto node_ind : point.nodes) { + * VTR_LOG("Expanded node %s\n", describe_rr_node(node_ind).c_str()); + * } + * } + */ + + total_path_count += path_count; + if (total_path_count > MIN_PATH_COUNT) { + break; } } #if defined(VPR_USE_TBB) all_costs_mutex.lock(); #endif - VTR_LOG("Expanded %d paths of segment type %s(%d) starting at (%d, %d) from %d segments, max_cost %e %e (%g paths/sec)\n", - path_count, segment_inf[point.segment_type].name.c_str(), point.segment_type, - point.location.x(), point.location.y(), - (int)point.samples.size(), - max_delay_cost, max_base_cost, - path_count / run_timer.elapsed_sec()); + + if (total_path_count == 0) { + VTR_LOG("No paths found for sample region %s(%d, %d)\n", + segment_inf[region.segment_type].name.c_str(), region.grid_location.x(), region.grid_location.y()); + } // combine the cost map from this run with the final cost maps for each segment for (const auto& cost : delay_costs) { @@ -779,42 +771,33 @@ static vtr::Rect sample_window(const vtr::Rect& bounding_box, int sx, sample(bounding_box, sx + 1, sy + 1, n)); } -static vtr::Point choose_point(const vtr::Matrix& counts, const vtr::Rect& window, int with_count) { - vtr::Point center = sample(window, 1, 1, 2); - vtr::Point sample_point = center; - if (with_count > 0) { - int sample_distance = std::numeric_limits::max(); - for (int y = window.ymin(); y < window.ymax(); y++) { - for (int x = window.xmin(); x < window.xmax(); x++) { - vtr::Point here(x, y); - if (counts[x][y] == with_count) { - int distance = manhattan_distance(center, here); - if (distance < sample_distance) { - sample_point = here; - sample_distance = distance; - } - } +static std::vector choose_points(const vtr::Matrix& counts, + const vtr::Rect& window, + int min_count, + int max_count) { + std::vector points; + for (int y = window.ymin(); y < window.ymax(); y++) { + for (int x = window.xmin(); x < window.xmax(); x++) { + if (counts[x][y] >= min_count && counts[x][y] <= max_count) { + points.push_back(SamplePoint{/* .location = */ vtr::Point(x, y), + /* .nodes = */ {}}); } } - VTR_ASSERT(counts[sample_point.x()][sample_point.y()] > 0); } - return sample_point; -} -// linear lookup, so consider something more sophisticated for large SAMPLE_GRID_SIZEs -static std::pair grid_lookup(const SampleGrid& grid, vtr::Point point) { - for (int sy = 0; sy < SAMPLE_GRID_SIZE; sy++) { - for (int sx = 0; sx < SAMPLE_GRID_SIZE; sx++) { - if (grid.point[sy][sx].location == point) { - return std::make_pair(sx, sy); - } - } - } - return std::make_pair(-1, -1); + vtr::Point center = sample(window, 1, 1, 2); + + // sort by distance from center + std::sort(points.begin(), points.end(), + [&](const SamplePoint& a, const SamplePoint& b) { + return manhattan_distance(a.location, center) < manhattan_distance(b.location, center); + }); + + return points; } // histogram is a map from segment count to number of locations having that count -static int max_count_within_quantiles(const std::map& histogram, float lower, float upper) { +static int quantile(const std::map& histogram, float ratio) { if (histogram.empty()) { return 0; } @@ -822,48 +805,46 @@ static int max_count_within_quantiles(const std::map& histogram, float for (const auto& entry : histogram) { sum += entry.second; } - int lower_limit = std::ceil(sum * lower); - int upper_limit = std::ceil(sum * upper); - std::pair max(0, 0); + int limit = std::ceil(sum * ratio); for (const auto& entry : histogram) { - upper_limit -= entry.second; - if (lower_limit > 0) { - lower_limit -= entry.second; - if (lower_limit <= 0) { - max = entry; - } - } else { - if (entry.second >= max.second) { - max = entry; - } - if (upper_limit <= 0) { - break; - } + limit -= entry.second; + if (limit <= 0) { + return entry.first; } } - return max.first; + return 0; } // select a good number of segments to find -static int select_size(const vtr::Rect& box, const vtr::Matrix& counts) { +static std::map count_histogram(const vtr::Rect& box, const vtr::Matrix& counts) { std::map histogram; for (int y = box.ymin(); y < box.ymax(); y++) { for (int x = box.xmin(); x < box.xmax(); x++) { int count = counts[x][y]; if (count > 0) { - histogram[count]++; + ++histogram[count]; } } } - return max_count_within_quantiles(histogram, 0.75, 0.9); + return histogram; +} + +static uint64_t interleave(uint32_t x) { + uint64_t i = x; + i = (i ^ (i << 16)) & 0x0000ffff0000ffff; + i = (i ^ (i << 8)) & 0x00ff00ff00ff00ff; + i = (i ^ (i << 4)) & 0x0f0f0f0f0f0f0f0f; + i = (i ^ (i << 2)) & 0x3333333333333333; + i = (i ^ (i << 1)) & 0x5555555555555555; + return i; } // for each segment type, find the nearest nodes to an equally spaced grid of points // within the bounding box for that segment type -static void find_inodes_for_segment_types(std::vector* inodes_for_segment) { +static std::vector find_sample_regions(int num_segments) { + std::vector sample_regions; auto& device_ctx = g_vpr_ctx.device(); auto& rr_nodes = device_ctx.rr_nodes; - const int num_segments = inodes_for_segment->size(); std::vector> segment_counts(num_segments); // compute bounding boxes for each segment type @@ -886,19 +867,6 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se segment_counts[seg] = vtr::Matrix({size_t(box.width()), size_t(box.height())}, 0); } - // initialize the samples vector for each sample point - inodes_for_segment->clear(); - inodes_for_segment->resize(num_segments); - for (int i = 0; i < num_segments; i++) { - auto& grid = (*inodes_for_segment)[i]; - for (int y = 0; y < SAMPLE_GRID_SIZE; y++) { - for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { - grid.point[y][x].samples = std::vector(); - grid.point[y][x].segment_type = i; - } - } - } - // count sample points for (size_t i = 0; i < rr_nodes.size(); i++) { auto& node = rr_nodes[i]; @@ -919,16 +887,38 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se const auto& counts = segment_counts[i]; const auto& bounding_box = bounding_box_for_segment[i]; if (bounding_box.empty()) continue; - auto& grid = (*inodes_for_segment)[i]; for (int y = 0; y < SAMPLE_GRID_SIZE; y++) { for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { vtr::Rect window = sample_window(bounding_box, x, y, SAMPLE_GRID_SIZE); - int selected_size = select_size(window, segment_counts[i]); - grid.point[y][x].location = choose_point(counts, window, selected_size); + auto histogram = count_histogram(window, segment_counts[i]); + SampleRegion region = { + /* .segment_type = */ i, + /* .grid_location = */ vtr::Point(x, y), + /* .points = */ choose_points(counts, window, quantile(histogram, 0.5), quantile(histogram, 0.7)), + /* .order = */ 0}; + if (!region.points.empty()) { + vtr::Point location = region.points[0].location; + region.order = interleave(location.x()) | (interleave(location.y()) << 1); + sample_regions.push_back(region); + } } } } + // sort regions + std::sort(sample_regions.begin(), sample_regions.end(), + [](const SampleRegion& a, const SampleRegion& b) { + return a.order < b.order; + }); + + // build an index of sample points on segment type and location + std::map, SamplePoint*> sample_point_index; + for (auto& region : sample_regions) { + for (auto& point : region.points) { + sample_point_index[{region.segment_type, point.location.x(), point.location.y()}] = &point; + } + } + // collect the node indices for each segment type at the selected sample points for (size_t i = 0; i < rr_nodes.size(); i++) { auto& node = rr_nodes[i]; @@ -942,12 +932,13 @@ static void find_inodes_for_segment_types(std::vector* inodes_for_se VTR_ASSERT(seg_index != OPEN); VTR_ASSERT(seg_index < num_segments); - auto& grid = (*inodes_for_segment)[seg_index]; - auto grid_loc = grid_lookup(grid, vtr::Point(loc->first, loc->second)); - if (grid_loc.first >= 0) { - grid.point[grid_loc.first][grid_loc.second].samples.push_back(i); + auto point = sample_point_index.find({seg_index, loc->first, loc->second}); + if (point != sample_point_index.end()) { + point->second->nodes.push_back(i); } } + + return sample_regions; } #ifndef VTR_ENABLE_CAPNPROTO From b0fc84e5e5816959893464961696cfaa9c228644 Mon Sep 17 00:00:00 2001 From: Dustin DeWeese Date: Tue, 3 Dec 2019 10:46:39 -0800 Subject: [PATCH 28/41] use std::make_tuple() Signed-off-by: Dustin DeWeese --- vpr/src/route/connection_box_lookahead_map.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index cc8e7bf112e..ed0172fe252 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -915,7 +915,7 @@ static std::vector find_sample_regions(int num_segments) { std::map, SamplePoint*> sample_point_index; for (auto& region : sample_regions) { for (auto& point : region.points) { - sample_point_index[{region.segment_type, point.location.x(), point.location.y()}] = &point; + sample_point_index[std::make_tuple(region.segment_type, point.location.x(), point.location.y())] = &point; } } @@ -932,7 +932,7 @@ static std::vector find_sample_regions(int num_segments) { VTR_ASSERT(seg_index != OPEN); VTR_ASSERT(seg_index < num_segments); - auto point = sample_point_index.find({seg_index, loc->first, loc->second}); + auto point = sample_point_index.find(std::make_tuple(seg_index, loc->first, loc->second)); if (point != sample_point_index.end()) { point->second->nodes.push_back(i); } From f4a2d91ba04529d0ca5797ed5882238e32957e9b Mon Sep 17 00:00:00 2001 From: Dustin DeWeese Date: Tue, 3 Dec 2019 15:28:10 -0800 Subject: [PATCH 29/41] suggested changes Signed-off-by: Dustin DeWeese --- libs/libvtrcapnproto/connection_map.capnp | 1 + .../route/connection_box_lookahead_map.cpp | 70 ++++++++++++++++--- vpr/src/route/connection_box_lookahead_map.h | 9 --- 3 files changed, 62 insertions(+), 18 deletions(-) diff --git a/libs/libvtrcapnproto/connection_map.capnp b/libs/libvtrcapnproto/connection_map.capnp index bc445a30e70..30b9864153b 100644 --- a/libs/libvtrcapnproto/connection_map.capnp +++ b/libs/libvtrcapnproto/connection_map.capnp @@ -5,6 +5,7 @@ using Matrix = import "matrix.capnp"; struct VprCostEntry { delay @0 :Float32; congestion @1 :Float32; + fill @2 :Bool; } struct VprVector2D { diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index ed0172fe252..0edac971719 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -54,17 +54,34 @@ static constexpr float PENALTY_MIN = 1e-12f; static constexpr int MIN_PATH_COUNT = 1000; +// quantiles (like percentiles but 0-1) of segment count to use as a selection criteria +// choose locations with higher, but not extreme, counts of each segment type +static constexpr double kSamplingCountLowerQuantile = 0.5; +static constexpr double kSamplingCountUpperQuantile = 0.7; + // a sample point for a segment type, contains all segments at the VPR location struct SamplePoint { + // canonical location vtr::Point location; + + // nodes to expand std::vector nodes; }; struct SampleRegion { + // all nodes in `points' have this segment type int segment_type; + + // location on the sample grid vtr::Point grid_location; + + // locations to try + // The computation will keep expanding each of the points + // until a number of paths (segment -> connection box) are found. std::vector points; - uint64_t order; // for sorting + + // used to sort the regions to improve caching + uint64_t order; }; template @@ -350,6 +367,7 @@ std::pair CostMap::get_nearby_cost_entry(const vtr::NdMat } int n = 0; util::Cost_Entry fill(matrix[cx][cy]); + fill.fill = true; while (in_bounds && !fill.valid()) { n++; @@ -448,6 +466,15 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, float expected_cost = criticality_fac * expected_delay + (1.0 - criticality_fac) * expected_congestion; + VTR_LOGV_DEBUG(f_router_debug, "Requested lookahead from node %d to %d\n", from_node_ind, to_node_ind); + const std::string& segment_name = device_ctx.segment_inf[from_seg_index].name; + const std::string& box_name = device_ctx.connection_boxes.get_connection_box(box_id)->name; + VTR_LOGV_DEBUG(f_router_debug, "Lookahead returned %s (%d) to %s (%zu) with distance (%zd, %zd)\n", + segment_name.c_str(), from_seg_index, + box_name.c_str(), + size_t(box_id), + dx, dy); + VTR_LOGV_DEBUG(f_router_debug, "Lookahead delay: %g\n", expected_delay); VTR_LOGV_DEBUG(f_router_debug, "Lookahead congestion: %g\n", expected_congestion); VTR_LOGV_DEBUG(f_router_debug, "Criticality: %g\n", criticality_fac); VTR_LOGV_DEBUG(f_router_debug, "Lookahead cost: %g\n", expected_cost); @@ -535,9 +562,12 @@ static bool add_paths(int start_node_ind, return new_sample_found; } -/* runs Dijkstra's algorithm from specified node until all nodes have been +/* Runs Dijkstra's algorithm from specified node until all nodes have been * visited. Each time a pin is visited, the delay/congestion information - * to that pin is stored to an entry in the routing_cost_map */ + * to that pin is stored to an entry in the routing_cost_map. + * + * Returns the maximum (last) minimum cost path stored, and + * the number of paths from start_node_ind stored. */ template static std::pair run_dijkstra(int start_node_ind, RoutingCosts* routing_costs) { @@ -573,9 +603,6 @@ static std::pair run_dijkstra(int start_node_ind, int node_ind = current.rr_node_ind; - // the last cost should be the highest - max_cost = current.cost(); - /* check that we haven't already expanded from this node */ if (node_expanded[node_ind]) { continue; @@ -583,6 +610,9 @@ static std::pair run_dijkstra(int start_node_ind, /* if this node is an ipin record its congestion/delay in the routing_cost_map */ if (device_ctx.rr_nodes[node_ind].type() == IPIN) { + // the last cost should be the highest + max_cost = current.cost(); + path_count++; add_paths(start_node_ind, current, &paths, routing_costs); } else { @@ -672,8 +702,8 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen #endif if (total_path_count == 0) { - VTR_LOG("No paths found for sample region %s(%d, %d)\n", - segment_inf[region.segment_type].name.c_str(), region.grid_location.x(), region.grid_location.y()); + VTR_LOG_WARN("No paths found for sample region %s(%d, %d)\n", + segment_inf[region.segment_type].name.c_str(), region.grid_location.x(), region.grid_location.y()); } // combine the cost map from this run with the final cost maps for each segment @@ -775,6 +805,7 @@ static std::vector choose_points(const vtr::Matrix& counts, const vtr::Rect& window, int min_count, int max_count) { + VTR_ASSERT(min_count <= max_count); std::vector points; for (int y = window.ymin(); y < window.ymax(); y++) { for (int x = window.xmin(); x < window.xmax(); x++) { @@ -829,6 +860,13 @@ static std::map count_histogram(const vtr::Rect& box, const vtr:: return histogram; } +// Used to calculate each region's `order.' +// A space-filling curve will order the regions so that +// nearby points stay close in order. A Hilbert curve might +// be better, but a Morton (Z)-order curve is easy to compute, +// because it's just interleaving binary bits, so this +// function interleaves with 0's so that the X and Y +// dimensions can then be OR'ed together. static uint64_t interleave(uint32_t x) { uint64_t i = x; i = (i ^ (i << 16)) & 0x0000ffff0000ffff; @@ -842,6 +880,7 @@ static uint64_t interleave(uint32_t x) { // for each segment type, find the nearest nodes to an equally spaced grid of points // within the bounding box for that segment type static std::vector find_sample_regions(int num_segments) { + vtr::ScopedStartFinishTimer timer("finding sample regions"); std::vector sample_regions; auto& device_ctx = g_vpr_ctx.device(); auto& rr_nodes = device_ctx.rr_nodes; @@ -894,11 +933,22 @@ static std::vector find_sample_regions(int num_segments) { SampleRegion region = { /* .segment_type = */ i, /* .grid_location = */ vtr::Point(x, y), - /* .points = */ choose_points(counts, window, quantile(histogram, 0.5), quantile(histogram, 0.7)), + /* .points = */ choose_points(counts, window, quantile(histogram, kSamplingCountLowerQuantile), quantile(histogram, kSamplingCountUpperQuantile)), /* .order = */ 0}; if (!region.points.empty()) { + /* In order to improve caching, the list of sample points are + * sorted to keep points that are nearby on the Euclidean plane also + * nearby in the vector of sample points. + * + * This means subsequent expansions on the same thread are likely + * to cover a similar set of nodes, so they are more likely to be + * cached. This improves performance by about 7%, which isn't a lot, + * but not a bad improvement for a few lines of code. */ vtr::Point location = region.points[0].location; + + // interleave bits of X and Y for a Z-curve ordering. region.order = interleave(location.x()) | (interleave(location.y()) << 1); + sample_regions.push_back(region); } } @@ -962,11 +1012,13 @@ void ConnectionBoxMapLookahead::write(const std::string& file) const { static void ToCostEntry(util::Cost_Entry* out, const VprCostEntry::Reader& in) { out->delay = in.getDelay(); out->congestion = in.getCongestion(); + out->fill = in.getFill(); } static void FromCostEntry(VprCostEntry::Builder* out, const util::Cost_Entry& in) { out->setDelay(in.delay); out->setCongestion(in.congestion); + out->setFill(in.fill); } static void ToVprVector2D(std::pair* out, const VprVector2D::Reader& in) { diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h index 810dec59135..70423409f7a 100644 --- a/vpr/src/route/connection_box_lookahead_map.h +++ b/vpr/src/route/connection_box_lookahead_map.h @@ -32,15 +32,6 @@ struct RoutingCostKey { } }; -// Data in the RoutingCosts map -struct RoutingCost { - // source and destination node indices - int from_node, to_node; - - // cost entry for the route - util::Cost_Entry cost_entry; -}; - // hash implementation for RoutingCostKey struct HashRoutingCostKey { std::size_t operator()(RoutingCostKey const& key) const noexcept { From 8ccb5fde1f0c787362fc5b8baef95f366ab66c00 Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Thu, 31 Oct 2019 12:22:58 -0700 Subject: [PATCH 30/41] Add debugging prints and add fill output to lookahead. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/route/connection_box_lookahead_map.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 0edac971719..36355996e71 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -141,7 +141,7 @@ int CostMap::node_to_segment(int from_node_ind) const { static util::Cost_Entry penalize(const util::Cost_Entry& entry, int distance, float penalty) { penalty = std::max(penalty, PENALTY_MIN); return util::Cost_Entry(entry.delay + distance * penalty * PENALTY_FACTOR, - entry.congestion); + entry.congestion, entry.fill); } // get a cost entry for a segment type, connection box type, and offset From 280cd9529c324ab612d82123b9c4b24bd9326c0e Mon Sep 17 00:00:00 2001 From: Alessandro Comodi Date: Tue, 21 Jan 2020 17:37:59 +0100 Subject: [PATCH 31/41] removed criticality from penalty cost math - addressed review comments Signed-off-by: Alessandro Comodi --- vpr/src/route/router_lookahead_map_utils.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp index 8a8ed1276d2..ff31fe6b0bb 100644 --- a/vpr/src/route/router_lookahead_map_utils.cpp +++ b/vpr/src/route/router_lookahead_map_utils.cpp @@ -113,8 +113,9 @@ util::PQ_Entry_Delay::PQ_Entry_Delay( const util::PQ_Entry_Delay* parent) { this->rr_node = set_rr_node; + auto& device_ctx = g_vpr_ctx.device(); + if (parent != nullptr) { - auto& device_ctx = g_vpr_ctx.device(); float Tsw = device_ctx.rr_switch_inf[switch_ind].Tdel; float Rsw = device_ctx.rr_switch_inf[switch_ind].R; float Cnode = device_ctx.rr_nodes[size_t(set_rr_node)].C(); @@ -132,6 +133,8 @@ util::PQ_Entry_Delay::PQ_Entry_Delay( } else { this->delay_cost = 0.f; } + + this->delay_cost += device_ctx.rr_switch_inf[switch_ind].penalty_cost; } util::PQ_Entry_Base_Cost::PQ_Entry_Base_Cost( @@ -140,8 +143,9 @@ util::PQ_Entry_Base_Cost::PQ_Entry_Base_Cost( const util::PQ_Entry_Base_Cost* parent) { this->rr_node = set_rr_node; + auto& device_ctx = g_vpr_ctx.device(); + if (parent != nullptr) { - auto& device_ctx = g_vpr_ctx.device(); if (device_ctx.rr_switch_inf[switch_ind].configurable()) { this->base_cost = parent->base_cost + get_single_rr_cong_base_cost(size_t(set_rr_node)); } else { @@ -150,6 +154,8 @@ util::PQ_Entry_Base_Cost::PQ_Entry_Base_Cost( } else { this->base_cost = 0.f; } + + this->base_cost += device_ctx.rr_switch_inf[switch_ind].penalty_cost; } /* returns cost entry with the smallest delay */ From 2b6dba1526a9d7bc207609dfa35731b4c683663a Mon Sep 17 00:00:00 2001 From: Henner Zeller Date: Wed, 29 Jan 2020 13:42:22 -0800 Subject: [PATCH 32/41] Fix missing #include Signed-off-by: Henner Zeller --- vpr/src/route/connection_box.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/vpr/src/route/connection_box.h b/vpr/src/route/connection_box.h index 1757c6c726b..37af4cc8320 100644 --- a/vpr/src/route/connection_box.h +++ b/vpr/src/route/connection_box.h @@ -4,11 +4,13 @@ // This class relates IPIN rr nodes with connection box type and locations, used // for connection box driven map lookahead. +#include +#include #include -#include "vtr_strong_id.h" + #include "vtr_flat_map.h" #include "vtr_range.h" -#include +#include "vtr_strong_id.h" struct connection_box_tag {}; typedef vtr::StrongId ConnectionBoxId; From 295c6041080a4c3687300b477b54480bae2e1698 Mon Sep 17 00:00:00 2001 From: Dusty DeWeese Date: Wed, 19 Feb 2020 12:37:46 -0800 Subject: [PATCH 33/41] Remove cached segment map from connection map lookahead file. Signed-off-by: Dusty DeWeese Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- libs/libvtrcapnproto/connection_map.capnp | 2 +- .../route/connection_box_lookahead_map.cpp | 44 +++++++------------ vpr/src/route/connection_box_lookahead_map.h | 1 + 3 files changed, 18 insertions(+), 29 deletions(-) diff --git a/libs/libvtrcapnproto/connection_map.capnp b/libs/libvtrcapnproto/connection_map.capnp index 30b9864153b..874b39822dd 100644 --- a/libs/libvtrcapnproto/connection_map.capnp +++ b/libs/libvtrcapnproto/connection_map.capnp @@ -20,6 +20,6 @@ struct VprFloatEntry { struct VprCostMap { costMap @0 :Matrix.Matrix((Matrix.Matrix(VprCostEntry))); offset @1 :Matrix.Matrix(VprVector2D); - segmentMap @2 :List(Int64); + depField @2 :List(Int64); penalty @3 :Matrix.Matrix(VprFloatEntry); } diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 36355996e71..8df10208c27 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -110,17 +110,8 @@ static vtr::Point closest_point_in_rect(const vtr::Rect& r, const vtr::Poi } } -// resize internal data structures -void CostMap::set_counts(size_t seg_count, size_t box_count) { - cost_map_.clear(); - offset_.clear(); - penalty_.clear(); - cost_map_.resize({seg_count, box_count}); - offset_.resize({seg_count, box_count}); - penalty_.resize({seg_count, box_count}); - seg_count_ = seg_count; - box_count_ = box_count; - +// build the segment map +void CostMap::build_segment_map() { const auto& device_ctx = g_vpr_ctx.device(); segment_map_.resize(device_ctx.rr_nodes.size()); for (size_t i = 0; i < segment_map_.size(); ++i) { @@ -133,6 +124,18 @@ void CostMap::set_counts(size_t seg_count, size_t box_count) { } } +// resize internal data structures +void CostMap::set_counts(size_t seg_count, size_t box_count) { + cost_map_.clear(); + offset_.clear(); + penalty_.clear(); + cost_map_.resize({seg_count, box_count}); + offset_.resize({seg_count, box_count}); + penalty_.resize({seg_count, box_count}); + seg_count_ = seg_count; + box_count_ = box_count; +} + // cached node -> segment map int CostMap::node_to_segment(int from_node_ind) const { return segment_map_[from_node_ind]; @@ -638,6 +641,7 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen auto& device_ctx = g_vpr_ctx.device(); cost_map_.set_counts(segment_inf.size(), device_ctx.connection_boxes.num_connection_box_types()); + cost_map_.build_segment_map(); VTR_ASSERT(REPRESENTATIVE_ENTRY_METHOD == util::SMALLEST); RoutingCosts all_delay_costs; @@ -1053,22 +1057,13 @@ static void FromFloat(VprFloatEntry::Builder* out, const float& in) { } void CostMap::read(const std::string& file) { + build_segment_map(); MmapFile f(file); ::capnp::ReaderOptions opts = default_large_capnp_opts(); ::capnp::FlatArrayMessageReader reader(f.getData(), opts); auto cost_map = reader.getRoot(); - - { - const auto& segment_map = cost_map.getSegmentMap(); - segment_map_.resize(segment_map.size()); - auto dst_iter = segment_map_.begin(); - for (const auto& src : segment_map) { - *dst_iter++ = src; - } - } - { const auto& offset = cost_map.getOffset(); ToNdMatrix<2, VprVector2D, std::pair>( @@ -1093,13 +1088,6 @@ void CostMap::write(const std::string& file) const { auto cost_map = builder.initRoot(); - { - auto segment_map = cost_map.initSegmentMap(segment_map_.size()); - for (size_t i = 0; i < segment_map_.size(); ++i) { - segment_map.set(i, segment_map_[i]); - } - } - { auto offset = cost_map.initOffset(); FromNdMatrix<2, VprVector2D, std::pair>( diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h index 70423409f7a..90a7ab3276f 100644 --- a/vpr/src/route/connection_box_lookahead_map.h +++ b/vpr/src/route/connection_box_lookahead_map.h @@ -50,6 +50,7 @@ typedef std::unordered_map RoutingCos class CostMap { public: void set_counts(size_t seg_count, size_t box_count); + void build_segment_map(); int node_to_segment(int from_node_ind) const; util::Cost_Entry find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const; void set_cost_map(const RoutingCosts& delay_costs, const RoutingCosts& base_costs); From 2127254450d7fa8e9d2daf46028c9bddf850af8e Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Fri, 21 Feb 2020 15:31:37 -0800 Subject: [PATCH 34/41] Refactor lookahead computation. Before on the A200T: > Computing connection box lookahead map took 31769.29 seconds (max_rss 45671.6 MiB, delta_rss +14486.3 MiB) After on the A200T: > Computing connection box lookahead map took 16791.79 seconds (max_rss 40113.6 MiB, delta_rss +8895.9 MiB) Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- .../route/connection_box_lookahead_map.cpp | 29 ++++++++++++------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 8df10208c27..3f765668c16 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -86,6 +86,8 @@ struct SampleRegion { template static std::pair run_dijkstra(int start_node_ind, + std::vector* node_expanded, + std::vector* paths, RoutingCosts* routing_costs); static std::vector find_sample_regions(int num_segments); @@ -495,7 +497,7 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, template static bool add_paths(int start_node_ind, Entry current, - std::unordered_map* paths, + const std::vector& paths, RoutingCosts* routing_costs) { auto& device_ctx = g_vpr_ctx.device(); ConnectionBoxId box_id; @@ -511,7 +513,8 @@ static bool add_paths(int start_node_ind, // reconstruct the path std::vector path; - for (int i = (*paths)[node_ind].parent; i != start_node_ind; i = (*paths)[i].parent) { + for (int i = paths[node_ind].parent; i != start_node_ind; i = paths[i].parent) { + VTR_ASSERT(i != -1); path.push_back(i); } path.push_back(start_node_ind); @@ -538,7 +541,7 @@ static bool add_paths(int start_node_ind, if (*it != start_node_ind) { auto& parent_node = device_ctx.rr_nodes[parent]; - start_to_here = Entry(*it, parent_node.edge_switch((*paths)[*it].edge), &start_to_here); + start_to_here = Entry(*it, parent_node.edge_switch(paths[*it].edge), &start_to_here); parent = *it; } @@ -573,6 +576,8 @@ static bool add_paths(int start_node_ind, * the number of paths from start_node_ind stored. */ template static std::pair run_dijkstra(int start_node_ind, + std::vector* node_expanded, + std::vector* paths, RoutingCosts* routing_costs) { auto& device_ctx = g_vpr_ctx.device(); int path_count = 0; @@ -584,12 +589,12 @@ static std::pair run_dijkstra(int start_node_ind, /* a list of boolean flags (one for each rr node) to figure out if a * certain node has already been expanded */ - std::vector node_expanded(device_ctx.rr_nodes.size(), false); + std::fill(node_expanded->begin(), node_expanded->end(), false); /* For each node keep a list of the cost with which that node has been * visited (used to determine whether to push a candidate node onto the * expansion queue. * Also store the parent node so we can reconstruct a specific path. */ - std::unordered_map paths; + std::fill(paths->begin(), paths->end(), util::Search_Path{std::numeric_limits::infinity(), -1, -1}); /* a priority queue for expansion */ std::priority_queue, std::greater> pq; @@ -607,7 +612,7 @@ static std::pair run_dijkstra(int start_node_ind, int node_ind = current.rr_node_ind; /* check that we haven't already expanded from this node */ - if (node_expanded[node_ind]) { + if ((*node_expanded)[node_ind]) { continue; } @@ -617,11 +622,11 @@ static std::pair run_dijkstra(int start_node_ind, max_cost = current.cost(); path_count++; - add_paths(start_node_ind, current, &paths, routing_costs); + add_paths(start_node_ind, current, *paths, routing_costs); } else { util::expand_dijkstra_neighbours(device_ctx.rr_nodes, - current, paths, node_expanded, pq); - node_expanded[node_ind] = true; + current, paths, node_expanded, &pq); + (*node_expanded)[node_ind] = true; } } return std::make_pair(max_cost, path_count); @@ -658,6 +663,8 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen RoutingCosts delay_costs; RoutingCosts base_costs; int total_path_count = 0; + std::vector node_expanded(device_ctx.rr_nodes.size()); + std::vector paths(device_ctx.rr_nodes.size()); for (auto& point : region.points) { // statistics @@ -667,12 +674,12 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen int path_count = 0; for (auto node_ind : point.nodes) { { - auto result = run_dijkstra(node_ind, &delay_costs); + auto result = run_dijkstra(node_ind, &node_expanded, &paths, &delay_costs); max_delay_cost = std::max(max_delay_cost, result.first); path_count += result.second; } { - auto result = run_dijkstra(node_ind, &base_costs); + auto result = run_dijkstra(node_ind, &node_expanded, &paths, &base_costs); max_base_cost = std::max(max_base_cost, result.first); path_count += result.second; } From 87b0e930fd931a2f8970bc56d01b147b8d13201d Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Fri, 7 Feb 2020 17:07:56 -0800 Subject: [PATCH 35/41] Skip empty windows. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/route/connection_box_lookahead_map.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp index 3f765668c16..6ad88fb2c74 100644 --- a/vpr/src/route/connection_box_lookahead_map.cpp +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -940,6 +940,8 @@ static std::vector find_sample_regions(int num_segments) { for (int y = 0; y < SAMPLE_GRID_SIZE; y++) { for (int x = 0; x < SAMPLE_GRID_SIZE; x++) { vtr::Rect window = sample_window(bounding_box, x, y, SAMPLE_GRID_SIZE); + if (window.empty()) continue; + auto histogram = count_histogram(window, segment_counts[i]); SampleRegion region = { /* .segment_type = */ i, From e2891cb0eaaaec5fb4f79ba06300f7e7767a26bc Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Wed, 1 Jul 2020 14:14:51 -0700 Subject: [PATCH 36/41] Add CONNECTION_BOX_MAP to ShowSetup print. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/base/ShowSetup.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp index fb065b6d071..5ce6263e67a 100644 --- a/vpr/src/base/ShowSetup.cpp +++ b/vpr/src/base/ShowSetup.cpp @@ -313,6 +313,9 @@ static void ShowRouterOpts(const t_router_opts& RouterOpts) { case e_router_lookahead::NO_OP: VTR_LOG("NO_OP\n"); break; + case e_router_lookahead::CONNECTION_BOX_MAP: + VTR_LOG("CONNECTION_BOX_MAP\n"); + break; default: VPR_FATAL_ERROR(VPR_ERROR_UNKNOWN, "Unknown lookahead_type\n"); } From 9b4ab53eb0b26b13ee1278d2b85d8b77fd527d44 Mon Sep 17 00:00:00 2001 From: Alessandro Comodi Date: Mon, 14 Sep 2020 13:27:33 +0200 Subject: [PATCH 37/41] connection box: fixes after rebase Signed-off-by: Alessandro Comodi --- vpr/src/base/vpr_types.h | 4 +- .../route/connection_box_lookahead_map.cpp | 183 ++++++++++-------- vpr/src/route/connection_box_lookahead_map.h | 6 +- vpr/src/route/route_timing.h | 2 - .../route/router_lookahead_extended_map.cpp | 2 +- vpr/src/route/router_lookahead_map_utils.cpp | 3 - 6 files changed, 105 insertions(+), 95 deletions(-) diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index b17036638de..062dc3625c8 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -113,8 +113,8 @@ enum class e_router_lookahead { CLASSIC, /// location; @@ -68,7 +68,7 @@ struct SamplePoint { std::vector nodes; }; -struct SampleRegion { +struct ConnectionBoxSampleRegion { // all nodes in `points' have this segment type int segment_type; @@ -78,7 +78,7 @@ struct SampleRegion { // locations to try // The computation will keep expanding each of the points // until a number of paths (segment -> connection box) are found. - std::vector points; + std::vector points; // used to sort the regions to improve caching uint64_t order; @@ -90,30 +90,25 @@ static std::pair run_dijkstra(int start_node_ind, std::vector* paths, RoutingCosts* routing_costs); -static std::vector find_sample_regions(int num_segments); +static std::vector find_sample_regions(int num_segments); // also known as the L1 norm static int manhattan_distance(const vtr::Point& a, const vtr::Point& b) { return abs(b.x() - a.x()) + abs(b.y() - a.y()); } -template -constexpr const T& clamp(const T& v, const T& lo, const T& hi) { - return std::min(std::max(v, lo), hi); -} - template static vtr::Point closest_point_in_rect(const vtr::Rect& r, const vtr::Point& p) { if (r.empty()) { return vtr::Point(0, 0); } else { - return vtr::Point(clamp(p.x(), r.xmin(), r.xmax() - 1), - clamp(p.y(), r.ymin(), r.ymax() - 1)); + return vtr::Point(vtr::clamp(p.x(), r.xmin(), r.xmax() - 1), + vtr::clamp(p.y(), r.ymin(), r.ymax() - 1)); } } // build the segment map -void CostMap::build_segment_map() { +void ConnectionBoxCostMap::build_segment_map() { const auto& device_ctx = g_vpr_ctx.device(); segment_map_.resize(device_ctx.rr_nodes.size()); for (size_t i = 0; i < segment_map_.size(); ++i) { @@ -127,7 +122,7 @@ void CostMap::build_segment_map() { } // resize internal data structures -void CostMap::set_counts(size_t seg_count, size_t box_count) { +void ConnectionBoxCostMap::set_counts(size_t seg_count, size_t box_count) { cost_map_.clear(); offset_.clear(); penalty_.clear(); @@ -139,7 +134,7 @@ void CostMap::set_counts(size_t seg_count, size_t box_count) { } // cached node -> segment map -int CostMap::node_to_segment(int from_node_ind) const { +int ConnectionBoxCostMap::node_to_segment(int from_node_ind) const { return segment_map_[from_node_ind]; } @@ -150,7 +145,7 @@ static util::Cost_Entry penalize(const util::Cost_Entry& entry, int distance, fl } // get a cost entry for a segment type, connection box type, and offset -util::Cost_Entry CostMap::find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const { +util::Cost_Entry ConnectionBoxCostMap::find_cost(int from_seg_index, ConnectionBoxId box_id, int delta_x, int delta_y) const { VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); const auto& cost_map = cost_map_[from_seg_index][size_t(box_id)]; if (cost_map.dim_size(0) == 0 || cost_map.dim_size(1) == 0) { @@ -168,7 +163,7 @@ util::Cost_Entry CostMap::find_cost(int from_seg_index, ConnectionBoxId box_id, } // set the cost map for a segment type and connection box type, filling holes -void CostMap::set_cost_map(const RoutingCosts& delay_costs, const RoutingCosts& base_costs) { +void ConnectionBoxCostMap::set_cost_map(const RoutingCosts& delay_costs, const RoutingCosts& base_costs) { // calculate the bounding boxes vtr::Matrix> bounds({seg_count_, box_count_}); for (const auto& entry : delay_costs) { @@ -298,7 +293,7 @@ void CostMap::set_cost_map(const RoutingCosts& delay_costs, const RoutingCosts& // o => above average // . => at or below average // * => invalid (missing) -void CostMap::print(int iseg) const { +void ConnectionBoxCostMap::print(int iseg) const { const auto& device_ctx = g_vpr_ctx.device(); for (size_t box_id = 0; box_id < device_ctx.connection_boxes.num_connection_box_types(); @@ -340,7 +335,7 @@ void CostMap::print(int iseg) const { } // list segment type and connection box type pairs that have empty cost maps (debug) -std::vector> CostMap::list_empty() const { +std::vector> ConnectionBoxCostMap::list_empty() const { std::vector> results; for (int iseg = 0; iseg < (int)cost_map_.dim_size(0); iseg++) { for (int box_id = 0; box_id < (int)cost_map_.dim_size(1); box_id++) { @@ -361,7 +356,7 @@ static void assign_min_entry(util::Cost_Entry* dst, const util::Cost_Entry& src) } // find the minimum cost entry from the nearest manhattan distance neighbor -std::pair CostMap::get_nearby_cost_entry(const vtr::NdMatrix& matrix, +std::pair ConnectionBoxCostMap::get_nearby_cost_entry(const vtr::NdMatrix& matrix, int cx, int cy, const vtr::Rect& bounds) { @@ -403,65 +398,73 @@ std::pair CostMap::get_nearby_cost_entry(const vtr::NdMat } // derive a cost from the map between two nodes -float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, - int to_node_ind, - float criticality_fac) const { - if (from_node_ind == to_node_ind) { - return 0.f; +std::pair ConnectionBoxMapLookahead::get_expected_delay_and_cong(int inode, int target_node, const t_conn_cost_params& params, float R_upstream) const { + if (inode == target_node) { + return std::make_pair(0., 0.); } + RRNodeId from_node(inode); + RRNodeId to_node(target_node); + auto& device_ctx = g_vpr_ctx.device(); - auto to_node_type = device_ctx.rr_nodes[to_node_ind].type(); + auto to_node_type = device_ctx.rr_nodes.node_type(to_node); if (to_node_type == SINK) { - const auto& sink_to_ipin = device_ctx.connection_boxes.find_sink_connection_boxes(to_node_ind); + const auto& sink_to_ipin = device_ctx.connection_boxes.find_sink_connection_boxes(target_node); float cost = std::numeric_limits::infinity(); - + float delay = std::numeric_limits::infinity(); + float cong = std::numeric_limits::infinity(); // Find cheapest cost from from_node_ind to IPINs for this SINK. for (int i = 0; i < sink_to_ipin.ipin_count; ++i) { - cost = std::min(cost, - get_map_cost( - from_node_ind, - sink_to_ipin.ipin_nodes[i], criticality_fac)); + float new_cost, new_delay, new_cong; + std::tie(new_delay, new_cong) = get_expected_delay_and_cong(inode, sink_to_ipin.ipin_nodes[i], params, R_upstream); + + new_cost = new_delay + new_cong; + if (new_cost < cost) { + delay = new_delay; + cong = new_cong; + cost = new_cost; + } + if (cost <= 0.f) break; } - return cost; + return std::pair(delay, cong); } - if (device_ctx.rr_nodes[to_node_ind].type() != IPIN) { - VPR_THROW(VPR_ERROR_ROUTE, "Not an IPIN/SINK, is %d", to_node_ind); + if (to_node_type != IPIN) { + VPR_THROW(VPR_ERROR_ROUTE, "Not an IPIN/SINK, is %d", target_node); } ConnectionBoxId box_id; std::pair box_location; float site_pin_delay; bool found = device_ctx.connection_boxes.find_connection_box( - to_node_ind, &box_id, &box_location, &site_pin_delay); + target_node, &box_id, &box_location, &site_pin_delay); if (!found) { - VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", to_node_ind); + VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", target_node); } - const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(from_node_ind); + const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(inode); if (from_canonical_loc == nullptr) { VPR_THROW(VPR_ERROR_ROUTE, "No canonical loc for %d (to %d)", - from_node_ind, to_node_ind); + inode, target_node); } ssize_t dx = ssize_t(from_canonical_loc->first) - ssize_t(box_location.first); ssize_t dy = ssize_t(from_canonical_loc->second) - ssize_t(box_location.second); - int from_seg_index = cost_map_.node_to_segment(from_node_ind); + int from_seg_index = cost_map_.node_to_segment(inode); util::Cost_Entry cost_entry = cost_map_.find_cost(from_seg_index, box_id, dx, dy); if (!cost_entry.valid()) { // there is no route VTR_LOGV_DEBUG(f_router_debug, "Not connected %d (%s, %d) -> %d (%s, %d, (%d, %d))\n", - from_node_ind, device_ctx.rr_nodes[from_node_ind].type_string(), from_seg_index, - to_node_ind, device_ctx.rr_nodes[to_node_ind].type_string(), + inode, device_ctx.rr_nodes[inode].type_string(), from_seg_index, + target_node, device_ctx.rr_nodes[target_node].type_string(), (int)size_t(box_id), (int)box_location.first, (int)box_location.second); - return std::numeric_limits::infinity(); + return std::pair(std::numeric_limits::infinity(), std::numeric_limits::infinity()); } float expected_delay = cost_entry.delay; @@ -469,19 +472,22 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, expected_delay += site_pin_delay; - float expected_cost = criticality_fac * expected_delay + (1.0 - criticality_fac) * expected_congestion; + float expected_delay_cost = params.criticality * expected_delay; + float expected_cong_cost = (1.0 - params.criticality) * expected_congestion; + + float expected_cost = expected_delay_cost + expected_cong_cost; - VTR_LOGV_DEBUG(f_router_debug, "Requested lookahead from node %d to %d\n", from_node_ind, to_node_ind); - const std::string& segment_name = device_ctx.segment_inf[from_seg_index].name; + VTR_LOGV_DEBUG(f_router_debug, "Requested lookahead from node %d to %d\n", inode, target_node); + const std::string& segment_name = device_ctx.rr_segments[from_seg_index].name; const std::string& box_name = device_ctx.connection_boxes.get_connection_box(box_id)->name; VTR_LOGV_DEBUG(f_router_debug, "Lookahead returned %s (%d) to %s (%zu) with distance (%zd, %zd)\n", segment_name.c_str(), from_seg_index, box_name.c_str(), size_t(box_id), dx, dy); - VTR_LOGV_DEBUG(f_router_debug, "Lookahead delay: %g\n", expected_delay); - VTR_LOGV_DEBUG(f_router_debug, "Lookahead congestion: %g\n", expected_congestion); - VTR_LOGV_DEBUG(f_router_debug, "Criticality: %g\n", criticality_fac); + VTR_LOGV_DEBUG(f_router_debug, "Lookahead delay: %g\n", expected_delay_cost); + VTR_LOGV_DEBUG(f_router_debug, "Lookahead congestion: %g\n", expected_cong_cost); + VTR_LOGV_DEBUG(f_router_debug, "Criticality: %g\n", params.criticality); VTR_LOGV_DEBUG(f_router_debug, "Lookahead cost: %g\n", expected_cost); VTR_LOGV_DEBUG(f_router_debug, "Site pin delay: %g\n", site_pin_delay); @@ -490,12 +496,12 @@ float ConnectionBoxMapLookahead::get_map_cost(int from_node_ind, VTR_ASSERT(0); } - return expected_cost; + return std::make_pair(expected_delay_cost, expected_cong_cost); } // add a best cost routing path from start_node_ind to node_ind to routing costs template -static bool add_paths(int start_node_ind, +static bool add_paths(RRNodeId start_node, Entry current, const std::vector& paths, RoutingCosts* routing_costs) { @@ -503,28 +509,29 @@ static bool add_paths(int start_node_ind, ConnectionBoxId box_id; std::pair box_location; float site_pin_delay; - int node_ind = current.rr_node_ind; + RRNodeId node = current.rr_node; bool found = device_ctx.connection_boxes.find_connection_box( - node_ind, &box_id, &box_location, &site_pin_delay); + size_t(node), &box_id, &box_location, &site_pin_delay); bool new_sample_found = false; if (!found) { - VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", node_ind); + VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", size_t(node)); } // reconstruct the path std::vector path; - for (int i = paths[node_ind].parent; i != start_node_ind; i = paths[i].parent) { - VTR_ASSERT(i != -1); + for (size_t i = paths[size_t(node)].parent; i != size_t(start_node); i = paths[i].parent) { + VTR_ASSERT(i != std::numeric_limits::max()); path.push_back(i); } - path.push_back(start_node_ind); + path.push_back(size_t(start_node)); current.adjust_Tsw(-site_pin_delay); // add each node along the path subtracting the incremental costs from the current costs - Entry start_to_here(start_node_ind, UNDEFINED, nullptr); - int parent = start_node_ind; + Entry start_to_here(start_node, UNDEFINED, nullptr); + RRNodeId parent = start_node; for (auto it = path.rbegin(); it != path.rend(); it++) { + RRNodeId this_node(*it); auto& here = device_ctx.rr_nodes[*it]; int seg_index = device_ctx.rr_indexed_data[here.cost_index()].seg_index; const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(*it); @@ -539,10 +546,10 @@ static bool add_paths(int start_node_ind, box_id, delta}; - if (*it != start_node_ind) { - auto& parent_node = device_ctx.rr_nodes[parent]; - start_to_here = Entry(*it, parent_node.edge_switch(paths[*it].edge), &start_to_here); - parent = *it; + if (this_node != start_node) { + auto& parent_node = device_ctx.rr_nodes[size_t(parent)]; + start_to_here = Entry(this_node, parent_node.edge_switch(paths[*it].edge), &start_to_here); + parent = this_node; } float cost = current.cost() - start_to_here.cost(); @@ -594,12 +601,15 @@ static std::pair run_dijkstra(int start_node_ind, * visited (used to determine whether to push a candidate node onto the * expansion queue. * Also store the parent node so we can reconstruct a specific path. */ - std::fill(paths->begin(), paths->end(), util::Search_Path{std::numeric_limits::infinity(), -1, -1}); + std::fill(paths->begin(), paths->end(), util::Search_Path{std::numeric_limits::infinity(), std::numeric_limits::max(), -1}); + /* a priority queue for expansion */ std::priority_queue, std::greater> pq; + RRNodeId start_node(start_node_ind); + /* first entry has no upstream delay or congestion */ - Entry first_entry(start_node_ind, UNDEFINED, nullptr); + Entry first_entry(start_node, UNDEFINED, nullptr); float max_cost = first_entry.cost(); pq.push(first_entry); @@ -609,24 +619,24 @@ static std::pair run_dijkstra(int start_node_ind, auto current = pq.top(); pq.pop(); - int node_ind = current.rr_node_ind; + RRNodeId node = current.rr_node; /* check that we haven't already expanded from this node */ - if ((*node_expanded)[node_ind]) { + if ((*node_expanded)[size_t(node)]) { continue; } /* if this node is an ipin record its congestion/delay in the routing_cost_map */ - if (device_ctx.rr_nodes[node_ind].type() == IPIN) { + if (device_ctx.rr_nodes.node_type(node) == IPIN) { // the last cost should be the highest max_cost = current.cost(); path_count++; - add_paths(start_node_ind, current, *paths, routing_costs); + add_paths(start_node, current, *paths, routing_costs); } else { util::expand_dijkstra_neighbours(device_ctx.rr_nodes, current, paths, node_expanded, &pq); - (*node_expanded)[node_ind] = true; + (*node_expanded)[size_t(node)] = true; } } return std::make_pair(max_cost, path_count); @@ -640,7 +650,7 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen alloc_and_load_rr_node_route_structs(); size_t num_segments = segment_inf.size(); - std::vector sample_regions = find_sample_regions(num_segments); + std::vector sample_regions = find_sample_regions(num_segments); /* free previous delay map and allocate new one */ auto& device_ctx = g_vpr_ctx.device(); @@ -655,7 +665,7 @@ void ConnectionBoxMapLookahead::compute(const std::vector& segmen /* run Dijkstra's algorithm for each segment type & channel type combination */ #if defined(VPR_USE_TBB) tbb::mutex all_costs_mutex; - tbb::parallel_for_each(sample_regions, [&](const SampleRegion& region) { + tbb::parallel_for_each(sample_regions, [&](const ConnectionBoxSampleRegion& region) { #else for (const auto& region : sample_regions) { #endif @@ -782,14 +792,17 @@ float ConnectionBoxMapLookahead::get_expected_cost( int current_node, int target_node, const t_conn_cost_params& params, - float /*R_upstream*/) const { + float R_upstream) const { auto& device_ctx = g_vpr_ctx.device(); t_rr_type rr_type = device_ctx.rr_nodes[current_node].type(); if (rr_type == CHANX || rr_type == CHANY) { - return get_map_cost( - current_node, target_node, params.criticality); + float delay_cost, cong_cost; + + // Get the total cost using the combined delay and congestion costs + std::tie(delay_cost, cong_cost) = get_expected_delay_and_cong(current_node, target_node, params, R_upstream); + return delay_cost + cong_cost; } else if (rr_type == IPIN) { /* Change if you're allowing route-throughs */ return (device_ctx.rr_indexed_data[SINK_COST_INDEX].base_cost); } else { /* Change this if you want to investigate route-throughs */ @@ -812,16 +825,16 @@ static vtr::Rect sample_window(const vtr::Rect& bounding_box, int sx, sample(bounding_box, sx + 1, sy + 1, n)); } -static std::vector choose_points(const vtr::Matrix& counts, +static std::vector choose_points(const vtr::Matrix& counts, const vtr::Rect& window, int min_count, int max_count) { VTR_ASSERT(min_count <= max_count); - std::vector points; + std::vector points; for (int y = window.ymin(); y < window.ymax(); y++) { for (int x = window.xmin(); x < window.xmax(); x++) { if (counts[x][y] >= min_count && counts[x][y] <= max_count) { - points.push_back(SamplePoint{/* .location = */ vtr::Point(x, y), + points.push_back(ConnectionBoxSamplePoint{/* .location = */ vtr::Point(x, y), /* .nodes = */ {}}); } } @@ -831,7 +844,7 @@ static std::vector choose_points(const vtr::Matrix& counts, // sort by distance from center std::sort(points.begin(), points.end(), - [&](const SamplePoint& a, const SamplePoint& b) { + [&](const ConnectionBoxSamplePoint& a, const ConnectionBoxSamplePoint& b) { return manhattan_distance(a.location, center) < manhattan_distance(b.location, center); }); @@ -890,9 +903,9 @@ static uint64_t interleave(uint32_t x) { // for each segment type, find the nearest nodes to an equally spaced grid of points // within the bounding box for that segment type -static std::vector find_sample_regions(int num_segments) { +static std::vector find_sample_regions(int num_segments) { vtr::ScopedStartFinishTimer timer("finding sample regions"); - std::vector sample_regions; + std::vector sample_regions; auto& device_ctx = g_vpr_ctx.device(); auto& rr_nodes = device_ctx.rr_nodes; std::vector> segment_counts(num_segments); @@ -943,7 +956,7 @@ static std::vector find_sample_regions(int num_segments) { if (window.empty()) continue; auto histogram = count_histogram(window, segment_counts[i]); - SampleRegion region = { + ConnectionBoxSampleRegion region = { /* .segment_type = */ i, /* .grid_location = */ vtr::Point(x, y), /* .points = */ choose_points(counts, window, quantile(histogram, kSamplingCountLowerQuantile), quantile(histogram, kSamplingCountUpperQuantile)), @@ -970,12 +983,12 @@ static std::vector find_sample_regions(int num_segments) { // sort regions std::sort(sample_regions.begin(), sample_regions.end(), - [](const SampleRegion& a, const SampleRegion& b) { + [](const ConnectionBoxSampleRegion& a, const ConnectionBoxSampleRegion& b) { return a.order < b.order; }); // build an index of sample points on segment type and location - std::map, SamplePoint*> sample_point_index; + std::map, ConnectionBoxSamplePoint*> sample_point_index; for (auto& region : sample_regions) { for (auto& point : region.points) { sample_point_index[std::make_tuple(region.segment_type, point.location.x(), point.location.y())] = &point; @@ -1065,7 +1078,7 @@ static void FromFloat(VprFloatEntry::Builder* out, const float& in) { out->setValue(in); } -void CostMap::read(const std::string& file) { +void ConnectionBoxCostMap::read(const std::string& file) { build_segment_map(); MmapFile f(file); @@ -1092,7 +1105,7 @@ void CostMap::read(const std::string& file) { } } -void CostMap::write(const std::string& file) const { +void ConnectionBoxCostMap::write(const std::string& file) const { ::capnp::MallocMessageBuilder builder; auto cost_map = builder.initRoot(); diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h index 90a7ab3276f..3863fab9e36 100644 --- a/vpr/src/route/connection_box_lookahead_map.h +++ b/vpr/src/route/connection_box_lookahead_map.h @@ -47,7 +47,7 @@ struct HashRoutingCostKey { typedef std::unordered_map RoutingCosts; // Dense cost maps per source segment and destination connection box types -class CostMap { +class ConnectionBoxCostMap { public: void set_counts(size_t seg_count, size_t box_count); void build_segment_map(); @@ -73,13 +73,15 @@ class CostMap { class ConnectionBoxMapLookahead : public RouterLookahead { public: float get_expected_cost(int node, int target_node, const t_conn_cost_params& params, float R_upstream) const override; + std::pair get_expected_delay_and_cong(int inode, int target_node, const t_conn_cost_params& params, float R_upstream) const override; + float get_map_cost(int from_node_ind, int to_node_ind, float criticality_fac) const; void compute(const std::vector& segment_inf) override; void read(const std::string& file) override; void write(const std::string& file) const override; - CostMap cost_map_; + ConnectionBoxCostMap cost_map_; }; #endif diff --git a/vpr/src/route/route_timing.h b/vpr/src/route/route_timing.h index 98ab811cf46..90756a02e19 100644 --- a/vpr/src/route/route_timing.h +++ b/vpr/src/route/route_timing.h @@ -16,8 +16,6 @@ extern bool f_router_debug; -extern bool f_router_debug; - int get_max_pins_per_net(); bool try_timing_driven_route(const t_router_opts& router_opts, diff --git a/vpr/src/route/router_lookahead_extended_map.cpp b/vpr/src/route/router_lookahead_extended_map.cpp index 6431dfbf375..79a14988627 100644 --- a/vpr/src/route/router_lookahead_extended_map.cpp +++ b/vpr/src/route/router_lookahead_extended_map.cpp @@ -230,7 +230,7 @@ std::pair ExtendedMapLookahead::get_expected_delay_and_cong(int in VTR_LOGV_DEBUG(f_router_debug, "Requested lookahead from node %d to %d\n", size_t(from_node), size_t(to_node)); const std::string& segment_name = device_ctx.rr_segments[from_seg_index].name; - VTR_LOGV_DEBUG(f_router_debug, "Lookahead returned %s (%d) with distance (%zd, %zd)\n", + VTR_LOGV_DEBUG(f_router_debug, "Lookahead returned %s (%d) with distance (%d, %d)\n", segment_name.c_str(), from_seg_index, dx, dy); VTR_LOGV_DEBUG(f_router_debug, "Lookahead delay: %g\n", expected_delay_cost); diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp index ff31fe6b0bb..4d5c96fdb99 100644 --- a/vpr/src/route/router_lookahead_map_utils.cpp +++ b/vpr/src/route/router_lookahead_map_utils.cpp @@ -56,9 +56,6 @@ static vtr::Point pick_sample_tile(t_physical_tile_type_ptr tile_type, vtr: // This is used when building the SROURCE/OPIN --> CHAN lookup table that contains additional delay and // congestion data to reach the CHANX/CHANY nodes which is not present in the lookahead cost map. #define DIRECT_CONNECT_SPECIAL_SEG_TYPE -1; -======= -#include "route_common.h" ->>>>>>> Use get_rr_cong_cost to compute base_cost's. namespace util { From 2f863db36d096cb4b1798db008a0192ad7e7854f Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Thu, 18 Jun 2020 11:17:44 -0700 Subject: [PATCH 38/41] Move connection_box to end to preserve order w.r.t. upstream. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- .../gen/rr_graph_uxsdcxx.capnp | 36 +- vpr/src/route/gen/rr_graph_uxsdcxx.h | 755 +++++++++--------- vpr/src/route/gen/rr_graph_uxsdcxx_capnp.h | 144 ++-- .../route/gen/rr_graph_uxsdcxx_interface.h | 78 +- vpr/src/route/rr_graph.xsd | 2 +- 5 files changed, 508 insertions(+), 507 deletions(-) diff --git a/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp b/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp index 519512f25e9..8a612573a82 100644 --- a/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp +++ b/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp @@ -4,9 +4,9 @@ # # Cmdline: uxsdcxx/uxsdcap.py rr_graph.xsd # Input file: rr_graph.xsd -# md5sum of input file: d9e439fa173fdf56b51feeed0ac48272 +# md5sum of input file: bea3923ac9822e94db96d5d9d15e18c6 -@0xa90e1cca7f71265c; +@0xf281a101eeb2a247; using Cxx = import "/capnp/c++.capnp"; $Cxx.namespace("ucap"); @@ -138,18 +138,6 @@ struct BlockTypes { blockTypes @0 :List(BlockType); } -struct ConnectionBoxDeclaration { - id @0 :UInt32; - name @1 :Text; -} - -struct ConnectionBoxes { - numBoxes @0 :UInt32; - xDim @1 :UInt32; - yDim @2 :UInt32; - connectionBoxes @3 :List(ConnectionBoxDeclaration); -} - struct GridLoc { blockTypeId @0 :Int32; heightOffset @1 :Int32; @@ -229,6 +217,18 @@ struct RrEdges { edges @0 :List(Edge); } +struct ConnectionBoxDeclaration { + id @0 :UInt32; + name @1 :Text; +} + +struct ConnectionBoxes { + numBoxes @0 :UInt32; + xDim @1 :UInt32; + yDim @2 :UInt32; + connectionBoxes @3 :List(ConnectionBoxDeclaration); +} + struct RrGraph { toolComment @0 :Text; toolName @1 :Text; @@ -237,8 +237,8 @@ struct RrGraph { switches @4 :Switches; segments @5 :Segments; blockTypes @6 :BlockTypes; - connectionBoxes @7 :ConnectionBoxes; - grid @8 :GridLocs; - rrNodes @9 :RrNodes; - rrEdges @10 :RrEdges; + grid @7 :GridLocs; + rrNodes @8 :RrNodes; + rrEdges @9 :RrEdges; + connectionBoxes @10 :ConnectionBoxes; } diff --git a/vpr/src/route/gen/rr_graph_uxsdcxx.h b/vpr/src/route/gen/rr_graph_uxsdcxx.h index e417e8fc578..a0fbd692544 100644 --- a/vpr/src/route/gen/rr_graph_uxsdcxx.h +++ b/vpr/src/route/gen/rr_graph_uxsdcxx.h @@ -6,7 +6,7 @@ * * Cmdline: uxsdcxx/uxsdcxx.py rr_graph.xsd * Input file: rr_graph.xsd - * md5sum of input file: d9e439fa173fdf56b51feeed0ac48272 + * md5sum of input file: bea3923ac9822e94db96d5d9d15e18c6 */ #include @@ -80,12 +80,6 @@ inline void load_block_type_required_attributes(const pugi::xml_node& root, int* template inline void load_block_types(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); template -inline void load_connection_box_declaration(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); -inline void load_connection_box_declaration_required_attributes(const pugi::xml_node& root, unsigned int* id, const std::function* report_error); -template -inline void load_connection_boxes(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); -inline void load_connection_boxes_required_attributes(const pugi::xml_node& root, unsigned int* num_boxes, unsigned int* x_dim, unsigned int* y_dim, const std::function* report_error); -template inline void load_grid_loc(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); inline void load_grid_loc_required_attributes(const pugi::xml_node& root, int* block_type_id, int* height_offset, int* width_offset, int* x, int* y, const std::function* report_error); template @@ -120,6 +114,12 @@ inline void load_edge_required_attributes(const pugi::xml_node& root, unsigned i template inline void load_rr_edges(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); template +inline void load_connection_box_declaration(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); +inline void load_connection_box_declaration_required_attributes(const pugi::xml_node& root, unsigned int* id, const std::function* report_error); +template +inline void load_connection_boxes(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); +inline void load_connection_boxes_required_attributes(const pugi::xml_node& root, unsigned int* num_boxes, unsigned int* x_dim, unsigned int* y_dim, const std::function* report_error); +template inline void load_rr_graph(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug); /* Declarations for internal write functions for the complex types. */ @@ -142,8 +142,6 @@ inline void write_block_type(T& in, std::ostream& os, const void* data, void* it template inline void write_block_types(T& in, std::ostream& os, const void* data, void* iter); template -inline void write_connection_boxes(T& in, std::ostream& os, const void* data, void* iter); -template inline void write_grid_locs(T& in, std::ostream& os, const void* data, void* iter); template inline void write_meta(T& in, std::ostream& os, const void* data, void* iter); @@ -158,6 +156,8 @@ inline void write_edge(T& in, std::ostream& os, const void* data, void* iter); template inline void write_rr_edges(T& in, std::ostream& os, const void* data, void* iter); template +inline void write_connection_boxes(T& in, std::ostream& os, const void* data, void* iter); +template inline void write_rr_graph(T& in, std::ostream& os, const void* data, void* iter); /* Load function for the root element. */ @@ -301,17 +301,6 @@ constexpr const char* atok_lookup_t_block_type[] = {"height", "id", "name", "wid enum class gtok_t_block_types { BLOCK_TYPE }; constexpr const char* gtok_lookup_t_block_types[] = {"block_type"}; -enum class atok_t_connection_box_declaration { ID, - NAME }; -constexpr const char* atok_lookup_t_connection_box_declaration[] = {"id", "name"}; - -enum class gtok_t_connection_boxes { CONNECTION_BOX }; -constexpr const char* gtok_lookup_t_connection_boxes[] = {"connection_box"}; -enum class atok_t_connection_boxes { NUM_BOXES, - X_DIM, - Y_DIM }; -constexpr const char* atok_lookup_t_connection_boxes[] = {"num_boxes", "x_dim", "y_dim"}; - enum class atok_t_grid_loc { BLOCK_TYPE_ID, HEIGHT_OFFSET, WIDTH_OFFSET, @@ -377,15 +366,27 @@ constexpr const char* atok_lookup_t_edge[] = {"sink_node", "src_node", "switch_i enum class gtok_t_rr_edges { EDGE }; constexpr const char* gtok_lookup_t_rr_edges[] = {"edge"}; + +enum class atok_t_connection_box_declaration { ID, + NAME }; +constexpr const char* atok_lookup_t_connection_box_declaration[] = {"id", "name"}; + +enum class gtok_t_connection_boxes { CONNECTION_BOX }; +constexpr const char* gtok_lookup_t_connection_boxes[] = {"connection_box"}; +enum class atok_t_connection_boxes { NUM_BOXES, + X_DIM, + Y_DIM }; +constexpr const char* atok_lookup_t_connection_boxes[] = {"num_boxes", "x_dim", "y_dim"}; + enum class gtok_t_rr_graph { CHANNELS, SWITCHES, SEGMENTS, BLOCK_TYPES, - CONNECTION_BOXES, GRID, RR_NODES, - RR_EDGES }; -constexpr const char* gtok_lookup_t_rr_graph[] = {"channels", "switches", "segments", "block_types", "connection_boxes", "grid", "rr_nodes", "rr_edges"}; + RR_EDGES, + CONNECTION_BOXES }; +constexpr const char* gtok_lookup_t_rr_graph[] = {"channels", "switches", "segments", "block_types", "grid", "rr_nodes", "rr_edges", "connection_boxes"}; enum class atok_t_rr_graph { TOOL_COMMENT, TOOL_NAME, TOOL_VERSION }; @@ -1198,122 +1199,6 @@ inline gtok_t_block_types lex_node_t_block_types(const char* in, const std::func noreturn_report(report_error, ("Found unrecognized child " + std::string(in) + " of .").c_str()); } -inline atok_t_connection_box_declaration lex_attr_t_connection_box_declaration(const char* in, const std::function* report_error) { - unsigned int len = strlen(in); - switch (len) { - case 2: - switch (in[0]) { - case onechar('i', 0, 8): - switch (in[1]) { - case onechar('d', 0, 8): - return atok_t_connection_box_declaration::ID; - break; - default: - break; - } - break; - default: - break; - } - break; - case 4: - switch (*((triehash_uu32*)&in[0])) { - case onechar('n', 0, 32) | onechar('a', 8, 32) | onechar('m', 16, 32) | onechar('e', 24, 32): - return atok_t_connection_box_declaration::NAME; - break; - default: - break; - } - break; - default: - break; - } - noreturn_report(report_error, ("Found unrecognized attribute " + std::string(in) + " of .").c_str()); -} - -inline gtok_t_connection_boxes lex_node_t_connection_boxes(const char* in, const std::function* report_error) { - unsigned int len = strlen(in); - switch (len) { - case 14: - switch (*((triehash_uu64*)&in[0])) { - case onechar('c', 0, 64) | onechar('o', 8, 64) | onechar('n', 16, 64) | onechar('n', 24, 64) | onechar('e', 32, 64) | onechar('c', 40, 64) | onechar('t', 48, 64) | onechar('i', 56, 64): - switch (*((triehash_uu32*)&in[8])) { - case onechar('o', 0, 32) | onechar('n', 8, 32) | onechar('_', 16, 32) | onechar('b', 24, 32): - switch (in[12]) { - case onechar('o', 0, 8): - switch (in[13]) { - case onechar('x', 0, 8): - return gtok_t_connection_boxes::CONNECTION_BOX; - break; - default: - break; - } - break; - default: - break; - } - break; - default: - break; - } - break; - default: - break; - } - break; - default: - break; - } - noreturn_report(report_error, ("Found unrecognized child " + std::string(in) + " of .").c_str()); -} -inline atok_t_connection_boxes lex_attr_t_connection_boxes(const char* in, const std::function* report_error) { - unsigned int len = strlen(in); - switch (len) { - case 5: - switch (*((triehash_uu32*)&in[0])) { - case onechar('x', 0, 32) | onechar('_', 8, 32) | onechar('d', 16, 32) | onechar('i', 24, 32): - switch (in[4]) { - case onechar('m', 0, 8): - return atok_t_connection_boxes::X_DIM; - break; - default: - break; - } - break; - case onechar('y', 0, 32) | onechar('_', 8, 32) | onechar('d', 16, 32) | onechar('i', 24, 32): - switch (in[4]) { - case onechar('m', 0, 8): - return atok_t_connection_boxes::Y_DIM; - break; - default: - break; - } - break; - default: - break; - } - break; - case 9: - switch (*((triehash_uu64*)&in[0])) { - case onechar('n', 0, 64) | onechar('u', 8, 64) | onechar('m', 16, 64) | onechar('_', 24, 64) | onechar('b', 32, 64) | onechar('o', 40, 64) | onechar('x', 48, 64) | onechar('e', 56, 64): - switch (in[8]) { - case onechar('s', 0, 8): - return atok_t_connection_boxes::NUM_BOXES; - break; - default: - break; - } - break; - default: - break; - } - break; - default: - break; - } - noreturn_report(report_error, ("Found unrecognized attribute " + std::string(in) + " of .").c_str()); -} - inline atok_t_grid_loc lex_attr_t_grid_loc(const char* in, const std::function* report_error) { unsigned int len = strlen(in); switch (len) { @@ -1930,49 +1815,52 @@ inline gtok_t_rr_edges lex_node_t_rr_edges(const char* in, const std::function.").c_str()); } -inline gtok_t_rr_graph lex_node_t_rr_graph(const char* in, const std::function* report_error) { +inline atok_t_connection_box_declaration lex_attr_t_connection_box_declaration(const char* in, const std::function* report_error) { unsigned int len = strlen(in); switch (len) { - case 4: - switch (*((triehash_uu32*)&in[0])) { - case onechar('g', 0, 32) | onechar('r', 8, 32) | onechar('i', 16, 32) | onechar('d', 24, 32): - return gtok_t_rr_graph::GRID; + case 2: + switch (in[0]) { + case onechar('i', 0, 8): + switch (in[1]) { + case onechar('d', 0, 8): + return atok_t_connection_box_declaration::ID; + break; + default: + break; + } break; default: break; } break; - case 8: - switch (*((triehash_uu64*)&in[0])) { - case onechar('c', 0, 64) | onechar('h', 8, 64) | onechar('a', 16, 64) | onechar('n', 24, 64) | onechar('n', 32, 64) | onechar('e', 40, 64) | onechar('l', 48, 64) | onechar('s', 56, 64): - return gtok_t_rr_graph::CHANNELS; - break; - case onechar('r', 0, 64) | onechar('r', 8, 64) | onechar('_', 16, 64) | onechar('e', 24, 64) | onechar('d', 32, 64) | onechar('g', 40, 64) | onechar('e', 48, 64) | onechar('s', 56, 64): - return gtok_t_rr_graph::RR_EDGES; - break; - case onechar('r', 0, 64) | onechar('r', 8, 64) | onechar('_', 16, 64) | onechar('n', 24, 64) | onechar('o', 32, 64) | onechar('d', 40, 64) | onechar('e', 48, 64) | onechar('s', 56, 64): - return gtok_t_rr_graph::RR_NODES; - break; - case onechar('s', 0, 64) | onechar('e', 8, 64) | onechar('g', 16, 64) | onechar('m', 24, 64) | onechar('e', 32, 64) | onechar('n', 40, 64) | onechar('t', 48, 64) | onechar('s', 56, 64): - return gtok_t_rr_graph::SEGMENTS; - break; - case onechar('s', 0, 64) | onechar('w', 8, 64) | onechar('i', 16, 64) | onechar('t', 24, 64) | onechar('c', 32, 64) | onechar('h', 40, 64) | onechar('e', 48, 64) | onechar('s', 56, 64): - return gtok_t_rr_graph::SWITCHES; + case 4: + switch (*((triehash_uu32*)&in[0])) { + case onechar('n', 0, 32) | onechar('a', 8, 32) | onechar('m', 16, 32) | onechar('e', 24, 32): + return atok_t_connection_box_declaration::NAME; break; default: break; } break; - case 11: + default: + break; + } + noreturn_report(report_error, ("Found unrecognized attribute " + std::string(in) + " of .").c_str()); +} + +inline gtok_t_connection_boxes lex_node_t_connection_boxes(const char* in, const std::function* report_error) { + unsigned int len = strlen(in); + switch (len) { + case 14: switch (*((triehash_uu64*)&in[0])) { - case onechar('b', 0, 64) | onechar('l', 8, 64) | onechar('o', 16, 64) | onechar('c', 24, 64) | onechar('k', 32, 64) | onechar('_', 40, 64) | onechar('t', 48, 64) | onechar('y', 56, 64): - switch (in[8]) { - case onechar('p', 0, 8): - switch (in[9]) { - case onechar('e', 0, 8): - switch (in[10]) { - case onechar('s', 0, 8): - return gtok_t_rr_graph::BLOCK_TYPES; + case onechar('c', 0, 64) | onechar('o', 8, 64) | onechar('n', 16, 64) | onechar('n', 24, 64) | onechar('e', 32, 64) | onechar('c', 40, 64) | onechar('t', 48, 64) | onechar('i', 56, 64): + switch (*((triehash_uu32*)&in[8])) { + case onechar('o', 0, 32) | onechar('n', 8, 32) | onechar('_', 16, 32) | onechar('b', 24, 32): + switch (in[12]) { + case onechar('o', 0, 8): + switch (in[13]) { + case onechar('x', 0, 8): + return gtok_t_connection_boxes::CONNECTION_BOX; break; default: break; @@ -1990,47 +1878,160 @@ inline gtok_t_rr_graph lex_node_t_rr_graph(const char* in, const std::function.").c_str()); + noreturn_report(report_error, ("Found unrecognized child " + std::string(in) + " of .").c_str()); } -inline atok_t_rr_graph lex_attr_t_rr_graph(const char* in, const std::function* report_error) { +inline atok_t_connection_boxes lex_attr_t_connection_boxes(const char* in, const std::function* report_error) { unsigned int len = strlen(in); switch (len) { - case 9: - switch (*((triehash_uu64*)&in[0])) { - case onechar('t', 0, 64) | onechar('o', 8, 64) | onechar('o', 16, 64) | onechar('l', 24, 64) | onechar('_', 32, 64) | onechar('n', 40, 64) | onechar('a', 48, 64) | onechar('m', 56, 64): - switch (in[8]) { - case onechar('e', 0, 8): - return atok_t_rr_graph::TOOL_NAME; + case 5: + switch (*((triehash_uu32*)&in[0])) { + case onechar('x', 0, 32) | onechar('_', 8, 32) | onechar('d', 16, 32) | onechar('i', 24, 32): + switch (in[4]) { + case onechar('m', 0, 8): + return atok_t_connection_boxes::X_DIM; break; default: break; } break; - default: - break; - } - break; - case 12: - switch (*((triehash_uu64*)&in[0])) { - case onechar('t', 0, 64) | onechar('o', 8, 64) | onechar('o', 16, 64) | onechar('l', 24, 64) | onechar('_', 32, 64) | onechar('c', 40, 64) | onechar('o', 48, 64) | onechar('m', 56, 64): + case onechar('y', 0, 32) | onechar('_', 8, 32) | onechar('d', 16, 32) | onechar('i', 24, 32): + switch (in[4]) { + case onechar('m', 0, 8): + return atok_t_connection_boxes::Y_DIM; + break; + default: + break; + } + break; + default: + break; + } + break; + case 9: + switch (*((triehash_uu64*)&in[0])) { + case onechar('n', 0, 64) | onechar('u', 8, 64) | onechar('m', 16, 64) | onechar('_', 24, 64) | onechar('b', 32, 64) | onechar('o', 40, 64) | onechar('x', 48, 64) | onechar('e', 56, 64): + switch (in[8]) { + case onechar('s', 0, 8): + return atok_t_connection_boxes::NUM_BOXES; + break; + default: + break; + } + break; + default: + break; + } + break; + default: + break; + } + noreturn_report(report_error, ("Found unrecognized attribute " + std::string(in) + " of .").c_str()); +} + +inline gtok_t_rr_graph lex_node_t_rr_graph(const char* in, const std::function* report_error) { + unsigned int len = strlen(in); + switch (len) { + case 4: + switch (*((triehash_uu32*)&in[0])) { + case onechar('g', 0, 32) | onechar('r', 8, 32) | onechar('i', 16, 32) | onechar('d', 24, 32): + return gtok_t_rr_graph::GRID; + break; + default: + break; + } + break; + case 8: + switch (*((triehash_uu64*)&in[0])) { + case onechar('c', 0, 64) | onechar('h', 8, 64) | onechar('a', 16, 64) | onechar('n', 24, 64) | onechar('n', 32, 64) | onechar('e', 40, 64) | onechar('l', 48, 64) | onechar('s', 56, 64): + return gtok_t_rr_graph::CHANNELS; + break; + case onechar('r', 0, 64) | onechar('r', 8, 64) | onechar('_', 16, 64) | onechar('e', 24, 64) | onechar('d', 32, 64) | onechar('g', 40, 64) | onechar('e', 48, 64) | onechar('s', 56, 64): + return gtok_t_rr_graph::RR_EDGES; + break; + case onechar('r', 0, 64) | onechar('r', 8, 64) | onechar('_', 16, 64) | onechar('n', 24, 64) | onechar('o', 32, 64) | onechar('d', 40, 64) | onechar('e', 48, 64) | onechar('s', 56, 64): + return gtok_t_rr_graph::RR_NODES; + break; + case onechar('s', 0, 64) | onechar('e', 8, 64) | onechar('g', 16, 64) | onechar('m', 24, 64) | onechar('e', 32, 64) | onechar('n', 40, 64) | onechar('t', 48, 64) | onechar('s', 56, 64): + return gtok_t_rr_graph::SEGMENTS; + break; + case onechar('s', 0, 64) | onechar('w', 8, 64) | onechar('i', 16, 64) | onechar('t', 24, 64) | onechar('c', 32, 64) | onechar('h', 40, 64) | onechar('e', 48, 64) | onechar('s', 56, 64): + return gtok_t_rr_graph::SWITCHES; + break; + default: + break; + } + break; + case 11: + switch (*((triehash_uu64*)&in[0])) { + case onechar('b', 0, 64) | onechar('l', 8, 64) | onechar('o', 16, 64) | onechar('c', 24, 64) | onechar('k', 32, 64) | onechar('_', 40, 64) | onechar('t', 48, 64) | onechar('y', 56, 64): + switch (in[8]) { + case onechar('p', 0, 8): + switch (in[9]) { + case onechar('e', 0, 8): + switch (in[10]) { + case onechar('s', 0, 8): + return gtok_t_rr_graph::BLOCK_TYPES; + break; + default: + break; + } + break; + default: + break; + } + break; + default: + break; + } + break; + default: + break; + } + break; + case 16: + switch (*((triehash_uu64*)&in[0])) { + case onechar('c', 0, 64) | onechar('o', 8, 64) | onechar('n', 16, 64) | onechar('n', 24, 64) | onechar('e', 32, 64) | onechar('c', 40, 64) | onechar('t', 48, 64) | onechar('i', 56, 64): + switch (*((triehash_uu64*)&in[8])) { + case onechar('o', 0, 64) | onechar('n', 8, 64) | onechar('_', 16, 64) | onechar('b', 24, 64) | onechar('o', 32, 64) | onechar('x', 40, 64) | onechar('e', 48, 64) | onechar('s', 56, 64): + return gtok_t_rr_graph::CONNECTION_BOXES; + break; + default: + break; + } + break; + default: + break; + } + break; + default: + break; + } + noreturn_report(report_error, ("Found unrecognized child " + std::string(in) + " of .").c_str()); +} +inline atok_t_rr_graph lex_attr_t_rr_graph(const char* in, const std::function* report_error) { + unsigned int len = strlen(in); + switch (len) { + case 9: + switch (*((triehash_uu64*)&in[0])) { + case onechar('t', 0, 64) | onechar('o', 8, 64) | onechar('o', 16, 64) | onechar('l', 24, 64) | onechar('_', 32, 64) | onechar('n', 40, 64) | onechar('a', 48, 64) | onechar('m', 56, 64): + switch (in[8]) { + case onechar('e', 0, 8): + return atok_t_rr_graph::TOOL_NAME; + break; + default: + break; + } + break; + default: + break; + } + break; + case 12: + switch (*((triehash_uu64*)&in[0])) { + case onechar('t', 0, 64) | onechar('o', 8, 64) | onechar('o', 16, 64) | onechar('l', 24, 64) | onechar('_', 32, 64) | onechar('c', 40, 64) | onechar('o', 48, 64) | onechar('m', 56, 64): switch (*((triehash_uu32*)&in[8])) { case onechar('m', 0, 32) | onechar('e', 8, 32) | onechar('n', 16, 32) | onechar('t', 24, 32): return atok_t_rr_graph::TOOL_COMMENT; @@ -2696,55 +2697,6 @@ inline void load_block_type_required_attributes(const pugi::xml_node& root, int* if (!test_astate.all()) attr_error(test_astate, atok_lookup_t_block_type, report_error); } -inline void load_connection_box_declaration_required_attributes(const pugi::xml_node& root, unsigned int* id, const std::function* report_error) { - std::bitset<2> astate = 0; - for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { - atok_t_connection_box_declaration in = lex_attr_t_connection_box_declaration(attr.name(), report_error); - if (astate[(int)in] == 0) - astate[(int)in] = 1; - else - noreturn_report(report_error, ("Duplicate attribute " + std::string(attr.name()) + " in .").c_str()); - switch (in) { - case atok_t_connection_box_declaration::ID: - *id = load_unsigned_int(attr.value(), report_error); - break; - case atok_t_connection_box_declaration::NAME: - /* Attribute name set after element init */ - break; - default: - break; /* Not possible. */ - } - } - std::bitset<2> test_astate = astate | std::bitset<2>(0b00); - if (!test_astate.all()) attr_error(test_astate, atok_lookup_t_connection_box_declaration, report_error); -} - -inline void load_connection_boxes_required_attributes(const pugi::xml_node& root, unsigned int* num_boxes, unsigned int* x_dim, unsigned int* y_dim, const std::function* report_error) { - std::bitset<3> astate = 0; - for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { - atok_t_connection_boxes in = lex_attr_t_connection_boxes(attr.name(), report_error); - if (astate[(int)in] == 0) - astate[(int)in] = 1; - else - noreturn_report(report_error, ("Duplicate attribute " + std::string(attr.name()) + " in .").c_str()); - switch (in) { - case atok_t_connection_boxes::NUM_BOXES: - *num_boxes = load_unsigned_int(attr.value(), report_error); - break; - case atok_t_connection_boxes::X_DIM: - *x_dim = load_unsigned_int(attr.value(), report_error); - break; - case atok_t_connection_boxes::Y_DIM: - *y_dim = load_unsigned_int(attr.value(), report_error); - break; - default: - break; /* Not possible. */ - } - } - std::bitset<3> test_astate = astate | std::bitset<3>(0b000); - if (!test_astate.all()) attr_error(test_astate, atok_lookup_t_connection_boxes, report_error); -} - inline void load_grid_loc_required_attributes(const pugi::xml_node& root, int* block_type_id, int* height_offset, int* width_offset, int* x, int* y, const std::function* report_error) { std::bitset<5> astate = 0; for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { @@ -2961,6 +2913,55 @@ inline void load_edge_required_attributes(const pugi::xml_node& root, unsigned i std::bitset<3> test_astate = astate | std::bitset<3>(0b000); if (!test_astate.all()) attr_error(test_astate, atok_lookup_t_edge, report_error); } + +inline void load_connection_box_declaration_required_attributes(const pugi::xml_node& root, unsigned int* id, const std::function* report_error) { + std::bitset<2> astate = 0; + for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { + atok_t_connection_box_declaration in = lex_attr_t_connection_box_declaration(attr.name(), report_error); + if (astate[(int)in] == 0) + astate[(int)in] = 1; + else + noreturn_report(report_error, ("Duplicate attribute " + std::string(attr.name()) + " in .").c_str()); + switch (in) { + case atok_t_connection_box_declaration::ID: + *id = load_unsigned_int(attr.value(), report_error); + break; + case atok_t_connection_box_declaration::NAME: + /* Attribute name set after element init */ + break; + default: + break; /* Not possible. */ + } + } + std::bitset<2> test_astate = astate | std::bitset<2>(0b00); + if (!test_astate.all()) attr_error(test_astate, atok_lookup_t_connection_box_declaration, report_error); +} + +inline void load_connection_boxes_required_attributes(const pugi::xml_node& root, unsigned int* num_boxes, unsigned int* x_dim, unsigned int* y_dim, const std::function* report_error) { + std::bitset<3> astate = 0; + for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { + atok_t_connection_boxes in = lex_attr_t_connection_boxes(attr.name(), report_error); + if (astate[(int)in] == 0) + astate[(int)in] = 1; + else + noreturn_report(report_error, ("Duplicate attribute " + std::string(attr.name()) + " in .").c_str()); + switch (in) { + case atok_t_connection_boxes::NUM_BOXES: + *num_boxes = load_unsigned_int(attr.value(), report_error); + break; + case atok_t_connection_boxes::X_DIM: + *x_dim = load_unsigned_int(attr.value(), report_error); + break; + case atok_t_connection_boxes::Y_DIM: + *y_dim = load_unsigned_int(attr.value(), report_error); + break; + default: + break; /* Not possible. */ + } + } + std::bitset<3> test_astate = astate | std::bitset<3>(0b000); + if (!test_astate.all()) attr_error(test_astate, atok_lookup_t_connection_boxes, report_error); +} template inline void load_channel(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug) { (void)root; @@ -3634,94 +3635,6 @@ inline void load_block_types(const pugi::xml_node& root, T& out, Context& contex if (state != 0) dfa_error("end of input", gstate_t_block_types[state], gtok_lookup_t_block_types, 1, report_error); } -template -inline void load_connection_box_declaration(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug) { - (void)root; - (void)out; - (void)context; - (void)report_error; - // Update current file offset in case an error is encountered. - *offset_debug = root.offset_debug(); - - for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { - atok_t_connection_box_declaration in = lex_attr_t_connection_box_declaration(attr.name(), report_error); - switch (in) { - case atok_t_connection_box_declaration::ID: - /* Attribute id is already set */ - break; - case atok_t_connection_box_declaration::NAME: - out.set_connection_box_declaration_name(attr.value(), context); - break; - default: - break; /* Not possible. */ - } - } - - if (root.first_child().type() == pugi::node_element) - noreturn_report(report_error, "Unexpected child element in ."); -} - -constexpr int NUM_T_CONNECTION_BOXES_STATES = 2; -constexpr const int NUM_T_CONNECTION_BOXES_INPUTS = 1; -constexpr int gstate_t_connection_boxes[NUM_T_CONNECTION_BOXES_STATES][NUM_T_CONNECTION_BOXES_INPUTS] = { - {0}, - {0}, -}; -template -inline void load_connection_boxes(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug) { - (void)root; - (void)out; - (void)context; - (void)report_error; - // Update current file offset in case an error is encountered. - *offset_debug = root.offset_debug(); - - // Preallocate arrays by counting child nodes (if any) - size_t connection_box_count = 0; - { - int next, state = 1; - for (pugi::xml_node node = root.first_child(); node; node = node.next_sibling()) { - *offset_debug = node.offset_debug(); - gtok_t_connection_boxes in = lex_node_t_connection_boxes(node.name(), report_error); - next = gstate_t_connection_boxes[state][(int)in]; - if (next == -1) - dfa_error(gtok_lookup_t_connection_boxes[(int)in], gstate_t_connection_boxes[state], gtok_lookup_t_connection_boxes, 1, report_error); - state = next; - switch (in) { - case gtok_t_connection_boxes::CONNECTION_BOX: - connection_box_count += 1; - break; - default: - break; /* Not possible. */ - } - } - - out.preallocate_connection_boxes_connection_box(context, connection_box_count); - } - int next, state = 1; - for (pugi::xml_node node = root.first_child(); node; node = node.next_sibling()) { - *offset_debug = node.offset_debug(); - gtok_t_connection_boxes in = lex_node_t_connection_boxes(node.name(), report_error); - next = gstate_t_connection_boxes[state][(int)in]; - if (next == -1) - dfa_error(gtok_lookup_t_connection_boxes[(int)in], gstate_t_connection_boxes[state], gtok_lookup_t_connection_boxes, 1, report_error); - state = next; - switch (in) { - case gtok_t_connection_boxes::CONNECTION_BOX: { - unsigned int connection_box_declaration_id; - memset(&connection_box_declaration_id, 0, sizeof(connection_box_declaration_id)); - load_connection_box_declaration_required_attributes(node, &connection_box_declaration_id, report_error); - auto child_context = out.add_connection_boxes_connection_box(context, connection_box_declaration_id); - load_connection_box_declaration(node, out, child_context, report_error, offset_debug); - out.finish_connection_boxes_connection_box(child_context); - } break; - default: - break; /* Not possible. */ - } - } - if (state != 0) dfa_error("end of input", gstate_t_connection_boxes[state], gtok_lookup_t_connection_boxes, 1, report_error); -} - template inline void load_grid_loc(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug) { (void)root; @@ -4260,6 +4173,94 @@ inline void load_rr_edges(const pugi::xml_node& root, T& out, Context& context, if (state != 0) dfa_error("end of input", gstate_t_rr_edges[state], gtok_lookup_t_rr_edges, 1, report_error); } +template +inline void load_connection_box_declaration(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug) { + (void)root; + (void)out; + (void)context; + (void)report_error; + // Update current file offset in case an error is encountered. + *offset_debug = root.offset_debug(); + + for (pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()) { + atok_t_connection_box_declaration in = lex_attr_t_connection_box_declaration(attr.name(), report_error); + switch (in) { + case atok_t_connection_box_declaration::ID: + /* Attribute id is already set */ + break; + case atok_t_connection_box_declaration::NAME: + out.set_connection_box_declaration_name(attr.value(), context); + break; + default: + break; /* Not possible. */ + } + } + + if (root.first_child().type() == pugi::node_element) + noreturn_report(report_error, "Unexpected child element in ."); +} + +constexpr int NUM_T_CONNECTION_BOXES_STATES = 2; +constexpr const int NUM_T_CONNECTION_BOXES_INPUTS = 1; +constexpr int gstate_t_connection_boxes[NUM_T_CONNECTION_BOXES_STATES][NUM_T_CONNECTION_BOXES_INPUTS] = { + {0}, + {0}, +}; +template +inline void load_connection_boxes(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug) { + (void)root; + (void)out; + (void)context; + (void)report_error; + // Update current file offset in case an error is encountered. + *offset_debug = root.offset_debug(); + + // Preallocate arrays by counting child nodes (if any) + size_t connection_box_count = 0; + { + int next, state = 1; + for (pugi::xml_node node = root.first_child(); node; node = node.next_sibling()) { + *offset_debug = node.offset_debug(); + gtok_t_connection_boxes in = lex_node_t_connection_boxes(node.name(), report_error); + next = gstate_t_connection_boxes[state][(int)in]; + if (next == -1) + dfa_error(gtok_lookup_t_connection_boxes[(int)in], gstate_t_connection_boxes[state], gtok_lookup_t_connection_boxes, 1, report_error); + state = next; + switch (in) { + case gtok_t_connection_boxes::CONNECTION_BOX: + connection_box_count += 1; + break; + default: + break; /* Not possible. */ + } + } + + out.preallocate_connection_boxes_connection_box(context, connection_box_count); + } + int next, state = 1; + for (pugi::xml_node node = root.first_child(); node; node = node.next_sibling()) { + *offset_debug = node.offset_debug(); + gtok_t_connection_boxes in = lex_node_t_connection_boxes(node.name(), report_error); + next = gstate_t_connection_boxes[state][(int)in]; + if (next == -1) + dfa_error(gtok_lookup_t_connection_boxes[(int)in], gstate_t_connection_boxes[state], gtok_lookup_t_connection_boxes, 1, report_error); + state = next; + switch (in) { + case gtok_t_connection_boxes::CONNECTION_BOX: { + unsigned int connection_box_declaration_id; + memset(&connection_box_declaration_id, 0, sizeof(connection_box_declaration_id)); + load_connection_box_declaration_required_attributes(node, &connection_box_declaration_id, report_error); + auto child_context = out.add_connection_boxes_connection_box(context, connection_box_declaration_id); + load_connection_box_declaration(node, out, child_context, report_error, offset_debug); + out.finish_connection_boxes_connection_box(child_context); + } break; + default: + break; /* Not possible. */ + } + } + if (state != 0) dfa_error("end of input", gstate_t_connection_boxes[state], gtok_lookup_t_connection_boxes, 1, report_error); +} + template inline void load_rr_graph(const pugi::xml_node& root, T& out, Context& context, const std::function* report_error, ptrdiff_t* offset_debug) { (void)root; @@ -4315,18 +4316,6 @@ inline void load_rr_graph(const pugi::xml_node& root, T& out, Context& context, load_block_types(node, out, child_context, report_error, offset_debug); out.finish_rr_graph_block_types(child_context); } break; - case gtok_t_rr_graph::CONNECTION_BOXES: { - unsigned int connection_boxes_num_boxes; - memset(&connection_boxes_num_boxes, 0, sizeof(connection_boxes_num_boxes)); - unsigned int connection_boxes_x_dim; - memset(&connection_boxes_x_dim, 0, sizeof(connection_boxes_x_dim)); - unsigned int connection_boxes_y_dim; - memset(&connection_boxes_y_dim, 0, sizeof(connection_boxes_y_dim)); - load_connection_boxes_required_attributes(node, &connection_boxes_num_boxes, &connection_boxes_x_dim, &connection_boxes_y_dim, report_error); - auto child_context = out.init_rr_graph_connection_boxes(context, connection_boxes_num_boxes, connection_boxes_x_dim, connection_boxes_y_dim); - load_connection_boxes(node, out, child_context, report_error, offset_debug); - out.finish_rr_graph_connection_boxes(child_context); - } break; case gtok_t_rr_graph::GRID: { auto child_context = out.init_rr_graph_grid(context); load_grid_locs(node, out, child_context, report_error, offset_debug); @@ -4342,11 +4331,23 @@ inline void load_rr_graph(const pugi::xml_node& root, T& out, Context& context, load_rr_edges(node, out, child_context, report_error, offset_debug); out.finish_rr_graph_rr_edges(child_context); } break; + case gtok_t_rr_graph::CONNECTION_BOXES: { + unsigned int connection_boxes_num_boxes; + memset(&connection_boxes_num_boxes, 0, sizeof(connection_boxes_num_boxes)); + unsigned int connection_boxes_x_dim; + memset(&connection_boxes_x_dim, 0, sizeof(connection_boxes_x_dim)); + unsigned int connection_boxes_y_dim; + memset(&connection_boxes_y_dim, 0, sizeof(connection_boxes_y_dim)); + load_connection_boxes_required_attributes(node, &connection_boxes_num_boxes, &connection_boxes_x_dim, &connection_boxes_y_dim, report_error); + auto child_context = out.init_rr_graph_connection_boxes(context, connection_boxes_num_boxes, connection_boxes_x_dim, connection_boxes_y_dim); + load_connection_boxes(node, out, child_context, report_error, offset_debug); + out.finish_rr_graph_connection_boxes(child_context); + } break; default: break; /* Not possible. */ } } - std::bitset<8> test_gstate = gstate | std::bitset<8>(0b00010000); + std::bitset<8> test_gstate = gstate | std::bitset<8>(0b10000000); if (!test_gstate.all()) all_error(test_gstate, gtok_lookup_t_rr_graph, report_error); } @@ -4537,22 +4538,6 @@ inline void write_block_types(T& in, std::ostream& os, Context& context) { } } -template -inline void write_connection_boxes(T& in, std::ostream& os, Context& context) { - (void)in; - (void)os; - (void)context; - { - for (size_t i = 0, n = in.num_connection_boxes_connection_box(context); i < n; i++) { - auto child_context = in.get_connection_boxes_connection_box(i, context); - os << "\n"; - } - } -} - template inline void write_grid_locs(T& in, std::ostream& os, Context& context) { (void)in; @@ -4716,6 +4701,22 @@ inline void write_rr_edges(T& in, std::ostream& os, Context& context) { } } +template +inline void write_connection_boxes(T& in, std::ostream& os, Context& context) { + (void)in; + (void)os; + (void)context; + { + for (size_t i = 0, n = in.num_connection_boxes_connection_box(context); i < n; i++) { + auto child_context = in.get_connection_boxes_connection_box(i, context); + os << "\n"; + } + } +} + template inline void write_rr_graph(T& in, std::ostream& os, Context& context) { (void)in; @@ -4745,18 +4746,6 @@ inline void write_rr_graph(T& in, std::ostream& os, Context& context) { write_block_types(in, os, child_context); os << "\n"; } - { - if (in.has_rr_graph_connection_boxes(context)) { - auto child_context = in.get_rr_graph_connection_boxes(context); - os << ""; - write_connection_boxes(in, os, child_context); - os << "\n"; - } - } { auto child_context = in.get_rr_graph_grid(context); os << "\n"; @@ -4775,6 +4764,18 @@ inline void write_rr_graph(T& in, std::ostream& os, Context& context) { write_rr_edges(in, os, child_context); os << "\n"; } + { + if (in.has_rr_graph_connection_boxes(context)) { + auto child_context = in.get_rr_graph_connection_boxes(context); + os << ""; + write_connection_boxes(in, os, child_context); + os << "\n"; + } + } } inline void dfa_error(const char* wrong, const int* states, const char* const* lookup, int len, const std::function* report_error) { diff --git a/vpr/src/route/gen/rr_graph_uxsdcxx_capnp.h b/vpr/src/route/gen/rr_graph_uxsdcxx_capnp.h index f6ad76c9cf8..eca8bfc8f86 100644 --- a/vpr/src/route/gen/rr_graph_uxsdcxx_capnp.h +++ b/vpr/src/route/gen/rr_graph_uxsdcxx_capnp.h @@ -6,7 +6,7 @@ * * Cmdline: uxsdcxx/uxsdcap.py rr_graph.xsd * Input file: rr_graph.xsd - * md5sum of input file: d9e439fa173fdf56b51feeed0ac48272 + * md5sum of input file: bea3923ac9822e94db96d5d9d15e18c6 */ #include @@ -54,10 +54,6 @@ void load_block_type_capnp_type(const ucap::BlockType::Reader& root, T& out, Con template void load_block_types_capnp_type(const ucap::BlockTypes::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); template -void load_connection_box_declaration_capnp_type(const ucap::ConnectionBoxDeclaration::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); -template -void load_connection_boxes_capnp_type(const ucap::ConnectionBoxes::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); -template void load_grid_loc_capnp_type(const ucap::GridLoc::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); template void load_grid_locs_capnp_type(const ucap::GridLocs::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); @@ -84,6 +80,10 @@ void load_edge_capnp_type(const ucap::Edge::Reader& root, T& out, Context& conte template void load_rr_edges_capnp_type(const ucap::RrEdges::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); template +void load_connection_box_declaration_capnp_type(const ucap::ConnectionBoxDeclaration::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); +template +void load_connection_boxes_capnp_type(const ucap::ConnectionBoxes::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); +template void load_rr_graph_capnp_type(const ucap::RrGraph::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack); /* Declarations for internal write functions for the complex types. */ @@ -106,8 +106,6 @@ inline void write_block_type_capnp_type(T& in, ucap::BlockType::Builder& root, C template inline void write_block_types_capnp_type(T& in, ucap::BlockTypes::Builder& root, Context& context); template -inline void write_connection_boxes_capnp_type(T& in, ucap::ConnectionBoxes::Builder& root, Context& context); -template inline void write_grid_locs_capnp_type(T& in, ucap::GridLocs::Builder& root, Context& context); template inline void write_meta_capnp_type(T& in, ucap::Meta::Builder& root, Context& context); @@ -122,6 +120,8 @@ inline void write_edge_capnp_type(T& in, ucap::Edge::Builder& root, Context& con template inline void write_rr_edges_capnp_type(T& in, ucap::RrEdges::Builder& root, Context& context); template +inline void write_connection_boxes_capnp_type(T& in, ucap::ConnectionBoxes::Builder& root, Context& context); +template inline void write_rr_graph_capnp_type(T& in, ucap::RrGraph::Builder& root, Context& context); /* Enum conversions from uxsd to ucap */ @@ -625,39 +625,6 @@ inline void load_block_types_capnp_type(const ucap::BlockTypes::Reader& root, T& stack->pop_back(); } -template -inline void load_connection_box_declaration_capnp_type(const ucap::ConnectionBoxDeclaration::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack) { - (void)root; - (void)out; - (void)context; - (void)report_error; - (void)stack; - - out.set_connection_box_declaration_name(root.getName().cStr(), context); -} - -template -inline void load_connection_boxes_capnp_type(const ucap::ConnectionBoxes::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack) { - (void)root; - (void)out; - (void)context; - (void)report_error; - (void)stack; - - stack->push_back(std::make_pair("getConnectionBox", 0)); - { - auto data = root.getConnectionBoxes(); - out.preallocate_connection_boxes_connection_box(context, data.size()); - for (const auto& el : data) { - auto child_context = out.add_connection_boxes_connection_box(context, el.getId()); - load_connection_box_declaration_capnp_type(el, out, child_context, report_error, stack); - out.finish_connection_boxes_connection_box(child_context); - stack->back().second += 1; - } - } - stack->pop_back(); -} - template inline void load_grid_loc_capnp_type(const ucap::GridLoc::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack) { (void)root; @@ -893,6 +860,39 @@ inline void load_rr_edges_capnp_type(const ucap::RrEdges::Reader& root, T& out, stack->pop_back(); } +template +inline void load_connection_box_declaration_capnp_type(const ucap::ConnectionBoxDeclaration::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack) { + (void)root; + (void)out; + (void)context; + (void)report_error; + (void)stack; + + out.set_connection_box_declaration_name(root.getName().cStr(), context); +} + +template +inline void load_connection_boxes_capnp_type(const ucap::ConnectionBoxes::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack) { + (void)root; + (void)out; + (void)context; + (void)report_error; + (void)stack; + + stack->push_back(std::make_pair("getConnectionBox", 0)); + { + auto data = root.getConnectionBoxes(); + out.preallocate_connection_boxes_connection_box(context, data.size()); + for (const auto& el : data) { + auto child_context = out.add_connection_boxes_connection_box(context, el.getId()); + load_connection_box_declaration_capnp_type(el, out, child_context, report_error, stack); + out.finish_connection_boxes_connection_box(child_context); + stack->back().second += 1; + } + } + stack->pop_back(); +} + template inline void load_rr_graph_capnp_type(const ucap::RrGraph::Reader& root, T& out, Context& context, const std::function* report_error, std::vector>* stack) { (void)root; @@ -936,14 +936,6 @@ inline void load_rr_graph_capnp_type(const ucap::RrGraph::Reader& root, T& out, out.finish_rr_graph_block_types(child_context); } stack->pop_back(); - stack->push_back(std::make_pair("getConnectionBoxes", 0)); - if (root.hasConnectionBoxes()) { - auto child_el = root.getConnectionBoxes(); - auto child_context = out.init_rr_graph_connection_boxes(context, child_el.getNumBoxes(), child_el.getXDim(), child_el.getYDim()); - load_connection_boxes_capnp_type(child_el, out, child_context, report_error, stack); - out.finish_rr_graph_connection_boxes(child_context); - } - stack->pop_back(); stack->push_back(std::make_pair("getGrid", 0)); if (root.hasGrid()) { auto child_el = root.getGrid(); @@ -968,6 +960,14 @@ inline void load_rr_graph_capnp_type(const ucap::RrGraph::Reader& root, T& out, out.finish_rr_graph_rr_edges(child_context); } stack->pop_back(); + stack->push_back(std::make_pair("getConnectionBoxes", 0)); + if (root.hasConnectionBoxes()) { + auto child_el = root.getConnectionBoxes(); + auto child_context = out.init_rr_graph_connection_boxes(context, child_el.getNumBoxes(), child_el.getXDim(), child_el.getYDim()); + load_connection_boxes_capnp_type(child_el, out, child_context, report_error, stack); + out.finish_rr_graph_connection_boxes(child_context); + } + stack->pop_back(); } /* Internal writing functions, which uxsdcxx uses to write out a class. */ @@ -1139,21 +1139,6 @@ inline void write_block_types_capnp_type(T& in, ucap::BlockTypes::Builder& root, } } -template -inline void write_connection_boxes_capnp_type(T& in, ucap::ConnectionBoxes::Builder& root, Context& context) { - (void)in; - (void)root; - - size_t num_connection_boxes_connection_boxes = in.num_connection_boxes_connection_box(context); - auto connection_boxes_connection_boxes = root.initConnectionBoxes(num_connection_boxes_connection_boxes); - for (size_t i = 0; i < num_connection_boxes_connection_boxes; i++) { - auto connection_boxes_connection_box = connection_boxes_connection_boxes[i]; - auto child_context = in.get_connection_boxes_connection_box(i, context); - connection_boxes_connection_box.setId(in.get_connection_box_declaration_id(child_context)); - connection_boxes_connection_box.setName(in.get_connection_box_declaration_name(child_context)); - } -} - template inline void write_grid_locs_capnp_type(T& in, ucap::GridLocs::Builder& root, Context& context) { (void)in; @@ -1295,6 +1280,21 @@ inline void write_rr_edges_capnp_type(T& in, ucap::RrEdges::Builder& root, Conte } } +template +inline void write_connection_boxes_capnp_type(T& in, ucap::ConnectionBoxes::Builder& root, Context& context) { + (void)in; + (void)root; + + size_t num_connection_boxes_connection_boxes = in.num_connection_boxes_connection_box(context); + auto connection_boxes_connection_boxes = root.initConnectionBoxes(num_connection_boxes_connection_boxes); + for (size_t i = 0; i < num_connection_boxes_connection_boxes; i++) { + auto connection_boxes_connection_box = connection_boxes_connection_boxes[i]; + auto child_context = in.get_connection_boxes_connection_box(i, context); + connection_boxes_connection_box.setId(in.get_connection_box_declaration_id(child_context)); + connection_boxes_connection_box.setName(in.get_connection_box_declaration_name(child_context)); + } +} + template inline void write_rr_graph_capnp_type(T& in, ucap::RrGraph::Builder& root, Context& context) { (void)in; @@ -1324,15 +1324,6 @@ inline void write_rr_graph_capnp_type(T& in, ucap::RrGraph::Builder& root, Conte write_block_types_capnp_type(in, rr_graph_block_types, child_context); } - if (in.has_rr_graph_connection_boxes(context)) { - auto rr_graph_connection_boxes = root.initConnectionBoxes(); - auto child_context = in.get_rr_graph_connection_boxes(context); - rr_graph_connection_boxes.setNumBoxes(in.get_connection_boxes_num_boxes(child_context)); - rr_graph_connection_boxes.setXDim(in.get_connection_boxes_x_dim(child_context)); - rr_graph_connection_boxes.setYDim(in.get_connection_boxes_y_dim(child_context)); - write_connection_boxes_capnp_type(in, rr_graph_connection_boxes, child_context); - } - { auto child_context = in.get_rr_graph_grid(context); auto rr_graph_grid = root.initGrid(); @@ -1350,6 +1341,15 @@ inline void write_rr_graph_capnp_type(T& in, ucap::RrGraph::Builder& root, Conte auto rr_graph_rr_edges = root.initRrEdges(); write_rr_edges_capnp_type(in, rr_graph_rr_edges, child_context); } + + if (in.has_rr_graph_connection_boxes(context)) { + auto rr_graph_connection_boxes = root.initConnectionBoxes(); + auto child_context = in.get_rr_graph_connection_boxes(context); + rr_graph_connection_boxes.setNumBoxes(in.get_connection_boxes_num_boxes(child_context)); + rr_graph_connection_boxes.setXDim(in.get_connection_boxes_x_dim(child_context)); + rr_graph_connection_boxes.setYDim(in.get_connection_boxes_y_dim(child_context)); + write_connection_boxes_capnp_type(in, rr_graph_connection_boxes, child_context); + } } } /* namespace uxsd */ diff --git a/vpr/src/route/gen/rr_graph_uxsdcxx_interface.h b/vpr/src/route/gen/rr_graph_uxsdcxx_interface.h index 4406f7531bb..31809f3777f 100644 --- a/vpr/src/route/gen/rr_graph_uxsdcxx_interface.h +++ b/vpr/src/route/gen/rr_graph_uxsdcxx_interface.h @@ -6,7 +6,7 @@ * * Cmdline: uxsdcxx/uxsdcxx.py rr_graph.xsd * Input file: rr_graph.xsd - * md5sum of input file: d9e439fa173fdf56b51feeed0ac48272 + * md5sum of input file: bea3923ac9822e94db96d5d9d15e18c6 */ #include @@ -68,8 +68,6 @@ struct DefaultRrGraphContextTypes { using PinClassReadContext = void*; using BlockTypeReadContext = void*; using BlockTypesReadContext = void*; - using ConnectionBoxDeclarationReadContext = void*; - using ConnectionBoxesReadContext = void*; using GridLocReadContext = void*; using GridLocsReadContext = void*; using NodeLocReadContext = void*; @@ -83,6 +81,8 @@ struct DefaultRrGraphContextTypes { using RrNodesReadContext = void*; using EdgeReadContext = void*; using RrEdgesReadContext = void*; + using ConnectionBoxDeclarationReadContext = void*; + using ConnectionBoxesReadContext = void*; using RrGraphReadContext = void*; using ChannelWriteContext = void*; using XListWriteContext = void*; @@ -99,8 +99,6 @@ struct DefaultRrGraphContextTypes { using PinClassWriteContext = void*; using BlockTypeWriteContext = void*; using BlockTypesWriteContext = void*; - using ConnectionBoxDeclarationWriteContext = void*; - using ConnectionBoxesWriteContext = void*; using GridLocWriteContext = void*; using GridLocsWriteContext = void*; using NodeLocWriteContext = void*; @@ -114,6 +112,8 @@ struct DefaultRrGraphContextTypes { using RrNodesWriteContext = void*; using EdgeWriteContext = void*; using RrEdgesWriteContext = void*; + using ConnectionBoxDeclarationWriteContext = void*; + using ConnectionBoxesWriteContext = void*; using RrGraphWriteContext = void*; }; @@ -359,35 +359,6 @@ class RrGraphBase { virtual inline size_t num_block_types_block_type(typename ContextTypes::BlockTypesReadContext& ctx) = 0; virtual inline typename ContextTypes::BlockTypeReadContext get_block_types_block_type(int n, typename ContextTypes::BlockTypesReadContext& ctx) = 0; - /** Generated for complex type "connection_box_declaration": - * - * - * - * - */ - virtual inline unsigned int get_connection_box_declaration_id(typename ContextTypes::ConnectionBoxDeclarationReadContext& ctx) = 0; - virtual inline const char* get_connection_box_declaration_name(typename ContextTypes::ConnectionBoxDeclarationReadContext& ctx) = 0; - virtual inline void set_connection_box_declaration_name(const char* name, typename ContextTypes::ConnectionBoxDeclarationWriteContext& ctx) = 0; - - /** Generated for complex type "connection_boxes": - * - * - * - * - * - * - * - * - */ - virtual inline unsigned int get_connection_boxes_num_boxes(typename ContextTypes::ConnectionBoxesReadContext& ctx) = 0; - virtual inline unsigned int get_connection_boxes_x_dim(typename ContextTypes::ConnectionBoxesReadContext& ctx) = 0; - virtual inline unsigned int get_connection_boxes_y_dim(typename ContextTypes::ConnectionBoxesReadContext& ctx) = 0; - virtual inline void preallocate_connection_boxes_connection_box(typename ContextTypes::ConnectionBoxesWriteContext& ctx, size_t size) = 0; - virtual inline typename ContextTypes::ConnectionBoxDeclarationWriteContext add_connection_boxes_connection_box(typename ContextTypes::ConnectionBoxesWriteContext& ctx, unsigned int id) = 0; - virtual inline void finish_connection_boxes_connection_box(typename ContextTypes::ConnectionBoxDeclarationWriteContext& ctx) = 0; - virtual inline size_t num_connection_boxes_connection_box(typename ContextTypes::ConnectionBoxesReadContext& ctx) = 0; - virtual inline typename ContextTypes::ConnectionBoxDeclarationReadContext get_connection_boxes_connection_box(int n, typename ContextTypes::ConnectionBoxesReadContext& ctx) = 0; - /** Generated for complex type "grid_loc": * * @@ -588,6 +559,35 @@ class RrGraphBase { virtual inline size_t num_rr_edges_edge(typename ContextTypes::RrEdgesReadContext& ctx) = 0; virtual inline typename ContextTypes::EdgeReadContext get_rr_edges_edge(int n, typename ContextTypes::RrEdgesReadContext& ctx) = 0; + /** Generated for complex type "connection_box_declaration": + * + * + * + * + */ + virtual inline unsigned int get_connection_box_declaration_id(typename ContextTypes::ConnectionBoxDeclarationReadContext& ctx) = 0; + virtual inline const char* get_connection_box_declaration_name(typename ContextTypes::ConnectionBoxDeclarationReadContext& ctx) = 0; + virtual inline void set_connection_box_declaration_name(const char* name, typename ContextTypes::ConnectionBoxDeclarationWriteContext& ctx) = 0; + + /** Generated for complex type "connection_boxes": + * + * + * + * + * + * + * + * + */ + virtual inline unsigned int get_connection_boxes_num_boxes(typename ContextTypes::ConnectionBoxesReadContext& ctx) = 0; + virtual inline unsigned int get_connection_boxes_x_dim(typename ContextTypes::ConnectionBoxesReadContext& ctx) = 0; + virtual inline unsigned int get_connection_boxes_y_dim(typename ContextTypes::ConnectionBoxesReadContext& ctx) = 0; + virtual inline void preallocate_connection_boxes_connection_box(typename ContextTypes::ConnectionBoxesWriteContext& ctx, size_t size) = 0; + virtual inline typename ContextTypes::ConnectionBoxDeclarationWriteContext add_connection_boxes_connection_box(typename ContextTypes::ConnectionBoxesWriteContext& ctx, unsigned int id) = 0; + virtual inline void finish_connection_boxes_connection_box(typename ContextTypes::ConnectionBoxDeclarationWriteContext& ctx) = 0; + virtual inline size_t num_connection_boxes_connection_box(typename ContextTypes::ConnectionBoxesReadContext& ctx) = 0; + virtual inline typename ContextTypes::ConnectionBoxDeclarationReadContext get_connection_boxes_connection_box(int n, typename ContextTypes::ConnectionBoxesReadContext& ctx) = 0; + /** Generated for complex type "rr_graph": * * @@ -595,10 +595,10 @@ class RrGraphBase { * * * - * * * * + * * * * @@ -623,10 +623,6 @@ class RrGraphBase { virtual inline typename ContextTypes::BlockTypesWriteContext init_rr_graph_block_types(typename ContextTypes::RrGraphWriteContext& ctx) = 0; virtual inline void finish_rr_graph_block_types(typename ContextTypes::BlockTypesWriteContext& ctx) = 0; virtual inline typename ContextTypes::BlockTypesReadContext get_rr_graph_block_types(typename ContextTypes::RrGraphReadContext& ctx) = 0; - virtual inline typename ContextTypes::ConnectionBoxesWriteContext init_rr_graph_connection_boxes(typename ContextTypes::RrGraphWriteContext& ctx, unsigned int num_boxes, unsigned int x_dim, unsigned int y_dim) = 0; - virtual inline void finish_rr_graph_connection_boxes(typename ContextTypes::ConnectionBoxesWriteContext& ctx) = 0; - virtual inline typename ContextTypes::ConnectionBoxesReadContext get_rr_graph_connection_boxes(typename ContextTypes::RrGraphReadContext& ctx) = 0; - virtual inline bool has_rr_graph_connection_boxes(typename ContextTypes::RrGraphReadContext& ctx) = 0; virtual inline typename ContextTypes::GridLocsWriteContext init_rr_graph_grid(typename ContextTypes::RrGraphWriteContext& ctx) = 0; virtual inline void finish_rr_graph_grid(typename ContextTypes::GridLocsWriteContext& ctx) = 0; virtual inline typename ContextTypes::GridLocsReadContext get_rr_graph_grid(typename ContextTypes::RrGraphReadContext& ctx) = 0; @@ -636,6 +632,10 @@ class RrGraphBase { virtual inline typename ContextTypes::RrEdgesWriteContext init_rr_graph_rr_edges(typename ContextTypes::RrGraphWriteContext& ctx) = 0; virtual inline void finish_rr_graph_rr_edges(typename ContextTypes::RrEdgesWriteContext& ctx) = 0; virtual inline typename ContextTypes::RrEdgesReadContext get_rr_graph_rr_edges(typename ContextTypes::RrGraphReadContext& ctx) = 0; + virtual inline typename ContextTypes::ConnectionBoxesWriteContext init_rr_graph_connection_boxes(typename ContextTypes::RrGraphWriteContext& ctx, unsigned int num_boxes, unsigned int x_dim, unsigned int y_dim) = 0; + virtual inline void finish_rr_graph_connection_boxes(typename ContextTypes::ConnectionBoxesWriteContext& ctx) = 0; + virtual inline typename ContextTypes::ConnectionBoxesReadContext get_rr_graph_connection_boxes(typename ContextTypes::RrGraphReadContext& ctx) = 0; + virtual inline bool has_rr_graph_connection_boxes(typename ContextTypes::RrGraphReadContext& ctx) = 0; }; } /* namespace uxsd */ diff --git a/vpr/src/route/rr_graph.xsd b/vpr/src/route/rr_graph.xsd index 464ccf9677f..f3c5827feb0 100644 --- a/vpr/src/route/rr_graph.xsd +++ b/vpr/src/route/rr_graph.xsd @@ -359,10 +359,10 @@ - + From a69095c7347e264316dd4095a4c97c63d8c1e33c Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Tue, 30 Jun 2020 20:28:10 -0700 Subject: [PATCH 39/41] Adjust connection_box verification logic to handle different definition order. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/route/connection_box.cpp | 38 +++++++++++++++------ vpr/src/route/connection_box.h | 3 ++ vpr/src/route/rr_graph_uxsdcxx_serializer.h | 1 + 3 files changed, 31 insertions(+), 11 deletions(-) diff --git a/vpr/src/route/connection_box.cpp b/vpr/src/route/connection_box.cpp index 181fe171989..b495b6bdfd3 100644 --- a/vpr/src/route/connection_box.cpp +++ b/vpr/src/route/connection_box.cpp @@ -53,8 +53,6 @@ bool ConnectionBoxes::find_connection_box(int inode, // Clear IPIN map and set connection box grid size and box ids. void ConnectionBoxes::reset_boxes(std::pair size, const std::vector boxes) { - clear(); - size_ = size; boxes_ = boxes; } @@ -75,15 +73,34 @@ void ConnectionBoxes::clear() { sink_to_ipin_.clear(); } -void ConnectionBoxes::add_connection_box(int inode, ConnectionBoxId box_id, std::pair box_location, float site_pin_delay) { - // Ensure that box location is in bounds - VTR_ASSERT(box_location.first < size_.first); - VTR_ASSERT(box_location.second < size_.second); +void ConnectionBoxes::verify_connection_boxes() { + if (size_.first == 0 && size_.second == 0) { + return; + } + + for (const auto& loc : canonical_loc_map_) { + VTR_ASSERT(loc.first < size_.first); + VTR_ASSERT(loc.second < size_.second); + } + + for (const auto& conn_box_loc : ipin_map_) { + if (conn_box_loc.box_id == ConnectionBoxId::INVALID()) { + continue; + } + const auto& box_location = conn_box_loc.box_location; + const auto& box_id = conn_box_loc.box_id; + + // Ensure that box location is in bounds + VTR_ASSERT(box_location.first < size_.first); + VTR_ASSERT(box_location.second < size_.second); - // Bounds check box_id - VTR_ASSERT(bool(box_id)); - VTR_ASSERT(size_t(box_id) < boxes_.size()); + // Bounds check box_id + VTR_ASSERT(bool(box_id)); + VTR_ASSERT(size_t(box_id) < boxes_.size()); + } +} +void ConnectionBoxes::add_connection_box(int inode, ConnectionBoxId box_id, std::pair box_location, float site_pin_delay) { // Make sure sink map will not be invalidated upon insertion. VTR_ASSERT(sink_to_ipin_.size() == 0); @@ -94,8 +111,6 @@ void ConnectionBoxes::add_connection_box(int inode, ConnectionBoxId box_id, std: } void ConnectionBoxes::add_canonical_loc(int inode, std::pair loc) { - VTR_ASSERT(loc.first < size_.first); - VTR_ASSERT(loc.second < size_.second); if (inode >= (ssize_t)(canonical_loc_map_.size())) { canonical_loc_map_.resize(inode + 1); } @@ -117,6 +132,7 @@ const std::pair* ConnectionBoxes::find_canonical_loc(int inode) void ConnectionBoxes::create_sink_back_ref() { const auto& device_ctx = g_vpr_ctx.device(); + verify_connection_boxes(); sink_to_ipin_.resize(device_ctx.rr_nodes.size(), {{0, 0, 0, 0}, 0}); diff --git a/vpr/src/route/connection_box.h b/vpr/src/route/connection_box.h index 37af4cc8320..b4f872e3bdb 100644 --- a/vpr/src/route/connection_box.h +++ b/vpr/src/route/connection_box.h @@ -63,6 +63,9 @@ class ConnectionBoxes { void add_connection_box(int inode, ConnectionBoxId box_id, std::pair box_location, float site_pin_delay); void add_canonical_loc(int inode, std::pair loc); + // Verify that all connection box annotations match defined connection boxes. + void verify_connection_boxes(); + // Create map from SINK's back to IPIN's // // This must be called after all connection boxes have been added. diff --git a/vpr/src/route/rr_graph_uxsdcxx_serializer.h b/vpr/src/route/rr_graph_uxsdcxx_serializer.h index 0a1f5168938..de967b1e1c6 100644 --- a/vpr/src/route/rr_graph_uxsdcxx_serializer.h +++ b/vpr/src/route/rr_graph_uxsdcxx_serializer.h @@ -303,6 +303,7 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { // report_error_in should be invoked if RrGraphSerializer encounters // an error during the read. report_error_ = report_error_in; + connection_boxes_->clear(); } void start_write() final {} void finish_write() final {} From cfd25e8b824f2330f5d4dda4b069c3c0afc883ae Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Wed, 22 Jul 2020 17:35:14 -0700 Subject: [PATCH 40/41] Adjust connection_box verification logic to handle different definition order. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/route/connection_box.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/vpr/src/route/connection_box.cpp b/vpr/src/route/connection_box.cpp index b495b6bdfd3..530a51fe4ba 100644 --- a/vpr/src/route/connection_box.cpp +++ b/vpr/src/route/connection_box.cpp @@ -79,8 +79,19 @@ void ConnectionBoxes::verify_connection_boxes() { } for (const auto& loc : canonical_loc_map_) { - VTR_ASSERT(loc.first < size_.first); - VTR_ASSERT(loc.second < size_.second); + if (loc.first == size_t(-1)) { + VTR_ASSERT(loc.second == size_t(-1)); + continue; + } + + if (loc.first >= size_.first) { + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "loc.first (%zu) >= size_.first (%zu)", + loc.first, size_.first); + } + if (loc.second >= size_.second) { + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "loc.second (%zu) >= size_.second (%zu)", + loc.second, size_.second); + } } for (const auto& conn_box_loc : ipin_map_) { From 932fda898a2fc68b713fb16f8eed7ebca80a20e7 Mon Sep 17 00:00:00 2001 From: Alessandro Comodi Date: Tue, 15 Sep 2020 14:13:20 +0200 Subject: [PATCH 41/41] run make format Signed-off-by: Alessandro Comodi --- vpr/src/base/vpr_types.h | 8 ++++---- vpr/src/route/connection_box_lookahead_map.cpp | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 062dc3625c8..3c6de135c85 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -110,10 +110,10 @@ constexpr const char* EMPTY_BLOCK_NAME = "EMPTY"; #endif enum class e_router_lookahead { - CLASSIC, /// ConnectionBoxCostMap::get_nearby_cost_entry(const vtr::NdMatrix& matrix, - int cx, - int cy, - const vtr::Rect& bounds) { + int cx, + int cy, + const vtr::Rect& bounds) { // spiral around (cx, cy) looking for a nearby entry bool in_bounds = bounds.contains(vtr::Point(cx, cy)); if (!in_bounds) { @@ -826,16 +826,16 @@ static vtr::Rect sample_window(const vtr::Rect& bounding_box, int sx, } static std::vector choose_points(const vtr::Matrix& counts, - const vtr::Rect& window, - int min_count, - int max_count) { + const vtr::Rect& window, + int min_count, + int max_count) { VTR_ASSERT(min_count <= max_count); std::vector points; for (int y = window.ymin(); y < window.ymax(); y++) { for (int x = window.xmin(); x < window.xmax(); x++) { if (counts[x][y] >= min_count && counts[x][y] <= max_count) { points.push_back(ConnectionBoxSamplePoint{/* .location = */ vtr::Point(x, y), - /* .nodes = */ {}}); + /* .nodes = */ {}}); } } }