From e84f89a89e872813650647fb07a9414abc007c6f Mon Sep 17 00:00:00 2001 From: Helen Dai Date: Wed, 19 Aug 2020 17:33:30 -0400 Subject: [PATCH 01/15] update changes from vqm2bliff_one_lut_removal branch --- utils/vqm2blif/src/base/cleanup.cpp | 191 ++++++++++++++++++ utils/vqm2blif/src/base/cleanup.h | 5 +- utils/vqm2blif/src/main.cpp | 4 +- vpr/src/base/SetupGrid.cpp | 22 +- vtr_flow/arch/titan/stratixiv_arch.timing.xml | 44 +++- 5 files changed, 257 insertions(+), 9 deletions(-) diff --git a/utils/vqm2blif/src/base/cleanup.cpp b/utils/vqm2blif/src/base/cleanup.cpp index e4234600bc3..9ec028cf32e 100644 --- a/utils/vqm2blif/src/base/cleanup.cpp +++ b/utils/vqm2blif/src/base/cleanup.cpp @@ -12,6 +12,7 @@ void build_netlist (t_module* module, busvec* buses, s_hash** hash_table); void init_nets (t_pin_def** pins, int num_pins, busvec* buses, struct s_hash** hash_table); void set_net_assigns (t_assign** assignments, int num_assigns, busvec* buses, struct s_hash** hash_table); void add_subckts (t_node** nodes, int num_nodes, busvec* buses, struct s_hash** hash_table); +void remove_one_lut_nodes ( busvec* buses, struct s_hash** hash_table, t_node** nodes, int original_num_nodes, t_module* module ); void clean_netlist ( busvec* buses, struct s_hash** hash_table, t_node** nodes, int num_nodes ); void reassign_net_source (t_net* net); void print_to_module ( t_module* module, busvec* buses, struct s_hash** hash_table ); @@ -21,6 +22,9 @@ netvec* get_bus_from_hash (struct s_hash** hash_table, char* temp_name, busvec* void verify_netlist ( t_node** nodes, int num_nodes, busvec* buses, struct s_hash** hash_table); void print_all_nets ( busvec* buses, const char* filename ); +bool is_onelut ( t_node* node ); +void remove_node ( t_node* node, t_node** nodes, int original_num_nodes ); + //============================================================================================ //============================================================================================ @@ -39,16 
+43,22 @@ void netlist_cleanup (t_module* module){ cout << "\t>> VQM Netlist contains " << buffer_count << " buffers.\n" ; cout << "\t>> VQM Netlist contains " << invert_count << " invertors.\n" ; + cout << "\t>> VQM Netlist contains " << onelut_count << " one-LUTs.\n" ; //Verify that the initial netlist is ok verify_netlist ( module->array_of_nodes, module->number_of_nodes, &buses, hash_table ); + cout << "\t>> Removing One-LUTs" << "...\n"; + + remove_one_lut_nodes ( &buses, hash_table, module->array_of_nodes, module->number_of_nodes, module ); + cout << "\t>> Removing buffered nets" << ((clean_mode == CL_BUFF)? "":" and inverted subckt inputs") << "...\n"; clean_netlist ( &buses, hash_table, module->array_of_nodes, module->number_of_nodes ); cout << "\t>> Removed " << buffers_elim << " buffers of " << buffer_count << ".\n" ; cout << "\t>> Removed " << inverts_elim << " invertors of " << invert_count << ".\n" ; + cout << "\t>> Removed " << oneluts_elim << " one-LUTs of " << onelut_count << ".\n" ; //Verify that the final modified netlist is ok verify_netlist ( module->array_of_nodes, module->number_of_nodes, &buses, hash_table ); @@ -195,8 +205,13 @@ void add_subckts (t_node** nodes, int num_nodes, busvec* buses, struct s_hash** t_node* temp_node; t_node_port_association* temp_port; + onelut_count = 0; + for (int i = 0; i < num_nodes; i++){ temp_node = nodes[i]; + if(is_onelut(temp_node)){ + onelut_count++; + } for (int j = 0; j < temp_node->number_of_ports; j++){ temp_port = temp_node->array_of_ports[j]; @@ -223,6 +238,123 @@ void add_subckts (t_node** nodes, int num_nodes, busvec* buses, struct s_hash** //============================================================================================ //============================================================================================ +void remove_one_lut_nodes ( busvec* buses, struct s_hash** hash_table, t_node** nodes, int original_num_nodes, t_module* module ){ +/* + Go through all nodes, if a node's 
source net is the sink of a one-LUT, there are two cases: + 1. The one-LUT has an input and an output: + Re-associate the node with the source net of the one-LUT, then remove the one-LUT and the node's original source net + 2. The one-LUT just has an output (provides VCC to its sink): + Re-associate the node with the VCC net, then remove the one-LUT and the node's original source net +*/ + oneluts_elim = 0; + + t_node* temp_node; + t_node_port_association* temp_port; + netvec* temp_bus; + t_net* temp_net; + + t_node* source_node; + t_node_port_association* source_port; + t_node_port_association* prev_port; + netvec* prev_bus; + t_net* prev_net; + + netvec* vcc_bus = get_bus_from_hash (hash_table, const_cast("vcc"), buses); + VTR_ASSERT(vcc_bus != NULL); + t_net* vcc_net = &(vcc_bus->at(0)); //Find any VCC net + + for (int i = 0; i < original_num_nodes; i++){ + temp_node = nodes[i]; + if (temp_node == NULL) { //Node was deleted during a previous iteration + continue; + } + for (int j = 0; j < temp_node->number_of_ports; j++){ + temp_port = temp_node->array_of_ports[j]; + temp_bus = get_bus_from_hash (hash_table, temp_port->associated_net->name, buses); + VTR_ASSERT((unsigned int)temp_port->wire_index < temp_bus->size()); + temp_net = &(temp_bus->at(temp_port->wire_index)); + + if (temp_port != (t_node_port_association*)temp_net->source){ + //Must be an input port + if (temp_net->driver == BLACKBOX && is_onelut(temp_net->block_src) && temp_net->num_children == 1){ + source_port = (t_node_port_association*)temp_net->source; //The output port of the one-LUT + source_node = temp_net->block_src; //The one-LUT + + //Re-associate temp_port with the appropriate net + if(source_node->number_of_ports == 2){ + //For one-LUT with an input and an output, find the net before the one_LUT and associate temp_port with that net instead + VTR_ASSERT(source_node->number_of_ports == 2); + for (int k = 0; k < source_node->number_of_ports; k++){ + prev_port = 
source_node->array_of_ports[k]; + if(prev_port != source_port) { + //The input port of the one-LUT + prev_bus = get_bus_from_hash (hash_table, prev_port->associated_net->name, buses); + VTR_ASSERT((unsigned int)prev_port->wire_index < prev_bus->size()); + prev_net = &(prev_bus->at(prev_port->wire_index)); //Net associated with the input port + } + } + temp_port->associated_net = prev_net->pin; + temp_port->wire_index = prev_net->wire_index; + } else { + //For one-LUT with just an output, associate temp_port with VCC instead + VTR_ASSERT(source_node->number_of_ports == 1); //If is_onelut==true, there are only 1 or 2 ports + VTR_ASSERT(vcc_net != NULL); //Should have a VCC + temp_port->associated_net = vcc_net->pin; + temp_port->wire_index = vcc_net->wire_index; + vcc_net->num_children++; + } + + //Remove temp_net + temp_net->num_children--; + temp_net->source = NULL; + temp_net->driver = NODRIVE; + + //Free the LUT + remove_node(source_node, nodes, original_num_nodes); + + } + } + } + } + + //Reorganize nodes array by filling in gaps with the last available elements in the array to save CPU time + int new_array_size = original_num_nodes - oneluts_elim; + int curr_node_index = 0; + int replacement_node_index = original_num_nodes - 1; + while (curr_node_index < replacement_node_index) { + if (nodes[curr_node_index] == NULL) { + if (nodes[replacement_node_index] != NULL) { + //Replace gap with node + nodes[curr_node_index] = nodes[replacement_node_index]; + nodes[replacement_node_index] = NULL; + curr_node_index++; + } + replacement_node_index--; + } else { + curr_node_index++; + } + } + if (nodes[curr_node_index] == NULL) { + VTR_ASSERT(curr_node_index == new_array_size); //check array size + } else { + VTR_ASSERT(curr_node_index == new_array_size - 1); //check array size + } + + //Update array bounds + module -> number_of_nodes = new_array_size; + + //Reduce run-time by only verifying at the end + //verify_netlist (nodes, module->number_of_nodes, buses, hash_table); 
+ +#ifdef CLEAN_DEBUG + cout << "\t\t>> Dumping to all_buff.out\n" ; + print_all_nets(buses, "all_buff.out"); +#endif +} + +//============================================================================================ +//============================================================================================ + void clean_netlist ( busvec* buses, struct s_hash** hash_table, t_node** nodes, int num_nodes ){ netvec* temp_bus; @@ -627,5 +759,64 @@ void print_all_nets ( busvec* buses, const char* filename ){ outfile.close(); } + +//============================================================================================ +//============================================================================================ + +bool is_onelut ( t_node* node ) { + if(node == NULL) return false; + + //Hardcoded for Stratix IV + string node_name = node->name; + string node_name_ending; + if (node_name.length() >= 8){ + node_name_ending = node_name.substr(node_name.length()-8); + } else { + node_name_ending = node_name; + } + +#ifdef CLEAN_DEBUG + cout << "\t\t Node Type: " << node->type << "\t" << "Node Name Ending: " << node_name_ending << "\t" << "Num of Ports: " << node->number_of_ports <<"\n"; +#endif + + //Only LUTs with 1 port (1 output port) or 2 ports (1 input and 1 output) are considered one-luts + if (node->number_of_ports == 1 || node->number_of_ports == 2){ + //Only stratixiv_lcell_comb one-LUTs that end in "feeder" can be removed at this stage + if (node->type == string("stratixiv_lcell_comb") && node_name_ending == string("feeder_I")) { + return true; + } + } + + return false; +} + +//============================================================================================ +//============================================================================================ + +void remove_node ( t_node* node, t_node** nodes, int original_num_nodes ) { + //Free node and assign it to NULL on the spot + //Array will be re-organized to fill in the gaps later + + 
VTR_ASSERT(node != NULL); + VTR_ASSERT(nodes != NULL); + +#ifdef CLEAN_DEBUG + cout << "\t\t\t Removing " << node->name << "\n"; +#endif + bool found = false; + + for (int i = 0; i < original_num_nodes; i++){ + if(nodes[i] == node){ + free_node( (void*)nodes[i] ); + nodes[i] = NULL; + found = true; + break; + } + } + + VTR_ASSERT(found); + oneluts_elim++; +} + //============================================================================================ //============================================================================================ diff --git a/utils/vqm2blif/src/base/cleanup.h b/utils/vqm2blif/src/base/cleanup.h index 9d699feece9..8fb60bba0cb 100644 --- a/utils/vqm2blif/src/base/cleanup.h +++ b/utils/vqm2blif/src/base/cleanup.h @@ -8,13 +8,14 @@ #include "vqm2blif_util.h" #include "lut_recog.h" +#include "vqm_common.h" //============================================================================================ // GLOBALS //============================================================================================ -extern int buffer_count, invert_count; -extern int buffers_elim, inverts_elim; +extern int buffer_count, invert_count, onelut_count; +extern int buffers_elim, inverts_elim, oneluts_elim; void netlist_cleanup (t_module* module); diff --git a/utils/vqm2blif/src/main.cpp b/utils/vqm2blif/src/main.cpp index 495a3c864df..cfc9f033f9a 100644 --- a/utils/vqm2blif/src/main.cpp +++ b/utils/vqm2blif/src/main.cpp @@ -109,8 +109,8 @@ e_elab elab_mode; //user-set flag dictating how to elaborate a VQM Primitive e_lut lut_mode; //user-set flag dictating how to treat LUTs (as blackboxes or .names) -int buffer_count, invert_count; -int buffers_elim, inverts_elim; +int buffer_count, invert_count, onelut_count; +int buffers_elim, inverts_elim, oneluts_elim; e_clean clean_mode; diff --git a/vpr/src/base/SetupGrid.cpp b/vpr/src/base/SetupGrid.cpp index 479f8f055d9..584a5db6776 100644 --- a/vpr/src/base/SetupGrid.cpp +++ b/vpr/src/base/SetupGrid.cpp @@ -150,6 
+150,17 @@ static DeviceGrid auto_size_device_grid(const std::vector& grid_layo VTR_ASSERT_SAFE_MSG(std::find_if(auto_layout_itr + 1, grid_layouts.end(), is_auto_grid_def) == grid_layouts.end(), "Only one "); + //Determine maximum device size to try before concluding that the circuit cannot fit on any device + //Calculate total number of required instances + //Then multiply by an overhead factor (the code below uses 10000; NOTE(review): the abort message says 100 times - confirm which is intended) + size_t max_size; + size_t total_minimum_instance_counts = 0; + for (auto& inst : minimum_instance_counts) { + size_t count = inst.second; + total_minimum_instance_counts += count; + } + max_size = total_minimum_instance_counts * 10000; + const auto& grid_def = *auto_layout_itr; VTR_ASSERT(grid_def.aspect_ratio >= 0.); @@ -159,6 +170,7 @@ static DeviceGrid auto_size_device_grid(const std::vector& grid_layo size_t width = 3; size_t height = 3; std::vector limiting_resources; + size_t grid_size = 0; do { //Scale opposite dimension to match aspect ratio height = vtr::nint(width / grid_def.aspect_ratio); @@ -183,10 +195,18 @@ static DeviceGrid auto_size_device_grid(const std::vector& grid_layo limiting_resources = grid_overused_resources(grid, minimum_instance_counts); + //Determine grid size + grid_size = width * height; + //Increase the grid size width++; - } while (true); + } while (grid_size < max_size); + + //Maximum device size reached + VPR_FATAL_ERROR(VPR_ERROR_OTHER, + "Device auto-fit aborted: device size already exceeds required resources count by 100 times yet still cannot fit the design. " + "Might be using more instances of a particular type of resource than the StratixIV devices can support (e.g. 
PLLs)\n"); } else { VTR_ASSERT(auto_layout_itr == grid_layouts.end()); diff --git a/vtr_flow/arch/titan/stratixiv_arch.timing.xml b/vtr_flow/arch/titan/stratixiv_arch.timing.xml index ca0b67a68a1..cc3685c593c 100644 --- a/vtr_flow/arch/titan/stratixiv_arch.timing.xml +++ b/vtr_flow/arch/titan/stratixiv_arch.timing.xml @@ -4408,6 +4408,14 @@ + + + + + + + + @@ -4729,8 +4737,9 @@ - @@ -4753,10 +4762,18 @@ + + + + + + + + @@ -4786,8 +4803,9 @@ - @@ -4810,10 +4828,18 @@ + + + + + + + + @@ -6163,6 +6189,16 @@ + + + + + + + + + + From cc5a4e57fb8441e6c28a7e3b85f820ce31c80cda Mon Sep 17 00:00:00 2001 From: Helen Dai Date: Thu, 20 Aug 2020 08:54:34 -0400 Subject: [PATCH 02/15] Initial commit to add and use pin index in rt_node --- utils/route_diag/src/main.cpp | 2 +- vpr/src/base/vpr_types.h | 3 ++ vpr/src/route/check_route.cpp | 9 ++-- vpr/src/route/connection_based_routing.cpp | 39 --------------- vpr/src/route/connection_based_routing.h | 10 ++-- vpr/src/route/route_breadth_first.cpp | 2 +- vpr/src/route/route_common.cpp | 22 ++++++--- vpr/src/route/route_common.h | 4 +- vpr/src/route/route_timing.cpp | 29 ++++++----- vpr/src/route/route_tree_timing.cpp | 56 ++++++++++++++++------ vpr/src/route/route_tree_timing.h | 2 +- vpr/src/route/route_tree_type.h | 3 ++ vpr/src/route/router_delay_profiling.cpp | 4 +- 13 files changed, 95 insertions(+), 90 deletions(-) diff --git a/utils/route_diag/src/main.cpp b/utils/route_diag/src/main.cpp index 2598c8de50f..a30bec137b1 100644 --- a/utils/route_diag/src/main.cpp +++ b/utils/route_diag/src/main.cpp @@ -112,7 +112,7 @@ static void do_one_route(int source_node, int sink_node, if (found_path) { VTR_ASSERT(cheapest.index == sink_node); - t_rt_node* rt_node_of_sink = update_route_tree(&cheapest, nullptr); + t_rt_node* rt_node_of_sink = update_route_tree(&cheapest, OPEN, nullptr); //find delay float net_delay = rt_node_of_sink->Tdel; diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index dbf5a274cef..23aa26e5eed 100644 
--- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -1315,6 +1315,8 @@ typedef std::array, 3>, NUM_RR_TYPES> t_rr_node_i * @brief Basic element used to store the traceback (routing) of each net. * * @param index Array index (ID) of this routing resource node. + * @param ipin Index of the pin for a sink node. Only used for rt_node + * tree traceback. Otherwise value is OPEN. * @param iswitch Index of the switch type used to go from this rr_node to * the next one in the routing. OPEN if there is no next node * (i.e. this node is the last one (a SINK) in a branch of the @@ -1324,6 +1326,7 @@ typedef std::array, 3>, NUM_RR_TYPES> t_rr_node_i struct t_trace { t_trace* next; int index; + int ipin = OPEN; short iswitch; }; diff --git a/vpr/src/route/check_route.cpp b/vpr/src/route/check_route.cpp index bc244c3b519..a57b81eaa5d 100644 --- a/vpr/src/route/check_route.cpp +++ b/vpr/src/route/check_route.cpp @@ -190,8 +190,7 @@ static void check_sink(int inode, ClusterNetId net_id, bool* pin_done) { int ptc_num = device_ctx.rr_nodes[inode].ptc_num(); int ifound = 0; - for (int iblk = 0; iblk < type->capacity; iblk++) { - ClusterBlockId bnum = place_ctx.grid_blocks[i][j].blocks[iblk]; /* Hardcoded to one cluster_ctx block*/ + for (auto bnum : place_ctx.grid_blocks[i][j].blocks) { unsigned int ipin = 1; for (auto pin_id : cluster_ctx.clb_nlist.net_sinks(net_id)) { if (cluster_ctx.clb_nlist.pin_block(pin_id) == bnum) { @@ -203,6 +202,7 @@ static void check_sink(int inode, ClusterNetId net_id, bool* pin_done) { if (pin_done[ipin] == false) { ifound++; pin_done[ipin] = true; + break; } } } @@ -210,10 +210,7 @@ static void check_sink(int inode, ClusterNetId net_id, bool* pin_done) { } } - if (ifound > 1 && is_io_type(type)) { - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, - "in check_sink: found %d terminals of net %d of pad %d at location (%d, %d).\n", ifound, size_t(net_id), ptc_num, i, j); - } + VTR_ASSERT(ifound <= 1); if (ifound < 1) { VPR_FATAL_ERROR(VPR_ERROR_ROUTE, diff 
--git a/vpr/src/route/connection_based_routing.cpp b/vpr/src/route/connection_based_routing.cpp index ffe27280066..a46b7fbe6a2 100644 --- a/vpr/src/route/connection_based_routing.cpp +++ b/vpr/src/route/connection_based_routing.cpp @@ -56,45 +56,6 @@ Connection_based_routing_resources::Connection_based_routing_resources() } } -void Connection_based_routing_resources::convert_sink_nodes_to_net_pins(std::vector& rr_sink_nodes) const { - /* Turn a vector of device_ctx.rr_nodes indices, assumed to be of sinks for a net * - * into the pin indices of the same net. */ - - VTR_ASSERT(current_inet != ClusterNetId::INVALID()); // not uninitialized - - const auto& node_to_pin_mapping = rr_sink_node_to_pin[current_inet]; - - for (size_t s = 0; s < rr_sink_nodes.size(); ++s) { - auto mapping = node_to_pin_mapping.find(rr_sink_nodes[s]); - if (mapping != node_to_pin_mapping.end()) { - rr_sink_nodes[s] = mapping->second; - } else { - VTR_ASSERT_SAFE_MSG(false, "Should always expect it find a pin mapping for its own net"); - } - } -} - -void Connection_based_routing_resources::put_sink_rt_nodes_in_net_pins_lookup(const std::vector& sink_rt_nodes, - t_rt_node** rt_node_of_sink) const { - /* Load rt_node_of_sink (which maps a PIN index to a route tree node) - * with a vector of route tree sink nodes. 
*/ - - VTR_ASSERT(current_inet != ClusterNetId::INVALID()); - - // a net specific mapping from node index to pin index - const auto& node_to_pin_mapping = rr_sink_node_to_pin[current_inet]; - - for (t_rt_node* rt_node : sink_rt_nodes) { - auto mapping = node_to_pin_mapping.find(rt_node->inode); - - if (mapping != node_to_pin_mapping.end()) { - rt_node_of_sink[mapping->second] = rt_node; - } else { - VTR_ASSERT_SAFE_MSG(false, "element should be able to find itself"); - } - } -} - bool Connection_based_routing_resources::sanity_check_lookup() const { auto& cluster_ctx = g_vpr_ctx.clustering(); auto& route_ctx = g_vpr_ctx.routing(); diff --git a/vpr/src/route/connection_based_routing.h b/vpr/src/route/connection_based_routing.h index 2ca63cda261..cea2d34caa5 100644 --- a/vpr/src/route/connection_based_routing.h +++ b/vpr/src/route/connection_based_routing.h @@ -19,6 +19,10 @@ class Connection_based_routing_resources { // each net maps SINK node index -> PIN index for net // only need to be built once at the start since the SINK nodes never change // the reverse lookup of route_ctx.net_rr_terminals + // be careful: it is possible for multiple sinks to share the same node index in some cases. + // rt_nodes already have pin index stored as a member, so in most cases, you do not + // need this lookup. Only use this if necessary and if you are sure that a node index + // can uniquely identify the node. 
vtr::vector> rr_sink_node_to_pin; // a property of each net, but only valid after pruning the previous route tree @@ -42,11 +46,7 @@ class Connection_based_routing_resources { // get a handle on the resources std::vector& get_remaining_targets() { return remaining_targets; } std::vector& get_reached_rt_sinks() { return reached_rt_sinks; } - - void convert_sink_nodes_to_net_pins(std::vector& rr_sink_nodes) const; - - void put_sink_rt_nodes_in_net_pins_lookup(const std::vector& sink_rt_nodes, - t_rt_node** rt_node_of_sink) const; + vtr::vector>& get_rr_sink_node_to_pin() { return rr_sink_node_to_pin; }; bool sanity_check_lookup() const; diff --git a/vpr/src/route/route_breadth_first.cpp b/vpr/src/route/route_breadth_first.cpp index 3fc32cba0e7..39bee911471 100644 --- a/vpr/src/route/route_breadth_first.cpp +++ b/vpr/src/route/route_breadth_first.cpp @@ -265,7 +265,7 @@ static bool breadth_first_route_net(BinaryHeap& heap, ClusterNetId net_id, float route_ctx.rr_node_route_inf[inode].target_flag--; /* Connected to this SINK. 
*/ remaining_connections_to_sink = route_ctx.rr_node_route_inf[inode].target_flag; - tptr = update_traceback(current, net_id); + tptr = update_traceback(current, OPEN, net_id); heap.free(current); } diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp index 7855ae82ad3..c93888289d2 100644 --- a/vpr/src/route/route_common.cpp +++ b/vpr/src/route/route_common.cpp @@ -86,7 +86,7 @@ static int num_linked_f_pointer_allocated = 0; * */ /******************** Subroutines local to route_common.c *******************/ -static t_trace_branch traceback_branch(int node, std::unordered_set& main_branch_visited); +static t_trace_branch traceback_branch(int node, int target_pin, std::unordered_set& main_branch_visited); static std::pair add_trace_non_configurable(t_trace* head, t_trace* tail, int node, std::unordered_set& visited); static std::pair add_trace_non_configurable_recurr(int node, std::unordered_set& visited, int depth = 0); @@ -494,7 +494,7 @@ void init_route_structs(int bb_factor) { route_ctx.net_status.resize(cluster_ctx.clb_nlist.nets().size()); } -t_trace* update_traceback(t_heap* hptr, ClusterNetId net_id) { +t_trace* update_traceback(t_heap* hptr, int target_pin, ClusterNetId net_id) { /* This routine adds the most recently finished wire segment to the * * traceback linked list. The first connection starts with the net SOURCE * * and begins at the structure pointed to by route_ctx.trace[net_id].head. 
* @@ -513,7 +513,7 @@ t_trace* update_traceback(t_heap* hptr, ClusterNetId net_id) { VTR_ASSERT_SAFE(validate_trace_nodes(route_ctx.trace[net_id].head, trace_nodes)); - t_trace_branch branch = traceback_branch(hptr->index, trace_nodes); + t_trace_branch branch = traceback_branch(hptr->index, target_pin, trace_nodes); VTR_ASSERT_SAFE(validate_trace_nodes(branch.head, trace_nodes)); @@ -532,7 +532,7 @@ t_trace* update_traceback(t_heap* hptr, ClusterNetId net_id) { //Traces back a new routing branch starting from the specified 'node' and working backwards to any existing routing. //Returns the new branch, and also updates trace_nodes for any new nodes which are included in the branches traceback. -static t_trace_branch traceback_branch(int node, std::unordered_set& trace_nodes) { +static t_trace_branch traceback_branch(int node, int target_pin, std::unordered_set& trace_nodes) { auto& device_ctx = g_vpr_ctx.device(); auto& route_ctx = g_vpr_ctx.routing(); @@ -547,6 +547,7 @@ static t_trace_branch traceback_branch(int node, std::unordered_set& trace_ t_trace* branch_head = alloc_trace_data(); t_trace* branch_tail = branch_head; branch_head->index = node; + branch_head->ipin = target_pin; branch_head->iswitch = OPEN; branch_head->next = nullptr; @@ -561,6 +562,7 @@ static t_trace_branch traceback_branch(int node, std::unordered_set& trace_ //Add the current node to the head of traceback t_trace* prev_ptr = alloc_trace_data(); prev_ptr->index = inode; + prev_ptr->ipin = OPEN; prev_ptr->iswitch = device_ctx.rr_nodes.edge_switch(iedge); prev_ptr->next = branch_head; branch_head = prev_ptr; @@ -741,11 +743,16 @@ void mark_ends(ClusterNetId net_id) { } } -void mark_remaining_ends(const std::vector& remaining_sinks) { +void mark_remaining_ends(ClusterNetId net_id, const std::vector& remaining_sinks) { // like mark_ends, but only performs it for the remaining sinks of a net + int inode; + auto& route_ctx = g_vpr_ctx.mutable_routing(); - for (int sink_node : remaining_sinks) - 
++route_ctx.rr_node_route_inf[sink_node].target_flag; + + for (int sink_pin : remaining_sinks) { + inode = route_ctx.net_rr_terminals[net_id][sink_pin]; + ++route_ctx.rr_node_route_inf[inode].target_flag; + } } void drop_traceback_tail(ClusterNetId net_id) { @@ -1192,6 +1199,7 @@ alloc_trace_data() { trace_free_head->next = nullptr; } temp_ptr = trace_free_head; + temp_ptr->ipin = OPEN; //default trace_free_head = trace_free_head->next; num_trace_allocated++; return (temp_ptr); diff --git a/vpr/src/route/route_common.h b/vpr/src/route/route_common.h index 003f10002d4..de2dc983509 100644 --- a/vpr/src/route/route_common.h +++ b/vpr/src/route/route_common.h @@ -22,7 +22,7 @@ void pathfinder_update_acc_cost_and_overuse_info(float acc_fac, OveruseInfo& ove float update_pres_fac(float new_pres_fac); -t_trace* update_traceback(t_heap* hptr, ClusterNetId net_id); +t_trace* update_traceback(t_heap* hptr, int target_pin, ClusterNetId net_id); void reset_path_costs(const std::vector& visited_rr_nodes); @@ -85,7 +85,7 @@ inline float get_single_rr_cong_cost(int inode, float pres_fac) { } void mark_ends(ClusterNetId net_id); -void mark_remaining_ends(const std::vector& remaining_sinks); +void mark_remaining_ends(ClusterNetId net_id, const std::vector& remaining_sinks); void free_traceback(ClusterNetId net_id); void drop_traceback_tail(ClusterNetId net_id); diff --git a/vpr/src/route/route_timing.cpp b/vpr/src/route/route_timing.cpp index 26dba9e97dd..ac1776de8ee 100644 --- a/vpr/src/route/route_timing.cpp +++ b/vpr/src/route/route_timing.cpp @@ -89,6 +89,7 @@ static bool timing_driven_pre_route_to_clock_root( ConnectionRouter& router, ClusterNetId net_id, int sink_node, + int sink_pin, const t_conn_cost_params cost_params, int high_fanout_threshold, t_rt_node* rt_root, @@ -997,6 +998,8 @@ bool timing_driven_route_net(ConnectionRouter& router, if (cluster_ctx.clb_nlist.net_is_global(net_id) && router_opts.two_stage_clock_routing) { //VTR_ASSERT(router_opts.clock_modeling == 
DEDICATED_NETWORK); int sink_node = device_ctx.virtual_clock_network_root_idx; + auto& rr_sink_node_to_pin = connections_inf.get_rr_sink_node_to_pin(); + int sink_pin = rr_sink_node_to_pin[net_id][sink_node]; //clock net sink nodes all have unique node IDs so this mapping can be used enable_router_debug(router_opts, net_id, sink_node, itry, &router); @@ -1009,6 +1012,7 @@ bool timing_driven_route_net(ConnectionRouter& router, router, net_id, sink_node, + sink_pin, cost_params, router_opts.high_fanout_threshold, rt_root, @@ -1083,6 +1087,7 @@ bool timing_driven_route_net(ConnectionRouter& router, // route tree is not kept persistent since building it from the traceback the next iteration takes almost 0 time VTR_LOGV_DEBUG(f_router_debug, "Routed Net %zu (%zu sinks)\n", size_t(net_id), num_sinks); + free_route_tree(rt_root); return (true); } @@ -1092,6 +1097,7 @@ static bool timing_driven_pre_route_to_clock_root( ConnectionRouter& router, ClusterNetId net_id, int sink_node, + int sink_pin, const t_conn_cost_params cost_params, int high_fanout_threshold, t_rt_node* rt_root, @@ -1144,9 +1150,9 @@ static bool timing_driven_pre_route_to_clock_root( * lets me reuse all the routines written for breadth-first routing, which * * all take a traceback structure as input. */ - t_trace* new_route_start_tptr = update_traceback(&cheapest, net_id); + t_trace* new_route_start_tptr = update_traceback(&cheapest, sink_pin, net_id); VTR_ASSERT_DEBUG(validate_traceback(route_ctx.trace[net_id].head)); - update_route_tree(&cheapest, ((high_fanout) ? &spatial_rt_lookup : nullptr)); + update_route_tree(&cheapest, sink_pin, ((high_fanout) ? 
&spatial_rt_lookup : nullptr)); VTR_ASSERT_DEBUG(verify_route_tree(rt_root)); VTR_ASSERT_DEBUG(verify_traceback_route_tree_equivalent(route_ctx.trace[net_id].head, rt_root)); VTR_ASSERT_DEBUG(!high_fanout || validate_route_tree_spatial_lookup(rt_root, spatial_rt_lookup)); @@ -1196,7 +1202,8 @@ static bool timing_driven_route_sink( profiling::sink_criticality_start(); int sink_node = route_ctx.net_rr_terminals[net_id][target_pin]; - +auto& device_ctx = g_vpr_ctx.device(); +VTR_ASSERT(device_ctx.rr_nodes[sink_node].type() == SINK); VTR_LOGV_DEBUG(f_router_debug, "Net %zu Target %d (%s)\n", size_t(net_id), itarget, describe_rr_node(sink_node).c_str()); VTR_ASSERT_DEBUG(verify_traceback_route_tree_equivalent(route_ctx.trace[net_id].head, rt_root)); @@ -1255,10 +1262,11 @@ static bool timing_driven_route_sink( int inode = cheapest.index; route_ctx.rr_node_route_inf[inode].target_flag--; /* Connected to this SINK. */ - t_trace* new_route_start_tptr = update_traceback(&cheapest, net_id); + t_trace* new_route_start_tptr = update_traceback(&cheapest, target_pin, net_id); + VTR_ASSERT_DEBUG(validate_traceback(route_ctx.trace[net_id].head)); - rt_node_of_sink[target_pin] = update_route_tree(&cheapest, ((high_fanout) ? &spatial_rt_lookup : nullptr)); + rt_node_of_sink[target_pin] = update_route_tree(&cheapest, target_pin, ((high_fanout) ? 
&spatial_rt_lookup : nullptr)); VTR_ASSERT_DEBUG(verify_route_tree(rt_root)); VTR_ASSERT_DEBUG(verify_traceback_route_tree_equivalent(route_ctx.trace[net_id].head, rt_root)); VTR_ASSERT_DEBUG(!high_fanout || validate_route_tree_spatial_lookup(rt_root, spatial_rt_lookup)); @@ -1379,7 +1387,9 @@ static t_rt_node* setup_routing_resources(int itry, add_route_tree_to_rr_node_lookup(rt_root); // give lookup on the reached sinks - connections_inf.put_sink_rt_nodes_in_net_pins_lookup(reached_rt_sinks, rt_node_of_sink); + for (t_rt_node* sink_node : reached_rt_sinks) { + rt_node_of_sink[sink_node->ipin] = sink_node; + } profiling::net_rebuild_end(num_sinks, remaining_targets.size()); @@ -1388,11 +1398,8 @@ static t_rt_node* setup_routing_resources(int itry, // congestion should've been pruned away VTR_ASSERT_SAFE(is_uncongested_route_tree(rt_root)); - // use the nodes to directly mark ends before they get converted to pins - mark_remaining_ends(remaining_targets); - - // everything dealing with a net works with it in terms of its sink pins; need to convert its sink nodes to sink pins - connections_inf.convert_sink_nodes_to_net_pins(remaining_targets); + // mark remaining ends + mark_remaining_ends(net_id, remaining_targets); // still need to calculate the tree's time delay (0 Tarrival means from SOURCE) load_route_tree_Tdel(rt_root, 0); diff --git a/vpr/src/route/route_tree_timing.cpp b/vpr/src/route/route_tree_timing.cpp index 5e98fba420f..c1ac6e18439 100644 --- a/vpr/src/route/route_tree_timing.cpp +++ b/vpr/src/route/route_tree_timing.cpp @@ -27,6 +27,13 @@ /* Array below allows mapping from any rr_node to any rt_node currently in * the rt_tree. */ +/* In some cases the same SINK node is put into the tree multiple times in a * + * single route. To model this, we are putting in separate rt_nodes in the route * + * tree if we go to the same SINK more than once. 
rr_node_to_rt_node[inode] will * + * therefore store the last rt_node created of all the SINK nodes with the same * + * index "inode". This is okay because the mapping is only used in this file to * + * quickly figure out where rt_nodes that we are branching off of (for nets with * + * fanout > 1) are, and we will never branch off a SINK. */ static std::vector rr_node_to_rt_node; /* [0..device_ctx.rr_nodes.size()-1] */ /* Frees lists for fast addition and deletion of nodes and edges. */ @@ -45,6 +52,7 @@ static t_linked_rt_edge* alloc_linked_rt_edge(); static void free_linked_rt_edge(t_linked_rt_edge* rt_edge); static t_rt_node* add_subtree_to_route_tree(t_heap* hptr, + int target_pin, t_rt_node** sink_rt_node_ptr); static t_rt_node* add_non_configurable_to_route_tree(const int rr_node, const bool reached_by_non_configurable_edge, std::unordered_set& visited); @@ -59,7 +67,7 @@ static t_trace* traceback_to_route_tree_branch(t_trace* trace, std::map traceback_from_route_tree_recurr(t_trace* head, t_trace* tail, const t_rt_node* node); -void collect_route_tree_connections(const t_rt_node* node, std::set>& connections); +void collect_route_tree_connections(const t_rt_node* node, std::multiset>& connections); /************************** Subroutine definitions ***************************/ @@ -187,6 +195,7 @@ t_rt_node* init_route_tree_to_source(ClusterNetId inet) { inode = route_ctx.net_rr_terminals[inet][0]; /* Net source */ rt_root->inode = inode; + rt_root->ipin = OPEN; rt_root->C_downstream = device_ctx.rr_nodes[inode].C(); rt_root->R_upstream = device_ctx.rr_nodes[inode].R(); rt_root->Tdel = 0.5 * device_ctx.rr_nodes[inode].R() * device_ctx.rr_nodes[inode].C(); @@ -199,7 +208,7 @@ t_rt_node* init_route_tree_to_source(ClusterNetId inet) { * updates the Tdel, etc. numbers for the rest of the routing tree. hptr * is the heap pointer of the SINK that was reached. This routine returns * a pointer to the rt_node of the SINK that it adds to the routing. 
*/ -t_rt_node* update_route_tree(t_heap* hptr, SpatialRouteTreeLookup* spatial_rt_lookup) { +t_rt_node* update_route_tree(t_heap* hptr, int target_pin, SpatialRouteTreeLookup* spatial_rt_lookup) { t_rt_node *start_of_new_subtree_rt_node, *sink_rt_node; t_rt_node *unbuffered_subtree_rt_root, *subtree_parent_rt_node; float Tdel_start; @@ -208,7 +217,7 @@ t_rt_node* update_route_tree(t_heap* hptr, SpatialRouteTreeLookup* spatial_rt_lo auto& device_ctx = g_vpr_ctx.device(); //Create a new subtree from the target in hptr to existing routing - start_of_new_subtree_rt_node = add_subtree_to_route_tree(hptr, &sink_rt_node); + start_of_new_subtree_rt_node = add_subtree_to_route_tree(hptr, target_pin, &sink_rt_node); //Propagate R_upstream down into the new subtree load_new_subtree_R_upstream(start_of_new_subtree_rt_node); @@ -241,7 +250,12 @@ t_rt_node* update_route_tree(t_heap* hptr, SpatialRouteTreeLookup* spatial_rt_lo void add_route_tree_to_rr_node_lookup(t_rt_node* node) { if (node) { - VTR_ASSERT(rr_node_to_rt_node[node->inode] == nullptr || rr_node_to_rt_node[node->inode] == node); + auto& device_ctx = g_vpr_ctx.device(); + if (device_ctx.rr_nodes[node->inode].type() == SINK) { + VTR_ASSERT(rr_node_to_rt_node[node->inode] == nullptr || rr_node_to_rt_node[node->inode]->inode == node->inode); + } else { + VTR_ASSERT(rr_node_to_rt_node[node->inode] == nullptr || rr_node_to_rt_node[node->inode] == node); + } rr_node_to_rt_node[node->inode] = node; @@ -252,7 +266,7 @@ void add_route_tree_to_rr_node_lookup(t_rt_node* node) { } static t_rt_node* -add_subtree_to_route_tree(t_heap* hptr, t_rt_node** sink_rt_node_ptr) { +add_subtree_to_route_tree(t_heap* hptr, int target_pin, t_rt_node** sink_rt_node_ptr) { /* Adds the most recent wire segment, ending at the SINK indicated by hptr, * to the routing tree. It returns the first (most upstream) new rt_node, * and (via a pointer) the rt_node of the new SINK. 
Traverses up from SINK */ @@ -274,6 +288,7 @@ add_subtree_to_route_tree(t_heap* hptr, t_rt_node** sink_rt_node_ptr) { sink_rt_node = alloc_rt_node(); sink_rt_node->u.child_list = nullptr; sink_rt_node->inode = inode; + sink_rt_node->ipin = target_pin; rr_node_to_rt_node[inode] = sink_rt_node; /* In the code below I'm marking SINKs and IPINs as not to be re-expanded. @@ -286,8 +301,8 @@ add_subtree_to_route_tree(t_heap* hptr, t_rt_node** sink_rt_node_ptr) { downstream_rt_node = sink_rt_node; - std::unordered_set main_branch_visited; - std::unordered_set all_visited; + std::unordered_set main_branch_visited; //does not include sink + std::unordered_set all_visited; //does not include sink inode = hptr->prev_node(); RREdgeId edge = hptr->prev_edge(); short iswitch = device_ctx.rr_nodes.edge_switch(edge); @@ -316,6 +331,7 @@ add_subtree_to_route_tree(t_heap* hptr, t_rt_node** sink_rt_node_ptr) { rt_node->u.child_list = linked_rt_edge; rt_node->inode = inode; + rt_node->ipin = OPEN; rr_node_to_rt_node[inode] = rt_node; @@ -347,6 +363,7 @@ add_subtree_to_route_tree(t_heap* hptr, t_rt_node** sink_rt_node_ptr) { //Expand (recursively) each of the main-branch nodes adding any //non-configurably connected nodes + //Sink is not included, so no need to pass in the node's ipin value. 
for (int rr_node : main_branch_visited) { add_non_configurable_to_route_tree(rr_node, false, all_visited); } @@ -374,6 +391,7 @@ static t_rt_node* add_non_configurable_to_route_tree(const int rr_node, const bo rt_node = alloc_rt_node(); rt_node->u.child_list = nullptr; rt_node->inode = rr_node; + rt_node->ipin = OPEN; if (device_ctx.rr_nodes[rr_node].type() == IPIN) { rt_node->re_expand = false; @@ -657,7 +675,7 @@ void print_route_tree(const t_rt_node* rt_node, int depth) { } auto& device_ctx = g_vpr_ctx.device(); - VTR_LOG("%srt_node: %d (%s)", indent.c_str(), rt_node->inode, device_ctx.rr_nodes[rt_node->inode].type_string()); + VTR_LOG("%srt_node: %d (%s) \t ipin: %d", indent.c_str(), rt_node->inode, device_ctx.rr_nodes[rt_node->inode].type_string(), rt_node->ipin); if (rt_node->parent_switch != OPEN) { bool parent_edge_configurable = device_ctx.rr_switch_inf[rt_node->parent_switch].configurable(); @@ -760,22 +778,28 @@ static t_trace* traceback_to_route_tree_branch(t_trace* trace, t_rt_node* node = nullptr; int inode = trace->index; + int ipin = trace->ipin; int iswitch = trace->iswitch; + auto& device_ctx = g_vpr_ctx.device(); auto itr = rr_node_to_rt.find(trace->index); - if (itr == rr_node_to_rt.end()) { + + // In some cases, the same sink node is put into the tree multiple times in a single route. + // So it is possible to hit the same node index multiple times during traceback. Create a + // separate rt_node for each sink with the same node index. 
+ if (itr == rr_node_to_rt.end() || device_ctx.rr_nodes[inode].type() == SINK) { //Create //Initialize route tree node node = alloc_rt_node(); node->inode = inode; + node->ipin = ipin; node->u.child_list = nullptr; node->R_upstream = std::numeric_limits::quiet_NaN(); node->C_downstream = std::numeric_limits::quiet_NaN(); node->Tdel = std::numeric_limits::quiet_NaN(); - auto& device_ctx = g_vpr_ctx.device(); auto node_type = device_ctx.rr_nodes[inode].type(); if (node_type == IPIN || node_type == SINK) node->re_expand = false; @@ -808,7 +832,6 @@ static t_trace* traceback_to_route_tree_branch(t_trace* trace, // // Each configurable edges from the non-configurable set is a // usage of the set. - auto& device_ctx = g_vpr_ctx.device(); auto set_itr = device_ctx.rr_node_to_non_config_node_set.find(inode); if (non_config_node_set_usage != nullptr && set_itr != device_ctx.rr_node_to_non_config_node_set.end()) { if (device_ctx.rr_switch_inf[iswitch].configurable()) { @@ -853,6 +876,7 @@ static std::pair traceback_from_route_tree_recurr(t_trace* h for (t_linked_rt_edge* edge = node->u.child_list; edge != nullptr; edge = edge->next) { t_trace* curr = alloc_trace_data(); curr->index = node->inode; + curr->ipin = node->ipin; curr->iswitch = edge->iswitch; curr->next = nullptr; @@ -873,6 +897,7 @@ static std::pair traceback_from_route_tree_recurr(t_trace* h //Leaf t_trace* curr = alloc_trace_data(); curr->index = node->inode; + curr->ipin = node->ipin; curr->iswitch = OPEN; curr->next = nullptr; @@ -1010,7 +1035,7 @@ static t_rt_node* prune_route_tree_recurr(t_rt_node* node, CBRR& connections_inf VTR_ASSERT(force_prune); //Record as not reached - connections_inf.toreach_rr_sink(node->inode); + connections_inf.toreach_rr_sink(node->ipin); free_rt_node(node); return nullptr; //Pruned @@ -1431,6 +1456,7 @@ init_route_tree_to_source_no_net(int inode) { rt_root->parent_switch = OPEN; rt_root->re_expand = true; rt_root->inode = inode; + rt_root->ipin = OPEN; rt_root->C_downstream = 
device_ctx.rr_nodes[inode].C(); rt_root->R_upstream = device_ctx.rr_nodes[inode].R(); rt_root->Tdel = 0.5 * device_ctx.rr_nodes[inode].R() * device_ctx.rr_nodes[inode].C(); @@ -1441,7 +1467,7 @@ init_route_tree_to_source_no_net(int inode) { bool verify_traceback_route_tree_equivalent(const t_trace* head, const t_rt_node* rt_root) { //Walk the route tree saving all the used connections - std::set> route_tree_connections; + std::multiset> route_tree_connections; collect_route_tree_connections(rt_root, route_tree_connections); //Remove the extra parent connection to root (not included in traceback) @@ -1461,7 +1487,7 @@ bool verify_traceback_route_tree_equivalent(const t_trace* head, const t_rt_node VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Route tree missing traceback connection: node %d -> %d (switch %d)\n", prev_node, to_node, prev_switch); } else { - route_tree_connections.erase(conn); //Remove found connections + route_tree_connections.erase(route_tree_connections.lower_bound(conn)); //Remove the first found connections } } @@ -1482,7 +1508,7 @@ bool verify_traceback_route_tree_equivalent(const t_trace* head, const t_rt_node return true; } -void collect_route_tree_connections(const t_rt_node* node, std::set>& connections) { +void collect_route_tree_connections(const t_rt_node* node, std::multiset>& connections) { if (node) { //Record reaching connection int prev_node = OPEN; diff --git a/vpr/src/route/route_tree_timing.h b/vpr/src/route/route_tree_timing.h index ac10b52459b..106efd27041 100644 --- a/vpr/src/route/route_tree_timing.h +++ b/vpr/src/route/route_tree_timing.h @@ -19,7 +19,7 @@ void free_route_tree(t_rt_node* rt_node); void print_route_tree(const t_rt_node* rt_node); void print_route_tree(const t_rt_node* rt_node, int depth); -t_rt_node* update_route_tree(t_heap* hptr, SpatialRouteTreeLookup* spatial_rt_lookup); +t_rt_node* update_route_tree(t_heap* hptr, int target_pin, SpatialRouteTreeLookup* spatial_rt_lookup); void update_net_delays_from_route_tree(float* 
net_delay, const t_rt_node* const* rt_node_of_sink, diff --git a/vpr/src/route/route_tree_type.h b/vpr/src/route/route_tree_type.h index 7d663aafb9f..6f2408adac9 100644 --- a/vpr/src/route/route_tree_type.h +++ b/vpr/src/route/route_tree_type.h @@ -27,6 +27,8 @@ struct t_linked_rt_edge { * parent_switch: Index of the switch type driving this node (by its * * parent). * * inode: index (ID) of the rr_node that corresponds to this rt_node. * + * ipin: Pin index associated with the rt_node. Gives an unique identifier * + * or each rt_node. * * C_downstream: Total downstream capacitance from this rt_node. That is, * * the total C of the subtree rooted at the current node, * * including the C of the current node. * @@ -43,6 +45,7 @@ struct t_rt_node { short parent_switch; bool re_expand; int inode; + int ipin; float C_downstream; float R_upstream; float Tdel; diff --git a/vpr/src/route/router_delay_profiling.cpp b/vpr/src/route/router_delay_profiling.cpp index 008aa46b7de..79307342a40 100644 --- a/vpr/src/route/router_delay_profiling.cpp +++ b/vpr/src/route/router_delay_profiling.cpp @@ -74,7 +74,7 @@ bool RouterDelayProfiler::calculate_delay(int source_node, int sink_node, const if (found_path) { VTR_ASSERT(cheapest.index == sink_node); - t_rt_node* rt_node_of_sink = update_route_tree(&cheapest, nullptr); + t_rt_node* rt_node_of_sink = update_route_tree(&cheapest, OPEN, nullptr); //find delay *net_delay = rt_node_of_sink->Tdel; @@ -143,7 +143,7 @@ std::vector calculate_all_path_delays_from_rr_node(int src_rr_node, const //Build the routing tree to get the delay rt_root = setup_routing_resources_no_net(src_rr_node); - t_rt_node* rt_node_of_sink = update_route_tree(&shortest_paths[sink_rr_node], nullptr); + t_rt_node* rt_node_of_sink = update_route_tree(&shortest_paths[sink_rr_node], OPEN, nullptr); VTR_ASSERT(rt_node_of_sink->inode == sink_rr_node); From e46fed46532e6d17ef6b6f0ad9bb4c75a4f732f8 Mon Sep 17 00:00:00 2001 From: Helen Dai Date: Fri, 21 Aug 2020 14:17:13 
-0400 Subject: [PATCH 03/15] Fix bug in net_delay to allow pin lookup for net delay --- vpr/src/base/vpr_api.cpp | 4 +-- vpr/src/route/route_timing.cpp | 2 +- vpr/src/route/route_tree_timing.cpp | 3 +- vpr/src/timing/net_delay.cpp | 45 +++++++++++++++++------------ vpr/src/timing/net_delay.h | 2 +- 5 files changed, 33 insertions(+), 23 deletions(-) diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index 8b6ae4ffdf7..0f5e190f502 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -852,7 +852,7 @@ RouteStatus vpr_load_routing(t_vpr_setup& vpr_setup, if (vpr_setup.Timing.timing_analysis_enabled) { //Update timing info - load_net_delay_from_routing(net_delay); + load_net_delay_from_routing(net_delay, true); timing_info->update(); } @@ -1216,7 +1216,7 @@ void vpr_analysis(t_vpr_setup& vpr_setup, const t_arch& Arch, const RouteStatus& auto& cluster_ctx = g_vpr_ctx.clustering(); ClbNetPinsMatrix net_delay = make_net_pins_matrix(cluster_ctx.clb_nlist); - load_net_delay_from_routing(net_delay); + load_net_delay_from_routing(net_delay, true); //Do final timing analysis auto analysis_delay_calc = std::make_shared(atom_ctx.nlist, atom_ctx.lookup, net_delay); diff --git a/vpr/src/route/route_timing.cpp b/vpr/src/route/route_timing.cpp index ac1776de8ee..55b253b5ada 100644 --- a/vpr/src/route/route_timing.cpp +++ b/vpr/src/route/route_timing.cpp @@ -1470,7 +1470,7 @@ static bool timing_driven_check_net_delays(ClbNetPinsMatrix& net_delay) { unsigned int ipin; ClbNetPinsMatrix net_delay_check = make_net_pins_matrix(cluster_ctx.clb_nlist); - load_net_delay_from_routing(net_delay_check); + load_net_delay_from_routing(net_delay_check, false); for (auto net_id : cluster_ctx.clb_nlist.nets()) { for (ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ipin++) { diff --git a/vpr/src/route/route_tree_timing.cpp b/vpr/src/route/route_tree_timing.cpp index c1ac6e18439..f7cf21c5b88 100644 --- a/vpr/src/route/route_tree_timing.cpp +++ 
b/vpr/src/route/route_tree_timing.cpp @@ -675,7 +675,8 @@ void print_route_tree(const t_rt_node* rt_node, int depth) { } auto& device_ctx = g_vpr_ctx.device(); - VTR_LOG("%srt_node: %d (%s) \t ipin: %d", indent.c_str(), rt_node->inode, device_ctx.rr_nodes[rt_node->inode].type_string(), rt_node->ipin); + VTR_LOG("%srt_node: %d (%s) \t ipin: %d \t R: %g \t C: %g \t delay: %g", + indent.c_str(), rt_node->inode, device_ctx.rr_nodes[rt_node->inode].type_string(), rt_node->ipin, rt_node->R_upstream, rt_node->C_downstream, rt_node->Tdel); if (rt_node->parent_switch != OPEN) { bool parent_edge_configurable = device_ctx.rr_switch_inf[rt_node->parent_switch].configurable(); diff --git a/vpr/src/timing/net_delay.cpp b/vpr/src/timing/net_delay.cpp index c97a1a88325..4656039f5a6 100644 --- a/vpr/src/timing/net_delay.cpp +++ b/vpr/src/timing/net_delay.cpp @@ -24,24 +24,24 @@ /********************** Variables local to this module ***********************/ -/* Unordered map below stores the pair whose key is the index of the rr_node * - * that corresponds to the rt_node, and whose value is the time delay * - * associated with that node. The map will be used to store delays while * - * traversing the nodes of the route tree in load_one_net_delay_recurr. */ +/* Unordered map below stores the pair whose key is either the index of the * + * rr_node or the pin that corresponds to the rt_node, and whose value is the * + * time delay associated with that node. The map will be used to store delays * + * while traversing the nodes of the route tree in load_one_net_delay_recurr. 
*/ -static std::unordered_map inode_to_Tdel_map; +static std::unordered_map index_to_Tdel_map; /*********************** Subroutines local to this module ********************/ -static void load_one_net_delay(ClbNetPinsMatrix& net_delay, ClusterNetId net_id); +static void load_one_net_delay(ClbNetPinsMatrix& net_delay, ClusterNetId net_id, bool analysis); -static void load_one_net_delay_recurr(t_rt_node* node, ClusterNetId net_id); +static void load_one_net_delay_recurr(t_rt_node* node, ClusterNetId net_id, bool analysis); static void load_one_constant_net_delay(ClbNetPinsMatrix& net_delay, ClusterNetId net_id, float delay_value); /*************************** Subroutine definitions **************************/ -void load_net_delay_from_routing(ClbNetPinsMatrix& net_delay) { +void load_net_delay_from_routing(ClbNetPinsMatrix& net_delay, bool analysis) { /* This routine loads net_delay[0..nets.size()-1][1..num_pins-1]. Each entry * * is the Elmore delay from the net source to the appropriate sink. Both * * the rr_graph and the routing traceback must be completely constructed * @@ -53,12 +53,12 @@ void load_net_delay_from_routing(ClbNetPinsMatrix& net_delay) { if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) { load_one_constant_net_delay(net_delay, net_id, 0.); } else { - load_one_net_delay(net_delay, net_id); + load_one_net_delay(net_delay, net_id, analysis); } } } -static void load_one_net_delay(ClbNetPinsMatrix& net_delay, ClusterNetId net_id) { +static void load_one_net_delay(ClbNetPinsMatrix& net_delay, ClusterNetId net_id, bool analysis) { /* This routine loads delay values for one net in * * net_delay[net_id][1..num_pins-1]. First, from the traceback, it * * constructs the route tree and computes its values for R, C, and Tdel. * @@ -66,7 +66,7 @@ static void load_one_net_delay(ClbNetPinsMatrix& net_delay, ClusterNetId * each node into the map inode_to_Tdel. 
Then, while looping through the * * net_delay array we search for the inode corresponding to the pin * * identifiers, and correspondingly update the entry in net_delay. * - * Finally, it frees the route tree and clears the inode_to_Tdel_map * + * Finally, it frees the route tree and clears the index_to_Tdel_map * * associated with that net. */ auto& route_ctx = g_vpr_ctx.routing(); @@ -83,26 +83,35 @@ static void load_one_net_delay(ClbNetPinsMatrix& net_delay, ClusterNetId load_new_subtree_R_upstream(rt_root); // load in the resistance values for the route tree load_new_subtree_C_downstream(rt_root); // load in the capacitance values for the route tree load_route_tree_Tdel(rt_root, 0.); // load the time delay values for the route tree - load_one_net_delay_recurr(rt_root, net_id); // recursively traverse the tree and load entries into the inode_to_Tdel map + load_one_net_delay_recurr(rt_root, net_id, analysis); // recursively traverse the tree and load entries into the inode_to_Tdel map for (unsigned int ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ipin++) { inode = route_ctx.net_rr_terminals[net_id][ipin]; // look for the index of the rr node that corresponds to the sink that was used to route a certain connection. 
- auto itr = inode_to_Tdel_map.find(inode); - VTR_ASSERT(itr != inode_to_Tdel_map.end()); + std::unordered_map::iterator itr; + if (analysis) { + itr = index_to_Tdel_map.find(inode); + } else { + itr = index_to_Tdel_map.find(ipin); + } + VTR_ASSERT(itr != index_to_Tdel_map.end()); net_delay[net_id][ipin] = itr->second; // search for the value of Tdel in the inode map and load into net_delay } free_route_tree(rt_root); // free the route tree - inode_to_Tdel_map.clear(); // clear the map + index_to_Tdel_map.clear(); // clear the map } -static void load_one_net_delay_recurr(t_rt_node* node, ClusterNetId net_id) { +static void load_one_net_delay_recurr(t_rt_node* node, ClusterNetId net_id, bool analysis) { /* This routine recursively traverses the route tree, and copies the Tdel of the node into the map. */ - inode_to_Tdel_map[node->inode] = node->Tdel; // add to the map, process current node + if (analysis) { + index_to_Tdel_map[node->inode] = node->Tdel; // add node index to the map, process current node during analysis + } else { + index_to_Tdel_map[node->ipin] = node->Tdel; // add pin index to the map, process current node during timing driven route net + } for (t_linked_rt_edge* edge = node->u.child_list; edge != nullptr; edge = edge->next) { // process children - load_one_net_delay_recurr(edge->child, net_id); + load_one_net_delay_recurr(edge->child, net_id, analysis); } } diff --git a/vpr/src/timing/net_delay.h b/vpr/src/timing/net_delay.h index 560b89c9841..c3ebe215423 100644 --- a/vpr/src/timing/net_delay.h +++ b/vpr/src/timing/net_delay.h @@ -5,6 +5,6 @@ #include "vtr_vector.h" #include "vpr_net_pins_matrix.h" -void load_net_delay_from_routing(ClbNetPinsMatrix& net_delay); +void load_net_delay_from_routing(ClbNetPinsMatrix& net_delay, bool analysis); #endif From f86140d522c1e81c91a70740fe23174ecdd6dde6 Mon Sep 17 00:00:00 2001 From: Helen Dai Date: Mon, 24 Aug 2020 23:25:04 -0400 Subject: [PATCH 04/15] Fixed deep copy of traceback and implemented better fix 
for net delay --- vpr/src/base/vpr_api.cpp | 4 ++-- vpr/src/timing/net_delay.cpp | 45 +++++++++++++++--------------------- vpr/src/timing/net_delay.h | 2 +- 3 files changed, 21 insertions(+), 30 deletions(-) diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index 0f5e190f502..8b6ae4ffdf7 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -852,7 +852,7 @@ RouteStatus vpr_load_routing(t_vpr_setup& vpr_setup, if (vpr_setup.Timing.timing_analysis_enabled) { //Update timing info - load_net_delay_from_routing(net_delay, true); + load_net_delay_from_routing(net_delay); timing_info->update(); } @@ -1216,7 +1216,7 @@ void vpr_analysis(t_vpr_setup& vpr_setup, const t_arch& Arch, const RouteStatus& auto& cluster_ctx = g_vpr_ctx.clustering(); ClbNetPinsMatrix net_delay = make_net_pins_matrix(cluster_ctx.clb_nlist); - load_net_delay_from_routing(net_delay, true); + load_net_delay_from_routing(net_delay); //Do final timing analysis auto analysis_delay_calc = std::make_shared(atom_ctx.nlist, atom_ctx.lookup, net_delay); diff --git a/vpr/src/timing/net_delay.cpp b/vpr/src/timing/net_delay.cpp index 4656039f5a6..c97a1a88325 100644 --- a/vpr/src/timing/net_delay.cpp +++ b/vpr/src/timing/net_delay.cpp @@ -24,24 +24,24 @@ /********************** Variables local to this module ***********************/ -/* Unordered map below stores the pair whose key is either the index of the * - * rr_node or the pin that corresponds to the rt_node, and whose value is the * - * time delay associated with that node. The map will be used to store delays * - * while traversing the nodes of the route tree in load_one_net_delay_recurr. */ +/* Unordered map below stores the pair whose key is the index of the rr_node * + * that corresponds to the rt_node, and whose value is the time delay * + * associated with that node. The map will be used to store delays while * + * traversing the nodes of the route tree in load_one_net_delay_recurr. 
*/ -static std::unordered_map index_to_Tdel_map; +static std::unordered_map inode_to_Tdel_map; /*********************** Subroutines local to this module ********************/ -static void load_one_net_delay(ClbNetPinsMatrix& net_delay, ClusterNetId net_id, bool analysis); +static void load_one_net_delay(ClbNetPinsMatrix& net_delay, ClusterNetId net_id); -static void load_one_net_delay_recurr(t_rt_node* node, ClusterNetId net_id, bool analysis); +static void load_one_net_delay_recurr(t_rt_node* node, ClusterNetId net_id); static void load_one_constant_net_delay(ClbNetPinsMatrix& net_delay, ClusterNetId net_id, float delay_value); /*************************** Subroutine definitions **************************/ -void load_net_delay_from_routing(ClbNetPinsMatrix& net_delay, bool analysis) { +void load_net_delay_from_routing(ClbNetPinsMatrix& net_delay) { /* This routine loads net_delay[0..nets.size()-1][1..num_pins-1]. Each entry * * is the Elmore delay from the net source to the appropriate sink. Both * * the rr_graph and the routing traceback must be completely constructed * @@ -53,12 +53,12 @@ void load_net_delay_from_routing(ClbNetPinsMatrix& net_delay, bool analys if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) { load_one_constant_net_delay(net_delay, net_id, 0.); } else { - load_one_net_delay(net_delay, net_id, analysis); + load_one_net_delay(net_delay, net_id); } } } -static void load_one_net_delay(ClbNetPinsMatrix& net_delay, ClusterNetId net_id, bool analysis) { +static void load_one_net_delay(ClbNetPinsMatrix& net_delay, ClusterNetId net_id) { /* This routine loads delay values for one net in * * net_delay[net_id][1..num_pins-1]. First, from the traceback, it * * constructs the route tree and computes its values for R, C, and Tdel. * @@ -66,7 +66,7 @@ static void load_one_net_delay(ClbNetPinsMatrix& net_delay, ClusterNetId * each node into the map inode_to_Tdel. 
Then, while looping through the * * net_delay array we search for the inode corresponding to the pin * * identifiers, and correspondingly update the entry in net_delay. * - * Finally, it frees the route tree and clears the index_to_Tdel_map * + * Finally, it frees the route tree and clears the inode_to_Tdel_map * * associated with that net. */ auto& route_ctx = g_vpr_ctx.routing(); @@ -83,35 +83,26 @@ static void load_one_net_delay(ClbNetPinsMatrix& net_delay, ClusterNetId load_new_subtree_R_upstream(rt_root); // load in the resistance values for the route tree load_new_subtree_C_downstream(rt_root); // load in the capacitance values for the route tree load_route_tree_Tdel(rt_root, 0.); // load the time delay values for the route tree - load_one_net_delay_recurr(rt_root, net_id, analysis); // recursively traverse the tree and load entries into the inode_to_Tdel map + load_one_net_delay_recurr(rt_root, net_id); // recursively traverse the tree and load entries into the inode_to_Tdel map for (unsigned int ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ipin++) { inode = route_ctx.net_rr_terminals[net_id][ipin]; // look for the index of the rr node that corresponds to the sink that was used to route a certain connection. 
- std::unordered_map::iterator itr; - if (analysis) { - itr = index_to_Tdel_map.find(inode); - } else { - itr = index_to_Tdel_map.find(ipin); - } - VTR_ASSERT(itr != index_to_Tdel_map.end()); + auto itr = inode_to_Tdel_map.find(inode); + VTR_ASSERT(itr != inode_to_Tdel_map.end()); net_delay[net_id][ipin] = itr->second; // search for the value of Tdel in the inode map and load into net_delay } free_route_tree(rt_root); // free the route tree - index_to_Tdel_map.clear(); // clear the map + inode_to_Tdel_map.clear(); // clear the map } -static void load_one_net_delay_recurr(t_rt_node* node, ClusterNetId net_id, bool analysis) { +static void load_one_net_delay_recurr(t_rt_node* node, ClusterNetId net_id) { /* This routine recursively traverses the route tree, and copies the Tdel of the node into the map. */ - if (analysis) { - index_to_Tdel_map[node->inode] = node->Tdel; // add node index to the map, process current node during analysis - } else { - index_to_Tdel_map[node->ipin] = node->Tdel; // add pin index to the map, process current node during timing driven route net - } + inode_to_Tdel_map[node->inode] = node->Tdel; // add to the map, process current node for (t_linked_rt_edge* edge = node->u.child_list; edge != nullptr; edge = edge->next) { // process children - load_one_net_delay_recurr(edge->child, net_id, analysis); + load_one_net_delay_recurr(edge->child, net_id); } } diff --git a/vpr/src/timing/net_delay.h b/vpr/src/timing/net_delay.h index c3ebe215423..560b89c9841 100644 --- a/vpr/src/timing/net_delay.h +++ b/vpr/src/timing/net_delay.h @@ -5,6 +5,6 @@ #include "vtr_vector.h" #include "vpr_net_pins_matrix.h" -void load_net_delay_from_routing(ClbNetPinsMatrix& net_delay, bool analysis); +void load_net_delay_from_routing(ClbNetPinsMatrix& net_delay); #endif From 7e3c64c8cd20e374db5c325cd2560e3dc5c5957d Mon Sep 17 00:00:00 2001 From: Helen Dai Date: Mon, 24 Aug 2020 23:28:33 -0400 Subject: [PATCH 05/15] Missed checking in these files in previous commit --- 
vpr/src/route/route_timing.cpp | 2 +- vpr/src/route/route_traceback.cpp | 3 ++- vpr/src/timing/net_delay.cpp | 40 +++++++++++++++---------------- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/vpr/src/route/route_timing.cpp b/vpr/src/route/route_timing.cpp index 55b253b5ada..ac1776de8ee 100644 --- a/vpr/src/route/route_timing.cpp +++ b/vpr/src/route/route_timing.cpp @@ -1470,7 +1470,7 @@ static bool timing_driven_check_net_delays(ClbNetPinsMatrix& net_delay) { unsigned int ipin; ClbNetPinsMatrix net_delay_check = make_net_pins_matrix(cluster_ctx.clb_nlist); - load_net_delay_from_routing(net_delay_check, false); + load_net_delay_from_routing(net_delay_check); for (auto net_id : cluster_ctx.clb_nlist.nets()) { for (ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ipin++) { diff --git a/vpr/src/route/route_traceback.cpp b/vpr/src/route/route_traceback.cpp index 0a1badf8c10..7df192f31b2 100644 --- a/vpr/src/route/route_traceback.cpp +++ b/vpr/src/route/route_traceback.cpp @@ -8,10 +8,11 @@ t_traceback::t_traceback(const t_traceback& other) { //Deep-copy of traceback t_trace* prev = nullptr; for (t_trace* other_curr = other.head; other_curr; other_curr = other_curr->next) { - //VTR_LOG("Copying trace %p node: %d switch: %d\n", other_curr, other_curr->index, other_curr->iswitch); + //VTR_LOG("Copying trace %p node: %d switch: %d pin(for sink): %d\n", other_curr, other_curr->index, other_curr->iswitch. 
other_curr->ipin); t_trace* curr = alloc_trace_data(); curr->index = other_curr->index; + curr->ipin = other_curr->ipin; curr->iswitch = other_curr->iswitch; if (prev) { diff --git a/vpr/src/timing/net_delay.cpp b/vpr/src/timing/net_delay.cpp index c97a1a88325..e3457d39fc9 100644 --- a/vpr/src/timing/net_delay.cpp +++ b/vpr/src/timing/net_delay.cpp @@ -24,12 +24,13 @@ /********************** Variables local to this module ***********************/ -/* Unordered map below stores the pair whose key is the index of the rr_node * - * that corresponds to the rt_node, and whose value is the time delay * - * associated with that node. The map will be used to store delays while * - * traversing the nodes of the route tree in load_one_net_delay_recurr. */ +/* Unordered map below stores the pair whose key is the pin index (ranging * + * from 1 to net fan-out) that corresponds to the rt_node, and whose value * + * is the time delay associated with that node. The map will be used to * + * store delays while traversing the nodes of the route tree in * + * load_one_net_delay_recurr. */ -static std::unordered_map inode_to_Tdel_map; +static std::unordered_map ipin_to_Tdel_map; /*********************** Subroutines local to this module ********************/ @@ -63,11 +64,10 @@ static void load_one_net_delay(ClbNetPinsMatrix& net_delay, ClusterNetId * net_delay[net_id][1..num_pins-1]. First, from the traceback, it * * constructs the route tree and computes its values for R, C, and Tdel. * * Next, it walks the route tree recursively, storing the time delays for * - * each node into the map inode_to_Tdel. Then, while looping through the * - * net_delay array we search for the inode corresponding to the pin * - * identifiers, and correspondingly update the entry in net_delay. * - * Finally, it frees the route tree and clears the inode_to_Tdel_map * - * associated with that net. */ + * each sink into the map ipin_to_Tdel. 
Then, while looping through the * + * net_delay array we search for the pin index in the map, and * + * correspondingly update the entry in net_delay. Finally, it frees the * + * route tree and clears the ipin_to_Tdel_map associated with that net. */ auto& route_ctx = g_vpr_ctx.routing(); @@ -77,29 +77,29 @@ static void load_one_net_delay(ClbNetPinsMatrix& net_delay, ClusterNetId } auto& cluster_ctx = g_vpr_ctx.clustering(); - int inode; t_rt_node* rt_root = traceback_to_route_tree(net_id); // obtain the root of the tree constructed from the traceback load_new_subtree_R_upstream(rt_root); // load in the resistance values for the route tree load_new_subtree_C_downstream(rt_root); // load in the capacitance values for the route tree load_route_tree_Tdel(rt_root, 0.); // load the time delay values for the route tree - load_one_net_delay_recurr(rt_root, net_id); // recursively traverse the tree and load entries into the inode_to_Tdel map + load_one_net_delay_recurr(rt_root, net_id); // recursively traverse the tree and load entries into the ipin_to_Tdel map for (unsigned int ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ipin++) { - inode = route_ctx.net_rr_terminals[net_id][ipin]; // look for the index of the rr node that corresponds to the sink that was used to route a certain connection. 
- auto itr = inode_to_Tdel_map.find(inode); - VTR_ASSERT(itr != inode_to_Tdel_map.end()); + auto itr = ipin_to_Tdel_map.find(ipin); + VTR_ASSERT(itr != ipin_to_Tdel_map.end()); - net_delay[net_id][ipin] = itr->second; // search for the value of Tdel in the inode map and load into net_delay + net_delay[net_id][ipin] = itr->second; // search for the value of Tdel in the ipin map and load into net_delay } free_route_tree(rt_root); // free the route tree - inode_to_Tdel_map.clear(); // clear the map + ipin_to_Tdel_map.clear(); // clear the map } static void load_one_net_delay_recurr(t_rt_node* node, ClusterNetId net_id) { - /* This routine recursively traverses the route tree, and copies the Tdel of the node into the map. */ - - inode_to_Tdel_map[node->inode] = node->Tdel; // add to the map, process current node + /* This routine recursively traverses the route tree, and copies the Tdel of the sink_type nodes * + * into the map. */ + if (node->ipin != OPEN) { + ipin_to_Tdel_map[node->ipin] = node->Tdel; // add to the map, process current sink-type node + } for (t_linked_rt_edge* edge = node->u.child_list; edge != nullptr; edge = edge->next) { // process children load_one_net_delay_recurr(edge->child, net_id); From eb2caa1dbfaed4a07a0b2e5bc91cbeb10b0fb8a2 Mon Sep 17 00:00:00 2001 From: Helen Dai Date: Mon, 24 Aug 2020 23:47:11 -0400 Subject: [PATCH 06/15] set global clock net_pin index to -1 (illegal) as it isn't meaningful for this sink --- vpr/src/route/route_timing.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/vpr/src/route/route_timing.cpp b/vpr/src/route/route_timing.cpp index ac1776de8ee..ed02af6d0d7 100644 --- a/vpr/src/route/route_timing.cpp +++ b/vpr/src/route/route_timing.cpp @@ -89,7 +89,6 @@ static bool timing_driven_pre_route_to_clock_root( ConnectionRouter& router, ClusterNetId net_id, int sink_node, - int sink_pin, const t_conn_cost_params cost_params, int high_fanout_threshold, t_rt_node* rt_root, @@ -998,8 +997,6 
@@ bool timing_driven_route_net(ConnectionRouter& router, if (cluster_ctx.clb_nlist.net_is_global(net_id) && router_opts.two_stage_clock_routing) { //VTR_ASSERT(router_opts.clock_modeling == DEDICATED_NETWORK); int sink_node = device_ctx.virtual_clock_network_root_idx; - auto& rr_sink_node_to_pin = connections_inf.get_rr_sink_node_to_pin(); - int sink_pin = rr_sink_node_to_pin[net_id][sink_node]; //clock net sink nodes all have unique node IDs so this mapping can be used enable_router_debug(router_opts, net_id, sink_node, itry, &router); @@ -1012,7 +1009,6 @@ bool timing_driven_route_net(ConnectionRouter& router, router, net_id, sink_node, - sink_pin, cost_params, router_opts.high_fanout_threshold, rt_root, @@ -1097,7 +1093,6 @@ static bool timing_driven_pre_route_to_clock_root( ConnectionRouter& router, ClusterNetId net_id, int sink_node, - int sink_pin, const t_conn_cost_params cost_params, int high_fanout_threshold, t_rt_node* rt_root, @@ -1150,9 +1145,14 @@ static bool timing_driven_pre_route_to_clock_root( * lets me reuse all the routines written for breadth-first routing, which * * all take a traceback structure as input. */ - t_trace* new_route_start_tptr = update_traceback(&cheapest, sink_pin, net_id); + /* This is a special pre-route to a sink that does not correspond to any * + * netlist pin, but which can be reached from the global clock root drive * + * points. Therefore, we can set the net pin index of the sink node to * + * OPEN (meaning illegal) as it is not meaningful for this sink. */ + + t_trace* new_route_start_tptr = update_traceback(&cheapest, OPEN, net_id); VTR_ASSERT_DEBUG(validate_traceback(route_ctx.trace[net_id].head)); - update_route_tree(&cheapest, sink_pin, ((high_fanout) ? &spatial_rt_lookup : nullptr)); + update_route_tree(&cheapest, OPEN, ((high_fanout) ? 
&spatial_rt_lookup : nullptr)); VTR_ASSERT_DEBUG(verify_route_tree(rt_root)); VTR_ASSERT_DEBUG(verify_traceback_route_tree_equivalent(route_ctx.trace[net_id].head, rt_root)); VTR_ASSERT_DEBUG(!high_fanout || validate_route_tree_spatial_lookup(rt_root, spatial_rt_lookup)); From 81e8fb4bff5c1e98ab60acbb9eb32b8624dddc9d Mon Sep 17 00:00:00 2001 From: Helen Dai Date: Tue, 25 Aug 2020 00:31:38 -0400 Subject: [PATCH 07/15] Comments update --- vpr/src/base/vpr_types.h | 11 +++++++++-- vpr/src/route/route_tree_type.h | 9 +++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 23aa26e5eed..bfba146318c 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -1315,8 +1315,15 @@ typedef std::array, 3>, NUM_RR_TYPES> t_rr_node_i * @brief Basic element used to store the traceback (routing) of each net. * * @param index Array index (ID) of this routing resource node. - * @param index Index of the pin for a sink node. Only used for rt_node - * tree traceback. Otherwise value is OPEN. + * @param ipin: Net pin index associated with the node. This value * + * ranges from 1 to fanout [1..num_pins-1]. For cases when * + * different speed paths are taken to the same sink for * + * different pins, node index cannot uniquely identify * + * each sink, so the net pin index guarentees an unique * + * identification for each sink-type node. For non-sink- * + * type nodes and for sink-type nodes with no associated * + * net pin index, the value for this member should be set * + * to OPEN (-1). * * @param iswitch Index of the switch type used to go from this rr_node to * the next one in the routing. OPEN if there is no next node * (i.e. 
this node is the last one (a SINK) in a branch of the diff --git a/vpr/src/route/route_tree_type.h b/vpr/src/route/route_tree_type.h index 6f2408adac9..20339f5969b 100644 --- a/vpr/src/route/route_tree_type.h +++ b/vpr/src/route/route_tree_type.h @@ -27,8 +27,13 @@ struct t_linked_rt_edge { * parent_switch: Index of the switch type driving this node (by its * * parent). * * inode: index (ID) of the rr_node that corresponds to this rt_node. * - * ipin: Pin index associated with the rt_node. Gives an unique identifier * - * or each rt_node. * + * ipin: Net pin index associated with the rt_node. This value ranges from * + * 1 to fanout [1..num_pins-1]. For cases when different speed paths * + * are taken to the same sink for different pins, inode cannot * + * uniquely identify each sink, so the net pin index guarentees an * + * unique identification for each sink-type rt_node. For non-sink- * + * type nodes and for sink-type nodes with no associated net pin * + * index, the value for this member should be set to OPEN (-1). * * C_downstream: Total downstream capacitance from this rt_node. That is, * * the total C of the subtree rooted at the current node, * * including the C of the current node. 
* From a5602dcb2e2577389ed05f8dfb03dd0f0be0e06c Mon Sep 17 00:00:00 2001 From: Helen Dai Date: Tue, 25 Aug 2020 03:49:54 -0400 Subject: [PATCH 08/15] print and load net pin index for sink nodes --- vpr/src/base/read_route.cpp | 14 +++++++++++++- vpr/src/route/route_common.cpp | 5 +++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/vpr/src/base/read_route.cpp b/vpr/src/base/read_route.cpp index 042baffcd92..bdcb1cbad7d 100644 --- a/vpr/src/base/read_route.cpp +++ b/vpr/src/base/read_route.cpp @@ -216,7 +216,7 @@ static void process_nodes(std::ifstream& fp, ClusterNetId inet, const char* file /*remember the position of the last line in order to go back*/ std::streampos oldpos = fp.tellg(); - int inode, x, y, x2, y2, ptc, switch_id, offset; + int inode, x, y, x2, y2, ptc, switch_id, net_pin_id, offset; std::string prev_type; int node_count = 0; std::string input; @@ -348,10 +348,21 @@ static void process_nodes(std::ifstream& fp, ClusterNetId inet, const char* file switch_id = atoi(tokens[7 + offset].c_str()); } + /*Process net pin index for sinks*/ + if (tokens[2] == "SINK") { + if (tokens[8 + offset] == "Net_pin_index:") { + net_pin_id = atoi(tokens[9 + offset].c_str()); + } else { + vpr_throw(VPR_ERROR_ROUTE, filename, lineno, + "%d (sink) node does not have net pin index", inode); + } + } + /* Allocate and load correct values to trace.head*/ if (node_count == 0) { route_ctx.trace[inet].head = alloc_trace_data(); route_ctx.trace[inet].head->index = inode; + route_ctx.trace[inet].head->ipin = net_pin_id; route_ctx.trace[inet].head->iswitch = switch_id; route_ctx.trace[inet].head->next = nullptr; tptr = route_ctx.trace[inet].head; @@ -360,6 +371,7 @@ static void process_nodes(std::ifstream& fp, ClusterNetId inet, const char* file tptr->next = alloc_trace_data(); tptr = tptr->next; tptr->index = inode; + tptr->ipin = net_pin_id; tptr->iswitch = switch_id; tptr->next = nullptr; node_count++; diff --git a/vpr/src/route/route_common.cpp 
b/vpr/src/route/route_common.cpp index c93888289d2..a72191cd36a 100644 --- a/vpr/src/route/route_common.cpp +++ b/vpr/src/route/route_common.cpp @@ -1294,6 +1294,11 @@ void print_route(FILE* fp, const vtr::vector& traceba * used in the routing. */ fprintf(fp, "Switch: %d", tptr->iswitch); + //Save net pin index for sinks + if (rr_type == SINK) { + fprintf(fp, " Net_pin_index: %d", tptr->ipin); + } + fprintf(fp, "\n"); tptr = tptr->next; From 52d0386b7047b65ec8d2559d5d88c214f5319b8b Mon Sep 17 00:00:00 2001 From: Helen Dai Date: Wed, 26 Aug 2020 21:04:50 -0400 Subject: [PATCH 09/15] revert changes that are tracked under another pull request --- utils/vqm2blif/src/base/cleanup.cpp | 191 ------------------ utils/vqm2blif/src/base/cleanup.h | 5 +- utils/vqm2blif/src/main.cpp | 4 +- vpr/src/base/SetupGrid.cpp | 22 +- vtr_flow/arch/titan/stratixiv_arch.timing.xml | 44 +--- 5 files changed, 9 insertions(+), 257 deletions(-) diff --git a/utils/vqm2blif/src/base/cleanup.cpp b/utils/vqm2blif/src/base/cleanup.cpp index 9ec028cf32e..e4234600bc3 100644 --- a/utils/vqm2blif/src/base/cleanup.cpp +++ b/utils/vqm2blif/src/base/cleanup.cpp @@ -12,7 +12,6 @@ void build_netlist (t_module* module, busvec* buses, s_hash** hash_table); void init_nets (t_pin_def** pins, int num_pins, busvec* buses, struct s_hash** hash_table); void set_net_assigns (t_assign** assignments, int num_assigns, busvec* buses, struct s_hash** hash_table); void add_subckts (t_node** nodes, int num_nodes, busvec* buses, struct s_hash** hash_table); -void remove_one_lut_nodes ( busvec* buses, struct s_hash** hash_table, t_node** nodes, int original_num_nodes, t_module* module ); void clean_netlist ( busvec* buses, struct s_hash** hash_table, t_node** nodes, int num_nodes ); void reassign_net_source (t_net* net); void print_to_module ( t_module* module, busvec* buses, struct s_hash** hash_table ); @@ -22,9 +21,6 @@ netvec* get_bus_from_hash (struct s_hash** hash_table, char* temp_name, busvec* void verify_netlist ( 
t_node** nodes, int num_nodes, busvec* buses, struct s_hash** hash_table); void print_all_nets ( busvec* buses, const char* filename ); -bool is_onelut ( t_node* node ); -void remove_node ( t_node* node, t_node** nodes, int original_num_nodes ); - //============================================================================================ //============================================================================================ @@ -43,22 +39,16 @@ void netlist_cleanup (t_module* module){ cout << "\t>> VQM Netlist contains " << buffer_count << " buffers.\n" ; cout << "\t>> VQM Netlist contains " << invert_count << " invertors.\n" ; - cout << "\t>> VQM Netlist contains " << onelut_count << " one-LUTs.\n" ; //Verify that the initial netlist is ok verify_netlist ( module->array_of_nodes, module->number_of_nodes, &buses, hash_table ); - cout << "\t>> Removing One-LUTs" << "...\n"; - - remove_one_lut_nodes ( &buses, hash_table, module->array_of_nodes, module->number_of_nodes, module ); - cout << "\t>> Removing buffered nets" << ((clean_mode == CL_BUFF)? 
"":" and inverted subckt inputs") << "...\n"; clean_netlist ( &buses, hash_table, module->array_of_nodes, module->number_of_nodes ); cout << "\t>> Removed " << buffers_elim << " buffers of " << buffer_count << ".\n" ; cout << "\t>> Removed " << inverts_elim << " invertors of " << invert_count << ".\n" ; - cout << "\t>> Removed " << oneluts_elim << " one-LUTs of " << onelut_count << ".\n" ; //Verify that the final modified netlist is ok verify_netlist ( module->array_of_nodes, module->number_of_nodes, &buses, hash_table ); @@ -205,13 +195,8 @@ void add_subckts (t_node** nodes, int num_nodes, busvec* buses, struct s_hash** t_node* temp_node; t_node_port_association* temp_port; - onelut_count = 0; - for (int i = 0; i < num_nodes; i++){ temp_node = nodes[i]; - if(is_onelut(temp_node)){ - onelut_count++; - } for (int j = 0; j < temp_node->number_of_ports; j++){ temp_port = temp_node->array_of_ports[j]; @@ -238,123 +223,6 @@ void add_subckts (t_node** nodes, int num_nodes, busvec* buses, struct s_hash** //============================================================================================ //============================================================================================ -void remove_one_lut_nodes ( busvec* buses, struct s_hash** hash_table, t_node** nodes, int original_num_nodes, t_module* module ){ -/* - Go through all nodes, if a node's source net is the sink of a one-LUT, there are two cases: - 1. The one-LUT has an input and an output: - Re-associate the node with the source net of the one-LUT, then remove the one-LUT and the node's original source net - 2. 
The one-LUT just has an output (provides VCC to its sink): - Re-associate the node with the VCC net, then remove the one-LUT and the node's original source net -*/ - oneluts_elim = 0; - - t_node* temp_node; - t_node_port_association* temp_port; - netvec* temp_bus; - t_net* temp_net; - - t_node* source_node; - t_node_port_association* source_port; - t_node_port_association* prev_port; - netvec* prev_bus; - t_net* prev_net; - - netvec* vcc_bus = get_bus_from_hash (hash_table, const_cast("vcc"), buses); - VTR_ASSERT(vcc_bus != NULL); - t_net* vcc_net = &(vcc_bus->at(0)); //Find any VCC net - - for (int i = 0; i < original_num_nodes; i++){ - temp_node = nodes[i]; - if (temp_node == NULL) { //Node was deleted during a previous iteration - continue; - } - for (int j = 0; j < temp_node->number_of_ports; j++){ - temp_port = temp_node->array_of_ports[j]; - temp_bus = get_bus_from_hash (hash_table, temp_port->associated_net->name, buses); - VTR_ASSERT((unsigned int)temp_port->wire_index < temp_bus->size()); - temp_net = &(temp_bus->at(temp_port->wire_index)); - - if (temp_port != (t_node_port_association*)temp_net->source){ - //Must be an input port - if (temp_net->driver == BLACKBOX && is_onelut(temp_net->block_src) && temp_net->num_children == 1){ - source_port = (t_node_port_association*)temp_net->source; //The output port of the one-LUT - source_node = temp_net->block_src; //The one-LUT - - //Re-associate temp_port with the appropriate net - if(source_node->number_of_ports == 2){ - //For one-LUT with an input and an output, find the net before the one_LUT and associate temp_port with that net instead - VTR_ASSERT(source_node->number_of_ports == 2); - for (int k = 0; k < source_node->number_of_ports; k++){ - prev_port = source_node->array_of_ports[k]; - if(prev_port != source_port) { - //The input port of the one-LUT - prev_bus = get_bus_from_hash (hash_table, prev_port->associated_net->name, buses); - VTR_ASSERT((unsigned int)prev_port->wire_index < prev_bus->size()); - 
prev_net = &(prev_bus->at(prev_port->wire_index)); //Net associated with the input port - } - } - temp_port->associated_net = prev_net->pin; - temp_port->wire_index = prev_net->wire_index; - } else { - //For one-LUT with just an output, associate temp_port with VCC instead - VTR_ASSERT(source_node->number_of_ports == 1); //If is_onelut==true, there are only 1 or 2 ports - VTR_ASSERT(vcc_net != NULL); //Should have a VCC - temp_port->associated_net = vcc_net->pin; - temp_port->wire_index = vcc_net->wire_index; - vcc_net->num_children++; - } - - //Remove temp_net - temp_net->num_children--; - temp_net->source = NULL; - temp_net->driver = NODRIVE; - - //Free the LUT - remove_node(source_node, nodes, original_num_nodes); - - } - } - } - } - - //Regorganize nodes array by filling in gaps with the last available elements in the array to save CPU time - int new_array_size = original_num_nodes - oneluts_elim; - int curr_node_index = 0; - int replacement_node_index = original_num_nodes - 1; - while (curr_node_index < replacement_node_index) { - if (nodes[curr_node_index] == NULL) { - if (nodes[replacement_node_index] != NULL) { - //Replace gap with node - nodes[curr_node_index] = nodes[replacement_node_index]; - nodes[replacement_node_index] = NULL; - curr_node_index++; - } - replacement_node_index--; - } else { - curr_node_index++; - } - } - if (nodes[curr_node_index] == NULL) { - VTR_ASSERT(curr_node_index == new_array_size); //check array size - } else { - VTR_ASSERT(curr_node_index == new_array_size - 1); //check array size - } - - //Update array bounds - module -> number_of_nodes = new_array_size; - - //Reduce run-time by only verifying at the end - //verify_netlist (nodes, module->number_of_nodes, buses, hash_table); - -#ifdef CLEAN_DEBUG - cout << "\t\t>> Dumping to all_buff.out\n" ; - print_all_nets(buses, "all_buff.out"); -#endif -} - -//============================================================================================ 
-//============================================================================================ - void clean_netlist ( busvec* buses, struct s_hash** hash_table, t_node** nodes, int num_nodes ){ netvec* temp_bus; @@ -759,64 +627,5 @@ void print_all_nets ( busvec* buses, const char* filename ){ outfile.close(); } - -//============================================================================================ -//============================================================================================ - -bool is_onelut ( t_node* node ) { - if(node == NULL) return false; - - //Hardcoded for Stratix IV - string node_name = node->name; - string node_name_ending; - if (node_name.length() >= 8){ - node_name_ending = node_name.substr(node_name.length()-8); - } else { - node_name_ending = node_name; - } - -#ifdef CLEAN_DEBUG - cout << "\t\t Node Type: " << node->type << "\t" << "Node Name Ending: " << node_name_ending << "\t" << "Num of Ports: " << node->number_of_ports <<"\n"; -#endif - - //Only LUTs with 1 port (1 output port) or 2 ports (1 input and 1 output) are considered one-luts - if (node->number_of_ports == 1 || node->number_of_ports == 2){ - //Only stratixiv_lcell_comb one-LUTs that end in "feeder" can be removed at this stage - if (node->type == string("stratixiv_lcell_comb") && node_name_ending == string("feeder_I")) { - return true; - } - } - - return false; -} - -//============================================================================================ -//============================================================================================ - -void remove_node ( t_node* node, t_node** nodes, int original_num_nodes ) { - //Free node and assign it to NULL on the spot - //Array will be re-organized to fill in the gaps later - - VTR_ASSERT(node != NULL); - VTR_ASSERT(nodes != NULL); - -#ifdef CLEAN_DEBUG - cout << "\t\t\t Removing " << node->name << "\n"; -#endif - bool found = false; - - for (int i = 0; i < original_num_nodes; i++){ - if(nodes[i] == 
node){ - free_node( (void*)nodes[i] ); - nodes[i] = NULL; - found = true; - break; - } - } - - VTR_ASSERT(found); - oneluts_elim++; -} - //============================================================================================ //============================================================================================ diff --git a/utils/vqm2blif/src/base/cleanup.h b/utils/vqm2blif/src/base/cleanup.h index 8fb60bba0cb..9d699feece9 100644 --- a/utils/vqm2blif/src/base/cleanup.h +++ b/utils/vqm2blif/src/base/cleanup.h @@ -8,14 +8,13 @@ #include "vqm2blif_util.h" #include "lut_recog.h" -#include "vqm_common.h" //============================================================================================ // GLOBALS //============================================================================================ -extern int buffer_count, invert_count, onelut_count; -extern int buffers_elim, inverts_elim, oneluts_elim; +extern int buffer_count, invert_count; +extern int buffers_elim, inverts_elim; void netlist_cleanup (t_module* module); diff --git a/utils/vqm2blif/src/main.cpp b/utils/vqm2blif/src/main.cpp index cfc9f033f9a..495a3c864df 100644 --- a/utils/vqm2blif/src/main.cpp +++ b/utils/vqm2blif/src/main.cpp @@ -109,8 +109,8 @@ e_elab elab_mode; //user-set flag dictating how to elaborate a VQM Primitive e_lut lut_mode; //user-set flag dictating how to treat LUTs (as blackboxes or .names) -int buffer_count, invert_count, onelut_count; -int buffers_elim, inverts_elim, oneluts_elim; +int buffer_count, invert_count; +int buffers_elim, inverts_elim; e_clean clean_mode; diff --git a/vpr/src/base/SetupGrid.cpp b/vpr/src/base/SetupGrid.cpp index 584a5db6776..479f8f055d9 100644 --- a/vpr/src/base/SetupGrid.cpp +++ b/vpr/src/base/SetupGrid.cpp @@ -150,17 +150,6 @@ static DeviceGrid auto_size_device_grid(const std::vector& grid_layo VTR_ASSERT_SAFE_MSG(std::find_if(auto_layout_itr + 1, grid_layouts.end(), is_auto_grid_def) == grid_layouts.end(), "Only one "); - //Determine 
maximum device size to try before concluding that the circuit cannot fit on any device - //Calculate total number of required instances - //Then multiply by a factor of 100 as overhead - size_t max_size; - size_t total_minimum_instance_counts = 0; - for (auto& inst : minimum_instance_counts) { - size_t count = inst.second; - total_minimum_instance_counts += count; - } - max_size = total_minimum_instance_counts * 10000; - const auto& grid_def = *auto_layout_itr; VTR_ASSERT(grid_def.aspect_ratio >= 0.); @@ -170,7 +159,6 @@ static DeviceGrid auto_size_device_grid(const std::vector& grid_layo size_t width = 3; size_t height = 3; std::vector limiting_resources; - size_t grid_size = 0; do { //Scale opposite dimension to match aspect ratio height = vtr::nint(width / grid_def.aspect_ratio); @@ -195,18 +183,10 @@ static DeviceGrid auto_size_device_grid(const std::vector& grid_layo limiting_resources = grid_overused_resources(grid, minimum_instance_counts); - //Determine grid size - grid_size = width * height; - //Increase the grid size width++; - } while (grid_size < max_size); - - //Maximum device size reached - VPR_FATAL_ERROR(VPR_ERROR_OTHER, - "Device auto-fit aborted: device size already exceeds required resources count by 100 times yet still cannot fit the design. " - "Might be using more instances of a particular type of resource than the StratixIV devices can support (e.g. 
PLLs)\n"); + } while (true); } else { VTR_ASSERT(auto_layout_itr == grid_layouts.end()); diff --git a/vtr_flow/arch/titan/stratixiv_arch.timing.xml b/vtr_flow/arch/titan/stratixiv_arch.timing.xml index cc3685c593c..ca0b67a68a1 100644 --- a/vtr_flow/arch/titan/stratixiv_arch.timing.xml +++ b/vtr_flow/arch/titan/stratixiv_arch.timing.xml @@ -4408,14 +4408,6 @@ - - - - - - - - @@ -4737,9 +4729,8 @@ - @@ -4762,18 +4753,10 @@ - - - - - - - - @@ -4803,9 +4786,8 @@ - @@ -4828,18 +4810,10 @@ - - - - - - - - @@ -6189,16 +6163,6 @@ - - - - - - - - - - From 3435ba17bf16de4591b9e93ab2b1528e25bd3b11 Mon Sep 17 00:00:00 2001 From: Helen Dai Date: Wed, 26 Aug 2020 21:12:33 -0400 Subject: [PATCH 10/15] Format fix --- vpr/src/route/route_timing.cpp | 2 -- vpr/src/route/route_tree_timing.cpp | 4 ++-- vpr/src/timing/net_delay.cpp | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/vpr/src/route/route_timing.cpp b/vpr/src/route/route_timing.cpp index ed02af6d0d7..0bb717c9730 100644 --- a/vpr/src/route/route_timing.cpp +++ b/vpr/src/route/route_timing.cpp @@ -1202,8 +1202,6 @@ static bool timing_driven_route_sink( profiling::sink_criticality_start(); int sink_node = route_ctx.net_rr_terminals[net_id][target_pin]; -auto& device_ctx = g_vpr_ctx.device(); -VTR_ASSERT(device_ctx.rr_nodes[sink_node].type() == SINK); VTR_LOGV_DEBUG(f_router_debug, "Net %zu Target %d (%s)\n", size_t(net_id), itarget, describe_rr_node(sink_node).c_str()); VTR_ASSERT_DEBUG(verify_traceback_route_tree_equivalent(route_ctx.trace[net_id].head, rt_root)); diff --git a/vpr/src/route/route_tree_timing.cpp b/vpr/src/route/route_tree_timing.cpp index f7cf21c5b88..6d899d8b026 100644 --- a/vpr/src/route/route_tree_timing.cpp +++ b/vpr/src/route/route_tree_timing.cpp @@ -302,7 +302,7 @@ add_subtree_to_route_tree(t_heap* hptr, int target_pin, t_rt_node** sink_rt_node downstream_rt_node = sink_rt_node; std::unordered_set main_branch_visited; //does not include sink - std::unordered_set all_visited; //does 
not include sink + std::unordered_set all_visited; //does not include sink inode = hptr->prev_node(); RREdgeId edge = hptr->prev_edge(); short iswitch = device_ctx.rr_nodes.edge_switch(edge); @@ -676,7 +676,7 @@ void print_route_tree(const t_rt_node* rt_node, int depth) { auto& device_ctx = g_vpr_ctx.device(); VTR_LOG("%srt_node: %d (%s) \t ipin: %d \t R: %g \t C: %g \t delay: %g", - indent.c_str(), rt_node->inode, device_ctx.rr_nodes[rt_node->inode].type_string(), rt_node->ipin, rt_node->R_upstream, rt_node->C_downstream, rt_node->Tdel); + indent.c_str(), rt_node->inode, device_ctx.rr_nodes[rt_node->inode].type_string(), rt_node->ipin, rt_node->R_upstream, rt_node->C_downstream, rt_node->Tdel); if (rt_node->parent_switch != OPEN) { bool parent_edge_configurable = device_ctx.rr_switch_inf[rt_node->parent_switch].configurable(); diff --git a/vpr/src/timing/net_delay.cpp b/vpr/src/timing/net_delay.cpp index e3457d39fc9..5cd48b115d8 100644 --- a/vpr/src/timing/net_delay.cpp +++ b/vpr/src/timing/net_delay.cpp @@ -90,7 +90,7 @@ static void load_one_net_delay(ClbNetPinsMatrix& net_delay, ClusterNetId net_delay[net_id][ipin] = itr->second; // search for the value of Tdel in the ipin map and load into net_delay } - free_route_tree(rt_root); // free the route tree + free_route_tree(rt_root); // free the route tree ipin_to_Tdel_map.clear(); // clear the map } From 24f3326ad8cba2d1b98ba2c024f7539ade7091fa Mon Sep 17 00:00:00 2001 From: Helen Dai Date: Wed, 26 Aug 2020 22:55:01 -0400 Subject: [PATCH 11/15] Fix bfs and compiler warning in read_route --- utils/vqm2blif/src/base/cleanup.cpp | 224 ++++++++++++++++++ utils/vqm2blif/src/base/cleanup.h | 5 +- utils/vqm2blif/src/main.cpp | 4 +- vpr/src/base/SetupGrid.cpp | 27 ++- vpr/src/base/read_route.cpp | 2 + vpr/src/route/route_breadth_first.cpp | 3 +- vtr_flow/arch/titan/stratixiv_arch.timing.xml | 45 +++- 7 files changed, 300 insertions(+), 10 deletions(-) diff --git a/utils/vqm2blif/src/base/cleanup.cpp 
b/utils/vqm2blif/src/base/cleanup.cpp index e4234600bc3..8e86fc4c210 100644 --- a/utils/vqm2blif/src/base/cleanup.cpp +++ b/utils/vqm2blif/src/base/cleanup.cpp @@ -12,6 +12,7 @@ void build_netlist (t_module* module, busvec* buses, s_hash** hash_table); void init_nets (t_pin_def** pins, int num_pins, busvec* buses, struct s_hash** hash_table); void set_net_assigns (t_assign** assignments, int num_assigns, busvec* buses, struct s_hash** hash_table); void add_subckts (t_node** nodes, int num_nodes, busvec* buses, struct s_hash** hash_table); +void remove_one_lut_nodes ( busvec* buses, struct s_hash** hash_table, t_node** nodes, int original_num_nodes, t_module* module ); void clean_netlist ( busvec* buses, struct s_hash** hash_table, t_node** nodes, int num_nodes ); void reassign_net_source (t_net* net); void print_to_module ( t_module* module, busvec* buses, struct s_hash** hash_table ); @@ -21,6 +22,9 @@ netvec* get_bus_from_hash (struct s_hash** hash_table, char* temp_name, busvec* void verify_netlist ( t_node** nodes, int num_nodes, busvec* buses, struct s_hash** hash_table); void print_all_nets ( busvec* buses, const char* filename ); +bool is_feeder_onelut ( t_node* node ); +void remove_node ( t_node* node, t_node** nodes, int original_num_nodes ); + //============================================================================================ //============================================================================================ @@ -39,16 +43,22 @@ void netlist_cleanup (t_module* module){ cout << "\t>> VQM Netlist contains " << buffer_count << " buffers.\n" ; cout << "\t>> VQM Netlist contains " << invert_count << " invertors.\n" ; + cout << "\t>> VQM Netlist contains " << onelut_count << " one-LUTs.\n" ; //Verify that the initial netlist is ok verify_netlist ( module->array_of_nodes, module->number_of_nodes, &buses, hash_table ); + cout << "\t>> Removing One-LUTs" << "...\n"; + + remove_one_lut_nodes ( &buses, hash_table, module->array_of_nodes, 
module->number_of_nodes, module ); + cout << "\t>> Removing buffered nets" << ((clean_mode == CL_BUFF)? "":" and inverted subckt inputs") << "...\n"; clean_netlist ( &buses, hash_table, module->array_of_nodes, module->number_of_nodes ); cout << "\t>> Removed " << buffers_elim << " buffers of " << buffer_count << ".\n" ; cout << "\t>> Removed " << inverts_elim << " invertors of " << invert_count << ".\n" ; + cout << "\t>> Removed " << oneluts_elim << " one-LUTs of " << onelut_count << ".\n" ; //Verify that the final modified netlist is ok verify_netlist ( module->array_of_nodes, module->number_of_nodes, &buses, hash_table ); @@ -195,8 +205,13 @@ void add_subckts (t_node** nodes, int num_nodes, busvec* buses, struct s_hash** t_node* temp_node; t_node_port_association* temp_port; + onelut_count = 0; + for (int i = 0; i < num_nodes; i++){ temp_node = nodes[i]; + if(is_feeder_onelut(temp_node)){ + onelut_count++; + } for (int j = 0; j < temp_node->number_of_ports; j++){ temp_port = temp_node->array_of_ports[j]; @@ -223,6 +238,156 @@ void add_subckts (t_node** nodes, int num_nodes, busvec* buses, struct s_hash** //============================================================================================ //============================================================================================ +void remove_one_lut_nodes ( busvec* buses, struct s_hash** hash_table, t_node** nodes, int original_num_nodes, t_module* module ){ +/* + Quartus fitter may have introduced some one-LUTs in the post-fit netlist that makes it harder for VPR to place and route. + Generally, these one-LUTs are inserted by the Quartus router in order to pass a signal through a LUT to the FF in the same + BLE. For Stratix IV, the names of these one-LUTs all end with the substring "feeder". This function serves to remove the + feeder one LUTs from the netlist, if they exist, before converting it into the BLIF file format. 
+ + Go through all nodes, if a node's source net is driven by a one-LUT-type node and if this source net only has one + child (the node itself): + + 1. If the one-LUT has an input and an output, the one-LUT acts as either a buffer LUT or as an inverter, but we do not + check as we only care about structure in this converter, not logical functionality. + Re-associate the node's input port with the source net of the one-LUT, then remove the one-LUT and the node's original + source net. + + ----- ------- ----- + | X |---> net m ---> | LUT | ---> net n ---> | Y | + ----- ------- ----- + + becomes + + ----- ----- + | X |---> net m ---> | Y | + ----- ----- + + 2. If the one-LUT has just an output (feeds VCC downstream): + Re-associate the node with the VCC net, then remove the one-LUT and the node's original source net. + + ------- ----- + | LUT | ---> net m ---> | Y | + ------- ----- + + becomes + + ------- ----- + | VCC |---> net v -------> | Y | + ------- | ----- + ----> + | + ----> +*/ + oneluts_elim = 0; + + t_node* temp_node; + t_node_port_association* temp_port; + netvec* temp_bus; + t_net* temp_net; + + t_node* source_node; + t_node_port_association* source_port; + t_node_port_association* prev_port; + netvec* prev_bus; + t_net* prev_net; + + netvec* vcc_bus = get_bus_from_hash (hash_table, const_cast("vcc"), buses); + VTR_ASSERT(vcc_bus != NULL); + t_net* vcc_net = &(vcc_bus->at(0)); //Find any VCC net + + for (int i = 0; i < original_num_nodes; i++){ + temp_node = nodes[i]; + if (temp_node == NULL) { //Node was deleted during a previous iteration + continue; + } + for (int j = 0; j < temp_node->number_of_ports; j++){ + temp_port = temp_node->array_of_ports[j]; + temp_bus = get_bus_from_hash (hash_table, temp_port->associated_net->name, buses); + VTR_ASSERT((unsigned int)temp_port->wire_index < temp_bus->size()); + temp_net = &(temp_bus->at(temp_port->wire_index)); + + if (temp_port != (t_node_port_association*)temp_net->source){ + //Must be an input port + if 
(temp_net->driver == BLACKBOX && is_feeder_onelut(temp_net->block_src) && temp_net->num_children == 1){ + source_port = (t_node_port_association*)temp_net->source; //The output port of the one-LUT + source_node = temp_net->block_src; //The one-LUT + + //Re-associate temp_port with the appropriate net + if(source_node->number_of_ports == 2){ + //For one-LUT with an input and an output, find the net before the one_LUT and associate temp_port with that net instead + VTR_ASSERT(source_node->number_of_ports == 2); + for (int k = 0; k < source_node->number_of_ports; k++){ + prev_port = source_node->array_of_ports[k]; + if(prev_port != source_port) { + //The input port of the one-LUT + prev_bus = get_bus_from_hash (hash_table, prev_port->associated_net->name, buses); + VTR_ASSERT((unsigned int)prev_port->wire_index < prev_bus->size()); + prev_net = &(prev_bus->at(prev_port->wire_index)); //Net associated with the input port + } + } + temp_port->associated_net = prev_net->pin; + temp_port->wire_index = prev_net->wire_index; + } else { + //For one-LUT with just an output, associate temp_port with VCC instead + VTR_ASSERT(source_node->number_of_ports == 1); //If is_feeder_onelut==true, there are only 1 or 2 ports + VTR_ASSERT(vcc_net != NULL); //Should have a VCC + temp_port->associated_net = vcc_net->pin; + temp_port->wire_index = vcc_net->wire_index; + vcc_net->num_children++; + } + + //Remove temp_net + temp_net->num_children--; + temp_net->source = NULL; + temp_net->driver = NODRIVE; + + //Free the LUT + remove_node(source_node, nodes, original_num_nodes); + + } + } + } + } + + //Regorganize nodes array by filling in gaps with the last available elements in the array to save CPU time + int new_array_size = original_num_nodes - oneluts_elim; + int curr_node_index = 0; + int replacement_node_index = original_num_nodes - 1; + while (curr_node_index < replacement_node_index) { + if (nodes[curr_node_index] == NULL) { + if (nodes[replacement_node_index] != NULL) { + //Replace 
gap with node + nodes[curr_node_index] = nodes[replacement_node_index]; + nodes[replacement_node_index] = NULL; + curr_node_index++; + } + replacement_node_index--; + } else { + curr_node_index++; + } + } + if (nodes[curr_node_index] == NULL) { + VTR_ASSERT(curr_node_index == new_array_size); //check array size + } else { + VTR_ASSERT(curr_node_index == new_array_size - 1); //check array size + } + + //Update array bounds + module -> number_of_nodes = new_array_size; + + //Reduce run-time by only verifying at the end + //verify_netlist (nodes, module->number_of_nodes, buses, hash_table); + +#ifdef CLEAN_DEBUG + cout << "\t\t>> Dumping to all_buff.out\n" ; + print_all_nets(buses, "all_buff.out"); +#endif +} + +//============================================================================================ +//============================================================================================ + void clean_netlist ( busvec* buses, struct s_hash** hash_table, t_node** nodes, int num_nodes ){ netvec* temp_bus; @@ -627,5 +792,64 @@ void print_all_nets ( busvec* buses, const char* filename ){ outfile.close(); } + +//============================================================================================ +//============================================================================================ + +bool is_feeder_onelut ( t_node* node ) { + if(node == NULL) return false; + + //Hardcoded for Stratix IV + string node_name = node->name; + string node_name_ending; + if (node_name.length() >= 8){ + node_name_ending = node_name.substr(node_name.length()-8); + } else { + node_name_ending = node_name; + } + +#ifdef CLEAN_DEBUG + cout << "\t\t Node Type: " << node->type << "\t" << "Node Name Ending: " << node_name_ending << "\t" << "Num of Ports: " << node->number_of_ports <<"\n"; +#endif + + //Only LUTs with 1 port (1 output port) or 2 ports (1 input and 1 output) are considered one-luts + if (node->number_of_ports == 1 || node->number_of_ports == 2){ + //Only 
stratixiv_lcell_comb one-LUTs that end in "feeder" can be removed at this stage + if (node->type == string("stratixiv_lcell_comb") && node_name_ending == string("feeder_I")) { + return true; + } + } + + return false; +} + +//============================================================================================ +//============================================================================================ + +void remove_node ( t_node* node, t_node** nodes, int original_num_nodes ) { + //Free node and assign it to NULL on the spot + //Array will be re-organized to fill in the gaps later + + VTR_ASSERT(node != NULL); + VTR_ASSERT(nodes != NULL); + +#ifdef CLEAN_DEBUG + cout << "\t\t\t Removing " << node->name << "\n"; +#endif + bool found = false; + + for (int i = 0; i < original_num_nodes; i++){ + if(nodes[i] == node){ + free_node( (void*)nodes[i] ); + nodes[i] = NULL; + found = true; + break; + } + } + + VTR_ASSERT(found); + oneluts_elim++; +} + //============================================================================================ //============================================================================================ diff --git a/utils/vqm2blif/src/base/cleanup.h b/utils/vqm2blif/src/base/cleanup.h index 9d699feece9..8fb60bba0cb 100644 --- a/utils/vqm2blif/src/base/cleanup.h +++ b/utils/vqm2blif/src/base/cleanup.h @@ -8,13 +8,14 @@ #include "vqm2blif_util.h" #include "lut_recog.h" +#include "vqm_common.h" //============================================================================================ // GLOBALS //============================================================================================ -extern int buffer_count, invert_count; -extern int buffers_elim, inverts_elim; +extern int buffer_count, invert_count, onelut_count; +extern int buffers_elim, inverts_elim, oneluts_elim; void netlist_cleanup (t_module* module); diff --git a/utils/vqm2blif/src/main.cpp b/utils/vqm2blif/src/main.cpp index 495a3c864df..cfc9f033f9a 100644 --- 
a/utils/vqm2blif/src/main.cpp +++ b/utils/vqm2blif/src/main.cpp @@ -109,8 +109,8 @@ e_elab elab_mode; //user-set flag dictating how to elaborate a VQM Primitive e_lut lut_mode; //user-set flag dictating how to treat LUTs (as blackboxes or .names) -int buffer_count, invert_count; -int buffers_elim, inverts_elim; +int buffer_count, invert_count, onelut_count; +int buffers_elim, inverts_elim, oneluts_elim; e_clean clean_mode; diff --git a/vpr/src/base/SetupGrid.cpp b/vpr/src/base/SetupGrid.cpp index 479f8f055d9..a17977d35e5 100644 --- a/vpr/src/base/SetupGrid.cpp +++ b/vpr/src/base/SetupGrid.cpp @@ -24,6 +24,8 @@ #include "SetupGrid.h" #include "vtr_expr_eval.h" +#define MAX_SIZE_FACTOR 10000 + using vtr::FormulaParser; using vtr::t_formula_data; @@ -150,6 +152,19 @@ static DeviceGrid auto_size_device_grid(const std::vector& grid_layo VTR_ASSERT_SAFE_MSG(std::find_if(auto_layout_itr + 1, grid_layouts.end(), is_auto_grid_def) == grid_layouts.end(), "Only one "); + //Determine maximum device size to try before concluding that the circuit cannot fit on any device + //Calculate total number of required instances + //Then multiply by a factor of MAX_SIZE_FACTOR as overhead + //This is to avoid infinite loop if increasing the grid size never gets you more of the instance + //type you need and hence never lets you fit the design + size_t max_size; + size_t total_minimum_instance_counts = 0; + for (auto& inst : minimum_instance_counts) { + size_t count = inst.second; + total_minimum_instance_counts += count; + } + max_size = total_minimum_instance_counts * MAX_SIZE_FACTOR; + const auto& grid_def = *auto_layout_itr; VTR_ASSERT(grid_def.aspect_ratio >= 0.); @@ -159,6 +174,7 @@ static DeviceGrid auto_size_device_grid(const std::vector& grid_layo size_t width = 3; size_t height = 3; std::vector limiting_resources; + size_t grid_size = 0; do { //Scale opposite dimension to match aspect ratio height = vtr::nint(width / grid_def.aspect_ratio); @@ -183,10 +199,19 @@ static DeviceGrid 
auto_size_device_grid(const std::vector& grid_layo limiting_resources = grid_overused_resources(grid, minimum_instance_counts); + //Determine grid size + grid_size = width * height; + //Increase the grid size width++; - } while (true); + } while (grid_size < max_size); + + //Maximum device size reached + VPR_FATAL_ERROR(VPR_ERROR_OTHER, + "Device auto-fit aborted: device size already exceeds required resources count by %d times yet still cannot fit the design. " + "This may be due to resources that do not grow as the grid size increases (e.g. PLLs in the Titan Stratix IV architecture capture).\n", + MAX_SIZE_FACTOR); } else { VTR_ASSERT(auto_layout_itr == grid_layouts.end()); diff --git a/vpr/src/base/read_route.cpp b/vpr/src/base/read_route.cpp index bdcb1cbad7d..970e6965948 100644 --- a/vpr/src/base/read_route.cpp +++ b/vpr/src/base/read_route.cpp @@ -356,6 +356,8 @@ static void process_nodes(std::ifstream& fp, ClusterNetId inet, const char* file vpr_throw(VPR_ERROR_ROUTE, filename, lineno, "%d (sink) node does not have net pin index", inode); } + } else { + net_pin_id = OPEN; } /* Allocate and load correct values to trace.head*/ diff --git a/vpr/src/route/route_breadth_first.cpp b/vpr/src/route/route_breadth_first.cpp index 39bee911471..e115ba9d5da 100644 --- a/vpr/src/route/route_breadth_first.cpp +++ b/vpr/src/route/route_breadth_first.cpp @@ -265,7 +265,8 @@ static bool breadth_first_route_net(BinaryHeap& heap, ClusterNetId net_id, float route_ctx.rr_node_route_inf[inode].target_flag--; /* Connected to this SINK. 
*/ remaining_connections_to_sink = route_ctx.rr_node_route_inf[inode].target_flag; - tptr = update_traceback(current, OPEN, net_id); + size_t ipin = cluster_ctx.clb_nlist.pin_net_index(pin_id); + tptr = update_traceback(current, ipin, net_id); heap.free(current); } diff --git a/vtr_flow/arch/titan/stratixiv_arch.timing.xml b/vtr_flow/arch/titan/stratixiv_arch.timing.xml index ca0b67a68a1..02d4db9dfe8 100644 --- a/vtr_flow/arch/titan/stratixiv_arch.timing.xml +++ b/vtr_flow/arch/titan/stratixiv_arch.timing.xml @@ -4408,6 +4408,15 @@ + + + + + + + + + @@ -4729,8 +4738,9 @@ - @@ -4753,10 +4763,18 @@ + + + + + + + + @@ -4786,8 +4804,9 @@ - @@ -4810,10 +4829,18 @@ + + + + + + + + @@ -6163,6 +6190,16 @@ + + + + + + + + + + From 8a5a5a87341cb91f1be2ee3cb3432adcb53334c7 Mon Sep 17 00:00:00 2001 From: Helen Dai Date: Wed, 26 Aug 2020 23:06:56 -0400 Subject: [PATCH 12/15] revert changes that are tracked under another pull request (accidentally checked in last commit) --- utils/vqm2blif/src/base/cleanup.cpp | 224 ------------------ utils/vqm2blif/src/base/cleanup.h | 5 +- utils/vqm2blif/src/main.cpp | 4 +- vpr/src/base/SetupGrid.cpp | 27 +-- vtr_flow/arch/titan/stratixiv_arch.timing.xml | 45 +--- 5 files changed, 9 insertions(+), 296 deletions(-) diff --git a/utils/vqm2blif/src/base/cleanup.cpp b/utils/vqm2blif/src/base/cleanup.cpp index 8e86fc4c210..e4234600bc3 100644 --- a/utils/vqm2blif/src/base/cleanup.cpp +++ b/utils/vqm2blif/src/base/cleanup.cpp @@ -12,7 +12,6 @@ void build_netlist (t_module* module, busvec* buses, s_hash** hash_table); void init_nets (t_pin_def** pins, int num_pins, busvec* buses, struct s_hash** hash_table); void set_net_assigns (t_assign** assignments, int num_assigns, busvec* buses, struct s_hash** hash_table); void add_subckts (t_node** nodes, int num_nodes, busvec* buses, struct s_hash** hash_table); -void remove_one_lut_nodes ( busvec* buses, struct s_hash** hash_table, t_node** nodes, int original_num_nodes, t_module* module ); void 
clean_netlist ( busvec* buses, struct s_hash** hash_table, t_node** nodes, int num_nodes ); void reassign_net_source (t_net* net); void print_to_module ( t_module* module, busvec* buses, struct s_hash** hash_table ); @@ -22,9 +21,6 @@ netvec* get_bus_from_hash (struct s_hash** hash_table, char* temp_name, busvec* void verify_netlist ( t_node** nodes, int num_nodes, busvec* buses, struct s_hash** hash_table); void print_all_nets ( busvec* buses, const char* filename ); -bool is_feeder_onelut ( t_node* node ); -void remove_node ( t_node* node, t_node** nodes, int original_num_nodes ); - //============================================================================================ //============================================================================================ @@ -43,22 +39,16 @@ void netlist_cleanup (t_module* module){ cout << "\t>> VQM Netlist contains " << buffer_count << " buffers.\n" ; cout << "\t>> VQM Netlist contains " << invert_count << " invertors.\n" ; - cout << "\t>> VQM Netlist contains " << onelut_count << " one-LUTs.\n" ; //Verify that the initial netlist is ok verify_netlist ( module->array_of_nodes, module->number_of_nodes, &buses, hash_table ); - cout << "\t>> Removing One-LUTs" << "...\n"; - - remove_one_lut_nodes ( &buses, hash_table, module->array_of_nodes, module->number_of_nodes, module ); - cout << "\t>> Removing buffered nets" << ((clean_mode == CL_BUFF)? 
"":" and inverted subckt inputs") << "...\n"; clean_netlist ( &buses, hash_table, module->array_of_nodes, module->number_of_nodes ); cout << "\t>> Removed " << buffers_elim << " buffers of " << buffer_count << ".\n" ; cout << "\t>> Removed " << inverts_elim << " invertors of " << invert_count << ".\n" ; - cout << "\t>> Removed " << oneluts_elim << " one-LUTs of " << onelut_count << ".\n" ; //Verify that the final modified netlist is ok verify_netlist ( module->array_of_nodes, module->number_of_nodes, &buses, hash_table ); @@ -205,13 +195,8 @@ void add_subckts (t_node** nodes, int num_nodes, busvec* buses, struct s_hash** t_node* temp_node; t_node_port_association* temp_port; - onelut_count = 0; - for (int i = 0; i < num_nodes; i++){ temp_node = nodes[i]; - if(is_feeder_onelut(temp_node)){ - onelut_count++; - } for (int j = 0; j < temp_node->number_of_ports; j++){ temp_port = temp_node->array_of_ports[j]; @@ -238,156 +223,6 @@ void add_subckts (t_node** nodes, int num_nodes, busvec* buses, struct s_hash** //============================================================================================ //============================================================================================ -void remove_one_lut_nodes ( busvec* buses, struct s_hash** hash_table, t_node** nodes, int original_num_nodes, t_module* module ){ -/* - Quartus fitter may have introduced some one-LUTs in the post-fit netlist that makes it harder for VPR to place and route. - Generally, these one-LUTs are inserted by the Quartus router in order to pass a signal through a LUT to the FF in the same - BLE. For Stratix IV, the names of these one-LUTs all end with the substring "feeder". This function serves to remove the - feeder one LUTs from the netlist, if they exist, before converting it into the BLIF file format. - - Go through all nodes, if a node's source net is driven by a one-LUT-type node and if this source net only has one - child (the node itself): - - 1. 
If the one-LUT has an input and an output, the one-LUT acts as either a buffer LUT or as an inverter, but we do not - check as we only care about structure in this converter, not logical functionality. - Re-associate the node's input port with the source net of the one-LUT, then remove the one-LUT and the node's original - source net. - - ----- ------- ----- - | X |---> net m ---> | LUT | ---> net n ---> | Y | - ----- ------- ----- - - becomes - - ----- ----- - | X |---> net m ---> | Y | - ----- ----- - - 2. If the one-LUT has just an output (feeds VCC downstream): - Re-associate the node with the VCC net, then remove the one-LUT and the node's original source net. - - ------- ----- - | LUT | ---> net m ---> | Y | - ------- ----- - - becomes - - ------- ----- - | VCC |---> net v -------> | Y | - ------- | ----- - ----> - | - ----> -*/ - oneluts_elim = 0; - - t_node* temp_node; - t_node_port_association* temp_port; - netvec* temp_bus; - t_net* temp_net; - - t_node* source_node; - t_node_port_association* source_port; - t_node_port_association* prev_port; - netvec* prev_bus; - t_net* prev_net; - - netvec* vcc_bus = get_bus_from_hash (hash_table, const_cast("vcc"), buses); - VTR_ASSERT(vcc_bus != NULL); - t_net* vcc_net = &(vcc_bus->at(0)); //Find any VCC net - - for (int i = 0; i < original_num_nodes; i++){ - temp_node = nodes[i]; - if (temp_node == NULL) { //Node was deleted during a previous iteration - continue; - } - for (int j = 0; j < temp_node->number_of_ports; j++){ - temp_port = temp_node->array_of_ports[j]; - temp_bus = get_bus_from_hash (hash_table, temp_port->associated_net->name, buses); - VTR_ASSERT((unsigned int)temp_port->wire_index < temp_bus->size()); - temp_net = &(temp_bus->at(temp_port->wire_index)); - - if (temp_port != (t_node_port_association*)temp_net->source){ - //Must be an input port - if (temp_net->driver == BLACKBOX && is_feeder_onelut(temp_net->block_src) && temp_net->num_children == 1){ - source_port = 
(t_node_port_association*)temp_net->source; //The output port of the one-LUT - source_node = temp_net->block_src; //The one-LUT - - //Re-associate temp_port with the appropriate net - if(source_node->number_of_ports == 2){ - //For one-LUT with an input and an output, find the net before the one_LUT and associate temp_port with that net instead - VTR_ASSERT(source_node->number_of_ports == 2); - for (int k = 0; k < source_node->number_of_ports; k++){ - prev_port = source_node->array_of_ports[k]; - if(prev_port != source_port) { - //The input port of the one-LUT - prev_bus = get_bus_from_hash (hash_table, prev_port->associated_net->name, buses); - VTR_ASSERT((unsigned int)prev_port->wire_index < prev_bus->size()); - prev_net = &(prev_bus->at(prev_port->wire_index)); //Net associated with the input port - } - } - temp_port->associated_net = prev_net->pin; - temp_port->wire_index = prev_net->wire_index; - } else { - //For one-LUT with just an output, associate temp_port with VCC instead - VTR_ASSERT(source_node->number_of_ports == 1); //If is_feeder_onelut==true, there are only 1 or 2 ports - VTR_ASSERT(vcc_net != NULL); //Should have a VCC - temp_port->associated_net = vcc_net->pin; - temp_port->wire_index = vcc_net->wire_index; - vcc_net->num_children++; - } - - //Remove temp_net - temp_net->num_children--; - temp_net->source = NULL; - temp_net->driver = NODRIVE; - - //Free the LUT - remove_node(source_node, nodes, original_num_nodes); - - } - } - } - } - - //Regorganize nodes array by filling in gaps with the last available elements in the array to save CPU time - int new_array_size = original_num_nodes - oneluts_elim; - int curr_node_index = 0; - int replacement_node_index = original_num_nodes - 1; - while (curr_node_index < replacement_node_index) { - if (nodes[curr_node_index] == NULL) { - if (nodes[replacement_node_index] != NULL) { - //Replace gap with node - nodes[curr_node_index] = nodes[replacement_node_index]; - nodes[replacement_node_index] = NULL; - 
curr_node_index++; - } - replacement_node_index--; - } else { - curr_node_index++; - } - } - if (nodes[curr_node_index] == NULL) { - VTR_ASSERT(curr_node_index == new_array_size); //check array size - } else { - VTR_ASSERT(curr_node_index == new_array_size - 1); //check array size - } - - //Update array bounds - module -> number_of_nodes = new_array_size; - - //Reduce run-time by only verifying at the end - //verify_netlist (nodes, module->number_of_nodes, buses, hash_table); - -#ifdef CLEAN_DEBUG - cout << "\t\t>> Dumping to all_buff.out\n" ; - print_all_nets(buses, "all_buff.out"); -#endif -} - -//============================================================================================ -//============================================================================================ - void clean_netlist ( busvec* buses, struct s_hash** hash_table, t_node** nodes, int num_nodes ){ netvec* temp_bus; @@ -792,64 +627,5 @@ void print_all_nets ( busvec* buses, const char* filename ){ outfile.close(); } - -//============================================================================================ -//============================================================================================ - -bool is_feeder_onelut ( t_node* node ) { - if(node == NULL) return false; - - //Hardcoded for Stratix IV - string node_name = node->name; - string node_name_ending; - if (node_name.length() >= 8){ - node_name_ending = node_name.substr(node_name.length()-8); - } else { - node_name_ending = node_name; - } - -#ifdef CLEAN_DEBUG - cout << "\t\t Node Type: " << node->type << "\t" << "Node Name Ending: " << node_name_ending << "\t" << "Num of Ports: " << node->number_of_ports <<"\n"; -#endif - - //Only LUTs with 1 port (1 output port) or 2 ports (1 input and 1 output) are considered one-luts - if (node->number_of_ports == 1 || node->number_of_ports == 2){ - //Only stratixiv_lcell_comb one-LUTs that end in "feeder" can be removed at this stage - if (node->type == 
string("stratixiv_lcell_comb") && node_name_ending == string("feeder_I")) { - return true; - } - } - - return false; -} - -//============================================================================================ -//============================================================================================ - -void remove_node ( t_node* node, t_node** nodes, int original_num_nodes ) { - //Free node and assign it to NULL on the spot - //Array will be re-organized to fill in the gaps later - - VTR_ASSERT(node != NULL); - VTR_ASSERT(nodes != NULL); - -#ifdef CLEAN_DEBUG - cout << "\t\t\t Removing " << node->name << "\n"; -#endif - bool found = false; - - for (int i = 0; i < original_num_nodes; i++){ - if(nodes[i] == node){ - free_node( (void*)nodes[i] ); - nodes[i] = NULL; - found = true; - break; - } - } - - VTR_ASSERT(found); - oneluts_elim++; -} - //============================================================================================ //============================================================================================ diff --git a/utils/vqm2blif/src/base/cleanup.h b/utils/vqm2blif/src/base/cleanup.h index 8fb60bba0cb..9d699feece9 100644 --- a/utils/vqm2blif/src/base/cleanup.h +++ b/utils/vqm2blif/src/base/cleanup.h @@ -8,14 +8,13 @@ #include "vqm2blif_util.h" #include "lut_recog.h" -#include "vqm_common.h" //============================================================================================ // GLOBALS //============================================================================================ -extern int buffer_count, invert_count, onelut_count; -extern int buffers_elim, inverts_elim, oneluts_elim; +extern int buffer_count, invert_count; +extern int buffers_elim, inverts_elim; void netlist_cleanup (t_module* module); diff --git a/utils/vqm2blif/src/main.cpp b/utils/vqm2blif/src/main.cpp index cfc9f033f9a..495a3c864df 100644 --- a/utils/vqm2blif/src/main.cpp +++ b/utils/vqm2blif/src/main.cpp @@ -109,8 +109,8 @@ e_elab elab_mode; 
//user-set flag dictating how to elaborate a VQM Primitive e_lut lut_mode; //user-set flag dictating how to treat LUTs (as blackboxes or .names) -int buffer_count, invert_count, onelut_count; -int buffers_elim, inverts_elim, oneluts_elim; +int buffer_count, invert_count; +int buffers_elim, inverts_elim; e_clean clean_mode; diff --git a/vpr/src/base/SetupGrid.cpp b/vpr/src/base/SetupGrid.cpp index a17977d35e5..479f8f055d9 100644 --- a/vpr/src/base/SetupGrid.cpp +++ b/vpr/src/base/SetupGrid.cpp @@ -24,8 +24,6 @@ #include "SetupGrid.h" #include "vtr_expr_eval.h" -#define MAX_SIZE_FACTOR 10000 - using vtr::FormulaParser; using vtr::t_formula_data; @@ -152,19 +150,6 @@ static DeviceGrid auto_size_device_grid(const std::vector& grid_layo VTR_ASSERT_SAFE_MSG(std::find_if(auto_layout_itr + 1, grid_layouts.end(), is_auto_grid_def) == grid_layouts.end(), "Only one "); - //Determine maximum device size to try before concluding that the circuit cannot fit on any device - //Calculate total number of required instances - //Then multiply by a factor of MAX_SIZE_FACTOR as overhead - //This is to avoid infinite loop if increasing the grid size never gets you more of the instance - //type you need and hence never lets you fit the design - size_t max_size; - size_t total_minimum_instance_counts = 0; - for (auto& inst : minimum_instance_counts) { - size_t count = inst.second; - total_minimum_instance_counts += count; - } - max_size = total_minimum_instance_counts * MAX_SIZE_FACTOR; - const auto& grid_def = *auto_layout_itr; VTR_ASSERT(grid_def.aspect_ratio >= 0.); @@ -174,7 +159,6 @@ static DeviceGrid auto_size_device_grid(const std::vector& grid_layo size_t width = 3; size_t height = 3; std::vector limiting_resources; - size_t grid_size = 0; do { //Scale opposite dimension to match aspect ratio height = vtr::nint(width / grid_def.aspect_ratio); @@ -199,19 +183,10 @@ static DeviceGrid auto_size_device_grid(const std::vector& grid_layo limiting_resources = grid_overused_resources(grid, 
minimum_instance_counts); - //Determine grid size - grid_size = width * height; - //Increase the grid size width++; - } while (grid_size < max_size); - - //Maximum device size reached - VPR_FATAL_ERROR(VPR_ERROR_OTHER, - "Device auto-fit aborted: device size already exceeds required resources count by %d times yet still cannot fit the design. " - "This may be due to resources that do not grow as the grid size increases (e.g. PLLs in the Titan Stratix IV architecture capture).\n", - MAX_SIZE_FACTOR); + } while (true); } else { VTR_ASSERT(auto_layout_itr == grid_layouts.end()); diff --git a/vtr_flow/arch/titan/stratixiv_arch.timing.xml b/vtr_flow/arch/titan/stratixiv_arch.timing.xml index 02d4db9dfe8..ca0b67a68a1 100644 --- a/vtr_flow/arch/titan/stratixiv_arch.timing.xml +++ b/vtr_flow/arch/titan/stratixiv_arch.timing.xml @@ -4408,15 +4408,6 @@ - - - - - - - - - @@ -4738,9 +4729,8 @@ - @@ -4763,18 +4753,10 @@ - - - - - - - - @@ -4804,9 +4786,8 @@ - @@ -4829,18 +4810,10 @@ - - - - - - - - @@ -6190,16 +6163,6 @@ - - - - - - - - - - From 19e519688596b2d37a057afe72aa8e4a7fbab0e2 Mon Sep 17 00:00:00 2001 From: Helen Dai Date: Fri, 28 Aug 2020 06:04:58 -0400 Subject: [PATCH 13/15] Changes made based on suggestions from pull request --- vpr/src/base/read_route.cpp | 17 ++++--- vpr/src/base/vpr_types.h | 21 ++++---- vpr/src/route/connection_based_routing.cpp | 28 +---------- vpr/src/route/connection_based_routing.h | 10 ---- vpr/src/route/route_common.cpp | 45 +++++++++-------- vpr/src/route/route_common.h | 5 +- vpr/src/route/route_timing.cpp | 3 +- vpr/src/route/route_traceback.cpp | 4 +- vpr/src/route/route_tree_timing.cpp | 57 +++++++++++++--------- vpr/src/route/route_tree_timing.h | 2 +- vpr/src/route/route_tree_type.h | 19 +++++--- vpr/src/timing/net_delay.cpp | 4 +- 12 files changed, 101 insertions(+), 114 deletions(-) diff --git a/vpr/src/base/read_route.cpp b/vpr/src/base/read_route.cpp index 970e6965948..5a16f4d7c48 100644 --- a/vpr/src/base/read_route.cpp +++ 
b/vpr/src/base/read_route.cpp @@ -216,7 +216,7 @@ static void process_nodes(std::ifstream& fp, ClusterNetId inet, const char* file /*remember the position of the last line in order to go back*/ std::streampos oldpos = fp.tellg(); - int inode, x, y, x2, y2, ptc, switch_id, net_pin_id, offset; + int inode, x, y, x2, y2, ptc, switch_id, net_pin_index, offset; std::string prev_type; int node_count = 0; std::string input; @@ -348,23 +348,26 @@ static void process_nodes(std::ifstream& fp, ClusterNetId inet, const char* file switch_id = atoi(tokens[7 + offset].c_str()); } - /*Process net pin index for sinks*/ + /* Process net pin index for sinks * + * If you have an old .route file, it may not have this information * + * Please check your .route file to see if it contains Net_pin_index * + * information for sinks. If not, plrase re-generate the routing. */ if (tokens[2] == "SINK") { if (tokens[8 + offset] == "Net_pin_index:") { - net_pin_id = atoi(tokens[9 + offset].c_str()); + net_pin_index = atoi(tokens[9 + offset].c_str()); } else { vpr_throw(VPR_ERROR_ROUTE, filename, lineno, - "%d (sink) node does not have net pin index", inode); + "%d (sink) node does not have net pin index. 
If you are using an old .route file without this information, please re-generate the routing.", inode); } } else { - net_pin_id = OPEN; + net_pin_index = OPEN; //net pin index is invalid for non-SINKs } /* Allocate and load correct values to trace.head*/ if (node_count == 0) { route_ctx.trace[inet].head = alloc_trace_data(); route_ctx.trace[inet].head->index = inode; - route_ctx.trace[inet].head->ipin = net_pin_id; + route_ctx.trace[inet].head->net_pin_index = net_pin_index; route_ctx.trace[inet].head->iswitch = switch_id; route_ctx.trace[inet].head->next = nullptr; tptr = route_ctx.trace[inet].head; @@ -373,7 +376,7 @@ static void process_nodes(std::ifstream& fp, ClusterNetId inet, const char* file tptr->next = alloc_trace_data(); tptr = tptr->next; tptr->index = inode; - tptr->ipin = net_pin_id; + tptr->net_pin_index = net_pin_index; tptr->iswitch = switch_id; tptr->next = nullptr; node_count++; diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index bfba146318c..b88e9f36184 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -1315,15 +1315,16 @@ typedef std::array, 3>, NUM_RR_TYPES> t_rr_node_i * @brief Basic element used to store the traceback (routing) of each net. * * @param index Array index (ID) of this routing resource node. - * @param ipin: Net pin index associated with the node. This value * - * ranges from 1 to fanout [1..num_pins-1]. For cases when * - * different speed paths are taken to the same sink for * - * different pins, node index cannot uniquely identify * - * each sink, so the net pin index guarentees an unique * - * identification for each sink-type node. For non-sink- * - * type nodes and for sink-type nodes with no associated * - * net pin index, the value for this member should be set * - * to OPEN (-1). * + * @param net_pin_index: Net pin index associated with the node. This value + * ranges from 1 to fanout [1..num_pins-1]. 
For cases when + * different speed paths are taken to the same SINK for + * different pins, node index cannot uniquely identify + * each SINK, so the net pin index guarantees an unique + * identification for each SINK node. For non-SINK nodes + * and for SINK nodes with no associated net pin index + * (i.e. special SINKs like the source of a clock tree + * which do not correspond to an actual netlist connection), + * the value for this member should be set to OPEN (-1). * @param iswitch Index of the switch type used to go from this rr_node to * the next one in the routing. OPEN if there is no next node * (i.e. this node is the last one (a SINK) in a branch of the @@ -1333,7 +1334,7 @@ typedef std::array, 3>, NUM_RR_TYPES> t_rr_node_i struct t_trace { t_trace* next; int index; - int ipin = OPEN; + int net_pin_index = OPEN; short iswitch; }; diff --git a/vpr/src/route/connection_based_routing.cpp b/vpr/src/route/connection_based_routing.cpp index a46b7fbe6a2..a35a8d35ac3 100644 --- a/vpr/src/route/connection_based_routing.cpp +++ b/vpr/src/route/connection_based_routing.cpp @@ -12,8 +12,6 @@ Connection_based_routing_resources::Connection_based_routing_resources() , connection_criticality_tolerance{0.9f} , connection_delay_optimality_tolerance{1.1f} { /* Initialize the persistent data structures for incremental rerouting - * this includes rr_sink_node_to_pin, which provides pin lookup given a - * sink node for a specific net. 
* * remaining_targets will reserve enough space to ensure it won't need * to grow while storing the sinks that still need routing after pruning @@ -31,18 +29,14 @@ Connection_based_routing_resources::Connection_based_routing_resources() reached_rt_sinks.reserve(max_sink_pins_per_net); size_t routing_num_nets = cluster_ctx.clb_nlist.nets().size(); - rr_sink_node_to_pin.resize(routing_num_nets); lower_bound_connection_delay.resize(routing_num_nets); forcible_reroute_connection_flag.resize(routing_num_nets); for (auto net_id : cluster_ctx.clb_nlist.nets()) { - // unordered_map net_node_to_pin; - auto& net_node_to_pin = rr_sink_node_to_pin[net_id]; auto& net_lower_bound_connection_delay = lower_bound_connection_delay[net_id]; auto& net_forcible_reroute_connection_flag = forcible_reroute_connection_flag[net_id]; - unsigned int num_pins = cluster_ctx.clb_nlist.net_pins(net_id).size(); - net_node_to_pin.reserve(num_pins); // not looking up on the SOURCE pin + unsigned int num_pins = cluster_ctx.clb_nlist.net_pins(net_id).size(); // not looking up on the SOURCE pin net_lower_bound_connection_delay.resize(num_pins, std::numeric_limits::infinity()); // will be filled in after the 1st iteration's net_forcible_reroute_connection_flag.reserve(num_pins); // all false to begin with @@ -50,31 +44,11 @@ Connection_based_routing_resources::Connection_based_routing_resources() // rr sink node index corresponding to this connection terminal auto rr_sink_node = route_ctx.net_rr_terminals[net_id][ipin]; - net_node_to_pin.insert({rr_sink_node, ipin}); net_forcible_reroute_connection_flag.insert({rr_sink_node, false}); } } } -bool Connection_based_routing_resources::sanity_check_lookup() const { - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& route_ctx = g_vpr_ctx.routing(); - - for (auto net_id : cluster_ctx.clb_nlist.nets()) { - const auto& net_node_to_pin = rr_sink_node_to_pin[net_id]; - - for (auto mapping : net_node_to_pin) { - auto sanity = net_node_to_pin.find(mapping.first); 
- if (sanity == net_node_to_pin.end()) { - VTR_LOG("%d cannot find itself (net %lu)\n", mapping.first, size_t(net_id)); - return false; - } - VTR_ASSERT(route_ctx.net_rr_terminals[net_id][mapping.second] == mapping.first); - } - } - return true; -} - void Connection_based_routing_resources::set_lower_bound_connection_delays(ClbNetPinsMatrix& net_delay) { /* Set the lower bound connection delays after first iteration, which only optimizes for timing delay. * This will be used later to judge the optimality of a connection, with suboptimal ones being candidates diff --git a/vpr/src/route/connection_based_routing.h b/vpr/src/route/connection_based_routing.h index cea2d34caa5..1179e044daf 100644 --- a/vpr/src/route/connection_based_routing.h +++ b/vpr/src/route/connection_based_routing.h @@ -15,15 +15,6 @@ // reroute only the connections to the ones that did not have a legal connection the previous time class Connection_based_routing_resources { // Incremental reroute resources -------------- - // conceptually works like rr_sink_node_to_pin[inet][sink_rr_node_index] to get the pin index for that net - // each net maps SINK node index -> PIN index for net - // only need to be built once at the start since the SINK nodes never change - // the reverse lookup of route_ctx.net_rr_terminals - // be careful: it is possible for multiple sinks to share the same node index in some cases. - // rt_nodes already have pin index stored as a member, so in most cases, you do not - // need this lookup. Only use this if necessary and if you are sure that a node index - // can uniquely identify the node. 
- vtr::vector> rr_sink_node_to_pin; // a property of each net, but only valid after pruning the previous route tree // the "targets" in question can be either rr_node indices or pin indices, the @@ -46,7 +37,6 @@ class Connection_based_routing_resources { // get a handle on the resources std::vector& get_remaining_targets() { return remaining_targets; } std::vector& get_reached_rt_sinks() { return reached_rt_sinks; } - vtr::vector>& get_rr_sink_node_to_pin() { return rr_sink_node_to_pin; }; bool sanity_check_lookup() const; diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp index a72191cd36a..9547a09b543 100644 --- a/vpr/src/route/route_common.cpp +++ b/vpr/src/route/route_common.cpp @@ -86,7 +86,7 @@ static int num_linked_f_pointer_allocated = 0; * */ /******************** Subroutines local to route_common.c *******************/ -static t_trace_branch traceback_branch(int node, int target_pin, std::unordered_set& main_branch_visited); +static t_trace_branch traceback_branch(int node, int target_net_pin_index, std::unordered_set& main_branch_visited); static std::pair add_trace_non_configurable(t_trace* head, t_trace* tail, int node, std::unordered_set& visited); static std::pair add_trace_non_configurable_recurr(int node, std::unordered_set& visited, int depth = 0); @@ -494,26 +494,28 @@ void init_route_structs(int bb_factor) { route_ctx.net_status.resize(cluster_ctx.clb_nlist.nets().size()); } -t_trace* update_traceback(t_heap* hptr, int target_pin, ClusterNetId net_id) { - /* This routine adds the most recently finished wire segment to the * - * traceback linked list. The first connection starts with the net SOURCE * - * and begins at the structure pointed to by route_ctx.trace[net_id].head. * - * Each connection ends with a SINK. After each SINK, the next connection * - * begins (if the net has more than 2 pins). 
The first element after the * - * SINK gives the routing node on a previous piece of the routing, which is * - * the link from the existing net to this new piece of the net. * - * In each traceback I start at the end of a path and trace back through * - * its predecessors to the beginning. I have stored information on the * - * predecesser of each node to make traceback easy -- this sacrificies some * - * memory for easier code maintenance. This routine returns a pointer to * - * the first "new" node in the traceback (node not previously in trace). */ +/* This routine adds the most recently finished wire segment to the * + * traceback linked list. The first connection starts with the net SOURCE * + * and begins at the structure pointed to by route_ctx.trace[net_id].head. * + * Each connection ends with a SINK. After each SINK, the next connection * + * begins (if the net has more than 2 pins). The first element after the * + * SINK gives the routing node on a previous piece of the routing, which is * + * the link from the existing net to this new piece of the net. * + * In each traceback I start at the end of a path, which is a SINK with * + * target_net_pin_index (net pin index corresponding to the SINK, ranging * + * from 1 to fanout), and trace back through its predecessors to the * + * beginning. I have stored information on the predecessor of each node to * + * make traceback easy -- this sacrifices some memory for easier code * + * maintenance. This routine returns a pointer to the first "new" node in * + * the traceback (node not previously in trace).
*/ +t_trace* update_traceback(t_heap* hptr, int target_net_pin_index, ClusterNetId net_id) { auto& route_ctx = g_vpr_ctx.mutable_routing(); auto& trace_nodes = route_ctx.trace_nodes[net_id]; VTR_ASSERT_SAFE(validate_trace_nodes(route_ctx.trace[net_id].head, trace_nodes)); - t_trace_branch branch = traceback_branch(hptr->index, target_pin, trace_nodes); + t_trace_branch branch = traceback_branch(hptr->index, target_net_pin_index, trace_nodes); VTR_ASSERT_SAFE(validate_trace_nodes(branch.head, trace_nodes)); @@ -530,9 +532,10 @@ t_trace* update_traceback(t_heap* hptr, int target_pin, ClusterNetId net_id) { return (ret_ptr); } -//Traces back a new routing branch starting from the specified 'node' and working backwards to any existing routing. +//Traces back a new routing branch starting from the specified SINK 'node' with target_net_pin_index, which is the +//net pin index corresponding to the SINK (ranging from 1 to fanout), and working backwards to any existing routing. //Returns the new branch, and also updates trace_nodes for any new nodes which are included in the branches traceback. 
-static t_trace_branch traceback_branch(int node, int target_pin, std::unordered_set& trace_nodes) { +static t_trace_branch traceback_branch(int node, int target_net_pin_index, std::unordered_set& trace_nodes) { auto& device_ctx = g_vpr_ctx.device(); auto& route_ctx = g_vpr_ctx.routing(); @@ -547,7 +550,7 @@ static t_trace_branch traceback_branch(int node, int target_pin, std::unordered_ t_trace* branch_head = alloc_trace_data(); t_trace* branch_tail = branch_head; branch_head->index = node; - branch_head->ipin = target_pin; + branch_head->net_pin_index = target_net_pin_index; //The first node is the SINK node, so store its net pin index branch_head->iswitch = OPEN; branch_head->next = nullptr; @@ -562,7 +565,7 @@ static t_trace_branch traceback_branch(int node, int target_pin, std::unordered_ //Add the current node to the head of traceback t_trace* prev_ptr = alloc_trace_data(); prev_ptr->index = inode; - prev_ptr->ipin = OPEN; + prev_ptr->net_pin_index = OPEN; //Net pin index is invalid for Non-SINK nodes prev_ptr->iswitch = device_ctx.rr_nodes.edge_switch(iedge); prev_ptr->next = branch_head; branch_head = prev_ptr; @@ -1199,7 +1202,7 @@ alloc_trace_data() { trace_free_head->next = nullptr; } temp_ptr = trace_free_head; - temp_ptr->ipin = OPEN; //default + temp_ptr->net_pin_index = OPEN; //default trace_free_head = trace_free_head->next; num_trace_allocated++; return (temp_ptr); @@ -1296,7 +1299,7 @@ void print_route(FILE* fp, const vtr::vector& traceba //Save net pin index for sinks if (rr_type == SINK) { - fprintf(fp, " Net_pin_index: %d", tptr->ipin); + fprintf(fp, " Net_pin_index: %d", tptr->net_pin_index); } fprintf(fp, "\n"); diff --git a/vpr/src/route/route_common.h b/vpr/src/route/route_common.h index de2dc983509..f8898616dcf 100644 --- a/vpr/src/route/route_common.h +++ b/vpr/src/route/route_common.h @@ -22,7 +22,10 @@ void pathfinder_update_acc_cost_and_overuse_info(float acc_fac, OveruseInfo& ove float update_pres_fac(float new_pres_fac); -t_trace* 
update_traceback(t_heap* hptr, int target_pin, ClusterNetId net_id); +/* Pass in the hptr starting at a SINK with target_net_pin_index, which is the net pin index corresponding * + * to the sink (ranging from 1 to fanout). Returns a pointer to the first "new" node in the traceback * + * (node not previously in trace). */ +t_trace* update_traceback(t_heap* hptr, int target_net_pin_index, ClusterNetId net_id); void reset_path_costs(const std::vector& visited_rr_nodes); diff --git a/vpr/src/route/route_timing.cpp b/vpr/src/route/route_timing.cpp index 0bb717c9730..38f2b7fd50b 100644 --- a/vpr/src/route/route_timing.cpp +++ b/vpr/src/route/route_timing.cpp @@ -273,7 +273,6 @@ bool try_timing_driven_route_tmpl(const t_router_opts& router_opts, } CBRR connections_inf{}; - VTR_ASSERT_SAFE(connections_inf.sanity_check_lookup()); route_budgets budgeting_inf; @@ -1386,7 +1385,7 @@ static t_rt_node* setup_routing_resources(int itry, // give lookup on the reached sinks for (t_rt_node* sink_node : reached_rt_sinks) { - rt_node_of_sink[sink_node->ipin] = sink_node; + rt_node_of_sink[sink_node->net_pin_index] = sink_node; } profiling::net_rebuild_end(num_sinks, remaining_targets.size()); diff --git a/vpr/src/route/route_traceback.cpp b/vpr/src/route/route_traceback.cpp index 7df192f31b2..7f4ad67f72b 100644 --- a/vpr/src/route/route_traceback.cpp +++ b/vpr/src/route/route_traceback.cpp @@ -8,11 +8,11 @@ t_traceback::t_traceback(const t_traceback& other) { //Deep-copy of traceback t_trace* prev = nullptr; for (t_trace* other_curr = other.head; other_curr; other_curr = other_curr->next) { - //VTR_LOG("Copying trace %p node: %d switch: %d pin(for sink): %d\n", other_curr, other_curr->index, other_curr->iswitch. other_curr->ipin); + //VTR_LOG("Copying trace %p node: %d switch: %d pin(for sink): %d\n", other_curr, other_curr->index, other_curr->iswitch.
other_curr->net_pin_index); t_trace* curr = alloc_trace_data(); curr->index = other_curr->index; - curr->ipin = other_curr->ipin; + curr->net_pin_index = other_curr->net_pin_index; curr->iswitch = other_curr->iswitch; if (prev) { diff --git a/vpr/src/route/route_tree_timing.cpp b/vpr/src/route/route_tree_timing.cpp index 6d899d8b026..dc7cb0ddac0 100644 --- a/vpr/src/route/route_tree_timing.cpp +++ b/vpr/src/route/route_tree_timing.cpp @@ -52,7 +52,7 @@ static t_linked_rt_edge* alloc_linked_rt_edge(); static void free_linked_rt_edge(t_linked_rt_edge* rt_edge); static t_rt_node* add_subtree_to_route_tree(t_heap* hptr, - int target_pin, + int target_net_pin_index, t_rt_node** sink_rt_node_ptr); static t_rt_node* add_non_configurable_to_route_tree(const int rr_node, const bool reached_by_non_configurable_edge, std::unordered_set& visited); @@ -195,7 +195,7 @@ t_rt_node* init_route_tree_to_source(ClusterNetId inet) { inode = route_ctx.net_rr_terminals[inet][0]; /* Net source */ rt_root->inode = inode; - rt_root->ipin = OPEN; + rt_root->net_pin_index = OPEN; rt_root->C_downstream = device_ctx.rr_nodes[inode].C(); rt_root->R_upstream = device_ctx.rr_nodes[inode].R(); rt_root->Tdel = 0.5 * device_ctx.rr_nodes[inode].R() * device_ctx.rr_nodes[inode].C(); @@ -206,9 +206,10 @@ t_rt_node* init_route_tree_to_source(ClusterNetId inet) { /* Adds the most recently finished wire segment to the routing tree, and * updates the Tdel, etc. numbers for the rest of the routing tree. hptr - * is the heap pointer of the SINK that was reached. This routine returns - * a pointer to the rt_node of the SINK that it adds to the routing. */ -t_rt_node* update_route_tree(t_heap* hptr, int target_pin, SpatialRouteTreeLookup* spatial_rt_lookup) { + * is the heap pointer of the SINK that was reached, and target_net_pin_index + * is the net pin index corresponding to the SINK that was reached. This routine + * returns a pointer to the rt_node of the SINK that it adds to the routing. 
*/ +t_rt_node* update_route_tree(t_heap* hptr, int target_net_pin_index, SpatialRouteTreeLookup* spatial_rt_lookup) { t_rt_node *start_of_new_subtree_rt_node, *sink_rt_node; t_rt_node *unbuffered_subtree_rt_root, *subtree_parent_rt_node; float Tdel_start; @@ -217,7 +218,7 @@ t_rt_node* update_route_tree(t_heap* hptr, int target_pin, SpatialRouteTreeLooku auto& device_ctx = g_vpr_ctx.device(); //Create a new subtree from the target in hptr to existing routing - start_of_new_subtree_rt_node = add_subtree_to_route_tree(hptr, target_pin, &sink_rt_node); + start_of_new_subtree_rt_node = add_subtree_to_route_tree(hptr, target_net_pin_index, &sink_rt_node); //Propagate R_upstream down into the new subtree load_new_subtree_R_upstream(start_of_new_subtree_rt_node); @@ -248,13 +249,23 @@ t_rt_node* update_route_tree(t_heap* hptr, int target_pin, SpatialRouteTreeLooku return (sink_rt_node); } +/* Records all nodes from the rt_tree into the rr_node_to_rt_node lookup, which + * maps the node's corresponding rr_node index (inode) to the node itself. This + * is done recursively, starting from the root of the tree to its leafs (SINKs) + * in a depth-first traversal. The rt_node we are currently processing should not + * have had its rr_node index mapped previously, with the exception of SINK nodes. + * Some netlists and input pin equivalence can lead to us routing to the same SINK + * more than once on a net (resulting in different rt_nodes sharing the same rr_node + * index). Hence for SINKs we assert on a weaker condition that if the rr_node index + * corresponding to this SINK is already mapped, the rr_node_to_rt_node mapping + * structure must be pointing to a different rt_node containing the SINK. 
*/ void add_route_tree_to_rr_node_lookup(t_rt_node* node) { if (node) { auto& device_ctx = g_vpr_ctx.device(); if (device_ctx.rr_nodes[node->inode].type() == SINK) { - VTR_ASSERT(rr_node_to_rt_node[node->inode] == nullptr || rr_node_to_rt_node[node->inode]->inode == node->inode); + VTR_ASSERT(rr_node_to_rt_node[node->inode] == nullptr || rr_node_to_rt_node[node->inode] != node); } else { - VTR_ASSERT(rr_node_to_rt_node[node->inode] == nullptr || rr_node_to_rt_node[node->inode] == node); + VTR_ASSERT(rr_node_to_rt_node[node->inode] == nullptr); } rr_node_to_rt_node[node->inode] = node; @@ -265,12 +276,12 @@ void add_route_tree_to_rr_node_lookup(t_rt_node* node) { } } +/* Adds the most recent wire segment, ending at the SINK indicated by hptr, + * to the routing tree. target_net_pin_index is the net pin index corresponding + * to the SINK indicated by hptr. Returns the first (most upstream) new rt_node, + * and (via a pointer) the rt_node of the new SINK. Traverses up from SINK */ static t_rt_node* -add_subtree_to_route_tree(t_heap* hptr, int target_pin, t_rt_node** sink_rt_node_ptr) { - /* Adds the most recent wire segment, ending at the SINK indicated by hptr, - * to the routing tree. It returns the first (most upstream) new rt_node, - * and (via a pointer) the rt_node of the new SINK.
Traverses up from SINK */ - +add_subtree_to_route_tree(t_heap* hptr, int target_net_pin_index, t_rt_node** sink_rt_node_ptr) { t_rt_node *rt_node, *downstream_rt_node, *sink_rt_node; t_linked_rt_edge* linked_rt_edge; @@ -288,7 +299,7 @@ add_subtree_to_route_tree(t_heap* hptr, int target_pin, t_rt_node** sink_rt_node sink_rt_node = alloc_rt_node(); sink_rt_node->u.child_list = nullptr; sink_rt_node->inode = inode; - sink_rt_node->ipin = target_pin; + sink_rt_node->net_pin_index = target_net_pin_index; //hptr is the heap pointer of the SINK that was reached, which corresponds to the target pin rr_node_to_rt_node[inode] = sink_rt_node; /* In the code below I'm marking SINKs and IPINs as not to be re-expanded. @@ -331,7 +342,7 @@ add_subtree_to_route_tree(t_heap* hptr, int target_pin, t_rt_node** sink_rt_node rt_node->u.child_list = linked_rt_edge; rt_node->inode = inode; - rt_node->ipin = OPEN; + rt_node->net_pin_index = OPEN; //net pin index is invalid for non-SINK nodes rr_node_to_rt_node[inode] = rt_node; @@ -391,7 +402,7 @@ static t_rt_node* add_non_configurable_to_route_tree(const int rr_node, const bo rt_node = alloc_rt_node(); rt_node->u.child_list = nullptr; rt_node->inode = rr_node; - rt_node->ipin = OPEN; + rt_node->net_pin_index = OPEN; if (device_ctx.rr_nodes[rr_node].type() == IPIN) { rt_node->re_expand = false; @@ -676,7 +687,7 @@ void print_route_tree(const t_rt_node* rt_node, int depth) { auto& device_ctx = g_vpr_ctx.device(); VTR_LOG("%srt_node: %d (%s) \t ipin: %d \t R: %g \t C: %g \t delay: %g", - indent.c_str(), rt_node->inode, device_ctx.rr_nodes[rt_node->inode].type_string(), rt_node->ipin, rt_node->R_upstream, rt_node->C_downstream, rt_node->Tdel); + indent.c_str(), rt_node->inode, device_ctx.rr_nodes[rt_node->inode].type_string(), rt_node->net_pin_index, rt_node->R_upstream, rt_node->C_downstream, rt_node->Tdel); if (rt_node->parent_switch != OPEN) { bool parent_edge_configurable = 
device_ctx.rr_switch_inf[rt_node->parent_switch].configurable(); @@ -779,7 +790,7 @@ static t_trace* traceback_to_route_tree_branch(t_trace* trace, t_rt_node* node = nullptr; int inode = trace->index; - int ipin = trace->ipin; + int ipin = trace->net_pin_index; int iswitch = trace->iswitch; auto& device_ctx = g_vpr_ctx.device(); @@ -794,7 +805,7 @@ static t_trace* traceback_to_route_tree_branch(t_trace* trace, //Initialize route tree node node = alloc_rt_node(); node->inode = inode; - node->ipin = ipin; + node->net_pin_index = ipin; node->u.child_list = nullptr; node->R_upstream = std::numeric_limits::quiet_NaN(); @@ -877,7 +888,7 @@ static std::pair traceback_from_route_tree_recurr(t_trace* h for (t_linked_rt_edge* edge = node->u.child_list; edge != nullptr; edge = edge->next) { t_trace* curr = alloc_trace_data(); curr->index = node->inode; - curr->ipin = node->ipin; + curr->net_pin_index = node->net_pin_index; curr->iswitch = edge->iswitch; curr->next = nullptr; @@ -898,7 +909,7 @@ static std::pair traceback_from_route_tree_recurr(t_trace* h //Leaf t_trace* curr = alloc_trace_data(); curr->index = node->inode; - curr->ipin = node->ipin; + curr->net_pin_index = node->net_pin_index; curr->iswitch = OPEN; curr->next = nullptr; @@ -1036,7 +1047,7 @@ static t_rt_node* prune_route_tree_recurr(t_rt_node* node, CBRR& connections_inf VTR_ASSERT(force_prune); //Record as not reached - connections_inf.toreach_rr_sink(node->ipin); + connections_inf.toreach_rr_sink(node->net_pin_index); free_rt_node(node); return nullptr; //Pruned @@ -1457,7 +1468,7 @@ init_route_tree_to_source_no_net(int inode) { rt_root->parent_switch = OPEN; rt_root->re_expand = true; rt_root->inode = inode; - rt_root->ipin = OPEN; + rt_root->net_pin_index = OPEN; rt_root->C_downstream = device_ctx.rr_nodes[inode].C(); rt_root->R_upstream = device_ctx.rr_nodes[inode].R(); rt_root->Tdel = 0.5 * device_ctx.rr_nodes[inode].R() * device_ctx.rr_nodes[inode].C(); diff --git a/vpr/src/route/route_tree_timing.h 
b/vpr/src/route/route_tree_timing.h index 106efd27041..b6ca587d86b 100644 --- a/vpr/src/route/route_tree_timing.h +++ b/vpr/src/route/route_tree_timing.h @@ -19,7 +19,7 @@ void free_route_tree(t_rt_node* rt_node); void print_route_tree(const t_rt_node* rt_node); void print_route_tree(const t_rt_node* rt_node, int depth); -t_rt_node* update_route_tree(t_heap* hptr, int target_pin, SpatialRouteTreeLookup* spatial_rt_lookup); +t_rt_node* update_route_tree(t_heap* hptr, int target_net_pin_index, SpatialRouteTreeLookup* spatial_rt_lookup); void update_net_delays_from_route_tree(float* net_delay, const t_rt_node* const* rt_node_of_sink, diff --git a/vpr/src/route/route_tree_type.h b/vpr/src/route/route_tree_type.h index 20339f5969b..edf2cebb393 100644 --- a/vpr/src/route/route_tree_type.h +++ b/vpr/src/route/route_tree_type.h @@ -27,13 +27,16 @@ struct t_linked_rt_edge { * parent_switch: Index of the switch type driving this node (by its * * parent). * * inode: index (ID) of the rr_node that corresponds to this rt_node. * - * ipin: Net pin index associated with the rt_node. This value ranges from * - * 1 to fanout [1..num_pins-1]. For cases when different speed paths * - * are taken to the same sink for different pins, inode cannot * - * uniquely identify each sink, so the net pin index guarentees an * - * unique identification for each sink-type rt_node. For non-sink- * - * type nodes and for sink-type nodes with no associated net pin * - * index, the value for this member should be set to OPEN (-1). * + * net_pin_index: Net pin index associated with the rt_node. This value + * ranges from 1 to fanout [1..num_pins-1]. For cases when * + * different speed paths are taken to the same SINK for * + * different pins, inode cannot uniquely identify each SINK, * + * so the net pin index guarantees an unique identification * + * for each SINK rt_node. For non-SINK nodes and for SINK * + * nodes with no associated net pin index, (i.e. 
special * + * SINKs like the source of a clock tree which do not * + * correspond to an actual netlist connection), the value * + * for this member should be set to OPEN (-1). * * C_downstream: Total downstream capacitance from this rt_node. That is, * * the total C of the subtree rooted at the current node, * * including the C of the current node. * @@ -50,7 +53,7 @@ struct t_rt_node { short parent_switch; bool re_expand; int inode; - int ipin; + int net_pin_index; float C_downstream; float R_upstream; float Tdel; diff --git a/vpr/src/timing/net_delay.cpp b/vpr/src/timing/net_delay.cpp index 5cd48b115d8..8e7d99e5643 100644 --- a/vpr/src/timing/net_delay.cpp +++ b/vpr/src/timing/net_delay.cpp @@ -97,8 +97,8 @@ static void load_one_net_delay(ClbNetPinsMatrix& net_delay, ClusterNetId static void load_one_net_delay_recurr(t_rt_node* node, ClusterNetId net_id) { /* This routine recursively traverses the route tree, and copies the Tdel of the sink_type nodes * * into the map. */ - if (node->ipin != OPEN) { - ipin_to_Tdel_map[node->ipin] = node->Tdel; // add to the map, process current sink-type node + if (node->net_pin_index != OPEN) { // value of OPEN indicates a non-SINK + ipin_to_Tdel_map[node->net_pin_index] = node->Tdel; // add to the map, process current sink-type node } for (t_linked_rt_edge* edge = node->u.child_list; edge != nullptr; edge = edge->next) { // process children From e0e70ea7b202ad8eca2e6d5cd27bf909932526f8 Mon Sep 17 00:00:00 2001 From: Helen Dai Date: Fri, 28 Aug 2020 06:47:54 -0400 Subject: [PATCH 14/15] Fixed assertion error introduced in previous commit and updated comments --- vpr/src/route/route_tree_timing.cpp | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/vpr/src/route/route_tree_timing.cpp b/vpr/src/route/route_tree_timing.cpp index dc7cb0ddac0..a3121a63167 100644 --- a/vpr/src/route/route_tree_timing.cpp +++ b/vpr/src/route/route_tree_timing.cpp @@ -249,23 +249,25 @@ t_rt_node* 
update_route_tree(t_heap* hptr, int target_net_pin_index, SpatialRout return (sink_rt_node); } -/* Records all nodes from the rt_tree into the rr_node_to_rt_node lookup, which - * maps the node's corresponding rr_node index (inode) to the node itself. This - * is done recursively, starting from the root of the tree to its leafs (SINKs) - * in a depth-first traversal. The rt_node we are currently processing should not - * have had its rr_node index mapped previously, with the exception of SINK nodes. - * Some netlists and input pin equivalence can lead to us routing to the same SINK - * more than once on a net (resulting in different rt_nodes sharing the same rr_node - * index). Hence for SINKs we assert on a weaker condition that if the rr_node index - * corresponding to this SINK is already mapped, the rr_node_to_rt_node mapping - * structure must be pointing to a different rt_node containing the SINK. */ +/* Records all nodes from the current routing (rt_tree) into the rr_node_to_rt_node + * lookup, which maps the node's corresponding rr_node index (inode) to the node + * itself. This is done recursively, starting from the root of the tree to its leafs + * (SINKs) in a depth-first traversal. The rt_node we are currently processing has + * either not been added to the routing for this net before or if it was added, the + * rr_node_to_rt_node mapping structure should point back at the rt_node itself so + * we are just branching off that point. Exceptions are the SINK nodes, some + * netlists and input pin equivalence can lead to us routing to the same SINK more + * than once on a net (resulting in different rt_nodes sharing the same rr_node index). 
+ * Hence for SINKs we assert on a weaker condition that if this SINK is already in the + * rt_tree, the rr_node_to_rt_node mapping structure points to a legal rt_node (but + * not necessarily the only one) containing the SINK */ void add_route_tree_to_rr_node_lookup(t_rt_node* node) { if (node) { auto& device_ctx = g_vpr_ctx.device(); if (device_ctx.rr_nodes[node->inode].type() == SINK) { - VTR_ASSERT(rr_node_to_rt_node[node->inode] == nullptr || rr_node_to_rt_node[node->inode] != node); + VTR_ASSERT(rr_node_to_rt_node[node->inode] == nullptr || rr_node_to_rt_node[node->inode]->inode == node->inode); } else { - VTR_ASSERT(rr_node_to_rt_node[node->inode] == nullptr); + VTR_ASSERT(rr_node_to_rt_node[node->inode] == nullptr || rr_node_to_rt_node[node->inode] == node); } rr_node_to_rt_node[node->inode] = node; From 45f56233bb0c621f1bd58d5e38afe5b2bfefc420 Mon Sep 17 00:00:00 2001 From: Helen Dai Date: Sun, 30 Aug 2020 14:06:14 -0400 Subject: [PATCH 15/15] Formating fix --- vpr/src/route/connection_based_routing.cpp | 2 +- vpr/src/timing/net_delay.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/vpr/src/route/connection_based_routing.cpp b/vpr/src/route/connection_based_routing.cpp index a35a8d35ac3..4d442462f92 100644 --- a/vpr/src/route/connection_based_routing.cpp +++ b/vpr/src/route/connection_based_routing.cpp @@ -36,7 +36,7 @@ Connection_based_routing_resources::Connection_based_routing_resources() auto& net_lower_bound_connection_delay = lower_bound_connection_delay[net_id]; auto& net_forcible_reroute_connection_flag = forcible_reroute_connection_flag[net_id]; - unsigned int num_pins = cluster_ctx.clb_nlist.net_pins(net_id).size(); // not looking up on the SOURCE pin + unsigned int num_pins = cluster_ctx.clb_nlist.net_pins(net_id).size(); // not looking up on the SOURCE pin net_lower_bound_connection_delay.resize(num_pins, std::numeric_limits::infinity()); // will be filled in after the 1st iteration's 
net_forcible_reroute_connection_flag.reserve(num_pins); // all false to begin with diff --git a/vpr/src/timing/net_delay.cpp b/vpr/src/timing/net_delay.cpp index 8e7d99e5643..6bf3f9bbe0d 100644 --- a/vpr/src/timing/net_delay.cpp +++ b/vpr/src/timing/net_delay.cpp @@ -97,7 +97,7 @@ static void load_one_net_delay(ClbNetPinsMatrix& net_delay, ClusterNetId static void load_one_net_delay_recurr(t_rt_node* node, ClusterNetId net_id) { /* This routine recursively traverses the route tree, and copies the Tdel of the sink_type nodes * * into the map. */ - if (node->net_pin_index != OPEN) { // value of OPEN indicates a non-SINK + if (node->net_pin_index != OPEN) { // value of OPEN indicates a non-SINK ipin_to_Tdel_map[node->net_pin_index] = node->Tdel; // add to the map, process current sink-type node }