51
51
/* we will profile delay/congestion using this many tracks for each wire type */
52
52
#define MAX_TRACK_OFFSET 16
53
53
54
+ #define X_OFFSET 2
55
+ #define Y_OFFSET 2
56
+
57
+ #define MAX_EXPANSION_LEVEL 1
58
+
54
59
/* we're profiling routing cost over many tracks for each wire type, so we'll have many cost entries at each |dx|,|dy| offset.
55
60
* there are many ways to "boil down" the many costs at each offset to a single entry for a given (wire type, chan_type) combination --
56
61
* we can take the smallest cost, the average, median, etc. This define selects the method we use.
@@ -202,6 +207,13 @@ typedef std::vector<std::vector<std::map<int, t_reachable_wire_inf>>> t_src_opin
202
207
// |
203
208
// SOURCE/OPIN ptc
204
209
210
+ typedef std::vector<std::vector<std::map<int , t_reachable_wire_inf>>> t_chan_reachable_ipins; // [0..device_ctx.physical_tile_types.size()-1][0..max_ptc-1][wire_seg_index]
211
+ // ^ ^ ^
212
+ // | | |
213
+ // physical block type index | Wire to IPIN segment info
214
+ // |
215
+ // SINK/IPIN ptc
216
+
205
217
struct t_dijkstra_data {
206
218
/* a list of boolean flags (one for each rr node) to figure out if a certain node has already been expanded */
207
219
vtr::vector<RRNodeId, bool > node_expanded;
@@ -222,12 +234,17 @@ t_wire_cost_map f_wire_cost_map;
222
234
// Look-up table from SOURCE/OPIN to CHANX/CHANY of various types
223
235
t_src_opin_reachable_wires f_src_opin_reachable_wires;
224
236
237
+ // Look-up table from CHANX/CHANY to SINK/IPIN of various types
238
+ t_chan_reachable_ipins f_chan_reachable_ipins;
239
+
225
240
/* ******* File-Scope Functions ********/
226
241
Cost_Entry get_wire_cost_entry (e_rr_type rr_type, int seg_index, int delta_x, int delta_y);
227
242
static void compute_router_wire_lookahead (const std::vector<t_segment_inf>& segment_inf);
228
243
static void compute_router_src_opin_lookahead ();
244
+ static void compute_router_chan_ipin_lookahead ();
229
245
static vtr::Point <int > pick_sample_tile (t_physical_tile_type_ptr tile_type, vtr::Point <int > start);
230
246
void dijkstra_flood_to_wires (int itile, RRNodeId inode, t_src_opin_reachable_wires& src_opin_reachable_wires);
247
+ void dijkstra_flood_to_ipins (RRNodeId node, t_chan_reachable_ipins& chan_reachable_ipins);
231
248
232
249
/* returns index of a node from which to start routing */
233
250
static RRNodeId get_start_node (int start_x, int start_y, int target_x, int target_y, t_rr_type rr_type, int seg_index, int track_offset);
@@ -281,6 +298,7 @@ void MapLookahead::read(const std::string& file) {
281
298
// Next, compute which wire types are accessible (and the cost to reach them)
282
299
// from the different physical tile type's SOURCEs & OPINs
283
300
compute_router_src_opin_lookahead ();
301
+ compute_router_chan_ipin_lookahead ();
284
302
}
285
303
286
304
void MapLookahead::write (const std::string& file) const {
@@ -380,7 +398,28 @@ float get_lookahead_map_cost(RRNodeId from_node, RRNodeId to_node, float critica
380
398
float expected_delay = cost_entry.delay ;
381
399
float expected_congestion = cost_entry.congestion ;
382
400
383
- expected_cost = criticality_fac * expected_delay + (1.0 - criticality_fac) * expected_congestion;
401
+ if (rr_graph.node_type (to_node) == SINK) {
402
+ auto to_tile_type = device_ctx.grid [rr_graph.node_xlow (to_node)][rr_graph.node_ylow (to_node)].type ;
403
+ auto to_tile_index = to_tile_type->index ;
404
+
405
+ auto to_ptc = rr_graph.node_ptc_num (to_node);
406
+
407
+ if (f_chan_reachable_ipins[to_tile_index].size () != 0 ) {
408
+ for (const auto & kv : f_chan_reachable_ipins[to_tile_index][to_ptc]) {
409
+ const t_reachable_wire_inf& reachable_wire_inf = kv.second ;
410
+
411
+ float this_delay = reachable_wire_inf.delay ;
412
+ float this_congestion = reachable_wire_inf.congestion ;
413
+
414
+ float this_cost = criticality_fac * (expected_delay + this_delay) + (1.0 - criticality_fac) * (expected_congestion + this_congestion);
415
+ expected_cost = std::min (this_cost, expected_cost);
416
+ }
417
+ }
418
+ }
419
+
420
+ if (cost_entry.delay == 0 ) {
421
+ expected_cost = std::numeric_limits<float >::max () / 1e12 ;
422
+ }
384
423
385
424
VTR_ASSERT_SAFE_MSG (std::isfinite (expected_cost),
386
425
vtr::string_fmt (" Lookahead failed to estimate cost from %s: %s" ,
@@ -418,6 +457,7 @@ void compute_router_lookahead(const std::vector<t_segment_inf>& segment_inf) {
418
457
// Next, compute which wire types are accessible (and the cost to reach them)
419
458
// from the different physical tile type's SOURCEs & OPINs
420
459
compute_router_src_opin_lookahead ();
460
+ compute_router_chan_ipin_lookahead ();
421
461
}
422
462
423
463
static void compute_router_wire_lookahead (const std::vector<t_segment_inf>& segment_inf) {
@@ -630,6 +670,56 @@ static void compute_router_src_opin_lookahead() {
630
670
}
631
671
}
632
672
673
+ static void compute_router_chan_ipin_lookahead () {
674
+ vtr::ScopedStartFinishTimer timer (" Computing chan/ipin lookahead" );
675
+ auto & device_ctx = g_vpr_ctx.device ();
676
+
677
+ f_chan_reachable_ipins.clear ();
678
+
679
+ f_chan_reachable_ipins.resize (device_ctx.physical_tile_types .size ());
680
+
681
+ std::vector<int > rr_nodes_at_loc;
682
+
683
+ // We assume that the routing connectivity of each instance of a physical tile is the same,
684
+ // and so only measure one instance of each type
685
+ for (auto tile_type : device_ctx.physical_tile_types ) {
686
+ vtr::Point <int > sample_loc (-1 , -1 );
687
+
688
+ sample_loc = pick_sample_tile (&tile_type, sample_loc);
689
+
690
+ if (sample_loc.x () == -1 && sample_loc.y () == -1 ) {
691
+ // No untried instances of the current tile type left
692
+ VTR_LOG_WARN (" Found no sample locations for %s\n " ,
693
+ tile_type.name );
694
+ continue ;
695
+ }
696
+
697
+ int min_x = std::max (0 , sample_loc.x () - X_OFFSET);
698
+ int min_y = std::max (0 , sample_loc.y () - Y_OFFSET);
699
+ int max_x = std::min (int (device_ctx.grid .width ()), sample_loc.x () + X_OFFSET);
700
+ int max_y = std::min (int (device_ctx.grid .height ()), sample_loc.y () + Y_OFFSET);
701
+
702
+ for (int ix = min_x; ix < max_x; ix++) {
703
+ for (int iy = min_y; iy < max_y; iy++) {
704
+ for (auto rr_type : {CHANX, CHANY}) {
705
+ rr_nodes_at_loc.clear ();
706
+
707
+ get_rr_node_indices (device_ctx.rr_node_indices , ix, iy, rr_type, &rr_nodes_at_loc);
708
+ for (int inode : rr_nodes_at_loc) {
709
+ if (inode < 0 ) continue ;
710
+
711
+ RRNodeId node_id (inode);
712
+
713
+ // Find the IPINs which are reachable from the wires within the bounding box
714
+ // around the selected tile location
715
+ dijkstra_flood_to_ipins (node_id, f_chan_reachable_ipins);
716
+ }
717
+ }
718
+ }
719
+ }
720
+ }
721
+ }
722
+
633
723
static vtr::Point <int > pick_sample_tile (t_physical_tile_type_ptr tile_type, vtr::Point <int > prev) {
634
724
// Very simple for now, just pick the first matching tile found
635
725
vtr::Point <int > loc (OPEN, OPEN);
@@ -650,10 +740,10 @@ static vtr::Point<int> pick_sample_tile(t_physical_tile_type_ptr tile_type, vtr:
650
740
if (y < 0 ) continue ;
651
741
652
742
// VTR_LOG(" y: %d\n", y);
653
- if (grid[x][y].type == tile_type) {
743
+ if (grid[x][y].type -> index == tile_type-> index ) {
654
744
loc.set_x (x);
655
745
loc.set_y (y);
656
- break ;
746
+ return loc ;
657
747
}
658
748
}
659
749
@@ -765,6 +855,104 @@ void dijkstra_flood_to_wires(int itile, RRNodeId node, t_src_opin_reachable_wire
765
855
}
766
856
}
767
857
858
+ void dijkstra_flood_to_ipins (RRNodeId node, t_chan_reachable_ipins& chan_reachable_ipins) {
859
+ auto & device_ctx = g_vpr_ctx.device ();
860
+ auto & rr_graph = device_ctx.rr_nodes ;
861
+
862
+ struct t_pq_entry {
863
+ float delay;
864
+ float congestion;
865
+ RRNodeId node;
866
+ int level;
867
+ int prev_seg_index;
868
+
869
+ bool operator <(const t_pq_entry& rhs) const {
870
+ return this ->delay < rhs.delay ;
871
+ }
872
+ };
873
+
874
+ std::priority_queue<t_pq_entry> pq;
875
+
876
+ t_pq_entry root;
877
+ root.congestion = 0 .;
878
+ root.delay = 0 .;
879
+ root.node = node;
880
+ root.level = 0 ;
881
+ root.prev_seg_index = OPEN;
882
+
883
+ /*
884
+ * Perform Djikstra from the CHAN of interest, stopping at the the first
885
+ * reachable IPIN
886
+ *
887
+ * Note that typical RR graphs are structured :
888
+ *
889
+ * CHANX/CHANY --> CHANX/CHANY --> ... --> CHANX/CHANY --> IPIN --> SINK
890
+ * |
891
+ * --> CHANX/CHANY --> ... --> CHANX/CHANY --> IPIN --> SINK
892
+ * |
893
+ * ...
894
+ *
895
+ * and there is a variable number of hops from a given CHANX/CHANY to IPIN.
896
+ * To avoid impacting on run-time, a fixed number of hops is performed. This
897
+ * should be enough to find the delay from the last CAHN to IPIN connection.
898
+ */
899
+ pq.push (root);
900
+ while (!pq.empty ()) {
901
+ t_pq_entry curr = pq.top ();
902
+ pq.pop ();
903
+
904
+ e_rr_type curr_rr_type = rr_graph.node_type (curr.node );
905
+ if (curr_rr_type == IPIN) {
906
+ int seg_index = curr.prev_seg_index ;
907
+
908
+ int node_x = rr_graph.node_xlow (curr.node );
909
+ int node_y = rr_graph.node_ylow (curr.node );
910
+
911
+ auto tile_type = device_ctx.grid [node_x][node_y].type ;
912
+ int itile = tile_type->index ;
913
+
914
+ int ptc = rr_graph.node_ptc_num (curr.node );
915
+
916
+ if (ptc >= int (chan_reachable_ipins[itile].size ())) {
917
+ chan_reachable_ipins[itile].resize (ptc + 1 ); // Inefficient but functional...
918
+ }
919
+
920
+ // Keep costs of the best path to reach each wire type
921
+ chan_reachable_ipins[itile][ptc][seg_index].wire_rr_type = curr_rr_type;
922
+ chan_reachable_ipins[itile][ptc][seg_index].wire_seg_index = seg_index;
923
+ chan_reachable_ipins[itile][ptc][seg_index].delay = curr.delay ;
924
+ chan_reachable_ipins[itile][ptc][seg_index].congestion = curr.congestion ;
925
+ } else if (curr_rr_type == CHANX || curr_rr_type == CHANY) {
926
+ if (curr.level >= MAX_EXPANSION_LEVEL) {
927
+ continue ;
928
+ }
929
+
930
+ // We allow expansion through SOURCE/OPIN/IPIN types
931
+ int cost_index = rr_graph.node_cost_index (curr.node );
932
+ float new_cong = device_ctx.rr_indexed_data [cost_index].base_cost ; // Current nodes congestion cost
933
+ int seg_index = device_ctx.rr_indexed_data [cost_index].seg_index ;
934
+
935
+ for (RREdgeId edge : rr_graph.edge_range (curr.node )) {
936
+ int iswitch = rr_graph.edge_switch (edge);
937
+ float new_delay = device_ctx.rr_switch_inf [iswitch].Tdel ;
938
+
939
+ RRNodeId next_node = rr_graph.edge_sink_node (edge);
940
+
941
+ t_pq_entry next;
942
+ next.congestion = new_cong; // Of current node
943
+ next.delay = new_delay; // To reach next node
944
+ next.node = next_node;
945
+ next.level = curr.level + 1 ;
946
+ next.prev_seg_index = seg_index;
947
+
948
+ pq.push (next);
949
+ }
950
+ } else {
951
+ VPR_ERROR (VPR_ERROR_ROUTE, " Unrecognized RR type" );
952
+ }
953
+ }
954
+ }
955
+
768
956
/* returns index of a node from which to start routing */
769
957
static RRNodeId get_start_node (int start_x, int start_y, int target_x, int target_y, t_rr_type rr_type, int seg_index, int track_offset) {
770
958
auto & device_ctx = g_vpr_ctx.device ();
0 commit comments