51
51
/* we will profile delay/congestion using this many tracks for each wire type */
52
52
#define MAX_TRACK_OFFSET 16
53
53
54
/* Half-width, in grid tiles, of the window around a sample tile location that
 * compute_router_chan_ipin_lookahead() scans for CHANX/CHANY nodes in the x
 * direction. The window is clamped to the device grid. */
+ #define X_OFFSET 2
55
/* Same as X_OFFSET, but for the y direction. */
+ #define Y_OFFSET 2
56
+
57
/* dijkstra_flood_to_ipins() does not expand a CHANX/CHANY node whose hop level
 * (0 for the starting wire) has reached this value. */
+ #define MAX_EXPANSION_LEVEL 1
58
+
54
59
/* we're profiling routing cost over many tracks for each wire type, so we'll have many cost entries at each |dx|,|dy| offset.
55
60
* there are many ways to "boil down" the many costs at each offset to a single entry for a given (wire type, chan_type) combination --
56
61
* we can take the smallest cost, the average, median, etc. This define selects the method we use.
@@ -202,6 +207,13 @@ typedef std::vector<std::vector<std::map<int, t_reachable_wire_inf>>> t_src_opin
202
207
// |
203
208
// SOURCE/OPIN ptc
204
209
210
// Look-up table type describing which IPINs are reachable from routing wires.
// Indexing, from the outside in:
//   1. index of the physical tile type that contains the IPIN
//   2. ptc number of the pin
//   3. segment index of the CHANX/CHANY wire that drives the pin (map key)
// The mapped t_reachable_wire_inf holds the delay/congestion of that last hop.
// NOTE(review): the table is filled using IPIN ptc numbers but the lookahead
// cost query indexes it with the ptc of a SINK node -- confirm that the two
// numberings are interchangeable.
+ typedef std::vector<std::vector<std::map<int , t_reachable_wire_inf>>> t_chan_reachable_ipins; // [0..device_ctx.physical_tile_types.size()-1][0..max_ptc-1][wire_seg_index]
211
+ // ^ ^ ^
212
+ // | | |
213
+ // physical block type index | Wire to IPIN segment info
214
+ // |
215
+ // SINK/IPIN ptc
216
+
205
217
struct t_dijkstra_data {
206
218
/* a list of boolean flags (one for each rr node) to figure out if a certain node has already been expanded */
207
219
vtr::vector<RRNodeId, bool > node_expanded;
@@ -222,12 +234,17 @@ t_wire_cost_map f_wire_cost_map;
222
234
// Look-up table from SOURCE/OPIN to CHANX/CHANY of various types
223
235
t_src_opin_reachable_wires f_src_opin_reachable_wires;
224
236
237
+ // Look-up table from CHANX/CHANY to SINK/IPIN of various types
238
+ t_chan_reachable_ipins f_chan_reachable_ipins;
239
+
225
240
/* ******* File-Scope Functions ********/
226
241
Cost_Entry get_wire_cost_entry (e_rr_type rr_type, int seg_index, int delta_x, int delta_y);
227
242
static void compute_router_wire_lookahead (const std::vector<t_segment_inf>& segment_inf);
228
243
static void compute_router_src_opin_lookahead ();
244
+ static void compute_router_chan_ipin_lookahead ();
229
245
static vtr::Point<int > pick_sample_tile (t_physical_tile_type_ptr tile_type, vtr::Point<int > start);
230
246
void dijkstra_flood_to_wires (int itile, RRNodeId inode, t_src_opin_reachable_wires& src_opin_reachable_wires);
247
+ void dijkstra_flood_to_ipins (RRNodeId node, t_chan_reachable_ipins& chan_reachable_ipins);
231
248
232
249
/* returns index of a node from which to start routing */
233
250
static RRNodeId get_start_node (int start_x, int start_y, int target_x, int target_y, t_rr_type rr_type, int seg_index, int track_offset);
@@ -281,6 +298,7 @@ void MapLookahead::read(const std::string& file) {
281
298
// Next, compute which wire types are accessible (and the cost to reach them)
282
299
// from the different physical tile type's SOURCEs & OPINs
283
300
compute_router_src_opin_lookahead ();
301
+ compute_router_chan_ipin_lookahead ();
284
302
}
285
303
286
304
void MapLookahead::write (const std::string& file) const {
@@ -380,15 +398,40 @@ float get_lookahead_map_cost(RRNodeId from_node, RRNodeId to_node, float critica
380
398
float expected_delay = cost_entry.delay ;
381
399
float expected_congestion = cost_entry.congestion ;
382
400
383
- expected_cost = criticality_fac * expected_delay + (1.0 - criticality_fac) * expected_congestion;
401
+ if (rr_graph.node_type (to_node) == SINK) {
402
+ auto to_tile_type = device_ctx.grid [rr_graph.node_xlow (to_node)][rr_graph.node_ylow (to_node)].type ;
403
+ auto to_tile_index = to_tile_type->index ;
404
+
405
+ auto to_ptc = rr_graph.node_ptc_num (to_node);
406
+
407
+ if (f_chan_reachable_ipins[to_tile_index].size () != 0 ) {
408
+ for (const auto & kv : f_chan_reachable_ipins[to_tile_index][to_ptc]) {
409
+ const t_reachable_wire_inf& reachable_wire_inf = kv.second ;
410
+
411
+ float this_delay = reachable_wire_inf.delay ;
412
+ float this_congestion = reachable_wire_inf.congestion ;
413
+
414
+ float this_cost = criticality_fac * (expected_delay + this_delay) + (1.0 - criticality_fac) * (expected_congestion + this_congestion);
415
+ expected_cost = std::min (this_cost, expected_cost);
416
+ }
417
+ }
418
+ }
419
+
420
+ if (cost_entry.delay == 0 ) {
421
+ expected_cost = std::numeric_limits<float >::max () / 1e12 ;
422
+ }
384
423
385
424
VTR_ASSERT_SAFE_MSG (std::isfinite (expected_cost),
386
425
vtr::string_fmt (" Lookahead failed to estimate cost from %s: %s" ,
387
426
rr_node_arch_name (size_t (from_node)).c_str (),
388
427
describe_rr_node (size_t (from_node)).c_str ())
389
428
.c_str ());
429
+ VTR_LOGV_DEBUG (f_router_debug, " Lookahead delay : %10.3g\n " , expected_delay);
430
+ VTR_LOGV_DEBUG (f_router_debug, " Lookahead cong : %10.3g\n " , expected_congestion);
390
431
}
391
432
433
+ VTR_LOGV_DEBUG (f_router_debug, " Lookahead cost : %10.3g\n " , expected_cost);
434
+
392
435
return expected_cost;
393
436
}
394
437
@@ -418,6 +461,7 @@ void compute_router_lookahead(const std::vector<t_segment_inf>& segment_inf) {
418
461
// Next, compute which wire types are accessible (and the cost to reach them)
419
462
// from the different physical tile type's SOURCEs & OPINs
420
463
compute_router_src_opin_lookahead ();
464
+ compute_router_chan_ipin_lookahead ();
421
465
}
422
466
423
467
static void compute_router_wire_lookahead (const std::vector<t_segment_inf>& segment_inf) {
@@ -630,6 +674,56 @@ static void compute_router_src_opin_lookahead() {
630
674
}
631
675
}
632
676
677
+ static void compute_router_chan_ipin_lookahead () {
678
+ vtr::ScopedStartFinishTimer timer (" Computing chan/ipin lookahead" );
679
+ auto & device_ctx = g_vpr_ctx.device ();
680
+
681
+ f_chan_reachable_ipins.clear ();
682
+
683
+ f_chan_reachable_ipins.resize (device_ctx.physical_tile_types .size ());
684
+
685
+ std::vector<int > rr_nodes_at_loc;
686
+
687
+ // We assume that the routing connectivity of each instance of a physical tile is the same,
688
+ // and so only measure one instance of each type
689
+ for (auto tile_type : device_ctx.physical_tile_types ) {
690
+ vtr::Point<int > sample_loc (-1 , -1 );
691
+
692
+ sample_loc = pick_sample_tile (&tile_type, sample_loc);
693
+
694
+ if (sample_loc.x () == -1 && sample_loc.y () == -1 ) {
695
+ // No untried instances of the current tile type left
696
+ VTR_LOG_WARN (" Found no sample locations for %s\n " ,
697
+ tile_type.name );
698
+ continue ;
699
+ }
700
+
701
+ int min_x = std::max (0 , sample_loc.x () - X_OFFSET);
702
+ int min_y = std::max (0 , sample_loc.y () - Y_OFFSET);
703
+ int max_x = std::min (int (device_ctx.grid .width ()), sample_loc.x () + X_OFFSET);
704
+ int max_y = std::min (int (device_ctx.grid .height ()), sample_loc.y () + Y_OFFSET);
705
+
706
+ for (int ix = min_x; ix < max_x; ix++) {
707
+ for (int iy = min_y; iy < max_y; iy++) {
708
+ for (auto rr_type : {CHANX, CHANY}) {
709
+ rr_nodes_at_loc.clear ();
710
+
711
+ get_rr_node_indices (device_ctx.rr_node_indices , ix, iy, rr_type, &rr_nodes_at_loc);
712
+ for (int inode : rr_nodes_at_loc) {
713
+ if (inode < 0 ) continue ;
714
+
715
+ RRNodeId node_id (inode);
716
+
717
+ // Find the IPINs which are reachable from the wires within the bounding box
718
+ // around the selected tile location
719
+ dijkstra_flood_to_ipins (node_id, f_chan_reachable_ipins);
720
+ }
721
+ }
722
+ }
723
+ }
724
+ }
725
+ }
726
+
633
727
static vtr::Point<int > pick_sample_tile (t_physical_tile_type_ptr tile_type, vtr::Point<int > prev) {
634
728
// Very simple for now, just pick the first matching tile found
635
729
vtr::Point<int > loc (OPEN, OPEN);
@@ -650,10 +744,11 @@ static vtr::Point<int> pick_sample_tile(t_physical_tile_type_ptr tile_type, vtr:
650
744
if (y < 0 ) continue ;
651
745
652
746
// VTR_LOG(" y: %d\n", y);
653
- if (grid[x][y].type == tile_type) {
747
+ if (grid[x][y].type -> index == tile_type-> index ) {
654
748
loc.set_x (x);
655
749
loc.set_y (y);
656
- break ;
750
+ VTR_LOG (" RETURN LOC! %s (%d, %d)\n " , tile_type->name , x, y);
751
+ return loc;
657
752
}
658
753
}
659
754
@@ -765,6 +860,104 @@ void dijkstra_flood_to_wires(int itile, RRNodeId node, t_src_opin_reachable_wire
765
860
}
766
861
}
767
862
863
+ void dijkstra_flood_to_ipins (RRNodeId node, t_chan_reachable_ipins& chan_reachable_ipins) {
864
+ auto & device_ctx = g_vpr_ctx.device ();
865
+ auto & rr_graph = device_ctx.rr_nodes ;
866
+
867
+ struct t_pq_entry {
868
+ float delay;
869
+ float congestion;
870
+ RRNodeId node;
871
+ int level;
872
+ int prev_seg_index;
873
+
874
+ bool operator <(const t_pq_entry& rhs) const {
875
+ return this ->delay < rhs.delay ;
876
+ }
877
+ };
878
+
879
+ std::priority_queue<t_pq_entry> pq;
880
+
881
+ t_pq_entry root;
882
+ root.congestion = 0 .;
883
+ root.delay = 0 .;
884
+ root.node = node;
885
+ root.level = 0 ;
886
+ root.prev_seg_index = OPEN;
887
+
888
+ /*
889
+ * Perform Djikstra from the CHAN of interest, stopping at the the first
890
+ * reachable IPIN
891
+ *
892
+ * Note that typical RR graphs are structured :
893
+ *
894
+ * CHANX/CHANY --> CHANX/CHANY --> ... --> CHANX/CHANY --> IPIN --> SINK
895
+ * |
896
+ * --> CHANX/CHANY --> ... --> CHANX/CHANY --> IPIN --> SINK
897
+ * |
898
+ * ...
899
+ *
900
+ * and there is a variable number of hops from a given CHANX/CHANY to IPIN.
901
+ * To avoid impacting on run-time, a fixed number of hops is performed. This
902
+ * should be enough to find the delay from the last CAHN to IPIN connection.
903
+ */
904
+ pq.push (root);
905
+ while (!pq.empty ()) {
906
+ t_pq_entry curr = pq.top ();
907
+ pq.pop ();
908
+
909
+ e_rr_type curr_rr_type = rr_graph.node_type (curr.node );
910
+ if (curr_rr_type == IPIN) {
911
+ int seg_index = curr.prev_seg_index ;
912
+
913
+ int node_x = rr_graph.node_xlow (curr.node );
914
+ int node_y = rr_graph.node_ylow (curr.node );
915
+
916
+ auto tile_type = device_ctx.grid [node_x][node_y].type ;
917
+ int itile = tile_type->index ;
918
+
919
+ int ptc = rr_graph.node_ptc_num (curr.node );
920
+
921
+ if (ptc >= int (chan_reachable_ipins[itile].size ())) {
922
+ chan_reachable_ipins[itile].resize (ptc + 1 ); // Inefficient but functional...
923
+ }
924
+
925
+ // Keep costs of the best path to reach each wire type
926
+ chan_reachable_ipins[itile][ptc][seg_index].wire_rr_type = curr_rr_type;
927
+ chan_reachable_ipins[itile][ptc][seg_index].wire_seg_index = seg_index;
928
+ chan_reachable_ipins[itile][ptc][seg_index].delay = curr.delay ;
929
+ chan_reachable_ipins[itile][ptc][seg_index].congestion = curr.congestion ;
930
+ } else if (curr_rr_type == CHANX || curr_rr_type == CHANY) {
931
+ if (curr.level >= MAX_EXPANSION_LEVEL) {
932
+ continue ;
933
+ }
934
+
935
+ // We allow expansion through SOURCE/OPIN/IPIN types
936
+ int cost_index = rr_graph.node_cost_index (curr.node );
937
+ float new_cong = device_ctx.rr_indexed_data [cost_index].base_cost ; // Current nodes congestion cost
938
+ int seg_index = device_ctx.rr_indexed_data [cost_index].seg_index ;
939
+
940
+ for (RREdgeId edge : rr_graph.edge_range (curr.node )) {
941
+ int iswitch = rr_graph.edge_switch (edge);
942
+ float new_delay = device_ctx.rr_switch_inf [iswitch].Tdel ;
943
+
944
+ RRNodeId next_node = rr_graph.edge_sink_node (edge);
945
+
946
+ t_pq_entry next;
947
+ next.congestion = new_cong; // Of current node
948
+ next.delay = new_delay; // To reach next node
949
+ next.node = next_node;
950
+ next.level = curr.level + 1 ;
951
+ next.prev_seg_index = seg_index;
952
+
953
+ pq.push (next);
954
+ }
955
+ } else {
956
+ VPR_ERROR (VPR_ERROR_ROUTE, " Unrecognized RR type" );
957
+ }
958
+ }
959
+ }
960
+
768
961
/* returns index of a node from which to start routing */
769
962
static RRNodeId get_start_node (int start_x, int start_y, int target_x, int target_y, t_rr_type rr_type, int seg_index, int track_offset) {
770
963
auto & device_ctx = g_vpr_ctx.device ();
0 commit comments