2
2
#include " atom_netlist_fwd.h"
3
3
#include " physical_types_util.h"
4
4
#include " place_macro.h"
5
+ #include " vtr_ndmatrix.h"
5
6
#include " vtr_random.h"
6
7
#include " vtr_time.h"
7
8
#include " vpr_types.h"
19
20
20
21
#include < cmath>
21
22
#include < iterator>
23
+ #include < limits>
22
24
#include < optional>
25
+ #include < queue>
23
26
24
27
#ifdef VERBOSE
25
28
void print_clb_placement (const char * fname);
@@ -38,11 +41,6 @@ static constexpr int SORT_WEIGHT_PER_TILES_OUTSIDE_OF_PR = 100;
38
41
// The neighbor location should be within the defined range to the calculated centroid location.
39
42
static constexpr int CENTROID_NEIGHBOR_SEARCH_RLIM = 15 ;
40
43
41
- // The range limit to be used when searcing for a neighbor in the centroid placement when AP is used.
42
- // Since AP is assumed to have a better idea of where clusters should be placed, we want to search more
43
- // places to place a cluster near its solved position before giving up.
44
- static constexpr int CENTROID_NEIGHBOR_SEARCH_RLIM_AP = 60 ;
45
-
46
44
/* *
47
45
* @brief Control routine for placing a macro.
48
46
* First iteration of place_macro performs the following steps to place a macro:
@@ -549,47 +547,200 @@ static std::vector<ClusterBlockId> find_centroid_loc(const t_pl_macro& pl_macro,
549
547
}
550
548
551
549
// TODO: Should this return the unplaced_blocks_to_update_their_score?
552
- static void find_centroid_loc_from_flat_placement (const t_pl_macro& pl_macro,
553
- t_pl_loc& centroid ,
554
- const FlatPlacementInfo& flat_placement_info) {
550
+ static t_flat_pl_loc find_centroid_loc_from_flat_placement (const t_pl_macro& pl_macro,
551
+ int & sub_tile ,
552
+ const FlatPlacementInfo& flat_placement_info) {
555
553
// Use the flat placement to compute the centroid of the given macro.
556
554
// TODO: Instead of averaging, maybe use MODE (most frequently placed location).
557
555
float acc_weight = 0 .f ;
558
- float acc_x = 0 .f ;
559
- float acc_y = 0 .f ;
560
- float acc_layer = 0 .f ;
561
556
float acc_sub_tile = 0 .f ;
557
+ t_flat_pl_loc centroid ({0 .0f , 0 .0f , 0 .0f });
562
558
for (const t_pl_macro_member& member : pl_macro.members ) {
563
559
const auto & cluster_atoms = g_vpr_ctx.clustering ().atoms_lookup [member.blk_index ];
564
560
for (AtomBlockId atom_blk_id : cluster_atoms) {
565
561
// TODO: We can get away with using less information.
566
562
VTR_ASSERT (flat_placement_info.blk_x_pos [atom_blk_id] != FlatPlacementInfo::UNDEFINED_POS && flat_placement_info.blk_y_pos [atom_blk_id] != FlatPlacementInfo::UNDEFINED_POS && flat_placement_info.blk_layer [atom_blk_id] != FlatPlacementInfo::UNDEFINED_POS && flat_placement_info.blk_sub_tile [atom_blk_id] != FlatPlacementInfo::UNDEFINED_SUB_TILE);
567
- // TODO: Make this a debug print.
568
- // VTR_LOG("%s ", g_vpr_ctx.atom().netlist().block_name(atom_blk_id).c_str());
569
563
570
564
// Accumulate the x, y, layer, and sub_tile for each atom in each
571
565
// member of the macro. Remove the offset so the centroid would be
572
566
// where the head macro should be placed to put the members in the
573
567
// correct place.
574
- acc_x += flat_placement_info.blk_x_pos [atom_blk_id] - member.offset .x ;
575
- acc_y += flat_placement_info.blk_y_pos [atom_blk_id] - member.offset .y ;
576
- acc_layer += flat_placement_info.blk_layer [atom_blk_id] - member.offset .layer ;
568
+ t_flat_pl_loc cluster_offset ({(float )member.offset .x ,
569
+ (float )member.offset .y ,
570
+ (float )member.offset .layer });
571
+ centroid += flat_placement_info.get_pos (atom_blk_id);
572
+ centroid -= cluster_offset;
573
+
577
574
acc_sub_tile += flat_placement_info.blk_sub_tile [atom_blk_id] - member.offset .sub_tile ;
578
575
acc_weight++;
579
576
}
580
577
}
581
578
if (acc_weight > 0 .f ) {
582
- // NOTE: We add an offset of 0.5 to prevent us from moving to the tile
583
- // below / to the left due to tiny numerical changes (this
584
- // pretends that each atom is in the center of the tile).
585
- centroid.x = std::floor (acc_x / acc_weight);
586
- centroid.y = std::floor (acc_y / acc_weight);
587
- centroid.layer = std::floor (acc_layer / acc_weight);
588
- centroid.sub_tile = std::floor (acc_sub_tile / acc_weight);
589
-
590
- // TODO: Make this a debug print.
591
- // VTR_LOG("\n\t(%d, %d, %d, %d)\n", centroid.x, centroid.y, centroid.layer, centroid.sub_tile);
579
+ centroid /= acc_weight;
580
+ sub_tile = std::floor (acc_sub_tile / acc_weight);
592
581
}
582
+ return centroid;
583
+ }
584
+
585
+ /* *
586
+ * @brief Find the nearest compatible location for the given macro as close to
587
+ * the src_flat_loc as possible.
588
+ *
589
+ * This method uses a BFS to find the closest legal location for the macro.
590
+ *
591
+ * @param src_flat_loc
592
+ * The start location of the BFS. This is given as a flat placement to
593
+ * allow the search to trade-off different location options. For example,
594
+ * if src_loc was (1.6, 1.5), this tells the search that the cluster
595
+ * would prefer to be at tile (1, 1), but if it cannot go there and
596
+ * it had to go to one of the neighbors, it would prefer to be on the
597
+ * right.
598
+ * @param src_loc_sub_tile
599
+ * The expected sub_tile of the src location. Flat locations do not
600
+ * have subtile information, so this gets passed in as well.
601
+ * @param block_type
602
+ * The logical block type of the macro.
603
+ * @param macro
604
+ * The macro to place in the location.
605
+ * @param blk_loc_registry
606
+ *
607
+ * @return Returns the closest legal location found. All of the dimensions will
608
+ * be OPEN if a location could not be found.
609
+ */
610
+ static inline t_pl_loc find_nearest_compatible_loc (t_flat_pl_loc& src_flat_loc,
611
+ int src_loc_sub_tile,
612
+ t_logical_block_type_ptr block_type,
613
+ const t_pl_macro& pl_macro,
614
+ const BlkLocRegistry& blk_loc_registry) {
615
+ // This method performs a BFS over the compressed grid. This avoids searching
616
+ // locations which obviously cannot implement this macro.
617
+ const auto & compressed_block_grid = g_vpr_ctx.placement ().compressed_block_grids [block_type->index ];
618
+ const int num_layers = g_vpr_ctx.device ().grid .get_num_layers ();
619
+ // This method does not support 3D FPGAs yet. The search performed will only
620
+ // traverse the same layer as the src_loc.
621
+ VTR_ASSERT (num_layers == 1 );
622
+
623
+ // Get the closest (approximately) compressed location to the src location.
624
+ // This does not need to be perfect (in fact I do not think it is), but the
625
+ // closer it is, the faster the BFS will find the best solution.
626
+ t_pl_loc src_loc (src_flat_loc.x , src_flat_loc.y , src_loc_sub_tile, src_flat_loc.layer );
627
+ auto compressed_src_locs = get_compressed_loc_approx (compressed_block_grid,
628
+ src_loc,
629
+ num_layers);
630
+ const t_physical_tile_loc compressed_src_loc = compressed_block_grid.grid_loc_to_compressed_loc_approx ({src_loc.x , src_loc.y , src_loc.layer });
631
+
632
+ // Weighted-BFS search the compressed grid for an empty compatible subtile.
633
+ size_t num_rows = compressed_block_grid.get_num_rows (0 );
634
+ size_t num_cols = compressed_block_grid.get_num_columns (0 );
635
+ vtr::NdMatrix<bool , 2 > visited ({num_cols, num_rows}, false );
636
+ float best_dist = std::numeric_limits<float >::max ();
637
+ t_pl_loc best_loc (OPEN, OPEN, OPEN, OPEN);
638
+
639
+ std::queue<t_physical_tile_loc> loc_queue;
640
+ loc_queue.push (compressed_src_loc);
641
+ while (!loc_queue.empty ()) {
642
+ // Pop the top element off the queue.
643
+ t_physical_tile_loc loc = loc_queue.front ();
644
+ loc_queue.pop ();
645
+
646
+ // If this location has already been visited, skip it.
647
+ if (visited[loc.x ][loc.y ])
648
+ continue ;
649
+ visited[loc.x ][loc.y ] = true ;
650
+
651
+ // Get the distance from this loc to the src_loc in grid space
652
+ // Note: In compressed space, distances are not what they appear. We are
653
+ // using the true grid positions to get the truly closest loc.
654
+ // Here, we use the flat centroid of the macro and measure its
655
+ // distance from the center of this grid loc (hence the 0.5 offset).
656
+ auto grid_loc = compressed_block_grid.compressed_loc_to_grid_loc (loc);
657
+ float grid_dx = std::abs ((float )grid_loc.x + 0 .5f - src_flat_loc.x );
658
+ float grid_dy = std::abs ((float )grid_loc.y + 0 .5f - src_flat_loc.y );
659
+ float grid_dist = grid_dx + grid_dy;
660
+ // If this distance is worse than the best we have seen or is outside
661
+ // of the search distance, do not explore it or its neighbors.
662
+ // NOTE: This prune is always safe (i.e. it will never remove a better
663
+ // solution) since this is a spatial graph and our objective is
664
+ // positional distance. The unvisited neighbors of a node should
665
+ // have a higher distance than the current node.
666
+ if (grid_dist >= best_dist)
667
+ continue ;
668
+
669
+ // In order to ensure our BFS finds the closest compatible location, we
670
+ // traverse compressed grid locations which may not actually be valid
671
+ // (i.e. no tile exists there). This is fine, we just need to check for
672
+ // them to ensure we never try to put a cluster there.
673
+ bool is_valid_compressed_loc = false ;
674
+ const auto & compressed_col_blk_map = compressed_block_grid.get_column_block_map (loc.x , 0 );
675
+ if (compressed_col_blk_map.count (loc.y ) != 0 )
676
+ is_valid_compressed_loc = true ;
677
+
678
+ // If this distance is better than the best we have seen so far, try
679
+ // to see if this is a better solution.
680
+ if (is_valid_compressed_loc) {
681
+ // Check if a sub-tile is available at this location.
682
+ const t_physical_tile_type_ptr phy_type = g_vpr_ctx.device ().grid .get_physical_type (grid_loc);
683
+ const auto & compatible_sub_tiles = compressed_block_grid.compatible_sub_tile_num (phy_type->index );
684
+ int new_sub_tile = -1 ;
685
+ for (int sub_tile : compatible_sub_tiles) {
686
+ if (blk_loc_registry.grid_blocks ().is_sub_tile_empty (grid_loc, sub_tile)) {
687
+ new_sub_tile = sub_tile;
688
+ break ;
689
+ }
690
+ }
691
+ if (new_sub_tile != -1 ) {
692
+ // If a sub-tile is available, set this to be the first sub-tile
693
+ // available and check if this site is legal for this macro.
694
+ // TODO: Should we pick a random sub-tile instead?
695
+ // Note: We are using the full legality check here to check for
696
+ // floorplanning constraints and compatibility for all
697
+ // members of the macro. This prevents some macros being
698
+ // placed where they obviously cannot be implemented.
699
+ t_pl_loc new_loc = t_pl_loc (grid_loc.x , grid_loc.y , new_sub_tile, grid_loc.layer_num );
700
+ bool site_legal_for_macro = macro_can_be_placed (pl_macro,
701
+ new_loc,
702
+ true /* check_all_legality*/ ,
703
+ blk_loc_registry);
704
+ if (site_legal_for_macro) {
705
+ // Update the best solution.
706
+ // Note: We need to keep searching since the compressed grid
707
+ // may present a location which is closer in compressed
708
+ // space earlier than a location which is closer in
709
+ // grid space.
710
+ best_dist = grid_dist;
711
+ best_loc = new_loc;
712
+ }
713
+ }
714
+ }
715
+
716
+ // Push the neighbors (in the compressed grid) onto the queue.
717
+ if (loc.x > 0 ) {
718
+ t_physical_tile_loc new_loc = t_physical_tile_loc (loc.x - 1 ,
719
+ loc.y ,
720
+ loc.layer_num );
721
+ loc_queue.push (new_loc);
722
+ }
723
+ if (loc.x < (int )num_cols - 1 ) {
724
+ t_physical_tile_loc new_loc = t_physical_tile_loc (loc.x + 1 ,
725
+ loc.y ,
726
+ loc.layer_num );
727
+ loc_queue.push (new_loc);
728
+ }
729
+ if (loc.y > 0 ) {
730
+ t_physical_tile_loc new_loc = t_physical_tile_loc (loc.x ,
731
+ loc.y - 1 ,
732
+ loc.layer_num );
733
+ loc_queue.push (new_loc);
734
+ }
735
+ if (loc.y < (int )num_rows - 1 ) {
736
+ t_physical_tile_loc new_loc = t_physical_tile_loc (loc.x ,
737
+ loc.y + 1 ,
738
+ loc.layer_num );
739
+ loc_queue.push (new_loc);
740
+ }
741
+ }
742
+
743
+ return best_loc;
593
744
}
594
745
595
746
static bool try_centroid_placement (const t_pl_macro& pl_macro,
@@ -614,46 +765,26 @@ static bool try_centroid_placement(const t_pl_macro& pl_macro,
614
765
unplaced_blocks_to_update_their_score = find_centroid_loc (pl_macro, centroid_loc, blk_loc_registry);
615
766
found_legal_subtile = find_subtile_in_location (centroid_loc, block_type, blk_loc_registry, pr, rng);
616
767
} else {
617
- // Note: AP uses a different rlim than non-AP
618
- rlim = CENTROID_NEIGHBOR_SEARCH_RLIM_AP;
619
768
// If a flat placement is provided, use the flat placement to get the
620
- // centroid.
621
- find_centroid_loc_from_flat_placement (pl_macro, centroid_loc, flat_placement_info);
622
- if (!is_loc_on_chip ({centroid_loc.x , centroid_loc.y , centroid_loc.layer }) || !is_loc_legal (centroid_loc, pr, block_type)) {
623
- // If the centroid is not legal, check for a neighboring block we
624
- // can use instead.
625
- bool neighbor_legal_loc = find_centroid_neighbor (centroid_loc,
626
- block_type,
627
- false ,
628
- rlim,
629
- blk_loc_registry,
630
- rng);
631
- if (!neighbor_legal_loc) {
632
- // If we cannot find a neighboring block, fall back on the
633
- // original find_centroid_loc function.
634
- // FIXME: We should really just skip this block and come back
635
- // to it later. We do not want it taking space from
636
- // someone else!
637
- unplaced_blocks_to_update_their_score = find_centroid_loc (pl_macro, centroid_loc, blk_loc_registry);
638
- found_legal_subtile = find_subtile_in_location (centroid_loc, block_type, blk_loc_registry, pr, rng);
639
- } else {
640
- found_legal_subtile = true ;
641
- }
769
+ // centroid location of the macro.
770
+ int centroid_sub_tile = OPEN;
771
+ t_flat_pl_loc centroid_flat_loc = find_centroid_loc_from_flat_placement (pl_macro, centroid_sub_tile, flat_placement_info);
772
+ // Then find the nearest legal location to this centroid for this macro.
773
+ centroid_loc = find_nearest_compatible_loc (centroid_flat_loc,
774
+ centroid_sub_tile,
775
+ block_type,
776
+ pl_macro,
777
+ blk_loc_registry);
778
+ if (centroid_loc.x == OPEN) {
779
+ // If we cannot find a nearest block, fall back on the original
780
+ // find_centroid_loc function.
781
+ // FIXME: We should really just skip this block and come back
782
+ // to it later. We do not want it taking space from
783
+ // someone else!
784
+ unplaced_blocks_to_update_their_score = find_centroid_loc (pl_macro, centroid_loc, blk_loc_registry);
785
+ found_legal_subtile = find_subtile_in_location (centroid_loc, block_type, blk_loc_registry, pr, rng);
642
786
} else {
643
- // If this is a legal location for this block, check if any other
644
- // blocks are at this subtile location.
645
- const GridBlock& grid_blocks = blk_loc_registry.grid_blocks ();
646
- if (grid_blocks.block_at_location (centroid_loc)) {
647
- // If there is a block at this subtile, try to find another
648
- // subtile at this location to be placed in.
649
- found_legal_subtile = find_subtile_in_location (centroid_loc,
650
- block_type,
651
- blk_loc_registry,
652
- pr,
653
- rng);
654
- } else {
655
- found_legal_subtile = true ;
656
- }
787
+ found_legal_subtile = true ;
657
788
}
658
789
}
659
790
0 commit comments