Skip to content

Commit fe9089c

Browse files
authored
Merge pull request #2457 from verilog-to-routing/compressed_router_lookahead
Updating router lookahead
2 parents 3e15bf3 + 899b63a commit fe9089c

File tree

38 files changed

+4402
-3539
lines changed

38 files changed

+4402
-3539
lines changed

vpr/src/base/ShowSetup.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,9 @@ static void ShowRouterOpts(const t_router_opts& RouterOpts) {
373373
case e_router_lookahead::MAP:
374374
VTR_LOG("MAP\n");
375375
break;
376+
case e_router_lookahead::COMPRESSED_MAP:
377+
VTR_LOG("COMPRESSED_MAP\n");
378+
break;
376379
case e_router_lookahead::EXTENDED_MAP:
377380
VTR_LOG("EXTENDED_MAP\n");
378381
break;
@@ -512,6 +515,9 @@ static void ShowRouterOpts(const t_router_opts& RouterOpts) {
512515
case e_router_lookahead::MAP:
513516
VTR_LOG("MAP\n");
514517
break;
518+
case e_router_lookahead::COMPRESSED_MAP:
519+
VTR_LOG("COMPRESSED_MAP\n");
520+
break;
515521
case e_router_lookahead::EXTENDED_MAP:
516522
VTR_LOG("EXTENDED_MAP\n");
517523
break;
@@ -628,8 +634,8 @@ static void ShowPlacerOpts(const t_placer_opts& PlacerOpts,
628634
VPR_FATAL_ERROR(VPR_ERROR_UNKNOWN, "Unknown delay_model_reducer\n");
629635
VTR_LOG("PlacerOpts.delay_model_reducer: %s\n", e_reducer_strings[(size_t)PlacerOpts.delay_model_reducer].c_str());
630636

631-
std::string place_delay_model_strings[2] = {"DELTA", "DELTA_OVERRIDE"};
632-
if ((size_t)PlacerOpts.delay_model_type > 1)
637+
std::string place_delay_model_strings[3] = {"SIMPLE", "DELTA", "DELTA_OVERRIDE"};
638+
if ((size_t)PlacerOpts.delay_model_type > 2)
633639
VPR_FATAL_ERROR(VPR_ERROR_UNKNOWN, "Unknown delay_model_type\n");
634640
VTR_LOG("PlacerOpts.delay_model_type: %s\n", place_delay_model_strings[(size_t)PlacerOpts.delay_model_type].c_str());
635641
}

vpr/src/base/read_options.cpp

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -902,11 +902,14 @@ struct ParseRouteBBUpdate {
902902

903903
struct ParseRouterLookahead {
904904
ConvertedValue<e_router_lookahead> from_str(std::string str) {
905+
std::transform(str.begin(), str.end(), str.begin(), ::tolower);
905906
ConvertedValue<e_router_lookahead> conv_value;
906907
if (str == "classic")
907908
conv_value.set_value(e_router_lookahead::CLASSIC);
908909
else if (str == "map")
909910
conv_value.set_value(e_router_lookahead::MAP);
911+
else if (str == "compressed_map")
912+
conv_value.set_value(e_router_lookahead::COMPRESSED_MAP);
910913
else if (str == "extended_map")
911914
conv_value.set_value(e_router_lookahead::EXTENDED_MAP);
912915
else {
@@ -926,6 +929,8 @@ struct ParseRouterLookahead {
926929
conv_value.set_value("classic");
927930
else if (val == e_router_lookahead::MAP) {
928931
conv_value.set_value("map");
932+
} else if (val == e_router_lookahead::COMPRESSED_MAP) {
933+
conv_value.set_value("compressed_map");
929934
} else {
930935
VTR_ASSERT(val == e_router_lookahead::EXTENDED_MAP);
931936
conv_value.set_value("extended_map");
@@ -934,14 +939,16 @@ struct ParseRouterLookahead {
934939
}
935940

936941
std::vector<std::string> default_choices() {
937-
return {"classic", "map", "extended_map"};
942+
return {"classic", "map", "compressed_map", "extended_map"};
938943
}
939944
};
940945

941946
struct ParsePlaceDelayModel {
942947
ConvertedValue<PlaceDelayModelType> from_str(std::string str) {
943948
ConvertedValue<PlaceDelayModelType> conv_value;
944-
if (str == "delta")
949+
if (str == "simple") {
950+
conv_value.set_value(PlaceDelayModelType::SIMPLE);
951+
} else if (str == "delta")
945952
conv_value.set_value(PlaceDelayModelType::DELTA);
946953
else if (str == "delta_override")
947954
conv_value.set_value(PlaceDelayModelType::DELTA_OVERRIDE);
@@ -955,7 +962,9 @@ struct ParsePlaceDelayModel {
955962

956963
ConvertedValue<std::string> to_str(PlaceDelayModelType val) {
957964
ConvertedValue<std::string> conv_value;
958-
if (val == PlaceDelayModelType::DELTA)
965+
if (val == PlaceDelayModelType::SIMPLE)
966+
conv_value.set_value("simple");
967+
else if (val == PlaceDelayModelType::DELTA)
959968
conv_value.set_value("delta");
960969
else if (val == PlaceDelayModelType::DELTA_OVERRIDE)
961970
conv_value.set_value("delta_override");
@@ -968,7 +977,7 @@ struct ParsePlaceDelayModel {
968977
}
969978

970979
std::vector<std::string> default_choices() {
971-
return {"delta", "delta_override"};
980+
return {"simple", "delta", "delta_override"};
972981
}
973982
};
974983

@@ -2245,6 +2254,7 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg
22452254
"This option controls what information is considered and how"
22462255
" the placement delay model is constructed.\n"
22472256
"Valid options:\n"
2257+
" * 'simple' uses map router lookahead\n"
22482258
" * 'delta' uses differences in position only\n"
22492259
" * 'delta_override' uses differences in position with overrides for direct connects\n")
22502260
.default_value("delta")
@@ -2566,6 +2576,8 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg
25662576
" * classic: The classic VPR lookahead (may perform better on un-buffered routing\n"
25672577
" architectures)\n"
25682578
" * map: An advanced lookahead which accounts for diverse wire type\n"
2579+
" * compressed_map: The algorithm is similar to map lookahead with the exception of saprse sampling of the chip"
2580+
" to reduce the run-time to build the router lookahead and also its memory footprint\n"
25692581
" * extended_map: A more advanced and extended lookahead which accounts for a more\n"
25702582
" exhaustive node sampling method\n"
25712583
"\n"

vpr/src/base/vpr_types.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -119,10 +119,11 @@ constexpr auto INVALID_BLOCK_ID = ClusterBlockId(-2);
119119
#endif
120120

121121
///@brief The router lookahead variants known to VPR
enum class e_router_lookahead {
122-
CLASSIC, ///<VPR's classic lookahead (assumes uniform wire types)
123-
MAP, ///<Lookahead considering different wire types (see Oleg Petelin's MASc Thesis)
124-
EXTENDED_MAP, ///<Lookahead with a more extensive node sampling method
125-
NO_OP ///<A no-operation lookahead which always returns zero
122+
CLASSIC, ///<VPR's classic lookahead (assumes uniform wire types)
123+
MAP, ///<Lookahead considering different wire types (see Oleg Petelin's MASc Thesis)
124+
COMPRESSED_MAP, ///<Similar to MAP, but uses a sparse sampling of the chip
125+
EXTENDED_MAP, ///<Lookahead with a more extensive node sampling method
126+
NO_OP ///<A no-operation lookahead which always returns zero
126127
};
127128

128129
enum class e_route_bb_update {
@@ -1129,6 +1130,7 @@ enum e_place_effort_scaling {
11291130
};
11301131

11311132
///@brief The placement delay model variants
enum class PlaceDelayModelType {
1133+
SIMPLE, ///<Delay model based on the information stored in the router lookahead
11321134
DELTA, ///<Delta x/y based delay model
11331135
DELTA_OVERRIDE, ///<Delta x/y based delay model with special case delay overrides
11341136
};

vpr/src/place/place.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -619,7 +619,6 @@ void try_place(const Netlist<>& net_list,
619619
if (placer_opts.place_algorithm.is_timing_driven()) {
620620
/*do this before the initial placement to avoid messing up the initial placement */
621621
place_delay_model = alloc_lookups_and_delay_model(net_list,
622-
device_ctx.arch_switch_inf,
623622
chan_width_dist,
624623
placer_opts,
625624
router_opts,

vpr/src/place/place_delay_model.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,14 @@ void OverrideDelayModel::set_base_delay_model(std::unique_ptr<DeltaDelayModel> b
160160
base_delay_model_ = std::move(base_delay_model_obj);
161161
}
162162

163+
/**
 * @brief Looks up the delay between two tile locations in the precomputed delays_ matrix.
 *
 * The matrix is indexed by the index of the physical tile type found at from_loc,
 * the source layer, the sink layer and the absolute x/y distance between the two
 * locations. The two pin arguments are unnamed and are not used by this model.
 *
 * @param from_loc Location of the source tile
 * @param to_loc Location of the sink tile
 * @return The entry of delays_ selected by the indices described above
 */
float SimpleDelayModel::delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const {
164+
// Only the magnitude of the offset is used, so the lookup does not depend on direction
int delta_x = std::abs(from_loc.x - to_loc.x);
165+
int delta_y = std::abs(from_loc.y - to_loc.y);
166+
167+
// The first matrix dimension is the index of the physical tile type at the source location
int from_tile_idx = g_vpr_ctx.device().grid.get_physical_type(from_loc)->index;
168+
return delays_[from_tile_idx][from_loc.layer_num][to_loc.layer_num][delta_x][delta_y];
169+
}
170+
163171
/**
164172
* When writing capnp targetted serialization, always allow compilation when
165173
* VTR_ENABLE_CAPNPROTO=OFF. Generally this means throwing an exception instead.
@@ -319,7 +327,6 @@ void OverrideDelayModel::write(const std::string& file) const {
319327

320328
///@brief Initialize the placer delay model.
321329
std::unique_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(const Netlist<>& net_list,
322-
const std::vector<t_arch_switch_inf>& arch_switch_inf,
323330
t_chan_width_dist chan_width_dist,
324331
const t_placer_opts& placer_opts,
325332
const t_router_opts& router_opts,
@@ -331,7 +338,6 @@ std::unique_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(const Netlist<>&
331338
return compute_place_delay_model(placer_opts,
332339
router_opts,
333340
net_list,
334-
arch_switch_inf,
335341
det_routing_arch,
336342
segment_inf,
337343
chan_width_dist,

vpr/src/place/place_delay_model.h

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ class PlaceDelayModel;
2929

3030
///@brief Initialize the placer delay model.
3131
std::unique_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(const Netlist<>& net_list,
32-
const std::vector<t_arch_switch_inf>& arch_switch_inf,
3332
t_chan_width_dist chan_width_dist,
3433
const t_placer_opts& place_opts,
3534
const t_router_opts& router_opts,
@@ -113,7 +112,13 @@ class DeltaDelayModel : public PlaceDelayModel {
113112

114113
private:
115114
vtr::NdMatrix<float, 3> delays_; // [0..num_layers-1][0..max_dx][0..max_dy]
115+
/**
116+
* @brief The minimum delay of inter-layer connections
117+
*/
116118
float cross_layer_delay_;
119+
/**
120+
* @brief Indicates whether the router is a two-stage or run-flat
121+
*/
117122
bool is_flat_;
118123
};
119124

@@ -144,8 +149,13 @@ class OverrideDelayModel : public PlaceDelayModel {
144149

145150
private:
146151
std::unique_ptr<DeltaDelayModel> base_delay_model_;
147-
/* Minimum delay of cross-layer connections */
152+
/**
153+
* @brief Minimum delay of cross-layer connections
154+
*/
148155
float cross_layer_delay_;
156+
/**
157+
* @brief Indicates whether the router is a two-stage or run-flat
158+
*/
149159
bool is_flat_;
150160

151161
void compute_override_delay_model(RouterDelayProfiler& router,
@@ -217,3 +227,33 @@ class OverrideDelayModel : public PlaceDelayModel {
217227
static_assert(sizeof(t_override::delta_x) == sizeof(short), "Expect all t_override data members to be shorts");
218228
static_assert(sizeof(t_override::delta_y) == sizeof(short), "Expect all t_override data members to be shorts");
219229
};
230+
231+
///@brief A simple delay model based on the information stored in the router lookahead
232+
/// This is in contrast to the other placement delay models, which obtain the cost of getting from one location to another by running the router
233+
class SimpleDelayModel : public PlaceDelayModel {
234+
public:
235+
///@brief Constructs an empty model; delays_ is filled by compute()
SimpleDelayModel() {}
236+
237+
///@brief Fills the delay matrix (delays_) of this model
void compute(
238+
RouterDelayProfiler& router,
239+
const t_placer_opts& placer_opts,
240+
const t_router_opts& router_opts,
241+
int longest_length) override;
242+
///@brief Returns the stored delay between from_loc and to_loc
float delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const override;
243+
///@brief Empty body: this model writes no echo file
void dump_echo(std::string /*filepath*/) const override {}
244+
245+
///@brief Empty body: nothing is read from the given file
void read(const std::string& /*file*/) override {}
246+
///@brief Empty body: nothing is written to the given file
void write(const std::string& /*file*/) const override {}
247+
248+
private:
249+
/**
250+
* @brief The matrix that stores the minimum delay between different points on different layers.
251+
*
252+
* The matrix used to store delay information is a 5D matrix. This data structure stores the minimum delay for each tile type on each layer to other layers
253+
* for each dx and dy. We decided to separate the delay for each physical type on each die to accommodate cases where the connectivity of a physical type differs
254+
* on each layer. Additionally, instead of using d_layer, we distinguish between the destination layers to handle scenarios where connectivity between layers
255+
* is not uniform, for example, if the number of inter-layer connections between layer 1 and 2 differs from the number of connections between layer 0 and 1.
256+
* One might argue that this variability could also occur for dx and dy. However, we are operating under the assumption that the FPGA fabric architecture is regular.
257+
*/
258+
vtr::NdMatrix<float, 5> delays_; // [0..num_physical_type-1][0..num_layers-1][0..num_layers-1][0..max_dx][0..max_dy]
259+
};

vpr/src/place/timing_place_lookup.cpp

Lines changed: 51 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,13 @@ static vtr::NdMatrix<float, 3> compute_delta_delay_model(
143143
int longest_length,
144144
bool is_flat);
145145

146+
/**
147+
* @brief Use the information in the router lookahead to fill the delay matrix instead of running the router
148+
* @param route_profiler The profiler whose get_min_delay() is queried for every entry of the matrix
149+
* @return The delay matrix, indexed as [physical tile type][from layer][to layer][dx][dy], that contains the minimum delay between two locations
150+
*/
151+
static vtr::NdMatrix<float, 5> compute_simple_delay_model(RouterDelayProfiler& route_profiler);
152+
146153
static bool find_direct_connect_sample_locations(const t_direct_inf* direct,
147154
t_physical_tile_type_ptr from_type,
148155
int from_pin,
@@ -167,7 +174,6 @@ static float find_neightboring_average(vtr::NdMatrix<float, 3>& matrix, t_physic
167174
std::unique_ptr<PlaceDelayModel> compute_place_delay_model(const t_placer_opts& placer_opts,
168175
const t_router_opts& router_opts,
169176
const Netlist<>& net_list,
170-
const std::vector<t_arch_switch_inf>& arch_switch_inf,
171177
t_det_routing_arch* det_routing_arch,
172178
std::vector<t_segment_inf>& segment_inf,
173179
t_chan_width_dist chan_width_dist,
@@ -195,10 +201,11 @@ std::unique_ptr<PlaceDelayModel> compute_place_delay_model(const t_placer_opts&
195201

196202
/*now setup and compute the actual arrays */
197203
std::unique_ptr<PlaceDelayModel> place_delay_model;
198-
float min_cross_layer_delay = get_min_cross_layer_delay(arch_switch_inf,
199-
segment_inf,
200-
det_routing_arch->wire_to_arch_ipin_switch_between_dice);
201-
if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA) {
204+
float min_cross_layer_delay = get_min_cross_layer_delay();
205+
206+
if (placer_opts.delay_model_type == PlaceDelayModelType::SIMPLE) {
207+
place_delay_model = std::make_unique<SimpleDelayModel>();
208+
} else if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA) {
202209
place_delay_model = std::make_unique<DeltaDelayModel>(min_cross_layer_delay, is_flat);
203210
} else if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA_OVERRIDE) {
204211
place_delay_model = std::make_unique<OverrideDelayModel>(min_cross_layer_delay, is_flat);
@@ -250,6 +257,14 @@ void OverrideDelayModel::compute(
250257
compute_override_delay_model(route_profiler, router_opts);
251258
}
252259

260+
/**
 * @brief Fills delays_ with the matrix returned by compute_simple_delay_model().
 *
 * Only the router profiler is used; the placer options, router options and
 * longest_length parameters are unnamed and ignored by this model.
 *
 * @param router The profiler forwarded to compute_simple_delay_model()
 */
void SimpleDelayModel::compute(
261+
RouterDelayProfiler& router,
262+
const t_placer_opts& /*placer_opts*/,
263+
const t_router_opts& /*router_opts*/,
264+
int /*longest_length*/) {
265+
delays_ = compute_simple_delay_model(router);
266+
}
267+
253268
/******* File Accessible Functions **********/
254269

255270
std::vector<int> get_best_classes(enum e_pin_type pintype, t_physical_tile_type_ptr type) {
@@ -1004,6 +1019,37 @@ static vtr::NdMatrix<float, 3> compute_delta_delay_model(
10041019
return delta_delays;
10051020
}
10061021

1022+
/**
 * @brief Builds the 5D delay matrix used by the simple placement delay model.
 *
 * For every physical tile type, every (from layer, to layer) pair and every
 * (dx, dy) offset with 0 <= dx < grid width and 0 <= dy < grid height, the
 * value returned by route_profiler.get_min_delay() is stored in the matrix.
 *
 * @param route_profiler The profiler queried for the minimum delay of each entry
 * @return The matrix indexed as [physical tile type][from layer][to layer][dx][dy]
 */
static vtr::NdMatrix<float, 5> compute_simple_delay_model(RouterDelayProfiler& route_profiler) {
1023+
const auto& grid = g_vpr_ctx.device().grid;
1024+
int num_physical_tile_types = static_cast<int>(g_vpr_ctx.device().physical_tile_types.size());
1025+
// Initialize the delay matrix with dimensions [num_physical_types][num_layers][num_layers][width][height]
1026+
// The second index is the layer that the source location is on and the third index is the sink layer
1027+
vtr::NdMatrix<float, 5> delta_delays({static_cast<unsigned long>(num_physical_tile_types),
1028+
static_cast<unsigned long>(grid.get_num_layers()),
1029+
static_cast<unsigned long>(grid.get_num_layers()),
1030+
grid.width(),
1031+
grid.height()});
1032+
1033+
// Visit every entry of the matrix and fill it with the minimum delay reported by the profiler
for (int physical_tile_type_idx = 0; physical_tile_type_idx < num_physical_tile_types; ++physical_tile_type_idx) {
1034+
for (int from_layer = 0; from_layer < grid.get_num_layers(); ++from_layer) {
1035+
for (int to_layer = 0; to_layer < grid.get_num_layers(); ++to_layer) {
1036+
for (int dx = 0; dx < static_cast<int>(grid.width()); ++dx) {
1037+
for (int dy = 0; dy < static_cast<int>(grid.height()); ++dy) {
1038+
float min_delay = route_profiler.get_min_delay(physical_tile_type_idx,
1039+
from_layer,
1040+
to_layer,
1041+
dx,
1042+
dy);
1043+
delta_delays[physical_tile_type_idx][from_layer][to_layer][dx][dy] = min_delay;
1044+
}
1045+
}
1046+
}
1047+
}
1048+
}
1049+
1050+
return delta_delays;
1051+
}
1052+
10071053
//Finds a src_rr and sink_rr appropriate for measuring the delay of the current direct specification
10081054
static bool find_direct_connect_sample_locations(const t_direct_inf* direct,
10091055
t_physical_tile_type_ptr from_type,

vpr/src/place/timing_place_lookup.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
std::unique_ptr<PlaceDelayModel> compute_place_delay_model(const t_placer_opts& placer_opts,
66
const t_router_opts& router_opts,
77
const Netlist<>& net_list,
8-
const std::vector<t_arch_switch_inf>& arch_switch_inf,
98
t_det_routing_arch* det_routing_arch,
109
std::vector<t_segment_inf>& segment_inf,
1110
t_chan_width_dist chan_width_dist,

0 commit comments

Comments
 (0)