Skip to content

Commit 825b8be

Browse files
committed
[ParallelRouter] Added Command-Line Options for MQ-Based Parallel Router
Added four command-line options for the MQ-based parallel router: (1) `--multi_queue_num_threads <# threads>`, (2) `--multi_queue_num_queues <# queues>`, (3) `--multi_queue_direct_draining <on/off>`, and (4) `--thread_affinity <off | core list>`, where `off` means no affinity (scheduling is left to the OS) and a core list gives the CPU core IDs to use (the first one is for the main thread), e.g., `0,1,2,3` or `0-3` or `0-1,2-3` or `0,1-2,3`.
1 parent df520ae commit 825b8be

11 files changed

+168
-93
lines changed

vpr/src/base/SetupVPR.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,10 @@ static void SetupRouterOpts(const t_options& Options, t_router_opts* RouterOpts)
414414
RouterOpts->router_profiler_astar_fac = Options.router_profiler_astar_fac;
415415
RouterOpts->post_target_prune_fac = Options.post_target_prune_fac;
416416
RouterOpts->post_target_prune_offset = Options.post_target_prune_offset;
417+
RouterOpts->multi_queue_num_threads = Options.multi_queue_num_threads;
418+
RouterOpts->multi_queue_num_queues = Options.multi_queue_num_queues;
419+
RouterOpts->multi_queue_direct_draining = Options.multi_queue_direct_draining;
420+
RouterOpts->thread_affinity = Options.thread_affinity;
417421
RouterOpts->bb_factor = Options.bb_factor;
418422
RouterOpts->criticality_exp = Options.criticality_exp;
419423
RouterOpts->max_criticality = Options.max_criticality;

vpr/src/base/ShowSetup.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,12 @@ static void ShowRouterOpts(const t_router_opts& RouterOpts) {
257257
VTR_LOG("false\n");
258258
}
259259

260+
// Renders a thread-affinity core list for logging.
// An empty list is printed as "off"; otherwise the core IDs are joined with
// commas in their stored order (e.g. {0, 1, 2} -> "0,1,2").
auto transform_thread_affinity_list_to_str = [](const std::vector<int>& aff) {
    if (aff.empty()) {
        return std::string("off");
    }
    std::string str = std::to_string(aff.front());
    for (size_t i = 1; i < aff.size(); ++i) {
        // Append in place rather than building a "," + id temporary.
        str += ',';
        str += std::to_string(aff[i]);
    }
    return str;
};
265+
260266
if (DETAILED == RouterOpts.route_type) {
261267
VTR_LOG("RouterOpts.router_algorithm: ");
262268
switch (RouterOpts.router_algorithm) {
@@ -340,6 +346,12 @@ static void ShowRouterOpts(const t_router_opts& RouterOpts) {
340346
VTR_LOG("RouterOpts.astar_fac: %f\n", RouterOpts.astar_fac);
341347
VTR_LOG("RouterOpts.astar_offset: %f\n", RouterOpts.astar_offset);
342348
VTR_LOG("RouterOpts.router_profiler_astar_fac: %f\n", RouterOpts.router_profiler_astar_fac);
349+
VTR_LOG("RouterOpts.post_target_prune_fac: %f\n", RouterOpts.post_target_prune_fac);
350+
VTR_LOG("RouterOpts.post_target_prune_offset: %f\n", RouterOpts.post_target_prune_offset);
351+
VTR_LOG("RouterOpts.multi_queue_num_threads: %d\n", RouterOpts.multi_queue_num_threads);
352+
VTR_LOG("RouterOpts.multi_queue_num_queues: %d\n", RouterOpts.multi_queue_num_queues);
353+
VTR_LOG("RouterOpts.multi_queue_direct_draining: %s\n", RouterOpts.multi_queue_direct_draining ? "true" : "false");
354+
VTR_LOG("RouterOpts.thread_affinity: %s\n", transform_thread_affinity_list_to_str(RouterOpts.thread_affinity).c_str());
343355
VTR_LOG("RouterOpts.criticality_exp: %f\n", RouterOpts.criticality_exp);
344356
VTR_LOG("RouterOpts.max_criticality: %f\n", RouterOpts.max_criticality);
345357
VTR_LOG("RouterOpts.init_wirelength_abort_threshold: %f\n", RouterOpts.init_wirelength_abort_threshold);
@@ -487,6 +499,10 @@ static void ShowRouterOpts(const t_router_opts& RouterOpts) {
487499
VTR_LOG("RouterOpts.router_profiler_astar_fac: %f\n", RouterOpts.router_profiler_astar_fac);
488500
VTR_LOG("RouterOpts.post_target_prune_fac: %f\n", RouterOpts.post_target_prune_fac);
489501
VTR_LOG("RouterOpts.post_target_prune_offset: %f\n", RouterOpts.post_target_prune_offset);
502+
VTR_LOG("RouterOpts.multi_queue_num_threads: %d\n", RouterOpts.multi_queue_num_threads);
503+
VTR_LOG("RouterOpts.multi_queue_num_queues: %d\n", RouterOpts.multi_queue_num_queues);
504+
VTR_LOG("RouterOpts.multi_queue_direct_draining: %s\n", RouterOpts.multi_queue_direct_draining ? "true" : "false");
505+
VTR_LOG("RouterOpts.thread_affinity: %s\n", transform_thread_affinity_list_to_str(RouterOpts.thread_affinity).c_str());
490506
VTR_LOG("RouterOpts.criticality_exp: %f\n", RouterOpts.criticality_exp);
491507
VTR_LOG("RouterOpts.max_criticality: %f\n", RouterOpts.max_criticality);
492508
VTR_LOG("RouterOpts.init_wirelength_abort_threshold: %f\n", RouterOpts.init_wirelength_abort_threshold);

vpr/src/base/read_options.cpp

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1259,6 +1259,67 @@ struct ParsePostSynthNetlistUnconnOutputHandling {
12591259
}
12601260
};
12611261

1262+
struct ParseTheadAffinityList {
1263+
inline std::vector<std::string> get_tokens_split_by_delimiter(const std::string& str, char delimiter) {
1264+
std::vector<std::string> tokens;
1265+
std::string acc = "";
1266+
for(const auto &x : str) {
1267+
if (x == delimiter) {
1268+
tokens.push_back(acc);
1269+
acc = "";
1270+
} else {
1271+
acc += x;
1272+
}
1273+
}
1274+
tokens.push_back(acc);
1275+
return tokens;
1276+
}
1277+
1278+
// Parse thread/core affinity list (i.e., pin threads to specific cores).
1279+
// Formats such as `0,1,2,3,4,5,6,7` and `0-7` and `0-3,4-7` and `0,1-2,3-6,7`
1280+
// are all supported.
1281+
inline std::vector<int> parse_thread_affinity_list(const std::string& str) {
1282+
std::vector<int> thread_affinity_list;
1283+
std::vector<std::string> lv1_tokens_split_by_comma = get_tokens_split_by_delimiter(str, ',');
1284+
for (const auto &l1_token : lv1_tokens_split_by_comma) {
1285+
std::vector<std::string> lv2_tokens_split_by_dash = get_tokens_split_by_delimiter(l1_token, '-');
1286+
size_t num_lv2_tokens = lv2_tokens_split_by_dash.size();
1287+
VTR_ASSERT(num_lv2_tokens == 1 || num_lv2_tokens == 2);
1288+
if (num_lv2_tokens == 2) {
1289+
int start_core_id = std::stoi(lv2_tokens_split_by_dash[0]);
1290+
int end_core_id = std::stoi(lv2_tokens_split_by_dash[1]);
1291+
for (int i = start_core_id; i <= end_core_id; ++i) {
1292+
thread_affinity_list.push_back(i);
1293+
}
1294+
} else {
1295+
thread_affinity_list.push_back(std::stoi(lv2_tokens_split_by_dash[0]));
1296+
}
1297+
}
1298+
return thread_affinity_list;
1299+
}
1300+
1301+
ConvertedValue<std::vector<int>> from_str(const std::string& str) {
1302+
ConvertedValue<std::vector<int>> conv_value;
1303+
VTR_ASSERT(str.size() > 0);
1304+
if (str == "off") {
1305+
conv_value.set_value({});
1306+
} else {
1307+
conv_value.set_value(parse_thread_affinity_list(str));
1308+
}
1309+
return conv_value;
1310+
}
1311+
1312+
ConvertedValue<std::string> to_str(std::vector<int> val) {
1313+
ConvertedValue<std::string> conv_value;
1314+
std::string str = val.size() ? std::to_string(val.front()) : "off";
1315+
for (size_t i = 1; i < val.size(); str += ',' + std::to_string(val[i++])) ;
1316+
conv_value.set_value(str);
1317+
return conv_value;
1318+
}
1319+
1320+
std::vector<std::string> default_choices() { return {}; }
1321+
};
1322+
12621323
argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& args) {
12631324
std::string description =
12641325
"Implements the specified circuit onto the target FPGA architecture"
@@ -2504,6 +2565,26 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg
25042565
.default_value("0.0")
25052566
.show_in(argparse::ShowIn::HELP_ONLY);
25062567

2568+
route_timing_grp.add_argument<int>(args.multi_queue_num_threads, "--multi_queue_num_threads")
2569+
.help("TODO")
2570+
.default_value("1")
2571+
.show_in(argparse::ShowIn::HELP_ONLY);
2572+
2573+
route_timing_grp.add_argument<int>(args.multi_queue_num_queues, "--multi_queue_num_queues")
2574+
.help("TODO")
2575+
.default_value("2")
2576+
.show_in(argparse::ShowIn::HELP_ONLY);
2577+
2578+
route_timing_grp.add_argument<bool, ParseOnOff>(args.multi_queue_direct_draining, "--multi_queue_direct_draining")
2579+
.help("TODO")
2580+
.default_value("off")
2581+
.show_in(argparse::ShowIn::HELP_ONLY);
2582+
2583+
route_timing_grp.add_argument<std::vector<int>, ParseTheadAffinityList>(args.thread_affinity, "--thread_affinity")
2584+
.help("TODO")
2585+
.default_value("off")
2586+
.show_in(argparse::ShowIn::HELP_ONLY);
2587+
25072588
route_timing_grp.add_argument(args.max_criticality, "--max_criticality")
25082589
.help(
25092590
"Sets the maximum fraction of routing cost derived from delay (vs routability) for any net."

vpr/src/base/read_options.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,10 @@ struct t_options {
209209
argparse::ArgValue<float> router_profiler_astar_fac;
210210
argparse::ArgValue<float> post_target_prune_fac;
211211
argparse::ArgValue<float> post_target_prune_offset;
212+
argparse::ArgValue<int> multi_queue_num_threads;
213+
argparse::ArgValue<int> multi_queue_num_queues;
214+
argparse::ArgValue<bool> multi_queue_direct_draining;
215+
argparse::ArgValue<std::vector<int>> thread_affinity;
212216
argparse::ArgValue<float> max_criticality;
213217
argparse::ArgValue<float> criticality_exp;
214218
argparse::ArgValue<float> router_init_wirelength_abort_threshold;

vpr/src/base/vpr_types.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1425,6 +1425,10 @@ struct t_router_opts {
14251425
float router_profiler_astar_fac;
14261426
float post_target_prune_fac;
14271427
float post_target_prune_offset;
1428+
int multi_queue_num_threads;
1429+
int multi_queue_num_queues;
1430+
bool multi_queue_direct_draining;
1431+
std::vector<int> thread_affinity;
14281432
float max_criticality;
14291433
float criticality_exp;
14301434
float init_wirelength_abort_threshold;

vpr/src/route/SerialNetlistRouter.h

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ class SerialNetlistRouter : public NetlistRouter {
2121
const RoutingPredictor& routing_predictor,
2222
const vtr::vector<ParentNetId, std::vector<std::unordered_map<RRNodeId, int>>>& choking_spots,
2323
bool is_flat)
24-
: _serial_router(_make_router(router_lookahead, is_flat, false))
25-
, _parallel_router(_make_router(router_lookahead, is_flat, true))
24+
: _serial_router(_make_router(router_lookahead, router_opts, is_flat, false))
25+
, _parallel_router(_make_router(router_lookahead, router_opts, is_flat, true))
2626
, _net_list(net_list)
2727
, _router_opts(router_opts)
2828
, _connections_inf(connections_inf)
@@ -45,8 +45,10 @@ class SerialNetlistRouter : public NetlistRouter {
4545

4646
private:
4747
bool should_use_parallel_connection_router(const ParentNetId &net_id, int itry, float pres_fac, float worst_neg_slack);
48-
49-
ConnectionRouterInterface *_make_router(const RouterLookahead* router_lookahead, bool is_flat, bool is_parallel) {
48+
49+
ConnectionRouterInterface *_make_router(const RouterLookahead* router_lookahead,
50+
const t_router_opts& router_opts,
51+
bool is_flat, bool is_parallel) {
5052
auto& device_ctx = g_vpr_ctx.device();
5153
auto& route_ctx = g_vpr_ctx.mutable_routing();
5254

@@ -71,7 +73,11 @@ class SerialNetlistRouter : public NetlistRouter {
7173
device_ctx.rr_rc_data,
7274
device_ctx.rr_graph.rr_switch(),
7375
route_ctx.rr_node_route_inf,
74-
is_flat);
76+
is_flat,
77+
router_opts.multi_queue_num_threads,
78+
router_opts.multi_queue_num_queues,
79+
router_opts.multi_queue_direct_draining,
80+
router_opts.thread_affinity);
7581
}
7682
}
7783
/* Context fields */

vpr/src/route/multi_queue_priority_queue.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
#ifndef _MULTI_QUEUE_PRIORITY_QUEUE_H
22
#define _MULTI_QUEUE_PRIORITY_QUEUE_H
33

4-
// #define MQ_IO_ENABLE_CLEAR_FOR_POP
4+
// This macro only compiles in the queue-clearing code of the MQIO codebase.
5+
// Whether the queue draining optimization is actually used depends solely on
6+
// the VPR command-line option `--multi_queue_direct_draining` at runtime. If
7+
// the option is set to `off`, queue draining has no effect, because
8+
// `setMinPrioForPop` is never called and `minPrioForPop` in the MQIO object
// therefore always stays at the float maximum.
9+
#define MQ_IO_ENABLE_CLEAR_FOR_POP
510

611
#include "heap_type.h"
712

vpr/src/route/parallel_connection_router.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -392,7 +392,7 @@ void ParallelConnectionRouter::timing_driven_route_connection_from_heap_thread_f
392392
while (heap_.try_pop(new_total_cost, inode)) {
393393
#ifdef PROFILE_HEAP_OCCUPANCY
394394
if (thread_idx == 0) {
395-
if (count % (1000 / mq_num_threads) == 0) {
395+
if (count % 1000 == 0) {
396396
heap_occ_profile_ << count << " " << heap_.getHeapOccupancy() << "\n";
397397
}
398398
count ++;
@@ -631,7 +631,9 @@ void ParallelConnectionRouter::timing_driven_add_to_heap(const t_conn_cost_param
631631

632632
if (to_node == target_node) {
633633
#ifdef MQ_IO_ENABLE_CLEAR_FOR_POP
634-
heap_.setMinPrioForPop(new_total_cost);
634+
if (multi_queue_direct_draining_) {
635+
heap_.setMinPrioForPop(new_total_cost);
636+
}
635637
#endif
636638
return ;
637639
}

0 commit comments

Comments
 (0)