Skip to content

Commit 356d004

Browse files
Merge pull request verilog-to-routing#3037 from AlexandreSinger/feature-ap-multithreaded
[AP][Solver] Enabled Parallel Eigen
2 parents 824e494 + a427f9c commit 356d004

8 files changed

+47
-0
lines changed

vpr/CMakeLists.txt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ set_property(CACHE VPR_PGO_CONFIG PROPERTY STRINGS prof_gen prof_use none)
1212

1313
set(VPR_PGO_DATA_DIR "." CACHE PATH "Where to store and retrieve PGO data")
1414

15+
set(VPR_ENABLE_OPEN_MP "on" CACHE STRING "Enable OpenMP when compiling VPR")
16+
1517
#Handle graphics setup
1618
set(GRAPHICS_DEFINES "")
1719

@@ -295,6 +297,21 @@ else()
295297
message(FATAL_ERROR "VPR: Unrecognized execution engine '${VPR_USE_EXECUTION_ENGINE}'")
296298
endif()
297299

300+
#
301+
# OpenMP configuration
302+
#
303+
# Optionally link libvpr against OpenMP.
# The lookup is only attempted when VPR_ENABLE_OPEN_MP compares equal to the
# string "on"; every other value takes the final else() branch below.
# NOTE(review): STREQUAL is a plain string comparison, so values such as
# "ON" or "TRUE" would presumably be reported as disabled - confirm intended.
if (VPR_ENABLE_OPEN_MP STREQUAL "on")
304+
# find_package is called without REQUIRED, so a missing OpenMP does not abort
# configuration; the result is inspected through OpenMP_CXX_FOUND instead.
find_package(OpenMP)
305+
if (OpenMP_CXX_FOUND)
306+
# Link through the imported target provided by the OpenMP package lookup.
target_link_libraries(libvpr OpenMP::OpenMP_CXX)
307+
message(STATUS "OpenMP: Enabled")
308+
else()
309+
# OpenMP was requested but could not be located: report it and continue
# configuring without linking OpenMP.
message(STATUS "OpenMP: Disabled (requested but not found)")
310+
endif()
311+
else()
312+
# OpenMP was not requested via VPR_ENABLE_OPEN_MP.
message(STATUS "OpenMP: Disabled")
313+
endif()
314+
298315
#
299316
# Signal handler configuration
300317
#

vpr/src/analytical_place/analytical_placement_flow.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ static PartialPlacement run_global_placer(const t_ap_opts& ap_opts,
144144
device_ctx.physical_tile_types,
145145
pre_cluster_timing_manager,
146146
ap_opts.ap_timing_tradeoff,
147+
ap_opts.num_threads,
147148
ap_opts.log_verbosity);
148149
return global_placer->place();
149150
}

vpr/src/analytical_place/analytical_solver.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <cstdio>
1212
#include <limits>
1313
#include <memory>
14+
#include <thread>
1415
#include <utility>
1516
#include <vector>
1617
#include "PreClusterTimingManager.h"
@@ -32,6 +33,7 @@
3233
#pragma GCC diagnostic push
3334
#pragma GCC diagnostic ignored "-Wnull-dereference"
3435

36+
#include <Eigen/src/Core/products/Parallelizer.h>
3537
#include <Eigen/src/SparseCore/SparseMatrix.h>
3638
#include <Eigen/SVD>
3739
#include <Eigen/Sparse>
@@ -48,7 +50,22 @@ std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_ap_analytical_solver
4850
const AtomNetlist& atom_netlist,
4951
const PreClusterTimingManager& pre_cluster_timing_manager,
5052
float ap_timing_tradeoff,
53+
unsigned num_threads,
5154
int log_verbosity) {
55+
#ifdef EIGEN_INSTALLED
56+
// Set the number of threads that Eigen can use.
57+
unsigned eigen_num_threads = num_threads;
58+
if (num_threads == 0) {
59+
eigen_num_threads = std::thread::hardware_concurrency();
60+
}
61+
// Set the number of threads globally used by Eigen (if OpenMP is enabled).
62+
// NOTE: Since this is a global update, all solvers will have this number
63+
// of threads.
64+
Eigen::setNbThreads(eigen_num_threads);
65+
#else
66+
(void)num_threads;
67+
#endif // EIGEN_INSTALLED
68+
5269
// Based on the solver type passed in, build the solver.
5370
switch (solver_type) {
5471
case e_ap_analytical_solver::QP_Hybrid:

vpr/src/analytical_place/analytical_solver.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ std::unique_ptr<AnalyticalSolver> make_analytical_solver(e_ap_analytical_solver
138138
const AtomNetlist& atom_netlist,
139139
const PreClusterTimingManager& pre_cluster_timing_manager,
140140
float ap_timing_tradeoff,
141+
unsigned num_threads,
141142
int log_verbosity);
142143

143144
// The Eigen library is used to solve matrix equations in the following solvers.

vpr/src/analytical_place/global_placer.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ std::unique_ptr<GlobalPlacer> make_global_placer(e_ap_analytical_solver analytic
3737
const std::vector<t_physical_tile_type>& physical_tile_types,
3838
const PreClusterTimingManager& pre_cluster_timing_manager,
3939
float ap_timing_tradeoff,
40+
unsigned num_threads,
4041
int log_verbosity) {
4142
return std::make_unique<SimPLGlobalPlacer>(analytical_solver_type,
4243
partial_legalizer_type,
@@ -48,6 +49,7 @@ std::unique_ptr<GlobalPlacer> make_global_placer(e_ap_analytical_solver analytic
4849
physical_tile_types,
4950
pre_cluster_timing_manager,
5051
ap_timing_tradeoff,
52+
num_threads,
5153
log_verbosity);
5254
}
5355

@@ -61,6 +63,7 @@ SimPLGlobalPlacer::SimPLGlobalPlacer(e_ap_analytical_solver analytical_solver_ty
6163
const std::vector<t_physical_tile_type>& physical_tile_types,
6264
const PreClusterTimingManager& pre_cluster_timing_manager,
6365
float ap_timing_tradeoff,
66+
unsigned num_threads,
6467
int log_verbosity)
6568
: GlobalPlacer(ap_netlist, log_verbosity) {
6669
// This can be a long method. Good to time this to see how long it takes to
@@ -75,6 +78,7 @@ SimPLGlobalPlacer::SimPLGlobalPlacer(e_ap_analytical_solver analytical_solver_ty
7578
atom_netlist,
7679
pre_cluster_timing_manager,
7780
ap_timing_tradeoff,
81+
num_threads,
7882
log_verbosity_);
7983

8084
// Build the density manager used by the partial legalizer.

vpr/src/analytical_place/global_placer.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ std::unique_ptr<GlobalPlacer> make_global_placer(e_ap_analytical_solver analytic
8383
const std::vector<t_physical_tile_type>& physical_tile_types,
8484
const PreClusterTimingManager& pre_cluster_timing_manager,
8585
float ap_timing_tradeoff,
86+
unsigned num_threads,
8687
int log_verbosity);
8788

8889
/**
@@ -148,6 +149,7 @@ class SimPLGlobalPlacer : public GlobalPlacer {
148149
const std::vector<t_physical_tile_type>& physical_tile_types,
149150
const PreClusterTimingManager& pre_cluster_timing_manager,
150151
float ap_timing_tradeoff,
152+
unsigned num_threads,
151153
int log_verbosity);
152154

153155
/**

vpr/src/base/SetupVPR.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,7 @@ void SetupAPOpts(const t_options& options,
559559
apOpts.detailed_placer_type = options.ap_detailed_placer.value();
560560
apOpts.ap_timing_tradeoff = options.ap_timing_tradeoff.value();
561561
apOpts.appack_max_dist_th = options.appack_max_dist_th.value();
562+
apOpts.num_threads = options.num_workers.value();
562563
apOpts.log_verbosity = options.ap_verbosity.value();
563564
}
564565

vpr/src/base/vpr_types.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1116,6 +1116,8 @@ struct t_placer_opts {
11161116
* @param appack_max_dist_th
11171117
* Array of string passed by the user to configure the max candidate
11181118
* distance thresholds.
1119+
* @param num_threads
1120+
* The number of threads the AP flow can use.
11191121
* @param log_verbosity
11201122
* The verbosity level of log messages in the AP flow, with higher
11211123
* values leading to more verbose messages.
@@ -1135,6 +1137,8 @@ struct t_ap_opts {
11351137

11361138
std::vector<std::string> appack_max_dist_th;
11371139

1140+
unsigned num_threads;
1141+
11381142
int log_verbosity;
11391143
};
11401144

0 commit comments

Comments
 (0)