Skip to content

Commit 691dc2d

Browse files
committed
[Libomptarget] Begin implementing support for RPC services
This patch adds the initial support for running an RPC server in libomptarget to handle host services. We interface with the library provided by the `libc` project to stand up a basic server. We introduce a new type that is controlled by the plugin and has each device initialize its interface. We then run a basic server to check the RPC buffer. This patch does not fully implement the interface. In the future each plugin will want to define special handlers via the interface to support things like malloc or H2D copies coming from RPC. We will also want to allow the plugin to specify the number of ports. This is currently capped in the implementation but will be adjusted soon. Right now running the server is handled by whatever thread ends up doing the waiting. This is probably not a completely sound solution but I am not overly familiar with the behaviour of OpenMP tasks and what would be required here. This works okay with synchronous regions, and somewhat fine with `nowait` regions, but I've observed some weird behavior when one of those regions calls `exit`. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D154312
1 parent fa78983 commit 691dc2d

File tree

13 files changed

+491
-14
lines changed

13 files changed

+491
-14
lines changed

libc/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ include(LLVMLibCArchitectures)
9292
if(LIBC_TARGET_ARCHITECTURE_IS_GPU)
9393
set(LIBC_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/include)
9494
set(LIBC_INSTALL_INCLUDE_DIR ${CMAKE_INSTALL_INCLUDEDIR}/gpu-none-llvm)
95-
set(LIBC_LIBRARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/lib)
95+
set(LIBC_LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR})
9696
elseif(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND LIBC_ENABLE_USE_BY_CLANG)
9797
set(LIBC_INCLUDE_DIR ${LLVM_BINARY_DIR}/include/${LLVM_DEFAULT_TARGET_TRIPLE})
9898
set(LIBC_INSTALL_INCLUDE_DIR ${CMAKE_INSTALL_INCLUDEDIR}/${LLVM_DEFAULT_TARGET_TRIPLE})

openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -519,17 +519,25 @@ struct AMDGPUSignalTy {
519519
}
520520

521521
/// Wait until the signal gets a zero value.
522-
Error wait(const uint64_t ActiveTimeout = 0) const {
523-
if (ActiveTimeout) {
522+
Error wait(const uint64_t ActiveTimeout = 0,
523+
RPCHandleTy *RPCHandle = nullptr) const {
524+
if (ActiveTimeout && !RPCHandle) {
524525
hsa_signal_value_t Got = 1;
525526
Got = hsa_signal_wait_scacquire(Signal, HSA_SIGNAL_CONDITION_EQ, 0,
526527
ActiveTimeout, HSA_WAIT_STATE_ACTIVE);
527528
if (Got == 0)
528529
return Plugin::success();
529530
}
531+
532+
// If there is an RPC device attached to this stream we run it as a server.
533+
uint64_t Timeout = RPCHandle ? 8192 : UINT64_MAX;
534+
auto WaitState = RPCHandle ? HSA_WAIT_STATE_ACTIVE : HSA_WAIT_STATE_BLOCKED;
530535
while (hsa_signal_wait_scacquire(Signal, HSA_SIGNAL_CONDITION_EQ, 0,
531-
UINT64_MAX, HSA_WAIT_STATE_BLOCKED) != 0)
532-
;
536+
Timeout, WaitState) != 0) {
537+
if (RPCHandle)
538+
if (auto Err = RPCHandle->runServer())
539+
return Err;
540+
}
533541
return Plugin::success();
534542
}
535543

@@ -895,6 +903,11 @@ struct AMDGPUStreamTy {
895903
/// operation that was already finalized in a previous stream synchronize.
896904
uint32_t SyncCycle;
897905

906+
/// A pointer associated with an RPC server running on the given device. If
907+
/// RPC is not being used this will be a null pointer. Otherwise, this
908+
/// indicates that an RPC server is expected to be run on this stream.
909+
RPCHandleTy *RPCHandle;
910+
898911
/// Mutex to protect stream's management.
899912
mutable std::mutex Mutex;
900913

@@ -1050,6 +1063,9 @@ struct AMDGPUStreamTy {
10501063
/// Deinitialize the stream's signals.
10511064
Error deinit() { return Plugin::success(); }
10521065

1066+
/// Attach an RPC handle to this stream.
1067+
void setRPCHandle(RPCHandleTy *Handle) { RPCHandle = Handle; }
1068+
10531069
/// Push an asynchronous kernel to the stream. The kernel arguments must be
10541070
/// placed in a special allocation for kernel args and must keep alive until
10551071
/// the kernel finalizes. Once the kernel is finished, the stream will release
@@ -1264,7 +1280,8 @@ struct AMDGPUStreamTy {
12641280
return Plugin::success();
12651281

12661282
// Wait until all previous operations on the stream have completed.
1267-
if (auto Err = Slots[last()].Signal->wait(StreamBusyWaitMicroseconds))
1283+
if (auto Err =
1284+
Slots[last()].Signal->wait(StreamBusyWaitMicroseconds, RPCHandle))
12681285
return Err;
12691286

12701287
// Reset the stream and perform all pending post actions.
@@ -1786,6 +1803,12 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
17861803
/// AMDGPU devices do not have the concept of contexts.
17871804
Error setContext() override { return Plugin::success(); }
17881805

1806+
/// We want to set up the RPC server for host services to the GPU if it is
1807+
/// available.
1808+
bool shouldSetupRPCServer() const override {
1809+
return libomptargetSupportsRPC();
1810+
}
1811+
17891812
/// Get the stream of the asynchronous info structure or get a new one.
17901813
AMDGPUStreamTy &getStream(AsyncInfoWrapperTy &AsyncInfoWrapper) {
17911814
AMDGPUStreamTy *&Stream = AsyncInfoWrapper.getQueueAs<AMDGPUStreamTy *>();
@@ -2507,7 +2530,7 @@ AMDGPUStreamTy::AMDGPUStreamTy(AMDGPUDeviceTy &Device)
25072530
: Agent(Device.getAgent()), Queue(Device.getNextQueue()),
25082531
SignalManager(Device.getSignalManager()),
25092532
// Initialize the std::deque with some empty positions.
2510-
Slots(32), NextSlot(0), SyncCycle(0),
2533+
Slots(32), NextSlot(0), SyncCycle(0), RPCHandle(nullptr),
25112534
StreamBusyWaitMicroseconds(Device.getStreamBusyWaitMicroseconds()) {}
25122535

25132536
/// Class implementing the AMDGPU-specific functionalities of the global
@@ -2837,6 +2860,10 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
28372860
AMDGPUDeviceTy &AMDGPUDevice = static_cast<AMDGPUDeviceTy &>(GenericDevice);
28382861
AMDGPUStreamTy &Stream = AMDGPUDevice.getStream(AsyncInfoWrapper);
28392862

2863+
// If this kernel requires an RPC server we attach its pointer to the stream.
2864+
if (GenericDevice.getRPCHandle())
2865+
Stream.setRPCHandle(GenericDevice.getRPCHandle());
2866+
28402867
// Push the kernel launch into the stream.
28412868
return Stream.pushKernelLaunch(*this, AllArgs, NumThreads, NumBlocks,
28422869
GroupSize, ArgsMemoryManager);

openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# NOTE: Don't try to build `PluginInterface` using `add_llvm_library` because we
1414
# don't want to export `PluginInterface` while `add_llvm_library` requires that.
1515
add_library(PluginInterface OBJECT
16-
PluginInterface.cpp GlobalHandler.cpp JIT.cpp)
16+
PluginInterface.cpp GlobalHandler.cpp JIT.cpp RPC.cpp)
1717

1818
# Only enable JIT for those targets that LLVM can support.
1919
string(TOUPPER "${LLVM_TARGETS_TO_BUILD}" TargetsSupported)
@@ -62,6 +62,25 @@ target_link_libraries(PluginInterface
6262
MemoryManager
6363
)
6464

65+
# Include the RPC server from the `libc` project if available.
66+
set(libomptarget_supports_rpc FALSE)
67+
if(TARGET llvmlibc_rpc_server)
68+
target_link_libraries(PluginInterface PRIVATE llvmlibc_rpc_server)
69+
target_compile_definitions(PluginInterface PRIVATE LIBOMPTARGET_RPC_SUPPORT)
70+
set(libomptarget_supports_rpc TRUE)
71+
else()
72+
find_library(llvmlibc_rpc_server NAMES llvmlibc_rpc_server
73+
PATHS ${LIBOMPTARGET_LLVM_LIBRARY_DIR} NO_DEFAULT_PATH)
74+
if(llvmlibc_rpc_server)
75+
message(WARNING ${llvmlibc_rpc_server})
76+
target_link_libraries(PluginInterface PRIVATE llvmlibc_rpc_server)
77+
target_compile_definitions(PluginInterface PRIVATE LIBOMPTARGET_RPC_SUPPORT)
78+
set(libomptarget_supports_rpc TRUE)
79+
endif()
80+
endif()
81+
set(LIBOMPTARGET_GPU_LIBC_SUPPORT ${libomptarget_supports_rpc} CACHE BOOL
82+
"Libomptarget support for the GPU libc")
83+
6584
if ((OMPT_TARGET_DEFAULT) AND (LIBOMPTARGET_OMPT_SUPPORT))
6685
target_link_libraries(PluginInterface PUBLIC OMPT)
6786
endif()

openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,7 @@ GenericDeviceTy::GenericDeviceTy(int32_t DeviceId, int32_t NumDevices,
396396
OMPX_InitialNumEvents("LIBOMPTARGET_NUM_INITIAL_EVENTS", 32),
397397
DeviceId(DeviceId), GridValues(OMPGridValues),
398398
PeerAccesses(NumDevices, PeerAccessState::PENDING), PeerAccessesLock(),
399-
PinnedAllocs(*this) {}
399+
PinnedAllocs(*this), RPCHandle(nullptr) {}
400400

401401
Error GenericDeviceTy::init(GenericPluginTy &Plugin) {
402402
if (auto Err = initImpl(Plugin))
@@ -453,6 +453,10 @@ Error GenericDeviceTy::deinit() {
453453
if (RecordReplay.isRecordingOrReplaying())
454454
RecordReplay.deinit();
455455

456+
if (RPCHandle)
457+
if (auto Err = RPCHandle->deinitDevice())
458+
return std::move(Err);
459+
456460
return deinitImpl();
457461
}
458462

@@ -493,6 +497,9 @@ GenericDeviceTy::loadBinary(GenericPluginTy &Plugin,
493497
if (auto Err = registerOffloadEntries(*Image))
494498
return std::move(Err);
495499

500+
if (auto Err = setupRPCServer(Plugin, *Image))
501+
return std::move(Err);
502+
496503
// Return the pointer to the table of entries.
497504
return Image->getOffloadEntryTable();
498505
}
@@ -525,6 +532,33 @@ Error GenericDeviceTy::setupDeviceEnvironment(GenericPluginTy &Plugin,
525532
return Plugin::success();
526533
}
527534

535+
Error GenericDeviceTy::setupRPCServer(GenericPluginTy &Plugin,
536+
DeviceImageTy &Image) {
537+
// The plugin either does not need an RPC server or it is unavailable.
538+
if (!shouldSetupRPCServer())
539+
return Plugin::success();
540+
541+
// Check if this device needs to run an RPC server.
542+
RPCServerTy &Server = Plugin.getRPCServer();
543+
auto UsingOrErr =
544+
Server.isDeviceUsingRPC(*this, Plugin.getGlobalHandler(), Image);
545+
if (!UsingOrErr)
546+
return UsingOrErr.takeError();
547+
548+
if (!UsingOrErr.get())
549+
return Plugin::success();
550+
551+
if (auto Err = Server.initDevice(*this, Plugin.getGlobalHandler(), Image))
552+
return std::move(Err);
553+
554+
auto DeviceOrErr = Server.getDevice(*this);
555+
if (!DeviceOrErr)
556+
return DeviceOrErr.takeError();
557+
RPCHandle = *DeviceOrErr;
558+
DP("Running an RPC server on device %d\n", getDeviceId());
559+
return Plugin::success();
560+
}
561+
528562
Error GenericDeviceTy::registerOffloadEntries(DeviceImageTy &Image) {
529563
const __tgt_offload_entry *Begin = Image.getTgtImage()->EntriesBegin;
530564
const __tgt_offload_entry *End = Image.getTgtImage()->EntriesEnd;
@@ -1088,6 +1122,9 @@ Error GenericPluginTy::init() {
10881122
GlobalHandler = Plugin::createGlobalHandler();
10891123
assert(GlobalHandler && "Invalid global handler");
10901124

1125+
RPCServer = new RPCServerTy(NumDevices);
1126+
assert(RPCServer && "Invalid RPC server");
1127+
10911128
return Plugin::success();
10921129
}
10931130

@@ -1105,6 +1142,9 @@ Error GenericPluginTy::deinit() {
11051142
assert(!Devices[DeviceId] && "Device was not deinitialized");
11061143
}
11071144

1145+
if (RPCServer)
1146+
delete RPCServer;
1147+
11081148
// Perform last deinitializations on the plugin.
11091149
return deinitImpl();
11101150
}
@@ -1139,6 +1179,14 @@ Error GenericPluginTy::deinitDevice(int32_t DeviceId) {
11391179
return Plugin::success();
11401180
}
11411181

1182+
const bool llvm::omp::target::plugin::libomptargetSupportsRPC() {
1183+
#ifdef LIBOMPTARGET_RPC_SUPPORT
1184+
return true;
1185+
#else
1186+
return false;
1187+
#endif
1188+
}
1189+
11421190
/// Exposed library API function, basically wrappers around the GenericDeviceTy
11431191
/// functionality with the same name. All non-async functions are redirected
11441192
/// to the async versions right away with a NULL AsyncInfoPtr.

openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "GlobalHandler.h"
2525
#include "JIT.h"
2626
#include "MemoryManager.h"
27+
#include "RPC.h"
2728
#include "Utilities.h"
2829
#include "omptarget.h"
2930

@@ -600,6 +601,11 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
600601
/// this behavior by overriding the shouldSetupDeviceEnvironment function.
601602
Error setupDeviceEnvironment(GenericPluginTy &Plugin, DeviceImageTy &Image);
602603

604+
// Setup the RPC server for this device if needed. This may not run on some
605+
// plugins like the CPU targets. By default, it will not be executed so it is
606+
// up to the target to override this using the shouldSetupRPCServer function.
607+
Error setupRPCServer(GenericPluginTy &Plugin, DeviceImageTy &Image);
608+
603609
/// Register the offload entries for a specific image on the device.
604610
Error registerOffloadEntries(DeviceImageTy &Image);
605611

@@ -751,6 +757,9 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
751757
return OMPX_MinThreadsForLowTripCount;
752758
}
753759

760+
/// Get the RPC server running on this device.
761+
RPCHandleTy *getRPCHandle() const { return RPCHandle; }
762+
754763
private:
755764
/// Register offload entry for global variable.
756765
Error registerGlobalOffloadEntry(DeviceImageTy &DeviceImage,
@@ -780,6 +789,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
780789
/// setupDeviceEnvironment() function.
781790
virtual bool shouldSetupDeviceEnvironment() const { return true; }
782791

792+
/// Indicate whether or not the device should setup the RPC server. This is
793+
/// only necessary for unhosted targets like the GPU.
794+
virtual bool shouldSetupRPCServer() const { return false; }
795+
783796
/// Pointer to the memory manager or nullptr if not available.
784797
MemoryManagerTy *MemoryManager;
785798

@@ -837,6 +850,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
837850

838851
/// Map of host pinned allocations used to optimize device transfers.
839852
PinnedAllocationMapTy PinnedAllocs;
853+
854+
/// A pointer to an RPC server instance attached to this device if present.
855+
/// This is used to run the RPC server during task synchronization.
856+
RPCHandleTy *RPCHandle;
840857
};
841858

842859
/// Class implementing common functionalities of offload plugins. Each plugin
@@ -892,6 +909,12 @@ struct GenericPluginTy {
892909
/// plugin.
893910
JITEngine &getJIT() { return JIT; }
894911

912+
/// Get a reference to the RPC server used to provide host services.
913+
RPCServerTy &getRPCServer() {
914+
assert(RPCServer && "RPC server not initialized");
915+
return *RPCServer;
916+
}
917+
895918
/// Get the OpenMP requires flags set for this plugin.
896919
int64_t getRequiresFlags() const { return RequiresFlags; }
897920

@@ -946,6 +969,9 @@ struct GenericPluginTy {
946969

947970
/// The JIT engine shared by all devices connected to this plugin.
948971
JITEngine JIT;
972+
973+
/// The interface between the plugin and the GPU for host services.
974+
RPCServerTy *RPCServer;
949975
};
950976

951977
/// Class for simplifying the getter operation of the plugin. Anywhere on the
@@ -1209,6 +1235,9 @@ template <typename ResourceRef> class GenericDeviceResourceManagerTy {
12091235
std::deque<ResourceRef> ResourcePool;
12101236
};
12111237

1238+
/// A static check on whether or not we support RPC in libomptarget.
1239+
const bool libomptargetSupportsRPC();
1240+
12121241
} // namespace plugin
12131242
} // namespace target
12141243
} // namespace omp

0 commit comments

Comments
 (0)