-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[Libomptarget] Replace global PluginTy::get
interface with references
#86595
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Summary: We have a plugin singleton that implements the Plugin interface. This then spawns separate device and kernels. Previously when these needed to reach into the global singleton they would use the `PluginTy::get` routine to get access to it. In the future we will move away from this as the lifetime of the plugin will be handled by `libomptarget` directly. This patch removes uses of this inside of the plugin implementaion themselves by simply keeping a reference to the plugin inside of the device. The external `__tgt_rtl` functions still use the global method, but will be removed later.
@llvm/pr-subscribers-backend-amdgpu Author: Joseph Huber (jhuber6) ChangesSummary: The external Full diff: https://github.com/llvm/llvm-project/pull/86595.diff 5 Files Affected:
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
index 14461f14dd6706..2dd08dd5d0b4ea 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -371,7 +371,8 @@ struct AMDGPUMemoryPoolTy {
struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy {
/// Create an empty memory manager.
- AMDGPUMemoryManagerTy() : MemoryPool(nullptr), MemoryManager(nullptr) {}
+ AMDGPUMemoryManagerTy(AMDGPUPluginTy &Plugin)
+ : Plugin(Plugin), MemoryPool(nullptr), MemoryManager(nullptr) {}
/// Initialize the memory manager from a memory pool.
Error init(AMDGPUMemoryPoolTy &MemoryPool) {
@@ -429,6 +430,9 @@ struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy {
return OFFLOAD_SUCCESS;
}
+ /// The underlying plugin that owns this memory manager.
+ AMDGPUPluginTy &Plugin;
+
/// The memory pool used to allocate memory.
AMDGPUMemoryPoolTy *MemoryPool;
@@ -1744,9 +1748,10 @@ struct AMDGenericDeviceTy {
/// HSA host agent. We aggregate all its resources into the same instance.
struct AMDHostDeviceTy : public AMDGenericDeviceTy {
/// Create a host device from an array of host agents.
- AMDHostDeviceTy(const llvm::SmallVector<hsa_agent_t> &HostAgents)
- : AMDGenericDeviceTy(), Agents(HostAgents), ArgsMemoryManager(),
- PinnedMemoryManager() {
+ AMDHostDeviceTy(AMDGPUPluginTy &Plugin,
+ const llvm::SmallVector<hsa_agent_t> &HostAgents)
+ : AMDGenericDeviceTy(), Agents(HostAgents), ArgsMemoryManager(Plugin),
+ PinnedMemoryManager(Plugin) {
assert(HostAgents.size() && "No host agent found");
}
@@ -1840,9 +1845,10 @@ struct AMDHostDeviceTy : public AMDGenericDeviceTy {
/// generic device class.
struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
// Create an AMDGPU device with a device id and default AMDGPU grid values.
- AMDGPUDeviceTy(int32_t DeviceId, int32_t NumDevices,
+ AMDGPUDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId, int32_t NumDevices,
AMDHostDeviceTy &HostDevice, hsa_agent_t Agent)
- : GenericDeviceTy(DeviceId, NumDevices, {0}), AMDGenericDeviceTy(),
+ : GenericDeviceTy(Plugin, DeviceId, NumDevices, {0}),
+ AMDGenericDeviceTy(),
OMPX_NumQueues("LIBOMPTARGET_AMDGPU_NUM_HSA_QUEUES", 4),
OMPX_QueueSize("LIBOMPTARGET_AMDGPU_HSA_QUEUE_SIZE", 512),
OMPX_DefaultTeamsPerCU("LIBOMPTARGET_AMDGPU_TEAMS_PER_CU", 4),
@@ -2088,7 +2094,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
/// Allocate and construct an AMDGPU kernel.
Expected<GenericKernelTy &> constructKernel(const char *Name) override {
// Allocate and construct the AMDGPU kernel.
- AMDGPUKernelTy *AMDGPUKernel = PluginTy::get().allocate<AMDGPUKernelTy>();
+ AMDGPUKernelTy *AMDGPUKernel = Plugin.allocate<AMDGPUKernelTy>();
if (!AMDGPUKernel)
return Plugin::error("Failed to allocate memory for AMDGPU kernel");
@@ -2138,8 +2144,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
Expected<DeviceImageTy *> loadBinaryImpl(const __tgt_device_image *TgtImage,
int32_t ImageId) override {
// Allocate and initialize the image object.
- AMDGPUDeviceImageTy *AMDImage =
- PluginTy::get().allocate<AMDGPUDeviceImageTy>();
+ AMDGPUDeviceImageTy *AMDImage = Plugin.allocate<AMDGPUDeviceImageTy>();
new (AMDImage) AMDGPUDeviceImageTy(ImageId, *this, TgtImage);
// Load the HSA executable.
@@ -2697,7 +2702,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
}
Error setDeviceHeapSize(uint64_t Value) override {
for (DeviceImageTy *Image : LoadedImages)
- if (auto Err = setupDeviceMemoryPool(PluginTy::get(), *Image, Value))
+ if (auto Err = setupDeviceMemoryPool(Plugin, *Image, Value))
return Err;
DeviceMemoryPoolSize = Value;
return Plugin::success();
@@ -2737,7 +2742,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return utils::iterateAgentMemoryPools(
Agent, [&](hsa_amd_memory_pool_t HSAMemoryPool) {
AMDGPUMemoryPoolTy *MemoryPool =
- PluginTy::get().allocate<AMDGPUMemoryPoolTy>();
+ Plugin.allocate<AMDGPUMemoryPoolTy>();
new (MemoryPool) AMDGPUMemoryPoolTy(HSAMemoryPool);
AllMemoryPools.push_back(MemoryPool);
return HSA_STATUS_SUCCESS;
@@ -3090,7 +3095,7 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
// Initialize the host device using host agents.
HostDevice = allocate<AMDHostDeviceTy>();
- new (HostDevice) AMDHostDeviceTy(HostAgents);
+ new (HostDevice) AMDHostDeviceTy(*this, HostAgents);
// Setup the memory pools of available for the host.
if (auto Err = HostDevice->init())
@@ -3116,8 +3121,9 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
}
/// Creates an AMDGPU device.
- GenericDeviceTy *createDevice(int32_t DeviceId, int32_t NumDevices) override {
- return new AMDGPUDeviceTy(DeviceId, NumDevices, getHostDevice(),
+ GenericDeviceTy *createDevice(GenericPluginTy &Plugin, int32_t DeviceId,
+ int32_t NumDevices) override {
+ return new AMDGPUDeviceTy(Plugin, DeviceId, NumDevices, getHostDevice(),
getKernelAgent(DeviceId));
}
@@ -3248,7 +3254,9 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
// 56 bytes per allocation.
uint32_t AllArgsSize = KernelArgsSize + ImplicitArgsSize;
- AMDHostDeviceTy &HostDevice = PluginTy::get<AMDGPUPluginTy>().getHostDevice();
+ AMDGPUPluginTy &AMDGPUPlugin =
+ static_cast<AMDGPUPluginTy &>(GenericDevice.Plugin);
+ AMDHostDeviceTy &HostDevice = AMDGPUPlugin.getHostDevice();
AMDGPUMemoryManagerTy &ArgsMemoryManager = HostDevice.getArgsMemoryManager();
void *AllArgs = nullptr;
@@ -3385,7 +3393,7 @@ void *AMDGPUMemoryManagerTy::allocate(size_t Size, void *HstPtr,
}
assert(Ptr && "Invalid pointer");
- auto &KernelAgents = PluginTy::get<AMDGPUPluginTy>().getKernelAgents();
+ auto &KernelAgents = Plugin.getKernelAgents();
// Allow all kernel agents to access the allocation.
if (auto Err = MemoryPool->enableAccess(Ptr, Size, KernelAgents)) {
@@ -3428,7 +3436,8 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
}
if (Alloc) {
- auto &KernelAgents = PluginTy::get<AMDGPUPluginTy>().getKernelAgents();
+ auto &KernelAgents =
+ static_cast<AMDGPUPluginTy &>(Plugin).getKernelAgents();
// Inherently necessary for host or shared allocations
// Also enabled for device memory to allow device to device memcpy
diff --git a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
index 1440f8f678d820..b16b081e5effd1 100644
--- a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
@@ -610,7 +610,7 @@ class PinnedAllocationMapTy {
struct GenericDeviceTy : public DeviceAllocatorTy {
/// Construct a device with its device id within the plugin, the number of
/// devices in the plugin and the grid values for that kind of device.
- GenericDeviceTy(int32_t DeviceId, int32_t NumDevices,
+ GenericDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId, int32_t NumDevices,
const llvm::omp::GV &GridValues);
/// Get the device identifier within the corresponding plugin. Notice that
@@ -860,6 +860,9 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
/// Allocate and construct a kernel object.
virtual Expected<GenericKernelTy &> constructKernel(const char *Name) = 0;
+ /// Reference to the underlying plugin that created this device.
+ GenericPluginTy &Plugin;
+
private:
/// Get and set the stack size and heap size for the device. If not used, the
/// plugin can implement the setters as no-op and setting the output
@@ -977,7 +980,8 @@ struct GenericPluginTy {
virtual Error deinitImpl() = 0;
/// Create a new device for the underlying plugin.
- virtual GenericDeviceTy *createDevice(int32_t DeviceID,
+ virtual GenericDeviceTy *createDevice(GenericPluginTy &Plugin,
+ int32_t DeviceID,
int32_t NumDevices) = 0;
/// Create a new global handler for the underlying plugin.
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
index 4db787a2ea9185..c32bd089d8a995 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
@@ -438,7 +438,7 @@ Error GenericKernelTy::init(GenericDeviceTy &GenericDevice,
// Retrieve kernel environment object for the kernel.
GlobalTy KernelEnv(std::string(Name) + "_kernel_environment",
sizeof(KernelEnvironment), &KernelEnvironment);
- GenericGlobalHandlerTy &GHandler = PluginTy::get().getGlobalHandler();
+ GenericGlobalHandlerTy &GHandler = GenericDevice.Plugin.getGlobalHandler();
if (auto Err =
GHandler.readGlobalFromImage(GenericDevice, *ImagePtr, KernelEnv)) {
[[maybe_unused]] std::string ErrStr = toString(std::move(Err));
@@ -710,9 +710,10 @@ uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
return std::min(PreferredNumBlocks, GenericDevice.getBlockLimit());
}
-GenericDeviceTy::GenericDeviceTy(int32_t DeviceId, int32_t NumDevices,
+GenericDeviceTy::GenericDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId,
+ int32_t NumDevices,
const llvm::omp::GV &OMPGridValues)
- : MemoryManager(nullptr), OMP_TeamLimit("OMP_TEAM_LIMIT"),
+ : Plugin(Plugin), MemoryManager(nullptr), OMP_TeamLimit("OMP_TEAM_LIMIT"),
OMP_NumTeams("OMP_NUM_TEAMS"),
OMP_TeamsThreadLimit("OMP_TEAMS_THREAD_LIMIT"),
OMPX_DebugKind("LIBOMPTARGET_DEVICE_RTL_DEBUG"),
@@ -1522,7 +1523,7 @@ Error GenericPluginTy::initDevice(int32_t DeviceId) {
assert(!Devices[DeviceId] && "Device already initialized");
// Create the device and save the reference.
- GenericDeviceTy *Device = createDevice(DeviceId, NumDevices);
+ GenericDeviceTy *Device = createDevice(*this, DeviceId, NumDevices);
assert(Device && "Invalid device");
// Save the device reference into the list.
diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
index d43c723c350287..fc74c6aa23fddd 100644
--- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
@@ -255,8 +255,8 @@ struct CUDAEventRef final : public GenericDeviceResourceRef {
/// generic device class.
struct CUDADeviceTy : public GenericDeviceTy {
// Create a CUDA device with a device id and the default CUDA grid values.
- CUDADeviceTy(int32_t DeviceId, int32_t NumDevices)
- : GenericDeviceTy(DeviceId, NumDevices, NVPTXGridValues),
+ CUDADeviceTy(GenericPluginTy &Plugin, int32_t DeviceId, int32_t NumDevices)
+ : GenericDeviceTy(Plugin, DeviceId, NumDevices, NVPTXGridValues),
CUDAStreamManager(*this), CUDAEventManager(*this) {}
~CUDADeviceTy() {}
@@ -471,7 +471,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
/// Allocate and construct a CUDA kernel.
Expected<GenericKernelTy &> constructKernel(const char *Name) override {
// Allocate and construct the CUDA kernel.
- CUDAKernelTy *CUDAKernel = PluginTy::get().allocate<CUDAKernelTy>();
+ CUDAKernelTy *CUDAKernel = Plugin.allocate<CUDAKernelTy>();
if (!CUDAKernel)
return Plugin::error("Failed to allocate memory for CUDA kernel");
@@ -529,8 +529,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
return std::move(Err);
// Allocate and initialize the image object.
- CUDADeviceImageTy *CUDAImage =
- PluginTy::get().allocate<CUDADeviceImageTy>();
+ CUDADeviceImageTy *CUDAImage = Plugin.allocate<CUDADeviceImageTy>();
new (CUDAImage) CUDADeviceImageTy(ImageId, *this, TgtImage);
// Load the CUDA module.
@@ -1373,8 +1372,9 @@ struct CUDAPluginTy final : public GenericPluginTy {
Error deinitImpl() override { return Plugin::success(); }
/// Creates a CUDA device to use for offloading.
- GenericDeviceTy *createDevice(int32_t DeviceId, int32_t NumDevices) override {
- return new CUDADeviceTy(DeviceId, NumDevices);
+ GenericDeviceTy *createDevice(GenericPluginTy &Plugin, int32_t DeviceId,
+ int32_t NumDevices) override {
+ return new CUDADeviceTy(Plugin, DeviceId, NumDevices);
}
/// Creates a CUDA global handler.
diff --git a/openmp/libomptarget/plugins-nextgen/host/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/host/src/rtl.cpp
index ea8ed8d6a8569e..f0ce24249301af 100644
--- a/openmp/libomptarget/plugins-nextgen/host/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/host/src/rtl.cpp
@@ -66,7 +66,7 @@ struct GenELF64KernelTy : public GenericKernelTy {
GlobalTy Global(getName(), 0);
// Get the metadata (address) of the kernel function.
- GenericGlobalHandlerTy &GHandler = PluginTy::get().getGlobalHandler();
+ GenericGlobalHandlerTy &GHandler = Device.Plugin.getGlobalHandler();
if (auto Err = GHandler.getGlobalMetadataFromDevice(Device, Image, Global))
return Err;
@@ -132,8 +132,9 @@ struct GenELF64DeviceImageTy : public DeviceImageTy {
/// Class implementing the device functionalities for GenELF64.
struct GenELF64DeviceTy : public GenericDeviceTy {
/// Create the device with a specific id.
- GenELF64DeviceTy(int32_t DeviceId, int32_t NumDevices)
- : GenericDeviceTy(DeviceId, NumDevices, GenELF64GridValues) {}
+ GenELF64DeviceTy(GenericPluginTy &Plugin, int32_t DeviceId,
+ int32_t NumDevices)
+ : GenericDeviceTy(Plugin, DeviceId, NumDevices, GenELF64GridValues) {}
~GenELF64DeviceTy() {}
@@ -149,8 +150,7 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
/// Construct the kernel for a specific image on the device.
Expected<GenericKernelTy &> constructKernel(const char *Name) override {
// Allocate and construct the kernel.
- GenELF64KernelTy *GenELF64Kernel =
- PluginTy::get().allocate<GenELF64KernelTy>();
+ GenELF64KernelTy *GenELF64Kernel = Plugin.allocate<GenELF64KernelTy>();
if (!GenELF64Kernel)
return Plugin::error("Failed to allocate memory for GenELF64 kernel");
@@ -166,8 +166,7 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
Expected<DeviceImageTy *> loadBinaryImpl(const __tgt_device_image *TgtImage,
int32_t ImageId) override {
// Allocate and initialize the image object.
- GenELF64DeviceImageTy *Image =
- PluginTy::get().allocate<GenELF64DeviceImageTy>();
+ GenELF64DeviceImageTy *Image = Plugin.allocate<GenELF64DeviceImageTy>();
new (Image) GenELF64DeviceImageTy(ImageId, *this, TgtImage);
// Create a temporary file.
@@ -400,8 +399,9 @@ struct GenELF64PluginTy final : public GenericPluginTy {
Error deinitImpl() override { return Plugin::success(); }
/// Creates a generic ELF device.
- GenericDeviceTy *createDevice(int32_t DeviceId, int32_t NumDevices) override {
- return new GenELF64DeviceTy(DeviceId, NumDevices);
+ GenericDeviceTy *createDevice(GenericPluginTy &Plugin, int32_t DeviceId,
+ int32_t NumDevices) override {
+ return new GenELF64DeviceTy(Plugin, DeviceId, NumDevices);
}
/// Creates a generic global handler.
|
@ronlieb @dhruvachak AMD version https://gist.github.com/jhuber6/3531aff7d6cd5ae99b6d1307b6115e34. PTAL for OMPT changes. |
Summary:
We have a plugin singleton that implements the Plugin interface. This
then spawns separate device and kernels. Previously when these needed to
reach into the global singleton they would use the
PluginTy::get
routine to get access to it. In the future we will move away from this
as the lifetime of the plugin will be handled by
libomptarget
directly. This patch removes uses of this inside of the plugin
implementaion themselves by simply keeping a reference to the plugin
inside of the device.
The external
__tgt_rtl
functions still use the global method, but willbe removed later.