Skip to content

Commit 4dc3225

Browse files
authored
[Libomptarget] Replace global PluginTy::get interface with references (#86595)
Summary: We have a plugin singleton that implements the Plugin interface. This then spawns separate devices and kernels. Previously, when these needed to reach into the global singleton, they would use the `PluginTy::get` routine to get access to it. In the future we will move away from this, as the lifetime of the plugin will be handled by `libomptarget` directly. This patch removes uses of this routine inside the plugin implementations themselves by simply keeping a reference to the plugin inside of the device. The external `__tgt_rtl` functions still use the global method, but this will be removed later.
1 parent bfb12ef commit 4dc3225

File tree

5 files changed

+53
-39
lines changed

5 files changed

+53
-39
lines changed

openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,8 @@ struct AMDGPUMemoryPoolTy {
371371
struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy {
372372

373373
/// Create an empty memory manager.
374-
AMDGPUMemoryManagerTy() : MemoryPool(nullptr), MemoryManager(nullptr) {}
374+
AMDGPUMemoryManagerTy(AMDGPUPluginTy &Plugin)
375+
: Plugin(Plugin), MemoryPool(nullptr), MemoryManager(nullptr) {}
375376

376377
/// Initialize the memory manager from a memory pool.
377378
Error init(AMDGPUMemoryPoolTy &MemoryPool) {
@@ -429,6 +430,9 @@ struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy {
429430
return OFFLOAD_SUCCESS;
430431
}
431432

433+
/// The underlying plugin that owns this memory manager.
434+
AMDGPUPluginTy &Plugin;
435+
432436
/// The memory pool used to allocate memory.
433437
AMDGPUMemoryPoolTy *MemoryPool;
434438

@@ -1744,9 +1748,10 @@ struct AMDGenericDeviceTy {
17441748
/// HSA host agent. We aggregate all its resources into the same instance.
17451749
struct AMDHostDeviceTy : public AMDGenericDeviceTy {
17461750
/// Create a host device from an array of host agents.
1747-
AMDHostDeviceTy(const llvm::SmallVector<hsa_agent_t> &HostAgents)
1748-
: AMDGenericDeviceTy(), Agents(HostAgents), ArgsMemoryManager(),
1749-
PinnedMemoryManager() {
1751+
AMDHostDeviceTy(AMDGPUPluginTy &Plugin,
1752+
const llvm::SmallVector<hsa_agent_t> &HostAgents)
1753+
: AMDGenericDeviceTy(), Agents(HostAgents), ArgsMemoryManager(Plugin),
1754+
PinnedMemoryManager(Plugin) {
17501755
assert(HostAgents.size() && "No host agent found");
17511756
}
17521757

@@ -1840,9 +1845,10 @@ struct AMDHostDeviceTy : public AMDGenericDeviceTy {
18401845
/// generic device class.
18411846
struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
18421847
// Create an AMDGPU device with a device id and default AMDGPU grid values.
1843-
AMDGPUDeviceTy(int32_t DeviceId, int32_t NumDevices,
1848+
AMDGPUDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId, int32_t NumDevices,
18441849
AMDHostDeviceTy &HostDevice, hsa_agent_t Agent)
1845-
: GenericDeviceTy(DeviceId, NumDevices, {0}), AMDGenericDeviceTy(),
1850+
: GenericDeviceTy(Plugin, DeviceId, NumDevices, {0}),
1851+
AMDGenericDeviceTy(),
18461852
OMPX_NumQueues("LIBOMPTARGET_AMDGPU_NUM_HSA_QUEUES", 4),
18471853
OMPX_QueueSize("LIBOMPTARGET_AMDGPU_HSA_QUEUE_SIZE", 512),
18481854
OMPX_DefaultTeamsPerCU("LIBOMPTARGET_AMDGPU_TEAMS_PER_CU", 4),
@@ -2088,7 +2094,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
20882094
/// Allocate and construct an AMDGPU kernel.
20892095
Expected<GenericKernelTy &> constructKernel(const char *Name) override {
20902096
// Allocate and construct the AMDGPU kernel.
2091-
AMDGPUKernelTy *AMDGPUKernel = PluginTy::get().allocate<AMDGPUKernelTy>();
2097+
AMDGPUKernelTy *AMDGPUKernel = Plugin.allocate<AMDGPUKernelTy>();
20922098
if (!AMDGPUKernel)
20932099
return Plugin::error("Failed to allocate memory for AMDGPU kernel");
20942100

@@ -2138,8 +2144,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
21382144
Expected<DeviceImageTy *> loadBinaryImpl(const __tgt_device_image *TgtImage,
21392145
int32_t ImageId) override {
21402146
// Allocate and initialize the image object.
2141-
AMDGPUDeviceImageTy *AMDImage =
2142-
PluginTy::get().allocate<AMDGPUDeviceImageTy>();
2147+
AMDGPUDeviceImageTy *AMDImage = Plugin.allocate<AMDGPUDeviceImageTy>();
21432148
new (AMDImage) AMDGPUDeviceImageTy(ImageId, *this, TgtImage);
21442149

21452150
// Load the HSA executable.
@@ -2697,7 +2702,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
26972702
}
26982703
Error setDeviceHeapSize(uint64_t Value) override {
26992704
for (DeviceImageTy *Image : LoadedImages)
2700-
if (auto Err = setupDeviceMemoryPool(PluginTy::get(), *Image, Value))
2705+
if (auto Err = setupDeviceMemoryPool(Plugin, *Image, Value))
27012706
return Err;
27022707
DeviceMemoryPoolSize = Value;
27032708
return Plugin::success();
@@ -2737,7 +2742,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
27372742
return utils::iterateAgentMemoryPools(
27382743
Agent, [&](hsa_amd_memory_pool_t HSAMemoryPool) {
27392744
AMDGPUMemoryPoolTy *MemoryPool =
2740-
PluginTy::get().allocate<AMDGPUMemoryPoolTy>();
2745+
Plugin.allocate<AMDGPUMemoryPoolTy>();
27412746
new (MemoryPool) AMDGPUMemoryPoolTy(HSAMemoryPool);
27422747
AllMemoryPools.push_back(MemoryPool);
27432748
return HSA_STATUS_SUCCESS;
@@ -3090,7 +3095,7 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
30903095

30913096
// Initialize the host device using host agents.
30923097
HostDevice = allocate<AMDHostDeviceTy>();
3093-
new (HostDevice) AMDHostDeviceTy(HostAgents);
3098+
new (HostDevice) AMDHostDeviceTy(*this, HostAgents);
30943099

30953100
// Setup the memory pools of available for the host.
30963101
if (auto Err = HostDevice->init())
@@ -3116,8 +3121,9 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
31163121
}
31173122

31183123
/// Creates an AMDGPU device.
3119-
GenericDeviceTy *createDevice(int32_t DeviceId, int32_t NumDevices) override {
3120-
return new AMDGPUDeviceTy(DeviceId, NumDevices, getHostDevice(),
3124+
GenericDeviceTy *createDevice(GenericPluginTy &Plugin, int32_t DeviceId,
3125+
int32_t NumDevices) override {
3126+
return new AMDGPUDeviceTy(Plugin, DeviceId, NumDevices, getHostDevice(),
31213127
getKernelAgent(DeviceId));
31223128
}
31233129

@@ -3248,7 +3254,9 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
32483254
// 56 bytes per allocation.
32493255
uint32_t AllArgsSize = KernelArgsSize + ImplicitArgsSize;
32503256

3251-
AMDHostDeviceTy &HostDevice = PluginTy::get<AMDGPUPluginTy>().getHostDevice();
3257+
AMDGPUPluginTy &AMDGPUPlugin =
3258+
static_cast<AMDGPUPluginTy &>(GenericDevice.Plugin);
3259+
AMDHostDeviceTy &HostDevice = AMDGPUPlugin.getHostDevice();
32523260
AMDGPUMemoryManagerTy &ArgsMemoryManager = HostDevice.getArgsMemoryManager();
32533261

32543262
void *AllArgs = nullptr;
@@ -3385,7 +3393,7 @@ void *AMDGPUMemoryManagerTy::allocate(size_t Size, void *HstPtr,
33853393
}
33863394
assert(Ptr && "Invalid pointer");
33873395

3388-
auto &KernelAgents = PluginTy::get<AMDGPUPluginTy>().getKernelAgents();
3396+
auto &KernelAgents = Plugin.getKernelAgents();
33893397

33903398
// Allow all kernel agents to access the allocation.
33913399
if (auto Err = MemoryPool->enableAccess(Ptr, Size, KernelAgents)) {
@@ -3428,7 +3436,8 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
34283436
}
34293437

34303438
if (Alloc) {
3431-
auto &KernelAgents = PluginTy::get<AMDGPUPluginTy>().getKernelAgents();
3439+
auto &KernelAgents =
3440+
static_cast<AMDGPUPluginTy &>(Plugin).getKernelAgents();
34323441
// Inherently necessary for host or shared allocations
34333442
// Also enabled for device memory to allow device to device memcpy
34343443

openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -610,7 +610,7 @@ class PinnedAllocationMapTy {
610610
struct GenericDeviceTy : public DeviceAllocatorTy {
611611
/// Construct a device with its device id within the plugin, the number of
612612
/// devices in the plugin and the grid values for that kind of device.
613-
GenericDeviceTy(int32_t DeviceId, int32_t NumDevices,
613+
GenericDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId, int32_t NumDevices,
614614
const llvm::omp::GV &GridValues);
615615

616616
/// Get the device identifier within the corresponding plugin. Notice that
@@ -860,6 +860,9 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
860860
/// Allocate and construct a kernel object.
861861
virtual Expected<GenericKernelTy &> constructKernel(const char *Name) = 0;
862862

863+
/// Reference to the underlying plugin that created this device.
864+
GenericPluginTy &Plugin;
865+
863866
private:
864867
/// Get and set the stack size and heap size for the device. If not used, the
865868
/// plugin can implement the setters as no-op and setting the output
@@ -977,7 +980,8 @@ struct GenericPluginTy {
977980
virtual Error deinitImpl() = 0;
978981

979982
/// Create a new device for the underlying plugin.
980-
virtual GenericDeviceTy *createDevice(int32_t DeviceID,
983+
virtual GenericDeviceTy *createDevice(GenericPluginTy &Plugin,
984+
int32_t DeviceID,
981985
int32_t NumDevices) = 0;
982986

983987
/// Create a new global handler for the underlying plugin.

openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -438,7 +438,7 @@ Error GenericKernelTy::init(GenericDeviceTy &GenericDevice,
438438
// Retrieve kernel environment object for the kernel.
439439
GlobalTy KernelEnv(std::string(Name) + "_kernel_environment",
440440
sizeof(KernelEnvironment), &KernelEnvironment);
441-
GenericGlobalHandlerTy &GHandler = PluginTy::get().getGlobalHandler();
441+
GenericGlobalHandlerTy &GHandler = GenericDevice.Plugin.getGlobalHandler();
442442
if (auto Err =
443443
GHandler.readGlobalFromImage(GenericDevice, *ImagePtr, KernelEnv)) {
444444
[[maybe_unused]] std::string ErrStr = toString(std::move(Err));
@@ -710,9 +710,10 @@ uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
710710
return std::min(PreferredNumBlocks, GenericDevice.getBlockLimit());
711711
}
712712

713-
GenericDeviceTy::GenericDeviceTy(int32_t DeviceId, int32_t NumDevices,
713+
GenericDeviceTy::GenericDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId,
714+
int32_t NumDevices,
714715
const llvm::omp::GV &OMPGridValues)
715-
: MemoryManager(nullptr), OMP_TeamLimit("OMP_TEAM_LIMIT"),
716+
: Plugin(Plugin), MemoryManager(nullptr), OMP_TeamLimit("OMP_TEAM_LIMIT"),
716717
OMP_NumTeams("OMP_NUM_TEAMS"),
717718
OMP_TeamsThreadLimit("OMP_TEAMS_THREAD_LIMIT"),
718719
OMPX_DebugKind("LIBOMPTARGET_DEVICE_RTL_DEBUG"),
@@ -1522,7 +1523,7 @@ Error GenericPluginTy::initDevice(int32_t DeviceId) {
15221523
assert(!Devices[DeviceId] && "Device already initialized");
15231524

15241525
// Create the device and save the reference.
1525-
GenericDeviceTy *Device = createDevice(DeviceId, NumDevices);
1526+
GenericDeviceTy *Device = createDevice(*this, DeviceId, NumDevices);
15261527
assert(Device && "Invalid device");
15271528

15281529
// Save the device reference into the list.

openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -255,8 +255,8 @@ struct CUDAEventRef final : public GenericDeviceResourceRef {
255255
/// generic device class.
256256
struct CUDADeviceTy : public GenericDeviceTy {
257257
// Create a CUDA device with a device id and the default CUDA grid values.
258-
CUDADeviceTy(int32_t DeviceId, int32_t NumDevices)
259-
: GenericDeviceTy(DeviceId, NumDevices, NVPTXGridValues),
258+
CUDADeviceTy(GenericPluginTy &Plugin, int32_t DeviceId, int32_t NumDevices)
259+
: GenericDeviceTy(Plugin, DeviceId, NumDevices, NVPTXGridValues),
260260
CUDAStreamManager(*this), CUDAEventManager(*this) {}
261261

262262
~CUDADeviceTy() {}
@@ -471,7 +471,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
471471
/// Allocate and construct a CUDA kernel.
472472
Expected<GenericKernelTy &> constructKernel(const char *Name) override {
473473
// Allocate and construct the CUDA kernel.
474-
CUDAKernelTy *CUDAKernel = PluginTy::get().allocate<CUDAKernelTy>();
474+
CUDAKernelTy *CUDAKernel = Plugin.allocate<CUDAKernelTy>();
475475
if (!CUDAKernel)
476476
return Plugin::error("Failed to allocate memory for CUDA kernel");
477477

@@ -529,8 +529,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
529529
return std::move(Err);
530530

531531
// Allocate and initialize the image object.
532-
CUDADeviceImageTy *CUDAImage =
533-
PluginTy::get().allocate<CUDADeviceImageTy>();
532+
CUDADeviceImageTy *CUDAImage = Plugin.allocate<CUDADeviceImageTy>();
534533
new (CUDAImage) CUDADeviceImageTy(ImageId, *this, TgtImage);
535534

536535
// Load the CUDA module.
@@ -1373,8 +1372,9 @@ struct CUDAPluginTy final : public GenericPluginTy {
13731372
Error deinitImpl() override { return Plugin::success(); }
13741373

13751374
/// Creates a CUDA device to use for offloading.
1376-
GenericDeviceTy *createDevice(int32_t DeviceId, int32_t NumDevices) override {
1377-
return new CUDADeviceTy(DeviceId, NumDevices);
1375+
GenericDeviceTy *createDevice(GenericPluginTy &Plugin, int32_t DeviceId,
1376+
int32_t NumDevices) override {
1377+
return new CUDADeviceTy(Plugin, DeviceId, NumDevices);
13781378
}
13791379

13801380
/// Creates a CUDA global handler.

openmp/libomptarget/plugins-nextgen/host/src/rtl.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ struct GenELF64KernelTy : public GenericKernelTy {
6666
GlobalTy Global(getName(), 0);
6767

6868
// Get the metadata (address) of the kernel function.
69-
GenericGlobalHandlerTy &GHandler = PluginTy::get().getGlobalHandler();
69+
GenericGlobalHandlerTy &GHandler = Device.Plugin.getGlobalHandler();
7070
if (auto Err = GHandler.getGlobalMetadataFromDevice(Device, Image, Global))
7171
return Err;
7272

@@ -132,8 +132,9 @@ struct GenELF64DeviceImageTy : public DeviceImageTy {
132132
/// Class implementing the device functionalities for GenELF64.
133133
struct GenELF64DeviceTy : public GenericDeviceTy {
134134
/// Create the device with a specific id.
135-
GenELF64DeviceTy(int32_t DeviceId, int32_t NumDevices)
136-
: GenericDeviceTy(DeviceId, NumDevices, GenELF64GridValues) {}
135+
GenELF64DeviceTy(GenericPluginTy &Plugin, int32_t DeviceId,
136+
int32_t NumDevices)
137+
: GenericDeviceTy(Plugin, DeviceId, NumDevices, GenELF64GridValues) {}
137138

138139
~GenELF64DeviceTy() {}
139140

@@ -149,8 +150,7 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
149150
/// Construct the kernel for a specific image on the device.
150151
Expected<GenericKernelTy &> constructKernel(const char *Name) override {
151152
// Allocate and construct the kernel.
152-
GenELF64KernelTy *GenELF64Kernel =
153-
PluginTy::get().allocate<GenELF64KernelTy>();
153+
GenELF64KernelTy *GenELF64Kernel = Plugin.allocate<GenELF64KernelTy>();
154154
if (!GenELF64Kernel)
155155
return Plugin::error("Failed to allocate memory for GenELF64 kernel");
156156

@@ -166,8 +166,7 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
166166
Expected<DeviceImageTy *> loadBinaryImpl(const __tgt_device_image *TgtImage,
167167
int32_t ImageId) override {
168168
// Allocate and initialize the image object.
169-
GenELF64DeviceImageTy *Image =
170-
PluginTy::get().allocate<GenELF64DeviceImageTy>();
169+
GenELF64DeviceImageTy *Image = Plugin.allocate<GenELF64DeviceImageTy>();
171170
new (Image) GenELF64DeviceImageTy(ImageId, *this, TgtImage);
172171

173172
// Create a temporary file.
@@ -400,8 +399,9 @@ struct GenELF64PluginTy final : public GenericPluginTy {
400399
Error deinitImpl() override { return Plugin::success(); }
401400

402401
/// Creates a generic ELF device.
403-
GenericDeviceTy *createDevice(int32_t DeviceId, int32_t NumDevices) override {
404-
return new GenELF64DeviceTy(DeviceId, NumDevices);
402+
GenericDeviceTy *createDevice(GenericPluginTy &Plugin, int32_t DeviceId,
403+
int32_t NumDevices) override {
404+
return new GenELF64DeviceTy(Plugin, DeviceId, NumDevices);
405405
}
406406

407407
/// Creates a generic global handler.

0 commit comments

Comments
 (0)