Skip to content

[Libomptarget] Replace global PluginTy::get interface with references #86595

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 26 additions & 17 deletions openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,8 @@ struct AMDGPUMemoryPoolTy {
struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy {

/// Create an empty memory manager.
AMDGPUMemoryManagerTy() : MemoryPool(nullptr), MemoryManager(nullptr) {}
AMDGPUMemoryManagerTy(AMDGPUPluginTy &Plugin)
: Plugin(Plugin), MemoryPool(nullptr), MemoryManager(nullptr) {}

/// Initialize the memory manager from a memory pool.
Error init(AMDGPUMemoryPoolTy &MemoryPool) {
Expand Down Expand Up @@ -429,6 +430,9 @@ struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy {
return OFFLOAD_SUCCESS;
}

/// The underlying plugin that owns this memory manager.
AMDGPUPluginTy &Plugin;

/// The memory pool used to allocate memory.
AMDGPUMemoryPoolTy *MemoryPool;

Expand Down Expand Up @@ -1744,9 +1748,10 @@ struct AMDGenericDeviceTy {
/// HSA host agent. We aggregate all its resources into the same instance.
struct AMDHostDeviceTy : public AMDGenericDeviceTy {
/// Create a host device from an array of host agents.
AMDHostDeviceTy(const llvm::SmallVector<hsa_agent_t> &HostAgents)
: AMDGenericDeviceTy(), Agents(HostAgents), ArgsMemoryManager(),
PinnedMemoryManager() {
AMDHostDeviceTy(AMDGPUPluginTy &Plugin,
const llvm::SmallVector<hsa_agent_t> &HostAgents)
: AMDGenericDeviceTy(), Agents(HostAgents), ArgsMemoryManager(Plugin),
PinnedMemoryManager(Plugin) {
assert(HostAgents.size() && "No host agent found");
}

Expand Down Expand Up @@ -1840,9 +1845,10 @@ struct AMDHostDeviceTy : public AMDGenericDeviceTy {
/// generic device class.
struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
// Create an AMDGPU device with a device id and default AMDGPU grid values.
AMDGPUDeviceTy(int32_t DeviceId, int32_t NumDevices,
AMDGPUDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId, int32_t NumDevices,
AMDHostDeviceTy &HostDevice, hsa_agent_t Agent)
: GenericDeviceTy(DeviceId, NumDevices, {0}), AMDGenericDeviceTy(),
: GenericDeviceTy(Plugin, DeviceId, NumDevices, {0}),
AMDGenericDeviceTy(),
OMPX_NumQueues("LIBOMPTARGET_AMDGPU_NUM_HSA_QUEUES", 4),
OMPX_QueueSize("LIBOMPTARGET_AMDGPU_HSA_QUEUE_SIZE", 512),
OMPX_DefaultTeamsPerCU("LIBOMPTARGET_AMDGPU_TEAMS_PER_CU", 4),
Expand Down Expand Up @@ -2088,7 +2094,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
/// Allocate and construct an AMDGPU kernel.
Expected<GenericKernelTy &> constructKernel(const char *Name) override {
// Allocate and construct the AMDGPU kernel.
AMDGPUKernelTy *AMDGPUKernel = PluginTy::get().allocate<AMDGPUKernelTy>();
AMDGPUKernelTy *AMDGPUKernel = Plugin.allocate<AMDGPUKernelTy>();
if (!AMDGPUKernel)
return Plugin::error("Failed to allocate memory for AMDGPU kernel");

Expand Down Expand Up @@ -2138,8 +2144,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
Expected<DeviceImageTy *> loadBinaryImpl(const __tgt_device_image *TgtImage,
int32_t ImageId) override {
// Allocate and initialize the image object.
AMDGPUDeviceImageTy *AMDImage =
PluginTy::get().allocate<AMDGPUDeviceImageTy>();
AMDGPUDeviceImageTy *AMDImage = Plugin.allocate<AMDGPUDeviceImageTy>();
new (AMDImage) AMDGPUDeviceImageTy(ImageId, *this, TgtImage);

// Load the HSA executable.
Expand Down Expand Up @@ -2697,7 +2702,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
}
Error setDeviceHeapSize(uint64_t Value) override {
for (DeviceImageTy *Image : LoadedImages)
if (auto Err = setupDeviceMemoryPool(PluginTy::get(), *Image, Value))
if (auto Err = setupDeviceMemoryPool(Plugin, *Image, Value))
return Err;
DeviceMemoryPoolSize = Value;
return Plugin::success();
Expand Down Expand Up @@ -2737,7 +2742,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return utils::iterateAgentMemoryPools(
Agent, [&](hsa_amd_memory_pool_t HSAMemoryPool) {
AMDGPUMemoryPoolTy *MemoryPool =
PluginTy::get().allocate<AMDGPUMemoryPoolTy>();
Plugin.allocate<AMDGPUMemoryPoolTy>();
new (MemoryPool) AMDGPUMemoryPoolTy(HSAMemoryPool);
AllMemoryPools.push_back(MemoryPool);
return HSA_STATUS_SUCCESS;
Expand Down Expand Up @@ -3090,7 +3095,7 @@ struct AMDGPUPluginTy final : public GenericPluginTy {

// Initialize the host device using host agents.
HostDevice = allocate<AMDHostDeviceTy>();
new (HostDevice) AMDHostDeviceTy(HostAgents);
new (HostDevice) AMDHostDeviceTy(*this, HostAgents);

// Setup the memory pools of available for the host.
if (auto Err = HostDevice->init())
Expand All @@ -3116,8 +3121,9 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
}

/// Creates an AMDGPU device.
GenericDeviceTy *createDevice(int32_t DeviceId, int32_t NumDevices) override {
return new AMDGPUDeviceTy(DeviceId, NumDevices, getHostDevice(),
GenericDeviceTy *createDevice(GenericPluginTy &Plugin, int32_t DeviceId,
int32_t NumDevices) override {
return new AMDGPUDeviceTy(Plugin, DeviceId, NumDevices, getHostDevice(),
getKernelAgent(DeviceId));
}

Expand Down Expand Up @@ -3248,7 +3254,9 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
// 56 bytes per allocation.
uint32_t AllArgsSize = KernelArgsSize + ImplicitArgsSize;

AMDHostDeviceTy &HostDevice = PluginTy::get<AMDGPUPluginTy>().getHostDevice();
AMDGPUPluginTy &AMDGPUPlugin =
static_cast<AMDGPUPluginTy &>(GenericDevice.Plugin);
AMDHostDeviceTy &HostDevice = AMDGPUPlugin.getHostDevice();
AMDGPUMemoryManagerTy &ArgsMemoryManager = HostDevice.getArgsMemoryManager();

void *AllArgs = nullptr;
Expand Down Expand Up @@ -3385,7 +3393,7 @@ void *AMDGPUMemoryManagerTy::allocate(size_t Size, void *HstPtr,
}
assert(Ptr && "Invalid pointer");

auto &KernelAgents = PluginTy::get<AMDGPUPluginTy>().getKernelAgents();
auto &KernelAgents = Plugin.getKernelAgents();

// Allow all kernel agents to access the allocation.
if (auto Err = MemoryPool->enableAccess(Ptr, Size, KernelAgents)) {
Expand Down Expand Up @@ -3428,7 +3436,8 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
}

if (Alloc) {
auto &KernelAgents = PluginTy::get<AMDGPUPluginTy>().getKernelAgents();
auto &KernelAgents =
static_cast<AMDGPUPluginTy &>(Plugin).getKernelAgents();
// Inherently necessary for host or shared allocations
// Also enabled for device memory to allow device to device memcpy

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -610,7 +610,7 @@ class PinnedAllocationMapTy {
struct GenericDeviceTy : public DeviceAllocatorTy {
/// Construct a device with its device id within the plugin, the number of
/// devices in the plugin and the grid values for that kind of device.
GenericDeviceTy(int32_t DeviceId, int32_t NumDevices,
GenericDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId, int32_t NumDevices,
const llvm::omp::GV &GridValues);

/// Get the device identifier within the corresponding plugin. Notice that
Expand Down Expand Up @@ -860,6 +860,9 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
/// Allocate and construct a kernel object.
virtual Expected<GenericKernelTy &> constructKernel(const char *Name) = 0;

/// Reference to the underlying plugin that created this device.
GenericPluginTy &Plugin;

private:
/// Get and set the stack size and heap size for the device. If not used, the
/// plugin can implement the setters as no-op and setting the output
Expand Down Expand Up @@ -977,7 +980,8 @@ struct GenericPluginTy {
virtual Error deinitImpl() = 0;

/// Create a new device for the underlying plugin.
virtual GenericDeviceTy *createDevice(int32_t DeviceID,
virtual GenericDeviceTy *createDevice(GenericPluginTy &Plugin,
int32_t DeviceID,
int32_t NumDevices) = 0;

/// Create a new global handler for the underlying plugin.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ Error GenericKernelTy::init(GenericDeviceTy &GenericDevice,
// Retrieve kernel environment object for the kernel.
GlobalTy KernelEnv(std::string(Name) + "_kernel_environment",
sizeof(KernelEnvironment), &KernelEnvironment);
GenericGlobalHandlerTy &GHandler = PluginTy::get().getGlobalHandler();
GenericGlobalHandlerTy &GHandler = GenericDevice.Plugin.getGlobalHandler();
if (auto Err =
GHandler.readGlobalFromImage(GenericDevice, *ImagePtr, KernelEnv)) {
[[maybe_unused]] std::string ErrStr = toString(std::move(Err));
Expand Down Expand Up @@ -710,9 +710,10 @@ uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
return std::min(PreferredNumBlocks, GenericDevice.getBlockLimit());
}

GenericDeviceTy::GenericDeviceTy(int32_t DeviceId, int32_t NumDevices,
GenericDeviceTy::GenericDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId,
int32_t NumDevices,
const llvm::omp::GV &OMPGridValues)
: MemoryManager(nullptr), OMP_TeamLimit("OMP_TEAM_LIMIT"),
: Plugin(Plugin), MemoryManager(nullptr), OMP_TeamLimit("OMP_TEAM_LIMIT"),
OMP_NumTeams("OMP_NUM_TEAMS"),
OMP_TeamsThreadLimit("OMP_TEAMS_THREAD_LIMIT"),
OMPX_DebugKind("LIBOMPTARGET_DEVICE_RTL_DEBUG"),
Expand Down Expand Up @@ -1522,7 +1523,7 @@ Error GenericPluginTy::initDevice(int32_t DeviceId) {
assert(!Devices[DeviceId] && "Device already initialized");

// Create the device and save the reference.
GenericDeviceTy *Device = createDevice(DeviceId, NumDevices);
GenericDeviceTy *Device = createDevice(*this, DeviceId, NumDevices);
assert(Device && "Invalid device");

// Save the device reference into the list.
Expand Down
14 changes: 7 additions & 7 deletions openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,8 +255,8 @@ struct CUDAEventRef final : public GenericDeviceResourceRef {
/// generic device class.
struct CUDADeviceTy : public GenericDeviceTy {
// Create a CUDA device with a device id and the default CUDA grid values.
CUDADeviceTy(int32_t DeviceId, int32_t NumDevices)
: GenericDeviceTy(DeviceId, NumDevices, NVPTXGridValues),
CUDADeviceTy(GenericPluginTy &Plugin, int32_t DeviceId, int32_t NumDevices)
: GenericDeviceTy(Plugin, DeviceId, NumDevices, NVPTXGridValues),
CUDAStreamManager(*this), CUDAEventManager(*this) {}

~CUDADeviceTy() {}
Expand Down Expand Up @@ -471,7 +471,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
/// Allocate and construct a CUDA kernel.
Expected<GenericKernelTy &> constructKernel(const char *Name) override {
// Allocate and construct the CUDA kernel.
CUDAKernelTy *CUDAKernel = PluginTy::get().allocate<CUDAKernelTy>();
CUDAKernelTy *CUDAKernel = Plugin.allocate<CUDAKernelTy>();
if (!CUDAKernel)
return Plugin::error("Failed to allocate memory for CUDA kernel");

Expand Down Expand Up @@ -529,8 +529,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
return std::move(Err);

// Allocate and initialize the image object.
CUDADeviceImageTy *CUDAImage =
PluginTy::get().allocate<CUDADeviceImageTy>();
CUDADeviceImageTy *CUDAImage = Plugin.allocate<CUDADeviceImageTy>();
new (CUDAImage) CUDADeviceImageTy(ImageId, *this, TgtImage);

// Load the CUDA module.
Expand Down Expand Up @@ -1373,8 +1372,9 @@ struct CUDAPluginTy final : public GenericPluginTy {
Error deinitImpl() override { return Plugin::success(); }

/// Creates a CUDA device to use for offloading.
GenericDeviceTy *createDevice(int32_t DeviceId, int32_t NumDevices) override {
return new CUDADeviceTy(DeviceId, NumDevices);
GenericDeviceTy *createDevice(GenericPluginTy &Plugin, int32_t DeviceId,
int32_t NumDevices) override {
return new CUDADeviceTy(Plugin, DeviceId, NumDevices);
}

/// Creates a CUDA global handler.
Expand Down
18 changes: 9 additions & 9 deletions openmp/libomptarget/plugins-nextgen/host/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ struct GenELF64KernelTy : public GenericKernelTy {
GlobalTy Global(getName(), 0);

// Get the metadata (address) of the kernel function.
GenericGlobalHandlerTy &GHandler = PluginTy::get().getGlobalHandler();
GenericGlobalHandlerTy &GHandler = Device.Plugin.getGlobalHandler();
if (auto Err = GHandler.getGlobalMetadataFromDevice(Device, Image, Global))
return Err;

Expand Down Expand Up @@ -132,8 +132,9 @@ struct GenELF64DeviceImageTy : public DeviceImageTy {
/// Class implementing the device functionalities for GenELF64.
struct GenELF64DeviceTy : public GenericDeviceTy {
/// Create the device with a specific id.
GenELF64DeviceTy(int32_t DeviceId, int32_t NumDevices)
: GenericDeviceTy(DeviceId, NumDevices, GenELF64GridValues) {}
GenELF64DeviceTy(GenericPluginTy &Plugin, int32_t DeviceId,
int32_t NumDevices)
: GenericDeviceTy(Plugin, DeviceId, NumDevices, GenELF64GridValues) {}

~GenELF64DeviceTy() {}

Expand All @@ -149,8 +150,7 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
/// Construct the kernel for a specific image on the device.
Expected<GenericKernelTy &> constructKernel(const char *Name) override {
// Allocate and construct the kernel.
GenELF64KernelTy *GenELF64Kernel =
PluginTy::get().allocate<GenELF64KernelTy>();
GenELF64KernelTy *GenELF64Kernel = Plugin.allocate<GenELF64KernelTy>();
if (!GenELF64Kernel)
return Plugin::error("Failed to allocate memory for GenELF64 kernel");

Expand All @@ -166,8 +166,7 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
Expected<DeviceImageTy *> loadBinaryImpl(const __tgt_device_image *TgtImage,
int32_t ImageId) override {
// Allocate and initialize the image object.
GenELF64DeviceImageTy *Image =
PluginTy::get().allocate<GenELF64DeviceImageTy>();
GenELF64DeviceImageTy *Image = Plugin.allocate<GenELF64DeviceImageTy>();
new (Image) GenELF64DeviceImageTy(ImageId, *this, TgtImage);

// Create a temporary file.
Expand Down Expand Up @@ -400,8 +399,9 @@ struct GenELF64PluginTy final : public GenericPluginTy {
Error deinitImpl() override { return Plugin::success(); }

/// Creates a generic ELF device.
GenericDeviceTy *createDevice(int32_t DeviceId, int32_t NumDevices) override {
return new GenELF64DeviceTy(DeviceId, NumDevices);
GenericDeviceTy *createDevice(GenericPluginTy &Plugin, int32_t DeviceId,
int32_t NumDevices) override {
return new GenELF64DeviceTy(Plugin, DeviceId, NumDevices);
}

/// Creates a generic global handler.
Expand Down