Skip to content

Commit 0870c88

Browse files
authored
[Offload] Add an unloadBinary interface to PluginInterface (#143873)
This allows a specific image to be removed from a device, rather than requiring all image data to outlive the device it was created for. This is required for `ol_program_handle_t`, which now specifies the lifetime of the buffer used to create the program.
1 parent e90ab0e commit 0870c88

File tree

6 files changed

+77
-66
lines changed

6 files changed

+77
-66
lines changed

offload/liboffload/API/Program.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313
def : Function {
1414
let name = "olCreateProgram";
1515
let desc = "Create a program for the device from the binary image pointed to by `ProgData`.";
16-
let details = [];
16+
let details = [
17+
"The provided `ProgData` will be copied and need not outlive the returned handle",
18+
];
1719
let params = [
1820
Param<"ol_device_handle_t", "Device", "handle of the device", PARAM_IN>,
1921
Param<"const void*", "ProgData", "pointer to the program binary data", PARAM_IN>,

offload/liboffload/src/OffloadImpl.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,14 @@ Error olCreateProgram_impl(ol_device_handle_t Device, const void *ProgData,
480480
}
481481

482482
/// Destroy a program handle and unload its image from the owning device.
///
/// The image is first unloaded through the device's unloadBinary(). If that
/// fails, the error is returned and neither the device's image list nor the
/// program handle is touched. On success the image is removed from the
/// device's LoadedImages list and the handle is released via olDestroy().
Error olDestroyProgram_impl(ol_program_handle_t Program) {
  auto &Device = Program->Image->getDevice();
  if (auto Err = Device.unloadBinary(Program->Image))
    return Err;

  // Remove the image from the device's list so that device deinitialization,
  // which unloads every entry of LoadedImages, does not unload it again.
  // Only erase when the image is actually present: erasing the end iterator
  // is invalid.
  auto &LoadedImages = Device.LoadedImages;
  auto ImageIt =
      std::find(LoadedImages.begin(), LoadedImages.end(), Program->Image);
  if (ImageIt != LoadedImages.end())
    LoadedImages.erase(ImageIt);

  return olDestroy(Program);
}
485493

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2023,6 +2023,13 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
20232023
return Plugin::success();
20242024
}
20252025

2026+
/// Plugin-specific part of unloading an image: unload the executable that
/// belongs to the given AMDGPU image and return the resulting error state.
Error unloadBinaryImpl(DeviceImageTy *Image) override {
  // The image is expected to be an AMDGPU image; downcast to reach the
  // executable handling.
  auto *AMDImage = static_cast<AMDGPUDeviceImageTy *>(Image);
  return AMDImage->unloadExecutable();
}
2032+
20262033
/// Deinitialize the device and release its resources.
20272034
Error deinitImpl() override {
20282035
// Deinitialize the stream and event pools.
@@ -2035,19 +2042,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
20352042
if (auto Err = AMDGPUSignalManager.deinit())
20362043
return Err;
20372044

2038-
// Close modules if necessary.
2039-
if (!LoadedImages.empty()) {
2040-
// Each image has its own module.
2041-
for (DeviceImageTy *Image : LoadedImages) {
2042-
AMDGPUDeviceImageTy &AMDImage =
2043-
static_cast<AMDGPUDeviceImageTy &>(*Image);
2044-
2045-
// Unload the executable of the image.
2046-
if (auto Err = AMDImage.unloadExecutable())
2047-
return Err;
2048-
}
2049-
}
2050-
20512045
// Invalidate agent reference.
20522046
Agent = {0};
20532047

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -752,6 +752,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
752752
virtual Expected<DeviceImageTy *>
753753
loadBinaryImpl(const __tgt_device_image *TgtImage, int32_t ImageId) = 0;
754754

755+
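/// Unload a previously loaded Image from the device. This runs the generic per-image teardown and then the plugin-specific unloadBinaryImpl; it does not remove the image from LoadedImages, so callers must do that themselves.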
756+
Error unloadBinary(DeviceImageTy *Image);
757+
virtual Error unloadBinaryImpl(DeviceImageTy *Image) = 0;
758+
755759
/// Setup the device environment if needed. Notice this setup may not be run
756760
/// on some plugins. By default, it will be executed, but plugins can change
757761
/// this behavior by overriding the shouldSetupDeviceEnvironment function.
@@ -1036,6 +1040,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
10361040
BoolEnvar OMPX_TrackAllocationTraces =
10371041
BoolEnvar("OFFLOAD_TRACK_ALLOCATION_TRACES", false);
10381042

1043+
/// Array of images loaded into the device. Images are automatically
1044+
/// deallocated by the allocator.
1045+
llvm::SmallVector<DeviceImageTy *> LoadedImages;
1046+
10391047
private:
10401048
/// Get and set the stack size and heap size for the device. If not used, the
10411049
/// plugin can implement the setters as no-op and setting the output
@@ -1086,10 +1094,6 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
10861094
UInt32Envar OMPX_InitialNumStreams;
10871095
UInt32Envar OMPX_InitialNumEvents;
10881096

1089-
/// Array of images loaded into the device. Images are automatically
1090-
/// deallocated by the allocator.
1091-
llvm::SmallVector<DeviceImageTy *> LoadedImages;
1092-
10931097
/// The identifier of the device within the plugin. Notice this is not a
10941098
/// global device id and is not the device id visible to the OpenMP user.
10951099
const int32_t DeviceId;

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 38 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -821,26 +821,49 @@ Error GenericDeviceTy::init(GenericPluginTy &Plugin) {
821821
return Plugin::success();
822822
}
823823

824-
Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
825-
for (DeviceImageTy *Image : LoadedImages)
826-
if (auto Err = callGlobalDestructors(Plugin, *Image))
827-
return Err;
824+
/// Unload a single image from this device.
///
/// Steps, in order:
///  1. Run the image's global destructors.
///  2. If the AllocationTracker debug kind is enabled, read the image's
///     `__omp_rtl_device_memory_pool_tracker` global and fold it into
///     DeviceMemoryPoolTracking. A failed read is ignored and contributes
///     nothing to the statistics.
///  3. Read the image's profiling globals; if there are any, optionally dump
///     them (PGODump debug kind) and write them out.
///  4. Delegate to the plugin-specific unloadBinaryImpl().
///
/// Any error from steps 1, 3 or 4 is returned immediately. This function does
/// not modify LoadedImages.
Error GenericDeviceTy::unloadBinary(DeviceImageTy *Image) {
  if (auto Err = callGlobalDestructors(Plugin, *Image))
    return Err;

  if (OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::AllocationTracker)) {
    GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler();
    DeviceMemoryPoolTrackingTy ImageDeviceMemoryPoolTracking = {0, 0, ~0U, 0};
    GlobalTy TrackerGlobal("__omp_rtl_device_memory_pool_tracker",
                           sizeof(DeviceMemoryPoolTrackingTy),
                           &ImageDeviceMemoryPoolTracking);
    // Reading the tracker is best effort. Only merge the per-image numbers
    // when the read succeeded, so a buffer the device never filled in is not
    // folded into the device-wide statistics.
    if (auto Err =
            GHandler.readGlobalFromDevice(*this, *Image, TrackerGlobal))
      consumeError(std::move(Err));
    else
      DeviceMemoryPoolTracking.combine(ImageDeviceMemoryPoolTracking);
  }

  GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
  auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image);
  if (!ProfOrErr)
    return ProfOrErr.takeError();

  if (!ProfOrErr->empty()) {
    // Dump out profdata
    if ((OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::PGODump)) ==
        uint32_t(DeviceDebugKind::PGODump))
      ProfOrErr->dump();

    // Write data to profiling file
    if (auto Err = ProfOrErr->write())
      return Err;
  }

  return unloadBinaryImpl(Image);
}
859+
860+
Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
861+
for (auto &I : LoadedImages)
862+
if (auto Err = unloadBinary(I))
863+
return Err;
864+
LoadedImages.clear();
865+
866+
if (OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::AllocationTracker)) {
844867
// TODO: Write this by default into a file.
845868
printf("\n\n|-----------------------\n"
846869
"| Device memory tracker:\n"
@@ -856,25 +879,6 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
856879
DeviceMemoryPoolTracking.AllocationMax);
857880
}
858881

859-
for (auto *Image : LoadedImages) {
860-
GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
861-
auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image);
862-
if (!ProfOrErr)
863-
return ProfOrErr.takeError();
864-
865-
if (ProfOrErr->empty())
866-
continue;
867-
868-
// Dump out profdata
869-
if ((OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::PGODump)) ==
870-
uint32_t(DeviceDebugKind::PGODump))
871-
ProfOrErr->dump();
872-
873-
// Write data to profiling file
874-
if (auto Err = ProfOrErr->write())
875-
return Err;
876-
}
877-
878882
// Delete the memory manager before deinitializing the device. Otherwise,
879883
// we may delete device allocations after the device is deinitialized.
880884
if (MemoryManager)

offload/plugins-nextgen/cuda/src/rtl.cpp

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,19 @@ struct CUDADeviceTy : public GenericDeviceTy {
358358
return Plugin::success();
359359
}
360360

361+
/// Plugin-specific part of unloading an image: unload the CUDA module that
/// belongs to the given image. A valid CUDA context is asserted on entry.
Error unloadBinaryImpl(DeviceImageTy *Image) override {
  assert(Context && "Invalid CUDA context");

  // Each image has its own module; unload it and hand the result (success or
  // error) straight back to the caller.
  auto &CUDAImage = static_cast<CUDADeviceImageTy &>(*Image);
  return CUDAImage.unloadModule();
}
373+
361374
/// Deinitialize the device and release its resources.
362375
Error deinitImpl() override {
363376
if (Context) {
@@ -372,20 +385,6 @@ struct CUDADeviceTy : public GenericDeviceTy {
372385
if (auto Err = CUDAEventManager.deinit())
373386
return Err;
374387

375-
// Close modules if necessary.
376-
if (!LoadedImages.empty()) {
377-
assert(Context && "Invalid CUDA context");
378-
379-
// Each image has its own module.
380-
for (DeviceImageTy *Image : LoadedImages) {
381-
CUDADeviceImageTy &CUDAImage = static_cast<CUDADeviceImageTy &>(*Image);
382-
383-
// Unload the module of the image.
384-
if (auto Err = CUDAImage.unloadModule())
385-
return Err;
386-
}
387-
}
388-
389388
if (Context) {
390389
CUresult Res = cuDevicePrimaryCtxRelease(Device);
391390
if (auto Err =

0 commit comments

Comments
 (0)