Skip to content

Commit 37c1a5e

Browse files
authored
[Libomptarget] Fix GPU Dtors referencing possibly deallocated image (#77828)
Summary: The constructors and destructors look up a symbol in the ELF quickly to determine if they need to be run on the GPU. This allows us to avoid the very slow actions required to do the slower lookup using the vendor API. One problem occurs with how we handle the lifetime of these images. Right now there is no invariant to specify the lifetime of the underlying binary image that is loaded. In the typical case, this comes from the binary itself in the `.llvm.offloading` section, meaning that the lifetime of the binary should match the executable itself. This would work fine, if it weren't for the fact that the plugin is loaded via `dlopen` and can have a teardown order out of sync with the main executable. This was likely what was occuring when this failed on some systems but not others. A potential solution would be to simply copy images into memory so the runtime does not rely on external references. Another would be to manually zero these out after initialization as to prevent this mistake from happening accidentally. The former has the benefit of making some checks easier, and allowing for constant initialization be done on the ELF itself (normally we can't do this because writing to a constant section, e.g. .llvm.offloading is a segfault.). The downside would be the extra time required to copy the image in bulk (Although we are likely doing this in the vendor runtimes as well). This patch went with a quick solution to simply set a boolean value at initialization time if we need to call destructors. Fixes: #77798
1 parent 75efddb commit 37c1a5e

File tree

3 files changed

+35
-8
lines changed

3 files changed

+35
-8
lines changed

openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1976,12 +1976,18 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
19761976

19771977
virtual Error callGlobalConstructors(GenericPluginTy &Plugin,
19781978
DeviceImageTy &Image) override {
1979-
return callGlobalCtorDtorCommon(Plugin, Image, "amdgcn.device.init");
1979+
GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
1980+
if (Handler.isSymbolInImage(*this, Image, "amdgcn.device.fini"))
1981+
Image.setPendingGlobalDtors();
1982+
1983+
return callGlobalCtorDtorCommon(Plugin, Image, /*IsCtor=*/true);
19801984
}
19811985

19821986
virtual Error callGlobalDestructors(GenericPluginTy &Plugin,
19831987
DeviceImageTy &Image) override {
1984-
return callGlobalCtorDtorCommon(Plugin, Image, "amdgcn.device.fini");
1988+
if (Image.hasPendingGlobalDtors())
1989+
return callGlobalCtorDtorCommon(Plugin, Image, /*IsCtor=*/false);
1990+
return Plugin::success();
19851991
}
19861992

19871993
const uint64_t getStreamBusyWaitMicroseconds() const {
@@ -2701,15 +2707,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
27012707
/// Common method to invoke a single threaded constructor or destructor
27022708
/// kernel by name.
27032709
Error callGlobalCtorDtorCommon(GenericPluginTy &Plugin, DeviceImageTy &Image,
2704-
const char *Name) {
2710+
bool IsCtor) {
2711+
const char *KernelName =
2712+
IsCtor ? "amdgcn.device.init" : "amdgcn.device.fini";
27052713
// Perform a quick check for the named kernel in the image. The kernel
27062714
// should be created by the 'amdgpu-lower-ctor-dtor' pass.
27072715
GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
2708-
if (!Handler.isSymbolInImage(*this, Image, Name))
2716+
if (IsCtor && !Handler.isSymbolInImage(*this, Image, KernelName))
27092717
return Plugin::success();
27102718

27112719
// Allocate and construct the AMDGPU kernel.
2712-
AMDGPUKernelTy AMDGPUKernel(Name);
2720+
AMDGPUKernelTy AMDGPUKernel(KernelName);
27132721
if (auto Err = AMDGPUKernel.init(*this, Image))
27142722
return Err;
27152723

openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,12 +218,19 @@ class DeviceImageTy {
218218
const __tgt_device_image *TgtImage;
219219
const __tgt_device_image *TgtImageBitcode;
220220

221+
/// If this image has any global destructors that much be called.
222+
/// FIXME: This is only required because we currently have no invariants
223+
/// towards the lifetime of the underlying image. We should either copy
224+
/// the image into memory locally or erase the pointers after init.
225+
bool PendingGlobalDtors;
226+
221227
/// Table of offload entries.
222228
OffloadEntryTableTy OffloadEntryTable;
223229

224230
public:
225231
DeviceImageTy(int32_t Id, const __tgt_device_image *Image)
226-
: ImageId(Id), TgtImage(Image), TgtImageBitcode(nullptr) {
232+
: ImageId(Id), TgtImage(Image), TgtImageBitcode(nullptr),
233+
PendingGlobalDtors(false) {
227234
assert(TgtImage && "Invalid target image");
228235
}
229236

@@ -255,6 +262,10 @@ class DeviceImageTy {
255262
"Image");
256263
}
257264

265+
/// Accessors to the boolean value
266+
bool setPendingGlobalDtors() { return PendingGlobalDtors = true; }
267+
bool hasPendingGlobalDtors() const { return PendingGlobalDtors; }
268+
258269
/// Get a reference to the offload entry table for the image.
259270
OffloadEntryTableTy &getOffloadEntryTable() { return OffloadEntryTable; }
260271
};

openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -392,12 +392,20 @@ struct CUDADeviceTy : public GenericDeviceTy {
392392

393393
virtual Error callGlobalConstructors(GenericPluginTy &Plugin,
394394
DeviceImageTy &Image) override {
395+
// Check for the presense of global destructors at initialization time. This
396+
// is required when the image may be deallocated before destructors are run.
397+
GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
398+
if (Handler.isSymbolInImage(*this, Image, "nvptx$device$fini"))
399+
Image.setPendingGlobalDtors();
400+
395401
return callGlobalCtorDtorCommon(Plugin, Image, /*IsCtor=*/true);
396402
}
397403

398404
virtual Error callGlobalDestructors(GenericPluginTy &Plugin,
399405
DeviceImageTy &Image) override {
400-
return callGlobalCtorDtorCommon(Plugin, Image, /*IsCtor=*/false);
406+
if (Image.hasPendingGlobalDtors())
407+
return callGlobalCtorDtorCommon(Plugin, Image, /*IsCtor=*/false);
408+
return Plugin::success();
401409
}
402410

403411
Expected<std::unique_ptr<MemoryBuffer>>
@@ -1145,7 +1153,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
11451153
// Perform a quick check for the named kernel in the image. The kernel
11461154
// should be created by the 'nvptx-lower-ctor-dtor' pass.
11471155
GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
1148-
if (!Handler.isSymbolInImage(*this, Image, KernelName))
1156+
if (IsCtor && !Handler.isSymbolInImage(*this, Image, KernelName))
11491157
return Plugin::success();
11501158

11511159
// The Nvidia backend cannot handle creating the ctor / dtor array

0 commit comments

Comments
 (0)