Skip to content

[Libomptarget] Fix GPU Dtors referencing possibly deallocated image #77828

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1976,12 +1976,18 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {

virtual Error callGlobalConstructors(GenericPluginTy &Plugin,
DeviceImageTy &Image) override {
return callGlobalCtorDtorCommon(Plugin, Image, "amdgcn.device.init");
GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
if (Handler.isSymbolInImage(*this, Image, "amdgcn.device.fini"))
Image.setPendingGlobalDtors();

return callGlobalCtorDtorCommon(Plugin, Image, /*IsCtor=*/true);
}

virtual Error callGlobalDestructors(GenericPluginTy &Plugin,
DeviceImageTy &Image) override {
return callGlobalCtorDtorCommon(Plugin, Image, "amdgcn.device.fini");
if (Image.hasPendingGlobalDtors())
return callGlobalCtorDtorCommon(Plugin, Image, /*IsCtor=*/false);
return Plugin::success();
}

const uint64_t getStreamBusyWaitMicroseconds() const {
Expand Down Expand Up @@ -2701,15 +2707,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
/// Common method to invoke a single threaded constructor or destructor
/// kernel by name.
Error callGlobalCtorDtorCommon(GenericPluginTy &Plugin, DeviceImageTy &Image,
const char *Name) {
bool IsCtor) {
const char *KernelName =
IsCtor ? "amdgcn.device.init" : "amdgcn.device.fini";
// Perform a quick check for the named kernel in the image. The kernel
// should be created by the 'amdgpu-lower-ctor-dtor' pass.
GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
if (!Handler.isSymbolInImage(*this, Image, Name))
if (IsCtor && !Handler.isSymbolInImage(*this, Image, KernelName))
return Plugin::success();

// Allocate and construct the AMDGPU kernel.
AMDGPUKernelTy AMDGPUKernel(Name);
AMDGPUKernelTy AMDGPUKernel(KernelName);
if (auto Err = AMDGPUKernel.init(*this, Image))
return Err;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,12 +218,19 @@ class DeviceImageTy {
const __tgt_device_image *TgtImage;
const __tgt_device_image *TgtImageBitcode;

/// If this image has any global destructors that much be called.
/// FIXME: This is only required because we currently have no invariants
/// towards the lifetime of the underlying image. We should either copy
/// the image into memory locally or erase the pointers after init.
bool PendingGlobalDtors;

/// Table of offload entries.
OffloadEntryTableTy OffloadEntryTable;

public:
DeviceImageTy(int32_t Id, const __tgt_device_image *Image)
: ImageId(Id), TgtImage(Image), TgtImageBitcode(nullptr) {
: ImageId(Id), TgtImage(Image), TgtImageBitcode(nullptr),
PendingGlobalDtors(false) {
assert(TgtImage && "Invalid target image");
}

Expand Down Expand Up @@ -255,6 +262,10 @@ class DeviceImageTy {
"Image");
}

/// Accessors to the boolean value
bool setPendingGlobalDtors() { return PendingGlobalDtors = true; }
bool hasPendingGlobalDtors() const { return PendingGlobalDtors; }

/// Get a reference to the offload entry table for the image.
OffloadEntryTableTy &getOffloadEntryTable() { return OffloadEntryTable; }
};
Expand Down
12 changes: 10 additions & 2 deletions openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -392,12 +392,20 @@ struct CUDADeviceTy : public GenericDeviceTy {

virtual Error callGlobalConstructors(GenericPluginTy &Plugin,
DeviceImageTy &Image) override {
// Check for the presense of global destructors at initialization time. This
// is required when the image may be deallocated before destructors are run.
GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
if (Handler.isSymbolInImage(*this, Image, "nvptx$device$fini"))
Image.setPendingGlobalDtors();

return callGlobalCtorDtorCommon(Plugin, Image, /*IsCtor=*/true);
}

virtual Error callGlobalDestructors(GenericPluginTy &Plugin,
DeviceImageTy &Image) override {
return callGlobalCtorDtorCommon(Plugin, Image, /*IsCtor=*/false);
if (Image.hasPendingGlobalDtors())
return callGlobalCtorDtorCommon(Plugin, Image, /*IsCtor=*/false);
return Plugin::success();
}

Expected<std::unique_ptr<MemoryBuffer>>
Expand Down Expand Up @@ -1145,7 +1153,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
// Perform a quick check for the named kernel in the image. The kernel
// should be created by the 'nvptx-lower-ctor-dtor' pass.
GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
if (!Handler.isSymbolInImage(*this, Image, KernelName))
if (IsCtor && !Handler.isSymbolInImage(*this, Image, KernelName))
return Plugin::success();

// The Nvidia backend cannot handle creating the ctor / dtor array
Expand Down