Skip to content

Commit 292cb11

Browse files
committed
[Libomptarget] Revert changes to AMDGPU plugin destructors
These patches exposed a lot of problems in the AMD toolchain. Rather than keep it broken we should revert it to its old semi-functional state. This will prevent us from using device destructors but should remove some new bugs. In the future this interface should be changed once these problems are addressed more correctly. This reverts commit ed0f218. This reverts commit 2b7203a. Fixes llvm#57536 Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D133997
1 parent 8491d01 commit 292cb11

File tree

2 files changed

+29
-50
lines changed

2 files changed

+29
-50
lines changed

openmp/libomptarget/plugins/amdgpu/src/rtl.cpp

Lines changed: 22 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,9 @@ struct KernelArgPool {
214214
};
215215
pthread_mutex_t KernelArgPool::Mutex = PTHREAD_MUTEX_INITIALIZER;
216216

217+
std::unordered_map<std::string /*kernel*/, std::unique_ptr<KernelArgPool>>
218+
KernelArgPoolMap;
219+
217220
/// Use a single entity to encode a kernel and a set of flags
218221
struct KernelTy {
219222
llvm::omp::OMPTgtExecModeFlags ExecutionMode;
@@ -225,9 +228,7 @@ struct KernelTy {
225228
KernelTy(llvm::omp::OMPTgtExecModeFlags ExecutionMode, int16_t ConstWgSize,
226229
int32_t DeviceId, void *CallStackAddr, const char *Name,
227230
uint32_t KernargSegmentSize,
228-
hsa_amd_memory_pool_t &KernArgMemoryPool,
229-
std::unordered_map<std::string, std::unique_ptr<KernelArgPool>>
230-
&KernelArgPoolMap)
231+
hsa_amd_memory_pool_t &KernArgMemoryPool)
231232
: ExecutionMode(ExecutionMode), ConstWGSize(ConstWgSize),
232233
DeviceId(DeviceId), CallStackAddr(CallStackAddr), Name(Name) {
233234
DP("Construct kernelinfo: ExecMode %d\n", ExecutionMode);
@@ -241,6 +242,10 @@ struct KernelTy {
241242
}
242243
};
243244

245+
/// List that contains all the kernels.
246+
/// FIXME: we may need this to be per device and per library.
247+
std::list<KernelTy> KernelsList;
248+
244249
template <typename Callback> static hsa_status_t findAgents(Callback CB) {
245250

246251
hsa_status_t Err =
@@ -455,12 +460,6 @@ class RTLDeviceInfoTy : HSALifetime {
455460

456461
int NumberOfDevices = 0;
457462

458-
/// List that contains all the kernels.
459-
/// FIXME: we may need this to be per device and per library.
460-
std::list<KernelTy> KernelsList;
461-
std::unordered_map<std::string /*kernel*/, std::unique_ptr<KernelArgPool>>
462-
KernelArgPoolMap;
463-
464463
// GPU devices
465464
std::vector<hsa_agent_t> HSAAgents;
466465
std::vector<HSAQueueScheduler> HSAQueueSchedulers; // one per gpu
@@ -862,6 +861,7 @@ class RTLDeviceInfoTy : HSALifetime {
862861
"Unexpected device id!");
863862
FuncGblEntries[DeviceId].emplace_back();
864863
FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back();
864+
// KernelArgPoolMap.clear();
865865
E.Entries.clear();
866866
E.Table.EntriesBegin = E.Table.EntriesEnd = 0;
867867
}
@@ -1117,8 +1117,10 @@ class RTLDeviceInfoTy : HSALifetime {
11171117

11181118
pthread_mutex_t SignalPoolT::mutex = PTHREAD_MUTEX_INITIALIZER;
11191119

1120-
static RTLDeviceInfoTy *DeviceInfoState = nullptr;
1121-
static RTLDeviceInfoTy &DeviceInfo() { return *DeviceInfoState; }
1120+
// Putting accesses to DeviceInfo global behind a function call prior
1121+
// to changing to use init_plugin/deinit_plugin calls
1122+
static RTLDeviceInfoTy DeviceInfoState;
1123+
static RTLDeviceInfoTy &DeviceInfo() { return DeviceInfoState; }
11221124

11231125
namespace {
11241126

@@ -1459,9 +1461,8 @@ int32_t runRegionLocked(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs,
14591461
KernelArgPool *ArgPool = nullptr;
14601462
void *KernArg = nullptr;
14611463
{
1462-
auto It =
1463-
DeviceInfo().KernelArgPoolMap.find(std::string(KernelInfo->Name));
1464-
if (It != DeviceInfo().KernelArgPoolMap.end()) {
1464+
auto It = KernelArgPoolMap.find(std::string(KernelInfo->Name));
1465+
if (It != KernelArgPoolMap.end()) {
14651466
ArgPool = (It->second).get();
14661467
}
14671468
}
@@ -1940,20 +1941,6 @@ bool IsImageCompatibleWithEnv(const char *ImgInfo, std::string EnvInfo) {
19401941
}
19411942

19421943
extern "C" {
1943-
1944-
int32_t __tgt_rtl_init_plugin() {
1945-
DeviceInfoState = new RTLDeviceInfoTy;
1946-
return (DeviceInfoState && DeviceInfoState->ConstructionSucceeded)
1947-
? OFFLOAD_SUCCESS
1948-
: OFFLOAD_FAIL;
1949-
}
1950-
1951-
int32_t __tgt_rtl_deinit_plugin() {
1952-
if (DeviceInfoState)
1953-
delete DeviceInfoState;
1954-
return OFFLOAD_SUCCESS;
1955-
}
1956-
19571944
int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
19581945
return elfMachineIdIsAmdgcn(Image);
19591946
}
@@ -1985,6 +1972,9 @@ int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *image,
19851972
return true;
19861973
}
19871974

1975+
int32_t __tgt_rtl_init_plugin() { return OFFLOAD_SUCCESS; }
1976+
int32_t __tgt_rtl_deinit_plugin() { return OFFLOAD_SUCCESS; }
1977+
19881978
int __tgt_rtl_number_of_devices() {
19891979
// If the construction failed, no methods are safe to call
19901980
if (DeviceInfo().ConstructionSucceeded) {
@@ -2524,12 +2514,11 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t DeviceId,
25242514
}
25252515
check("Loading computation property", Err);
25262516

2527-
DeviceInfo().KernelsList.push_back(
2528-
KernelTy(ExecModeVal, WGSizeVal, DeviceId, CallStackAddr, E->name,
2529-
KernargSegmentSize, DeviceInfo().KernArgPool,
2530-
DeviceInfo().KernelArgPoolMap));
2517+
KernelsList.push_back(KernelTy(ExecModeVal, WGSizeVal, DeviceId,
2518+
CallStackAddr, E->name, KernargSegmentSize,
2519+
DeviceInfo().KernArgPool));
25312520
__tgt_offload_entry Entry = *E;
2532-
Entry.addr = (void *)&DeviceInfo().KernelsList.back();
2521+
Entry.addr = (void *)&KernelsList.back();
25332522
DeviceInfo().addOffloadEntry(DeviceId, Entry);
25342523
DP("Entry point %ld maps to %s\n", E - HostBegin, E->name);
25352524
}

openmp/libomptarget/src/rtl.cpp

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -65,23 +65,6 @@ __attribute__((constructor(101))) void init() {
6565

6666
__attribute__((destructor(101))) void deinit() {
6767
DP("Deinit target library!\n");
68-
69-
for (auto *R : PM->RTLs.UsedRTLs) {
70-
// Plugins can either destroy their local state using global variables
71-
// or attribute(destructor) functions or by implementing deinit_plugin
72-
// The hazard with plugin local destructors is they may be called before
73-
// or after this destructor. If the plugin is destroyed using global
74-
// state before this library finishes calling into it the plugin is
75-
// likely to crash. If good fortune means the plugin outlives this
76-
// library then there may be no crash.
77-
// Using deinit_plugin and no global destructors from the plugin works.
78-
if (R->deinit_plugin) {
79-
if (R->deinit_plugin() != OFFLOAD_SUCCESS) {
80-
DP("Failure deinitializing RTL %s!\n", R->RTLName.c_str());
81-
}
82-
}
83-
}
84-
8568
delete PM;
8669

8770
if (ProfileTraceFile) {
@@ -579,6 +562,13 @@ void RTLsTy::unregisterLib(__tgt_bin_desc *Desc) {
579562
PM->TblMapMtx.unlock();
580563

581564
// TODO: Write some RTL->unload_image(...) function?
565+
for (auto *R : UsedRTLs) {
566+
if (R->deinit_plugin) {
567+
if (R->deinit_plugin() != OFFLOAD_SUCCESS) {
568+
DP("Failure deinitializing RTL %s!\n", R->RTLName.c_str());
569+
}
570+
}
571+
}
582572

583573
DP("Done unregistering library!\n");
584574
}

0 commit comments

Comments
 (0)