@@ -214,6 +214,9 @@ struct KernelArgPool {
 };
 pthread_mutex_t KernelArgPool::Mutex = PTHREAD_MUTEX_INITIALIZER;

+std::unordered_map<std::string /*kernel*/, std::unique_ptr<KernelArgPool>>
+    KernelArgPoolMap;
+
 /// Use a single entity to encode a kernel and a set of flags
 struct KernelTy {
   llvm::omp::OMPTgtExecModeFlags ExecutionMode;
@@ -225,9 +228,7 @@ struct KernelTy {
   KernelTy(llvm::omp::OMPTgtExecModeFlags ExecutionMode, int16_t ConstWgSize,
            int32_t DeviceId, void *CallStackAddr, const char *Name,
            uint32_t KernargSegmentSize,
-           hsa_amd_memory_pool_t &KernArgMemoryPool,
-           std::unordered_map<std::string, std::unique_ptr<KernelArgPool>>
-               &KernelArgPoolMap)
+           hsa_amd_memory_pool_t &KernArgMemoryPool)
       : ExecutionMode(ExecutionMode), ConstWGSize(ConstWgSize),
         DeviceId(DeviceId), CallStackAddr(CallStackAddr), Name(Name) {
     DP("Construct kernelinfo: ExecMode %d\n", ExecutionMode);
@@ -241,6 +242,10 @@ struct KernelTy {
   }
 };

+/// List that contains all the kernels.
+/// FIXME: we may need this to be per device and per library.
+std::list<KernelTy> KernelsList;
+
 template <typename Callback> static hsa_status_t findAgents(Callback CB) {

   hsa_status_t Err =
@@ -455,12 +460,6 @@ class RTLDeviceInfoTy : HSALifetime {

   int NumberOfDevices = 0;

-  /// List that contains all the kernels.
-  /// FIXME: we may need this to be per device and per library.
-  std::list<KernelTy> KernelsList;
-  std::unordered_map<std::string /*kernel*/, std::unique_ptr<KernelArgPool>>
-      KernelArgPoolMap;
-
   // GPU devices
   std::vector<hsa_agent_t> HSAAgents;
   std::vector<HSAQueueScheduler> HSAQueueSchedulers; // one per gpu
@@ -862,6 +861,7 @@ class RTLDeviceInfoTy : HSALifetime {
            "Unexpected device id!");
     FuncGblEntries[DeviceId].emplace_back();
     FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back();
+    // KernelArgPoolMap.clear();
     E.Entries.clear();
     E.Table.EntriesBegin = E.Table.EntriesEnd = 0;
   }
@@ -1117,8 +1117,10 @@ class RTLDeviceInfoTy : HSALifetime {

 pthread_mutex_t SignalPoolT::mutex = PTHREAD_MUTEX_INITIALIZER;

-static RTLDeviceInfoTy *DeviceInfoState = nullptr;
-static RTLDeviceInfoTy &DeviceInfo() { return *DeviceInfoState; }
+// Putting accesses to DeviceInfo global behind a function call prior
+// to changing to use init_plugin/deinit_plugin calls
+static RTLDeviceInfoTy DeviceInfoState;
+static RTLDeviceInfoTy &DeviceInfo() { return DeviceInfoState; }

 namespace {

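The comment introduced in this hunk spells out the design choice: device state now lives in a plain static object, and every access goes through the DeviceInfo() accessor so that a later move back to explicit init_plugin/deinit_plugin management only needs to touch that one function. A minimal standalone sketch of the same accessor-over-global pattern (placeholder names, not code from the plugin):

    // Hypothetical stand-in type; RTLDeviceInfoTy itself does far more work.
    struct PluginState {
      bool ConstructionSucceeded = false;
      PluginState() { ConstructionSucceeded = true; } // heavy setup would go here
    };

    // Constructed during static initialization of the shared object.
    static PluginState PluginStateObject;

    // Callers never name the object directly; switching the storage strategy
    // (e.g. back to a heap allocation made in an init entry point) only
    // requires editing this accessor and the definition above.
    static PluginState &PluginStateAccessor() { return PluginStateObject; }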
@@ -1459,9 +1461,8 @@ int32_t runRegionLocked(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs,
   KernelArgPool *ArgPool = nullptr;
   void *KernArg = nullptr;
   {
-    auto It =
-        DeviceInfo().KernelArgPoolMap.find(std::string(KernelInfo->Name));
-    if (It != DeviceInfo().KernelArgPoolMap.end()) {
+    auto It = KernelArgPoolMap.find(std::string(KernelInfo->Name));
+    if (It != KernelArgPoolMap.end()) {
       ArgPool = (It->second).get();
     }
   }
@@ -1940,20 +1941,6 @@ bool IsImageCompatibleWithEnv(const char *ImgInfo, std::string EnvInfo) {
 }

 extern "C" {
-
-int32_t __tgt_rtl_init_plugin() {
-  DeviceInfoState = new RTLDeviceInfoTy;
-  return (DeviceInfoState && DeviceInfoState->ConstructionSucceeded)
-             ? OFFLOAD_SUCCESS
-             : OFFLOAD_FAIL;
-}
-
-int32_t __tgt_rtl_deinit_plugin() {
-  if (DeviceInfoState)
-    delete DeviceInfoState;
-  return OFFLOAD_SUCCESS;
-}
-
 int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
   return elfMachineIdIsAmdgcn(Image);
 }
@@ -1985,6 +1972,9 @@ int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *image,
   return true;
 }

+int32_t __tgt_rtl_init_plugin() { return OFFLOAD_SUCCESS; }
+int32_t __tgt_rtl_deinit_plugin() { return OFFLOAD_SUCCESS; }
+
 int __tgt_rtl_number_of_devices() {
   // If the construction failed, no methods are safe to call
   if (DeviceInfo().ConstructionSucceeded) {
@@ -2524,12 +2514,11 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t DeviceId,
     }
     check("Loading computation property", Err);

-    DeviceInfo().KernelsList.push_back(
-        KernelTy(ExecModeVal, WGSizeVal, DeviceId, CallStackAddr, E->name,
-                 KernargSegmentSize, DeviceInfo().KernArgPool,
-                 DeviceInfo().KernelArgPoolMap));
+    KernelsList.push_back(KernelTy(ExecModeVal, WGSizeVal, DeviceId,
+                                   CallStackAddr, E->name, KernargSegmentSize,
+                                   DeviceInfo().KernArgPool));
     __tgt_offload_entry Entry = *E;
-    Entry.addr = (void *)&DeviceInfo().KernelsList.back();
+    Entry.addr = (void *)&KernelsList.back();
     DeviceInfo().addOffloadEntry(DeviceId, Entry);
     DP("Entry point %ld maps to %s\n", E - HostBegin, E->name);
   }
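With construction handled by the static DeviceInfoState object, the two plugin entry points above become trivial. A rough sketch of how a host-side loader might resolve and call them after dlopen-ing the AMDGPU plugin; the loader code is an assumption for illustration only, not part of this patch (OFFLOAD_SUCCESS is 0 in the libomptarget headers):

    #include <cstdint>
    #include <cstdio>
    #include <dlfcn.h>

    int main() {
      // Assumed library name; adjust the path to wherever the plugin is installed.
      void *Handle = dlopen("libomptarget.rtl.amdgpu.so", RTLD_NOW);
      if (!Handle)
        return 1;

      using EntryTy = int32_t (*)();
      auto Init = reinterpret_cast<EntryTy>(dlsym(Handle, "__tgt_rtl_init_plugin"));
      auto Deinit = reinterpret_cast<EntryTy>(dlsym(Handle, "__tgt_rtl_deinit_plugin"));

      // After this patch both calls simply report success; the device setup
      // already ran when the plugin's static DeviceInfoState was constructed
      // during library load.
      if (Init)
        std::printf("init_plugin   -> %d\n", Init());
      if (Deinit)
        std::printf("deinit_plugin -> %d\n", Deinit());

      dlclose(Handle);
      return 0;
    }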