Skip to content

Commit cba81a2

Browse files
authored
Merge pull request #270 from AMD-Lightning-Internal/memory_manager_threshold
[Offload][AMDGPU] use virtual call to set threshold for GenericDeviceTy::MemoryManager
2 parents 45e055f + 493d957 commit cba81a2

File tree

3 files changed

+43
-1
lines changed

3 files changed

+43
-1
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4337,6 +4337,42 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
43374337
return Plugin::success();
43384338
}
43394339

4340+
bool checkIfCoarseGrainMemoryNearOrAbove64GB() {
4341+
for (AMDGPUMemoryPoolTy *Pool : AllMemoryPools) {
4342+
if (Pool->isGlobal() && Pool->isCoarseGrained()) {
4343+
uint64_t Value;
4344+
hsa_status_t Status =
4345+
Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_SIZE, Value);
4346+
if (Status != HSA_STATUS_SUCCESS) continue;
4347+
constexpr uint64_t Almost64Gig = 0xFF0000000;
4348+
if (Value >= Almost64Gig) return true;
4349+
}
4350+
}
4351+
return false; // CoarseGrain pool w/ 64GB or more capacity not found
4352+
}
4353+
4354+
size_t getMemoryManagerSizeThreshold() override {
4355+
// TODO: check performance on lower memory capacity GPU
4356+
// for lowering the threshold from 64GB.
4357+
if (checkIfCoarseGrainMemoryNearOrAbove64GB()) {
4358+
// Set GenericDeviceTy::MemoryManager's Threshold to ~2GB,
4359+
// used if not set by LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD
4360+
// ENV var. This MemoryManager is used for
4361+
// omp_target_alloc(), OpenMP (non-usm) map clause, etc.
4362+
//
4363+
// TODO 1: Fine tune to lower the threshold closer to 1GB.
4364+
// TODO 2: HSA-level memory manager on the user-side such that
4365+
// memory management is shared with HIP and OpenCL.
4366+
//
4367+
// If this value needs to go above UINT_MAX, consider
4368+
// adding sizeof(size_t) check to avoid unpleasant truncation
4369+
// surprises where size_t is still 32bit.
4370+
constexpr size_t Almost2Gig = 2000000000u;
4371+
return Almost2Gig;
4372+
}
4373+
return 0;
4374+
}
4375+
43404376
/// Determines if
43414377
/// - Coarse graining upon USM map on MI200 needs to be enabled.
43424378
void specialBehaviorHandling() {

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1189,6 +1189,9 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
11891189
/// Pointer to the memory manager or nullptr if not available.
11901190
MemoryManagerTy *MemoryManager;
11911191

1192+
/// Per device setting of MemoryManager's Threshold
1193+
virtual size_t getMemoryManagerSizeThreshold() { return 0 /* use default */; }
1194+
11921195
/// Environment variables defined by the OpenMP standard.
11931196
Int32Envar OMP_TeamLimit;
11941197
Int32Envar OMP_NumTeams;

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1005,8 +1005,11 @@ Error GenericDeviceTy::init(GenericPluginTy &Plugin) {
10051005

10061006
// Enable the memory manager if required.
10071007
auto [ThresholdMM, EnableMM] = MemoryManagerTy::getSizeThresholdFromEnv();
1008-
if (EnableMM)
1008+
if (EnableMM) {
1009+
if (ThresholdMM == 0)
1010+
ThresholdMM = getMemoryManagerSizeThreshold();
10091011
MemoryManager = new MemoryManagerTy(*this, ThresholdMM);
1012+
}
10101013

10111014
return Plugin::success();
10121015
}

0 commit comments

Comments
 (0)