Skip to content

Commit a79a105

Browse files
authored
Merge pull request llvm#310 from AMD-Lightning-Internal/amd/dev/hidekido/CoarseTune
memory tuning threshold Amd/dev/hidekido/coarse tune
2 parents 0208a6e + 4c6caeb commit a79a105

File tree

4 files changed

+44
-2
lines changed

4 files changed

+44
-2
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4091,6 +4091,42 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
40914091
return Plugin::success();
40924092
}
40934093

4094+
bool checkIfCoarseGrainMemoryNearOrAbove64GB() {
4095+
for (AMDGPUMemoryPoolTy *Pool : AllMemoryPools) {
4096+
if (Pool->isGlobal() && Pool->isCoarseGrained()) {
4097+
uint64_t Value;
4098+
hsa_status_t Status =
4099+
Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_SIZE, Value);
4100+
if (Status != HSA_STATUS_SUCCESS) continue;
4101+
constexpr uint64_t Almost64Gig = 0xFF0000000;
4102+
if (Value >= Almost64Gig) return true;
4103+
}
4104+
}
4105+
return false; // CoarseGrain pool w/ 64GB or more capacity not found
4106+
}
4107+
4108+
size_t getMemoryManagerSizeThreshold() override {
4109+
// TODO: check performance on lower memory capacity GPU
4110+
// for lowering the threshold from 64GB.
4111+
if (checkIfCoarseGrainMemoryNearOrAbove64GB()) {
4112+
// Set GenericDeviceTy::MemoryManager's Threshold to ~2GB,
4113+
// used if not set by LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD
4114+
// ENV var. This MemoryManager is used for
4115+
// omp_target_alloc(), OpenMP (non-usm) map clause, etc.
4116+
//
4117+
// TODO 1: Fine tune to lower the threshold closer to 1GB.
4118+
// TODO 2: HSA-level memory manager on the user-side such that
4119+
// memory management is shared with HIP and OpenCL.
4120+
//
4121+
// If this value needs to go above UINT_MAX, consider
4122+
// adding sizeof(size_t) check to avoid unpleasant truncation
4123+
// surprises where size_t is still 32bit.
4124+
constexpr size_t Almost2Gig = 2000000000u;
4125+
return Almost2Gig;
4126+
}
4127+
return 0;
4128+
}
4129+
40944130
/// Determines if
40954131
/// - Coarse graining upon USM map on MI200 needs to be enabled.
40964132
void specialBehaviorHandling() {

offload/plugins-nextgen/common/include/MemoryManager.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ class MemoryManagerTy {
323323
/// manager explicitly by setting the var to 0. If user doesn't specify
324324
/// anything, returns <0, true>.
325325
static std::pair<size_t, bool> getSizeThresholdFromEnv() {
326-
static UInt32Envar MemoryManagerThreshold(
326+
static UInt64Envar MemoryManagerThreshold(
327327
"LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD", 0);
328328

329329
size_t Threshold = MemoryManagerThreshold.get();

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1003,6 +1003,9 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
10031003
/// Pointer to the memory manager or nullptr if not available.
10041004
MemoryManagerTy *MemoryManager;
10051005

1006+
/// Per-device setting of the MemoryManager's size threshold. The base
/// implementation returns 0, which stands for "use default"; device
/// implementations may override this to supply their own value.
1007+
virtual size_t getMemoryManagerSizeThreshold() { return 0 /* use default */; }
1008+
10061009
/// Environment variables defined by the OpenMP standard.
10071010
Int32Envar OMP_TeamLimit;
10081011
Int32Envar OMP_NumTeams;

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -914,8 +914,11 @@ Error GenericDeviceTy::init(GenericPluginTy &Plugin) {
914914

915915
// Enable the memory manager if required.
916916
auto [ThresholdMM, EnableMM] = MemoryManagerTy::getSizeThresholdFromEnv();
917-
if (EnableMM)
917+
if (EnableMM) {
918+
if (ThresholdMM == 0)
919+
ThresholdMM = getMemoryManagerSizeThreshold();
918920
MemoryManager = new MemoryManagerTy(*this, ThresholdMM);
921+
}
919922

920923
return Plugin::success();
921924
}

0 commit comments

Comments
 (0)