Skip to content

Commit 023cfc3

Browse files
committed
[AMDGPU][Offload] GFX90A coarse grain shared alloc option
1 parent daa2a39 commit 023cfc3

File tree

2 files changed

+19
-5
lines changed

2 files changed

+19
-5
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2680,6 +2680,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
26802680
OMPX_ApuMaps("OMPX_APU_MAPS", false),
26812681
OMPX_EnableGFX90ACoarseGrainUsmMaps(
26822682
"OMPX_ENABLE_GFX90A_COARSE_GRAIN_USM_MAPS", false),
2683+
OMPX_EnableGFX90ACoarseGrainSharedAlloc(
2684+
"OMPX_ENABLE_GFX90A_COARSE_GRAIN_SHARED_ALLOC", false),
26832685
OMPX_StrictSanityChecks("OMPX_STRICT_SANITY_CHECKS", false),
26842686
OMPX_SyncCopyBack("LIBOMPTARGET_SYNC_COPY_BACK", true),
26852687
OMPX_APUPrefaultMemcopy("LIBOMPTARGET_APU_PREFAULT_MEMCOPY", "true"),
@@ -4093,6 +4095,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
40934095
/// - Coarse graining upon USM map on MI200 needs to be enabled.
40944096
void specialBehaviorHandling() {
40954097
EnableGFX90ACoarseGrainUsmMaps = OMPX_EnableGFX90ACoarseGrainUsmMaps;
4098+
EnableGFX90ACoarseGrainSharedAlloc =
4099+
OMPX_EnableGFX90ACoarseGrainSharedAlloc;
40964100
}
40974101

40984102
bool IsGfx90aCoarseGrainUsmMapEnabledImpl() override final {
@@ -4210,6 +4214,12 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
42104214
/// OMPX_DISABLE_USM_MAPS
42114215
BoolEnvar OMPX_EnableGFX90ACoarseGrainUsmMaps;
42124216

4217+
/// Value of OMPX_ENABLE_GFX90A_COARSE_GRAIN_SHARED_ALLOC.
4218+
/// Use on MI200 systems to enable coarse grain
4219+
/// allocation of TARGET_ALLOC_SHARED memory.
4220+
/// Default is fine grain allocation.
4221+
BoolEnvar OMPX_EnableGFX90ACoarseGrainSharedAlloc;
4222+
42134223
/// Makes warnings turn into fatal errors
42144224
BoolEnvar OMPX_StrictSanityChecks;
42154225

@@ -4295,6 +4305,10 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
42954305
// residing on the page goes through implicit/explicit OpenMP map.
42964306
bool EnableGFX90ACoarseGrainUsmMaps = false;
42974307

4308+
// Set by OMPX_ENABLE_GFX90A_COARSE_GRAIN_SHARED_ALLOC environment variable.
4309+
// If set, TARGET_ALLOC_SHARED memory is allocated from coarse-grained memory on MI200 systems.
4310+
bool EnableGFX90ACoarseGrainSharedAlloc = false;
4311+
42984312
/// True if in multi-device mode.
42994313
bool IsMultiDeviceEnabled = false;
43004314

@@ -4954,6 +4968,11 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
49544968
break;
49554969
}
49564970

4971+
if (Kind == TARGET_ALLOC_SHARED && IsEquippedWithGFX90A &&
4972+
EnableGFX90ACoarseGrainSharedAlloc) {
4973+
MemoryPool = CoarseGrainedMemoryPools[0];
4974+
}
4975+
49574976
if (!MemoryPool) {
49584977
REPORT("No memory pool for the specified allocation kind\n");
49594978
return nullptr;

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2030,11 +2030,6 @@ void *GenericPluginTy::data_alloc(int32_t DeviceId, int64_t Size, void *HostPtr,
20302030
}
20312031
assert(*AllocOrErr && "Null pointer upon successful allocation");
20322032

2033-
// Method has no effect when the CUDA Plugin is used.
2034-
// This method can only be called if HostPtr is not null.
2035-
if (HostPtr && Kind == TARGET_ALLOC_SHARED)
2036-
set_coarse_grain_mem_region(DeviceId, HostPtr, Size);
2037-
20382033
return *AllocOrErr;
20392034
}();
20402035
T.res(R);

0 commit comments

Comments
 (0)