Skip to content

Commit 2323dda

Browse files
authored
Merge pull request #110 from AMD-Lightning-Internal/amd/dev/rlieberm/CG-rename-OMPX_DISABLE_USM_MAPS-1
[Offload][AMDGPU] Rename OMPX_DISABLE_USM_MAPS env var and associated…
2 parents e27dfd0 + bc8d7c3 commit 2323dda

File tree

6 files changed

+64
-52
lines changed

6 files changed

+64
-52
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 40 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -2678,8 +2678,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
26782678
// setting default to true here appears to solve random sdma problem
26792679
"LIBOMPTARGET_AMDGPU_USE_MULTIPLE_SDMA_ENGINES", false),
26802680
OMPX_ApuMaps("OMPX_APU_MAPS", false),
2681-
OMPX_DisableUsmMaps("OMPX_DISABLE_USM_MAPS", true),
2682-
OMPX_NoMapChecks("OMPX_DISABLE_MAPS", true),
2681+
OMPX_EnableGFX90ACoarseGrainUsmMaps(
2682+
"OMPX_ENABLE_GFX90A_COARSE_GRAIN_USM_MAPS", false),
26832683
OMPX_StrictSanityChecks("OMPX_STRICT_SANITY_CHECKS", false),
26842684
OMPX_SyncCopyBack("LIBOMPTARGET_SYNC_COPY_BACK", true),
26852685
OMPX_APUPrefaultMemcopy("LIBOMPTARGET_APU_PREFAULT_MEMCOPY", "true"),
@@ -2944,7 +2944,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
29442944
if (auto Err = checkIfMI300x())
29452945
return Err;
29462946

2947-
// detect special cases for MI200 and MI300A
2947+
// detect special cases for MI200
29482948
specialBehaviorHandling();
29492949

29502950
// detect ROCm-specific environment variables
@@ -3499,8 +3499,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
34993499
bool set_attr = true) override final {
35003500
// If the table has not yet been created, check if the gpu arch is
35013501
// MI200 and create it, but only if USM Map is enabled.
3502-
if (!IsEquippedWithGFX90A || OMPX_DisableUsmMaps)
3503-
return Plugin::success();
3502+
if (!IsEquippedWithGFX90A || !EnableGFX90ACoarseGrainUsmMaps)
3503+
return Plugin::error("Invalid request to set coarse grain mode");
35043504
if (!CoarseGrainMemoryTable)
35053505
CoarseGrainMemoryTable = new AMDGPUMemTypeBitFieldTable(
35063506
AMDGPU_X86_64_SystemConfiguration::max_addressable_byte +
@@ -4090,21 +4090,13 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
40904090
}
40914091

40924092
/// Determines if
4093-
/// - Map checks should be disabled
4094-
/// - Coarse graining upon map on MI200 needs to be disabled.
4095-
/// - Prefaulting GPU page tables on MI300A needs to be enabled.
4093+
/// - Coarse graining upon USM map on MI200 needs to be enabled.
40964094
void specialBehaviorHandling() {
4097-
if (OMPX_NoMapChecks.get() == false) {
4098-
NoUSMMapChecks = false;
4099-
}
4100-
4101-
if (OMPX_DisableUsmMaps.get() == true) {
4102-
EnableFineGrainedMemory = true;
4103-
}
4095+
EnableGFX90ACoarseGrainUsmMaps = OMPX_EnableGFX90ACoarseGrainUsmMaps;
41044096
}
41054097

4106-
bool IsFineGrainedMemoryEnabledImpl() override final {
4107-
return EnableFineGrainedMemory;
4098+
/// AMDGPU override of the generic coarse-grain USM map query.
///
/// Returns the logical negation of EnableGFX90ACoarseGrainUsmMaps: the
/// result is true when the flag is false, and false when the flag is true.
///
/// NOTE(review): the name reads as "is enabled", yet the value returned is
/// the inverse of the enable flag. Any caller that takes a true result to
/// mean "coarse graining is enabled" gets the opposite answer. Confirm
/// whether the negation is intended before relying on this query.
bool IsGfx90aCoarseGrainUsmMapEnabledImpl() override final {
4099+
return !EnableGFX90ACoarseGrainUsmMaps;
41084100
}
41094101

41104102
bool hasAPUDeviceImpl() override final { return IsAPU; }
@@ -4207,17 +4199,16 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
42074199
/// automatic zero-copy behavior on non-APU GPUs.
42084200
BoolEnvar OMPX_ApuMaps;
42094201

4210-
/// Value of OMPX_DISABLE_USM_MAPS. Use on MI200
4211-
/// systems to disable both device memory
4212-
/// allocations and host-device memory copies upon
4213-
/// map, and coarse graining of mapped variables.
4214-
BoolEnvar OMPX_DisableUsmMaps;
4215-
4216-
/// Value of OMPX_DISABLE_MAPS. Turns off map table checks
4217-
/// in libomptarget in unified_shared_memory mode. Legacy:
4218-
/// never turned to false (unified_shared_memory mode is
4219-
/// currently always without map checks.
4220-
BoolEnvar OMPX_NoMapChecks;
4202+
/// Value of OMPX_ENABLE_GFX90A_COARSE_GRAIN_USM_MAPS.
4203+
/// Use on MI200 systems to enable coarse graining
4204+
/// of mapped variables (and other variables partially
4205+
/// or fully on the same memory page) under unified
4206+
/// shared memory.
4207+
///
4208+
/// It was enabled by default up to ROCm 6.3
4209+
/// and the env var controlling it was spelled
4210+
/// OMPX_DISABLE_USM_MAPS
4211+
BoolEnvar OMPX_EnableGFX90ACoarseGrainUsmMaps;
42214212

42224213
/// Makes warnings turn into fatal errors
42234214
BoolEnvar OMPX_StrictSanityChecks;
@@ -4298,14 +4289,24 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
42984289
/// False otherwise.
42994290
bool IsXnackEnabled = false;
43004291

4301-
// Set by OMPX_DISABLE_USM_MAPS environment variable.
4302-
// If set, fine graned memory is used for maps instead of coarse grained.
4303-
bool EnableFineGrainedMemory = false;
4292+
// Set by OMPX_ENABLE_GFX90A_COARSE_GRAIN_USM_MAPS environment variable.
4293+
// If set, under unified shared memory on MI200, a fine grained memory page
4294+
// is switched to coarse grain (and stays coarse grain) if a variable
4295+
// residing on the page goes through implicit/explicit OpenMP map.
4296+
bool EnableGFX90ACoarseGrainUsmMaps = false;
43044297

4305-
/// Set by OMPX_DISABLE_MAPS environment variable.
4306-
// If false, map checks are performed also in unified_shared_memory mode.
4307-
// TODO: this feature is non functional.
4308-
bool NoUSMMapChecks = true;
4298+
/// True if in multi-device mode.
4299+
bool IsMultiDeviceEnabled = false;
4300+
4301+
public:
4302+
/// Reports whether this device is treated as an MI300 series device.
///
/// \returns true when ComputeUnitKind is exactly "gfx942", false for any
/// other value.
4303+
bool checkIfMI300Device() {
4304+
// Per the original note this is meant to cover MI300, MI300X and MI308;
// "gfx942" is the only name matched below.
4305+
llvm::StringRef StrGfxName(ComputeUnitKind);
4306+
return llvm::StringSwitch<bool>(StrGfxName)
4307+
.Case("gfx942", true)
4308+
.Default(false);
4309+
}
43094310
};
43104311

43114312
Error AMDGPUDeviceImageTy::loadExecutable(const AMDGPUDeviceTy &Device) {
@@ -4964,10 +4965,10 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
49644965
REPORT("%s\n", toString(std::move(Err)).data());
49654966
return nullptr;
49664967
}
4967-
// FIXME: Maybe this should be guarded by hasgfx90a
4968-
if (MemoryPool == CoarseGrainedMemoryPools[0]) {
4969-
// printf(" Device::allocate calling setCoarseGrainMemoryImpl(Alloc, Size,
4970-
// false)\n");
4968+
if (MemoryPool == CoarseGrainedMemoryPools[0] && IsEquippedWithGFX90A &&
4969+
EnableGFX90ACoarseGrainUsmMaps) {
4970+
// Need to register in the coarse grain usm map table
4971+
// if not already registered.
49714972
if (auto Err = setCoarseGrainMemoryImpl(Alloc, Size, /*set_attr=*/false)) {
49724973
REPORT("%s\n", toString(std::move(Err)).data());
49734974
return nullptr;

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -836,10 +836,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
836836
virtual bool supportsUnifiedMemoryImpl() { return false; }
837837

838838
// Returns true if coarse graining of mapped variables is
839-
// disabled on MI200 GPUs.
840-
// virtual bool IsFineGrainedMemoryEnabled() { return false; }
841-
bool IsFineGrainedMemoryEnabled();
842-
virtual bool IsFineGrainedMemoryEnabledImpl() { return false; }
839+
// enabled on MI200 GPUs.
840+
// virtual bool IsGfx90aCoarseGrainUsmMapEnabled() { return false; }
841+
bool IsGfx90aCoarseGrainUsmMapEnabled();
842+
virtual bool IsGfx90aCoarseGrainUsmMapEnabledImpl() { return false; }
843843

844844
/// Create an event.
845845
Error createEvent(void **EventPtrStorage);
@@ -1254,8 +1254,9 @@ struct GenericPluginTy {
12541254
/// Returns if this device supports USM.
12551255
bool supports_unified_memory(int32_t DeviceId);
12561256

1257-
/// Returns if fine grained memory is supported.
1258-
bool is_fine_grained_memory_enabled(int32_t DeviceId);
1257+
/// Returns if GFX90A coarse graining of OpenMP mapped
1258+
/// variables is enabled under unified shared memory.
1259+
bool is_gfx90a_coarse_grain_usm_map_enabled(int32_t DeviceId);
12591260

12601261
/// Returns if managed memory is supported.
12611262
bool is_system_supporting_managed_memory(int32_t DeviceId);

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1606,8 +1606,8 @@ bool GenericDeviceTy::supportsUnifiedMemory() {
16061606
return supportsUnifiedMemoryImpl();
16071607
}
16081608

1609-
bool GenericDeviceTy::IsFineGrainedMemoryEnabled() {
1610-
return IsFineGrainedMemoryEnabledImpl();
1609+
/// Public entry point for the GFX90A coarse-grain USM map query.
///
/// Forwards to the IsGfx90aCoarseGrainUsmMapEnabledImpl() hook and returns
/// its result unchanged; no additional checks are performed here.
bool GenericDeviceTy::IsGfx90aCoarseGrainUsmMapEnabled() {
1610+
return IsGfx90aCoarseGrainUsmMapEnabledImpl();
16111611
}
16121612

16131613
Error GenericDeviceTy::prepopulatePageTable(void *ptr, int64_t size) {
@@ -1932,9 +1932,11 @@ bool GenericPluginTy::supports_unified_memory(int32_t DeviceId) {
19321932
return R;
19331933
}
19341934

1935-
bool GenericPluginTy::is_fine_grained_memory_enabled(int32_t DeviceId) {
1935+
bool GenericPluginTy::is_gfx90a_coarse_grain_usm_map_enabled(int32_t DeviceId) {
19361936
auto T = logger::log<bool>(__func__, DeviceId);
1937-
auto R = [&]() { return getDevice(DeviceId).IsFineGrainedMemoryEnabled(); }();
1937+
auto R = [&]() {
1938+
return getDevice(DeviceId).IsGfx90aCoarseGrainUsmMapEnabled();
1939+
}();
19381940
T.res(R);
19391941
return R;
19401942
}

offload/src/OpenMP/API.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -696,6 +696,11 @@ EXTERN void omp_register_coarse_grain_mem(void *ptr, size_t size, int setattr) {
696696
FATAL_MESSAGE(omp_get_default_device(), "%s",
697697
toString(DeviceOrErr.takeError()).c_str());
698698

699+
// Register coarse-grain memory only on a gfx90a device that has coarse
// graining of USM maps turned on; in every other case this call is a no-op.
//
// The AMDGPU implementation behind is_gfx90a_coarse_grain_usm_map_enabled()
// returns the negation of its EnableGFX90ACoarseGrainUsmMaps flag, so a
// *false* result is the one that means "enabled". The map path in
// MappingInfoTy::getTargetPointer negates the query for the same reason;
// this guard must use the same polarity, otherwise it returns early exactly
// when the feature is on and falls through when it is off.
if (!DeviceOrErr->RTL->is_gfx90a(omp_get_default_device()) ||
    DeviceOrErr->RTL->is_gfx90a_coarse_grain_usm_map_enabled(
        omp_get_default_device()))
  return;
703+
699704
bool set_attr = (setattr == 1) ? true : false;
700705
DeviceOrErr->RTL->set_coarse_grain_mem(omp_get_default_device(), ptr, size,
701706
set_attr);

offload/src/OpenMP/Mapping.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,8 @@ TargetPointerResultTy MappingInfoTy::getTargetPointer(
271271
// memory as coarse-grained. The usage of coarse-grained memory can be
272272
// enabled by setting the env-var OMPX_ENABLE_GFX90A_COARSE_GRAIN_USM_MAPS=1.
273273
if (Device.RTL->is_gfx90a(Device.DeviceID) && HstPtrBegin &&
274-
(!Device.RTL->is_fine_grained_memory_enabled(Device.DeviceID))) {
274+
(!Device.RTL->is_gfx90a_coarse_grain_usm_map_enabled(
275+
Device.DeviceID))) {
275276
Device.RTL->set_coarse_grain_mem_region(Device.DeviceID, HstPtrBegin,
276277
Size);
277278
INFO(OMP_INFOTYPE_MAPPING_CHANGED, Device.DeviceID,

offload/test/mapping/coarse_grain.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@
44
// RUN: | %fcheck-generic -check-prefix=CHECK_FINE
55

66
// RUN: %libomptarget-compilexx-generic
7-
// RUN: env OMPX_DISABLE_USM_MAPS=1 HSA_XNACK=1 LIBOMPTARGET_INFO=30 %libomptarget-run-generic 2>&1 \
7+
// RUN: env OMPX_ENABLE_GFX90A_COARSE_GRAIN_USM_MAPS=0 OMPX_DISABLE_USM_MAPS=1 HSA_XNACK=1 LIBOMPTARGET_INFO=30 \
8+
// RUN: %libomptarget-run-generic 2>&1 \
89
// RUN: | %fcheck-generic -check-prefix=CHECK_FINE
910

1011
// RUN: %libomptarget-compilexx-generic
11-
// RUN: env OMPX_DISABLE_USM_MAPS=0 HSA_XNACK=1 LIBOMPTARGET_INFO=30 %libomptarget-run-generic 2>&1 \
12+
// RUN: env OMPX_ENABLE_GFX90A_COARSE_GRAIN_USM_MAPS=1 OMPX_DISABLE_USM_MAPS=0 HSA_XNACK=1 LIBOMPTARGET_INFO=30 \
13+
// RUN: %libomptarget-run-generic 2>&1 \
1214
// RUN: | %fcheck-generic -check-prefix=CHECK
1315

1416
// UNSUPPORTED: aarch64-unknown-linux-gnu

0 commit comments

Comments
 (0)