@@ -2678,8 +2678,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
2678
2678
// setting default to true here appears to solve random sdma problem
2679
2679
" LIBOMPTARGET_AMDGPU_USE_MULTIPLE_SDMA_ENGINES" , false ),
2680
2680
OMPX_ApuMaps (" OMPX_APU_MAPS" , false ),
2681
- OMPX_DisableUsmMaps ( " OMPX_DISABLE_USM_MAPS " , true ),
2682
- OMPX_NoMapChecks ( " OMPX_DISABLE_MAPS " , true ),
2681
+ OMPX_EnableGFX90ACoarseGrainUsmMaps (
2682
+ " OMPX_ENABLE_GFX90A_COARSE_GRAIN_USM_MAPS " , false ),
2683
2683
OMPX_StrictSanityChecks (" OMPX_STRICT_SANITY_CHECKS" , false ),
2684
2684
OMPX_SyncCopyBack (" LIBOMPTARGET_SYNC_COPY_BACK" , true ),
2685
2685
OMPX_APUPrefaultMemcopy (" LIBOMPTARGET_APU_PREFAULT_MEMCOPY" , " true" ),
@@ -2944,7 +2944,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
2944
2944
if (auto Err = checkIfMI300x ())
2945
2945
return Err;
2946
2946
2947
- // detect special cases for MI200 and MI300A
2947
+ // detect special cases for MI200
2948
2948
specialBehaviorHandling ();
2949
2949
2950
2950
// detect ROCm-specific environment variables
@@ -3499,8 +3499,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
3499
3499
bool set_attr = true ) override final {
3500
3500
// If the table has not yet been created, check if the gpu arch is
3501
3501
// MI200 and create it, but only if USM Map is enabled.
3502
- if (!IsEquippedWithGFX90A || OMPX_DisableUsmMaps )
3503
- return Plugin::success ( );
3502
+ if (!IsEquippedWithGFX90A || !EnableGFX90ACoarseGrainUsmMaps )
3503
+ return Plugin::error ( " Invalid request to set coarse grain mode " );
3504
3504
if (!CoarseGrainMemoryTable)
3505
3505
CoarseGrainMemoryTable = new AMDGPUMemTypeBitFieldTable (
3506
3506
AMDGPU_X86_64_SystemConfiguration::max_addressable_byte +
@@ -4090,21 +4090,13 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
4090
4090
}
4091
4091
4092
4092
// / Determines if
4093
- // / - Map checks should be disabled
4094
- // / - Coarse graining upon map on MI200 needs to be disabled.
4095
- // / - Prefaulting GPU page tables on MI300A needs to be enabled.
4093
+ // / - Coarse graining upon USM map on MI200 needs to be enabled.
4096
4094
void specialBehaviorHandling () {
4097
- if (OMPX_NoMapChecks.get () == false ) {
4098
- NoUSMMapChecks = false ;
4099
- }
4100
-
4101
- if (OMPX_DisableUsmMaps.get () == true ) {
4102
- EnableFineGrainedMemory = true ;
4103
- }
4095
+ // Copy the value of the OMPX_EnableGFX90ACoarseGrainUsmMaps envar into the
+ // plain bool member that the rest of the device code tests.
+ EnableGFX90ACoarseGrainUsmMaps = OMPX_EnableGFX90ACoarseGrainUsmMaps;
4104
4096
}
4105
4097
4106
- bool IsFineGrainedMemoryEnabledImpl () override final {
4107
- return EnableFineGrainedMemory ;
4098
+ // / Reports whether coarse graining of USM-mapped memory on gfx90a is
+ // / enabled, i.e. the value cached in EnableGFX90ACoarseGrainUsmMaps by
+ // / specialBehaviorHandling().
+ bool IsGfx90aCoarseGrainUsmMapEnabledImpl () override final {
4099
+ // Return the flag as-is: negating it would report "enabled" exactly when
+ // the feature is switched off, contradicting the method name.
+ return EnableGFX90ACoarseGrainUsmMaps ;
4108
4100
}
4109
4101
4110
4102
// / Returns the value of the IsAPU member of this device.
bool hasAPUDeviceImpl () override final { return IsAPU; }
@@ -4207,17 +4199,16 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
4207
4199
// / automatic zero-copy behavior on non-APU GPUs.
4208
4200
BoolEnvar OMPX_ApuMaps;
4209
4201
4210
- // / Value of OMPX_DISABLE_USM_MAPS. Use on MI200
4211
- // / systems to disable both device memory
4212
- // / allocations and host-device memory copies upon
4213
- // / map, and coarse graining of mapped variables.
4214
- BoolEnvar OMPX_DisableUsmMaps;
4215
-
4216
- // / Value of OMPX_DISABLE_MAPS. Turns off map table checks
4217
- // / in libomptarget in unified_shared_memory mode. Legacy:
4218
- // / never turned to false (unified_shared_memory mode is
4219
- // / currently always without map checks.
4220
- BoolEnvar OMPX_NoMapChecks;
4202
+ // / Value of OMPX_ENABLE_GFX90A_COARSE_GRAIN_USM_MAPS.
4203
+ // / Use on MI200 systems to enable coarse graining
4204
+ // / of mapped variables (and other variables partially
4205
+ // / or fully on the same memory page) under unified
4206
+ // / shared memory.
4207
+ // /
4208
+ // / It was enabled by default up to ROCm 6.3
4209
+ // / and env var spelling for controlling it was
4210
+ // / OMPX_DISABLE_USM_MAPS
4211
+ BoolEnvar OMPX_EnableGFX90ACoarseGrainUsmMaps;
4221
4212
4222
4213
// / Makes warnings turn into fatal errors
4223
4214
BoolEnvar OMPX_StrictSanityChecks;
@@ -4298,14 +4289,24 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
4298
4289
// / False otherwise.
4299
4290
bool IsXnackEnabled = false ;
4300
4291
4301
- // Set by OMPX_DISABLE_USM_MAPS environment variable.
4302
- // If set, fine graned memory is used for maps instead of coarse grained.
4303
- bool EnableFineGrainedMemory = false ;
4292
+ // Set by OMPX_ENABLE_GFX90A_COARSE_GRAIN_USM_MAPS environment variable.
4293
+ // If set, under unified shared memory on MI200, fine grained memory page
4294
+ // is switched to coarse grain (and stays coarse grain) if a variable
4295
+ // residing on the page goes through implicit/explicit OpenMP map.
4296
+ bool EnableGFX90ACoarseGrainUsmMaps = false ;
4304
4297
4305
- // / Set by OMPX_DISABLE_MAPS environment variable.
4306
- // If false, map checks are performed also in unified_shared_memory mode.
4307
- // TODO: this feature is non functional.
4308
- bool NoUSMMapChecks = true ;
4298
+ // / True if in multi-device mode.
4299
+ bool IsMultiDeviceEnabled = false ;
4300
+
4301
+ public:
4302
+ // / Returns true if ComputeUnitKind names an MI300 series device,
+ // / false otherwise.
4303
+ bool checkIfMI300Device () {
4304
+ // Only the gfx942 architecture name is matched; per the original note
+ // this is meant to include MI300, MI300X and MI308.
4305
+ llvm::StringRef StrGfxName (ComputeUnitKind);
4306
+ return llvm::StringSwitch<bool >(StrGfxName)
4307
+ .Case (" gfx942" , true )
4308
+ .Default (false );
4309
+ }
4309
4310
};
4310
4311
4311
4312
Error AMDGPUDeviceImageTy::loadExecutable (const AMDGPUDeviceTy &Device) {
@@ -4964,10 +4965,10 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
4964
4965
REPORT (" %s\n " , toString (std::move (Err)).data ());
4965
4966
return nullptr ;
4966
4967
}
4967
- // FIXME: Maybe this should be guarded by hasgfx90a
4968
- if (MemoryPool == CoarseGrainedMemoryPools[ 0 ] ) {
4969
- // printf(" Device::allocate calling setCoarseGrainMemoryImpl(Alloc, Size,
4970
- // false)\n");
4968
+ if (MemoryPool == CoarseGrainedMemoryPools[ 0 ] && IsEquippedWithGFX90A &&
4969
+ EnableGFX90ACoarseGrainUsmMaps ) {
4970
+ // Need to register in the coarse grain usm map table
4971
+ // if not already registered.
4971
4972
if (auto Err = setCoarseGrainMemoryImpl (Alloc, Size, /* set_attr=*/ false )) {
4972
4973
REPORT (" %s\n " , toString (std::move (Err)).data ());
4973
4974
return nullptr ;
0 commit comments