@@ -2688,6 +2688,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
2688
2688
OMPX_APUPrefaultMemcopySize (" LIBOMPTARGET_APU_PREFAULT_MEMCOPY_SIZE" ,
2689
2689
1 * 1024 * 1024 ), // 1MB
2690
2690
OMPX_DGPUMaps (" OMPX_DGPU_MAPS" , false ),
2691
+ OMPX_EnableDevice2DeviceMemAccess (
2692
+ " OMPX_ENABLE_DEVICE_TO_DEVICE_MEM_ACCESS" , false ),
2691
2693
AMDGPUStreamManager (*this , Agent), AMDGPUEventManager(*this ),
2692
2694
AMDGPUSignalManager (*this ), Agent(Agent), HostDevice(HostDevice) {}
2693
2695
@@ -4274,6 +4276,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
4274
4276
/// copy on APUs regardless of the setting of HSA_XNACK.
4275
4277
BoolEnvar OMPX_DGPUMaps;
4276
4278
4279
+ // Determines whether we call HSA API, upon device memory allocation,
4280
+ // for making the memory accessible from other agents.
4281
+ // Default is disabled
4282
+ BoolEnvar OMPX_EnableDevice2DeviceMemAccess;
4283
+
4277
4284
/// Stream manager for AMDGPU streams.
4278
4285
AMDGPUStreamManagerTy AMDGPUStreamManager;
4279
4286
@@ -5031,7 +5038,8 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
5031
5038
}
5032
5039
}
5033
5040
5034
- if (Alloc) {
5041
+ if (Alloc && (Kind == TARGET_ALLOC_HOST || Kind == TARGET_ALLOC_SHARED ||
5042
+ OMPX_EnableDevice2DeviceMemAccess)) {
5035
5043
// Get a list of agents that can access this memory pool. Inherently
5036
5044
// necessary for host or shared allocations. Also enabled for device memory
5037
5045
// to allow device to device memcpy, but only when
// OMPX_EnableDevice2DeviceMemAccess is set.
0 commit comments