Skip to content

Commit c32e705

Browse files
authored
Add Env var control on enabling device-to-device memory access (llvm#1364)
2 parents fbbe276 + 55fc654 commit c32e705

File tree

1 file changed

+9
-1
lines changed
  • offload/plugins-nextgen/amdgpu/src

1 file changed

+9
-1
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2688,6 +2688,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
26882688
OMPX_APUPrefaultMemcopySize("LIBOMPTARGET_APU_PREFAULT_MEMCOPY_SIZE",
26892689
1 * 1024 * 1024), // 1MB
26902690
OMPX_DGPUMaps("OMPX_DGPU_MAPS", false),
2691+
OMPX_EnableDevice2DeviceMemAccess(
2692+
"OMPX_ENABLE_DEVICE_TO_DEVICE_MEM_ACCESS", false),
26912693
AMDGPUStreamManager(*this, Agent), AMDGPUEventManager(*this),
26922694
AMDGPUSignalManager(*this), Agent(Agent), HostDevice(HostDevice) {}
26932695

@@ -4274,6 +4276,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
42744276
/// copy on APUs regardless of the setting of HSA_XNACK.
42754277
BoolEnvar OMPX_DGPUMaps;
42764278

4279+
// Determines whether we call HSA API, upon device memory allocation,
4280+
// for making the memory accessible from other agents.
4281+
// Default is disabled
4282+
BoolEnvar OMPX_EnableDevice2DeviceMemAccess;
4283+
42774284
/// Stream manager for AMDGPU streams.
42784285
AMDGPUStreamManagerTy AMDGPUStreamManager;
42794286

@@ -5031,7 +5038,8 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
50315038
}
50325039
}
50335040

5034-
if (Alloc) {
5041+
if (Alloc && (Kind == TARGET_ALLOC_HOST || Kind == TARGET_ALLOC_SHARED ||
5042+
OMPX_EnableDevice2DeviceMemAccess)) {
50355043
// Get a list of agents that can access this memory pool. Inherently
50365044
// necessary for host or shared allocations. Also enabled for device memory
50375045
// to allow device to device memcpy, but only when
// OMPX_ENABLE_DEVICE_TO_DEVICE_MEM_ACCESS is set (disabled by default).

0 commit comments

Comments
 (0)