Skip to content

Commit 3ab714b

Browse files
[SYCL][L0] Add SYCL_PI_LEVEL_ZERO_USM_RESIDENT to control if/where to make USM allocation resident (#8361)
Signed-off-by: Sergey V Maslov <[email protected]>
1 parent 7ffbd65 commit 3ab714b

File tree

2 files changed

+69
-3
lines changed

2 files changed

+69
-3
lines changed

sycl/doc/EnvironmentVariables.md

100644100755
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,7 @@ variables in production code.</span>
258258
| `SYCL_PI_LEVEL_ZERO_REUSE_DISCARDED_EVENTS` | Integer | When set to a positive value enables the mode when discarded Level Zero events are reset and reused in scope of the same in-order queue based on the dependency chain between commands. Default is 1. |
259259
| `SYCL_PI_LEVEL_ZERO_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING` (Deprecated) | Integer | When set to non-zero value exposes compute slices as sub-sub-devices in `sycl::info::partition_property::partition_by_affinity_domain` partitioning scheme. Default is zero meaning that they are only exposed when partitioning by `sycl::info::partition_property::ext_intel_partition_by_cslice`. This option is introduced for compatibility reasons and is immediately deprecated. New code must not rely on this behavior. Also note that even if sub-sub-device was created using `partition_by_affinity_domain` it would still be reported as created via partitioning by compute slices. |
260260
| `SYCL_PI_LEVEL_ZERO_IMMEDIATE_COMMANDLISTS_EVENT_CLEANUP_THRESHOLD` | Integer | If non-negative then the threshold is set to this value. If negative, the threshold is set to INT_MAX. Whenever the number of events associated with an immediate command list exceeds this threshold, a check is made for signaled events and these events are recycled. Setting this threshold low causes events to be checked more often, which could result in unneeded events being recycled sooner. However, more frequent event status checks may cost time. The default is 20. |
261+
| `SYCL_PI_LEVEL_ZERO_USM_RESIDENT` | Integer | Controls if/where to make USM allocations resident at the time of allocation. If set to 0 (default) then no special residency is forced. If set to 1 then allocation (device or shared) is made resident at the device of allocation. If set to 2 then allocation (device or shared) is made resident on all devices in the context of allocation that have P2P access to the device of allocation. For host allocation, any non-0 setting forces the allocation resident on all devices in the context. |
261262

262263
## Debugging variables for CUDA Plugin
263264

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 68 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6864,12 +6864,73 @@ pi_result piextGetDeviceFunctionPointer(pi_device Device, pi_program Program,
68646864
return mapError(ZeResult);
68656865
}
68666866

6867-
static bool ShouldUseUSMAllocator() {
6867+
static bool UseUSMAllocator = [] {
68686868
// Enable allocator by default if it's not explicitly disabled
68696869
return std::getenv("SYCL_PI_LEVEL_ZERO_DISABLE_USM_ALLOCATOR") == nullptr;
6870-
}
6870+
}();
6871+
6872+
enum class USMAllocationForceResidencyType {
6873+
// [Default] Do not force memory residency at allocation time.
6874+
None = 0,
6875+
// Force memory resident on the device of allocation at allocation time.
6876+
// For host allocation force residency on all devices in a context.
6877+
Device = 1,
6878+
// Force memory resident on all devices in the context with P2P
6879+
// access to the device of allocation.
6880+
// For host allocation force residency on all devices in a context.
6881+
P2PDevices = 2
6882+
};
6883+
6884+
// Returns the desired USM residency setting
6885+
static USMAllocationForceResidencyType USMAllocationForceResidency = [] {
6886+
const auto Str = std::getenv("SYCL_PI_LEVEL_ZERO_USM_RESIDENT");
6887+
if (!Str)
6888+
return USMAllocationForceResidencyType::None;
6889+
switch (std::atoi(Str)) {
6890+
case 1:
6891+
return USMAllocationForceResidencyType::Device;
6892+
case 2:
6893+
return USMAllocationForceResidencyType::P2PDevices;
6894+
default:
6895+
return USMAllocationForceResidencyType::None;
6896+
};
6897+
}();
6898+
6899+
// Make USM allocation resident as requested
6900+
static pi_result
6901+
USMAllocationMakeResident(pi_context Context,
6902+
pi_device Device, // nullptr for host allocation
6903+
void *Ptr, size_t Size) {
68716904

6872-
static const bool UseUSMAllocator = ShouldUseUSMAllocator();
6905+
std::list<pi_device> Devices;
6906+
6907+
if (USMAllocationForceResidency == USMAllocationForceResidencyType::None)
6908+
return PI_SUCCESS;
6909+
else if (!Device) {
6910+
// Host allocation, make it resident on all devices in the context
6911+
Devices.insert(Devices.end(), Context->Devices.begin(),
6912+
Context->Devices.end());
6913+
} else {
6914+
Devices.push_back(Device);
6915+
if (USMAllocationForceResidency ==
6916+
USMAllocationForceResidencyType::P2PDevices) {
6917+
ze_bool_t P2P;
6918+
for (const auto &D : Context->Devices) {
6919+
if (D == Device)
6920+
continue;
6921+
// TODO: Cache P2P devices for a context
6922+
ZE_CALL(zeDeviceCanAccessPeer, (D->ZeDevice, Device->ZeDevice, &P2P));
6923+
if (P2P)
6924+
Devices.push_back(D);
6925+
}
6926+
}
6927+
}
6928+
for (const auto &D : Devices) {
6929+
ZE_CALL(zeContextMakeMemoryResident,
6930+
(Context->ZeContext, D->ZeDevice, Ptr, Size));
6931+
}
6932+
return PI_SUCCESS;
6933+
}
68736934

68746935
static pi_result USMDeviceAllocImpl(void **ResultPtr, pi_context Context,
68756936
pi_device Device,
@@ -6902,6 +6963,7 @@ static pi_result USMDeviceAllocImpl(void **ResultPtr, pi_context Context,
69026963
reinterpret_cast<std::uintptr_t>(*ResultPtr) % Alignment == 0,
69036964
PI_ERROR_INVALID_VALUE);
69046965

6966+
USMAllocationMakeResident(Context, Device, *ResultPtr, Size);
69056967
return PI_SUCCESS;
69066968
}
69076969

@@ -6932,6 +6994,8 @@ static pi_result USMSharedAllocImpl(void **ResultPtr, pi_context Context,
69326994
reinterpret_cast<std::uintptr_t>(*ResultPtr) % Alignment == 0,
69336995
PI_ERROR_INVALID_VALUE);
69346996

6997+
USMAllocationMakeResident(Context, Device, *ResultPtr, Size);
6998+
69356999
// TODO: Handle PI_MEM_ALLOC_DEVICE_READ_ONLY.
69367000
return PI_SUCCESS;
69377001
}
@@ -6956,6 +7020,7 @@ static pi_result USMHostAllocImpl(void **ResultPtr, pi_context Context,
69567020
reinterpret_cast<std::uintptr_t>(*ResultPtr) % Alignment == 0,
69577021
PI_ERROR_INVALID_VALUE);
69587022

7023+
USMAllocationMakeResident(Context, nullptr, *ResultPtr, Size);
69597024
return PI_SUCCESS;
69607025
}
69617026

0 commit comments

Comments
 (0)