Skip to content

Commit d04dbc2

Browse files
authored
Merge pull request #2259 from nrspruit/immediate_dg2_or_newer_default
[L0] Enable Immediate Command Lists by default given Intel GPU DG2 or newer
2 parents 56af572 + a67086d commit d04dbc2

File tree

2 files changed

+39
-21
lines changed

2 files changed

+39
-21
lines changed

source/adapters/level_zero/device.cpp

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1053,9 +1053,7 @@ ur_result_t urDeviceGetInfo(
10531053
case UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP:
10541054
return ReturnValue(false);
10551055
case UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP: {
1056-
bool DeviceIsDG2OrNewer =
1057-
Device->ZeDeviceIpVersionExt->ipVersion >= 0x030dc000;
1058-
return ReturnValue(DeviceIsDG2OrNewer &&
1056+
return ReturnValue(Device->isIntelDG2OrNewer() &&
10591057
Device->ZeDeviceImageProperties->maxImageDims1D > 0 &&
10601058
Device->ZeDeviceImageProperties->maxImageDims2D > 0 &&
10611059
Device->ZeDeviceImageProperties->maxImageDims3D > 0);
@@ -1065,15 +1063,11 @@ ur_result_t urDeviceGetInfo(
10651063
return ReturnValue(false);
10661064
}
10671065
case UR_DEVICE_INFO_BINDLESS_IMAGES_1D_USM_SUPPORT_EXP: {
1068-
bool DeviceIsDG2OrNewer =
1069-
Device->ZeDeviceIpVersionExt->ipVersion >= 0x030dc000;
1070-
return ReturnValue(DeviceIsDG2OrNewer &&
1066+
return ReturnValue(Device->isIntelDG2OrNewer() &&
10711067
Device->ZeDeviceImageProperties->maxImageDims1D > 0);
10721068
}
10731069
case UR_DEVICE_INFO_BINDLESS_IMAGES_2D_USM_SUPPORT_EXP: {
1074-
bool DeviceIsDG2OrNewer =
1075-
Device->ZeDeviceIpVersionExt->ipVersion >= 0x030dc000;
1076-
return ReturnValue(DeviceIsDG2OrNewer &&
1070+
return ReturnValue(Device->isIntelDG2OrNewer() &&
10771071
Device->ZeDeviceImageProperties->maxImageDims2D > 0);
10781072
}
10791073
case UR_DEVICE_INFO_IMAGE_PITCH_ALIGN_EXP:
@@ -1409,13 +1403,35 @@ ur_result_t urDeviceRelease(ur_device_handle_t Device) {
14091403
}
14101404
} // namespace ur::level_zero
14111405

1412-
// Whether immediate commandlists will be used for kernel launches and copies.
1413-
// The default is standard commandlists. Setting 1 or 2 specifies use of
1414-
// immediate commandlists. Note: when immediate commandlists are used then
1415-
// device-only events must be either AllHostVisible or OnDemandHostVisibleProxy.
1416-
// (See env var UR_L0_DEVICE_SCOPE_EVENTS).
1417-
1418-
// Get value of immediate commandlists env var setting or -1 if unset
1406+
/**
1407+
* @brief Determines the mode of immediate command lists to be used.
1408+
*
1409+
* This function checks environment variables and device properties to decide
1410+
* the mode of immediate command lists. The mode can be influenced by the
1411+
* following environment variables:
1412+
* - `UR_L0_USE_IMMEDIATE_COMMANDLISTS`
1413+
* - `SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS`
1414+
*
1415+
* If neither environment variable is set, the function defaults to using the
1416+
* device's properties to determine the mode.
1417+
*
1418+
* @return The mode of immediate command lists, which can be one of the
1419+
* following:
1420+
* - `NotUsed`: Immediate command lists are not used.
1421+
* - `PerQueue`: Immediate command lists are used per queue.
1422+
* - `PerThreadPerQueue`: Immediate command lists are used per thread per queue.
1423+
*
1424+
* The decision process is as follows:
1425+
* 1. If the environment variables are not set, the function checks if the
1426+
* device is Intel DG2 or newer and if the driver version is supported. If both
1427+
* conditions are met, or if the device is PVC, it returns `PerQueue`.
1428+
* Otherwise, it returns `NotUsed`.
1429+
* 2. If the environment variable is set, it returns the corresponding mode:
1430+
* - `0`: `NotUsed`
1431+
* - `1`: `PerQueue`
1432+
* - `2`: `PerThreadPerQueue`
1433+
* - Any other value: `NotUsed`
1434+
*/
14191435
ur_device_handle_t_::ImmCmdlistMode
14201436
ur_device_handle_t_::useImmediateCommandLists() {
14211437
// If immediate commandlist setting is not explicitly set, then use the device
@@ -1433,9 +1449,10 @@ ur_device_handle_t_::useImmediateCommandLists() {
14331449
}();
14341450

14351451
if (ImmediateCommandlistsSetting == -1) {
1452+
bool isDG2OrNewer = this->isIntelDG2OrNewer();
14361453
bool isDG2SupportedDriver =
14371454
this->Platform->isDriverVersionNewerOrSimilar(1, 5, 30820);
1438-
if ((isDG2SupportedDriver && isDG2()) || isPVC()) {
1455+
if ((isDG2SupportedDriver && isDG2OrNewer) || isPVC()) {
14391456
return PerQueue;
14401457
} else {
14411458
return NotUsed;

source/adapters/level_zero/device.hpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -180,10 +180,6 @@ struct ur_device_handle_t_ : _ur_object {
180180

181181
// Returns true when this device has a non-null RootDevice, which is how this
// struct distinguishes a sub-device from a root device.
bool isSubDevice() { return RootDevice != nullptr; }
182182

183-
// Is this a Data Center GPU Max series (aka PVC)?
184-
// TODO: change to use
185-
// https://spec.oneapi.io/level-zero/latest/core/api.html#ze-device-ip-version-ext-t
186-
// when that is stable.
187183
bool isPVC() {
188184
return (ZeDeviceProperties->deviceId & 0xff0) == 0xbd0 ||
189185
(ZeDeviceProperties->deviceId & 0xff0) == 0xb60;
@@ -192,6 +188,11 @@ struct ur_device_handle_t_ : _ur_object {
192188
// Checks if this GPU is an Intel Flex GPU or Intel Arc Alchemist (DG2).
// The test matches any device ID of the form 0x56xx: only bits 8-15 of
// ZeDeviceProperties->deviceId are compared, the low byte is ignored.
// NOTE(review): the vendor ID is not checked here, unlike isIntelDG2OrNewer().
193189
bool isDG2() { return (ZeDeviceProperties->deviceId & 0xff00) == 0x5600; }
194190

191+
// Returns true when the device reports vendor ID 0x8086 (the value this check
// treats as Intel) and its ZeDeviceIpVersionExt->ipVersion is at least
// 0x030dc000, the IP-version threshold this adapter uses for "DG2 or newer".
// NOTE(review): assumes ZeDeviceIpVersionExt is populated for every device
// this is called on - confirm for drivers lacking the IP-version extension.
bool isIntelDG2OrNewer() {
192+
return (ZeDeviceProperties->vendorId == 0x8086 &&
193+
ZeDeviceIpVersionExt->ipVersion >= 0x030dc000);
194+
}
195+
195196
// Returns true when the ZE_DEVICE_PROPERTY_FLAG_INTEGRATED bit is set in
// ZeDeviceProperties->flags. The masked value is converted to bool on return,
// so any non-zero result yields true.
bool isIntegrated() {
196197
return (ZeDeviceProperties->flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED);
197198
}

0 commit comments

Comments
 (0)