Skip to content

Commit 6e310b0

Browse files
committed
Add device query for checking if device architecture is homogeneous
Currently, only Level Zero returns true for the new query. Level Zero supports only Intel GPU devices at the moment, and to my knowledge they should all be homogeneous. All other backends return false, which disables build optimizations.
1 parent d44e27f commit 6e310b0

File tree

7 files changed

+38
-12
lines changed

7 files changed

+38
-12
lines changed

sycl/include/CL/sycl/detail/pi.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,7 @@ typedef enum {
302302
PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE = 0x10025,
303303
PI_DEVICE_INFO_MAX_MEM_BANDWIDTH = 0x10026,
304304
PI_DEVICE_INFO_IMAGE_SRGB = 0x10027,
305+
PI_DEVICE_INFO_HOMOGENEOUS_ARCH = 0x10028,
305306
PI_DEVICE_INFO_ATOMIC_64 = 0x10110,
306307
PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10111,
307308
PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x11000,

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1490,6 +1490,10 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
14901490
return getInfo(param_value_size, param_value, param_value_size_ret,
14911491
PI_TRUE);
14921492
}
1493+
case PI_DEVICE_INFO_HOMOGENEOUS_ARCH: {
1494+
return getInfo(param_value_size, param_value, param_value_size_ret,
1495+
PI_FALSE);
1496+
}
14931497
case PI_DEVICE_INFO_COMPILER_AVAILABLE: {
14941498
return getInfo(param_value_size, param_value, param_value_size_ret,
14951499
PI_TRUE);

sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -586,6 +586,8 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
586586
return ReturnValue("");
587587
case PI_DEVICE_INFO_VERSION:
588588
return ReturnValue(CmEmuDeviceVersionString);
589+
case PI_DEVICE_INFO_HOMOGENEOUS_ARCH: // emulator doesn't support partition
590+
return ReturnValue(pi_bool{false});
589591
case PI_DEVICE_INFO_COMPILER_AVAILABLE:
590592
return ReturnValue(pi_bool{false});
591593
case PI_DEVICE_INFO_LINKER_AVAILABLE:

sycl/plugins/hip/pi_hip.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1409,6 +1409,10 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name,
14091409
return getInfo(param_value_size, param_value, param_value_size_ret,
14101410
PI_TRUE);
14111411
}
1412+
case PI_DEVICE_INFO_HOMOGENEOUS_ARCH: {
1413+
return getInfo(param_value_size, param_value, param_value_size_ret,
1414+
PI_FALSE);
1415+
}
14121416
case PI_DEVICE_INFO_COMPILER_AVAILABLE: {
14131417
return getInfo(param_value_size, param_value, param_value_size_ret,
14141418
PI_TRUE);

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2326,6 +2326,8 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
23262326
}
23272327
case PI_DEVICE_INFO_NAME:
23282328
return ReturnValue(Device->ZeDeviceProperties->name);
2329+
case PI_DEVICE_INFO_HOMOGENEOUS_ARCH:
2330+
return ReturnValue(PI_TRUE);
23292331
case PI_DEVICE_INFO_COMPILER_AVAILABLE:
23302332
return ReturnValue(pi_bool{1});
23312333
case PI_DEVICE_INFO_LINKER_AVAILABLE:

sycl/plugins/opencl/pi_opencl.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,13 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName,
203203
std::memcpy(paramValue, &result, sizeof(cl_bool));
204204
return PI_SUCCESS;
205205
}
206-
206+
case PI_DEVICE_INFO_HOMOGENEOUS_ARCH: {
207+
// FIXME: conservatively return false due to lack of low-level API exposing
208+
// actual status of this property
209+
cl_bool result = false;
210+
std::memcpy(paramValue, &result, sizeof(cl_bool));
211+
return PI_SUCCESS;
212+
}
207213
case PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D:
208214
// Returns the maximum sizes of a work group for each dimension one
209215
// could use to submit a kernel. There is no such query defined in OpenCL

sycl/source/detail/program_manager/program_manager.cpp

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,22 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(
481481
if (Prg)
482482
Prg->stableSerializeSpecConstRegistry(SpecConsts);
483483

484+
// Check if root device architecture is homogeneous and we can optimize builds
485+
// for sub-devices
486+
DeviceImplPtr RootDevImpl = DeviceImpl;
487+
while (!RootDevImpl->isRootDevice()) {
488+
auto ParentDev = detail::getSyclObjImpl(
489+
RootDevImpl->get_info<info::device::parent_device>());
490+
if (!ContextImpl->hasDevice(ParentDev))
491+
break;
492+
RootDevImpl = ParentDev;
493+
}
494+
495+
pi_bool IsRootDeviceArchHomogeneous = PI_FALSE;
496+
ContextImpl->getPlugin().call<PiApiKind::piDeviceGetInfo>(
497+
RootDevImpl->getHandleRef(), PI_DEVICE_INFO_HOMOGENEOUS_ARCH,
498+
sizeof(pi_bool), &IsRootDeviceArchHomogeneous, nullptr);
499+
484500
// FIXME: the logic is modified to work around unintuitive Intel OpenCL CPU
485501
// implementation behavior. Kernels created with the program built for root
486502
// device can be re-used on sub-devices, but other combinations don't work
@@ -493,17 +509,8 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(
493509
// The expected solution is to build for any sub-device and use root device
494510
// handle as cache key to share build results for any other sub-device or even
495511
// a root device.
496-
// TODO: it might be worth testing if Level Zero plug-in supports all cases
497-
// and enable more cases for Level Zero.
498-
DeviceImplPtr Dev = DeviceImpl;
499-
while (!Dev->isRootDevice()) {
500-
auto ParentDev =
501-
detail::getSyclObjImpl(Dev->get_info<info::device::parent_device>());
502-
if (!ContextImpl->hasDevice(ParentDev))
503-
break;
504-
Dev = ParentDev;
505-
}
506-
512+
DeviceImplPtr Dev =
513+
(IsRootDeviceArchHomogeneous == PI_TRUE) ? RootDevImpl : DeviceImpl;
507514
auto BuildF = [this, &M, &KSId, &ContextImpl, &Dev, Prg, &CompileOpts,
508515
&LinkOpts, &JITCompilationIsRequired, SpecConsts] {
509516
auto Context = createSyclObjFromImpl<context>(ContextImpl);

0 commit comments

Comments
 (0)