Skip to content

[SYCL][L0] Add SubDevices into the device cache #3314

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Mar 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 44 additions & 29 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1802,19 +1802,26 @@ pi_result piDevicePartition(pi_device Device,

PI_ASSERT(Device, PI_INVALID_DEVICE);

// Check whether Device was already partitioned into at least NumDevices
// sub-devices. If so, we can return immediately without searching the global
// device cache. Note that the L0 driver always returns the same handles in the
// same order for the given number of sub-devices.
if (OutDevices && NumDevices <= Device->SubDevices.size()) {
for (uint32_t I = 0; I < NumDevices; I++) {
OutDevices[I] = Device->SubDevices[I];
// reusing the same pi_device needs to increment the reference count
piDeviceRetain(OutDevices[I]);
}
if (OutNumDevices)
*OutNumDevices = NumDevices;
return PI_SUCCESS;
}

// Get the number of subdevices available.
// TODO: maybe add interface to create the specified # of subdevices.
uint32_t Count = 0;
ZE_CALL(zeDeviceGetSubDevices(Device->ZeDevice, &Count, nullptr));

// Check that the requested/allocated # of sub-devices is the same
// as was reported by the above call.
// TODO: we may want to support smaller/larger # devices too.
if (Count != NumDevices) {
zePrint("piDevicePartition: unsupported # of sub-devices requested\n");
return PI_INVALID_OPERATION;
}

if (OutNumDevices) {
*OutNumDevices = Count;
}
Expand All @@ -1825,17 +1832,29 @@ pi_result piDevicePartition(pi_device Device,
}

try {
pi_platform Platform = Device->Platform;
auto ZeSubdevices = new ze_device_handle_t[Count];
ZE_CALL(zeDeviceGetSubDevices(Device->ZeDevice, &Count, ZeSubdevices));

// Wrap the Level Zero sub-devices into PI sub-devices, and write them out.
for (uint32_t I = 0; I < Count; ++I) {
OutDevices[I] = new _pi_device(ZeSubdevices[I], Device->Platform,
true /* isSubDevice */);
pi_result Result = OutDevices[I]->initialize();
if (Result != PI_SUCCESS) {
delete[] ZeSubdevices;
return Result;
pi_device Dev = Platform->getDeviceFromNativeHandle(ZeSubdevices[I]);
if (Dev) {
OutDevices[I] = Dev;
// reusing the same pi_device needs to increment the reference count
piDeviceRetain(OutDevices[I]);
} else {
std::unique_ptr<_pi_device> PiSubDevice(
new _pi_device(ZeSubdevices[I], Platform));
pi_result Result = PiSubDevice->initialize();
if (Result != PI_SUCCESS) {
delete[] ZeSubdevices;
return Result;
}
OutDevices[I] = PiSubDevice.get();
Platform->PiDevicesCache.push_back(std::move(PiSubDevice));
// save pointers to sub-devices for quick retrieval in the future.
Device->SubDevices.push_back(Dev);
}
}
delete[] ZeSubdevices;
Expand Down Expand Up @@ -1911,29 +1930,25 @@ pi_result piextDeviceCreateWithNativeHandle(pi_native_handle NativeHandle,
PI_ASSERT(Device, PI_INVALID_DEVICE);
PI_ASSERT(NativeHandle, PI_INVALID_VALUE);
PI_ASSERT(Platform, PI_INVALID_PLATFORM);

std::lock_guard<std::mutex> Lock(Platform->PiDevicesCacheMutex);
pi_result Res = populateDeviceCacheIfNeeded(Platform);
if (Res != PI_SUCCESS) {
return Res;
{
std::lock_guard<std::mutex> Lock(Platform->PiDevicesCacheMutex);
pi_result Res = populateDeviceCacheIfNeeded(Platform);
if (Res != PI_SUCCESS) {
return Res;
}
}

auto ZeDevice = pi_cast<ze_device_handle_t>(NativeHandle);

// The SYCL spec requires that the set of devices must remain fixed for the
// duration of the application's execution. We assume that we found all of the
// Level Zero devices when we initialized the device cache, so the
// "NativeHandle" must already be in the cache. If it is not, this must not be
// a valid Level Zero device.
for (const std::unique_ptr<_pi_device> &CachedDevice :
Platform->PiDevicesCache) {
if (CachedDevice->ZeDevice == ZeDevice) {
*Device = CachedDevice.get();
return PI_SUCCESS;
}
}

return PI_INVALID_VALUE;
pi_device Dev = Platform->getDeviceFromNativeHandle(ZeDevice);
if (Dev == nullptr)
return PI_INVALID_VALUE;
*Device = Dev;
return PI_SUCCESS;
}

pi_result piContextCreate(const pi_context_properties *Properties,
Expand Down
6 changes: 6 additions & 0 deletions sycl/plugins/level_zero/pi_level_zero.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,12 @@ struct _pi_device : _pi_object {
// Level Zero device handle.
ze_device_handle_t ZeDevice;

// Keep the sub-devices that were partitioned from this pi_device for reuse.
// The order of sub-devices in this vector matches the order of the
// ze_device_handle_t array returned by zeDeviceGetSubDevices(), which
// always returns sub-devices in the same fixed order.
std::vector<pi_device> SubDevices;

// PI platform to which this device belongs.
pi_platform Platform;

Expand Down