@@ -124,11 +124,13 @@ static std::map<std::string, int> *ZeCallCount = nullptr;
124
124
125
125
// Trace an internal PI call; returns in case of an error.
//
// Evaluates `Call` exactly once. When PrintPiTrace is set, the source text of
// the call is printed to stderr before the call is made. If the call yields
// anything other than PI_SUCCESS, the enclosing function returns that result
// immediately, so this macro may only be used inside functions whose return
// type is pi_result.
//
// The body is wrapped in do { ... } while (0) so that `PI_CALL(x);` expands to
// exactly one statement. A bare `{ ... }` followed by the caller's semicolon
// would be two statements and would not compile as the unbraced branch of an
// if/else. The wrapper also keeps the local `Result` scoped to the expansion,
// so the macro can be used more than once in the same function.
#define PI_CALL(Call)                                                          \
  do {                                                                         \
    if (PrintPiTrace)                                                          \
      fprintf(stderr, "PI ---> %s\n", #Call);                                  \
    pi_result Result = (Call);                                                 \
    if (Result != PI_SUCCESS)                                                  \
      return Result;                                                           \
  } while (0)
132
134
133
135
enum DebugLevel {
134
136
ZE_DEBUG_NONE = 0x0 ,
@@ -1074,8 +1076,6 @@ static pi_result copyModule(ze_context_handle_t ZeContext,
1074
1076
1075
1077
static bool setEnvVar (const char *var, const char *value);
1076
1078
1077
- static pi_result populateDeviceCacheIfNeeded (pi_platform Platform);
1078
-
1079
1079
// Forward declarations for mock implementations of Level Zero APIs that
1080
1080
// do not yet work in the driver.
1081
1081
// TODO: Remove these mock definitions when they work in the driver.
@@ -1333,7 +1333,11 @@ pi_result piextPlatformCreateWithNativeHandle(pi_native_handle NativeHandle,
1333
1333
// Return NULL if no such PI device found.
1334
1334
pi_device _pi_platform::getDeviceFromNativeHandle (ze_device_handle_t ZeDevice) {
1335
1335
1336
- std::lock_guard<std::mutex> Lock (this ->PiDevicesCacheMutex );
1336
+ pi_result Res = populateDeviceCacheIfNeeded ();
1337
+ if (Res != PI_SUCCESS) {
1338
+ return nullptr ;
1339
+ }
1340
+
1337
1341
auto it = std::find_if (PiDevicesCache.begin (), PiDevicesCache.end (),
1338
1342
[&](std::unique_ptr<_pi_device> &D) {
1339
1343
return D.get ()->ZeDevice == ZeDevice;
@@ -1350,8 +1354,7 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType,
1350
1354
1351
1355
PI_ASSERT (Platform, PI_INVALID_PLATFORM);
1352
1356
1353
- std::lock_guard<std::mutex> Lock (Platform->PiDevicesCacheMutex );
1354
- pi_result Res = populateDeviceCacheIfNeeded (Platform);
1357
+ pi_result Res = Platform->populateDeviceCacheIfNeeded ();
1355
1358
if (Res != PI_SUCCESS) {
1356
1359
return Res;
1357
1360
}
@@ -1409,15 +1412,14 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType,
1409
1412
return PI_SUCCESS;
1410
1413
}
1411
1414
1412
- // Check the device cache and load it if necessary. The PiDevicesCacheMutex must
1413
- // be locked before calling this function to prevent any synchronization issues.
1414
- static pi_result populateDeviceCacheIfNeeded (pi_platform Platform) {
1415
+ // Check the device cache and load it if necessary.
1416
+ pi_result _pi_platform::populateDeviceCacheIfNeeded () {
1417
+ std::lock_guard<std::mutex> Lock (PiDevicesCacheMutex);
1415
1418
1416
- if (Platform-> DeviceCachePopulated ) {
1419
+ if (DeviceCachePopulated) {
1417
1420
return PI_SUCCESS;
1418
1421
}
1419
1422
1420
- ze_driver_handle_t ZeDriver = Platform->ZeDriver ;
1421
1423
uint32_t ZeDeviceCount = 0 ;
1422
1424
ZE_CALL (zeDeviceGet, (ZeDriver, &ZeDeviceCount, nullptr ));
1423
1425
@@ -1426,21 +1428,48 @@ static pi_result populateDeviceCacheIfNeeded(pi_platform Platform) {
1426
1428
ZE_CALL (zeDeviceGet, (ZeDriver, &ZeDeviceCount, ZeDevices.data ()));
1427
1429
1428
1430
for (uint32_t I = 0 ; I < ZeDeviceCount; ++I) {
1429
- std::unique_ptr<_pi_device> Device (
1430
- new _pi_device (ZeDevices[I], Platform));
1431
+ std::unique_ptr<_pi_device> Device (new _pi_device (ZeDevices[I], this ));
1431
1432
pi_result Result = Device->initialize ();
1432
1433
if (Result != PI_SUCCESS) {
1433
1434
return Result;
1434
1435
}
1435
- // save a copy in the cache for future uses.
1436
- Platform->PiDevicesCache .push_back (std::move (Device));
1436
+
1437
+ // Additionally we need to cache all sub-devices too, such that they
1438
+ // are readily visible to the piextDeviceCreateWithNativeHandle.
1439
+ //
1440
+ pi_uint32 SubDevicesCount = 0 ;
1441
+ ZE_CALL (zeDeviceGetSubDevices,
1442
+ (Device->ZeDevice , &SubDevicesCount, nullptr ));
1443
+
1444
+ auto ZeSubdevices = new ze_device_handle_t [SubDevicesCount];
1445
+ ZE_CALL (zeDeviceGetSubDevices,
1446
+ (Device->ZeDevice , &SubDevicesCount, ZeSubdevices));
1447
+
1448
+ // Wrap the Level Zero sub-devices into PI sub-devices, and add them to
1449
+ // cache.
1450
+ for (uint32_t I = 0 ; I < SubDevicesCount; ++I) {
1451
+ std::unique_ptr<_pi_device> PiSubDevice (
1452
+ new _pi_device (ZeSubdevices[I], this , true ));
1453
+ pi_result Result = PiSubDevice->initialize ();
1454
+ if (Result != PI_SUCCESS) {
1455
+ delete[] ZeSubdevices;
1456
+ return Result;
1457
+ }
1458
+ // save pointers to sub-devices for quick retrieval in the future.
1459
+ Device->SubDevices .push_back (PiSubDevice.get ());
1460
+ PiDevicesCache.push_back (std::move (PiSubDevice));
1461
+ }
1462
+ delete[] ZeSubdevices;
1463
+
1464
+ // Save the root device in the cache for future uses.
1465
+ PiDevicesCache.push_back (std::move (Device));
1437
1466
}
1438
1467
} catch (const std::bad_alloc &) {
1439
1468
return PI_OUT_OF_HOST_MEMORY;
1440
1469
} catch (...) {
1441
1470
return PI_ERROR_UNKNOWN;
1442
1471
}
1443
- Platform-> DeviceCachePopulated = true ;
1472
+ DeviceCachePopulated = true ;
1444
1473
return PI_SUCCESS;
1445
1474
}
1446
1475
@@ -1986,66 +2015,30 @@ pi_result piDevicePartition(pi_device Device,
1986
2015
1987
2016
PI_ASSERT (Device, PI_INVALID_DEVICE);
1988
2017
1989
- // Check if Device was already partitioned into the same or bigger size
1990
- // before. If so, we can return immediately without searching the global
1991
- // device cache. Note that L0 driver always returns the same handles in the
1992
- // same order for the given number of sub-devices.
1993
- if (OutDevices && NumDevices <= Device->SubDevices .size ()) {
1994
- for (uint32_t I = 0 ; I < NumDevices; I++) {
1995
- OutDevices[I] = Device->SubDevices [I];
1996
- // reusing the same pi_device needs to increment the reference count
1997
- piDeviceRetain (OutDevices[I]);
1998
- }
1999
- if (OutNumDevices)
2000
- *OutNumDevices = NumDevices;
2001
- return PI_SUCCESS;
2018
+ // Devices cache is normally created in piDevicesGet but still make
2019
+ // sure that cache is populated.
2020
+ //
2021
+ pi_result Res = Device->Platform ->populateDeviceCacheIfNeeded ();
2022
+ if (Res != PI_SUCCESS) {
2023
+ return Res;
2002
2024
}
2003
2025
2004
- // Get the number of subdevices available.
2005
- // TODO: maybe add interface to create the specified # of subdevices.
2006
- uint32_t Count = 0 ;
2007
- ZE_CALL (zeDeviceGetSubDevices, (Device->ZeDevice , &Count, nullptr ));
2008
-
2009
2026
if (OutNumDevices) {
2010
- *OutNumDevices = Count ;
2027
+ *OutNumDevices = Device-> SubDevices . size () ;
2011
2028
}
2012
2029
2013
- if (!OutDevices) {
2014
- // If we are not given the buffer, we are done.
2015
- return PI_SUCCESS;
2016
- }
2030
+ if (OutDevices) {
2031
+ // TODO: Consider support for partitioning to <= total sub-devices.
2032
+ // Currently supported partitioning (by affinity domain/numa) would always
2033
+ // partition to all sub-devices.
2034
+ //
2035
+ PI_ASSERT (NumDevices == Device->SubDevices .size (), PI_INVALID_VALUE);
2017
2036
2018
- try {
2019
- pi_platform Platform = Device->Platform ;
2020
- auto ZeSubdevices = new ze_device_handle_t [Count];
2021
- ZE_CALL (zeDeviceGetSubDevices, (Device->ZeDevice , &Count, ZeSubdevices));
2022
-
2023
- // Wrap the Level Zero sub-devices into PI sub-devices, and write them out.
2024
- for (uint32_t I = 0 ; I < Count; ++I) {
2025
- pi_device Dev = Platform->getDeviceFromNativeHandle (ZeSubdevices[I]);
2026
- if (Dev) {
2027
- OutDevices[I] = Dev;
2028
- // reusing the same pi_device needs to increment the reference count
2029
- piDeviceRetain (OutDevices[I]);
2030
- } else {
2031
- std::unique_ptr<_pi_device> PiSubDevice (
2032
- new _pi_device (ZeSubdevices[I], Platform));
2033
- pi_result Result = PiSubDevice->initialize ();
2034
- if (Result != PI_SUCCESS) {
2035
- delete[] ZeSubdevices;
2036
- return Result;
2037
- }
2038
- OutDevices[I] = PiSubDevice.get ();
2039
- // save pointers to sub-devices for quick retrieval in the future.
2040
- Device->SubDevices .push_back (PiSubDevice.get ());
2041
- Platform->PiDevicesCache .push_back (std::move (PiSubDevice));
2042
- }
2037
+ for (uint32_t I = 0 ; I < NumDevices; I++) {
2038
+ OutDevices[I] = Device->SubDevices [I];
2039
+ // reusing the same pi_device needs to increment the reference count
2040
+ PI_CALL (piDeviceRetain (OutDevices[I]));
2043
2041
}
2044
- delete[] ZeSubdevices;
2045
- } catch (const std::bad_alloc &) {
2046
- return PI_OUT_OF_HOST_MEMORY;
2047
- } catch (...) {
2048
- return PI_ERROR_UNKNOWN;
2049
2042
}
2050
2043
return PI_SUCCESS;
2051
2044
}
@@ -2114,13 +2107,7 @@ pi_result piextDeviceCreateWithNativeHandle(pi_native_handle NativeHandle,
2114
2107
PI_ASSERT (Device, PI_INVALID_DEVICE);
2115
2108
PI_ASSERT (NativeHandle, PI_INVALID_VALUE);
2116
2109
PI_ASSERT (Platform, PI_INVALID_PLATFORM);
2117
- {
2118
- std::lock_guard<std::mutex> Lock (Platform->PiDevicesCacheMutex );
2119
- pi_result Res = populateDeviceCacheIfNeeded (Platform);
2120
- if (Res != PI_SUCCESS) {
2121
- return Res;
2122
- }
2123
- }
2110
+
2124
2111
auto ZeDevice = pi_cast<ze_device_handle_t >(NativeHandle);
2125
2112
2126
2113
// The SYCL spec requires that the set of devices must remain fixed for the
0 commit comments