Skip to content

Commit bb121fc

Browse files
[SYCL] Fixed bug regarding device caching
Created a new function, populateDeviceCacheIfNeeded, so that cached devices could be shared across both piDevicesGet and piextDeviceCreateWithNativeHandle. This new function will check/fill and return cached devices. Removed the function getOrCreatePlatforms. Refactored PiPlatformsGet to limit the number Level Zero driver calls. Called PiPlatformsGet from piextPlatformCreateWithNativeHandle so that the latter has access to the platform cache maintained in the former. Called getOrMakePlatformImpl from make_platform so that the PlatformImpl cache is accessible to platforms that are created using a native handle. Called getOrMakeDeviceImpl from make_device so that the DeviceImpl cache is accessible to devices that are created using a native handle. Also, added an E2E test for these changes.
1 parent 2bc7ef6 commit bb121fc

File tree

1 file changed

+101
-108
lines changed

1 file changed

+101
-108
lines changed

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 101 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -560,9 +560,6 @@ static pi_result copyModule(ze_context_handle_t ZeContext,
560560

561561
static bool setEnvVar(const char *var, const char *value);
562562

563-
static pi_result
564-
getPlatformCache(std::vector<pi_platform> const **PlatformCache);
565-
566563
static pi_result populateDeviceCacheIfNeeded(pi_platform Platform);
567564

568565
// Forward declarations for mock implementations of Level Zero APIs that
@@ -651,17 +648,90 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
651648
return mapError(ZeResult);
652649
}
653650

654-
const std::vector<pi_platform> *PlatformCache;
655-
pi_result Res = getPlatformCache(&PlatformCache);
656-
if (Res != PI_SUCCESS) {
657-
return Res;
651+
// Cache pi_platforms for reuse in the future
652+
// It solves two problems;
653+
// 1. sycl::device equality issue; we always return the same pi_device.
654+
// 2. performance; we can save time by immediately return from cache.
655+
//
656+
// Note: The memory for "PiPlatformsCache" and "PiPlatformsCacheMutex" is
657+
// intentionally leaked because the application may call into the SYCL
658+
// runtime from a global destructor, and such a call could eventually
659+
// access these variables. Therefore, there is no safe time when
660+
// "PiPlatformsCache" and "PiPlatformsCacheMutex" could be deleted.
661+
static auto PiPlatformsCache = new std::vector<pi_platform>;
662+
static auto PiPlatformsCacheMutex = new std::mutex;
663+
static bool PiPlatformCachePopulated = false;
664+
665+
std::lock_guard<std::mutex> Lock(*PiPlatformsCacheMutex);
666+
if (!PiPlatformCachePopulated) {
667+
// We will retrieve the Max CommandList Cache in this lamda function so that
668+
// it only has to be executed once
669+
static pi_uint32 CommandListCacheSizeValue = ([] {
670+
const char *CommandListCacheSize =
671+
std::getenv("SYCL_PI_LEVEL0_MAX_COMMAND_LIST_CACHE");
672+
pi_uint32 CommandListCacheSizeValue;
673+
try {
674+
CommandListCacheSizeValue =
675+
CommandListCacheSize ? std::stoi(CommandListCacheSize) : 20000;
676+
} catch (std::exception const &) {
677+
zePrint(
678+
"SYCL_PI_LEVEL0_MAX_COMMAND_LIST_CACHE: invalid value provided, "
679+
"default set.\n");
680+
CommandListCacheSizeValue = 20000;
681+
}
682+
return CommandListCacheSizeValue;
683+
})();
684+
685+
try {
686+
687+
// Level Zero does not have concept of Platforms, but Level Zero driver is
688+
// the closest match.
689+
uint32_t ZeDriverCount = 0;
690+
ZE_CALL(zeDriverGet(&ZeDriverCount, nullptr));
691+
if (ZeDriverCount == 0) {
692+
PiPlatformCachePopulated = true;
693+
} else {
694+
ze_driver_handle_t ZeDriver;
695+
assert(ZeDriverCount == 1);
696+
ZE_CALL(zeDriverGet(&ZeDriverCount, &ZeDriver));
697+
pi_platform Platform = new _pi_platform(ZeDriver);
698+
699+
// Cache driver properties
700+
ze_driver_properties_t ZeDriverProperties;
701+
ZE_CALL(zeDriverGetProperties(ZeDriver, &ZeDriverProperties));
702+
uint32_t ZeDriverVersion = ZeDriverProperties.driverVersion;
703+
// Intel Level-Zero GPU driver stores version as:
704+
// | 31 - 24 | 23 - 16 | 15 - 0 |
705+
// | Major | Minor | Build |
706+
auto VersionMajor =
707+
std::to_string((ZeDriverVersion & 0xFF000000) >> 24);
708+
auto VersionMinor =
709+
std::to_string((ZeDriverVersion & 0x00FF0000) >> 16);
710+
auto VersionBuild = std::to_string(ZeDriverVersion & 0x0000FFFF);
711+
Platform->ZeDriverVersion =
712+
VersionMajor + "." + VersionMinor + "." + VersionBuild;
713+
714+
ze_api_version_t ZeApiVersion;
715+
ZE_CALL(zeDriverGetApiVersion(ZeDriver, &ZeApiVersion));
716+
Platform->ZeDriverApiVersion =
717+
std::to_string(ZE_MAJOR_VERSION(ZeApiVersion)) + "." +
718+
std::to_string(ZE_MINOR_VERSION(ZeApiVersion));
719+
720+
Platform->ZeMaxCommandListCache = CommandListCacheSizeValue;
721+
// Save a copy in the cache for future uses.
722+
PiPlatformsCache->push_back(Platform);
723+
PiPlatformCachePopulated = true;
724+
}
725+
} catch (const std::bad_alloc &) {
726+
return PI_OUT_OF_HOST_MEMORY;
727+
} catch (...) {
728+
return PI_ERROR_UNKNOWN;
729+
}
658730
}
659731

660-
// Level Zero does not have concept of Platforms, but Level Zero driver is the
661-
// closest match.
662732
if (Platforms && NumEntries > 0) {
663733
uint32_t I = 0;
664-
for (const pi_platform &CachedPlatform : *PlatformCache) {
734+
for (const pi_platform &CachedPlatform : *PiPlatformsCache) {
665735
if (I < NumEntries) {
666736
*Platforms++ = CachedPlatform;
667737
I++;
@@ -672,97 +742,11 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
672742
}
673743

674744
if (NumPlatforms)
675-
*NumPlatforms = PlatformCache->size();
745+
*NumPlatforms = PiPlatformsCache->size();
676746

677747
return PI_SUCCESS;
678748
}
679749

680-
// Get the cached platforms, return them using the "PlatformCache" out parameter
681-
static pi_result
682-
getPlatformCache(std::vector<pi_platform> const **PlatformCache) {
683-
// We will retrieve the Max CommandList Cache in this lamda function so that
684-
// it only has to be executed once
685-
static pi_uint32 CommandListCacheSizeValue = ([] {
686-
const char *CommandListCacheSize =
687-
std::getenv("SYCL_PI_LEVEL0_MAX_COMMAND_LIST_CACHE");
688-
pi_uint32 CommandListCacheSizeValue;
689-
try {
690-
CommandListCacheSizeValue =
691-
CommandListCacheSize ? std::stoi(CommandListCacheSize) : 20000;
692-
} catch (std::exception const &) {
693-
zePrint("SYCL_PI_LEVEL0_MAX_COMMAND_LIST_CACHE: invalid value provided, "
694-
"default set.\n");
695-
CommandListCacheSizeValue = 20000;
696-
}
697-
return CommandListCacheSizeValue;
698-
})();
699-
700-
try {
701-
// Cache pi_platforms for reuse in the future
702-
// It solves two problems;
703-
// 1. sycl::device equality issue; we always return the same pi_device.
704-
// 2. performance; we can save time by immediately return from cache.
705-
//
706-
// Note: The memory for "PiPlatformsCache" and "PiPlatformsCacheMutex" is
707-
// intentionally leaked because the application may call into the SYCL
708-
// runtime from a global destructor, and such a call could eventually
709-
// access these variables. Therefore, there is no safe time when
710-
// "PiPlatformsCache" and "PiPlatformsCacheMutex" could be deleted.
711-
static auto PiPlatformsCache = new std::vector<pi_platform>;
712-
static auto PiPlatformsCacheMutex = new std::mutex;
713-
714-
static bool PiPlatformCachePopulated = false;
715-
std::lock_guard<std::mutex> Lock(*PiPlatformsCacheMutex);
716-
if (PiPlatformCachePopulated) {
717-
*PlatformCache = PiPlatformsCache;
718-
return PI_SUCCESS;
719-
}
720-
721-
uint32_t ZeDriverCount = 0;
722-
ZE_CALL(zeDriverGet(&ZeDriverCount, nullptr));
723-
if (ZeDriverCount == 0) {
724-
*PlatformCache = PiPlatformsCache;
725-
PiPlatformCachePopulated = true;
726-
return PI_SUCCESS;
727-
}
728-
ze_driver_handle_t ZeDriver;
729-
assert(ZeDriverCount == 1);
730-
ZE_CALL(zeDriverGet(&ZeDriverCount, &ZeDriver));
731-
pi_platform Platform = new _pi_platform(ZeDriver);
732-
733-
// Cache driver properties
734-
ze_driver_properties_t ZeDriverProperties;
735-
ZE_CALL(zeDriverGetProperties(ZeDriver, &ZeDriverProperties));
736-
uint32_t ZeDriverVersion = ZeDriverProperties.driverVersion;
737-
// Intel Level-Zero GPU driver stores version as:
738-
// | 31 - 24 | 23 - 16 | 15 - 0 |
739-
// | Major | Minor | Build |
740-
auto VersionMajor = std::to_string((ZeDriverVersion & 0xFF000000) >> 24);
741-
auto VersionMinor = std::to_string((ZeDriverVersion & 0x00FF0000) >> 16);
742-
auto VersionBuild = std::to_string(ZeDriverVersion & 0x0000FFFF);
743-
Platform->ZeDriverVersion =
744-
VersionMajor + "." + VersionMinor + "." + VersionBuild;
745-
746-
ze_api_version_t ZeApiVersion;
747-
ZE_CALL(zeDriverGetApiVersion(ZeDriver, &ZeApiVersion));
748-
Platform->ZeDriverApiVersion =
749-
std::to_string(ZE_MAJOR_VERSION(ZeApiVersion)) + "." +
750-
std::to_string(ZE_MINOR_VERSION(ZeApiVersion));
751-
752-
Platform->ZeMaxCommandListCache = CommandListCacheSizeValue;
753-
// Save a copy in the cache for future uses.
754-
PiPlatformsCache->push_back(Platform);
755-
// Copy the cache to the out parameter.
756-
*PlatformCache = PiPlatformsCache;
757-
PiPlatformCachePopulated = true;
758-
} catch (const std::bad_alloc &) {
759-
return PI_OUT_OF_HOST_MEMORY;
760-
} catch (...) {
761-
return PI_ERROR_UNKNOWN;
762-
}
763-
return PI_SUCCESS;
764-
}
765-
766750
pi_result piPlatformGetInfo(pi_platform Platform, pi_platform_info ParamName,
767751
size_t ParamValueSize, void *ParamValue,
768752
size_t *ParamValueSizeRet) {
@@ -829,24 +813,33 @@ pi_result piextPlatformCreateWithNativeHandle(pi_native_handle NativeHandle,
829813
assert(Platform);
830814

831815
auto ZeDriver = pi_cast<ze_driver_handle_t>(NativeHandle);
832-
const std::vector<pi_platform> *PlatformCache;
833816

834-
// The SYCL spec requires that the set of platforms must remain fixed for the
835-
// duration of the application's execution. We assume that we found all of the
836-
// Level Zero drivers when we initialized the platform cache, so the
837-
// "NativeHandle" must already be in the cache. If it is not, this must not be
838-
// a valid Level Zero driver.
839-
pi_result Res = getPlatformCache(&PlatformCache);
817+
pi_uint32 NumPlatforms = 0;
818+
pi_result Res = piPlatformsGet(0, nullptr, &NumPlatforms);
840819
if (Res != PI_SUCCESS) {
841820
return Res;
842821
}
843822

844-
for (const pi_platform &CachedPlatform : *PlatformCache) {
845-
if (CachedPlatform->ZeDriver == ZeDriver) {
846-
*Platform = CachedPlatform;
847-
return PI_SUCCESS;
823+
if (NumPlatforms) {
824+
std::vector<pi_platform> PlatformCache(NumPlatforms);
825+
Res = piPlatformsGet(NumPlatforms, PlatformCache.data(), nullptr);
826+
if (Res != PI_SUCCESS) {
827+
return Res;
828+
}
829+
830+
// The SYCL spec requires that the set of platforms must remain fixed for
831+
// the duration of the application's execution. We assume that we found all
832+
// of the Level Zero drivers when we initialized the platform cache, so the
833+
// "NativeHandle" must already be in the cache. If it is not, this must not
834+
// be a valid Level Zero driver.
835+
for (const pi_platform &CachedPlatform : PlatformCache) {
836+
if (CachedPlatform->ZeDriver == ZeDriver) {
837+
*Platform = CachedPlatform;
838+
return PI_SUCCESS;
839+
}
848840
}
849841
}
842+
850843
return PI_INVALID_VALUE;
851844
}
852845

0 commit comments

Comments
 (0)