@@ -560,9 +560,6 @@ static pi_result copyModule(ze_context_handle_t ZeContext,
560
560
561
561
static bool setEnvVar (const char *var, const char *value);
562
562
563
- static pi_result
564
- getPlatformCache (std::vector<pi_platform> const **PlatformCache);
565
-
566
563
static pi_result populateDeviceCacheIfNeeded (pi_platform Platform);
567
564
568
565
// Forward declarations for mock implementations of Level Zero APIs that
@@ -651,17 +648,90 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
651
648
return mapError (ZeResult);
652
649
}
653
650
654
- const std::vector<pi_platform> *PlatformCache;
655
- pi_result Res = getPlatformCache (&PlatformCache);
656
- if (Res != PI_SUCCESS) {
657
- return Res;
651
+ // Cache pi_platforms for reuse in the future
652
+ // It solves two problems;
653
+ // 1. sycl::device equality issue; we always return the same pi_device.
654
+ // 2. performance; we can save time by immediately return from cache.
655
+ //
656
+ // Note: The memory for "PiPlatformsCache" and "PiPlatformsCacheMutex" is
657
+ // intentionally leaked because the application may call into the SYCL
658
+ // runtime from a global destructor, and such a call could eventually
659
+ // access these variables. Therefore, there is no safe time when
660
+ // "PiPlatformsCache" and "PiPlatformsCacheMutex" could be deleted.
661
+ static auto PiPlatformsCache = new std::vector<pi_platform>;
662
+ static auto PiPlatformsCacheMutex = new std::mutex;
663
+ static bool PiPlatformCachePopulated = false ;
664
+
665
+ std::lock_guard<std::mutex> Lock (*PiPlatformsCacheMutex);
666
+ if (!PiPlatformCachePopulated) {
667
+ // We will retrieve the Max CommandList Cache in this lamda function so that
668
+ // it only has to be executed once
669
+ static pi_uint32 CommandListCacheSizeValue = ([] {
670
+ const char *CommandListCacheSize =
671
+ std::getenv (" SYCL_PI_LEVEL0_MAX_COMMAND_LIST_CACHE" );
672
+ pi_uint32 CommandListCacheSizeValue;
673
+ try {
674
+ CommandListCacheSizeValue =
675
+ CommandListCacheSize ? std::stoi (CommandListCacheSize) : 20000 ;
676
+ } catch (std::exception const &) {
677
+ zePrint (
678
+ " SYCL_PI_LEVEL0_MAX_COMMAND_LIST_CACHE: invalid value provided, "
679
+ " default set.\n " );
680
+ CommandListCacheSizeValue = 20000 ;
681
+ }
682
+ return CommandListCacheSizeValue;
683
+ })();
684
+
685
+ try {
686
+
687
+ // Level Zero does not have concept of Platforms, but Level Zero driver is
688
+ // the closest match.
689
+ uint32_t ZeDriverCount = 0 ;
690
+ ZE_CALL (zeDriverGet (&ZeDriverCount, nullptr ));
691
+ if (ZeDriverCount == 0 ) {
692
+ PiPlatformCachePopulated = true ;
693
+ } else {
694
+ ze_driver_handle_t ZeDriver;
695
+ assert (ZeDriverCount == 1 );
696
+ ZE_CALL (zeDriverGet (&ZeDriverCount, &ZeDriver));
697
+ pi_platform Platform = new _pi_platform (ZeDriver);
698
+
699
+ // Cache driver properties
700
+ ze_driver_properties_t ZeDriverProperties;
701
+ ZE_CALL (zeDriverGetProperties (ZeDriver, &ZeDriverProperties));
702
+ uint32_t ZeDriverVersion = ZeDriverProperties.driverVersion ;
703
+ // Intel Level-Zero GPU driver stores version as:
704
+ // | 31 - 24 | 23 - 16 | 15 - 0 |
705
+ // | Major | Minor | Build |
706
+ auto VersionMajor =
707
+ std::to_string ((ZeDriverVersion & 0xFF000000 ) >> 24 );
708
+ auto VersionMinor =
709
+ std::to_string ((ZeDriverVersion & 0x00FF0000 ) >> 16 );
710
+ auto VersionBuild = std::to_string (ZeDriverVersion & 0x0000FFFF );
711
+ Platform->ZeDriverVersion =
712
+ VersionMajor + " ." + VersionMinor + " ." + VersionBuild;
713
+
714
+ ze_api_version_t ZeApiVersion;
715
+ ZE_CALL (zeDriverGetApiVersion (ZeDriver, &ZeApiVersion));
716
+ Platform->ZeDriverApiVersion =
717
+ std::to_string (ZE_MAJOR_VERSION (ZeApiVersion)) + " ." +
718
+ std::to_string (ZE_MINOR_VERSION (ZeApiVersion));
719
+
720
+ Platform->ZeMaxCommandListCache = CommandListCacheSizeValue;
721
+ // Save a copy in the cache for future uses.
722
+ PiPlatformsCache->push_back (Platform);
723
+ PiPlatformCachePopulated = true ;
724
+ }
725
+ } catch (const std::bad_alloc &) {
726
+ return PI_OUT_OF_HOST_MEMORY;
727
+ } catch (...) {
728
+ return PI_ERROR_UNKNOWN;
729
+ }
658
730
}
659
731
660
- // Level Zero does not have concept of Platforms, but Level Zero driver is the
661
- // closest match.
662
732
if (Platforms && NumEntries > 0 ) {
663
733
uint32_t I = 0 ;
664
- for (const pi_platform &CachedPlatform : *PlatformCache ) {
734
+ for (const pi_platform &CachedPlatform : *PiPlatformsCache ) {
665
735
if (I < NumEntries) {
666
736
*Platforms++ = CachedPlatform;
667
737
I++;
@@ -672,97 +742,11 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
672
742
}
673
743
674
744
if (NumPlatforms)
675
- *NumPlatforms = PlatformCache ->size ();
745
+ *NumPlatforms = PiPlatformsCache ->size ();
676
746
677
747
return PI_SUCCESS;
678
748
}
679
749
680
- // Get the cached platforms, return them using the "PlatformCache" out parameter
681
- static pi_result
682
- getPlatformCache (std::vector<pi_platform> const **PlatformCache) {
683
- // We will retrieve the Max CommandList Cache in this lamda function so that
684
- // it only has to be executed once
685
- static pi_uint32 CommandListCacheSizeValue = ([] {
686
- const char *CommandListCacheSize =
687
- std::getenv (" SYCL_PI_LEVEL0_MAX_COMMAND_LIST_CACHE" );
688
- pi_uint32 CommandListCacheSizeValue;
689
- try {
690
- CommandListCacheSizeValue =
691
- CommandListCacheSize ? std::stoi (CommandListCacheSize) : 20000 ;
692
- } catch (std::exception const &) {
693
- zePrint (" SYCL_PI_LEVEL0_MAX_COMMAND_LIST_CACHE: invalid value provided, "
694
- " default set.\n " );
695
- CommandListCacheSizeValue = 20000 ;
696
- }
697
- return CommandListCacheSizeValue;
698
- })();
699
-
700
- try {
701
- // Cache pi_platforms for reuse in the future
702
- // It solves two problems;
703
- // 1. sycl::device equality issue; we always return the same pi_device.
704
- // 2. performance; we can save time by immediately return from cache.
705
- //
706
- // Note: The memory for "PiPlatformsCache" and "PiPlatformsCacheMutex" is
707
- // intentionally leaked because the application may call into the SYCL
708
- // runtime from a global destructor, and such a call could eventually
709
- // access these variables. Therefore, there is no safe time when
710
- // "PiPlatformsCache" and "PiPlatformsCacheMutex" could be deleted.
711
- static auto PiPlatformsCache = new std::vector<pi_platform>;
712
- static auto PiPlatformsCacheMutex = new std::mutex;
713
-
714
- static bool PiPlatformCachePopulated = false ;
715
- std::lock_guard<std::mutex> Lock (*PiPlatformsCacheMutex);
716
- if (PiPlatformCachePopulated) {
717
- *PlatformCache = PiPlatformsCache;
718
- return PI_SUCCESS;
719
- }
720
-
721
- uint32_t ZeDriverCount = 0 ;
722
- ZE_CALL (zeDriverGet (&ZeDriverCount, nullptr ));
723
- if (ZeDriverCount == 0 ) {
724
- *PlatformCache = PiPlatformsCache;
725
- PiPlatformCachePopulated = true ;
726
- return PI_SUCCESS;
727
- }
728
- ze_driver_handle_t ZeDriver;
729
- assert (ZeDriverCount == 1 );
730
- ZE_CALL (zeDriverGet (&ZeDriverCount, &ZeDriver));
731
- pi_platform Platform = new _pi_platform (ZeDriver);
732
-
733
- // Cache driver properties
734
- ze_driver_properties_t ZeDriverProperties;
735
- ZE_CALL (zeDriverGetProperties (ZeDriver, &ZeDriverProperties));
736
- uint32_t ZeDriverVersion = ZeDriverProperties.driverVersion ;
737
- // Intel Level-Zero GPU driver stores version as:
738
- // | 31 - 24 | 23 - 16 | 15 - 0 |
739
- // | Major | Minor | Build |
740
- auto VersionMajor = std::to_string ((ZeDriverVersion & 0xFF000000 ) >> 24 );
741
- auto VersionMinor = std::to_string ((ZeDriverVersion & 0x00FF0000 ) >> 16 );
742
- auto VersionBuild = std::to_string (ZeDriverVersion & 0x0000FFFF );
743
- Platform->ZeDriverVersion =
744
- VersionMajor + " ." + VersionMinor + " ." + VersionBuild;
745
-
746
- ze_api_version_t ZeApiVersion;
747
- ZE_CALL (zeDriverGetApiVersion (ZeDriver, &ZeApiVersion));
748
- Platform->ZeDriverApiVersion =
749
- std::to_string (ZE_MAJOR_VERSION (ZeApiVersion)) + " ." +
750
- std::to_string (ZE_MINOR_VERSION (ZeApiVersion));
751
-
752
- Platform->ZeMaxCommandListCache = CommandListCacheSizeValue;
753
- // Save a copy in the cache for future uses.
754
- PiPlatformsCache->push_back (Platform);
755
- // Copy the cache to the out parameter.
756
- *PlatformCache = PiPlatformsCache;
757
- PiPlatformCachePopulated = true ;
758
- } catch (const std::bad_alloc &) {
759
- return PI_OUT_OF_HOST_MEMORY;
760
- } catch (...) {
761
- return PI_ERROR_UNKNOWN;
762
- }
763
- return PI_SUCCESS;
764
- }
765
-
766
750
pi_result piPlatformGetInfo (pi_platform Platform, pi_platform_info ParamName,
767
751
size_t ParamValueSize, void *ParamValue,
768
752
size_t *ParamValueSizeRet) {
@@ -829,24 +813,33 @@ pi_result piextPlatformCreateWithNativeHandle(pi_native_handle NativeHandle,
829
813
assert (Platform);
830
814
831
815
auto ZeDriver = pi_cast<ze_driver_handle_t >(NativeHandle);
832
- const std::vector<pi_platform> *PlatformCache;
833
816
834
- // The SYCL spec requires that the set of platforms must remain fixed for the
835
- // duration of the application's execution. We assume that we found all of the
836
- // Level Zero drivers when we initialized the platform cache, so the
837
- // "NativeHandle" must already be in the cache. If it is not, this must not be
838
- // a valid Level Zero driver.
839
- pi_result Res = getPlatformCache (&PlatformCache);
817
+ pi_uint32 NumPlatforms = 0 ;
818
+ pi_result Res = piPlatformsGet (0 , nullptr , &NumPlatforms);
840
819
if (Res != PI_SUCCESS) {
841
820
return Res;
842
821
}
843
822
844
- for (const pi_platform &CachedPlatform : *PlatformCache) {
845
- if (CachedPlatform->ZeDriver == ZeDriver) {
846
- *Platform = CachedPlatform;
847
- return PI_SUCCESS;
823
+ if (NumPlatforms) {
824
+ std::vector<pi_platform> PlatformCache (NumPlatforms);
825
+ Res = piPlatformsGet (NumPlatforms, PlatformCache.data (), nullptr );
826
+ if (Res != PI_SUCCESS) {
827
+ return Res;
828
+ }
829
+
830
+ // The SYCL spec requires that the set of platforms must remain fixed for
831
+ // the duration of the application's execution. We assume that we found all
832
+ // of the Level Zero drivers when we initialized the platform cache, so the
833
+ // "NativeHandle" must already be in the cache. If it is not, this must not
834
+ // be a valid Level Zero driver.
835
+ for (const pi_platform &CachedPlatform : PlatformCache) {
836
+ if (CachedPlatform->ZeDriver == ZeDriver) {
837
+ *Platform = CachedPlatform;
838
+ return PI_SUCCESS;
839
+ }
848
840
}
849
841
}
842
+
850
843
return PI_INVALID_VALUE;
851
844
}
852
845
0 commit comments