@@ -568,65 +568,123 @@ struct get_device_info_impl<range<Dimensions>,
568
568
}
569
569
};
570
570
571
+ // X-macro table for NVIDIA and AMD GPUs only: each X(string, enumerator) entry pairs a device version string with its oneapi_exp_arch value.
572
+ #define NVIDIA_AMD_ARCHES (X ) \
573
+ X (" 5.0" , oneapi_exp_arch::nvidia_gpu_sm_50) \
574
+ X (" 5.2" , oneapi_exp_arch::nvidia_gpu_sm_52) \
575
+ X (" 5.3" , oneapi_exp_arch::nvidia_gpu_sm_53) \
576
+ X (" 6.0" , oneapi_exp_arch::nvidia_gpu_sm_60) \
577
+ X (" 6.1" , oneapi_exp_arch::nvidia_gpu_sm_61) \
578
+ X (" 6.2" , oneapi_exp_arch::nvidia_gpu_sm_62) \
579
+ X (" 7.0" , oneapi_exp_arch::nvidia_gpu_sm_70) \
580
+ X (" 7.2" , oneapi_exp_arch::nvidia_gpu_sm_72) \
581
+ X (" 7.5" , oneapi_exp_arch::nvidia_gpu_sm_75) \
582
+ X (" 8.0" , oneapi_exp_arch::nvidia_gpu_sm_80) \
583
+ X (" 8.6" , oneapi_exp_arch::nvidia_gpu_sm_86) \
584
+ X (" 8.7" , oneapi_exp_arch::nvidia_gpu_sm_87) \
585
+ X (" 8.9" , oneapi_exp_arch::nvidia_gpu_sm_89) \
586
+ X (" 9.0" , oneapi_exp_arch::nvidia_gpu_sm_90) \
587
+ X (" gfx701" , oneapi_exp_arch::amd_gpu_gfx701) \
588
+ X (" gfx702" , oneapi_exp_arch::amd_gpu_gfx702) \
589
+ X (" gfx801" , oneapi_exp_arch::amd_gpu_gfx801) \
590
+ X (" gfx802" , oneapi_exp_arch::amd_gpu_gfx802) \
591
+ X (" gfx803" , oneapi_exp_arch::amd_gpu_gfx803) \
592
+ X (" gfx805" , oneapi_exp_arch::amd_gpu_gfx805) \
593
+ X (" gfx810" , oneapi_exp_arch::amd_gpu_gfx810) \
594
+ X (" gfx900" , oneapi_exp_arch::amd_gpu_gfx900) \
595
+ X (" gfx902" , oneapi_exp_arch::amd_gpu_gfx902) \
596
+ X (" gfx904" , oneapi_exp_arch::amd_gpu_gfx904) \
597
+ X (" gfx906" , oneapi_exp_arch::amd_gpu_gfx906) \
598
+ X (" gfx908" , oneapi_exp_arch::amd_gpu_gfx908) \
599
+ X (" gfx90a" , oneapi_exp_arch::amd_gpu_gfx90a) \
600
+ X (" gfx1010" , oneapi_exp_arch::amd_gpu_gfx1010) \
601
+ X (" gfx1011" , oneapi_exp_arch::amd_gpu_gfx1011) \
602
+ X (" gfx1012" , oneapi_exp_arch::amd_gpu_gfx1012) \
603
+ X (" gfx1013" , oneapi_exp_arch::amd_gpu_gfx1013) \
604
+ X (" gfx1030" , oneapi_exp_arch::amd_gpu_gfx1030) \
605
+ X (" gfx1031" , oneapi_exp_arch::amd_gpu_gfx1031) \
606
+ X (" gfx1032" , oneapi_exp_arch::amd_gpu_gfx1032) \
607
+ X (" gfx1034" , oneapi_exp_arch::amd_gpu_gfx1034)
608
+
609
+ // X-macro table for Intel GPUs only: each X(id, enumerator) entry pairs a 32-bit device IP value with its oneapi_exp_arch value.
610
+ #define INTEL_ARCHES (X ) \
611
+ X (0x02000000 , oneapi_exp_arch::intel_gpu_bdw) \
612
+ X (0x02400009 , oneapi_exp_arch::intel_gpu_skl) \
613
+ X (0x02404009 , oneapi_exp_arch::intel_gpu_kbl) \
614
+ X (0x02408009 , oneapi_exp_arch::intel_gpu_cfl) \
615
+ X (0x0240c000 , oneapi_exp_arch::intel_gpu_apl) \
616
+ X (0x02410000 , oneapi_exp_arch::intel_gpu_glk) \
617
+ X (0x02414000 , oneapi_exp_arch::intel_gpu_whl) \
618
+ X (0x02418000 , oneapi_exp_arch::intel_gpu_aml) \
619
+ X (0x0241c000 , oneapi_exp_arch::intel_gpu_cml) \
620
+ X (0x02c00000 , oneapi_exp_arch::intel_gpu_icllp) \
621
+ X (0x02c08000 , oneapi_exp_arch::intel_gpu_ehl) \
622
+ X (0x03000000 , oneapi_exp_arch::intel_gpu_tgllp) \
623
+ X (0x03004000 , oneapi_exp_arch::intel_gpu_rkl) \
624
+ X (0x03008000 , oneapi_exp_arch::intel_gpu_adl_s) \
625
+ X (0x0300c000 , oneapi_exp_arch::intel_gpu_adl_p) \
626
+ X (0x03010000 , oneapi_exp_arch::intel_gpu_adl_n) \
627
+ X (0x03028000 , oneapi_exp_arch::intel_gpu_dg1) \
628
+ X (0x030dc008 , oneapi_exp_arch::intel_gpu_acm_g10) \
629
+ X (0x030e0005 , oneapi_exp_arch::intel_gpu_acm_g11) \
630
+ X (0x030e4000 , oneapi_exp_arch::intel_gpu_acm_g12) \
631
+ X (0x030f0007 , oneapi_exp_arch::intel_gpu_pvc)
632
+ // Callback for NVIDIA_AMD_ARCHES: returns i when string s equals the in-scope C string `arch` (strcmp); otherwise falls through to the next entry.
633
+ #define CMP_NVIDIA_AMD (s, i ) \
634
+ if (strcmp(s, arch) == 0 ) \
635
+ return i;
636
+ // Callback for INTEL_ARCHES: returns i when id p equals the in-scope variable `arch`; otherwise falls through. NOTE(review): the visible call site declares arch as int while DeviceIp is uint32_t - confirm intended.
637
+ #define CMP_INTEL (p, i ) \
638
+ if (p == arch) \
639
+ return i;
640
+
571
641
template <>
572
642
struct get_device_info_impl <
573
643
ext::oneapi::experimental::architecture,
574
644
ext::oneapi::experimental::info::device::architecture> {
575
645
static ext::oneapi::experimental::architecture get (const DeviceImplPtr &Dev) {
576
646
using oneapi_exp_arch = sycl::ext::oneapi::experimental::architecture;
577
- auto ReturnHelper = [](auto MapDeviceIpToArch, auto DeviceIp) {
578
- // TODO: use std::map::contains instead of try-catch when SYCL RT be moved
579
- // to C++20
580
- try {
581
- oneapi_exp_arch Result = MapDeviceIpToArch.at (DeviceIp);
582
- return Result;
583
- } catch (std::out_of_range &) {
647
+ backend CurrentBackend = Dev->getBackend ();
648
+ if (Dev->is_gpu () && (backend::ext_oneapi_level_zero == CurrentBackend ||
649
+ backend::opencl == CurrentBackend)) {
650
+ auto MapArchIDToArchName = [](const int arch) {
651
+ INTEL_ARCHES (CMP_INTEL);
584
652
throw sycl::exception (
585
653
make_error_code (errc::runtime),
586
654
" The current device architecture is not supported by "
587
655
" sycl_ext_oneapi_device_architecture." );
588
- }
589
- };
590
- backend CurrentBackend = Dev->getBackend ();
591
- if (Dev->is_gpu () && (backend::ext_oneapi_level_zero == CurrentBackend ||
592
- backend::opencl == CurrentBackend)) {
593
- std::map<uint32_t , oneapi_exp_arch> MapDeviceIpToArch = {
594
- {0x02000000 , oneapi_exp_arch::intel_gpu_bdw},
595
- {0x02400009 , oneapi_exp_arch::intel_gpu_skl},
596
- {0x02404009 , oneapi_exp_arch::intel_gpu_kbl},
597
- {0x02408009 , oneapi_exp_arch::intel_gpu_cfl},
598
- {0x0240c000 , oneapi_exp_arch::intel_gpu_apl},
599
- {0x02410000 , oneapi_exp_arch::intel_gpu_glk},
600
- {0x02414000 , oneapi_exp_arch::intel_gpu_whl},
601
- {0x02418000 , oneapi_exp_arch::intel_gpu_aml},
602
- {0x0241c000 , oneapi_exp_arch::intel_gpu_cml},
603
- {0x02c00000 , oneapi_exp_arch::intel_gpu_icllp},
604
- {0x02c08000 , oneapi_exp_arch::intel_gpu_ehl},
605
- {0x03000000 , oneapi_exp_arch::intel_gpu_tgllp},
606
- {0x03004000 , oneapi_exp_arch::intel_gpu_rkl},
607
- {0x03008000 , oneapi_exp_arch::intel_gpu_adl_s},
608
- {0x0300c000 , oneapi_exp_arch::intel_gpu_adl_p},
609
- {0x03010000 , oneapi_exp_arch::intel_gpu_adl_n},
610
- {0x03028000 , oneapi_exp_arch::intel_gpu_dg1},
611
- {0x030dc008 , oneapi_exp_arch::intel_gpu_acm_g10},
612
- {0x030e0005 , oneapi_exp_arch::intel_gpu_acm_g11},
613
- {0x030e4000 , oneapi_exp_arch::intel_gpu_acm_g12},
614
- {0x030f0007 , oneapi_exp_arch::intel_gpu_pvc},
615
656
};
616
657
uint32_t DeviceIp;
617
658
Dev->getPlugin ()->call <PiApiKind::piDeviceGetInfo>(
618
659
Dev->getHandleRef (),
619
660
PiInfoCode<
620
661
ext::oneapi::experimental::info::device::architecture>::value,
621
662
sizeof (DeviceIp), &DeviceIp, nullptr );
622
- return ReturnHelper (MapDeviceIpToArch, DeviceIp);
663
+ return MapArchIDToArchName (DeviceIp);
664
+ } else if (Dev->is_gpu () && (backend::ext_oneapi_cuda == CurrentBackend ||
665
+ backend::ext_oneapi_hip == CurrentBackend)) {
666
+ auto MapArchIDToArchName = [](const char *arch) {
667
+ NVIDIA_AMD_ARCHES (CMP_NVIDIA_AMD);
668
+ throw sycl::exception (
669
+ make_error_code (errc::runtime),
670
+ " The current device architecture is not supported by "
671
+ " sycl_ext_oneapi_device_architecture." );
672
+ };
673
+ size_t ResultSize = 0 ;
674
+ Dev->getPlugin ()->call <PiApiKind::piDeviceGetInfo>(
675
+ Dev->getHandleRef (), PiInfoCode<info::device::version>::value, 0 ,
676
+ nullptr , &ResultSize);
677
+ std::unique_ptr<char []> DeviceArch (new char [ResultSize]);
678
+ Dev->getPlugin ()->call <PiApiKind::piDeviceGetInfo>(
679
+ Dev->getHandleRef (), PiInfoCode<info::device::version>::value,
680
+ ResultSize, DeviceArch.get (), nullptr );
681
+ return MapArchIDToArchName (DeviceArch.get ());
623
682
} else if (Dev->is_cpu () && backend::opencl == CurrentBackend) {
624
683
// TODO: add support of different CPU architectures to
625
684
// sycl_ext_oneapi_device_architecture
626
685
return sycl::ext::oneapi::experimental::architecture::x86_64;
627
686
} // else is not needed
628
- // TODO: add support of other arhitectures by extending with else if
629
-
687
+ // TODO: add support of other architectures by extending with else if
630
688
// Generating a user-friendly error message
631
689
std::string DeviceStr;
632
690
if (Dev->is_gpu ())
0 commit comments