Skip to content

Commit 1ad69e5

Browse files
author
MartinWehking
authored
[SYCL][CUDA][HIP] Add support for querying device architecture (#10573)
Implements `sycl_ext_oneapi_device_architecture` for AMD and NVIDIA GPUs. The device architecture can be queried through `dev.get_info<sycl::ext::oneapi::experimental::info::device::architecture>()`, where `dev` is a `sycl::device`.
1 parent c043037 commit 1ad69e5

File tree

3 files changed

+96
-44
lines changed

3 files changed

+96
-44
lines changed

sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -589,12 +589,6 @@ feature, the application must be compiled in ahead-of-time (AOT) mode using
589589
description of the `-fsycl-targets` option. These are the target names of the
590590
form "intel_gpu_*", "nvidia_gpu_*", or "amd_gpu_*".
591591

592-
The two APIs `device::ext_oneapi_architecture_is` and the
593-
`ext::oneapi::experimental::info::device::architecture` device descriptor are
594-
currently supported only for Intel devices (both GPU and CPU). There is no
595-
support yet for Nvidia or AMD devices.
596-
597-
598592
== Future direction
599593

600594
This experimental extension is still evolving. We expect that future versions

sycl/source/detail/device_info.hpp

Lines changed: 95 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -568,65 +568,123 @@ struct get_device_info_impl<range<Dimensions>,
568568
}
569569
};
570570

571+
// This macro is only for AMD and NVIDIA GPU architectures
572+
#define NVIDIA_AMD_ARCHES(X) \
573+
X("5.0", oneapi_exp_arch::nvidia_gpu_sm_50) \
574+
X("5.2", oneapi_exp_arch::nvidia_gpu_sm_52) \
575+
X("5.3", oneapi_exp_arch::nvidia_gpu_sm_53) \
576+
X("6.0", oneapi_exp_arch::nvidia_gpu_sm_60) \
577+
X("6.1", oneapi_exp_arch::nvidia_gpu_sm_61) \
578+
X("6.2", oneapi_exp_arch::nvidia_gpu_sm_62) \
579+
X("7.0", oneapi_exp_arch::nvidia_gpu_sm_70) \
580+
X("7.2", oneapi_exp_arch::nvidia_gpu_sm_72) \
581+
X("7.5", oneapi_exp_arch::nvidia_gpu_sm_75) \
582+
X("8.0", oneapi_exp_arch::nvidia_gpu_sm_80) \
583+
X("8.6", oneapi_exp_arch::nvidia_gpu_sm_86) \
584+
X("8.7", oneapi_exp_arch::nvidia_gpu_sm_87) \
585+
X("8.9", oneapi_exp_arch::nvidia_gpu_sm_89) \
586+
X("9.0", oneapi_exp_arch::nvidia_gpu_sm_90) \
587+
X("gfx701", oneapi_exp_arch::amd_gpu_gfx701) \
588+
X("gfx702", oneapi_exp_arch::amd_gpu_gfx702) \
589+
X("gfx801", oneapi_exp_arch::amd_gpu_gfx801) \
590+
X("gfx802", oneapi_exp_arch::amd_gpu_gfx802) \
591+
X("gfx803", oneapi_exp_arch::amd_gpu_gfx803) \
592+
X("gfx805", oneapi_exp_arch::amd_gpu_gfx805) \
593+
X("gfx810", oneapi_exp_arch::amd_gpu_gfx810) \
594+
X("gfx900", oneapi_exp_arch::amd_gpu_gfx900) \
595+
X("gfx902", oneapi_exp_arch::amd_gpu_gfx902) \
596+
X("gfx904", oneapi_exp_arch::amd_gpu_gfx904) \
597+
X("gfx906", oneapi_exp_arch::amd_gpu_gfx906) \
598+
X("gfx908", oneapi_exp_arch::amd_gpu_gfx908) \
599+
X("gfx90a", oneapi_exp_arch::amd_gpu_gfx90a) \
600+
X("gfx1010", oneapi_exp_arch::amd_gpu_gfx1010) \
601+
X("gfx1011", oneapi_exp_arch::amd_gpu_gfx1011) \
602+
X("gfx1012", oneapi_exp_arch::amd_gpu_gfx1012) \
603+
X("gfx1013", oneapi_exp_arch::amd_gpu_gfx1013) \
604+
X("gfx1030", oneapi_exp_arch::amd_gpu_gfx1030) \
605+
X("gfx1031", oneapi_exp_arch::amd_gpu_gfx1031) \
606+
X("gfx1032", oneapi_exp_arch::amd_gpu_gfx1032) \
607+
X("gfx1034", oneapi_exp_arch::amd_gpu_gfx1034)
608+
609+
// This macro is only for Intel GPU architectures
610+
#define INTEL_ARCHES(X) \
611+
X(0x02000000, oneapi_exp_arch::intel_gpu_bdw) \
612+
X(0x02400009, oneapi_exp_arch::intel_gpu_skl) \
613+
X(0x02404009, oneapi_exp_arch::intel_gpu_kbl) \
614+
X(0x02408009, oneapi_exp_arch::intel_gpu_cfl) \
615+
X(0x0240c000, oneapi_exp_arch::intel_gpu_apl) \
616+
X(0x02410000, oneapi_exp_arch::intel_gpu_glk) \
617+
X(0x02414000, oneapi_exp_arch::intel_gpu_whl) \
618+
X(0x02418000, oneapi_exp_arch::intel_gpu_aml) \
619+
X(0x0241c000, oneapi_exp_arch::intel_gpu_cml) \
620+
X(0x02c00000, oneapi_exp_arch::intel_gpu_icllp) \
621+
X(0x02c08000, oneapi_exp_arch::intel_gpu_ehl) \
622+
X(0x03000000, oneapi_exp_arch::intel_gpu_tgllp) \
623+
X(0x03004000, oneapi_exp_arch::intel_gpu_rkl) \
624+
X(0x03008000, oneapi_exp_arch::intel_gpu_adl_s) \
625+
X(0x0300c000, oneapi_exp_arch::intel_gpu_adl_p) \
626+
X(0x03010000, oneapi_exp_arch::intel_gpu_adl_n) \
627+
X(0x03028000, oneapi_exp_arch::intel_gpu_dg1) \
628+
X(0x030dc008, oneapi_exp_arch::intel_gpu_acm_g10) \
629+
X(0x030e0005, oneapi_exp_arch::intel_gpu_acm_g11) \
630+
X(0x030e4000, oneapi_exp_arch::intel_gpu_acm_g12) \
631+
X(0x030f0007, oneapi_exp_arch::intel_gpu_pvc)
632+
633+
#define CMP_NVIDIA_AMD(s, i) \
634+
if (strcmp(s, arch) == 0) \
635+
return i;
636+
637+
#define CMP_INTEL(p, i) \
638+
if (p == arch) \
639+
return i;
640+
571641
template <>
572642
struct get_device_info_impl<
573643
ext::oneapi::experimental::architecture,
574644
ext::oneapi::experimental::info::device::architecture> {
575645
static ext::oneapi::experimental::architecture get(const DeviceImplPtr &Dev) {
576646
using oneapi_exp_arch = sycl::ext::oneapi::experimental::architecture;
577-
auto ReturnHelper = [](auto MapDeviceIpToArch, auto DeviceIp) {
578-
// TODO: use std::map::contains instead of try-catch when SYCL RT be moved
579-
// to C++20
580-
try {
581-
oneapi_exp_arch Result = MapDeviceIpToArch.at(DeviceIp);
582-
return Result;
583-
} catch (std::out_of_range &) {
647+
backend CurrentBackend = Dev->getBackend();
648+
if (Dev->is_gpu() && (backend::ext_oneapi_level_zero == CurrentBackend ||
649+
backend::opencl == CurrentBackend)) {
650+
auto MapArchIDToArchName = [](const int arch) {
651+
INTEL_ARCHES(CMP_INTEL);
584652
throw sycl::exception(
585653
make_error_code(errc::runtime),
586654
"The current device architecture is not supported by "
587655
"sycl_ext_oneapi_device_architecture.");
588-
}
589-
};
590-
backend CurrentBackend = Dev->getBackend();
591-
if (Dev->is_gpu() && (backend::ext_oneapi_level_zero == CurrentBackend ||
592-
backend::opencl == CurrentBackend)) {
593-
std::map<uint32_t, oneapi_exp_arch> MapDeviceIpToArch = {
594-
{0x02000000, oneapi_exp_arch::intel_gpu_bdw},
595-
{0x02400009, oneapi_exp_arch::intel_gpu_skl},
596-
{0x02404009, oneapi_exp_arch::intel_gpu_kbl},
597-
{0x02408009, oneapi_exp_arch::intel_gpu_cfl},
598-
{0x0240c000, oneapi_exp_arch::intel_gpu_apl},
599-
{0x02410000, oneapi_exp_arch::intel_gpu_glk},
600-
{0x02414000, oneapi_exp_arch::intel_gpu_whl},
601-
{0x02418000, oneapi_exp_arch::intel_gpu_aml},
602-
{0x0241c000, oneapi_exp_arch::intel_gpu_cml},
603-
{0x02c00000, oneapi_exp_arch::intel_gpu_icllp},
604-
{0x02c08000, oneapi_exp_arch::intel_gpu_ehl},
605-
{0x03000000, oneapi_exp_arch::intel_gpu_tgllp},
606-
{0x03004000, oneapi_exp_arch::intel_gpu_rkl},
607-
{0x03008000, oneapi_exp_arch::intel_gpu_adl_s},
608-
{0x0300c000, oneapi_exp_arch::intel_gpu_adl_p},
609-
{0x03010000, oneapi_exp_arch::intel_gpu_adl_n},
610-
{0x03028000, oneapi_exp_arch::intel_gpu_dg1},
611-
{0x030dc008, oneapi_exp_arch::intel_gpu_acm_g10},
612-
{0x030e0005, oneapi_exp_arch::intel_gpu_acm_g11},
613-
{0x030e4000, oneapi_exp_arch::intel_gpu_acm_g12},
614-
{0x030f0007, oneapi_exp_arch::intel_gpu_pvc},
615656
};
616657
uint32_t DeviceIp;
617658
Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
618659
Dev->getHandleRef(),
619660
PiInfoCode<
620661
ext::oneapi::experimental::info::device::architecture>::value,
621662
sizeof(DeviceIp), &DeviceIp, nullptr);
622-
return ReturnHelper(MapDeviceIpToArch, DeviceIp);
663+
return MapArchIDToArchName(DeviceIp);
664+
} else if (Dev->is_gpu() && (backend::ext_oneapi_cuda == CurrentBackend ||
665+
backend::ext_oneapi_hip == CurrentBackend)) {
666+
auto MapArchIDToArchName = [](const char *arch) {
667+
NVIDIA_AMD_ARCHES(CMP_NVIDIA_AMD);
668+
throw sycl::exception(
669+
make_error_code(errc::runtime),
670+
"The current device architecture is not supported by "
671+
"sycl_ext_oneapi_device_architecture.");
672+
};
673+
size_t ResultSize = 0;
674+
Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
675+
Dev->getHandleRef(), PiInfoCode<info::device::version>::value, 0,
676+
nullptr, &ResultSize);
677+
std::unique_ptr<char[]> DeviceArch(new char[ResultSize]);
678+
Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
679+
Dev->getHandleRef(), PiInfoCode<info::device::version>::value,
680+
ResultSize, DeviceArch.get(), nullptr);
681+
return MapArchIDToArchName(DeviceArch.get());
623682
} else if (Dev->is_cpu() && backend::opencl == CurrentBackend) {
624683
// TODO: add support of different CPU architectures to
625684
// sycl_ext_oneapi_device_architecture
626685
return sycl::ext::oneapi::experimental::architecture::x86_64;
627686
} // else is not needed
628-
// TODO: add support of other arhitectures by extending with else if
629-
687+
// TODO: add support of other architectures by extending with else if
630688
// Generating a user-friendly error message
631689
std::string DeviceStr;
632690
if (Dev->is_gpu())

sycl/test-e2e/DeviceArchitecture/device_architecture_on_host.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// UNSUPPORTED: cuda, hip, esimd_emulator, accelerator
1+
// UNSUPPORTED: esimd_emulator, accelerator
22

33
// RUN: %{build} -o %t.out
44
// RUN: %{run} %t.out

0 commit comments

Comments
 (0)