Skip to content

Commit d0b01b2

Browse files
[SYCL] Implement sycl_ext_oneapi_device_architecture on host for Level Zero and OpenCL (#9843)
This patch introduces new host API for sycl_ext_oneapi_device_architecture extension and implements it, currently only for Level Zero and OpenCL Depends on - oneapi-src/unified-runtime#573 - #9873 - #9979 - #10054
1 parent e98280e commit d0b01b2

File tree

17 files changed

+205
-25
lines changed

17 files changed

+205
-25
lines changed

sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc

Lines changed: 39 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -523,23 +523,12 @@ call to `if_architecture_is` or `else_if_architecture_is` whose condition is
523523
architectures in the `Archs` parameter pack.
524524

525525

526-
== Limitations with the experimental version
527-
528-
The {dpcpp} implementation of this extension currently has some important
529-
limitations. The application must be compiled in ahead-of-time (AOT) mode
530-
using `-fsycl-targets=<special-target>` where `<special-target>` is one of the
531-
"special target values" listed in the link:../../UsersManual.md[users manual]
532-
description of the `-fsycl-targets` option. These are the target names of the
533-
form "intel_gpu_*", "nvidia_gpu_*", or "amd_gpu_*".
534-
535-
536-
== Future direction
526+
=== New member function of `device` class
537527

538-
This experimental extension is still evolving. We expect that future versions
539-
will include the following:
528+
This extension adds the following new member function to the `device` class,
529+
which returns a Boolean telling whether the device has the specified
530+
architecture.
540531

541-
* An extended member function like:
542-
+
543532
--
544533
```
545534
namespace sycl {
@@ -551,31 +540,56 @@ class device {
551540

552541
// namespace sycl
553542
```
554-
555-
This provides a way to query a device's architecture from host code.
556543
--
557544

558-
* An extended device information descriptor named
559-
`sycl::ext::oneapi::experimental::info::device::architecture`, which returns
560-
the architecture of the device. This allows host code such as:
561-
+
545+
=== New device descriptor
546+
547+
[%header,cols="5,1,5"]
548+
|===
549+
|Device descriptor
550+
|Return type
551+
|Description
552+
553+
|`ext::oneapi::experimental::info::device::architecture`
554+
|`ext::oneapi::experimental::architecture`
555+
|Returns the architecture of the device
556+
557+
|===
558+
559+
This device descriptor allows host code such as:
560+
562561
--
563562
```
564-
using namespace sycl::ext::oneapi::experimental;
563+
namespace syclex = sycl::ext::oneapi::experimental;
565564

566-
architecture arch = dev.get_info<info::device::architecture>();
565+
syclex::architecture arch = dev.get_info<syclex::info::device::architecture>();
567566
switch (arch) {
568-
case architecture::x86_64:
567+
case syclex::architecture::x86_64:
569568
/* ... */
570569
break;
571-
case architecture::intel_gpu_bdw:
570+
case syclex::architecture::intel_gpu_bdw:
572571
/* ... */
573572
break;
574573
/* etc. */
575574
}
576575
```
577576
--
578577

578+
== Limitations with the experimental version
579+
580+
The {dpcpp} implementation of this extension currently has some important
581+
limitations. The application must be compiled in ahead-of-time (AOT) mode
582+
using `-fsycl-targets=<special-target>` where `<special-target>` is one of the
583+
"special target values" listed in the link:../../UsersManual.md[users manual]
584+
description of the `-fsycl-targets` option. These are the target names of the
585+
form "intel_gpu_*", "nvidia_gpu_*", or "amd_gpu_*".
586+
587+
588+
== Future direction
589+
590+
This experimental extension is still evolving. We expect that future versions
591+
will include the following:
592+
579593
* A compile-time constant property that can be used to decorate kernels and
580594
non-kernel device functions:
581595
+

sycl/include/sycl/detail/pi.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,7 @@ typedef enum {
319319
// Intel UUID extension.
320320
PI_DEVICE_INFO_UUID = 0x106A,
321321
// These are Intel-specific extensions.
322+
PI_EXT_ONEAPI_DEVICE_INFO_IP_VERSION = 0x4250,
322323
PI_DEVICE_INFO_DEVICE_ID = 0x4251,
323324
PI_DEVICE_INFO_PCI_ADDRESS = 0x10020,
324325
PI_DEVICE_INFO_GPU_EU_COUNT = 0x10021,

sycl/include/sycl/device.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <sycl/detail/export.hpp>
1616
#include <sycl/detail/info_desc_helpers.hpp>
1717
#include <sycl/detail/owner_less_base.hpp>
18+
#include <sycl/ext/oneapi/experimental/device_architecture.hpp>
1819
#include <sycl/ext/oneapi/weak_object_base.hpp>
1920
#include <sycl/info/info_desc.hpp>
2021
#include <sycl/platform.hpp>
@@ -240,6 +241,16 @@ class __SYCL_EXPORT device : public detail::OwnerLessBase<device> {
240241
/// \return true if the SYCL device has the given feature.
241242
bool has(aspect Aspect) const __SYCL_WARN_IMAGE_ASPECT(Aspect);
242243

244+
/// Indicates if the SYCL device architecture equals to the one passed to
245+
/// the function.
246+
///
247+
/// \param arch is one of the architectures from architecture enum described
248+
/// in sycl_ext_oneapi_device_architecture specification.
249+
///
250+
/// \return true if the SYCL device architecture equals to the one passed to
251+
/// the function.
252+
bool ext_oneapi_architecture_is(ext::oneapi::experimental::architecture arch);
253+
243254
// TODO: Remove this diagnostics when __SYCL_WARN_IMAGE_ASPECT is removed.
244255
#if defined(__clang__)
245256
#pragma clang diagnostic pop

sycl/include/sycl/info/ext_oneapi_device_traits.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ __SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental,device, max_global_work_group
66
__SYCL_PARAM_TRAITS_TEMPLATE_SPEC(ext::oneapi::experimental,device, max_work_groups<1>, id<1>, PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_1D)
77
__SYCL_PARAM_TRAITS_TEMPLATE_SPEC(ext::oneapi::experimental,device, max_work_groups<2>, id<2>, PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D)
88
__SYCL_PARAM_TRAITS_TEMPLATE_SPEC(ext::oneapi::experimental,device, max_work_groups<3>, id<3>, PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D)
9+
__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, device, architecture,
10+
ext::oneapi::experimental::architecture,
11+
PI_EXT_ONEAPI_DEVICE_INFO_IP_VERSION)
912
#ifdef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF
1013
#undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC
1114
#undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF

sycl/include/sycl/info/info_desc.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <sycl/aspects.hpp>
1212
#include <sycl/detail/common.hpp>
1313
#include <sycl/detail/pi.hpp>
14+
#include <sycl/ext/oneapi/experimental/device_architecture.hpp>
1415
#include <sycl/id.hpp>
1516

1617
namespace sycl {

sycl/plugins/unified_runtime/pi2ur.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1047,6 +1047,9 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
10471047
case PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE:
10481048
InfoType = UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE;
10491049
break;
1050+
case PI_EXT_ONEAPI_DEVICE_INFO_IP_VERSION:
1051+
InfoType = UR_DEVICE_INFO_IP_VERSION;
1052+
break;
10501053
case PI_DEVICE_INFO_BUILD_ON_SUBDEVICE:
10511054
InfoType = UR_DEVICE_INFO_BUILD_ON_SUBDEVICE;
10521055
break;

sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_common.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,10 @@ ze_structure_type_t getZeStructureType<ze_device_memory_ext_properties_t>() {
239239
return ZE_STRUCTURE_TYPE_DEVICE_MEMORY_EXT_PROPERTIES;
240240
}
241241
template <>
242+
ze_structure_type_t getZeStructureType<ze_device_ip_version_ext_t>() {
243+
return ZE_STRUCTURE_TYPE_DEVICE_IP_VERSION_EXT;
244+
}
245+
template <>
242246
ze_structure_type_t getZeStructureType<ze_device_memory_access_properties_t>() {
243247
return ZE_STRUCTURE_TYPE_DEVICE_MEMORY_ACCESS_PROPERTIES;
244248
}

sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_device.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(
396396
uint32_t{1});
397397
case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE:
398398
return ReturnValue(uint64_t{Device->ZeDeviceCacheProperties->cacheSize});
399+
case UR_DEVICE_INFO_IP_VERSION:
400+
return ReturnValue(uint32_t{Device->ZeDeviceIpVersionExt->ipVersion});
399401
case UR_DEVICE_INFO_MAX_PARAMETER_SIZE:
400402
return ReturnValue(
401403
size_t{Device->ZeDeviceModuleProperties->maxArgumentsSize});
@@ -908,6 +910,14 @@ ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal,
908910
ZE_CALL_NOCHECK(zeDeviceGetComputeProperties, (ZeDevice, &Properties));
909911
};
910912

913+
ZeDeviceIpVersionExt.Compute =
914+
[ZeDevice](ze_device_ip_version_ext_t &Properties) {
915+
ze_device_properties_t P;
916+
P.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
917+
P.pNext = (void *)&Properties;
918+
ZE_CALL_NOCHECK(zeDeviceGetProperties, (ZeDevice, &P));
919+
};
920+
911921
ZeDeviceImageProperties.Compute =
912922
[ZeDevice](ze_device_image_properties_t &Properties) {
913923
ZE_CALL_NOCHECK(zeDeviceGetImageProperties, (ZeDevice, &Properties));

sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_device.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,4 +168,5 @@ struct ur_device_handle_t_ : _ur_object {
168168
ZeCache<ZeStruct<ze_device_memory_access_properties_t>>
169169
ZeDeviceMemoryAccessProperties;
170170
ZeCache<ZeStruct<ze_device_cache_properties_t>> ZeDeviceCacheProperties;
171+
ZeCache<ZeStruct<ze_device_ip_version_ext_t>> ZeDeviceIpVersionExt;
171172
};

sycl/source/detail/device_impl.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,15 @@ std::string device_impl::getDeviceName() const {
480480
return MDeviceName;
481481
}
482482

483+
ext::oneapi::experimental::architecture device_impl::getDeviceArch() const {
484+
std::call_once(MDeviceArchFlag, [this]() {
485+
MDeviceArch =
486+
get_info<ext::oneapi::experimental::info::device::architecture>();
487+
});
488+
489+
return MDeviceArch;
490+
}
491+
483492
// On first call this function queries for device timestamp
484493
// along with host synchronized timestamp and stores it in memeber varaible
485494
// MDeviceHostBaseTime. Subsequent calls to this function would just retrieve

sycl/source/detail/device_impl.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,10 @@ class device_impl {
234234

235235
std::string getDeviceName() const;
236236

237+
bool extOneapiArchitectureIs(ext::oneapi::experimental::architecture Arch) {
238+
return Arch == getDeviceArch();
239+
}
240+
237241
/// Gets the current device timestamp
238242
/// @throw sycl::feature_not_supported if feature is not supported on device
239243
uint64_t getCurrentDeviceTime();
@@ -253,6 +257,7 @@ class device_impl {
253257
explicit device_impl(pi_native_handle InteropDevice,
254258
sycl::detail::pi::PiDevice Device,
255259
PlatformImplPtr Platform, const PluginPtr &Plugin);
260+
ext::oneapi::experimental::architecture getDeviceArch() const;
256261
sycl::detail::pi::PiDevice MDevice = 0;
257262
sycl::detail::pi::PiDeviceType MType;
258263
sycl::detail::pi::PiDevice MRootDevice = nullptr;
@@ -261,6 +266,8 @@ class device_impl {
261266
bool MIsAssertFailSupported = false;
262267
mutable std::string MDeviceName;
263268
mutable std::once_flag MDeviceNameFlag;
269+
mutable ext::oneapi::experimental::architecture MDeviceArch;
270+
mutable std::once_flag MDeviceArchFlag;
264271
std::pair<uint64_t, uint64_t> MDeviceHostBaseTime;
265272
}; // class device_impl
266273

sycl/source/detail/device_info.hpp

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <sycl/detail/os_util.hpp>
1818
#include <sycl/detail/pi.hpp>
1919
#include <sycl/device.hpp>
20+
#include <sycl/ext/oneapi/experimental/device_architecture.hpp>
2021
#include <sycl/feature_test.hpp>
2122
#include <sycl/info/info_desc.hpp>
2223
#include <sycl/memory_enums.hpp>
@@ -567,6 +568,83 @@ struct get_device_info_impl<range<Dimensions>,
567568
}
568569
};
569570

571+
template <>
572+
struct get_device_info_impl<
573+
ext::oneapi::experimental::architecture,
574+
ext::oneapi::experimental::info::device::architecture> {
575+
static ext::oneapi::experimental::architecture get(const DeviceImplPtr &Dev) {
576+
using oneapi_exp_arch = sycl::ext::oneapi::experimental::architecture;
577+
auto ReturnHelper = [](auto MapDeviceIpToArch, auto DeviceIp) {
578+
// TODO: use std::map::contains instead of try-catch when SYCL RT be moved
579+
// to C++20
580+
try {
581+
oneapi_exp_arch Result = MapDeviceIpToArch.at(DeviceIp);
582+
return Result;
583+
} catch (std::out_of_range &) {
584+
throw sycl::exception(
585+
make_error_code(errc::runtime),
586+
"The current device architecture is not supported by "
587+
"sycl_ext_oneapi_device_architecture.");
588+
}
589+
};
590+
backend CurrentBackend = Dev->getBackend();
591+
if (Dev->is_gpu() && (backend::ext_oneapi_level_zero == CurrentBackend ||
592+
backend::opencl == CurrentBackend)) {
593+
std::map<uint32_t, oneapi_exp_arch> MapDeviceIpToArch = {
594+
{0x02000000, oneapi_exp_arch::intel_gpu_bdw},
595+
{0x02400009, oneapi_exp_arch::intel_gpu_skl},
596+
{0x02404009, oneapi_exp_arch::intel_gpu_kbl},
597+
{0x02408009, oneapi_exp_arch::intel_gpu_cfl},
598+
{0x0240c000, oneapi_exp_arch::intel_gpu_apl},
599+
{0x02410000, oneapi_exp_arch::intel_gpu_glk},
600+
{0x02414000, oneapi_exp_arch::intel_gpu_whl},
601+
{0x02418000, oneapi_exp_arch::intel_gpu_aml},
602+
{0x0241c000, oneapi_exp_arch::intel_gpu_cml},
603+
{0x02c00000, oneapi_exp_arch::intel_gpu_icllp},
604+
{0x03000000, oneapi_exp_arch::intel_gpu_tgllp},
605+
{0x03004000, oneapi_exp_arch::intel_gpu_rkl},
606+
{0x03008000, oneapi_exp_arch::intel_gpu_adl_s},
607+
{0x03008000, oneapi_exp_arch::intel_gpu_rpl_s},
608+
{0x0300c000, oneapi_exp_arch::intel_gpu_adl_p},
609+
{0x03010000, oneapi_exp_arch::intel_gpu_adl_n},
610+
{0x03028000, oneapi_exp_arch::intel_gpu_dg1},
611+
{0x030dc008, oneapi_exp_arch::intel_gpu_acm_g10},
612+
{0x030e0005, oneapi_exp_arch::intel_gpu_acm_g11},
613+
{0x030e4000, oneapi_exp_arch::intel_gpu_acm_g12},
614+
{0x030f0007, oneapi_exp_arch::intel_gpu_pvc},
615+
};
616+
uint32_t DeviceIp;
617+
Dev->getPlugin()->call<PiApiKind::piDeviceGetInfo>(
618+
Dev->getHandleRef(),
619+
PiInfoCode<
620+
ext::oneapi::experimental::info::device::architecture>::value,
621+
sizeof(DeviceIp), &DeviceIp, nullptr);
622+
return ReturnHelper(MapDeviceIpToArch, DeviceIp);
623+
} else if (Dev->is_cpu() && backend::opencl == CurrentBackend) {
624+
// TODO: add support of different CPU architectures to
625+
// sycl_ext_oneapi_device_architecture
626+
return sycl::ext::oneapi::experimental::architecture::x86_64;
627+
} // else is not needed
628+
// TODO: add support of other arhitectures by extending with else if
629+
630+
// Generating a user-friendly error message
631+
std::string DeviceStr;
632+
if (Dev->is_gpu())
633+
DeviceStr = "GPU";
634+
else if (Dev->is_cpu())
635+
DeviceStr = "CPU";
636+
else if (Dev->is_accelerator())
637+
DeviceStr = "accelerator";
638+
// else if not needed
639+
std::stringstream ErrorMessage;
640+
ErrorMessage
641+
<< "sycl_ext_oneapi_device_architecture feature is not supported on "
642+
<< DeviceStr << " device with sycl::backend::" << CurrentBackend
643+
<< " backend.";
644+
throw sycl::exception(make_error_code(errc::runtime), ErrorMessage.str());
645+
}
646+
};
647+
570648
template <>
571649
struct get_device_info_impl<
572650
size_t, ext::oneapi::experimental::info::device::max_global_work_groups> {
@@ -826,6 +904,12 @@ inline std::vector<sycl::aspect> get_device_info_host<info::device::aspects>() {
826904
return std::vector<sycl::aspect>();
827905
}
828906

907+
template <>
908+
inline ext::oneapi::experimental::architecture
909+
get_device_info_host<ext::oneapi::experimental::info::device::architecture>() {
910+
return ext::oneapi::experimental::architecture::x86_64;
911+
}
912+
829913
template <>
830914
inline info::device_type get_device_info_host<info::device::device_type>() {
831915
return info::device_type::host;

sycl/source/device.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,5 +208,10 @@ pi_native_handle device::getNative() const { return impl->getNative(); }
208208

209209
bool device::has(aspect Aspect) const { return impl->has(Aspect); }
210210

211+
bool device::ext_oneapi_architecture_is(
212+
ext::oneapi::experimental::architecture arch) {
213+
return impl->extOneapiArchitectureIs(arch);
214+
}
215+
211216
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
212217
} // namespace sycl
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// UNSUPPORTED: cuda, hip, esimd_emulator
2+
3+
// Enable this test, when GPU driver on Windows CI machines will be updated
4+
// XFAIL: windows
5+
6+
// RUN: %{build} -o %t.out
7+
// RUN: %{run} %t.out
8+
9+
#include <sycl/sycl.hpp>
10+
11+
int main() {
12+
sycl::queue q;
13+
sycl::device dev = q.get_device();
14+
15+
sycl::ext::oneapi::experimental::architecture arch = dev.get_info<
16+
sycl::ext::oneapi::experimental::info::device::architecture>();
17+
18+
assert(dev.ext_oneapi_architecture_is(arch));
19+
20+
return 0;
21+
}

0 commit comments

Comments
 (0)