Skip to content

Commit 0e32a28

Browse files
1 parent 42b2847 commit 0e32a28

File tree

4 files changed

+327
-0
lines changed

4 files changed

+327
-0
lines changed
Lines changed: 278 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,278 @@
1+
#pragma once
2+
3+
#include <sycl/detail/defines_elementary.hpp>
4+
5+
namespace sycl {
6+
__SYCL_INLINE_VER_NAMESPACE(_V1) {
7+
namespace ext::intel::experimental {
8+
9+
enum class architecture {
10+
x86_64,
11+
intel_gpu_bdw,
12+
intel_gpu_skl,
13+
intel_gpu_kbl,
14+
intel_gpu_cfl,
15+
intel_gpu_apl,
16+
intel_gpu_glk,
17+
intel_gpu_whl,
18+
intel_gpu_aml,
19+
intel_gpu_cml,
20+
intel_gpu_icllp,
21+
intel_gpu_ehl,
22+
intel_gpu_tgllp,
23+
intel_gpu_rkl,
24+
intel_gpu_adl_s,
25+
intel_gpu_rpl_s,
26+
intel_gpu_adl_p,
27+
intel_gpu_adl_n,
28+
intel_gpu_dg1,
29+
intel_gpu_acm_g10,
30+
intel_gpu_acm_g11,
31+
intel_gpu_acm_g12,
32+
intel_gpu_pvc,
33+
// Update "detail::max_architecture" below if you add new elements here!
34+
intel_gpu_8_0_0 = intel_gpu_bdw,
35+
intel_gpu_9_0_9 = intel_gpu_skl,
36+
intel_gpu_9_1_9 = intel_gpu_kbl,
37+
intel_gpu_9_2_9 = intel_gpu_cfl,
38+
intel_gpu_9_3_0 = intel_gpu_apl,
39+
intel_gpu_9_4_0 = intel_gpu_glk,
40+
intel_gpu_9_5_0 = intel_gpu_whl,
41+
intel_gpu_9_6_0 = intel_gpu_aml,
42+
intel_gpu_9_7_0 = intel_gpu_cml,
43+
intel_gpu_11_0_0 = intel_gpu_icllp,
44+
intel_gpu_11_2_0 = intel_gpu_ehl,
45+
intel_gpu_12_0_0 = intel_gpu_tgllp,
46+
intel_gpu_12_10_0 = intel_gpu_dg1,
47+
};
48+
49+
} // namespace ext::intel::experimental
50+
51+
namespace detail {
52+
53+
static constexpr ext::intel::experimental::architecture max_architecture =
54+
ext::intel::experimental::architecture::intel_gpu_pvc;
55+
56+
#ifndef __SYCL_TARGET_INTEL_X86_64__
57+
#define __SYCL_TARGET_INTEL_X86_64__ 0
58+
#endif
59+
#ifndef __SYCL_TARGET_INTEL_GPU_BDW__
60+
#define __SYCL_TARGET_INTEL_GPU_BDW__ 0
61+
#endif
62+
#ifndef __SYCL_TARGET_INTEL_GPU_SKL__
63+
#define __SYCL_TARGET_INTEL_GPU_SKL__ 0
64+
#endif
65+
#ifndef __SYCL_TARGET_INTEL_GPU_KBL__
66+
#define __SYCL_TARGET_INTEL_GPU_KBL__ 0
67+
#endif
68+
#ifndef __SYCL_TARGET_INTEL_GPU_CFL__
69+
#define __SYCL_TARGET_INTEL_GPU_CFL__ 0
70+
#endif
71+
#ifndef __SYCL_TARGET_INTEL_GPU_APL__
72+
#define __SYCL_TARGET_INTEL_GPU_APL__ 0
73+
#endif
74+
#ifndef __SYCL_TARGET_INTEL_GPU_GLK__
75+
#define __SYCL_TARGET_INTEL_GPU_GLK__ 0
76+
#endif
77+
#ifndef __SYCL_TARGET_INTEL_GPU_WHL__
78+
#define __SYCL_TARGET_INTEL_GPU_WHL__ 0
79+
#endif
80+
#ifndef __SYCL_TARGET_INTEL_GPU_AML__
81+
#define __SYCL_TARGET_INTEL_GPU_AML__ 0
82+
#endif
83+
#ifndef __SYCL_TARGET_INTEL_GPU_CML__
84+
#define __SYCL_TARGET_INTEL_GPU_CML__ 0
85+
#endif
86+
#ifndef __SYCL_TARGET_INTEL_GPU_ICLLP__
87+
#define __SYCL_TARGET_INTEL_GPU_ICLLP__ 0
88+
#endif
89+
#ifndef __SYCL_TARGET_INTEL_GPU_EHL__
90+
#define __SYCL_TARGET_INTEL_GPU_EHL__ 0
91+
#endif
92+
#ifndef __SYCL_TARGET_INTEL_GPU_TGLLP__
93+
#define __SYCL_TARGET_INTEL_GPU_TGLLP__ 0
94+
#endif
95+
#ifndef __SYCL_TARGET_INTEL_GPU_RKL__
96+
#define __SYCL_TARGET_INTEL_GPU_RKL__ 0
97+
#endif
98+
#ifndef __SYCL_TARGET_INTEL_GPU_ADL_S__
99+
#define __SYCL_TARGET_INTEL_GPU_ADL_S__ 0
100+
#endif
101+
#ifndef __SYCL_TARGET_INTEL_GPU_RPL_S__
102+
#define __SYCL_TARGET_INTEL_GPU_RPL_S__ 0
103+
#endif
104+
#ifndef __SYCL_TARGET_INTEL_GPU_ADL_P__
105+
#define __SYCL_TARGET_INTEL_GPU_ADL_P__ 0
106+
#endif
107+
#ifndef __SYCL_TARGET_INTEL_GPU_ADL_N__
108+
#define __SYCL_TARGET_INTEL_GPU_ADL_N__ 0
109+
#endif
110+
#ifndef __SYCL_TARGET_INTEL_GPU_DG1__
111+
#define __SYCL_TARGET_INTEL_GPU_DG1__ 0
112+
#endif
113+
#ifndef __SYCL_TARGET_INTEL_GPU_ACM_G10__
114+
#define __SYCL_TARGET_INTEL_GPU_ACM_G10__ 0
115+
#endif
116+
#ifndef __SYCL_TARGET_INTEL_GPU_ACM_G11__
117+
#define __SYCL_TARGET_INTEL_GPU_ACM_G11__ 0
118+
#endif
119+
#ifndef __SYCL_TARGET_INTEL_GPU_ACM_G12__
120+
#define __SYCL_TARGET_INTEL_GPU_ACM_G12__ 0
121+
#endif
122+
#ifndef __SYCL_TARGET_INTEL_GPU_PVC__
123+
#define __SYCL_TARGET_INTEL_GPU_PVC__ 0
124+
#endif
125+
126+
// This is true when the translation unit is compiled in AOT mode with target
127+
// names that supports the "if_architecture_is" features. If an unsupported
128+
// target name is specified via "-fsycl-targets", the associated invocation of
129+
// the device compiler will set this variable to false, and that will trigger
130+
// an error for code that uses "if_architecture_is".
131+
static constexpr bool is_allowable_aot_mode =
132+
(__SYCL_TARGET_INTEL_X86_64__ == 1) ||
133+
(__SYCL_TARGET_INTEL_GPU_BDW__ == 1) ||
134+
(__SYCL_TARGET_INTEL_GPU_SKL__ == 1) ||
135+
(__SYCL_TARGET_INTEL_GPU_KBL__ == 1) ||
136+
(__SYCL_TARGET_INTEL_GPU_CFL__ == 1) ||
137+
(__SYCL_TARGET_INTEL_GPU_APL__ == 1) ||
138+
(__SYCL_TARGET_INTEL_GPU_GLK__ == 1) ||
139+
(__SYCL_TARGET_INTEL_GPU_WHL__ == 1) ||
140+
(__SYCL_TARGET_INTEL_GPU_AML__ == 1) ||
141+
(__SYCL_TARGET_INTEL_GPU_CML__ == 1) ||
142+
(__SYCL_TARGET_INTEL_GPU_ICLLP__ == 1) ||
143+
(__SYCL_TARGET_INTEL_GPU_EHL__ == 1) ||
144+
(__SYCL_TARGET_INTEL_GPU_TGLLP__ == 1) ||
145+
(__SYCL_TARGET_INTEL_GPU_RKL__ == 1) ||
146+
(__SYCL_TARGET_INTEL_GPU_ADL_S__ == 1) ||
147+
(__SYCL_TARGET_INTEL_GPU_RPL_S__ == 1) ||
148+
(__SYCL_TARGET_INTEL_GPU_ADL_P__ == 1) ||
149+
(__SYCL_TARGET_INTEL_GPU_ADL_N__ == 1) ||
150+
(__SYCL_TARGET_INTEL_GPU_DG1__ == 1) ||
151+
(__SYCL_TARGET_INTEL_GPU_ACM_G10__ == 1) ||
152+
(__SYCL_TARGET_INTEL_GPU_ACM_G11__ == 1) ||
153+
(__SYCL_TARGET_INTEL_GPU_ACM_G12__ == 1) ||
154+
(__SYCL_TARGET_INTEL_GPU_PVC__ == 1);
155+
156+
struct IsAOTForArchitectureClass {
157+
// Allocate an array of size == size of ext::intel::experimental::architecture
158+
// enum.
159+
bool arr[static_cast<int>(max_architecture) + 1];
160+
161+
using arch = ext::intel::experimental::architecture;
162+
163+
constexpr IsAOTForArchitectureClass() : arr() {
164+
arr[static_cast<int>(arch::x86_64)] = __SYCL_TARGET_INTEL_X86_64__ == 1;
165+
arr[static_cast<int>(arch::intel_gpu_bdw)] =
166+
__SYCL_TARGET_INTEL_GPU_BDW__ == 1;
167+
arr[static_cast<int>(arch::intel_gpu_skl)] =
168+
__SYCL_TARGET_INTEL_GPU_SKL__ == 1;
169+
arr[static_cast<int>(arch::intel_gpu_kbl)] =
170+
__SYCL_TARGET_INTEL_GPU_KBL__ == 1;
171+
arr[static_cast<int>(arch::intel_gpu_cfl)] =
172+
__SYCL_TARGET_INTEL_GPU_CFL__ == 1;
173+
arr[static_cast<int>(arch::intel_gpu_apl)] =
174+
__SYCL_TARGET_INTEL_GPU_APL__ == 1;
175+
arr[static_cast<int>(arch::intel_gpu_glk)] =
176+
__SYCL_TARGET_INTEL_GPU_GLK__ == 1;
177+
arr[static_cast<int>(arch::intel_gpu_whl)] =
178+
__SYCL_TARGET_INTEL_GPU_WHL__ == 1;
179+
arr[static_cast<int>(arch::intel_gpu_aml)] =
180+
__SYCL_TARGET_INTEL_GPU_AML__ == 1;
181+
arr[static_cast<int>(arch::intel_gpu_cml)] =
182+
__SYCL_TARGET_INTEL_GPU_CML__ == 1;
183+
arr[static_cast<int>(arch::intel_gpu_icllp)] =
184+
__SYCL_TARGET_INTEL_GPU_ICLLP__ == 1;
185+
arr[static_cast<int>(arch::intel_gpu_ehl)] =
186+
__SYCL_TARGET_INTEL_GPU_EHL__ == 1;
187+
arr[static_cast<int>(arch::intel_gpu_tgllp)] =
188+
__SYCL_TARGET_INTEL_GPU_TGLLP__ == 1;
189+
arr[static_cast<int>(arch::intel_gpu_rkl)] =
190+
__SYCL_TARGET_INTEL_GPU_RKL__ == 1;
191+
arr[static_cast<int>(arch::intel_gpu_adl_s)] =
192+
__SYCL_TARGET_INTEL_GPU_ADL_S__ == 1;
193+
arr[static_cast<int>(arch::intel_gpu_rpl_s)] =
194+
__SYCL_TARGET_INTEL_GPU_RPL_S__ == 1;
195+
arr[static_cast<int>(arch::intel_gpu_adl_p)] =
196+
__SYCL_TARGET_INTEL_GPU_ADL_P__ == 1;
197+
arr[static_cast<int>(arch::intel_gpu_adl_n)] =
198+
__SYCL_TARGET_INTEL_GPU_ADL_N__ == 1;
199+
arr[static_cast<int>(arch::intel_gpu_dg1)] =
200+
__SYCL_TARGET_INTEL_GPU_DG1__ == 1;
201+
arr[static_cast<int>(arch::intel_gpu_acm_g10)] =
202+
__SYCL_TARGET_INTEL_GPU_ACM_G10__ == 1;
203+
arr[static_cast<int>(arch::intel_gpu_acm_g11)] =
204+
__SYCL_TARGET_INTEL_GPU_ACM_G11__ == 1;
205+
arr[static_cast<int>(arch::intel_gpu_acm_g12)] =
206+
__SYCL_TARGET_INTEL_GPU_ACM_G12__ == 1;
207+
arr[static_cast<int>(arch::intel_gpu_pvc)] =
208+
__SYCL_TARGET_INTEL_GPU_PVC__ == 1;
209+
}
210+
};
211+
212+
// One entry for each enumerator in "architecture" telling whether the AOT
213+
// target matches that architecture.
214+
static constexpr IsAOTForArchitectureClass is_aot_for_architecture;
215+
216+
// Reads the value of "is_allowable_aot_mode" via a template to defer triggering
217+
// static_assert() until template instantiation time.
218+
template <ext::intel::experimental::architecture... Archs>
219+
constexpr static bool allowable_aot_mode() {
220+
return is_allowable_aot_mode;
221+
}
222+
223+
// Tells if the current device has one of the architectures in the parameter
224+
// pack.
225+
template <ext::intel::experimental::architecture... Archs>
226+
constexpr static bool device_architecture_is() {
227+
return (is_aot_for_architecture.arr[static_cast<int>(Archs)] || ...);
228+
}
229+
230+
// Helper object used to implement "else_if_architecture_is" and "otherwise".
231+
// The "MakeCall" template parameter tells whether a previous clause in the
232+
// "if-elseif-elseif ..." chain was true. When "MakeCall" is false, some
233+
// previous clause was true, so none of the subsequent
234+
// "else_if_architecture_is" or "otherwise" member functions should call the
235+
// user's function.
236+
template <bool MakeCall> class if_architecture_helper {
237+
public:
238+
template <ext::intel::experimental::architecture... Archs, typename T,
239+
typename... Args>
240+
constexpr auto else_if_architecture_is(T fnTrue, Args... args) {
241+
if constexpr (MakeCall && device_architecture_is<Archs...>()) {
242+
fnTrue(args...);
243+
return if_architecture_helper<false>{};
244+
} else {
245+
(void)fnTrue;
246+
return if_architecture_helper<MakeCall>{};
247+
}
248+
}
249+
250+
template <typename T, typename... Args>
251+
constexpr void otherwise(T fn, Args... args) {
252+
if constexpr (MakeCall) {
253+
fn(args...);
254+
}
255+
}
256+
};
257+
} // namespace detail
258+
259+
namespace ext::intel::experimental {
260+
261+
template <architecture... Archs, typename T, typename... Args>
262+
constexpr static auto if_architecture_is(T fnTrue, Args... args) {
263+
static_assert(detail::allowable_aot_mode<Archs...>(),
264+
"The if_architecture_is function may only be used when AOT "
265+
"compiling with '-fsycl-targets=spir64_x86_64' or "
266+
"'-fsycl-targets=intel_gpu_*'");
267+
if constexpr (detail::device_architecture_is<Archs...>()) {
268+
fnTrue(args...);
269+
return detail::if_architecture_helper<false>{};
270+
} else {
271+
(void)fnTrue;
272+
return detail::if_architecture_helper<true>{};
273+
}
274+
}
275+
276+
} // namespace ext::intel::experimental
277+
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
278+
} // namespace sycl

sycl/include/sycl/feature_test.hpp.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) {
3030

3131
// TODO: Move these feature-test macros to compiler driver.
3232
#define SYCL_EXT_INTEL_DEVICE_INFO 5
33+
#define SYCL_EXT_INTEL_DEVICE_ARCHITECTURE 1
3334
#define SYCL_EXT_ONEAPI_SUB_GROUP_MASK 1
3435
#define SYCL_EXT_ONEAPI_LOCAL_MEMORY 1
3536
#define SYCL_EXT_ONEAPI_MATRIX 1

sycl/unittests/Extensions/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,6 @@ set(CMAKE_CXX_EXTENSIONS OFF)
22

33
add_sycl_unittest(ExtensionsTests OBJECT
44
DefaultContext.cpp
5+
DeviceArchitecture.cpp
56
)
67

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
//==--------------- DeviceArchitectureOneArchSelected.cpp ------------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <gtest/gtest.h>
10+
11+
// define one of __SYCL_TARGET_INTEL_*** macro, e.g., the one for SKL
12+
#define __SYCL_TARGET_INTEL_GPU_SKL__ 1
13+
14+
#include <sycl/ext/intel/experimental/device_architecture.hpp>
15+
16+
using namespace sycl;
17+
using namespace sycl::detail;
18+
using namespace sycl::ext::intel::experimental;
19+
20+
TEST(DeviceArchitectureTest, DeviceArchitecture_If) {
21+
bool res = false;
22+
if_architecture_is<architecture::intel_gpu_skl>([&]() { res = true; });
23+
ASSERT_TRUE(res);
24+
}
25+
26+
TEST(DeviceArchitectureTest, DeviceArchitecture_If_Negative) {
27+
bool res = false;
28+
if_architecture_is<architecture::intel_gpu_pvc>([&]() { res = true; });
29+
ASSERT_FALSE(res);
30+
}
31+
32+
TEST(DeviceArchitectureTest, DeviceArchitecture_Else_If) {
33+
bool res = false;
34+
if_architecture_is<architecture::intel_gpu_dg1>([]() {
35+
}).else_if_architecture_is<architecture::intel_gpu_skl>([&]() {
36+
res = true;
37+
});
38+
ASSERT_TRUE(res);
39+
}
40+
41+
TEST(DeviceArchitectureTest, DeviceArchitecture_Otherwise) {
42+
bool res = false;
43+
if_architecture_is<architecture::intel_gpu_dg1>([]() {
44+
}).else_if_architecture_is<architecture::intel_gpu_pvc>([&]() {
45+
}).otherwise([&]() { res = true; });
46+
ASSERT_TRUE(res);
47+
}

0 commit comments

Comments
 (0)