Skip to content

Commit dd7ac1a

Browse files
[SYCL] get kernel info with free functions (#18866)
This PR 1. adds **get_kernel_info** functions for kernel free functions, [docs](https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/proposed/sycl_ext_oneapi_free_function_kernels.asciidoc#new-free-functions-to-query-kernel-information-descriptors) 2. fixes bug when more than one property added with **add_ir_attributes_funcion**, free function was not recognized as a kernel function
1 parent b24454d commit dd7ac1a

File tree

4 files changed

+224
-0
lines changed

4 files changed

+224
-0
lines changed

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1197,6 +1197,8 @@ bool SemaSYCL::isFreeFunction(const FunctionDecl *FD) {
11971197
NameValuePair.first == "sycl-single-task-kernel";
11981198
});
11991199
IsFreeFunctionAttr = it != NameValuePairs.end();
1200+
if (IsFreeFunctionAttr)
1201+
break;
12001202
}
12011203
if (Redecl->isFirstDecl()) {
12021204
if (IsFreeFunctionAttr)

clang/test/CodeGenSYCL/free_function_int_header.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,16 @@ __attribute__((sycl_device))
254254
void ff_20(sycl::accessor<int, 1, sycl::access::mode::read_write> acc) {
255255
}
256256

257+
[[__sycl_detail__::add_ir_attributes_function("work_group_size", 16)]]
258+
[[__sycl_detail__::add_ir_attributes_function("sycl-nd-range-kernel", 0)]]
259+
void ff_21(AliasType start, AliasType *ptr) {
260+
}
261+
262+
[[__sycl_detail__::add_ir_attributes_function("sycl-nd-range-kernel", 0)]]
263+
[[__sycl_detail__::add_ir_attributes_function("work_group_size", 16)]]
264+
void ff_22(AliasType start, AliasType *ptr) {
265+
}
266+
257267
// CHECK: const char* const kernel_names[] = {
258268
// CHECK-NEXT: {{.*}}__sycl_kernel_ff_2Piii
259269
// CHECK-NEXT: {{.*}}__sycl_kernel_ff_2Piiii
@@ -286,6 +296,8 @@ void ff_20(sycl::accessor<int, 1, sycl::access::mode::read_write> acc) {
286296
// CHECK-NEXT: {{.*}}__sycl_kernel_free_functions5tests5ff_18ENS_3AggEPS1_
287297
// CHECK-NEXT: {{.*}}__sycl_kernel_ff_19N14free_functions16KArgWithPtrArrayILi50EEE
288298
// CHECK-NEXT: {{.*}}__sycl_kernel_ff_20N4sycl3_V18accessorIiLi1ELNS0_6access4modeE1026ELNS2_6targetE2014ELNS2_11placeholderE0ENS0_3ext6oneapi22accessor_property_listIJEEEEE
299+
// CHECK-NEXT: {{.*}}__sycl_kernel_ff_217DerivedPS_
300+
// CHECK-NEXT: {{.*}}__sycl_kernel_ff_227DerivedPS_
289301

290302
// CHECK-NEXT: ""
291303
// CHECK-NEXT: };
@@ -980,6 +992,37 @@ void ff_20(sycl::accessor<int, 1, sycl::access::mode::read_write> acc) {
980992
// CHECK-NEXT: };
981993
// CHECK-NEXT: }
982994

995+
996+
// CHECK: void ff_21(Derived start, Derived * ptr);
997+
// CHECK-NEXT: static constexpr auto __sycl_shim30() {
998+
// CHECK-NEXT: return (void (*)(struct Derived, struct Derived *))ff_21;
999+
// CHECK-NEXT: }
1000+
// CHECK-NEXT: namespace sycl {
1001+
// CHECK-NEXT: template <>
1002+
// CHECK-NEXT: struct ext::oneapi::experimental::is_kernel<__sycl_shim30()> {
1003+
// CHECK-NEXT: static constexpr bool value = true;
1004+
// CHECK-NEXT: };
1005+
// CHECK-NEXT: template <>
1006+
// CHECK-NEXT: struct ext::oneapi::experimental::is_single_task_kernel<__sycl_shim30()> {
1007+
// CHECK-NEXT: static constexpr bool value = true;
1008+
// CHECK-NEXT: };
1009+
// CHECK-NEXT: }
1010+
1011+
// CHECK: void ff_22(Derived start, Derived * ptr);
1012+
// CHECK-NEXT: static constexpr auto __sycl_shim31() {
1013+
// CHECK-NEXT: return (void (*)(struct Derived, struct Derived *))ff_22;
1014+
// CHECK-NEXT: }
1015+
// CHECK-NEXT: namespace sycl {
1016+
// CHECK-NEXT: template <>
1017+
// CHECK-NEXT: struct ext::oneapi::experimental::is_kernel<__sycl_shim31()> {
1018+
// CHECK-NEXT: static constexpr bool value = true;
1019+
// CHECK-NEXT: };
1020+
// CHECK-NEXT: template <>
1021+
// CHECK-NEXT: struct ext::oneapi::experimental::is_single_task_kernel<__sycl_shim31()> {
1022+
// CHECK-NEXT: static constexpr bool value = true;
1023+
// CHECK-NEXT: };
1024+
// CHECK-NEXT: }
1025+
9831026
// CHECK: #include <sycl/kernel_bundle.hpp>
9841027

9851028
// CHECK: Definition of kernel_id of _Z18__sycl_kernel_ff_2Piii
@@ -1196,3 +1239,17 @@ void ff_20(sycl::accessor<int, 1, sycl::access::mode::read_write> acc) {
11961239
// CHECK-NEXT: return sycl::detail::get_kernel_id_impl(std::string_view{"_Z19__sycl_kernel_ff_20N4sycl3_V18accessorIiLi1ELNS0_6access4modeE1026ELNS2_6targetE2014ELNS2_11placeholderE0ENS0_3ext6oneapi22accessor_property_listIJEEEEE"});
11971240
// CHECK-NEXT: }
11981241
// CHECK-NEXT: }
1242+
1243+
// CHECK: namespace sycl {
1244+
// CHECK-NEXT: template <>
1245+
// CHECK-NEXT: kernel_id ext::oneapi::experimental::get_kernel_id<__sycl_shim30()>() {
1246+
// CHECK-NEXT: return sycl::detail::get_kernel_id_impl(std::string_view{"_Z19__sycl_kernel_ff_217DerivedPS_"});
1247+
// CHECK-NEXT: }
1248+
// CHECK-NEXT: }
1249+
1250+
// CHECK: namespace sycl {
1251+
// CHECK-NEXT: template <>
1252+
// CHECK-NEXT: kernel_id ext::oneapi::experimental::get_kernel_id<__sycl_shim31()>() {
1253+
// CHECK-NEXT: return sycl::detail::get_kernel_id_impl(std::string_view{"_Z19__sycl_kernel_ff_227DerivedPS_"});
1254+
// CHECK-NEXT: }
1255+
// CHECK-NEXT: }

sycl/include/sycl/ext/oneapi/get_kernel_info.hpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <sycl/detail/export.hpp>
1212
#include <sycl/detail/info_desc_helpers.hpp>
1313
#include <sycl/device.hpp>
14+
#include <sycl/kernel_bundle.hpp>
1415
#include <sycl/kernel_bundle_enums.hpp>
1516
#include <sycl/queue.hpp>
1617

@@ -53,6 +54,38 @@ get_kernel_info(const queue &Q) {
5354
Q.get_device());
5455
}
5556

57+
// For free functions.
58+
namespace experimental {
59+
60+
template <auto *Func, typename Param>
61+
std::enable_if_t<ext::oneapi::experimental::is_kernel_v<Func>,
62+
typename sycl::detail::is_kernel_info_desc<Param>::return_type>
63+
get_kernel_info(const context &ctxt) {
64+
auto Bundle = sycl::ext::oneapi::experimental::get_kernel_bundle<
65+
Func, sycl::bundle_state::executable>(ctxt);
66+
return Bundle.template ext_oneapi_get_kernel<Func>()
67+
.template get_info<Param>();
68+
}
69+
70+
template <auto *Func, typename Param>
71+
std::enable_if_t<ext::oneapi::experimental::is_kernel_v<Func>,
72+
typename sycl::detail::is_kernel_device_specific_info_desc<
73+
Param>::return_type>
74+
get_kernel_info(const context &ctxt, const device &dev) {
75+
auto Bundle = sycl::ext::oneapi::experimental::get_kernel_bundle<
76+
Func, sycl::bundle_state::executable>(ctxt);
77+
return Bundle.template ext_oneapi_get_kernel<Func>().template get_info<Param>(
78+
dev);
79+
}
80+
81+
template <auto *Func, typename Param>
82+
std::enable_if_t<ext::oneapi::experimental::is_kernel_v<Func>,
83+
typename sycl::detail::is_kernel_device_specific_info_desc<
84+
Param>::return_type>
85+
get_kernel_info(const queue &q) {
86+
return get_kernel_info<Func, Param>(q.get_context(), q.get_device());
87+
}
88+
} // namespace experimental
5689
} // namespace ext::oneapi
5790
} // namespace _V1
5891
} // namespace sycl
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
// REQUIRES: aspect-usm_shared_allocations
2+
// RUN: %{build} -o %t.out
3+
// RUN: %{run} %t.out
4+
5+
// XFAIL: cpu
6+
// XFAIL-TRACKER: CMPLRLLVM-68536
7+
// UNSUPPORTED: cuda, hip
8+
// UNSUPPORTED-INTENDED: Not implemented yet for Nvidia/AMD backends.
9+
10+
#include <iostream>
11+
#include <sycl/ext/oneapi/free_function_queries.hpp>
12+
#include <sycl/ext/oneapi/get_kernel_info.hpp>
13+
#include <sycl/kernel_bundle.hpp>
14+
#include <sycl/usm.hpp>
15+
16+
namespace syclext = sycl::ext::oneapi;
17+
namespace syclexp = sycl::ext::oneapi::experimental;
18+
19+
static constexpr size_t NUM = 1024;
20+
static constexpr size_t WGSIZE = 16;
21+
22+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::work_group_size<WGSIZE>))
23+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>))
24+
void func(float start, float *ptr) {
25+
size_t id = syclext::this_work_item::get_nd_item<1>().get_global_linear_id();
26+
ptr[id] = start + static_cast<float>(id);
27+
}
28+
29+
bool check_result(int *ptr) {
30+
for (size_t i = 0; i < NUM; ++i) {
31+
const int expected = 3 + static_cast<int>(i);
32+
if (ptr[i] != expected)
33+
return true;
34+
}
35+
return false;
36+
}
37+
38+
static bool call_kernel_code(sycl::queue &q, sycl::kernel &kernel) {
39+
int *ptr = sycl::malloc_shared<int>(NUM, q);
40+
q.submit([&](sycl::handler &cgh) {
41+
cgh.set_args(3, ptr);
42+
sycl::nd_range ndr{{NUM}, {WGSIZE}};
43+
cgh.parallel_for(ndr, kernel);
44+
}).wait();
45+
const bool ret = check_result(ptr);
46+
sycl::free(ptr, q);
47+
return ret;
48+
}
49+
50+
bool test_ctxt_dev(sycl::kernel &k, sycl::queue &q) {
51+
const auto wg_size_cmp =
52+
k.get_info<sycl::info::kernel_device_specific::work_group_size>(
53+
q.get_device());
54+
const auto wg_size = syclexp::get_kernel_info<
55+
func, sycl::info::kernel_device_specific::work_group_size>(
56+
q.get_context(), q.get_device());
57+
if (wg_size_cmp != wg_size)
58+
std::cerr << "Work group size from get_info: " << wg_size_cmp
59+
<< " is not equal to work group size from get_kernel_info: "
60+
<< wg_size << std::endl;
61+
return wg_size_cmp == wg_size;
62+
}
63+
64+
bool test_ctxt(sycl::kernel &k, sycl::queue &q) {
65+
const auto attributes =
66+
syclexp::get_kernel_info<func, sycl::info::kernel::attributes>(
67+
q.get_context());
68+
const std::string wg_size_str = "work_group_size(";
69+
if (attributes.empty() || attributes.find(wg_size_str) == std::string::npos) {
70+
std::cerr << "Work group size attribute is not found in kernel attributes, "
71+
"attributes:"
72+
<< attributes << std::endl;
73+
return false;
74+
}
75+
auto wg_size_pos = attributes.find(wg_size_str);
76+
wg_size_pos += wg_size_str.size();
77+
const auto comma_pos = attributes.find(',', wg_size_pos);
78+
if (comma_pos == std::string::npos) {
79+
std::cerr << "Comma not found in work group size attribute string"
80+
<< std::endl;
81+
return false;
82+
}
83+
84+
const auto wg_size_str_value =
85+
attributes.substr(wg_size_pos, comma_pos - wg_size_pos);
86+
const size_t wg_size = std::stoul(wg_size_str_value);
87+
if (wg_size != WGSIZE) {
88+
std::cerr << "Work group size from attributes: " << wg_size
89+
<< " is not equal to expected work group size: " << WGSIZE
90+
<< std::endl;
91+
return false;
92+
}
93+
94+
if (const auto wg_size_cmp =
95+
k.get_info<sycl::info::kernel_device_specific::work_group_size>(
96+
q.get_device());
97+
wg_size_cmp < wg_size) {
98+
std::cerr << "Work group size from get_info: " << wg_size_cmp
99+
<< " is less work group size from attributes: " << wg_size
100+
<< std::endl;
101+
return false;
102+
}
103+
return true;
104+
}
105+
106+
bool test_queue(sycl::kernel &k, sycl::queue &q) {
107+
const auto wg_size_cmp =
108+
k.get_info<sycl::info::kernel_device_specific::work_group_size>(
109+
q.get_device());
110+
const auto wg_size = syclexp::get_kernel_info<
111+
func, sycl::info::kernel_device_specific::work_group_size>(q);
112+
if (wg_size_cmp != wg_size)
113+
std::cerr << "Work group size from get_info: " << wg_size_cmp
114+
<< " is not equal to work group size from get_kernel_info: "
115+
<< wg_size << std::endl;
116+
return wg_size_cmp == wg_size;
117+
}
118+
119+
int main() {
120+
sycl::queue q;
121+
sycl::context ctxt = q.get_context();
122+
123+
auto exe_bndl =
124+
syclexp::get_kernel_bundle<func, sycl::bundle_state::executable>(ctxt);
125+
sycl::kernel k_func = exe_bndl.template ext_oneapi_get_kernel<func>();
126+
call_kernel_code(q, k_func);
127+
128+
bool ret = test_ctxt_dev(k_func, q);
129+
ret &= test_ctxt(k_func, q);
130+
ret &= test_queue(k_func, q);
131+
return ret ? 0 : 1;
132+
}

0 commit comments

Comments
 (0)