Skip to content

Commit 0d5addc

Browse files
authored
[SYCL] Device code generation for "free functions", a new way to define kernels (#13207)
This PR is intended for review of the device-side code generation of "free functions". The free function spec is here: https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/proposed/sycl_ext_oneapi_free_function_kernels.asciidoc This PR does not implement the full specification. It does however allow free function markup and execution using a manual method. Later PRs will add the remaining parts of the spec. This is the current status: - Free functions are supported at file scope only. - The SYCL_EXTERNAL markup is needed for free functions in addition to the SYCL_EXT_ONEAPI_FUNCTION_PROPERTY property. - The traits described in the specification section "New traits for kernel functions" are not yet implemented. - The functions described in the specification section "New kernel bundle member functions" are not yet implemented. As a result, a free-function kernel can only be launched by finding the kernel_id that matches the function's name, and this requires knowing how the compiler mangles the function's name when creating the kernel. This is a temporary solution until the "New kernel bundle member functions" are implemented. - The compiler does not yet diagnose an error if the application violates any of the restrictions listed in the specification under the section "Defining a free function kernel". - Device code generation is supported for scalars and USM pointers only. It is not supported for complex kernel argument types requiring decomposition like accessor, local_accessor, or stream. - The implementation has not been tested to handle the case when a kernel argument is optimized away. The switch -fno-sycl-dead-args-optimization could be used to disable this optimization, if needed - The kernel information descriptor info::kernel::num_args cannot yet be used to query the number of arguments in a free function kernel.
1 parent 7271d61 commit 0d5addc

File tree

9 files changed

+1254
-32
lines changed

9 files changed

+1254
-32
lines changed

clang/include/clang/Sema/SemaSYCL.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,7 @@ class SemaSYCL : public SemaBase {
318318
void ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, MangleContext &MC);
319319
void SetSYCLKernelNames();
320320
void MarkDevices();
321+
void ProcessFreeFunction(FunctionDecl *FD);
321322

322323
/// Get the number of fields or captures within the parsed type.
323324
ExprResult ActOnSYCLBuiltinNumFieldsExpr(ParsedType PT);

clang/lib/Sema/SemaDecl.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16651,6 +16651,13 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
1665116651
if (FD && !FD->isDeleted())
1665216652
checkTypeSupport(FD->getType(), FD->getLocation(), FD);
1665316653

16654+
// Handle free functions.
16655+
if (LangOpts.SYCLIsDevice && FD->hasAttr<SYCLDeviceAttr>() && Body &&
16656+
(FD->getTemplatedKind() == FunctionDecl::TK_NonTemplate ||
16657+
FD->getTemplatedKind() ==
16658+
FunctionDecl::TK_FunctionTemplateSpecialization))
16659+
SYCL().ProcessFreeFunction(FD);
16660+
1665416661
return dcl;
1665516662
}
1665616663

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 783 additions & 30 deletions
Large diffs are not rendered by default.
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -sycl-std=2020 -fsycl-int-header=%t.h %s
2+
// RUN: FileCheck -input-file=%t.h %s
3+
//
4+
// This test checks integration header contents for free functions with scalar
5+
// and pointer parameters.
6+
7+
#include "mock_properties.hpp"
8+
#include "sycl.hpp"
9+
10+
// First overload of function ff_2.
11+
__attribute__((sycl_device))
12+
[[__sycl_detail__::add_ir_attributes_function("sycl-single-task-kernel",
13+
2)]] void
14+
ff_2(int *ptr, int start, int end) {
15+
for (int i = start; i <= end; i++)
16+
ptr[i] = start + 66;
17+
}
18+
19+
// Second overload of function ff_2.
20+
__attribute__((sycl_device))
21+
[[__sycl_detail__::add_ir_attributes_function("sycl-single-task-kernel",
22+
2)]] void
23+
ff_2(int* ptr, int start, int end, int value) {
24+
for (int i = start; i <= end; i++)
25+
ptr[i] = start + value;
26+
}
27+
28+
// Templated definition of function ff_3.
29+
template <typename T>
30+
__attribute__((sycl_device))
31+
[[__sycl_detail__::add_ir_attributes_function("sycl-nd-range-kernel", 2)]] void
32+
ff_3(T *ptr, T start, T end) {
33+
for (int i = start; i <= end; i++)
34+
ptr[i] = start;
35+
}
36+
37+
// Explicit instantiation of ff_3 with int type.
38+
template void ff_3(int *ptr, int start, int end);
39+
40+
// Explicit instantiation of ff_3 with float type.
41+
template void ff_3(float* ptr, float start, float end);
42+
43+
// Specialization of ff_3 with double type.
44+
template <> void ff_3<double>(double *ptr, double start, double end) {
45+
for (int i = start; i <= end; i++)
46+
ptr[i] = end;
47+
}
48+
49+
// CHECK: const char* const kernel_names[] = {
50+
// CHECK-NEXT: {{.*}}__sycl_kernel_ff_2Piii
51+
// CHECK-NEXT: {{.*}}__sycl_kernel_ff_2Piiii
52+
// CHECK-NEXT: {{.*}}__sycl_kernel_ff_3IiEvPT_S0_S0_
53+
// CHECK-NEXT: {{.*}}__sycl_kernel_ff_3IfEvPT_S0_S0_
54+
// CHECK-NEXT: {{.*}}__sycl_kernel_ff_3IdEvPT_S0_S0_
55+
// CHECK-NEXT: };
56+
57+
// CHECK: const kernel_param_desc_t kernel_signatures[] = {
58+
// CHECK-NEXT: {{.*}}__sycl_kernel_ff_2Piii
59+
// CHECK-NEXT: { kernel_param_kind_t::kind_pointer, 8, 0 },
60+
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 8 },
61+
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 12 },
62+
63+
// CHECK: {{.*}}__sycl_kernel_ff_2Piiii
64+
// CHECK-NEXT: { kernel_param_kind_t::kind_pointer, 8, 0 },
65+
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 8 },
66+
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 12 },
67+
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 16 },
68+
69+
// CHECK: {{.*}}__sycl_kernel_ff_3IiEvPT_S0_S0_
70+
// CHECK-NEXT: { kernel_param_kind_t::kind_pointer, 8, 0 },
71+
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 8 },
72+
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 12 },
73+
74+
// CHECK: {{.*}}__sycl_kernel_ff_3IfEvPT_S0_S0_
75+
// CHECK-NEXT: { kernel_param_kind_t::kind_pointer, 8, 0 },
76+
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 8 },
77+
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 12 },
78+
79+
// CHECK: {{.*}}__sycl_kernel_ff_3IdEvPT_S0_S0_
80+
// CHECK-NEXT: { kernel_param_kind_t::kind_pointer, 8, 0 },
81+
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 8, 8 },
82+
// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 8, 16 },
83+
84+
// CHECK: { kernel_param_kind_t::kind_invalid, -987654321, -987654321 },
85+
// CHECK-NEXT: };
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -ast-dump \
2+
// RUN: %s -o - | FileCheck %s
3+
// This test checks parameter rewriting for free functions with parameters
4+
// of type scalar and pointer.
5+
6+
#include "sycl.hpp"
7+
8+
__attribute__((sycl_device))
9+
[[__sycl_detail__::add_ir_attributes_function("sycl-single-task-kernel", 0)]]
10+
void ff_2(int *ptr, int start, int end) {
11+
for (int i = start; i <= end; i++)
12+
ptr[i] = start;
13+
}
14+
// CHECK: FunctionDecl {{.*}}__sycl_kernel_{{.*}} 'void (__global int *, int, int)'
15+
// CHECK-NEXT: ParmVarDecl {{.*}} __arg_ptr '__global int *'
16+
// CHECK-NEXT: ParmVarDecl {{.*}} __arg_start 'int'
17+
// CHECK-NEXT: ParmVarDecl {{.*}} __arg_end 'int'
18+
// CHECK-NEXT: CompoundStmt
19+
// CHECK-NEXT: CallExpr {{.*}} 'void'
20+
// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(int *, int, int)' <FunctionToPointerDecay>
21+
// CHECK-NEXT: DeclRefExpr {{.*}} 'void (int *, int, int)' lvalue Function {{.*}} 'ff_2' 'void (int *, int, int)'
22+
// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int *' <AddressSpaceConversion>
23+
// CHECK-NEXT: ImplicitCastExpr {{.*}} '__global int *' <LValueToRValue>
24+
// CHECK-NEXT: DeclRefExpr {{.*}} '__global int *' lvalue ParmVar {{.*}} '__arg_ptr' '__global int *'
25+
// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
26+
// CHECK-NEXT: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} '__arg_start' 'int'
27+
// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
28+
// CHECK-NEXT: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} '__arg_end' 'int'
29+
30+
31+
// Templated free function definition.
32+
template <typename T>
33+
__attribute__((sycl_device))
34+
[[__sycl_detail__::add_ir_attributes_function("sycl-single-task-kernel", 0)]]
35+
void ff_3(T* ptr, T start, int end) {
36+
for (int i = start; i <= end; i++)
37+
ptr[i] = start;
38+
}
39+
40+
// Explicit instantiation with "int*"
41+
template void ff_3(int* ptr, int start, int end);
42+
43+
// CHECK: FunctionDecl {{.*}}__sycl_kernel_{{.*}} 'void (__global int *, int, int)'
44+
// CHECK-NEXT: ParmVarDecl {{.*}} __arg_ptr '__global int *'
45+
// CHECK-NEXT: ParmVarDecl {{.*}} __arg_start 'int'
46+
// CHECK-NEXT: ParmVarDecl {{.*}} __arg_end 'int'
47+
// CHECK-NEXT: CompoundStmt
48+
// CHECK-NEXT: CallExpr {{.*}} 'void'
49+
// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(int *, int, int)' <FunctionToPointerDecay>
50+
// CHECK-NEXT: DeclRefExpr {{.*}} 'void (int *, int, int)' lvalue Function {{.*}} 'ff_3' 'void (int *, int, int)'
51+
// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int *' <AddressSpaceConversion>
52+
// CHECK-NEXT: ImplicitCastExpr {{.*}} '__global int *' <LValueToRValue>
53+
// CHECK-NEXT: DeclRefExpr {{.*}} '__global int *' lvalue ParmVar {{.*}} '__arg_ptr' '__global int *'
54+
// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
55+
// CHECK-NEXT: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} '__arg_start' 'int'
56+
// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
57+
// CHECK-NEXT: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} '__arg_end' 'int'

sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,16 @@ struct device_has_key
6161
std::integral_constant<aspect, Aspects>...>;
6262
};
6363

64+
struct nd_range_kernel_key {
65+
template <int Dims>
66+
using value_t =
67+
property_value<nd_range_kernel_key, std::integral_constant<int, Dims>>;
68+
};
69+
70+
struct single_task_kernel_key {
71+
using value_t = property_value<single_task_kernel_key>;
72+
};
73+
6474
template <size_t Dim0, size_t... Dims>
6575
struct property_value<work_group_size_key, std::integral_constant<size_t, Dim0>,
6676
std::integral_constant<size_t, Dims>...> {
@@ -113,6 +123,21 @@ struct property_value<device_has_key,
113123
static constexpr std::array<aspect, sizeof...(Aspects)> value{Aspects...};
114124
};
115125

126+
template <int Dims>
127+
struct property_value<nd_range_kernel_key, std::integral_constant<int, Dims>> {
128+
static_assert(
129+
Dims >= 1 && Dims <= 3,
130+
"nd_range_kernel_key property must use dimension of 1, 2 or 3.");
131+
132+
using key_t = nd_range_kernel_key;
133+
using value_t = int;
134+
static constexpr int dimensions = Dims;
135+
};
136+
137+
template <> struct property_value<single_task_kernel_key> {
138+
using key_t = single_task_kernel_key;
139+
};
140+
116141
template <size_t Dim0, size_t... Dims>
117142
inline constexpr work_group_size_key::value_t<Dim0, Dims...> work_group_size;
118143

@@ -126,6 +151,11 @@ inline constexpr sub_group_size_key::value_t<Size> sub_group_size;
126151
template <aspect... Aspects>
127152
inline constexpr device_has_key::value_t<Aspects...> device_has;
128153

154+
template <int Dims>
155+
inline constexpr nd_range_kernel_key::value_t<Dims> nd_range_kernel;
156+
157+
inline constexpr single_task_kernel_key::value_t single_task_kernel;
158+
129159
struct work_group_progress_key
130160
: detail::compile_time_property_key<detail::PropKind::WorkGroupProgress> {
131161
template <forward_progress_guarantee Guarantee,
@@ -209,6 +239,7 @@ template <> struct is_property_key<sub_group_progress_key> : std::true_type {};
209239
template <> struct is_property_key<work_item_progress_key> : std::true_type {};
210240

211241
namespace detail {
242+
212243
template <size_t Dim0, size_t... Dims>
213244
struct PropertyMetaInfo<work_group_size_key::value_t<Dim0, Dims...>> {
214245
static constexpr const char *name = "sycl-work-group-size";
@@ -230,6 +261,15 @@ struct PropertyMetaInfo<device_has_key::value_t<Aspects...>> {
230261
static constexpr const char *value =
231262
SizeListToStr<static_cast<size_t>(Aspects)...>::value;
232263
};
264+
template <int Dims>
265+
struct PropertyMetaInfo<nd_range_kernel_key::value_t<Dims>> {
266+
static constexpr const char *name = "sycl-nd-range-kernel";
267+
static constexpr int value = Dims;
268+
};
269+
template <> struct PropertyMetaInfo<single_task_kernel_key::value_t> {
270+
static constexpr const char *name = "sycl-single-task-kernel";
271+
static constexpr int value = 0;
272+
};
233273

234274
template <typename T, typename = void>
235275
struct HasKernelPropertiesGetMethod : std::false_type {};
@@ -251,7 +291,6 @@ struct HasKernelPropertiesGetMethod<T,
251291
#ifdef __SYCL_DEVICE_ONLY__
252292
#define SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(PROP) \
253293
[[__sycl_detail__::add_ir_attributes_function( \
254-
{"sycl-device-has"}, \
255294
sycl::ext::oneapi::experimental::detail::PropertyMetaInfo< \
256295
std::remove_cv_t<std::remove_reference_t<decltype(PROP)>>>::name, \
257296
sycl::ext::oneapi::experimental::detail::PropertyMetaInfo< \

sycl/include/sycl/ext/oneapi/properties/property.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,8 +203,10 @@ enum PropKind : uint32_t {
203203
WorkGroupProgress = 62,
204204
SubGroupProgress = 63,
205205
WorkItemProgress = 64,
206+
NDRangeKernel = 65,
207+
SingleTaskKernel = 66,
206208
// PropKindSize must always be the last value.
207-
PropKindSize = 65,
209+
PropKindSize = 67,
208210
};
209211

210212
struct property_key_base_tag {};

sycl/source/feature_test.hpp.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ inline namespace _V1 {
105105
#define SYCL_EXT_INTEL_FPGA_TASK_SEQUENCE 1
106106
#define SYCL_EXT_ONEAPI_PRIVATE_ALLOCA 1
107107
#define SYCL_EXT_ONEAPI_FORWARD_PROGRESS 1
108+
#define SYCL_EXT_ONEAPI_FREE_FUNCTION_KERNELS 1
108109

109110
#ifndef __has_include
110111
#define __has_include(x) 0

0 commit comments

Comments
 (0)