Skip to content

Commit 39c3cd6

Browse files
committed
[SYCL] Always inline kernel lambda operator in entry point
This patch marks the `operator()` of the kernel lambda as `always_inline` so that it gets inlined into the kernel entry point. Kernel entry point are functions that take the captured variables as parameters, create a lambda object from that, setup the index structs and then call `operator()` on the lambda. Inlining the operator into the entry point should be beneficial in most cases as it allows the compiler to optimize out the lambda creation, which can be very important for kernels capturing a lot of variables. In a lot of cases the inliner will already do it, but when it doesn't it can lead to very confusing performance implications since the kernel entry point isn't directly visible to users.
1 parent c44a84a commit 39c3cd6

26 files changed

+33
-145
lines changed

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -837,6 +837,13 @@ class SingleDeviceFunctionTracker {
837837
CallGraphNode *KernelNode = Parent.getNodeForKernel(SYCLKernel);
838838
llvm::SmallVector<FunctionDecl *> CallStack;
839839
VisitCallNode(KernelNode, GetFDFromNode(KernelNode), CallStack);
840+
841+
// Always inline the KernelBody in the kernel entry point.
842+
if (KernelBody) {
843+
KernelBody->addAttr(AlwaysInlineAttr::CreateImplicit(
844+
KernelBody->getASTContext(), {}, AttributeCommonInfo::AS_Keyword,
845+
AlwaysInlineAttr::Keyword_forceinline));
846+
}
840847
}
841848

842849
public:

clang/test/CodeGenSYCL/check-direct-attribute-propagation.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -304,20 +304,19 @@ int main() {
304304

305305
// Test attribute is not propagated.
306306
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name32() #0 !kernel_arg_buffer_location ![[NUM]]
307-
// CHECK: define {{.*}}spir_func void @{{.*}}Functor10{{.*}}(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2
308307
// CHECK-NOT: noalias
309308
// CHECK-SAME: {
310309
// CHECK: define dso_local spir_func void @_Z4foo8v()
311310
Functor10 f10;
312311
h.single_task<class kernel_name32>(f10);
313312

314313
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name33() #0 !kernel_arg_buffer_location ![[NUM]]
315-
// CHECK: define {{.*}}spir_func void @{{.*}}Foo8{{.*}}(ptr addrspace(4) noalias noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2
314+
// CHECK: store ptr addrspace(4) %Foo8{{.*}} !noalias
316315
Foo8 boo8;
317316
h.single_task<class kernel_name33>(boo8);
318317

319318
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name34() #0 !kernel_arg_buffer_location ![[NUM]]
320-
// CHECK: define {{.*}}spir_func void @{{.*}}(ptr addrspace(4) noalias noundef align 1 dereferenceable_or_null(1) %this) #4 align 2
319+
// CHECK: store ptr addrspace(4){{.*}} !noalias
321320
h.single_task<class kernel_name34>(
322321
[]() [[intel::kernel_args_restrict]]{});
323322
});

clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,9 @@ int main() {
2727
// CHECK: define{{.*}} spir_kernel {{.*}}19use_kernel_for_test({{.*}}){{.*}} !dbg [[KERNEL:![0-9]+]] {{.*}}{
2828
// CHECK: getelementptr inbounds %class.anon, {{.*}}, i32 0, i32 0, !dbg [[LINE_A0:![0-9]+]]
2929
// CHECK: call spir_func void {{.*}}6__init{{.*}} !dbg [[LINE_A0]]
30-
// CHECK: call spir_func void @_ZZ4mainENKUlvE_clEv{{.*}} !dbg [[LINE_B0:![0-9]+]]
31-
// CHECK: ret void, !dbg [[LINE_C0:![0-9]+]]
3230
// CHECK: [[KERNEL]] = {{.*}}!DISubprogram(name: "{{.*}}19use_kernel_for_test"
3331
// CHECK-SAME: scope: [[FILE:![0-9]+]],
3432
// CHECK-SAME: file: [[FILE]],
3533
// CHECK-SAME: flags: DIFlagArtificial | DIFlagPrototyped
3634
// CHECK: [[FILE]] = !DIFile(filename: "{{.*}}debug-info-srcpos-kernel.cpp"{{.*}})
3735
// CHECK: [[LINE_A0]] = !DILocation(line: 15,{{.*}}scope: [[KERNEL]]
38-
// CHECK: [[LINE_B0]] = !DILocation(line: 16,{{.*}}scope: [[BLOCK:![0-9]+]]
39-
// CHECK: [[BLOCK]] = distinct !DILexicalBlock(scope: [[KERNEL]]
40-
// CHECK: [[LINE_C0]] = !DILocation(line: 17,{{.*}}scope: [[KERNEL]]

clang/test/CodeGenSYCL/device-functions.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,5 @@ int main() {
2222
return 0;
2323
}
2424
// CHECK: define {{.*}}spir_kernel void @_ZTSZ4mainE11fake_kernel()
25-
// CHECK: define internal spir_func void @_ZZ4mainENKUlvE_clEv(ptr addrspace(4) {{[^,]*}} %this)
2625
// CHECK: define {{.*}}spir_func void @_Z3foov()
2726
// CHECK: define linkonce_odr spir_func noundef i32 @_Z3barIiET_S0_(i32 noundef %arg)

clang/test/CodeGenSYCL/device-variables.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,19 +23,18 @@ int main() {
2323
kernel<class test_kernel>([=]() {
2424
// Global variables used directly
2525
foo(global_value);
26+
// CHECK: [[LOAD:%[a-z0-9.]+]] = load ptr addrspace(4){{.*}}
2627
// CHECK: call spir_func void @{{.*}}foo{{.*}}(ptr addrspace(4) noundef align 4 dereferenceable(4) addrspacecast (ptr addrspace(1) @{{.*}}global_value to ptr addrspace(4)))
2728
int a = my_array[0];
28-
// CHECK: [[LOAD:%[0-9]+]] = load i32, ptr addrspace(4)
29-
// CHECK: store i32 [[LOAD]], ptr addrspace(4) %a
29+
// CHECK: store i32 42, ptr addrspace(4) %a
3030
int b = some_const;
3131
// Constant used directly
3232
// CHECK: store i32 1, ptr addrspace(4) %b
3333
foo(local_value);
3434
// Local variables and constexprs captured by lambda
35-
// CHECK: [[GEP:%[a-z_]+]] = getelementptr inbounds %class.anon, ptr addrspace(4) %{{.*}}, i32 0, i32 0
36-
// CHECK: call spir_func void @{{.*}}foo{{.*}}(ptr addrspace(4) noundef align 4 dereferenceable(4) [[GEP]])
35+
// CHECK: call spir_func void @{{.*}}foo{{.*}}(ptr addrspace(4) noundef align 4 dereferenceable(4) [[LOAD]])
3736
int some_device_local_var = some_local_var;
38-
// CHECK: [[GEP1:%[a-z_]+]] = getelementptr inbounds %class.anon, ptr addrspace(4) %{{.*}}, i32 0, i32 1
37+
// CHECK: [[GEP1:%[a-z_.]+]] = getelementptr inbounds %class.anon, ptr addrspace(4) %{{.*}}, i32 0, i32 1
3938
// CHECK: [[LOAD1:%[0-9]+]] = load i32, ptr addrspace(4) [[GEP1]]
4039
// CHECK: store i32 [[LOAD1]], ptr addrspace(4) %some_device_local_var
4140
});

clang/test/CodeGenSYCL/esimd_metadata2.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ __attribute__((sycl_device)) __attribute__((sycl_explicit_simd)) void esimd_func
1010
// CHECK-ESIMD-DAG: define {{.*}}spir_kernel void @{{.*}}kernel_cm() #{{[0-9]+}} !sycl_explicit_simd !{{[0-9]+}} !intel_reqd_sub_group_size ![[SGSIZE1:[0-9]+]] {{.*}}{
1111
// CHECK-ESIMD-DAG: define {{.*}}spir_func void @{{.*}}esimd_funcv() #{{[0-9]+}} !sycl_explicit_simd !{{[0-9]+}} !intel_reqd_sub_group_size ![[SGSIZE1]] {
1212
// CHECK-ESIMD-DAG: define {{.*}}spir_func void @{{.*}}shared_funcv() #{{[0-9]+}} {
13-
// CHECK-ESIMD-DAG: define linkonce_odr spir_func void @_ZN12ESIMDFunctorclEv({{.*}}) #{{[0-9]+}} {{.*}} !sycl_explicit_simd !{{[0-9]+}} {
1413

1514
class ESIMDFunctor {
1615
public:

clang/test/CodeGenSYCL/kernel-handler.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,3 @@ void test(int val) {
3333
// NATIVESUPPORT-NOT: load ptr addrspace(1), ptr addrspace(1) %_arg__specialization_constants_buffer.addr, align 8
3434
// NATIVESUPPORT-NOT: addrspacecast ptr addrspace(1) %{{[0-9]+}} to ptr
3535
// NATIVESUPPORT-NOT: call void @{{.*}}__init_specialization_constants_buffer{{.*}}(ptr noundef align 4 nonnull align 1 dereferenceable(1) %kh, ptr noundef align 4 %{{[0-9]+}})
36-
37-
// ALL: call{{ spir_func | }}void @{{[a-zA-Z0-9_$]+}}kernel_handler{{[a-zA-Z0-9_$]+}}
38-
// ALL-SAME: noundef byval(%"class.sycl::_V1::kernel_handler")

clang/test/CodeGenSYCL/max-concurrency.cpp

Lines changed: 0 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -20,48 +20,10 @@
2020
// CHECK: ret void
2121

2222
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name1() [[ATTR0:#.*]] {{.*}} !max_concurrency ![[NUM1:[0-9]+]]
23-
// CHECK: entry:
24-
// CHECK: [[F1:%.*]] = alloca [[CLASS_F1:%.*]], align 1
25-
// CHECK: [[F1_ASCAST:%.*]] = addrspacecast ptr [[F1]] to ptr addrspace(4)
26-
// CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr [[F1]])
27-
// CHECK: call spir_func void @_ZNK8Functor1clEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) [[F1_ASCAST]])
28-
// CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr [[F1]])
29-
// CHECK: ret void
3023

3124
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name4() [[ATTR0]] {{.*}} !max_concurrency ![[NUM1:[0-9]+]]
32-
// CHECK: entry
33-
// CHECK: [[F3:%.*]] = alloca [[CLASS_F3:%.*]], align 1
34-
// CHECK: [[F3_ASCAST:%.*]] = addrspacecast ptr [[F3]] to ptr addrspace(4)
35-
// CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr [[F3]])
36-
// CHECK: call spir_func void @_ZNK8Functor3ILi4EEclEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) [[F3_ASCAST]])
37-
// CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr [[F3]]
38-
// CHECK: ret void
39-
40-
// CHECK: define linkonce_odr spir_func void @_ZNK8Functor3ILi4EEclEv
41-
// CHECK: entry:
42-
// CHECK: [[ADDR_1:%.*]] = alloca ptr addrspace(4), align 8
43-
// CHECK: [[ADDR1_CAST:%.*]] = addrspacecast ptr [[ADDR_1]] to ptr addrspace(4)
44-
// CHECK: store ptr addrspace(4) %this, ptr addrspace(4) [[ADDR1_CAST]], align 8
45-
// CHECK: %this1 = load ptr addrspace(4), ptr addrspace(4) [[ADDR1_CAST]], align 8
46-
// CHECK: ret void
4725

4826
// CHECK: define dso_local spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E12kernel_name5()
49-
// CHECK: entry:
50-
// CHECK: [[H1:%.*]] = alloca [[H:%.*]], align 1
51-
// CHECK: [[H2:%.*]] = addrspacecast ptr [[H1]] to ptr addrspace(4)
52-
// CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr [[H1]])
53-
// CHECK: call spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) [[H2]])
54-
// CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr [[H1]])
55-
// CHECK: ret void
56-
57-
// CHECK: define {{.*}}spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv
58-
// CHECK: entry:
59-
// CHECK: [[ADDR_1:%.*]] = alloca ptr addrspace(4), align 8
60-
// CHECK: [[ADDR1_CAST:%.*]] = addrspacecast ptr [[ADDR_1]] to ptr addrspace(4)
61-
// CHECK: store ptr addrspace(4) %this, ptr addrspace(4) [[ADDR1_CAST]], align 8
62-
// CHECK: %this1 = load ptr addrspace(4), ptr addrspace(4) [[ADDR1_CAST]], align 8
63-
// CHECK: call spir_func void @_Z4funcILi2EEvv()
64-
// CHECK: ret void
6527

6628
template <int A>
6729
void max_concurrency() {

clang/test/CodeGenSYCL/no_opaque_basic-kernel-wrapper.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,3 @@ int main() {
5858
// CHECK-SAME: %"struct.sycl::_V1::range"* noundef byval({{.*}}) align 4 [[ARANGE]],
5959
// CHECK-SAME: %"struct.sycl::_V1::range"* noundef byval({{.*}}) align 4 [[MRANGE]],
6060
// CHECK-SAME: %"struct.sycl::_V1::id"* noundef byval({{.*}}) align 4 [[OID]])
61-
62-
// Check lambda "()" operator call
63-
// CHECK: call spir_func void @{{.*}}(%class.anon addrspace(4)* {{[^,]*}})

clang/test/CodeGenSYCL/no_opaque_check-direct-attribute-propagation.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -304,20 +304,19 @@ int main() {
304304

305305
// Test attribute is not propagated.
306306
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name32() #0 !kernel_arg_buffer_location ![[NUM]]
307-
// CHECK: define {{.*}}spir_func void @{{.*}}Functor10{{.*}}(%class.Functor10 addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2
308307
// CHECK-NOT: noalias
309308
// CHECK-SAME: {
310309
// CHECK: define dso_local spir_func void @_Z4foo8v()
311310
Functor10 f10;
312311
h.single_task<class kernel_name32>(f10);
313312

314313
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name33() #0 !kernel_arg_buffer_location ![[NUM]]
315-
// CHECK: define {{.*}}spir_func void @{{.*}}Foo8{{.*}}(%class.Foo8 addrspace(4)* noalias noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2
314+
// CHECK: store %class.Foo8{{.*}} !noalias
316315
Foo8 boo8;
317316
h.single_task<class kernel_name33>(boo8);
318317

319318
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name34() #0 !kernel_arg_buffer_location ![[NUM]]
320-
// CHECK: define {{.*}}spir_func void @{{.*}}(%class.anon{{.*}} addrspace(4)* noalias noundef align 1 dereferenceable_or_null(1) %this) #4 align 2
319+
// CHECK: store %class.anon{{.*}} !noalias
321320
h.single_task<class kernel_name34>(
322321
[]() [[intel::kernel_args_restrict]]{});
323322
});

clang/test/CodeGenSYCL/no_opaque_device-functions.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,5 @@ int main() {
2222
return 0;
2323
}
2424
// CHECK: define {{.*}}spir_kernel void @_ZTSZ4mainE11fake_kernel()
25-
// CHECK: define internal spir_func void @_ZZ4mainENKUlvE_clEv(%class.anon addrspace(4)* {{[^,]*}} %this)
2625
// CHECK: define {{.*}}spir_func void @_Z3foov()
2726
// CHECK: define linkonce_odr spir_func noundef i32 @_Z3barIiET_S0_(i32 noundef %arg)

clang/test/CodeGenSYCL/no_opaque_device-variables.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,16 @@ int main() {
2525
foo(global_value);
2626
// CHECK: call spir_func void @{{.*}}foo{{.*}}(i32 addrspace(4)* noundef align 4 dereferenceable(4) addrspacecast (i32 addrspace(1)* @{{.*}}global_value to i32 addrspace(4)*))
2727
int a = my_array[0];
28-
// CHECK: [[LOAD:%[0-9]+]] = load i32, i32 addrspace(4)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(4)* addrspacecast ([1 x i32] addrspace(1)* @{{.*}}my_array to [1 x i32] addrspace(4)*), i64 0, i64 0)
29-
// CHECK: store i32 [[LOAD]], i32 addrspace(4)* %a
28+
// CHECK: store i32 42, i32 addrspace(4)* %a
3029
int b = some_const;
3130
// Constant used directly
3231
// CHECK: store i32 1, i32 addrspace(4)* %b
3332
foo(local_value);
3433
// Local variables and constexprs captured by lambda
35-
// CHECK: [[GEP:%[a-z_]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* %{{.*}}, i32 0, i32 0
34+
// CHECK: [[GEP:%[a-z_.]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* %{{.*}}, i32 0, i32 0
3635
// CHECK: call spir_func void @{{.*}}foo{{.*}}(i32 addrspace(4)* noundef align 4 dereferenceable(4) [[GEP]])
3736
int some_device_local_var = some_local_var;
38-
// CHECK: [[GEP1:%[a-z_]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* %{{.*}}, i32 0, i32 1
37+
// CHECK: [[GEP1:%[a-z_.]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* %{{.*}}, i32 0, i32 1
3938
// CHECK: [[LOAD1:%[0-9]+]] = load i32, i32 addrspace(4)* [[GEP1]]
4039
// CHECK: store i32 [[LOAD1]], i32 addrspace(4)* %some_device_local_var
4140
});

clang/test/CodeGenSYCL/no_opaque_kernel-handler.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,3 @@ void test(int val) {
3333
// NATIVESUPPORT-NOT: load i8 addrspace(1)*, i8 addrspace(1)** %_arg__specialization_constants_buffer.addr, align 8
3434
// NATIVESUPPORT-NOT: addrspacecast i8 addrspace(1)* %{{[0-9]+}} to i8*
3535
// NATIVESUPPORT-NOT: call void @{{.*}}__init_specialization_constants_buffer{{.*}}(%"class.sycl::_V1::kernel_handler"* noundef align 4 nonnull align 1 dereferenceable(1) %kh, i8* noundef align 4 %{{[0-9]+}})
36-
37-
// ALL: call{{ spir_func | }}void @{{[a-zA-Z0-9_$]+}}kernel_handler{{[a-zA-Z0-9_$]+}}
38-
// ALL-SAME: noundef byval(%"class.sycl::_V1::kernel_handler")

clang/test/CodeGenSYCL/no_opaque_max-concurrency.cpp

Lines changed: 0 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -19,54 +19,10 @@
1919
// CHECK: ret void
2020

2121
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name1() [[ATTR0:#.*]] {{.*}} !max_concurrency ![[NUM1:[0-9]+]]
22-
// CHECK: entry:
23-
// CHECK: [[F1:%.*]] = alloca [[CLASS_F1:%.*]], align 1
24-
// CHECK: [[F1_ASCAST:%.*]] = addrspacecast [[CLASS_F1]]* [[F1]] to [[CLASS_F1]] addrspace(4)*
25-
// CHECK: [[TMP0:%.*]] = bitcast [[CLASS_F1]]* [[F1]] to i8*
26-
// CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[TMP0]])
27-
// CHECK: call spir_func void @_ZNK8Functor1clEv([[CLASS_F1]] addrspace(4)* noundef align 1 dereferenceable_or_null(1) [[F1_ASCAST]])
28-
// CHECK: [[TMP1:%.*]] = bitcast [[CLASS_F1]]* [[F1]] to i8*
29-
// CHECK: call void @llvm.lifetime.end.p0i8(i64 1, i8* [[TMP1]])
30-
// CHECK: ret void
3122

3223
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name4() [[ATTR0]] {{.*}} !max_concurrency ![[NUM1:[0-9]+]]
33-
// CHECK: entry
34-
// CHECK: [[F3:%.*]] = alloca [[CLASS_F3:%.*]], align 1
35-
// CHECK: [[F3_ASCAST:%.*]] = addrspacecast [[CLASS_F3]]* [[F3]] to [[CLASS_F3]] addrspace(4)*
36-
// CHECK: [[TMP2:%.*]] = bitcast [[CLASS_F3]]* [[F3]] to i8*
37-
// CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[TMP2]])
38-
// CHECK: call spir_func void @_ZNK8Functor3ILi4EEclEv([[CLASS_F3]] addrspace(4)* noundef align 1 dereferenceable_or_null(1) [[F3_ASCAST]])
39-
// CHECK: [[TMP3:%.*]] = bitcast [[CLASS_F3]]* [[F3]] to i8*
40-
// CHECK: call void @llvm.lifetime.end.p0i8(i64 1, i8* [[TMP3]]
41-
// CHECK: ret void
42-
43-
// CHECK: define linkonce_odr spir_func void @_ZNK8Functor3ILi4EEclEv
44-
// CHECK: entry:
45-
// CHECK: [[ADDR_1:%.*]] = alloca [[CLASS_F3:%.*]] addrspace(4)*, align 8
46-
// CHECK: [[ADDR1_CAST:%.*]] = addrspacecast [[CLASS_F3]] addrspace(4)** [[ADDR_1]] to [[CLASS_F3]] addrspace(4)* addrspace(4)*
47-
// CHECK: store [[CLASS_F3]] addrspace(4)* %this, [[CLASS_F3]] addrspace(4)* addrspace(4)* [[ADDR1_CAST]], align 8
48-
// CHECK: %this1 = load [[CLASS_F3]] addrspace(4)*, [[CLASS_F3]] addrspace(4)* addrspace(4)* [[ADDR1_CAST]], align 8
49-
// CHECK: ret void
5024

5125
// CHECK: define dso_local spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E12kernel_name5()
52-
// CHECK: entry:
53-
// CHECK: [[H1:%.*]] = alloca [[H:%.*]], align 1
54-
// CHECK: [[H2:%.*]] = addrspacecast [[H]]* [[H1]] to [[H]] addrspace(4)*
55-
// CHECK: [[H3:%.*]] = bitcast [[H]]* [[H1]] to i8*
56-
// CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[H3]])
57-
// CHECK: call spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv([[H]] addrspace(4)* noundef align 1 dereferenceable_or_null(1) [[H2]])
58-
// CHECK: [[TMP4:%.*]] = bitcast [[H]]* [[H1]] to i8*
59-
// CHECK: call void @llvm.lifetime.end.p0i8(i64 1, i8* [[TMP4]])
60-
// CHECK: ret void
61-
62-
// CHECK: define {{.*}}spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv
63-
// CHECK: entry:
64-
// CHECK: [[ADDR_1:%.*]] = alloca [[HH:%.*]] addrspace(4)*, align 8
65-
// CHECK: [[ADDR1_CAST:%.*]] = addrspacecast [[HH]] addrspace(4)** [[ADDR_1]] to [[HH]] addrspace(4)* addrspace(4)*
66-
// CHECK: store [[HH]] addrspace(4)* %this, [[HH]] addrspace(4)* addrspace(4)* [[ADDR1_CAST]], align 8
67-
// CHECK: %this1 = load [[HH]] addrspace(4)*, [[HH]] addrspace(4)* addrspace(4)* [[ADDR1_CAST]], align 8
68-
// CHECK: call spir_func void @_Z4funcILi2EEvv()
69-
// CHECK: ret void
7026

7127
template <int A>
7228
void max_concurrency() {

clang/test/CodeGenSYCL/no_opaque_sampler.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck --enable-var-scope %s
22
// CHECK: define {{.*}}spir_kernel void @{{[a-zA-Z0-9_]+}}(%opencl.sampler_t addrspace(2)* [[SAMPLER_ARG:%[a-zA-Z0-9_]+]])
33
// CHECK-NEXT: entry:
4-
// CHECK-NEXT: [[SAMPLER_ARG]].addr = alloca %opencl.sampler_t addrspace(2)*, align 8
4+
// CHECK: [[SAMPLER_ARG]].addr = alloca %opencl.sampler_t addrspace(2)*, align 8
55
// CHECK: [[ANON:%[a-zA-Z0-9_]+]] = alloca %class.anon, align 8
66
// CHECK: [[ANONCAST:%[a-zA-Z0-9_.]+]] = addrspacecast %class.anon* [[ANON]] to %class.anon addrspace(4)*
77
// CHECK: store %opencl.sampler_t addrspace(2)* [[SAMPLER_ARG]], %opencl.sampler_t addrspace(2)* addrspace(4)* [[SAMPLER_ARG]].addr.ascast, align 8
88
// CHECK-NEXT: [[BITCAST:%[0-9]+]] = bitcast %class.anon* [[ANON]] to i8*
9-
// CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[BITCAST]]) #4
9+
// CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[BITCAST]])
1010
// CHECK-NEXT: [[GEP:%[a-zA-z0-9]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* [[ANONCAST]], i32 0, i32 0
1111
// CHECK-NEXT: [[LOAD_SAMPLER_ARG:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)* addrspace(4)* [[SAMPLER_ARG]].addr.ascast, align 8
1212
// CHECK-NEXT: call spir_func void @{{[a-zA-Z0-9_]+}}(%"class.sycl::_V1::sampler" addrspace(4)* {{[^,]*}} [[GEP]], %opencl.sampler_t addrspace(2)* [[LOAD_SAMPLER_ARG]])

clang/test/CodeGenSYCL/no_opaque_spir-calling-conv.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,16 @@ __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) {
55
kernelFunc();
66
}
77

8+
void myFunc() { }
9+
810
int main() {
911

1012
// CHECK: define {{.*}}spir_kernel void @_ZTSZ4mainE15kernel_function()
1113

12-
// CHECK: call spir_func void @_ZZ4mainENKUlvE_clEv(%class.anon addrspace(4)* {{[^,]*}} %{{.+}})
14+
// CHECK: call spir_func void @_Z6myFuncv()
1315

14-
// CHECK: define internal spir_func void @_ZZ4mainENKUlvE_clEv(%class.{{.*}}anon addrspace(4)* {{[^,]*}} %this)
16+
// CHECK: define {{.*}}spir_func void @_Z6myFuncv()
1517

16-
kernel_single_task<class kernel_function>([]() {});
18+
kernel_single_task<class kernel_function>([]() { myFunc(); });
1719
return 0;
1820
}

clang/test/CodeGenSYCL/no_opaque_spir-enum.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ int main() {
2323
// CHECK: define {{.*}}spir_kernel void @_ZTSZ4test9enum_typeE15kernel_function(i32 noundef %_arg_val)
2424

2525
// CHECK: getelementptr inbounds %class.anon, %class.anon addrspace(4)*
26-
// CHECK: call spir_func void @_ZZ4test9enum_typeENKUlvE_clEv(%class.anon addrspace(4)* {{[^,]*}} %{{.+}})
2726

2827
test( enum_type::B );
2928
return 0;

clang/test/CodeGenSYCL/no_opaque_stall_enable_device.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,28 +26,24 @@ class Foo {
2626
int main() {
2727
q.submit([&](handler &h) {
2828
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel1() {{.*}} !stall_enable ![[NUM4:[0-9]+]]
29-
// CHECK: define {{.*}}spir_func void @{{.*}}FuncObjclEv(%struct.{{.*}}FuncObj addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM4]]
3029
h.single_task<class test_kernel1>(
3130
FuncObj());
3231

3332
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel2() {{.*}} !stall_enable ![[NUM4]]
34-
// CHECK define {{.*}}spir_func void @{{.*}}FooclEv(%class._ZTS3Foo.Foo addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM4]]
3533
Foo f;
3634
h.single_task<class test_kernel2>(f);
3735

3836
// Test attribute is not propagated to the kernel metadata i.e. spir_kernel.
3937
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel3()
4038
// CHECK-NOT: !stall_enable
4139
// CHECK-SAME: {
42-
// CHECK: define {{.*}}spir_func void @{{.*}}func{{.*}} !stall_enable ![[NUM4]]
4340
h.single_task<class test_kernel3>(
4441
[]() { func(); });
4542

4643
// Test attribute is not propagated to the kernel metadata i.e. spir_kernel.
4744
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel4()
4845
// CHECK-NOT: !stall_enable
4946
// CHECK-SAME: {
50-
// CHECK: define {{.*}}spir_func void @{{.*}}func1{{.*}}(%class.anon{{.*}} addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #4 align 2 !stall_enable ![[NUM4]]
5147
h.single_task<class test_kernel4>(
5248
[]() { func1(); });
5349

0 commit comments

Comments
 (0)