Skip to content

Commit 416405b

Browse files
committed
[SYCL] Introduce flag to disable force inlining of kernel lambda
1 parent 39c3cd6 commit 416405b

30 files changed

+221
-54
lines changed

clang/include/clang/Basic/LangOptions.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,7 @@ LANGOPT(IntelFPGA , 1, 0, "Perform ahead-of-time compilation for FPGA")
279279
LANGOPT(SYCLAllowFuncPtr , 1, 0, "Allow function pointers in SYCL device code")
280280
LANGOPT(SYCLStdLayoutKernelParams, 1, 0, "Enable standard layout requirement for SYCL kernel parameters")
281281
LANGOPT(SYCLUnnamedLambda , 1, 0, "Allow unnamed lambda SYCL kernels")
282+
LANGOPT(SYCLForceInlineKernelLambda , 1, 0, "Force inline SYCL kernel lambdas in entry point")
282283
LANGOPT(SYCLESIMDForceStatelessMem, 1, 0, "Make accessors use USM memory in ESIMD kernels")
283284
ENUM_LANGOPT(SYCLVersion , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL standard used")
284285
LANGOPT(DeclareSPIRVBuiltins, 1, 0, "Declare SPIR-V builtin functions")

clang/include/clang/Driver/Options.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2942,6 +2942,12 @@ defm sycl_unnamed_lambda
29422942
" >= clang::LangOptions::SYCLMajorVersion::SYCL_2020")>,
29432943
PosFlag<SetTrue, [], "Allow">, NegFlag<SetFalse, [], "Disallow">,
29442944
BothFlags<[CC1Option, CoreOption], " unnamed SYCL lambda kernels">>;
2945+
defm sycl_inline_kernel_lambda
2946+
: BoolFOption<
2947+
"sycl-force-inline-kernel-lambda", LangOpts<"SYCLForceInlineKernelLambda">,
2948+
DefaultTrue,
2949+
PosFlag<SetTrue, [], "Allow">, NegFlag<SetFalse, [], "Disallow">,
2950+
BothFlags<[CC1Option, CoreOption], " force inline SYCL kernels lambda in entry point">>;
29452951
def fsycl_help_EQ : Joined<["-"], "fsycl-help=">,
29462952
Flags<[NoXarchOption, CoreOption]>, HelpText<"Emit help information from the "
29472953
"related offline compilation tool. Valid values: all, fpga, gen, x86_64.">,

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -838,8 +838,13 @@ class SingleDeviceFunctionTracker {
838838
llvm::SmallVector<FunctionDecl *> CallStack;
839839
VisitCallNode(KernelNode, GetFDFromNode(KernelNode), CallStack);
840840

841-
// Always inline the KernelBody in the kernel entry point.
842-
if (KernelBody) {
841+
// Always inline the KernelBody in the kernel entry point. For ESIMD
842+
// inlining is handled later down the pipeline.
843+
if (KernelBody &&
844+
Parent.SemaRef.getLangOpts().SYCLForceInlineKernelLambda &&
845+
!KernelBody->hasAttr<NoInlineAttr>() &&
846+
!KernelBody->hasAttr<AlwaysInlineAttr>() &&
847+
!KernelBody->hasAttr<SYCLSimdAttr>()) {
843848
KernelBody->addAttr(AlwaysInlineAttr::CreateImplicit(
844849
KernelBody->getASTContext(), {}, AttributeCommonInfo::AS_Keyword,
845850
AlwaysInlineAttr::Keyword_forceinline));

clang/test/CodeGenSYCL/check-direct-attribute-propagation.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -opaque-pointers -emit-llvm -o - %s | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -opaque-pointers -emit-llvm -o - %s | FileCheck %s
22

33
// Tests for IR of [[intel::scheduler_target_fmax_mhz()]], [[intel::num_simd_work_items()]],
44
// [[intel::no_global_work_offset()]], [[intel::max_global_work_dim()]], [[sycl::reqd_sub_group_size()]],
@@ -304,19 +304,20 @@ int main() {
304304

305305
// Test attribute is not propagated.
306306
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name32() #0 !kernel_arg_buffer_location ![[NUM]]
307+
// CHECK: define {{.*}}spir_func void @{{.*}}Functor10{{.*}}(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2
307308
// CHECK-NOT: noalias
308309
// CHECK-SAME: {
309310
// CHECK: define dso_local spir_func void @_Z4foo8v()
310311
Functor10 f10;
311312
h.single_task<class kernel_name32>(f10);
312313

313314
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name33() #0 !kernel_arg_buffer_location ![[NUM]]
314-
// CHECK: store ptr addrspace(4) %Foo8{{.*}} !noalias
315+
// CHECK: define {{.*}}spir_func void @{{.*}}Foo8{{.*}}(ptr addrspace(4) noalias noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2
315316
Foo8 boo8;
316317
h.single_task<class kernel_name33>(boo8);
317318

318319
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name34() #0 !kernel_arg_buffer_location ![[NUM]]
319-
// CHECK: store ptr addrspace(4){{.*}} !noalias
320+
// CHECK: define {{.*}}spir_func void @{{.*}}(ptr addrspace(4) noalias noundef align 1 dereferenceable_or_null(1) %this) #4 align 2
320321
h.single_task<class kernel_name34>(
321322
[]() [[intel::kernel_args_restrict]]{});
322323
});

clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang -fsycl-device-only %s -S -emit-llvm -O0 -g -o - | FileCheck %s
1+
// RUN: %clang -Xclang -fno-sycl-force-inline-kernel-lambda -fsycl-device-only %s -S -emit-llvm -O0 -g -o - | FileCheck %s
22
//
33
// Verify the SYCL kernel routine is marked artificial and has the
44
// expected source correlation.
@@ -27,9 +27,14 @@ int main() {
2727
// CHECK: define{{.*}} spir_kernel {{.*}}19use_kernel_for_test({{.*}}){{.*}} !dbg [[KERNEL:![0-9]+]] {{.*}}{
2828
// CHECK: getelementptr inbounds %class.anon, {{.*}}, i32 0, i32 0, !dbg [[LINE_A0:![0-9]+]]
2929
// CHECK: call spir_func void {{.*}}6__init{{.*}} !dbg [[LINE_A0]]
30+
// CHECK: call spir_func void @_ZZ4mainENKUlvE_clEv{{.*}} !dbg [[LINE_B0:![0-9]+]]
31+
// CHECK: ret void, !dbg [[LINE_C0:![0-9]+]]
3032
// CHECK: [[KERNEL]] = {{.*}}!DISubprogram(name: "{{.*}}19use_kernel_for_test"
3133
// CHECK-SAME: scope: [[FILE:![0-9]+]],
3234
// CHECK-SAME: file: [[FILE]],
3335
// CHECK-SAME: flags: DIFlagArtificial | DIFlagPrototyped
3436
// CHECK: [[FILE]] = !DIFile(filename: "{{.*}}debug-info-srcpos-kernel.cpp"{{.*}})
3537
// CHECK: [[LINE_A0]] = !DILocation(line: 15,{{.*}}scope: [[KERNEL]]
38+
// CHECK: [[LINE_B0]] = !DILocation(line: 16,{{.*}}scope: [[BLOCK:![0-9]+]]
39+
// CHECK: [[BLOCK]] = distinct !DILexicalBlock(scope: [[KERNEL]]
40+
// CHECK: [[LINE_C0]] = !DILocation(line: 17,{{.*}}scope: [[KERNEL]]

clang/test/CodeGenSYCL/device-functions.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s
22

33
template <typename T>
44
T bar(T arg);
@@ -22,5 +22,6 @@ int main() {
2222
return 0;
2323
}
2424
// CHECK: define {{.*}}spir_kernel void @_ZTSZ4mainE11fake_kernel()
25+
// CHECK: define internal spir_func void @_ZZ4mainENKUlvE_clEv(ptr addrspace(4) {{[^,]*}} %this)
2526
// CHECK: define {{.*}}spir_func void @_Z3foov()
2627
// CHECK: define linkonce_odr spir_func noundef i32 @_Z3barIiET_S0_(i32 noundef %arg)

clang/test/CodeGenSYCL/device-variables.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s
22

33
enum class test_type { value1, value2, value3 };
44

@@ -23,18 +23,19 @@ int main() {
2323
kernel<class test_kernel>([=]() {
2424
// Global variables used directly
2525
foo(global_value);
26-
// CHECK: [[LOAD:%[a-z0-9.]+]] = load ptr addrspace(4){{.*}}
2726
// CHECK: call spir_func void @{{.*}}foo{{.*}}(ptr addrspace(4) noundef align 4 dereferenceable(4) addrspacecast (ptr addrspace(1) @{{.*}}global_value to ptr addrspace(4)))
2827
int a = my_array[0];
29-
// CHECK: store i32 42, ptr addrspace(4) %a
28+
// CHECK: [[LOAD:%[0-9]+]] = load i32, ptr addrspace(4)
29+
// CHECK: store i32 [[LOAD]], ptr addrspace(4) %a
3030
int b = some_const;
3131
// Constant used directly
3232
// CHECK: store i32 1, ptr addrspace(4) %b
3333
foo(local_value);
3434
// Local variables and constexprs captured by lambda
35-
// CHECK: call spir_func void @{{.*}}foo{{.*}}(ptr addrspace(4) noundef align 4 dereferenceable(4) [[LOAD]])
35+
// CHECK: [[GEP:%[a-z_]+]] = getelementptr inbounds %class.anon, ptr addrspace(4) %{{.*}}, i32 0, i32 0
36+
// CHECK: call spir_func void @{{.*}}foo{{.*}}(ptr addrspace(4) noundef align 4 dereferenceable(4) [[GEP]])
3637
int some_device_local_var = some_local_var;
37-
// CHECK: [[GEP1:%[a-z_.]+]] = getelementptr inbounds %class.anon, ptr addrspace(4) %{{.*}}, i32 0, i32 1
38+
// CHECK: [[GEP1:%[a-z_]+]] = getelementptr inbounds %class.anon, ptr addrspace(4) %{{.*}}, i32 0, i32 1
3839
// CHECK: [[LOAD1:%[0-9]+]] = load i32, ptr addrspace(4) [[GEP1]]
3940
// CHECK: store i32 [[LOAD1]], ptr addrspace(4) %some_device_local_var
4041
});

clang/test/CodeGenSYCL/esimd_metadata2.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ __attribute__((sycl_device)) __attribute__((sycl_explicit_simd)) void esimd_func
1010
// CHECK-ESIMD-DAG: define {{.*}}spir_kernel void @{{.*}}kernel_cm() #{{[0-9]+}} !sycl_explicit_simd !{{[0-9]+}} !intel_reqd_sub_group_size ![[SGSIZE1:[0-9]+]] {{.*}}{
1111
// CHECK-ESIMD-DAG: define {{.*}}spir_func void @{{.*}}esimd_funcv() #{{[0-9]+}} !sycl_explicit_simd !{{[0-9]+}} !intel_reqd_sub_group_size ![[SGSIZE1]] {
1212
// CHECK-ESIMD-DAG: define {{.*}}spir_func void @{{.*}}shared_funcv() #{{[0-9]+}} {
13+
// CHECK-ESIMD-DAG: define linkonce_odr spir_func void @_ZN12ESIMDFunctorclEv({{.*}}) #{{[0-9]+}} {{.*}} !sycl_explicit_simd !{{[0-9]+}} {
1314

1415
class ESIMDFunctor {
1516
public:

clang/test/CodeGenSYCL/kernel-handler.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT
2-
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT
2+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT
33

44
// This test checks IR generated when kernel_handler argument
55
// (used to handle SYCL 2020 specialization constants) is passed
@@ -33,3 +33,6 @@ void test(int val) {
3333
// NATIVESUPPORT-NOT: load ptr addrspace(1), ptr addrspace(1) %_arg__specialization_constants_buffer.addr, align 8
3434
// NATIVESUPPORT-NOT: addrspacecast ptr addrspace(1) %{{[0-9]+}} to ptr
3535
// NATIVESUPPORT-NOT: call void @{{.*}}__init_specialization_constants_buffer{{.*}}(ptr noundef align 4 nonnull align 1 dereferenceable(1) %kh, ptr noundef align 4 %{{[0-9]+}})
36+
37+
// ALL: call{{ spir_func | }}void @{{[a-zA-Z0-9_$]+}}kernel_handler{{[a-zA-Z0-9_$]+}}
38+
// ALL-SAME: noundef byval(%"class.sycl::_V1::kernel_handler")

clang/test/CodeGenSYCL/max-concurrency.cpp

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -sycl-std=2020 -opaque-pointers -emit-llvm -o - %s | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -sycl-std=2020 -opaque-pointers -emit-llvm -o - %s | FileCheck %s
22

33
#include "sycl.hpp"
44

@@ -20,10 +20,48 @@
2020
// CHECK: ret void
2121

2222
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name1() [[ATTR0:#.*]] {{.*}} !max_concurrency ![[NUM1:[0-9]+]]
23+
// CHECK: entry:
24+
// CHECK: [[F1:%.*]] = alloca [[CLASS_F1:%.*]], align 1
25+
// CHECK: [[F1_ASCAST:%.*]] = addrspacecast ptr [[F1]] to ptr addrspace(4)
26+
// CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr [[F1]])
27+
// CHECK: call spir_func void @_ZNK8Functor1clEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) [[F1_ASCAST]])
28+
// CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr [[F1]])
29+
// CHECK: ret void
2330

2431
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name4() [[ATTR0]] {{.*}} !max_concurrency ![[NUM1:[0-9]+]]
32+
// CHECK: entry
33+
// CHECK: [[F3:%.*]] = alloca [[CLASS_F3:%.*]], align 1
34+
// CHECK: [[F3_ASCAST:%.*]] = addrspacecast ptr [[F3]] to ptr addrspace(4)
35+
// CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr [[F3]])
36+
// CHECK: call spir_func void @_ZNK8Functor3ILi4EEclEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) [[F3_ASCAST]])
37+
// CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr [[F3]]
38+
// CHECK: ret void
39+
40+
// CHECK: define linkonce_odr spir_func void @_ZNK8Functor3ILi4EEclEv
41+
// CHECK: entry:
42+
// CHECK: [[ADDR_1:%.*]] = alloca ptr addrspace(4), align 8
43+
// CHECK: [[ADDR1_CAST:%.*]] = addrspacecast ptr [[ADDR_1]] to ptr addrspace(4)
44+
// CHECK: store ptr addrspace(4) %this, ptr addrspace(4) [[ADDR1_CAST]], align 8
45+
// CHECK: %this1 = load ptr addrspace(4), ptr addrspace(4) [[ADDR1_CAST]], align 8
46+
// CHECK: ret void
2547

2648
// CHECK: define dso_local spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E12kernel_name5()
49+
// CHECK: entry:
50+
// CHECK: [[H1:%.*]] = alloca [[H:%.*]], align 1
51+
// CHECK: [[H2:%.*]] = addrspacecast ptr [[H1]] to ptr addrspace(4)
52+
// CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr [[H1]])
53+
// CHECK: call spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) [[H2]])
54+
// CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr [[H1]])
55+
// CHECK: ret void
56+
57+
// CHECK: define {{.*}}spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv
58+
// CHECK: entry:
59+
// CHECK: [[ADDR_1:%.*]] = alloca ptr addrspace(4), align 8
60+
// CHECK: [[ADDR1_CAST:%.*]] = addrspacecast ptr [[ADDR_1]] to ptr addrspace(4)
61+
// CHECK: store ptr addrspace(4) %this, ptr addrspace(4) [[ADDR1_CAST]], align 8
62+
// CHECK: %this1 = load ptr addrspace(4), ptr addrspace(4) [[ADDR1_CAST]], align 8
63+
// CHECK: call spir_func void @_Z4funcILi2EEvv()
64+
// CHECK: ret void
2765

2866
template <int A>
2967
void max_concurrency() {

clang/test/CodeGenSYCL/no_opaque_basic-kernel-wrapper.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
22

33
// This test checks that compiler generates correct kernel wrapper for basic
44
// case.
@@ -58,3 +58,6 @@ int main() {
5858
// CHECK-SAME: %"struct.sycl::_V1::range"* noundef byval({{.*}}) align 4 [[ARANGE]],
5959
// CHECK-SAME: %"struct.sycl::_V1::range"* noundef byval({{.*}}) align 4 [[MRANGE]],
6060
// CHECK-SAME: %"struct.sycl::_V1::id"* noundef byval({{.*}}) align 4 [[OID]])
61+
62+
// Check lambda "()" operator call
63+
// CHECK: call spir_func void @{{.*}}(%class.anon addrspace(4)* {{[^,]*}})

clang/test/CodeGenSYCL/no_opaque_check-direct-attribute-propagation.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s
22

33
// Tests for IR of [[intel::scheduler_target_fmax_mhz()]], [[intel::num_simd_work_items()]],
44
// [[intel::no_global_work_offset()]], [[intel::max_global_work_dim()]], [[sycl::reqd_sub_group_size()]],
@@ -304,19 +304,20 @@ int main() {
304304

305305
// Test attribute is not propagated.
306306
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name32() #0 !kernel_arg_buffer_location ![[NUM]]
307+
// CHECK: define {{.*}}spir_func void @{{.*}}Functor10{{.*}}(%class.Functor10 addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2
307308
// CHECK-NOT: noalias
308309
// CHECK-SAME: {
309310
// CHECK: define dso_local spir_func void @_Z4foo8v()
310311
Functor10 f10;
311312
h.single_task<class kernel_name32>(f10);
312313

313314
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name33() #0 !kernel_arg_buffer_location ![[NUM]]
314-
// CHECK: store %class.Foo8{{.*}} !noalias
315+
// CHECK: define {{.*}}spir_func void @{{.*}}Foo8{{.*}}(%class.Foo8 addrspace(4)* noalias noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2
315316
Foo8 boo8;
316317
h.single_task<class kernel_name33>(boo8);
317318

318319
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name34() #0 !kernel_arg_buffer_location ![[NUM]]
319-
// CHECK: store %class.anon{{.*}} !noalias
320+
// CHECK: define {{.*}}spir_func void @{{.*}}(%class.anon{{.*}} addrspace(4)* noalias noundef align 1 dereferenceable_or_null(1) %this) #4 align 2
320321
h.single_task<class kernel_name34>(
321322
[]() [[intel::kernel_args_restrict]]{});
322323
});

clang/test/CodeGenSYCL/no_opaque_device-functions.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
22

33
template <typename T>
44
T bar(T arg);
@@ -22,5 +22,6 @@ int main() {
2222
return 0;
2323
}
2424
// CHECK: define {{.*}}spir_kernel void @_ZTSZ4mainE11fake_kernel()
25+
// CHECK: define internal spir_func void @_ZZ4mainENKUlvE_clEv(%class.anon addrspace(4)* {{[^,]*}} %this)
2526
// CHECK: define {{.*}}spir_func void @_Z3foov()
2627
// CHECK: define linkonce_odr spir_func noundef i32 @_Z3barIiET_S0_(i32 noundef %arg)

clang/test/CodeGenSYCL/no_opaque_device-variables.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
22

33
enum class test_type { value1, value2, value3 };
44

@@ -25,16 +25,17 @@ int main() {
2525
foo(global_value);
2626
// CHECK: call spir_func void @{{.*}}foo{{.*}}(i32 addrspace(4)* noundef align 4 dereferenceable(4) addrspacecast (i32 addrspace(1)* @{{.*}}global_value to i32 addrspace(4)*))
2727
int a = my_array[0];
28-
// CHECK: store i32 42, i32 addrspace(4)* %a
28+
// CHECK: [[LOAD:%[0-9]+]] = load i32, i32 addrspace(4)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(4)* addrspacecast ([1 x i32] addrspace(1)* @{{.*}}my_array to [1 x i32] addrspace(4)*), i64 0, i64 0)
29+
// CHECK: store i32 [[LOAD]], i32 addrspace(4)* %a
2930
int b = some_const;
3031
// Constant used directly
3132
// CHECK: store i32 1, i32 addrspace(4)* %b
3233
foo(local_value);
3334
// Local variables and constexprs captured by lambda
34-
// CHECK: [[GEP:%[a-z_.]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* %{{.*}}, i32 0, i32 0
35+
// CHECK: [[GEP:%[a-z_]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* %{{.*}}, i32 0, i32 0
3536
// CHECK: call spir_func void @{{.*}}foo{{.*}}(i32 addrspace(4)* noundef align 4 dereferenceable(4) [[GEP]])
3637
int some_device_local_var = some_local_var;
37-
// CHECK: [[GEP1:%[a-z_.]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* %{{.*}}, i32 0, i32 1
38+
// CHECK: [[GEP1:%[a-z_]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* %{{.*}}, i32 0, i32 1
3839
// CHECK: [[LOAD1:%[0-9]+]] = load i32, i32 addrspace(4)* [[GEP1]]
3940
// CHECK: store i32 [[LOAD1]], i32 addrspace(4)* %some_device_local_var
4041
});

0 commit comments

Comments
 (0)