Skip to content

Commit 7a9d3b1

Browse files
authored
[SYCL][NVPTX] Do not decompose SYCL functor unless necessary (#14434)
CUDA backend can support passing pointer in the generic address space. The patch prevent the decomposition of the SYCL functor if there is no special types in it. --------- Signed-off-by: Victor Lomuller <[email protected]>
1 parent ac98c33 commit 7a9d3b1

File tree

10 files changed

+246
-78
lines changed

10 files changed

+246
-78
lines changed

clang/include/clang/Basic/LangOptions.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,7 @@ ENUM_LANGOPT(SYCLVersion , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL
304304
LANGOPT(DeclareSPIRVBuiltins, 1, 0, "Declare SPIR-V builtin functions")
305305
LANGOPT(SYCLExplicitSIMD , 1, 0, "SYCL compilation with explicit SIMD extension")
306306
LANGOPT(EnableDAEInSpirKernels , 1, 0, "Enable Dead Argument Elimination in SPIR kernels")
307+
LANGOPT(SYCLDecomposeStruct, 1, 1, "Force top level decomposition of SYCL functor")
307308
LANGOPT(
308309
SYCLValueFitInMaxInt, 1, 1,
309310
"SYCL compiler assumes value fits within MAX_INT for member function of "

clang/include/clang/Driver/Options.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4028,6 +4028,13 @@ defm sycl_instrument_device_code
40284028
BothFlags<[], [ClangOption, CLOption, DXCOption, CC1Option], " Instrumentation and Tracing "
40294029
"Technology (ITT) instrumentation intrinsics calls "
40304030
"(experimental)">>;
4031+
defm sycl_decompose_functor
4032+
: BoolFOption<"sycl-decompose-functor",
4033+
LangOpts<"SYCLDecomposeStruct">, DefaultTrue,
4034+
PosFlag<SetTrue, [], [ClangOption], "Do">,
4035+
NegFlag<SetFalse, [], [ClangOption], "Do not">,
4036+
BothFlags<[], [ClangOption, CLOption, DXCOption, CC1Option],
4037+
" decompose SYCL functor if possible (experimental, CUDA only)">>;
40314038
def flink_huge_device_code : Flag<["-"], "flink-huge-device-code">,
40324039
Group<Link_Group>, HelpText<"Generate and use a custom linker script for huge"
40334040
" device code sections">;

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5447,6 +5447,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
54475447
CmdArgs.push_back("-fsycl-allow-func-ptr");
54485448
}
54495449

5450+
Args.AddLastArg(CmdArgs, options::OPT_fsycl_decompose_functor,
5451+
options::OPT_fno_sycl_decompose_functor);
5452+
54505453
// Forward -fsycl-instrument-device-code option to cc1. This option will
54515454
// only be used for SPIR/SPIR-V based targets.
54525455
if (Triple.isSPIROrSPIRV())

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 148 additions & 44 deletions
Large diffs are not rendered by default.

clang/test/CodeGenSYCL/kernel-handler.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT
22
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT
33

44
// This test checks IR generated when kernel_handler argument
@@ -23,7 +23,8 @@ void test(int val) {
2323
}
2424

2525
// ALL: define dso_local{{ spir_kernel | }}void @{{.*}}test_kernel_handler{{[^(]*}}
26-
// ALL-SAME: (i32 noundef %_arg_a, ptr addrspace(1) noundef align 1 %_arg__specialization_constants_buffer)
26+
// NONATIVESUPPORT-SAME: (ptr noundef byval(%class.anon) align 4 %_arg__sycl_functor, ptr addrspace(1) noundef align 1 %_arg__specialization_constants_buffer)
27+
// NATIVESUPPORT-SAME: (i32 noundef %_arg_a, ptr addrspace(1) noundef align 1 %_arg__specialization_constants_buffer)
2728
// ALL: %kh = alloca %"class.sycl::_V1::kernel_handler", align 1
2829

2930
// NONATIVESUPPORT: %[[KH:[0-9]+]] = load ptr addrspace(1), ptr %_arg__specialization_constants_buffer.addr, align 8

clang/test/CodeGenSYCL/nvvm-annotations.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -target-cpu sm_70 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,GRIDCONST
2-
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -target-cpu sm_70 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,GRIDCONST
1+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -target-cpu sm_70 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,GRIDCONST
2+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -target-cpu sm_70 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,GRIDCONST
33

4-
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -target-cpu sm_60 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,NOGRIDCONST
5-
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -target-cpu sm_60 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,NOGRIDCONST
4+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -target-cpu sm_60 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,NOGRIDCONST
5+
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -target-cpu sm_60 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,NOGRIDCONST
66

77
// Tests that certain SYCL kernel parameters are annotated with "grid_constant" for supported microarchitectures.
88

@@ -18,7 +18,7 @@ int main() {
1818
} s;
1919

2020
q.submit([&](handler &h) {
21-
// CHECK: define{{.*}} void @[[FUNC1:.*kernel_grid_const_params]](ptr noundef byval(%struct.S) align 4 %_arg_s)
21+
// CHECK: define{{.*}} void @[[FUNC1:.*kernel_grid_const_params]](ptr noundef byval(%class.anon) align 4 %_arg__sycl_functor)
2222
h.single_task<class kernel_grid_const_params>([=]() { (void) s;});
2323
});
2424

clang/test/Driver/sycl-offload.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,3 +542,13 @@
542542
// FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK: --dependent-lib=sycl{{[0-9]*}}-previewd
543543
// FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK-NOT: -defaultlib:sycl{{[0-9]*}}.lib
544544
// FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK-NOT: -defaultlib:sycl{{[0-9]*}}-preview.lib
545+
546+
/// ###########################################################################
547+
548+
/// Check -fsycl-decompose-functor behaviors from source
549+
// RUN: %clang -### -fsycl-decompose-functor -target x86_64-unknown-linux-gnu -fsycl -o %t.out %s 2>&1 \
550+
// RUN: | FileCheck -check-prefix=CHK-DECOMP %s
551+
// RUN: %clang -### -fno-sycl-decompose-functor -target x86_64-unknown-linux-gnu -fsycl -o %t.out %s 2>&1 \
552+
// RUN: | FileCheck -check-prefix=CHK-NODECOMP %s
553+
// CHK-DECOMP: -fsycl-decompose-functor
554+
// CHK-NODECOMP: -fno-sycl-decompose-functor

clang/test/SemaSYCL/kernel-arg-opt-report.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -348,10 +348,10 @@ int main() {
348348
// SPIR-NEXT: String: 'Arg '
349349
// SPIR-NEXT: Argument: '13'
350350
// SPIR-NEXT: String: ':'
351-
// SPIR-NEXT: String: ''
352-
// SPIR-NEXT: String: A
351+
// SPIR-NEXT: String: 'Compiler generated argument for decomposed struct/class,'
352+
// SPIR-NEXT: String: KernelFunctor
353353
// SPIR-NEXT: String: ' ('
354-
// SPIR-NEXT: String: ''
354+
// SPIR-NEXT: String: 'Field:A, '
355355
// SPIR-NEXT: String: 'Type:'
356356
// SPIR-NEXT: String: int
357357
// SPIR-NEXT: String: ', '

clang/test/SemaSYCL/kernel-handler.cpp

Lines changed: 10 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -ast-dump %s | FileCheck %s --check-prefix=NONATIVESUPPORT
1+
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -fno-sycl-decompose-functor -triple nvptx64-unknown-unknown -ast-dump %s | FileCheck %s --check-prefix=NONATIVESUPPORT
22
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -ast-dump %s | FileCheck %s --check-prefix=NATIVESUPPORT
33

44
// This test checks that the compiler handles kernel_handler type (for
@@ -28,19 +28,12 @@ int main() {
2828
}
2929

3030
// Check test_kernel_handler parameters
31-
// NONATIVESUPPORT: FunctionDecl {{.*}}test_kernel_handler{{.*}} 'void (int, __global char *)'
32-
// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg_a 'int'
31+
// NONATIVESUPPORT: FunctionDecl {{.*}}test_kernel_handler{{.*}} 'void ((lambda at {{.*}}kernel-handler.cpp{{.*}}), __global char *)'
32+
// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg__sycl_functor '(lambda at {{.*}}'
3333
// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg__specialization_constants_buffer '__global char *'
3434

35-
// Check declaration and initialization of kernel object local clone
36-
// NONATIVESUPPORT-NEXT: CompoundStmt
37-
// NONATIVESUPPORT-NEXT: DeclStmt
38-
// NONATIVESUPPORT-NEXT: VarDecl {{.*}} cinit
39-
// NONATIVESUPPORT-NEXT: InitListExpr
40-
// NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
41-
// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} '_arg_a' 'int'
42-
4335
// Check declaration and initialization of kernel handler local clone using default constructor
36+
// NONATIVESUPPORT-NEXT: CompoundStmt
4437
// NONATIVESUPPORT-NEXT: DeclStmt
4538
// NONATIVESUPPORT-NEXT: VarDecl {{.*}} callinit
4639
// NONATIVESUPPORT-NEXT: CXXConstructExpr {{.*}}'sycl::kernel_handler' 'void () noexcept'
@@ -58,26 +51,19 @@ int main() {
5851
// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} 'void (sycl::kernel_handler) const' lvalue CXXMethod {{.*}} 'operator()' 'void (sycl::kernel_handler) const'
5952
// Kernel body with clones
6053
// NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}} 'const (lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue
61-
// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue Var {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})'
54+
// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue ParmVar {{.*}} '_arg__sycl_functor' '(lambda at {{.*}}kernel-handler.cpp{{.*}})'
6255
// NONATIVESUPPORT-NEXT: CXXConstructExpr {{.*}} 'sycl::kernel_handler' 'void (const kernel_handler &) noexcept'
6356
// NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}} 'const kernel_handler':'const sycl::kernel_handler' lvalue
6457
// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} 'kernel_handler':'sycl::kernel_handler' lvalue Var {{.*}} 'kh' 'kernel_handler':'sycl::kernel_handler'
6558

6659
// Check test_pfwg_kernel_handler parameters
67-
// NONATIVESUPPORT: FunctionDecl {{.*}}test_pfwg_kernel_handler{{.*}} 'void (int, __global char *)'
68-
// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg_a 'int'
69-
// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg__specialization_constants_buffer '__global char *'
70-
71-
// Check declaration and initialization of kernel object local clone
72-
// NONATIVESUPPORT-NEXT: CompoundStmt
73-
// NONATIVESUPPORT-NEXT: DeclStmt
74-
// NONATIVESUPPORT-NEXT: VarDecl {{.*}} cinit
75-
// NONATIVESUPPORT-NEXT: InitListExpr
76-
// NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
77-
// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} '_arg_a' 'int'
60+
// NONATIVESUPPORT: FunctionDecl {{.*}}test_pfwg_kernel_handler{{.*}} 'void ((lambda at {{.*}}kernel-handler.cpp{{.*}}), __global char *)'
61+
// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg__sycl_functor '(lambda at {{.*}}kernel-handler.cpp{{.*}})'
7862
// NONATIVESUPPORT-NEXT: SYCLScopeAttr {{.*}} Implicit WorkGroup
63+
// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg__specialization_constants_buffer '__global char *'
7964

8065
// Check declaration and initialization of kernel handler local clone using default constructor
66+
// NONATIVESUPPORT-NEXT: CompoundStmt
8167
// NONATIVESUPPORT-NEXT: DeclStmt
8268
// NONATIVESUPPORT-NEXT: VarDecl {{.*}} callinit
8369
// NONATIVESUPPORT-NEXT: CXXConstructExpr {{.*}}'sycl::kernel_handler' 'void () noexcept'
@@ -96,7 +82,7 @@ int main() {
9682

9783
// Kernel body with clones
9884
// NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}} 'const (lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue
99-
// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue Var {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})'
85+
// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue ParmVar {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})'
10086
// NONATIVESUPPORT-NEXT: CXXTemporaryObjectExpr {{.*}} 'group<1>':'sycl::group<>' 'void () noexcept' zeroing
10187
// NONATIVESUPPORT-NEXT: CXXConstructExpr {{.*}}'kernel_handler':'sycl::kernel_handler' 'void (const kernel_handler &) noexcept'
10288
// NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}}'const sycl::kernel_handler' lvalue

clang/test/SemaSYCL/no-decomp.cpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// RUN: %clang_cc1 -fsycl-is-device -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL -check-prefix=DECOMP
2+
// RUN: %clang_cc1 -fsycl-is-device -fno-sycl-decompose-functor -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL -check-prefix=NODECOMP
3+
// RUN: %clang_cc1 -fsycl-is-device -fsycl-decompose-functor -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL -check-prefix=DECOMP
4+
5+
#include "Inputs/sycl.hpp"
6+
7+
class with_acc {
8+
public:
9+
int *d;
10+
sycl::accessor<char, 1, sycl::access::mode::read> AccField;
11+
};
12+
13+
class wrapping_acc {
14+
public:
15+
with_acc acc;
16+
void operator()() const {
17+
}
18+
};
19+
20+
class pointer_wrap {
21+
public:
22+
int *d;
23+
void operator()() const {
24+
}
25+
};
26+
27+
class empty {
28+
public:
29+
void operator()() const {
30+
}
31+
};
32+
33+
int main() {
34+
sycl::queue q;
35+
36+
q.submit([&](sycl::handler &cgh) {
37+
wrapping_acc acc;
38+
cgh.single_task(acc);
39+
});
40+
// ALL: FunctionDecl {{.*}} _ZTS12wrapping_acc 'void (__wrapper_class, __global char *, sycl::range<1>, sycl::range<1>, sycl::id<1>)'
41+
42+
q.submit([&](sycl::handler &cgh) {
43+
pointer_wrap ptr;
44+
cgh.single_task(ptr);
45+
});
46+
// NODECOMP: FunctionDecl {{.*}} _ZTS12pointer_wrap 'void (pointer_wrap)'
47+
// DECOMP: FunctionDecl {{.*}} _ZTS12pointer_wrap 'void (__global int *)'
48+
49+
q.submit([&](sycl::handler &cgh) {
50+
empty e;
51+
cgh.single_task(e);
52+
});
53+
// ALL: FunctionDecl {{.*}} _ZTS5empty 'void ()'
54+
55+
return 0;
56+
}

0 commit comments

Comments
 (0)