Skip to content

[SYCL][NVPTX] Do not decompose SYCL functor unless necessary #14434

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jul 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/include/clang/Basic/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@ ENUM_LANGOPT(SYCLVersion , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL
LANGOPT(DeclareSPIRVBuiltins, 1, 0, "Declare SPIR-V builtin functions")
LANGOPT(SYCLExplicitSIMD , 1, 0, "SYCL compilation with explicit SIMD extension")
LANGOPT(EnableDAEInSpirKernels , 1, 0, "Enable Dead Argument Elimination in SPIR kernels")
LANGOPT(SYCLDecomposeStruct, 1, 1, "Force top level decomposition of SYCL functor")
LANGOPT(
SYCLValueFitInMaxInt, 1, 1,
"SYCL compiler assumes value fits within MAX_INT for member function of "
Expand Down
7 changes: 7 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -4028,6 +4028,13 @@ defm sycl_instrument_device_code
BothFlags<[], [ClangOption, CLOption, DXCOption, CC1Option], " Instrumentation and Tracing "
"Technology (ITT) instrumentation intrinsics calls "
"(experimental)">>;
defm sycl_decompose_functor
: BoolFOption<"sycl-decompose-functor",
LangOpts<"SYCLDecomposeStruct">, DefaultTrue,
PosFlag<SetTrue, [], [ClangOption], "Do">,
NegFlag<SetFalse, [], [ClangOption], "Do not">,
BothFlags<[], [ClangOption, CLOption, DXCOption, CC1Option],
" decompose SYCL functor if possible (experimental, CUDA only)">>;
def flink_huge_device_code : Flag<["-"], "flink-huge-device-code">,
Group<Link_Group>, HelpText<"Generate and use a custom linker script for huge"
" device code sections">;
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5447,6 +5447,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-fsycl-allow-func-ptr");
}

Args.AddLastArg(CmdArgs, options::OPT_fsycl_decompose_functor,
options::OPT_fno_sycl_decompose_functor);

// Forward -fsycl-instrument-device-code option to cc1. This option will
// only be used for SPIR/SPIR-V based targets.
if (Triple.isSPIROrSPIRV())
Expand Down
192 changes: 148 additions & 44 deletions clang/lib/Sema/SemaSYCL.cpp

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions clang/test/CodeGenSYCL/kernel-handler.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT

// This test checks IR generated when kernel_handler argument
Expand All @@ -23,7 +23,8 @@ void test(int val) {
}

// ALL: define dso_local{{ spir_kernel | }}void @{{.*}}test_kernel_handler{{[^(]*}}
// ALL-SAME: (i32 noundef %_arg_a, ptr addrspace(1) noundef align 1 %_arg__specialization_constants_buffer)
// NONATIVESUPPORT-SAME: (ptr noundef byval(%class.anon) align 4 %_arg__sycl_functor, ptr addrspace(1) noundef align 1 %_arg__specialization_constants_buffer)
// NATIVESUPPORT-SAME: (i32 noundef %_arg_a, ptr addrspace(1) noundef align 1 %_arg__specialization_constants_buffer)
// ALL: %kh = alloca %"class.sycl::_V1::kernel_handler", align 1

// NONATIVESUPPORT: %[[KH:[0-9]+]] = load ptr addrspace(1), ptr %_arg__specialization_constants_buffer.addr, align 8
Expand Down
10 changes: 5 additions & 5 deletions clang/test/CodeGenSYCL/nvvm-annotations.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -target-cpu sm_70 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,GRIDCONST
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -target-cpu sm_70 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,GRIDCONST
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -target-cpu sm_70 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,GRIDCONST
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -target-cpu sm_70 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,GRIDCONST

// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -target-cpu sm_60 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,NOGRIDCONST
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -target-cpu sm_60 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,NOGRIDCONST
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -target-cpu sm_60 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,NOGRIDCONST
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -target-cpu sm_60 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,NOGRIDCONST

// Tests that certain SYCL kernel parameters are annotated with "grid_constant" for supported microarchitectures.

Expand All @@ -18,7 +18,7 @@ int main() {
} s;

q.submit([&](handler &h) {
// CHECK: define{{.*}} void @[[FUNC1:.*kernel_grid_const_params]](ptr noundef byval(%struct.S) align 4 %_arg_s)
// CHECK: define{{.*}} void @[[FUNC1:.*kernel_grid_const_params]](ptr noundef byval(%class.anon) align 4 %_arg__sycl_functor)
h.single_task<class kernel_grid_const_params>([=]() { (void) s;});
});

Expand Down
10 changes: 10 additions & 0 deletions clang/test/Driver/sycl-offload.c
Original file line number Diff line number Diff line change
Expand Up @@ -542,3 +542,13 @@
// FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK: --dependent-lib=sycl{{[0-9]*}}-previewd
// FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK-NOT: -defaultlib:sycl{{[0-9]*}}.lib
// FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK-NOT: -defaultlib:sycl{{[0-9]*}}-preview.lib

/// ###########################################################################

/// Check -fsycl-decompose-functor behaviors from source
// RUN: %clang -### -fsycl-decompose-functor -target x86_64-unknown-linux-gnu -fsycl -o %t.out %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-DECOMP %s
// RUN: %clang -### -fno-sycl-decompose-functor -target x86_64-unknown-linux-gnu -fsycl -o %t.out %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-NODECOMP %s
// CHK-DECOMP: -fsycl-decompose-functor
// CHK-NODECOMP: -fno-sycl-decompose-functor
6 changes: 3 additions & 3 deletions clang/test/SemaSYCL/kernel-arg-opt-report.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -348,10 +348,10 @@ int main() {
// SPIR-NEXT: String: 'Arg '
// SPIR-NEXT: Argument: '13'
// SPIR-NEXT: String: ':'
// SPIR-NEXT: String: ''
// SPIR-NEXT: String: A
// SPIR-NEXT: String: 'Compiler generated argument for decomposed struct/class,'
// SPIR-NEXT: String: KernelFunctor
// SPIR-NEXT: String: ' ('
// SPIR-NEXT: String: ''
// SPIR-NEXT: String: 'Field:A, '
// SPIR-NEXT: String: 'Type:'
// SPIR-NEXT: String: int
// SPIR-NEXT: String: ', '
Expand Down
34 changes: 10 additions & 24 deletions clang/test/SemaSYCL/kernel-handler.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -ast-dump %s | FileCheck %s --check-prefix=NONATIVESUPPORT
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -fno-sycl-decompose-functor -triple nvptx64-unknown-unknown -ast-dump %s | FileCheck %s --check-prefix=NONATIVESUPPORT
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -ast-dump %s | FileCheck %s --check-prefix=NATIVESUPPORT

// This test checks that the compiler handles kernel_handler type (for
Expand Down Expand Up @@ -28,19 +28,12 @@ int main() {
}

// Check test_kernel_handler parameters
// NONATIVESUPPORT: FunctionDecl {{.*}}test_kernel_handler{{.*}} 'void (int, __global char *)'
// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg_a 'int'
// NONATIVESUPPORT: FunctionDecl {{.*}}test_kernel_handler{{.*}} 'void ((lambda at {{.*}}kernel-handler.cpp{{.*}}), __global char *)'
// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg__sycl_functor '(lambda at {{.*}}'
// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg__specialization_constants_buffer '__global char *'

// Check declaration and initialization of kernel object local clone
// NONATIVESUPPORT-NEXT: CompoundStmt
// NONATIVESUPPORT-NEXT: DeclStmt
// NONATIVESUPPORT-NEXT: VarDecl {{.*}} cinit
// NONATIVESUPPORT-NEXT: InitListExpr
// NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} '_arg_a' 'int'

// Check declaration and initialization of kernel handler local clone using default constructor
// NONATIVESUPPORT-NEXT: CompoundStmt
// NONATIVESUPPORT-NEXT: DeclStmt
// NONATIVESUPPORT-NEXT: VarDecl {{.*}} callinit
// NONATIVESUPPORT-NEXT: CXXConstructExpr {{.*}}'sycl::kernel_handler' 'void () noexcept'
Expand All @@ -58,26 +51,19 @@ int main() {
// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} 'void (sycl::kernel_handler) const' lvalue CXXMethod {{.*}} 'operator()' 'void (sycl::kernel_handler) const'
// Kernel body with clones
// NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}} 'const (lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue
// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue Var {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})'
// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue ParmVar {{.*}} '_arg__sycl_functor' '(lambda at {{.*}}kernel-handler.cpp{{.*}})'
// NONATIVESUPPORT-NEXT: CXXConstructExpr {{.*}} 'sycl::kernel_handler' 'void (const kernel_handler &) noexcept'
// NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}} 'const kernel_handler':'const sycl::kernel_handler' lvalue
// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} 'kernel_handler':'sycl::kernel_handler' lvalue Var {{.*}} 'kh' 'kernel_handler':'sycl::kernel_handler'

// Check test_pfwg_kernel_handler parameters
// NONATIVESUPPORT: FunctionDecl {{.*}}test_pfwg_kernel_handler{{.*}} 'void (int, __global char *)'
// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg_a 'int'
// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg__specialization_constants_buffer '__global char *'

// Check declaration and initialization of kernel object local clone
// NONATIVESUPPORT-NEXT: CompoundStmt
// NONATIVESUPPORT-NEXT: DeclStmt
// NONATIVESUPPORT-NEXT: VarDecl {{.*}} cinit
// NONATIVESUPPORT-NEXT: InitListExpr
// NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} '_arg_a' 'int'
// NONATIVESUPPORT: FunctionDecl {{.*}}test_pfwg_kernel_handler{{.*}} 'void ((lambda at {{.*}}kernel-handler.cpp{{.*}}), __global char *)'
// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg__sycl_functor '(lambda at {{.*}}kernel-handler.cpp{{.*}})'
// NONATIVESUPPORT-NEXT: SYCLScopeAttr {{.*}} Implicit WorkGroup
// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg__specialization_constants_buffer '__global char *'

// Check declaration and initialization of kernel handler local clone using default constructor
// NONATIVESUPPORT-NEXT: CompoundStmt
// NONATIVESUPPORT-NEXT: DeclStmt
// NONATIVESUPPORT-NEXT: VarDecl {{.*}} callinit
// NONATIVESUPPORT-NEXT: CXXConstructExpr {{.*}}'sycl::kernel_handler' 'void () noexcept'
Expand All @@ -96,7 +82,7 @@ int main() {

// Kernel body with clones
// NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}} 'const (lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue
// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue Var {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})'
// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue ParmVar {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})'
// NONATIVESUPPORT-NEXT: CXXTemporaryObjectExpr {{.*}} 'group<1>':'sycl::group<>' 'void () noexcept' zeroing
// NONATIVESUPPORT-NEXT: CXXConstructExpr {{.*}}'kernel_handler':'sycl::kernel_handler' 'void (const kernel_handler &) noexcept'
// NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}}'const sycl::kernel_handler' lvalue
Expand Down
56 changes: 56 additions & 0 deletions clang/test/SemaSYCL/no-decomp.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// RUN: %clang_cc1 -fsycl-is-device -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL -check-prefix=DECOMP
// RUN: %clang_cc1 -fsycl-is-device -fno-sycl-decompose-functor -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL -check-prefix=NODECOMP
// RUN: %clang_cc1 -fsycl-is-device -fsycl-decompose-functor -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL -check-prefix=DECOMP

#include "Inputs/sycl.hpp"

class with_acc {
public:
int *d;
sycl::accessor<char, 1, sycl::access::mode::read> AccField;
};

class wrapping_acc {
public:
with_acc acc;
void operator()() const {
}
};

class pointer_wrap {
public:
int *d;
void operator()() const {
}
};

class empty {
public:
void operator()() const {
}
};

int main() {
sycl::queue q;

q.submit([&](sycl::handler &cgh) {
wrapping_acc acc;
cgh.single_task(acc);
});
// ALL: FunctionDecl {{.*}} _ZTS12wrapping_acc 'void (__wrapper_class, __global char *, sycl::range<1>, sycl::range<1>, sycl::id<1>)'

q.submit([&](sycl::handler &cgh) {
pointer_wrap ptr;
cgh.single_task(ptr);
});
// NODECOMP: FunctionDecl {{.*}} _ZTS12pointer_wrap 'void (pointer_wrap)'
// DECOMP: FunctionDecl {{.*}} _ZTS12pointer_wrap 'void (__global int *)'

q.submit([&](sycl::handler &cgh) {
empty e;
cgh.single_task(e);
});
// ALL: FunctionDecl {{.*}} _ZTS5empty 'void ()'

return 0;
}
Loading