Skip to content

Commit f53ede9

Browse files
mdtoguchi, Fznamznon, bader, sergey-semenov
authored
[Driver][SYCL] Enable Dead Parameter Elimination Optimization (#2340)
The optimization is off by default. Added the -fsycl-dead-args-optimization option to enable it (and -fno-sycl-dead-args-optimization to disable it). Also updated tests to use the option to exercise the optimization. Signed-off-by: Michael D Toguchi <[email protected]> Co-authored-by: Mariya Podchishchaeva <[email protected]> Co-authored-by: Alexey Bader <[email protected]> Co-authored-by: Sergey Semenov <[email protected]>
1 parent 36f6ab6 commit f53ede9

File tree

12 files changed

+43
-12
lines changed

12 files changed

+43
-12
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3551,7 +3551,12 @@ def fsycl_esimd : Flag<["-"], "fsycl-explicit-simd">, Group<sycl_Group>, Flags<[
35513551
def fno_sycl_esimd : Flag<["-"], "fno-sycl-explicit-simd">, Group<sycl_Group>,
35523552
HelpText<"Disable SYCL explicit SIMD extension">, Flags<[NoArgumentUnused, CoreOption]>;
35533553
defm sycl_early_optimizations : OptOutFFlag<"sycl-early-optimizations", "Enable", "Disable", " standard optimization pipeline for SYCL device compiler", [CoreOption]>;
3554-
3554+
def fsycl_dead_args_optimization : Flag<["-"], "fsycl-dead-args-optimization">,
3555+
Group<sycl_Group>, Flags<[NoArgumentUnused, CoreOption]>, HelpText<"Enables "
3556+
"elimination of DPC++ dead kernel arguments">;
3557+
def fno_sycl_dead_args_optimization : Flag<["-"], "fno-sycl-dead-args-optimization">,
3558+
Group<sycl_Group>, Flags<[NoArgumentUnused, CoreOption]>, HelpText<"Disables "
3559+
"elimination of DPC++ dead kernel arguments">;
35553560
//===----------------------------------------------------------------------===//
35563561
// CC1 Options
35573562
//===----------------------------------------------------------------------===//

clang/lib/Driver/Driver.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3514,6 +3514,9 @@ class OffloadingActionBuilder final {
35143514
/// Flag to signal if the user requested device code split.
35153515
bool DeviceCodeSplit = false;
35163516

3517+
/// Flag to signal if DAE optimization is turned on.
3518+
bool EnableDAE = false;
3519+
35173520
/// The SYCL actions for the current input.
35183521
ActionList SYCLDeviceActions;
35193522

@@ -3951,7 +3954,7 @@ class OffloadingActionBuilder final {
39513954
ActionList WrapperInputs;
39523955
// post link is not optional - even if not splitting, always need to
39533956
// process specialization constants
3954-
bool MultiFileActionDeps = !isSpirvAOT || DeviceCodeSplit;
3957+
bool MultiFileActionDeps = !isSpirvAOT || DeviceCodeSplit || EnableDAE;
39553958
types::ID PostLinkOutType = isNVPTX || !MultiFileActionDeps
39563959
? types::TY_LLVM_BC
39573960
: types::TY_Tempfiletable;
@@ -4108,6 +4111,9 @@ class OffloadingActionBuilder final {
41084111
WrapDeviceOnlyBinary = Args.hasArg(options::OPT_fsycl_link_EQ);
41094112
auto *DeviceCodeSplitArg =
41104113
Args.getLastArg(options::OPT_fsycl_device_code_split_EQ);
4114+
EnableDAE =
4115+
Args.hasFlag(options::OPT_fsycl_dead_args_optimization,
4116+
options::OPT_fno_sycl_dead_args_optimization, false);
41114117
// -fsycl-device-code-split is an alias to
41124118
// -fsycl-device-code-split=per_source
41134119
DeviceCodeSplit = DeviceCodeSplitArg &&

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4123,6 +4123,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
41234123
CmdArgs.push_back("-mllvm");
41244124
CmdArgs.push_back("-sycl-opt");
41254125
}
4126+
// Turn on Dead Parameter Elimination Optimization with early optimizations
4127+
if (!RawTriple.isNVPTX() &&
4128+
Args.hasFlag(options::OPT_fsycl_dead_args_optimization,
4129+
options::OPT_fno_sycl_dead_args_optimization, false))
4130+
CmdArgs.push_back("-fenable-sycl-dae");
41264131

41274132
// Pass the triple of host when doing SYCL
41284133
auto AuxT = llvm::Triple(llvm::sys::getProcessTriple());
@@ -7807,6 +7812,11 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA,
78077812
// OPT_fsycl_device_code_split is not checked as it is an alias to
78087813
// -fsycl-device-code-split=per_source
78097814

7815+
// Turn on Dead Parameter Elimination Optimization with early optimizations
7816+
if (!getToolChain().getTriple().isNVPTX() &&
7817+
TCArgs.hasFlag(options::OPT_fsycl_dead_args_optimization,
7818+
options::OPT_fno_sycl_dead_args_optimization, false))
7819+
addArgs(CmdArgs, TCArgs, {"-emit-param-info"});
78107820
if (JA.getType() == types::TY_LLVM_BC) {
78117821
// single file output requested - this means only perform necessary IR
78127822
// transformations (like specialization constant intrinsic lowering) and

clang/test/Driver/sycl-device-optimizations.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,11 @@
2828
// RUN: %clang_cl -### -fsycl -fintelfpga %s 2>&1 \
2929
// RUN: | FileCheck -check-prefix=CHECK-NO-SYCL-EARLY-OPTS %s
3030
// CHECK-NO-SYCL-EARLY-OPTS: "-fno-sycl-early-optimizations"
31+
32+
/// Check that Dead Parameter Elimination Optimization is enabled
33+
// RUN: %clang -### -fsycl -fsycl-dead-args-optimization %s 2>&1 \
34+
// RUN: | FileCheck -check-prefix=CHECK-DAE %s
35+
// RUN: %clang_cl -### -fsycl -fsycl-dead-args-optimization %s 2>&1 \
36+
// RUN: | FileCheck -check-prefix=CHECK-DAE %s
37+
// CHECK-DAE: clang{{.*}} "-fenable-sycl-dae"
38+
// CHECK-DAE: sycl-post-link{{.*}} "-emit-param-info"

sycl/test/basic_tests/sampler/sampler.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -L %opencl_libs_dir -lOpenCL
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-dead-args-optimization %s -o %t.out -L %opencl_libs_dir -lOpenCL
22
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
33
// RUN: %CPU_RUN_PLACEHOLDER %t.out
44
// RUN: %GPU_RUN_PLACEHOLDER %t.out

sycl/test/kernel_from_file/hw.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
// CUDA does not support SPIR-V.
33

44
//-fsycl-targets=%sycl_triple
5-
// RUN: %clangxx -fsycl-device-only -fno-sycl-use-bitcode -Xclang -fsycl-int-header=%t.h -c %s -o %t.spv -I %sycl_include -Xclang -verify-ignore-unexpected=note,warning -Wno-sycl-strict
5+
// Runtime assumes that the image passed with SYCL_USE_KERNEL_SPV has no
6+
// eliminated arguments, compile without early optimizations.
7+
// RUN: %clangxx -fsycl-device-only -fno-sycl-early-optimizations -fno-sycl-use-bitcode -Xclang -fsycl-int-header=%t.h -c %s -o %t.spv -I %sycl_include -Xclang -verify-ignore-unexpected=note,warning -Wno-sycl-strict
68
// RUN: %clangxx -include %t.h -g %s -o %t.out -lsycl -I %sycl_include -Xclang -verify-ignore-unexpected=note,warning
79
// RUN: env SYCL_BE=%sycl_be SYCL_USE_KERNEL_SPV=%t.spv %t.out | FileCheck %s
810
// CHECK: Passed

sycl/test/multi_ptr/multi_ptr.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-dead-args-optimization %s -o %t.out
22
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
33
// RUN: %CPU_RUN_PLACEHOLDER %t.out
44
// RUN: %GPU_RUN_PLACEHOLDER %t.out
55
// RUN: %ACC_RUN_PLACEHOLDER %t.out
6-
// RUN: %clangxx -DRESTRICT_WRITE_ACCESS_TO_CONSTANT_PTR -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
6+
// RUN: %clangxx -DRESTRICT_WRITE_ACCESS_TO_CONSTANT_PTR -fsycl -fsycl-targets=%sycl_triple -fsycl-dead-args-optimization %s -o %t1.out
77
// RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
88
// RUN: %CPU_RUN_PLACEHOLDER %t1.out
99
// RUN: %GPU_RUN_PLACEHOLDER %t1.out

sycl/test/scheduler/HandleException.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -I %sycl_source_dir %s -o %t.out
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-dead-args-optimization -I %sycl_source_dir %s -o %t.out
22
// RUN: %CPU_RUN_PLACEHOLDER %t.out
33
#include <CL/sycl.hpp>
44
#include <array>

sycl/test/scheduler/HostAccDestruction.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clangxx -fsycl -I %sycl_source_dir %s -o %t.out
1+
// RUN: %clangxx -fsycl -fsycl-dead-args-optimization -I %sycl_source_dir %s -o %t.out
22
// RUN: env SYCL_PI_TRACE=2 %CPU_RUN_PLACEHOLDER %t.out 2>&1 %CPU_CHECK_PLACEHOLDER
33
//==---------------------- HostAccDestruction.cpp --------------------------==//
44
//

sycl/test/scheduler/ReleaseResourcesTest.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -I %sycl_source_dir %s -o %t.out
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-dead-args-optimization -I %sycl_source_dir %s -o %t.out
22
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
33
// RUN: env SYCL_PI_TRACE=2 %CPU_RUN_PLACEHOLDER %t.out 2>&1 %CPU_CHECK_PLACEHOLDER
44
// RUN: env SYCL_PI_TRACE=2 %GPU_RUN_PLACEHOLDER %t.out 2>&1 %GPU_CHECK_PLACEHOLDER

sycl/test/separate-compile/test.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@
33
//
44
// >> ---- compile src1
55
// >> device compilation...
6-
// RUN: %clangxx -fsycl-device-only -Xclang -fsycl-int-header=sycl_ihdr_a.h %s -c -o a_kernel.bc -I %sycl_include -Wno-sycl-strict
6+
// RUN: %clangxx -fsycl-device-only -fno-sycl-early-optimizations -Xclang -fsycl-int-header=sycl_ihdr_a.h %s -c -o a_kernel.bc -I %sycl_include -Wno-sycl-strict
77
// >> host compilation...
88
// RUN: %clangxx -include sycl_ihdr_a.h -g -c %s -o a.o -I %sycl_include -Wno-sycl-strict
99
//
1010
// >> ---- compile src2
1111
// >> device compilation...
12-
// RUN: %clangxx -DB_CPP=1 -fsycl-device-only -Xclang -fsycl-int-header=sycl_ihdr_b.h %s -c -o b_kernel.bc -I %sycl_include -Wno-sycl-strict
12+
// RUN: %clangxx -DB_CPP=1 -fsycl-device-only -fno-sycl-early-optimizations -Xclang -fsycl-int-header=sycl_ihdr_b.h %s -c -o b_kernel.bc -I %sycl_include -Wno-sycl-strict
1313
// >> host compilation...
1414
// RUN: %clangxx -DB_CPP=1 -include sycl_ihdr_b.h -g -c %s -o b.o -I %sycl_include -Wno-sycl-strict
1515
//

sycl/test/usm/pfor_flatten.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// UNSUPPORTED: cuda
22
// CUDA does not support the unnamed lambda extension.
33
//
4-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-unnamed-lambda %s -o %t1.out
4+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-unnamed-lambda -fsycl-dead-args-optimization %s -o %t1.out
55
// RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
66
// RUN: %CPU_RUN_PLACEHOLDER %t1.out
77
// RUN: %GPU_RUN_PLACEHOLDER %t1.out

0 commit comments

Comments
 (0)