Skip to content

[Driver][SYCL] Enable Dead Parameter Elimination Optimization #2340

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
7 changes: 6 additions & 1 deletion clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -3551,7 +3551,12 @@ def fsycl_esimd : Flag<["-"], "fsycl-explicit-simd">, Group<sycl_Group>, Flags<[
def fno_sycl_esimd : Flag<["-"], "fno-sycl-explicit-simd">, Group<sycl_Group>,
HelpText<"Disable SYCL explicit SIMD extension">, Flags<[NoArgumentUnused, CoreOption]>;
defm sycl_early_optimizations : OptOutFFlag<"sycl-early-optimizations", "Enable", "Disable", " standard optimization pipeline for SYCL device compiler", [CoreOption]>;

// -fsycl-dead-args-optimization: driver flag in the SYCL option group that
// requests elimination of dead DPC++ kernel arguments. It is the positive half
// of the -f/-fno- pair; -fno-sycl-dead-args-optimization is its negation.
// NoArgumentUnused and CoreOption are the flags attached to this option.
def fsycl_dead_args_optimization : Flag<["-"], "fsycl-dead-args-optimization">,
Group<sycl_Group>, Flags<[NoArgumentUnused, CoreOption]>, HelpText<"Enables "
"elimination of DPC++ dead kernel arguments">;
// -fno-sycl-dead-args-optimization: negative counterpart of
// -fsycl-dead-args-optimization; turns off elimination of dead DPC++ kernel
// arguments. Declared with the same group and flags as the positive form.
def fno_sycl_dead_args_optimization : Flag<["-"], "fno-sycl-dead-args-optimization">,
Group<sycl_Group>, Flags<[NoArgumentUnused, CoreOption]>, HelpText<"Disables "
"elimination of DPC++ dead kernel arguments">;
//===----------------------------------------------------------------------===//
// CC1 Options
//===----------------------------------------------------------------------===//
Expand Down
8 changes: 7 additions & 1 deletion clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3514,6 +3514,9 @@ class OffloadingActionBuilder final {
/// Flag to signal if the user requested device code split.
bool DeviceCodeSplit = false;

/// Flag to signal if the dead argument elimination (DAE) optimization is
/// turned on.
bool EnableDAE = false;

/// The SYCL actions for the current input.
ActionList SYCLDeviceActions;

Expand Down Expand Up @@ -3951,7 +3954,7 @@ class OffloadingActionBuilder final {
ActionList WrapperInputs;
// post link is not optional - even if not splitting, always need to
// process specialization constants
bool MultiFileActionDeps = !isSpirvAOT || DeviceCodeSplit;
bool MultiFileActionDeps = !isSpirvAOT || DeviceCodeSplit || EnableDAE;
types::ID PostLinkOutType = isNVPTX || !MultiFileActionDeps
? types::TY_LLVM_BC
: types::TY_Tempfiletable;
Expand Down Expand Up @@ -4108,6 +4111,9 @@ class OffloadingActionBuilder final {
WrapDeviceOnlyBinary = Args.hasArg(options::OPT_fsycl_link_EQ);
auto *DeviceCodeSplitArg =
Args.getLastArg(options::OPT_fsycl_device_code_split_EQ);
EnableDAE =
Args.hasFlag(options::OPT_fsycl_dead_args_optimization,
options::OPT_fno_sycl_dead_args_optimization, false);
// -fsycl-device-code-split is an alias to
// -fsycl-device-code-split=per_source
DeviceCodeSplit = DeviceCodeSplitArg &&
Expand Down
10 changes: 10 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4123,6 +4123,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("-sycl-opt");
}
// Turn on Dead Parameter Elimination Optimization with early optimizations
if (!RawTriple.isNVPTX() &&
Args.hasFlag(options::OPT_fsycl_dead_args_optimization,
options::OPT_fno_sycl_dead_args_optimization, false))
CmdArgs.push_back("-fenable-sycl-dae");

// Pass the triple of host when doing SYCL
auto AuxT = llvm::Triple(llvm::sys::getProcessTriple());
Expand Down Expand Up @@ -7807,6 +7812,11 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA,
// OPT_fsycl_device_code_split is not checked as it is an alias to
// -fsycl-device-code-split=per_source

// When Dead Parameter Elimination Optimization is enabled (non-NVPTX targets
// only), pass -emit-param-info to the post-link tool.
if (!getToolChain().getTriple().isNVPTX() &&
TCArgs.hasFlag(options::OPT_fsycl_dead_args_optimization,
options::OPT_fno_sycl_dead_args_optimization, false))
addArgs(CmdArgs, TCArgs, {"-emit-param-info"});
if (JA.getType() == types::TY_LLVM_BC) {
// single file output requested - this means only perform necessary IR
// transformations (like specialization constant intrinsic lowering) and
Expand Down
8 changes: 8 additions & 0 deletions clang/test/Driver/sycl-device-optimizations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,11 @@
// RUN: %clang_cl -### -fsycl -fintelfpga %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHECK-NO-SYCL-EARLY-OPTS %s
// CHECK-NO-SYCL-EARLY-OPTS: "-fno-sycl-early-optimizations"

/// Check that Dead Parameter Elimination Optimization is enabled
// RUN: %clang -### -fsycl -fsycl-dead-args-optimization %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHECK-DAE %s
// RUN: %clang_cl -### -fsycl -fsycl-dead-args-optimization %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHECK-DAE %s
// CHECK-DAE: clang{{.*}} "-fenable-sycl-dae"
// CHECK-DAE: sycl-post-link{{.*}} "-emit-param-info"
2 changes: 1 addition & 1 deletion sycl/test/basic_tests/sampler/sampler.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -L %opencl_libs_dir -lOpenCL
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-dead-args-optimization %s -o %t.out -L %opencl_libs_dir -lOpenCL
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
Expand Down
4 changes: 3 additions & 1 deletion sycl/test/kernel_from_file/hw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
// CUDA does not support SPIR-V.

//-fsycl-targets=%sycl_triple
// RUN: %clangxx -fsycl-device-only -fno-sycl-use-bitcode -Xclang -fsycl-int-header=%t.h -c %s -o %t.spv -I %sycl_include -Xclang -verify-ignore-unexpected=note,warning -Wno-sycl-strict
// The runtime assumes that the image passed with SYCL_USE_KERNEL_SPV has no
// eliminated arguments, so compile without early optimizations.
// RUN: %clangxx -fsycl-device-only -fno-sycl-early-optimizations -fno-sycl-use-bitcode -Xclang -fsycl-int-header=%t.h -c %s -o %t.spv -I %sycl_include -Xclang -verify-ignore-unexpected=note,warning -Wno-sycl-strict
// RUN: %clangxx -include %t.h -g %s -o %t.out -lsycl -I %sycl_include -Xclang -verify-ignore-unexpected=note,warning
// RUN: env SYCL_BE=%sycl_be SYCL_USE_KERNEL_SPV=%t.spv %t.out | FileCheck %s
// CHECK: Passed
Expand Down
4 changes: 2 additions & 2 deletions sycl/test/multi_ptr/multi_ptr.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-dead-args-optimization %s -o %t.out
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out
// RUN: %clangxx -DRESTRICT_WRITE_ACCESS_TO_CONSTANT_PTR -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
// RUN: %clangxx -DRESTRICT_WRITE_ACCESS_TO_CONSTANT_PTR -fsycl -fsycl-targets=%sycl_triple -fsycl-dead-args-optimization %s -o %t1.out
// RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
// RUN: %CPU_RUN_PLACEHOLDER %t1.out
// RUN: %GPU_RUN_PLACEHOLDER %t1.out
Expand Down
2 changes: 1 addition & 1 deletion sycl/test/scheduler/HandleException.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -I %sycl_source_dir %s -o %t.out
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-dead-args-optimization -I %sycl_source_dir %s -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
#include <CL/sycl.hpp>
#include <array>
Expand Down
2 changes: 1 addition & 1 deletion sycl/test/scheduler/HostAccDestruction.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clangxx -fsycl -I %sycl_source_dir %s -o %t.out
// RUN: %clangxx -fsycl -fsycl-dead-args-optimization -I %sycl_source_dir %s -o %t.out
// RUN: env SYCL_PI_TRACE=2 %CPU_RUN_PLACEHOLDER %t.out 2>&1 %CPU_CHECK_PLACEHOLDER
//==---------------------- HostAccDestruction.cpp --------------------------==//
//
Expand Down
2 changes: 1 addition & 1 deletion sycl/test/scheduler/ReleaseResourcesTest.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -I %sycl_source_dir %s -o %t.out
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-dead-args-optimization -I %sycl_source_dir %s -o %t.out
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
// RUN: env SYCL_PI_TRACE=2 %CPU_RUN_PLACEHOLDER %t.out 2>&1 %CPU_CHECK_PLACEHOLDER
// RUN: env SYCL_PI_TRACE=2 %GPU_RUN_PLACEHOLDER %t.out 2>&1 %GPU_CHECK_PLACEHOLDER
Expand Down
4 changes: 2 additions & 2 deletions sycl/test/separate-compile/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
//
// >> ---- compile src1
// >> device compilation...
// RUN: %clangxx -fsycl-device-only -Xclang -fsycl-int-header=sycl_ihdr_a.h %s -c -o a_kernel.bc -I %sycl_include -Wno-sycl-strict
// RUN: %clangxx -fsycl-device-only -fno-sycl-early-optimizations -Xclang -fsycl-int-header=sycl_ihdr_a.h %s -c -o a_kernel.bc -I %sycl_include -Wno-sycl-strict
// >> host compilation...
// RUN: %clangxx -include sycl_ihdr_a.h -g -c %s -o a.o -I %sycl_include -Wno-sycl-strict
//
// >> ---- compile src2
// >> device compilation...
// RUN: %clangxx -DB_CPP=1 -fsycl-device-only -Xclang -fsycl-int-header=sycl_ihdr_b.h %s -c -o b_kernel.bc -I %sycl_include -Wno-sycl-strict
// RUN: %clangxx -DB_CPP=1 -fsycl-device-only -fno-sycl-early-optimizations -Xclang -fsycl-int-header=sycl_ihdr_b.h %s -c -o b_kernel.bc -I %sycl_include -Wno-sycl-strict
// >> host compilation...
// RUN: %clangxx -DB_CPP=1 -include sycl_ihdr_b.h -g -c %s -o b.o -I %sycl_include -Wno-sycl-strict
//
Expand Down
2 changes: 1 addition & 1 deletion sycl/test/usm/pfor_flatten.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// UNSUPPORTED: cuda
// CUDA does not support the unnamed lambda extension.
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-unnamed-lambda %s -o %t1.out
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-unnamed-lambda -fsycl-dead-args-optimization %s -o %t1.out
// RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
// RUN: %CPU_RUN_PLACEHOLDER %t1.out
// RUN: %GPU_RUN_PLACEHOLDER %t1.out
Expand Down