Skip to content

Commit a14cac0

Browse files
committed
[New offload model] Cleanup the way sycl-post-link options are generated.
Signed-off-by: Arvind Sudarsanam <[email protected]>
1 parent 47a0341 commit a14cac0

File tree

4 files changed

+131
-110
lines changed

4 files changed

+131
-110
lines changed

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 29 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -10648,31 +10648,33 @@ static void getNonTripleBasedSYCLPostLinkOpts(const ToolChain &TC,
1064810648
addArgs(PostLinkArgs, TCArgs, {"-lower-esimd-force-stateless-mem=false"});
1064910649
}
1065010650

10651-
// Add any sycl-post-link options that rely on a specific Triple.
10652-
static void
10653-
getTripleBasedSYCLPostLinkOpts(const ToolChain &TC, const JobAction &JA,
10654-
const llvm::opt::ArgList &TCArgs,
10655-
llvm::Triple Triple, ArgStringList &PostLinkArgs,
10656-
bool SpecConsts, types::ID OutputType) {
10657-
bool NewOffloadDriver = TC.getDriver().getUseNewOffloadingDriver();
10658-
// Note: Do not use Triple when NewOffloadDriver is 'true'.
10659-
if (!NewOffloadDriver && (OutputType == types::TY_LLVM_BC)) {
10651+
// Add any sycl-post-link options that rely on a specific Triple in addition
10652+
// to user supplied options. This function is invoked only for old offloading
10653+
// model. For new offloading model, a slightly modified version of this
10654+
// function is called inside clang-linker-wrapper.
10655+
// NOTE: Any changes made here should be reflected in the similarly named
10656+
// function in clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp.
10657+
static void getTripleBasedSYCLPostLinkOpts(const ToolChain &TC,
10658+
const llvm::opt::ArgList &TCArgs,
10659+
ArgStringList &PostLinkArgs,
10660+
llvm::Triple Triple, bool SpecConsts,
10661+
types::ID OutputType) {
10662+
if (OutputType == types::TY_LLVM_BC) {
1066010663
// single file output requested - this means only perform necessary IR
1066110664
// transformations (like specialization constant intrinsic lowering) and
1066210665
// output LLVMIR
1066310666
addArgs(PostLinkArgs, TCArgs, {"-ir-output-only"});
1066410667
}
10665-
// specialization constants processing is mandatory
1066610668
if (SpecConsts)
1066710669
addArgs(PostLinkArgs, TCArgs, {"-spec-const=native"});
1066810670
else
1066910671
addArgs(PostLinkArgs, TCArgs, {"-spec-const=emulation"});
1067010672

1067110673
// See if device code splitting is requested. The logic here works along side
10672-
// the behavior in setOtherSYCLPostLinkOpts, where the option is added based
10673-
// on the user setting of-fsycl-device-code-split.
10674+
// the behavior in getNonTripleBasedSYCLPostLinkOpts, where the option is
10675+
// added based on the user setting of -fsycl-device-code-split.
1067410676
if (!TCArgs.hasArg(options::OPT_fsycl_device_code_split_EQ) &&
10675-
(NewOffloadDriver || !(Triple.getArchName() == "spir64_fpga")))
10677+
(!(Triple.getArchName() == "spir64_fpga")))
1067610678
addArgs(PostLinkArgs, TCArgs, {"-split=auto"});
1067710679

1067810680
// On Intel targets we don't need non-kernel functions as entry points,
@@ -10683,18 +10685,17 @@ getTripleBasedSYCLPostLinkOpts(const ToolChain &TC, const JobAction &JA,
1068310685
options::OPT_fsycl_remove_unused_external_funcs,
1068410686
false) &&
1068510687
!isSYCLNativeCPU(TC)) &&
10686-
(NewOffloadDriver || (!Triple.isNVPTX() && !Triple.isAMDGPU())))
10688+
!Triple.isNVPTX() && !Triple.isAMDGPU())
1068710689
addArgs(PostLinkArgs, TCArgs, {"-emit-only-kernels-as-entry-points"});
1068810690

10689-
if (!NewOffloadDriver && !Triple.isAMDGCN())
10691+
if (!Triple.isAMDGCN())
1069010692
addArgs(PostLinkArgs, TCArgs, {"-emit-param-info"});
1069110693
// Enable program metadata
10692-
if ((!NewOffloadDriver && (Triple.isNVPTX() || Triple.isAMDGCN())) ||
10693-
isSYCLNativeCPU(TC))
10694+
if (Triple.isNVPTX() || Triple.isAMDGCN() || isSYCLNativeCPU(TC))
1069410695
addArgs(PostLinkArgs, TCArgs, {"-emit-program-metadata"});
1069510696
if (OutputType != types::TY_LLVM_BC) {
1069610697
assert(OutputType == types::TY_Tempfiletable);
10697-
bool SplitEsimdByDefault = !NewOffloadDriver && Triple.isSPIROrSPIRV();
10698+
bool SplitEsimdByDefault = Triple.isSPIROrSPIRV();
1069810699
bool SplitEsimd = TCArgs.hasFlag(
1069910700
options::OPT_fsycl_device_code_split_esimd,
1070010701
options::OPT_fno_sycl_device_code_split_esimd, SplitEsimdByDefault);
@@ -10714,7 +10715,7 @@ getTripleBasedSYCLPostLinkOpts(const ToolChain &TC, const JobAction &JA,
1071410715
if (TCArgs.hasFlag(options::OPT_fsycl_add_default_spec_consts_image,
1071510716
options::OPT_fno_sycl_add_default_spec_consts_image,
1071610717
false) &&
10717-
(IsAOT || NewOffloadDriver))
10718+
IsAOT)
1071810719
addArgs(PostLinkArgs, TCArgs,
1071910720
{"-generate-device-image-default-spec-consts"});
1072010721
}
@@ -10737,8 +10738,16 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA,
1073710738
ArgStringList CmdArgs;
1073810739

1073910740
llvm::Triple T = getToolChain().getTriple();
10741+
const toolchains::SYCLToolChain &TC =
10742+
static_cast<const toolchains::SYCLToolChain &>(getToolChain());
10743+
10744+
// Handle -Xdevice-post-link
10745+
TC.TranslateTargetOpt(T, TCArgs, CmdArgs, options::OPT_Xdevice_post_link,
10746+
options::OPT_Xdevice_post_link_EQ,
10747+
JA.getOffloadingArch());
10748+
1074010749
getNonTripleBasedSYCLPostLinkOpts(getToolChain(), JA, TCArgs, CmdArgs);
10741-
getTripleBasedSYCLPostLinkOpts(getToolChain(), JA, TCArgs, T, CmdArgs,
10750+
getTripleBasedSYCLPostLinkOpts(getToolChain(), TCArgs, CmdArgs, T,
1074210751
SYCLPostLink->getRTSetsSpecConstants(),
1074310752
SYCLPostLink->getTrueType());
1074410753

@@ -10751,14 +10760,6 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA,
1075110760

1075210761
addArgs(CmdArgs, TCArgs, {"-o", OutputArg});
1075310762

10754-
const toolchains::SYCLToolChain &TC =
10755-
static_cast<const toolchains::SYCLToolChain &>(getToolChain());
10756-
10757-
// Handle -Xdevice-post-link
10758-
TC.TranslateTargetOpt(T, TCArgs, CmdArgs, options::OPT_Xdevice_post_link,
10759-
options::OPT_Xdevice_post_link_EQ,
10760-
JA.getOffloadingArch());
10761-
1076210763
// Add input file
1076310764
assert(Inputs.size() == 1 && Inputs.front().isFilename() &&
1076410765
"single input file expected");
@@ -11116,17 +11117,7 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
1111611117
appendOption(PostLinkOptString, A);
1111711118
}
1111811119
ArgStringList PostLinkArgs;
11119-
bool IsSYCLNativeCPU = driver::isSYCLNativeCPU(Args);
11120-
types::ID OutputType = TargetTriple.isSPIROrSPIRV() || IsSYCLNativeCPU
11121-
? types::TY_Tempfiletable
11122-
: types::TY_LLVM_BC;
11123-
bool SpecConsts = TargetTriple.isSPIROrSPIRV();
1112411120
getNonTripleBasedSYCLPostLinkOpts(getToolChain(), JA, Args, PostLinkArgs);
11125-
// Some options like -spec-consts=* depend on target triple as well as some
11126-
// user options. So, these options are partly computed here and then
11127-
// updated inside the clang-linker-wrapper.
11128-
getTripleBasedSYCLPostLinkOpts(getToolChain(), JA, Args, TargetTriple,
11129-
PostLinkArgs, SpecConsts, OutputType);
1113011121
for (const auto &A : PostLinkArgs)
1113111122
appendOption(PostLinkOptString, A);
1113211123
if (!PostLinkOptString.empty())
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/// Verify same set of sycl-post-link options generated for old and new offloading model
2+
// Test for JIT compilation
3+
// RUN: %clangxx --target=x86_64-unknown-linux-gnu -fsycl --offload-new-driver \
4+
// RUN: -Xdevice-post-link -O0 -v %s 2>&1 \
5+
// RUN: | FileCheck -check-prefix OPTIONS_POSTLINK_JIT %s
6+
// RUN: %clangxx --target=x86_64-unknown-linux-gnu -fsycl --no-offload-new-driver \
7+
// RUN: -Xdevice-post-link -O0 -v %s 2>&1 \
8+
// RUN: | FileCheck -check-prefix OPTIONS_POSTLINK_JIT %s
9+
// OPTIONS_POSTLINK_JIT: sycl-post-link{{.*}} -O0 -O2 -device-globals -spec-const=native -split=auto -emit-only-kernels-as-entry-points -emit-param-info -symbols -emit-exported-symbols -split-esimd -lower-esimd
10+
11+
#include <sycl/sycl.hpp>
12+
using namespace sycl;
13+
14+
int main(void) {
15+
sycl::queue queue;
16+
sycl::event event = queue.submit([&](sycl::handler &cgh) {
17+
cgh.parallel_for<class set_range>(sycl::range<1>{16},
18+
[=](sycl::id<1> idx) {});
19+
});
20+
return 0;
21+
}

clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

Lines changed: 66 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -522,78 +522,72 @@ static Expected<StringRef> convertSPIRVToIR(StringRef Filename,
522522
return *TempFileOrErr;
523523
}
524524

525-
// Update sycl-post-link options based on target triple.
526-
static void updateCmdArgs(SmallVector<StringRef, 8> &CmdArgs,
527-
llvm::Triple Triple) {
528-
// Get an argument in CmdArgs that contains Str. If there is no such
529-
// argument, an empty argument is returned
530-
auto getArg = [&](const StringRef &Str) {
531-
for (auto Arg : CmdArgs)
532-
if (Arg.contains(Str))
533-
return Arg;
534-
return StringRef("");
535-
};
536-
// Add a new argument Arg to CmdArgs if not present already.
537-
auto addArg = [&](const StringRef &Arg) {
538-
if (getArg(Arg).empty())
539-
CmdArgs.push_back(Arg);
540-
};
541-
// Replace an argument in CmdArgs that contains Str with NewArg. If no such
542-
// argument is present, add the NewArg to CmdArgs.
543-
auto replaceOrAddArg = [&](const StringRef &NewArg, const StringRef &Str) {
544-
for (auto &Arg : CmdArgs)
545-
if (Arg.contains(Str)) {
546-
Arg = NewArg;
547-
return;
548-
}
549-
CmdArgs.push_back(NewArg);
550-
};
551-
// Remove argument containing Str from CmdArgs.
552-
auto removeArg = [&](const StringRef &Str) {
553-
CmdArgs.erase(
554-
std::remove_if(CmdArgs.begin(), CmdArgs.end(),
555-
[&](StringRef Arg) { return Arg.contains(Str); }),
556-
CmdArgs.end());
557-
};
558-
559-
// specialization constants processing.
560-
bool IsAOTGPU = Triple.isNVPTX() || Triple.isAMDGCN() || Triple.isSPIRAOT();
561-
if (!IsAOTGPU)
562-
replaceOrAddArg("-spec-const=native", "-spec-const");
563-
else
564-
replaceOrAddArg("-spec-const=emulation", "-spec-const");
565-
566-
// -emit-only-kernels-as-entry-points is set by the user and is enabled only
567-
// for Intel targets.
568-
auto EmitOnlyKernelsAsEntryPointsArg =
569-
getArg("-emit-only-kernels-as-entry-points");
570-
if ((!EmitOnlyKernelsAsEntryPointsArg.empty()) && !Triple.isNVPTX() &&
571-
!Triple.isAMDGPU())
572-
addArg("-emit-only-kernels-as-entry-points");
525+
// Add any sycl-post-link options that rely on a specific Triple in addition
526+
// to user supplied options.
527+
// NOTE: Any changes made here should be reflected in the similarly named
528+
// function in clang/lib/Driver/ToolChains/Clang.cpp.
529+
static void
530+
getTripleBasedSYCLPostLinkOpts(const ArgList &Args,
531+
SmallVector<StringRef, 8> &PostLinkArgs,
532+
const llvm::Triple Triple) {
533+
const llvm::Triple HostTriple(Args.getLastArgValue(OPT_host_triple_EQ));
534+
bool SYCLNativeCPU = (HostTriple == Triple);
535+
bool SpecConsts = !(Triple.isNVPTX() || Triple.isAMDGCN() ||
536+
Triple.isSPIRAOT() || SYCLNativeCPU);
537+
if (SpecConsts)
538+
PostLinkArgs.push_back("-spec-const=native");
573539
else
574-
removeArg("-emit-only-kernels-as-entry-points");
575-
576-
if (!(Triple.isAMDGCN()))
577-
addArg("-emit-param-info");
578-
579-
if (Triple.isNVPTX() || Triple.isAMDGCN())
580-
addArg("-emit-program-metadata");
581-
582-
if (Triple.isSPIROrSPIRV()) {
583-
addArg("-symbols");
584-
addArg("-emit-exported-symbols");
585-
addArg("-split-esimd");
586-
addArg("-lower-esimd");
587-
}
588-
589-
// Here, IsAOT includes x86_64 device as well.
590-
bool IsAOT =
591-
IsAOTGPU || Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64;
592-
auto GenDeviceImageArg = getArg("-generate-device-image-default-spec-consts");
593-
if ((!GenDeviceImageArg.empty()) && IsAOT)
594-
addArg("-generate-device-image-default-spec-consts");
595-
else
596-
removeArg("-generate-device-image-default-spec-consts");
540+
PostLinkArgs.push_back("-spec-const=emulation");
541+
542+
// See if device code splitting is requested. The logic here works along side
543+
// the behavior in getNonTripleBasedSYCLPostLinkOpts, where the option is
544+
// added based on the user setting of -fsycl-device-code-split.
545+
bool NoSplit = true;
546+
for (auto Arg : PostLinkArgs)
547+
if (Arg.contains("-split=")) {
548+
NoSplit = false;
549+
break;
550+
}
551+
if (NoSplit && (Triple.getSubArch() != llvm::Triple::SPIRSubArch_fpga))
552+
PostLinkArgs.push_back("-split=auto");
553+
554+
// On Intel targets we don't need non-kernel functions as entry points,
555+
// because it only increases amount of code for device compiler to handle,
556+
// without any actual benefits.
557+
// TODO: Try to extend this feature for non-Intel GPUs.
558+
if ((!Args.hasFlag(OPT_no_sycl_remove_unused_external_funcs,
559+
OPT_sycl_remove_unused_external_funcs, false) &&
560+
!SYCLNativeCPU) &&
561+
!Triple.isNVPTX() && !Triple.isAMDGPU())
562+
PostLinkArgs.push_back("-emit-only-kernels-as-entry-points");
563+
564+
if (!Triple.isAMDGCN())
565+
PostLinkArgs.push_back("-emit-param-info");
566+
// Enable program metadata
567+
if (Triple.isNVPTX() || Triple.isAMDGCN() || SYCLNativeCPU)
568+
PostLinkArgs.push_back("-emit-program-metadata");
569+
570+
bool SplitEsimdByDefault = Triple.isSPIROrSPIRV();
571+
bool SplitEsimd =
572+
Args.hasFlag(OPT_sycl_device_code_split_esimd,
573+
OPT_no_sycl_device_code_split_esimd, SplitEsimdByDefault);
574+
575+
// Symbol file and specialization constant info generation is mandatory -
576+
// add options unconditionally
577+
PostLinkArgs.push_back("-symbols");
578+
PostLinkArgs.push_back("-emit-exported-symbols");
579+
if (SplitEsimd)
580+
PostLinkArgs.push_back("-split-esimd");
581+
PostLinkArgs.push_back("-lower-esimd");
582+
583+
bool IsAOT = Triple.isNVPTX() || Triple.isAMDGCN() ||
584+
Triple.getSubArch() == llvm::Triple::SPIRSubArch_fpga ||
585+
Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen ||
586+
Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64;
587+
if (Args.hasFlag(OPT_sycl_add_default_spec_consts_image,
588+
OPT_no_sycl_add_default_spec_consts_image, false) &&
589+
IsAOT)
590+
PostLinkArgs.push_back("-generate-device-image-default-spec-consts");
597591
}
598592

599593
// Run sycl-post-link tool
@@ -619,7 +613,7 @@ static Expected<StringRef> runSYCLPostLink(ArrayRef<StringRef> InputFiles,
619613
SYCLPostLinkOptions.split(CmdArgs, " ", /* MaxSplit = */ -1,
620614
/* KeepEmpty = */ false);
621615
const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
622-
updateCmdArgs(CmdArgs, Triple);
616+
getTripleBasedSYCLPostLinkOpts(Args, CmdArgs, Triple);
623617
CmdArgs.push_back("-o");
624618
CmdArgs.push_back(*TempFileOrErr);
625619
for (auto &File : InputFiles)

clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,3 +170,18 @@ def sycl_post_link_options_EQ : Joined<["--", "-"], "sycl-post-link-options=">,
170170
def llvm_spirv_options_EQ : Joined<["--", "-"], "llvm-spirv-options=">,
171171
Flags<[WrapperOnlyOption]>,
172172
HelpText<"Options that will control llvm-spirv step">;
173+
174+
// Extra SYCL options to help generate sycl-post-link options that also depend
175+
// on target triple.
176+
def sycl_remove_unused_external_funcs : Flag<["--", "-"], "sycl-remove-unused-external-funcs">,
177+
Flags<[WrapperOnlyOption, HelpHidden]>;
178+
def no_sycl_remove_unused_external_funcs : Flag<["--", "-"], "no-sycl-remove-unused-external-funcs">,
179+
Flags<[WrapperOnlyOption, HelpHidden]>;
180+
def sycl_device_code_split_esimd : Flag<["--", "-"], "sycl-device-code-split-esimd">,
181+
Flags<[WrapperOnlyOption, HelpHidden]>;
182+
def no_sycl_device_code_split_esimd : Flag<["--", "-"], "no-sycl-device-code-split-esimd">,
183+
Flags<[WrapperOnlyOption, HelpHidden]>;
184+
def sycl_add_default_spec_consts_image : Flag<["--", "-"], "sycl-add-default-spec-consts-image">,
185+
Flags<[WrapperOnlyOption, HelpHidden]>;
186+
def no_sycl_add_default_spec_consts_image : Flag<["--", "-"], "no-sycl-add-default-spec-consts-image">,
187+
Flags<[WrapperOnlyOption, HelpHidden]>;

0 commit comments

Comments
 (0)