Skip to content

Commit 2813ac1

Browse files
committed
Merge branch 'sycl' of https://github.com/intel/llvm into accessor-iterators
2 parents f31f26e + 7b5d4f1 commit 2813ac1

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

58 files changed

+1261
-313
lines changed

clang/include/clang/Basic/DiagnosticDriverKinds.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,8 @@ def warn_drv_sycl_offload_target_duplicate : Warning<
345345
def warn_drv_sycl_target_missing : Warning<
346346
"linked binaries do not contain expected '%0' target; found targets: '%1'">,
347347
InGroup<SyclTarget>;
348+
def err_drv_multiple_target_with_forced_target : Error<
349+
"multiple target usage with '%0' is not supported with '%1'">;
348350
def err_drv_failed_to_deduce_target_from_arch : Error<
349351
"failed to deduce triple for target architecture '%0'; specify the triple "
350352
"using '-fopenmp-targets' and '-Xopenmp-target' instead.">;

clang/include/clang/Driver/Options.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -956,6 +956,12 @@ def cuda_include_ptx_EQ : Joined<["--"], "cuda-include-ptx=">, Flags<[NoXarchOpt
956956
HelpText<"Include PTX for the following GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">;
957957
def no_cuda_include_ptx_EQ : Joined<["--"], "no-cuda-include-ptx=">, Flags<[NoXarchOption]>,
958958
HelpText<"Do not include PTX for the following GPU architecture (e.g. sm_35) or 'all'. May be specified more than once.">;
959+
def fno_bundle_offload_arch : Flag<["-"], "fno-bundle-offload-arch">,
960+
HelpText<"Specify that the offload bundler should not identify a bundle with "
961+
"specific arch. For example, the bundle for `nvptx64-nvidia-cuda-sm_80` "
962+
"uses the bundle tag `nvptx64-nvidia-cuda` when used. "
963+
"This allows .o files to contain .bc bundles that are unspecific "
964+
"to a particular arch version.">;
959965
def offload_arch_EQ : Joined<["--"], "offload-arch=">, Flags<[NoXarchOption]>,
960966
HelpText<"CUDA offloading device architecture (e.g. sm_35), or HIP offloading target ID in the form of a "
961967
"device architecture followed by target ID features delimited by a colon. Each target ID feature "
@@ -2848,6 +2854,10 @@ def fsycl_link_targets_EQ : CommaJoined<["-"], "fsycl-link-targets=">,
28482854
Flags<[NoXarchOption, CC1Option, CoreOption, Deprecated]>,
28492855
HelpText<"Specify comma-separated list of triples SYCL offloading targets "
28502856
"to produce linked device images (deprecated)">;
2857+
def fsycl_force_target_EQ : Joined<["-"], "fsycl-force-target=">,
2858+
Flags<[NoXarchOption, CoreOption]>,
2859+
HelpText<"Force the usage of the given triple when extracting device code "
2860+
"from any given objects on the command line">;
28512861
def fsycl_device_code_split_EQ : Joined<["-"], "fsycl-device-code-split=">,
28522862
Flags<[CC1Option, CoreOption]>, HelpText<"Perform SYCL device code split: per_kernel (device code module is "
28532863
"created for each SYCL kernel) | per_source (device code module is created for each source (translation unit)) | off (no device code split). | auto (use heuristic to select the best way of splitting device code). "

clang/lib/Driver/Driver.cpp

Lines changed: 111 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -785,6 +785,8 @@ static bool addSYCLDefaultTriple(Compilation &C,
785785
/// Returns true if a triple is added to SYCLTriples, false otherwise
786786
if (!C.getDriver().isSYCLDefaultTripleImplied())
787787
return false;
788+
if (C.getInputArgs().hasArg(options::OPT_fsycl_force_target_EQ))
789+
return false;
788790
for (const auto &SYCLTriple : SYCLTriples) {
789791
if (SYCLTriple.getSubArch() == llvm::Triple::NoSubArch &&
790792
SYCLTriple.isSPIR())
@@ -1057,6 +1059,14 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
10571059
C.getInputArgs().getLastArg(options::OPT_fsycl_device_code_split_EQ),
10581060
{"per_kernel", "per_source", "auto", "off"});
10591061

1062+
Arg *SYCLForceTarget =
1063+
getArgRequiringSYCLRuntime(options::OPT_fsycl_force_target_EQ);
1064+
if (SYCLForceTarget) {
1065+
StringRef Val(SYCLForceTarget->getValue());
1066+
llvm::Triple TT(MakeSYCLDeviceTriple(Val));
1067+
if (!isValidSYCLTriple(TT))
1068+
Diag(clang::diag::err_drv_invalid_sycl_target) << Val;
1069+
}
10601070
bool HasSYCLTargetsOption = SYCLTargets || SYCLLinkTargets || SYCLAddTargets;
10611071
llvm::StringMap<StringRef> FoundNormalizedTriples;
10621072
llvm::SmallVector<llvm::Triple, 4> UniqueSYCLTriplesVec;
@@ -1066,6 +1076,15 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
10661076
Arg *SYCLTargetsValues = SYCLTargets ? SYCLTargets : SYCLLinkTargets;
10671077
if (SYCLTargetsValues) {
10681078
if (SYCLTargetsValues->getNumValues()) {
1079+
1080+
// Multiple targets are currently not supported when using
1081+
// -fsycl-force-target as the bundler does not allow for multiple
1082+
// outputs of the same target.
1083+
if (SYCLForceTarget && SYCLTargetsValues->getNumValues() > 1)
1084+
Diag(clang::diag::err_drv_multiple_target_with_forced_target)
1085+
<< SYCLTargetsValues->getAsString(C.getInputArgs())
1086+
<< SYCLForceTarget->getAsString(C.getInputArgs());
1087+
10691088
for (StringRef Val : SYCLTargetsValues->getValues()) {
10701089
llvm::Triple TT(MakeSYCLDeviceTriple(Val));
10711090
if (!isValidSYCLTriple(TT)) {
@@ -4661,6 +4680,9 @@ class OffloadingActionBuilder final {
46614680
/// List of static archives to extract FPGA dependency info from
46624681
ActionList FPGAArchiveInputs;
46634682

4683+
// SYCLInstallation is needed in order to link SYCLDeviceLibs
4684+
SYCLInstallationDetector SYCLInstallation;
4685+
46644686
/// List of GPU architectures to use in this compilation with NVPTX/AMDGCN
46654687
/// targets.
46664688
SmallVector<std::pair<llvm::Triple, const char *>, 8> GpuArchList;
@@ -4701,7 +4723,8 @@ class OffloadingActionBuilder final {
47014723
SYCLActionBuilder(Compilation &C, DerivedArgList &Args,
47024724
const Driver::InputList &Inputs,
47034725
OffloadingActionBuilder &OAB)
4704-
: DeviceActionBuilder(C, Args, Inputs, Action::OFK_SYCL, OAB) {}
4726+
: DeviceActionBuilder(C, Args, Inputs, Action::OFK_SYCL, OAB),
4727+
SYCLInstallation(C.getDriver()) {}
47054728

47064729
void withBoundArchForToolChain(const ToolChain *TC,
47074730
llvm::function_ref<void(const char *)> Op) {
@@ -5080,10 +5103,8 @@ class OffloadingActionBuilder final {
50805103
}
50815104
}
50825105

5083-
const toolchains::SYCLToolChain *SYCLTC =
5084-
static_cast<const toolchains::SYCLToolChain *>(TC);
50855106
SmallVector<SmallString<128>, 4> LibLocCandidates;
5086-
SYCLTC->SYCLInstallation.getSYCLDeviceLibPath(LibLocCandidates);
5107+
SYCLInstallation.getSYCLDeviceLibPath(LibLocCandidates);
50875108
StringRef LibSuffix = isMSVCEnv ? ".obj" : ".o";
50885109
using SYCLDeviceLibsList = SmallVector<DeviceLibOptInfo, 5>;
50895110

@@ -5136,20 +5157,100 @@ class OffloadingActionBuilder final {
51365157
auto *SYCLDeviceLibsUnbundleAction =
51375158
C.MakeAction<OffloadUnbundlingJobAction>(
51385159
SYCLDeviceLibsInputAction);
5139-
addDeviceDepences(SYCLDeviceLibsUnbundleAction);
5140-
DeviceLinkObjects.push_back(SYCLDeviceLibsUnbundleAction);
5160+
5161+
// We are using BoundArch="" here since the NVPTX bundles in
5162+
// the devicelib .o files do not contain any arch information
5163+
SYCLDeviceLibsUnbundleAction->registerDependentActionInfo(
5164+
TC, /*BoundArch=*/"", Action::OFK_SYCL);
5165+
OffloadAction::DeviceDependences Dep;
5166+
Dep.add(*SYCLDeviceLibsUnbundleAction, *TC, /*BoundArch=*/"",
5167+
Action::OFK_SYCL);
5168+
auto *SYCLDeviceLibsDependenciesAction =
5169+
C.MakeAction<OffloadAction>(
5170+
Dep, SYCLDeviceLibsUnbundleAction->getType());
5171+
5172+
DeviceLinkObjects.push_back(SYCLDeviceLibsDependenciesAction);
51415173
if (!LibLocSelected)
51425174
LibLocSelected = !LibLocSelected;
51435175
}
51445176
}
51455177
}
51465178
};
5179+
51475180
addInputs(sycl_device_wrapper_libs);
5148-
if (isSpirvAOT)
5181+
if (isSpirvAOT || TC->getTriple().isNVPTX())
51495182
addInputs(sycl_device_fallback_libs);
51505183
if (Args.hasFlag(options::OPT_fsycl_instrument_device_code,
51515184
options::OPT_fno_sycl_instrument_device_code, true))
51525185
addInputs(sycl_device_annotation_libs);
5186+
5187+
// For NVPTX backend we need to also link libclc and CUDA libdevice
5188+
// at the same stage that we link all of the unbundled SYCL libdevice
5189+
// objects together.
5190+
if (TC->getTriple().isNVPTX() && NumOfDeviceLibLinked) {
5191+
std::string LibSpirvFile;
5192+
if (Args.hasArg(options::OPT_fsycl_libspirv_path_EQ)) {
5193+
auto ProvidedPath =
5194+
Args.getLastArgValue(options::OPT_fsycl_libspirv_path_EQ).str();
5195+
if (llvm::sys::fs::exists(ProvidedPath))
5196+
LibSpirvFile = ProvidedPath;
5197+
} else {
5198+
SmallVector<StringRef, 8> LibraryPaths;
5199+
5200+
// Expected path w/out install.
5201+
SmallString<256> WithoutInstallPath(C.getDriver().ResourceDir);
5202+
llvm::sys::path::append(WithoutInstallPath, Twine("../../clc"));
5203+
LibraryPaths.emplace_back(WithoutInstallPath.c_str());
5204+
5205+
// Expected path w/ install.
5206+
SmallString<256> WithInstallPath(C.getDriver().ResourceDir);
5207+
llvm::sys::path::append(WithInstallPath, Twine("../../../share/clc"));
5208+
LibraryPaths.emplace_back(WithInstallPath.c_str());
5209+
5210+
// Select remangled libclc variant
5211+
std::string LibSpirvTargetName =
5212+
(TC->getAuxTriple()->isOSWindows())
5213+
? "remangled-l32-signed_char.libspirv-nvptx64--nvidiacl."
5214+
"bc"
5215+
: "remangled-l64-signed_char.libspirv-nvptx64--nvidiacl."
5216+
"bc";
5217+
5218+
for (StringRef LibraryPath : LibraryPaths) {
5219+
SmallString<128> LibSpirvTargetFile(LibraryPath);
5220+
llvm::sys::path::append(LibSpirvTargetFile, LibSpirvTargetName);
5221+
if (llvm::sys::fs::exists(LibSpirvTargetFile) ||
5222+
Args.hasArg(options::OPT__HASH_HASH_HASH)) {
5223+
LibSpirvFile = std::string(LibSpirvTargetFile.str());
5224+
break;
5225+
}
5226+
}
5227+
}
5228+
5229+
if (!LibSpirvFile.empty()) {
5230+
Arg *LibClcInputArg = MakeInputArg(Args, C.getDriver().getOpts(),
5231+
Args.MakeArgString(LibSpirvFile));
5232+
auto *SYCLLibClcInputAction =
5233+
C.MakeAction<InputAction>(*LibClcInputArg, types::TY_LLVM_BC);
5234+
DeviceLinkObjects.push_back(SYCLLibClcInputAction);
5235+
}
5236+
5237+
const toolchains::CudaToolChain *CudaTC =
5238+
static_cast<const toolchains::CudaToolChain *>(TC);
5239+
for (auto LinkInputEnum : enumerate(DeviceLinkerInputs)) {
5240+
const char *BoundArch =
5241+
SYCLTargetInfoList[LinkInputEnum.index()].BoundArch;
5242+
std::string LibDeviceFile =
5243+
CudaTC->CudaInstallation.getLibDeviceFile(BoundArch);
5244+
if (!LibDeviceFile.empty()) {
5245+
Arg *CudaDeviceLibInputArg =
5246+
MakeInputArg(Args, C.getDriver().getOpts(),
5247+
Args.MakeArgString(LibDeviceFile));
5248+
auto *SYCLDeviceLibInputAction = C.MakeAction<InputAction>(
5249+
*CudaDeviceLibInputArg, types::TY_LLVM_BC);
5250+
DeviceLinkObjects.push_back(SYCLDeviceLibInputAction);
5251+
}
5252+
}
5253+
}
51535254
return NumOfDeviceLibLinked != 0;
51545255
}
51555256

@@ -5299,11 +5400,12 @@ class OffloadingActionBuilder final {
52995400
// When spv online link is supported by all backends, the fallback
53005401
// device libraries are only needed when current toolchain is using
53015402
// AOT compilation.
5302-
if (isSPIR) {
5403+
if (isSPIR || isNVPTX) {
53035404
bool UseJitLink =
5405+
isSPIR &&
53045406
Args.hasFlag(options::OPT_fsycl_device_lib_jit_link,
53055407
options::OPT_fno_sycl_device_lib_jit_link, false);
5306-
bool UseAOTLink = isSpirvAOT || !UseJitLink;
5408+
bool UseAOTLink = isSPIR && (isSpirvAOT || !UseJitLink);
53075409
SYCLDeviceLibLinked = addSYCLDeviceLibs(
53085410
TC, FullLinkObjects, UseAOTLink,
53095411
C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment());

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8741,7 +8741,8 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
87418741
Triples += CurTC->getTriple().normalize();
87428742
if ((CurKind == Action::OFK_HIP || CurKind == Action::OFK_OpenMP ||
87438743
CurKind == Action::OFK_Cuda || CurKind == Action::OFK_SYCL) &&
8744-
!StringRef(CurDep->getOffloadingArch()).empty()) {
8744+
!StringRef(CurDep->getOffloadingArch()).empty() &&
8745+
!TCArgs.hasArg(options::OPT_fno_bundle_offload_arch)) {
87458746
Triples += '-';
87468747
Triples += CurDep->getOffloadingArch();
87478748
}
@@ -8916,12 +8917,22 @@ void OffloadBundler::ConstructJobMultipleOutputs(
89168917
Triples += ',';
89178918
Triples += Action::GetOffloadKindName(Dep.DependentOffloadKind);
89188919
Triples += '-';
8919-
Triples += Dep.DependentToolChain->getTriple().normalize();
8920+
// When -fsycl-force-target is used, this value overrides the expected
8921+
// output type we are unbundling.
8922+
if (Dep.DependentOffloadKind == Action::OFK_SYCL &&
8923+
TCArgs.hasArg(options::OPT_fsycl_force_target_EQ)) {
8924+
StringRef Val(
8925+
TCArgs.getLastArg(options::OPT_fsycl_force_target_EQ)->getValue());
8926+
llvm::Triple TT(C.getDriver().MakeSYCLDeviceTriple(Val));
8927+
Triples += TT.normalize();
8928+
} else
8929+
Triples += Dep.DependentToolChain->getTriple().normalize();
89208930
if ((Dep.DependentOffloadKind == Action::OFK_HIP ||
89218931
Dep.DependentOffloadKind == Action::OFK_OpenMP ||
89228932
Dep.DependentOffloadKind == Action::OFK_Cuda ||
89238933
Dep.DependentOffloadKind == Action::OFK_SYCL) &&
8924-
!Dep.DependentBoundArch.empty()) {
8934+
!Dep.DependentBoundArch.empty() &&
8935+
!TCArgs.hasArg(options::OPT_fno_bundle_offload_arch)) {
89258936
Triples += '-';
89268937
Triples += Dep.DependentBoundArch;
89278938
}

clang/lib/Driver/ToolChains/Cuda.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,9 @@ class LLVM_LIBRARY_VISIBILITY CudaToolChain : public ToolChain {
183183
bool supportsDebugInfoOption(const llvm::opt::Arg *A) const override;
184184
void adjustDebugInfoKind(codegenoptions::DebugInfoKind &DebugInfoKind,
185185
const llvm::opt::ArgList &Args) const override;
186-
bool IsMathErrnoDefault() const override { return false; }
186+
187+
// math-errno should be the default for SYCL but not other OFK using CUDA TC
188+
bool IsMathErrnoDefault() const override { return OK == Action::OFK_SYCL; }
187189

188190
void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
189191
llvm::opt::ArgStringList &CC1Args) const override;

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,13 @@ const char *SYCL::Linker::constructLLVMLinkCommand(
170170
LibPostfix = ".obj";
171171
std::string FileName = this->getToolChain().getInputFilename(II);
172172
StringRef InputFilename = llvm::sys::path::filename(FileName);
173+
if (this->getToolChain().getTriple().isNVPTX()) {
174+
// Linking SYCL Device libs requires libclc as well as libdevice
175+
if ((InputFilename.find("nvidiacl") != InputFilename.npos ||
176+
InputFilename.find("libdevice") != InputFilename.npos))
177+
return true;
178+
LibPostfix = ".cubin";
179+
}
173180
StringRef LibSyclPrefix("libsycl-");
174181
if (!InputFilename.startswith(LibSyclPrefix) ||
175182
!InputFilename.endswith(LibPostfix) || (InputFilename.count('-') < 2))
@@ -620,7 +627,7 @@ void SYCL::x86_64::BackendCompiler::ConstructJob(
620627

621628
SYCLToolChain::SYCLToolChain(const Driver &D, const llvm::Triple &Triple,
622629
const ToolChain &HostTC, const ArgList &Args)
623-
: ToolChain(D, Triple, Args), HostTC(HostTC), SYCLInstallation(D) {
630+
: ToolChain(D, Triple, Args), HostTC(HostTC) {
624631
// Lookup binaries into the driver directory, this is used to
625632
// discover the clang-offload-bundler executable.
626633
getProgramPaths().push_back(getDriver().Dir);

clang/lib/Driver/ToolChains/SYCL.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,9 +172,7 @@ class LLVM_LIBRARY_VISIBILITY SYCLToolChain : public ToolChain {
172172
const llvm::opt::ArgList &Args,
173173
llvm::opt::ArgStringList &CC1Args) const override;
174174

175-
176175
const ToolChain &HostTC;
177-
const SYCLInstallationDetector SYCLInstallation;
178176

179177
protected:
180178
Tool *buildBackendCompiler() const override;

clang/test/Driver/Inputs/SYCL/lib/nvidiacl/remangled-l32-signed_char.libspirv-nvptx64--nvidiacl.bc

Whitespace-only changes.

clang/test/Driver/Inputs/SYCL/lib/nvidiacl/remangled-l64-signed_char.libspirv-nvptx64--nvidiacl.bc

Whitespace-only changes.

0 commit comments

Comments
 (0)