Skip to content

Commit 9727781

Browse files
committed
locally land: [Clang] Determine offloading architectures at Toolchain creation (llvm#145799)
1 parent 7849590 commit 9727781

File tree

3 files changed

+77
-73
lines changed

3 files changed

+77
-73
lines changed

clang/include/clang/Driver/Driver.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -372,10 +372,10 @@ class Driver {
372372

373373
/// Number of parallel jobs.
374374
unsigned NumParallelJobs;
375-
/// Cache of known offloading architectures for the ToolChain already derived.
376-
/// This should only be modified when we first initialize the offloading
377-
/// toolchains.
378-
llvm::DenseMap<const ToolChain *, llvm::DenseSet<llvm::StringRef>> KnownArchs;
375+
376+
/// The associated offloading architectures with each toolchain.
377+
llvm::DenseMap<const ToolChain *, llvm::SmallVector<llvm::StringRef>>
378+
OffloadArchs;
379379

380380
private:
381381
/// TranslateInputArgs - Create a new derived argument list from the input
@@ -545,11 +545,11 @@ class Driver {
545545

546546
/// Returns the set of bound architectures active for this offload kind.
547547
/// If there are no bound architectures we return a set containing only the
548-
/// empty string. The \p SuppressError option is used to suppress errors.
549-
llvm::DenseSet<StringRef>
548+
/// empty string.
549+
llvm::SmallVector<StringRef>
550550
getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
551551
Action::OffloadKind Kind, const ToolChain *TC,
552-
bool SuppressError = false) const;
552+
bool SpecificToolchain = true) const;
553553

554554
/// Check that the file referenced by Value exists. If it doesn't,
555555
/// issue a diagnostic and return false.

clang/lib/Driver/Driver.cpp

Lines changed: 69 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -991,6 +991,8 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
991991
if (CudaInstallation.isValid())
992992
CudaInstallation.WarnIfUnsupportedVersion();
993993
C.addOffloadDeviceToolChain(&TC, Action::OFK_Cuda);
994+
OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_Cuda, &TC,
995+
/*SpecificToolchain=*/true);
994996
} else if (IsHIP && !UseLLVMOffload) {
995997
if (auto *OMPTargetArg =
996998
C.getInputArgs().getLastArg(options::OPT_fopenmp_targets_EQ)) {
@@ -1007,6 +1009,12 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
10071009
getOffloadToolChain(C.getInputArgs(), Action::OFK_HIP, *HIPTriple,
10081010
C.getDefaultToolChain().getTriple());
10091011
C.addOffloadDeviceToolChain(&TC, Action::OFK_HIP);
1012+
1013+
// TODO: Fix 'amdgcnspirv' handling with the new driver.
1014+
if (C.getInputArgs().hasFlag(options::OPT_offload_new_driver,
1015+
options::OPT_no_offload_new_driver, false))
1016+
OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_HIP, &TC,
1017+
/*SpecificToolchain=*/true);
10101018
}
10111019

10121020
if (IsCuda || IsHIP)
@@ -1072,40 +1080,43 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
10721080
auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, TT,
10731081
C.getDefaultToolChain().getTriple());
10741082
C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP);
1083+
OffloadArchs[&TC] =
1084+
getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, &TC,
1085+
/*SpecificToolchain=*/true);
10751086
}
10761087
} else if (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) &&
10771088
((!IsHIP && !IsCuda) || UseLLVMOffload)) {
10781089
llvm::Triple AMDTriple("amdgcn-amd-amdhsa");
10791090
llvm::Triple NVPTXTriple("nvptx64-nvidia-cuda");
10801091

1081-
// Attempt to deduce the offloading triple from the set of architectures.
1082-
// We can only correctly deduce NVPTX / AMDGPU triples currently.
1083-
for (const llvm::Triple &TT : {AMDTriple, NVPTXTriple}) {
1084-
auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, TT,
1085-
C.getDefaultToolChain().getTriple());
1086-
1087-
llvm::DenseSet<StringRef> Archs =
1088-
getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, &TC, true);
1089-
llvm::DenseSet<StringRef> ArchsForTarget;
1090-
for (StringRef Arch : Archs) {
1092+
for (StringRef A :
1093+
C.getInputArgs().getAllArgValues(options::OPT_offload_arch_EQ)) {
1094+
for (StringRef Arch : llvm::split(A, ",")) {
10911095
bool IsNVPTX = IsNVIDIAOffloadArch(
10921096
StringToOffloadArch(getProcessorFromTargetID(NVPTXTriple, Arch)));
10931097
bool IsAMDGPU = IsAMDOffloadArch(
10941098
StringToOffloadArch(getProcessorFromTargetID(AMDTriple, Arch)));
1095-
if (!IsNVPTX && !IsAMDGPU && !Arch.equals_insensitive("native")) {
1099+
if (!IsNVPTX && !IsAMDGPU && !Arch.empty() &&
1100+
!Arch.equals_insensitive("native")) {
10961101
Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch)
10971102
<< Arch;
10981103
return;
10991104
}
1100-
1101-
if (TT.isNVPTX() && IsNVPTX)
1102-
ArchsForTarget.insert(Arch);
1103-
else if (TT.isAMDGPU() && IsAMDGPU)
1104-
ArchsForTarget.insert(Arch);
11051105
}
1106-
if (!ArchsForTarget.empty()) {
1106+
}
1107+
1108+
// Attempt to deduce the offloading triple from the set of architectures.
1109+
// We can only correctly deduce NVPTX / AMDGPU triples currently.
1110+
for (const llvm::Triple &TT : {AMDTriple, NVPTXTriple}) {
1111+
auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, TT,
1112+
C.getDefaultToolChain().getTriple());
1113+
1114+
llvm::SmallVector<StringRef> Archs =
1115+
getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, &TC,
1116+
/*SpecificToolchain=*/false);
1117+
if (!Archs.empty()) {
11071118
C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP);
1108-
KnownArchs[&TC] = ArchsForTarget;
1119+
OffloadArchs[&TC] = Archs;
11091120
}
11101121
}
11111122

@@ -1146,9 +1157,11 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
11461157
// going to create will depend on both.
11471158
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
11481159
for (const auto &TT : UniqueSYCLTriplesVec) {
1149-
auto SYCLTC = &getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT,
1150-
HostTC->getTriple());
1151-
C.addOffloadDeviceToolChain(SYCLTC, Action::OFK_SYCL);
1160+
auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT,
1161+
HostTC->getTriple());
1162+
C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL);
1163+
OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC,
1164+
/*SpecificToolchain=*/true);
11521165
}
11531166
}
11541167

@@ -4729,20 +4742,22 @@ static StringRef getCanonicalArchString(Compilation &C,
47294742
const llvm::opt::DerivedArgList &Args,
47304743
StringRef ArchStr,
47314744
const llvm::Triple &Triple,
4732-
bool SuppressError = false) {
4745+
bool SpecificToolchain) {
47334746
// Lookup the CUDA / HIP architecture string. Only report an error if we were
47344747
// expecting the triple to be only NVPTX / AMDGPU.
47354748
OffloadArch Arch =
47364749
StringToOffloadArch(getProcessorFromTargetID(Triple, ArchStr));
4737-
if (!SuppressError && Triple.isNVPTX() &&
4750+
if (Triple.isNVPTX() &&
47384751
(Arch == OffloadArch::UNKNOWN || !IsNVIDIAOffloadArch(Arch))) {
4739-
C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
4740-
<< "CUDA" << ArchStr;
4752+
if (SpecificToolchain)
4753+
C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
4754+
<< "CUDA" << ArchStr;
47414755
return StringRef();
4742-
} else if (!SuppressError && Triple.isAMDGPU() &&
4756+
} else if (Triple.isAMDGPU() &&
47434757
(Arch == OffloadArch::UNKNOWN || !IsAMDOffloadArch(Arch))) {
4744-
C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
4745-
<< "HIP" << ArchStr;
4758+
if (SpecificToolchain)
4759+
C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
4760+
<< "HIP" << ArchStr;
47464761
return StringRef();
47474762
}
47484763

@@ -4751,13 +4766,9 @@ static StringRef getCanonicalArchString(Compilation &C,
47514766

47524767
if (IsAMDOffloadArch(Arch)) {
47534768
llvm::StringMap<bool> Features;
4754-
auto HIPTriple = getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs());
4755-
if (!HIPTriple)
4756-
return StringRef();
4757-
auto Arch = parseTargetID(*HIPTriple, ArchStr, &Features);
4769+
std::optional<StringRef> Arch = parseTargetID(Triple, ArchStr, &Features);
47584770
if (!Arch) {
47594771
C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << ArchStr;
4760-
C.setContainsError();
47614772
return StringRef();
47624773
}
47634774
return Args.MakeArgStringRef(getCanonicalTargetID(*Arch, Features));
@@ -4780,10 +4791,10 @@ getConflictOffloadArchCombination(const llvm::DenseSet<StringRef> &Archs,
47804791
return getConflictTargetIDCombination(ArchSet);
47814792
}
47824793

4783-
llvm::DenseSet<StringRef>
4794+
llvm::SmallVector<StringRef>
47844795
Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
47854796
Action::OffloadKind Kind, const ToolChain *TC,
4786-
bool SuppressError) const {
4797+
bool SpecificToolchain) const {
47874798
if (!TC)
47884799
TC = &C.getDefaultToolChain();
47894800

@@ -4798,14 +4809,11 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
47984809
: "--no-offload-arch");
47994810
}
48004811

4801-
if (KnownArchs.contains(TC))
4802-
return KnownArchs.lookup(TC);
4803-
48044812
llvm::DenseSet<StringRef> Archs;
48054813

48064814
if (!TC->getTargetID().empty()) {
48074815
Archs.insert(TC->getTargetID());
4808-
return Archs;
4816+
return llvm::SmallVector<StringRef>();
48094817
}
48104818

48114819
for (auto *Arg : C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind)) {
@@ -4816,7 +4824,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
48164824
if (Arch == "native" || Arch.empty()) {
48174825
auto GPUsOrErr = TC->getSystemGPUArchs(Args);
48184826
if (!GPUsOrErr) {
4819-
if (SuppressError)
4827+
if (!SpecificToolchain)
48204828
llvm::consumeError(GPUsOrErr.takeError());
48214829
else
48224830
TC->getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
@@ -4826,16 +4834,21 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
48264834
}
48274835

48284836
for (auto ArchStr : *GPUsOrErr) {
4829-
Archs.insert(
4837+
StringRef CanonicalStr =
48304838
getCanonicalArchString(C, Args, Args.MakeArgString(ArchStr),
4831-
TC->getTriple(), SuppressError));
4839+
TC->getTriple(), SpecificToolchain);
4840+
if (!CanonicalStr.empty())
4841+
Archs.insert(CanonicalStr);
4842+
else if (SpecificToolchain)
4843+
return llvm::SmallVector<StringRef>();
48324844
}
48334845
} else {
4834-
StringRef ArchStr = getCanonicalArchString(
4835-
C, Args, Arch, TC->getTriple(), SuppressError);
4836-
if (ArchStr.empty())
4837-
return Archs;
4838-
Archs.insert(ArchStr);
4846+
StringRef CanonicalStr = getCanonicalArchString(
4847+
C, Args, Arch, TC->getTriple(), SpecificToolchain);
4848+
if (!CanonicalStr.empty())
4849+
Archs.insert(CanonicalStr);
4850+
else if (SpecificToolchain)
4851+
return llvm::SmallVector<StringRef>();
48394852
}
48404853
}
48414854
} else if (Arg->getOption().matches(options::OPT_no_offload_arch_EQ)) {
@@ -4844,27 +4857,20 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
48444857
Archs.clear();
48454858
} else {
48464859
StringRef ArchStr = getCanonicalArchString(
4847-
C, Args, Arch, TC->getTriple(), SuppressError);
4848-
if (ArchStr.empty())
4849-
return Archs;
4860+
C, Args, Arch, TC->getTriple(), SpecificToolchain);
48504861
Archs.erase(ArchStr);
48514862
}
48524863
}
48534864
}
48544865
}
48554866

48564867
if (auto ConflictingArchs =
4857-
getConflictOffloadArchCombination(Archs, TC->getTriple())) {
4868+
getConflictOffloadArchCombination(Archs, TC->getTriple()))
48584869
C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo)
48594870
<< ConflictingArchs->first << ConflictingArchs->second;
4860-
C.setContainsError();
4861-
}
48624871

48634872
// Skip filling defaults if we're just querying what is available.
4864-
if (SuppressError)
4865-
return Archs;
4866-
4867-
if (Archs.empty()) {
4873+
if (SpecificToolchain && Archs.empty()) {
48684874
if (Kind == Action::OFK_Cuda) {
48694875
Archs.insert(OffloadArchToString(OffloadArch::CudaDefault));
48704876
} else if (Kind == Action::OFK_HIP) {
@@ -4890,12 +4896,13 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
48904896
}
48914897
}
48924898
}
4893-
} else {
4894-
Args.ClaimAllArgs(options::OPT_offload_arch_EQ);
4895-
Args.ClaimAllArgs(options::OPT_no_offload_arch_EQ);
48964899
}
4900+
Args.ClaimAllArgs(options::OPT_offload_arch_EQ);
4901+
Args.ClaimAllArgs(options::OPT_no_offload_arch_EQ);
48974902

4898-
return Archs;
4903+
SmallVector<StringRef> Sorted(Archs.begin(), Archs.end());
4904+
llvm::sort(Sorted);
4905+
return Sorted;
48994906
}
49004907

49014908
Action *Driver::BuildOffloadingActions(Compilation &C,
@@ -4959,10 +4966,7 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
49594966
// Get the product of all bound architectures and toolchains.
49604967
SmallVector<std::pair<const ToolChain *, StringRef>> TCAndArchs;
49614968
for (const ToolChain *TC : ToolChains) {
4962-
llvm::DenseSet<StringRef> Arches = getOffloadArchs(C, Args, Kind, TC);
4963-
SmallVector<StringRef, 0> Sorted(Arches.begin(), Arches.end());
4964-
llvm::sort(Sorted);
4965-
for (StringRef Arch : Sorted) {
4969+
for (StringRef Arch : OffloadArchs.lookup(TC)) {
49664970
TCAndArchs.push_back(std::make_pair(TC, Arch));
49674971
DeviceActions.push_back(
49684972
C.MakeAction<InputAction>(*InputArg, InputType, CUID));

clang/test/Driver/openmp-offload-gpu.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@
311311
// DRIVER_EMBEDDING: -fembed-offload-object={{.*}}.out
312312

313313
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
314-
// RUN: --offload-host-only -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HOST-ONLY
314+
// RUN: -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 --offload-host-only -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HOST-ONLY
315315
// CHECK-HOST-ONLY: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[OUTPUT:.*]]"
316316
// CHECK-HOST-ONLY: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[OUTPUT]]"], output: "a.out"
317317

0 commit comments

Comments
 (0)