@@ -991,6 +991,8 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
991
991
if (CudaInstallation.isValid ())
992
992
CudaInstallation.WarnIfUnsupportedVersion ();
993
993
C.addOffloadDeviceToolChain (&TC, Action::OFK_Cuda);
994
+ OffloadArchs[&TC] = getOffloadArchs (C, C.getArgs (), Action::OFK_Cuda, &TC,
995
+ /* SpecificToolchain=*/ true );
994
996
} else if (IsHIP && !UseLLVMOffload) {
995
997
if (auto *OMPTargetArg =
996
998
C.getInputArgs ().getLastArg (options::OPT_fopenmp_targets_EQ)) {
@@ -1007,6 +1009,12 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
1007
1009
getOffloadToolChain (C.getInputArgs (), Action::OFK_HIP, *HIPTriple,
1008
1010
C.getDefaultToolChain ().getTriple ());
1009
1011
C.addOffloadDeviceToolChain (&TC, Action::OFK_HIP);
1012
+
1013
+ // TODO: Fix 'amdgcnspirv' handling with the new driver.
1014
+ if (C.getInputArgs ().hasFlag (options::OPT_offload_new_driver,
1015
+ options::OPT_no_offload_new_driver, false ))
1016
+ OffloadArchs[&TC] = getOffloadArchs (C, C.getArgs (), Action::OFK_HIP, &TC,
1017
+ /* SpecificToolchain=*/ true );
1010
1018
}
1011
1019
1012
1020
if (IsCuda || IsHIP)
@@ -1072,40 +1080,43 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
1072
1080
auto &TC = getOffloadToolChain (C.getInputArgs (), Action::OFK_OpenMP, TT,
1073
1081
C.getDefaultToolChain ().getTriple ());
1074
1082
C.addOffloadDeviceToolChain (&TC, Action::OFK_OpenMP);
1083
+ OffloadArchs[&TC] =
1084
+ getOffloadArchs (C, C.getArgs (), Action::OFK_OpenMP, &TC,
1085
+ /* SpecificToolchain=*/ true );
1075
1086
}
1076
1087
} else if (C.getInputArgs ().hasArg (options::OPT_offload_arch_EQ) &&
1077
1088
((!IsHIP && !IsCuda) || UseLLVMOffload)) {
1078
1089
llvm::Triple AMDTriple (" amdgcn-amd-amdhsa" );
1079
1090
llvm::Triple NVPTXTriple (" nvptx64-nvidia-cuda" );
1080
1091
1081
- // Attempt to deduce the offloading triple from the set of architectures.
1082
- // We can only correctly deduce NVPTX / AMDGPU triples currently.
1083
- for (const llvm::Triple &TT : {AMDTriple, NVPTXTriple}) {
1084
- auto &TC = getOffloadToolChain (C.getInputArgs (), Action::OFK_OpenMP, TT,
1085
- C.getDefaultToolChain ().getTriple ());
1086
-
1087
- llvm::DenseSet<StringRef> Archs =
1088
- getOffloadArchs (C, C.getArgs (), Action::OFK_OpenMP, &TC, true );
1089
- llvm::DenseSet<StringRef> ArchsForTarget;
1090
- for (StringRef Arch : Archs) {
1092
+ for (StringRef A :
1093
+ C.getInputArgs ().getAllArgValues (options::OPT_offload_arch_EQ)) {
1094
+ for (StringRef Arch : llvm::split (A, " ," )) {
1091
1095
bool IsNVPTX = IsNVIDIAOffloadArch (
1092
1096
StringToOffloadArch (getProcessorFromTargetID (NVPTXTriple, Arch)));
1093
1097
bool IsAMDGPU = IsAMDOffloadArch (
1094
1098
StringToOffloadArch (getProcessorFromTargetID (AMDTriple, Arch)));
1095
- if (!IsNVPTX && !IsAMDGPU && !Arch.equals_insensitive (" native" )) {
1099
+ if (!IsNVPTX && !IsAMDGPU && !Arch.empty () &&
1100
+ !Arch.equals_insensitive (" native" )) {
1096
1101
Diag (clang::diag::err_drv_failed_to_deduce_target_from_arch)
1097
1102
<< Arch;
1098
1103
return ;
1099
1104
}
1100
-
1101
- if (TT.isNVPTX () && IsNVPTX)
1102
- ArchsForTarget.insert (Arch);
1103
- else if (TT.isAMDGPU () && IsAMDGPU)
1104
- ArchsForTarget.insert (Arch);
1105
1105
}
1106
- if (!ArchsForTarget.empty ()) {
1106
+ }
1107
+
1108
+ // Attempt to deduce the offloading triple from the set of architectures.
1109
+ // We can only correctly deduce NVPTX / AMDGPU triples currently.
1110
+ for (const llvm::Triple &TT : {AMDTriple, NVPTXTriple}) {
1111
+ auto &TC = getOffloadToolChain (C.getInputArgs (), Action::OFK_OpenMP, TT,
1112
+ C.getDefaultToolChain ().getTriple ());
1113
+
1114
+ llvm::SmallVector<StringRef> Archs =
1115
+ getOffloadArchs (C, C.getArgs (), Action::OFK_OpenMP, &TC,
1116
+ /* SpecificToolchain=*/ false );
1117
+ if (!Archs.empty ()) {
1107
1118
C.addOffloadDeviceToolChain (&TC, Action::OFK_OpenMP);
1108
- KnownArchs [&TC] = ArchsForTarget ;
1119
+ OffloadArchs [&TC] = Archs ;
1109
1120
}
1110
1121
}
1111
1122
@@ -1146,9 +1157,11 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
1146
1157
// going to create will depend on both.
1147
1158
const ToolChain *HostTC = C.getSingleOffloadToolChain <Action::OFK_Host>();
1148
1159
for (const auto &TT : UniqueSYCLTriplesVec) {
1149
- auto SYCLTC = &getOffloadToolChain (C.getInputArgs (), Action::OFK_SYCL, TT,
1150
- HostTC->getTriple ());
1151
- C.addOffloadDeviceToolChain (SYCLTC, Action::OFK_SYCL);
1160
+ auto &TC = getOffloadToolChain (C.getInputArgs (), Action::OFK_SYCL, TT,
1161
+ HostTC->getTriple ());
1162
+ C.addOffloadDeviceToolChain (&TC, Action::OFK_SYCL);
1163
+ OffloadArchs[&TC] = getOffloadArchs (C, C.getArgs (), Action::OFK_SYCL, &TC,
1164
+ /* SpecificToolchain=*/ true );
1152
1165
}
1153
1166
}
1154
1167
@@ -4729,20 +4742,22 @@ static StringRef getCanonicalArchString(Compilation &C,
4729
4742
const llvm::opt::DerivedArgList &Args,
4730
4743
StringRef ArchStr,
4731
4744
const llvm::Triple &Triple,
4732
- bool SuppressError = false ) {
4745
+ bool SpecificToolchain ) {
4733
4746
// Lookup the CUDA / HIP architecture string. Only report an error if we were
4734
4747
// expecting the triple to be only NVPTX / AMDGPU.
4735
4748
OffloadArch Arch =
4736
4749
StringToOffloadArch (getProcessorFromTargetID (Triple, ArchStr));
4737
- if (!SuppressError && Triple.isNVPTX () &&
4750
+ if (Triple.isNVPTX () &&
4738
4751
(Arch == OffloadArch::UNKNOWN || !IsNVIDIAOffloadArch (Arch))) {
4739
- C.getDriver ().Diag (clang::diag::err_drv_offload_bad_gpu_arch)
4740
- << " CUDA" << ArchStr;
4752
+ if (SpecificToolchain)
4753
+ C.getDriver ().Diag (clang::diag::err_drv_offload_bad_gpu_arch)
4754
+ << " CUDA" << ArchStr;
4741
4755
return StringRef ();
4742
- } else if (!SuppressError && Triple.isAMDGPU () &&
4756
+ } else if (Triple.isAMDGPU () &&
4743
4757
(Arch == OffloadArch::UNKNOWN || !IsAMDOffloadArch (Arch))) {
4744
- C.getDriver ().Diag (clang::diag::err_drv_offload_bad_gpu_arch)
4745
- << " HIP" << ArchStr;
4758
+ if (SpecificToolchain)
4759
+ C.getDriver ().Diag (clang::diag::err_drv_offload_bad_gpu_arch)
4760
+ << " HIP" << ArchStr;
4746
4761
return StringRef ();
4747
4762
}
4748
4763
@@ -4751,13 +4766,9 @@ static StringRef getCanonicalArchString(Compilation &C,
4751
4766
4752
4767
if (IsAMDOffloadArch (Arch)) {
4753
4768
llvm::StringMap<bool > Features;
4754
- auto HIPTriple = getHIPOffloadTargetTriple (C.getDriver (), C.getInputArgs ());
4755
- if (!HIPTriple)
4756
- return StringRef ();
4757
- auto Arch = parseTargetID (*HIPTriple, ArchStr, &Features);
4769
+ std::optional<StringRef> Arch = parseTargetID (Triple, ArchStr, &Features);
4758
4770
if (!Arch) {
4759
4771
C.getDriver ().Diag (clang::diag::err_drv_bad_target_id) << ArchStr;
4760
- C.setContainsError ();
4761
4772
return StringRef ();
4762
4773
}
4763
4774
return Args.MakeArgStringRef (getCanonicalTargetID (*Arch, Features));
@@ -4780,10 +4791,10 @@ getConflictOffloadArchCombination(const llvm::DenseSet<StringRef> &Archs,
4780
4791
return getConflictTargetIDCombination (ArchSet);
4781
4792
}
4782
4793
4783
- llvm::DenseSet <StringRef>
4794
+ llvm::SmallVector <StringRef>
4784
4795
Driver::getOffloadArchs (Compilation &C, const llvm::opt::DerivedArgList &Args,
4785
4796
Action::OffloadKind Kind, const ToolChain *TC,
4786
- bool SuppressError ) const {
4797
+ bool SpecificToolchain ) const {
4787
4798
if (!TC)
4788
4799
TC = &C.getDefaultToolChain ();
4789
4800
@@ -4798,14 +4809,11 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
4798
4809
: " --no-offload-arch" );
4799
4810
}
4800
4811
4801
- if (KnownArchs.contains (TC))
4802
- return KnownArchs.lookup (TC);
4803
-
4804
4812
llvm::DenseSet<StringRef> Archs;
4805
4813
4806
4814
if (!TC->getTargetID ().empty ()) {
4807
4815
Archs.insert (TC->getTargetID ());
4808
- return Archs ;
4816
+ return llvm::SmallVector<StringRef>() ;
4809
4817
}
4810
4818
4811
4819
for (auto *Arg : C.getArgsForToolChain (TC, /* BoundArch=*/ " " , Kind)) {
@@ -4816,7 +4824,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
4816
4824
if (Arch == " native" || Arch.empty ()) {
4817
4825
auto GPUsOrErr = TC->getSystemGPUArchs (Args);
4818
4826
if (!GPUsOrErr) {
4819
- if (SuppressError )
4827
+ if (!SpecificToolchain )
4820
4828
llvm::consumeError (GPUsOrErr.takeError ());
4821
4829
else
4822
4830
TC->getDriver ().Diag (diag::err_drv_undetermined_gpu_arch)
@@ -4826,16 +4834,21 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
4826
4834
}
4827
4835
4828
4836
for (auto ArchStr : *GPUsOrErr) {
4829
- Archs. insert (
4837
+ StringRef CanonicalStr =
4830
4838
getCanonicalArchString (C, Args, Args.MakeArgString (ArchStr),
4831
- TC->getTriple (), SuppressError));
4839
+ TC->getTriple (), SpecificToolchain);
4840
+ if (!CanonicalStr.empty ())
4841
+ Archs.insert (CanonicalStr);
4842
+ else if (SpecificToolchain)
4843
+ return llvm::SmallVector<StringRef>();
4832
4844
}
4833
4845
} else {
4834
- StringRef ArchStr = getCanonicalArchString (
4835
- C, Args, Arch, TC->getTriple (), SuppressError);
4836
- if (ArchStr.empty ())
4837
- return Archs;
4838
- Archs.insert (ArchStr);
4846
+ StringRef CanonicalStr = getCanonicalArchString (
4847
+ C, Args, Arch, TC->getTriple (), SpecificToolchain);
4848
+ if (!CanonicalStr.empty ())
4849
+ Archs.insert (CanonicalStr);
4850
+ else if (SpecificToolchain)
4851
+ return llvm::SmallVector<StringRef>();
4839
4852
}
4840
4853
}
4841
4854
} else if (Arg->getOption ().matches (options::OPT_no_offload_arch_EQ)) {
@@ -4844,27 +4857,20 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
4844
4857
Archs.clear ();
4845
4858
} else {
4846
4859
StringRef ArchStr = getCanonicalArchString (
4847
- C, Args, Arch, TC->getTriple (), SuppressError);
4848
- if (ArchStr.empty ())
4849
- return Archs;
4860
+ C, Args, Arch, TC->getTriple (), SpecificToolchain);
4850
4861
Archs.erase (ArchStr);
4851
4862
}
4852
4863
}
4853
4864
}
4854
4865
}
4855
4866
4856
4867
if (auto ConflictingArchs =
4857
- getConflictOffloadArchCombination (Archs, TC->getTriple ())) {
4868
+ getConflictOffloadArchCombination (Archs, TC->getTriple ()))
4858
4869
C.getDriver ().Diag (clang::diag::err_drv_bad_offload_arch_combo)
4859
4870
<< ConflictingArchs->first << ConflictingArchs->second ;
4860
- C.setContainsError ();
4861
- }
4862
4871
4863
4872
// Skip filling defaults if we're just querying what is available.
4864
- if (SuppressError)
4865
- return Archs;
4866
-
4867
- if (Archs.empty ()) {
4873
+ if (SpecificToolchain && Archs.empty ()) {
4868
4874
if (Kind == Action::OFK_Cuda) {
4869
4875
Archs.insert (OffloadArchToString (OffloadArch::CudaDefault));
4870
4876
} else if (Kind == Action::OFK_HIP) {
@@ -4890,12 +4896,13 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
4890
4896
}
4891
4897
}
4892
4898
}
4893
- } else {
4894
- Args.ClaimAllArgs (options::OPT_offload_arch_EQ);
4895
- Args.ClaimAllArgs (options::OPT_no_offload_arch_EQ);
4896
4899
}
4900
+ Args.ClaimAllArgs (options::OPT_offload_arch_EQ);
4901
+ Args.ClaimAllArgs (options::OPT_no_offload_arch_EQ);
4897
4902
4898
- return Archs;
4903
+ SmallVector<StringRef> Sorted (Archs.begin (), Archs.end ());
4904
+ llvm::sort (Sorted);
4905
+ return Sorted;
4899
4906
}
4900
4907
4901
4908
Action *Driver::BuildOffloadingActions (Compilation &C,
@@ -4959,10 +4966,7 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
4959
4966
// Get the product of all bound architectures and toolchains.
4960
4967
SmallVector<std::pair<const ToolChain *, StringRef>> TCAndArchs;
4961
4968
for (const ToolChain *TC : ToolChains) {
4962
- llvm::DenseSet<StringRef> Arches = getOffloadArchs (C, Args, Kind, TC);
4963
- SmallVector<StringRef, 0 > Sorted (Arches.begin (), Arches.end ());
4964
- llvm::sort (Sorted);
4965
- for (StringRef Arch : Sorted) {
4969
+ for (StringRef Arch : OffloadArchs.lookup (TC)) {
4966
4970
TCAndArchs.push_back (std::make_pair (TC, Arch));
4967
4971
DeviceActions.push_back (
4968
4972
C.MakeAction <InputAction>(*InputArg, InputType, CUID));
0 commit comments