@@ -988,6 +988,8 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
988
988
if (CudaInstallation.isValid ())
989
989
CudaInstallation.WarnIfUnsupportedVersion ();
990
990
C.addOffloadDeviceToolChain (&TC, Action::OFK_Cuda);
991
+ OffloadArchs[&TC] = getOffloadArchs (C, C.getArgs (), Action::OFK_Cuda, &TC,
992
+ /* SpecificToolchain=*/ true );
991
993
} else if (IsHIP && !UseLLVMOffload) {
992
994
if (auto *OMPTargetArg =
993
995
C.getInputArgs ().getLastArg (options::OPT_fopenmp_targets_EQ)) {
@@ -1004,6 +1006,12 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
1004
1006
getOffloadToolChain (C.getInputArgs (), Action::OFK_HIP, *HIPTriple,
1005
1007
C.getDefaultToolChain ().getTriple ());
1006
1008
C.addOffloadDeviceToolChain (&TC, Action::OFK_HIP);
1009
+
1010
+ // TODO: Fix 'amdgcnspirv' handling with the new driver.
1011
+ if (C.getInputArgs ().hasFlag (options::OPT_offload_new_driver,
1012
+ options::OPT_no_offload_new_driver, false ))
1013
+ OffloadArchs[&TC] = getOffloadArchs (C, C.getArgs (), Action::OFK_HIP, &TC,
1014
+ /* SpecificToolchain=*/ true );
1007
1015
}
1008
1016
1009
1017
if (IsCuda || IsHIP)
@@ -1069,40 +1077,43 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
1069
1077
auto &TC = getOffloadToolChain (C.getInputArgs (), Action::OFK_OpenMP, TT,
1070
1078
C.getDefaultToolChain ().getTriple ());
1071
1079
C.addOffloadDeviceToolChain (&TC, Action::OFK_OpenMP);
1080
+ OffloadArchs[&TC] =
1081
+ getOffloadArchs (C, C.getArgs (), Action::OFK_OpenMP, &TC,
1082
+ /* SpecificToolchain=*/ true );
1072
1083
}
1073
1084
} else if (C.getInputArgs ().hasArg (options::OPT_offload_arch_EQ) &&
1074
1085
((!IsHIP && !IsCuda) || UseLLVMOffload)) {
1075
1086
llvm::Triple AMDTriple (" amdgcn-amd-amdhsa" );
1076
1087
llvm::Triple NVPTXTriple (" nvptx64-nvidia-cuda" );
1077
1088
1078
- // Attempt to deduce the offloading triple from the set of architectures.
1079
- // We can only correctly deduce NVPTX / AMDGPU triples currently.
1080
- for (const llvm::Triple &TT : {AMDTriple, NVPTXTriple}) {
1081
- auto &TC = getOffloadToolChain (C.getInputArgs (), Action::OFK_OpenMP, TT,
1082
- C.getDefaultToolChain ().getTriple ());
1083
-
1084
- llvm::DenseSet<StringRef> Archs =
1085
- getOffloadArchs (C, C.getArgs (), Action::OFK_OpenMP, &TC, true );
1086
- llvm::DenseSet<StringRef> ArchsForTarget;
1087
- for (StringRef Arch : Archs) {
1089
+ for (StringRef A :
1090
+ C.getInputArgs ().getAllArgValues (options::OPT_offload_arch_EQ)) {
1091
+ for (StringRef Arch : llvm::split (A, " ," )) {
1088
1092
bool IsNVPTX = IsNVIDIAOffloadArch (
1089
1093
StringToOffloadArch (getProcessorFromTargetID (NVPTXTriple, Arch)));
1090
1094
bool IsAMDGPU = IsAMDOffloadArch (
1091
1095
StringToOffloadArch (getProcessorFromTargetID (AMDTriple, Arch)));
1092
- if (!IsNVPTX && !IsAMDGPU && !Arch.equals_insensitive (" native" )) {
1096
+ if (!IsNVPTX && !IsAMDGPU && !Arch.empty () &&
1097
+ !Arch.equals_insensitive (" native" )) {
1093
1098
Diag (clang::diag::err_drv_failed_to_deduce_target_from_arch)
1094
1099
<< Arch;
1095
1100
return ;
1096
1101
}
1097
-
1098
- if (TT.isNVPTX () && IsNVPTX)
1099
- ArchsForTarget.insert (Arch);
1100
- else if (TT.isAMDGPU () && IsAMDGPU)
1101
- ArchsForTarget.insert (Arch);
1102
1102
}
1103
- if (!ArchsForTarget.empty ()) {
1103
+ }
1104
+
1105
+ // Attempt to deduce the offloading triple from the set of architectures.
1106
+ // We can only correctly deduce NVPTX / AMDGPU triples currently.
1107
+ for (const llvm::Triple &TT : {AMDTriple, NVPTXTriple}) {
1108
+ auto &TC = getOffloadToolChain (C.getInputArgs (), Action::OFK_OpenMP, TT,
1109
+ C.getDefaultToolChain ().getTriple ());
1110
+
1111
+ llvm::SmallVector<StringRef> Archs =
1112
+ getOffloadArchs (C, C.getArgs (), Action::OFK_OpenMP, &TC,
1113
+ /* SpecificToolchain=*/ false );
1114
+ if (!Archs.empty ()) {
1104
1115
C.addOffloadDeviceToolChain (&TC, Action::OFK_OpenMP);
1105
- KnownArchs [&TC] = ArchsForTarget ;
1116
+ OffloadArchs [&TC] = Archs ;
1106
1117
}
1107
1118
}
1108
1119
@@ -1143,9 +1154,11 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
1143
1154
// going to create will depend on both.
1144
1155
const ToolChain *HostTC = C.getSingleOffloadToolChain <Action::OFK_Host>();
1145
1156
for (const auto &TT : UniqueSYCLTriplesVec) {
1146
- auto SYCLTC = &getOffloadToolChain (C.getInputArgs (), Action::OFK_SYCL, TT,
1147
- HostTC->getTriple ());
1148
- C.addOffloadDeviceToolChain (SYCLTC, Action::OFK_SYCL);
1157
+ auto &TC = getOffloadToolChain (C.getInputArgs (), Action::OFK_SYCL, TT,
1158
+ HostTC->getTriple ());
1159
+ C.addOffloadDeviceToolChain (&TC, Action::OFK_SYCL);
1160
+ OffloadArchs[&TC] = getOffloadArchs (C, C.getArgs (), Action::OFK_SYCL, &TC,
1161
+ /* SpecificToolchain=*/ true );
1149
1162
}
1150
1163
}
1151
1164
@@ -4703,20 +4716,22 @@ static StringRef getCanonicalArchString(Compilation &C,
4703
4716
const llvm::opt::DerivedArgList &Args,
4704
4717
StringRef ArchStr,
4705
4718
const llvm::Triple &Triple,
4706
- bool SuppressError = false ) {
4719
+ bool SpecificToolchain ) {
4707
4720
// Lookup the CUDA / HIP architecture string. Only report an error if we were
4708
4721
// expecting the triple to be only NVPTX / AMDGPU.
4709
4722
OffloadArch Arch =
4710
4723
StringToOffloadArch (getProcessorFromTargetID (Triple, ArchStr));
4711
- if (!SuppressError && Triple.isNVPTX () &&
4724
+ if (Triple.isNVPTX () &&
4712
4725
(Arch == OffloadArch::UNKNOWN || !IsNVIDIAOffloadArch (Arch))) {
4713
- C.getDriver ().Diag (clang::diag::err_drv_offload_bad_gpu_arch)
4714
- << " CUDA" << ArchStr;
4726
+ if (SpecificToolchain)
4727
+ C.getDriver ().Diag (clang::diag::err_drv_offload_bad_gpu_arch)
4728
+ << " CUDA" << ArchStr;
4715
4729
return StringRef ();
4716
- } else if (!SuppressError && Triple.isAMDGPU () &&
4730
+ } else if (Triple.isAMDGPU () &&
4717
4731
(Arch == OffloadArch::UNKNOWN || !IsAMDOffloadArch (Arch))) {
4718
- C.getDriver ().Diag (clang::diag::err_drv_offload_bad_gpu_arch)
4719
- << " HIP" << ArchStr;
4732
+ if (SpecificToolchain)
4733
+ C.getDriver ().Diag (clang::diag::err_drv_offload_bad_gpu_arch)
4734
+ << " HIP" << ArchStr;
4720
4735
return StringRef ();
4721
4736
}
4722
4737
@@ -4725,13 +4740,9 @@ static StringRef getCanonicalArchString(Compilation &C,
4725
4740
4726
4741
if (IsAMDOffloadArch (Arch)) {
4727
4742
llvm::StringMap<bool > Features;
4728
- auto HIPTriple = getHIPOffloadTargetTriple (C.getDriver (), C.getInputArgs ());
4729
- if (!HIPTriple)
4730
- return StringRef ();
4731
- auto Arch = parseTargetID (*HIPTriple, ArchStr, &Features);
4743
+ std::optional<StringRef> Arch = parseTargetID (Triple, ArchStr, &Features);
4732
4744
if (!Arch) {
4733
4745
C.getDriver ().Diag (clang::diag::err_drv_bad_target_id) << ArchStr;
4734
- C.setContainsError ();
4735
4746
return StringRef ();
4736
4747
}
4737
4748
return Args.MakeArgStringRef (getCanonicalTargetID (*Arch, Features));
@@ -4754,10 +4765,10 @@ getConflictOffloadArchCombination(const llvm::DenseSet<StringRef> &Archs,
4754
4765
return getConflictTargetIDCombination (ArchSet);
4755
4766
}
4756
4767
4757
- llvm::DenseSet <StringRef>
4768
+ llvm::SmallVector <StringRef>
4758
4769
Driver::getOffloadArchs (Compilation &C, const llvm::opt::DerivedArgList &Args,
4759
4770
Action::OffloadKind Kind, const ToolChain *TC,
4760
- bool SuppressError ) const {
4771
+ bool SpecificToolchain ) const {
4761
4772
if (!TC)
4762
4773
TC = &C.getDefaultToolChain ();
4763
4774
@@ -4772,9 +4783,6 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
4772
4783
: " --no-offload-arch" );
4773
4784
}
4774
4785
4775
- if (KnownArchs.contains (TC))
4776
- return KnownArchs.lookup (TC);
4777
-
4778
4786
llvm::DenseSet<StringRef> Archs;
4779
4787
for (auto *Arg : C.getArgsForToolChain (TC, /* BoundArch=*/ " " , Kind)) {
4780
4788
// Add or remove the seen architectures in order of appearance. If an
@@ -4784,7 +4792,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
4784
4792
if (Arch == " native" || Arch.empty ()) {
4785
4793
auto GPUsOrErr = TC->getSystemGPUArchs (Args);
4786
4794
if (!GPUsOrErr) {
4787
- if (SuppressError )
4795
+ if (!SpecificToolchain )
4788
4796
llvm::consumeError (GPUsOrErr.takeError ());
4789
4797
else
4790
4798
TC->getDriver ().Diag (diag::err_drv_undetermined_gpu_arch)
@@ -4794,16 +4802,21 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
4794
4802
}
4795
4803
4796
4804
for (auto ArchStr : *GPUsOrErr) {
4797
- Archs. insert (
4805
+ StringRef CanonicalStr =
4798
4806
getCanonicalArchString (C, Args, Args.MakeArgString (ArchStr),
4799
- TC->getTriple (), SuppressError));
4807
+ TC->getTriple (), SpecificToolchain);
4808
+ if (!CanonicalStr.empty ())
4809
+ Archs.insert (CanonicalStr);
4810
+ else if (SpecificToolchain)
4811
+ return llvm::SmallVector<StringRef>();
4800
4812
}
4801
4813
} else {
4802
- StringRef ArchStr = getCanonicalArchString (
4803
- C, Args, Arch, TC->getTriple (), SuppressError);
4804
- if (ArchStr.empty ())
4805
- return Archs;
4806
- Archs.insert (ArchStr);
4814
+ StringRef CanonicalStr = getCanonicalArchString (
4815
+ C, Args, Arch, TC->getTriple (), SpecificToolchain);
4816
+ if (!CanonicalStr.empty ())
4817
+ Archs.insert (CanonicalStr);
4818
+ else if (SpecificToolchain)
4819
+ return llvm::SmallVector<StringRef>();
4807
4820
}
4808
4821
}
4809
4822
} else if (Arg->getOption ().matches (options::OPT_no_offload_arch_EQ)) {
@@ -4812,27 +4825,20 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
4812
4825
Archs.clear ();
4813
4826
} else {
4814
4827
StringRef ArchStr = getCanonicalArchString (
4815
- C, Args, Arch, TC->getTriple (), SuppressError);
4816
- if (ArchStr.empty ())
4817
- return Archs;
4828
+ C, Args, Arch, TC->getTriple (), SpecificToolchain);
4818
4829
Archs.erase (ArchStr);
4819
4830
}
4820
4831
}
4821
4832
}
4822
4833
}
4823
4834
4824
4835
if (auto ConflictingArchs =
4825
- getConflictOffloadArchCombination (Archs, TC->getTriple ())) {
4836
+ getConflictOffloadArchCombination (Archs, TC->getTriple ()))
4826
4837
C.getDriver ().Diag (clang::diag::err_drv_bad_offload_arch_combo)
4827
4838
<< ConflictingArchs->first << ConflictingArchs->second ;
4828
- C.setContainsError ();
4829
- }
4830
4839
4831
4840
// Skip filling defaults if we're just querying what is available.
4832
- if (SuppressError)
4833
- return Archs;
4834
-
4835
- if (Archs.empty ()) {
4841
+ if (SpecificToolchain && Archs.empty ()) {
4836
4842
if (Kind == Action::OFK_Cuda) {
4837
4843
Archs.insert (OffloadArchToString (OffloadArch::CudaDefault));
4838
4844
} else if (Kind == Action::OFK_HIP) {
@@ -4858,12 +4864,13 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
4858
4864
}
4859
4865
}
4860
4866
}
4861
- } else {
4862
- Args.ClaimAllArgs (options::OPT_offload_arch_EQ);
4863
- Args.ClaimAllArgs (options::OPT_no_offload_arch_EQ);
4864
4867
}
4868
+ Args.ClaimAllArgs (options::OPT_offload_arch_EQ);
4869
+ Args.ClaimAllArgs (options::OPT_no_offload_arch_EQ);
4865
4870
4866
- return Archs;
4871
+ SmallVector<StringRef> Sorted (Archs.begin (), Archs.end ());
4872
+ llvm::sort (Sorted);
4873
+ return Sorted;
4867
4874
}
4868
4875
4869
4876
Action *Driver::BuildOffloadingActions (Compilation &C,
@@ -4927,10 +4934,7 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
4927
4934
// Get the product of all bound architectures and toolchains.
4928
4935
SmallVector<std::pair<const ToolChain *, StringRef>> TCAndArchs;
4929
4936
for (const ToolChain *TC : ToolChains) {
4930
- llvm::DenseSet<StringRef> Arches = getOffloadArchs (C, Args, Kind, TC);
4931
- SmallVector<StringRef, 0 > Sorted (Arches.begin (), Arches.end ());
4932
- llvm::sort (Sorted);
4933
- for (StringRef Arch : Sorted) {
4937
+ for (StringRef Arch : OffloadArchs.lookup (TC)) {
4934
4938
TCAndArchs.push_back (std::make_pair (TC, Arch));
4935
4939
DeviceActions.push_back (
4936
4940
C.MakeAction <InputAction>(*InputArg, InputType, CUID));
0 commit comments