-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[clang][Driver][HIP] Add support for mixing AMDGCNSPIRV & concrete offload-arch
s.
#113509
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-driver Author: Alex Voicu (AlexVlx) ChangesThis removes the temporary ban on mixing AMDGCN flavoured SPIR-V and concrete targets (e.g. Full diff: https://github.com/llvm/llvm-project/pull/113509.diff 5 Files Affected:
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 9878a9dad78d40..1590fbad5e4949 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -149,13 +149,9 @@ static std::optional<llvm::Triple>
getHIPOffloadTargetTriple(const Driver &D, const ArgList &Args) {
if (!Args.hasArg(options::OPT_offload_EQ)) {
auto OffloadArchs = Args.getAllArgValues(options::OPT_offload_arch_EQ);
- if (llvm::is_contained(OffloadArchs, "amdgcnspirv")) {
- if (OffloadArchs.size() == 1)
- return llvm::Triple("spirv64-amd-amdhsa");
- // Mixing specific & SPIR-V compilation is not supported for now.
- D.Diag(diag::err_drv_only_one_offload_target_supported);
- return std::nullopt;
- }
+ if (llvm::is_contained(OffloadArchs, "amdgcnspirv") &&
+ OffloadArchs.size() == 1)
+ return llvm::Triple("spirv64-amd-amdhsa");
return llvm::Triple("amdgcn-amd-amdhsa"); // Default HIP triple.
}
auto TT = getOffloadTargetTriple(D, Args);
@@ -3477,9 +3473,10 @@ class OffloadingActionBuilder final {
llvm::StringMap<bool> Features;
// getHIPOffloadTargetTriple() is known to return valid value as it has
// been called successfully in the CreateOffloadingDeviceToolChains().
- auto ArchStr = parseTargetID(
- *getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs()), IdStr,
- &Features);
+ auto T = (IdStr == "amdgcnspirv") ?
+ llvm::Triple("spirv64-amd-amdhsa") :
+ *getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs());
+ auto ArchStr = parseTargetID(T, IdStr, &Features);
if (!ArchStr) {
C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << IdStr;
C.setContainsError();
@@ -5750,7 +5747,7 @@ InputInfoList Driver::BuildJobsForActionNoCache(
// We only have to generate a prefix for the host if this is not a top-level
// action.
std::string OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
- A->getOffloadingDeviceKind(), TC->getTriple().normalize(),
+ A->getOffloadingDeviceKind(), EffectiveTriple.normalize(),
/*CreatePrefixForHost=*/isa<OffloadPackagerJobAction>(A) ||
!(A->getOffloadingHostActiveKinds() == Action::OFK_None ||
AtTopLevel));
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 4df31770950858..b3c2fe84e177c8 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -1095,6 +1095,12 @@ std::string ToolChain::ComputeLLVMTriple(const ArgList &Args,
}
case llvm::Triple::aarch64_32:
return getTripleString();
+ case llvm::Triple::amdgcn: {
+ llvm::Triple Triple = getTriple();
+ if (Args.getLastArgValue(options::OPT_mcpu_EQ) == "amdgcnspirv")
+ Triple.setArch(llvm::Triple::ArchType::spirv64);
+ return Triple.getTriple();
+ }
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp
index bae05cc0bb7353..4eb8c4f58923fd 100644
--- a/clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -205,7 +205,7 @@ void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
if (JA.getType() == types::TY_LLVM_BC)
return constructLlvmLinkCommand(C, JA, Inputs, Output, Args);
- if (getToolChain().getTriple().isSPIRV())
+ if (getToolChain().getEffectiveTriple().isSPIRV())
return constructLinkAndEmitSpirvCommand(C, JA, Inputs, Output, Args);
return constructLldCommand(C, JA, Inputs, Output, Args);
@@ -264,12 +264,14 @@ void HIPAMDToolChain::addClangTargetOptions(
CC1Args.push_back("-fapply-global-visibility-to-externs");
}
- // For SPIR-V we embed the command-line into the generated binary, in order to
- // retrieve it at JIT time and be able to do target specific compilation with
- // options that match the user-supplied ones.
- if (getTriple().isSPIRV() &&
- !DriverArgs.hasArg(options::OPT_fembed_bitcode_marker))
- CC1Args.push_back("-fembed-bitcode=marker");
+ if (getEffectiveTriple().isSPIRV()) {
+ // For SPIR-V we embed the command-line into the generated binary, in order
+ // to retrieve it at JIT time and be able to do target specific compilation
+ // with options that match the user-supplied ones.
+ if (!DriverArgs.hasArg(options::OPT_fembed_bitcode_marker))
+ CC1Args.push_back("-fembed-bitcode=marker");
+ return; // No DeviceLibs for SPIR-V.
+ }
for (auto BCFile : getDeviceLibs(DriverArgs)) {
CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode"
@@ -361,8 +363,7 @@ llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs;
if (DriverArgs.hasArg(options::OPT_nogpulib) ||
- (getTriple().getArch() == llvm::Triple::spirv64 &&
- getTriple().getVendor() == llvm::Triple::AMD))
+ getGPUArch(DriverArgs) == "amdgcnspirv")
return {};
ArgStringList LibraryPaths;
@@ -437,8 +438,8 @@ HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
void HIPAMDToolChain::checkTargetID(
const llvm::opt::ArgList &DriverArgs) const {
auto PTID = getParsedTargetID(DriverArgs);
- if (PTID.OptionalTargetID && !PTID.OptionalGPUArch) {
+ if (PTID.OptionalTargetID && !PTID.OptionalGPUArch &&
+ PTID.OptionalTargetID != "amdgcnspirv")
getDriver().Diag(clang::diag::err_drv_bad_target_id)
<< *PTID.OptionalTargetID;
- }
}
diff --git a/clang/lib/Driver/ToolChains/HIPUtility.cpp b/clang/lib/Driver/ToolChains/HIPUtility.cpp
index 9fe4f1e0e20965..c8075cbfe36b35 100644
--- a/clang/lib/Driver/ToolChains/HIPUtility.cpp
+++ b/clang/lib/Driver/ToolChains/HIPUtility.cpp
@@ -304,10 +304,14 @@ void HIP::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
for (const auto &II : Inputs) {
const auto *A = II.getAction();
auto ArchStr = llvm::StringRef(A->getOffloadingArch());
- BundlerTargetArg +=
- "," + OffloadKind + "-" + normalizeForBundler(TT, !ArchStr.empty());
+ BundlerTargetArg += ',' + OffloadKind + '-';
+ if (ArchStr == "amdgcnspirv")
+ BundlerTargetArg +=
+ normalizeForBundler(llvm::Triple("spirv64-amd-amdhsa"), true);
+ else
+ BundlerTargetArg += normalizeForBundler(TT, !ArchStr.empty());
if (!ArchStr.empty())
- BundlerTargetArg += "-" + ArchStr.str();
+ BundlerTargetArg += '-' + ArchStr.str();
}
BundlerArgs.push_back(Args.MakeArgString(BundlerTargetArg));
diff --git a/clang/test/Driver/hip-toolchain-no-rdc.hip b/clang/test/Driver/hip-toolchain-no-rdc.hip
index 0cdc82ead65408..047617350222d9 100644
--- a/clang/test/Driver/hip-toolchain-no-rdc.hip
+++ b/clang/test/Driver/hip-toolchain-no-rdc.hip
@@ -36,6 +36,11 @@
// RUN: %t/a.o %t/b.o \
// RUN: 2>&1 | FileCheck -check-prefixes=LKONLY %s
+// RUN: %clang -### --target=x86_64-linux-gnu \
+// RUN: --offload-arch=amdgcnspirv --offload-arch=gfx900 \
+// RUN: %s -nogpuinc -nogpulib \
+// RUN: 2>&1 | FileCheck -check-prefixes=AMDGCNSPIRV %s
+
//
// Compile device code in a.cu to code object for gfx803.
//
@@ -177,3 +182,16 @@
// LKONLY-NOT: {{".*/llc"}}
// LKONLY: [[LD:".*ld.*"]] {{.*}} "{{.*/a.o}}" "{{.*/b.o}}"
// LKONLY-NOT: "-T" "{{.*}}.lk"
+
+//
+// Check mixed AMDGCNSPIRV and concrete GPU arch.
+//
+
+// AMDGCNSPIRV: "-cc1" "-triple" "spirv64-amd-amdhsa" {{.*}}"-emit-obj" {{.*}} "-o" "[[AMDGCNSPV_OBJ:.*o]]"
+// AMDGCNSPIRV: {{".*llvm-link"}} "-o" "[[AMDGCNSPV_TMP:.*out]]" "[[AMDGCNSPV_OBJ]]"
+// AMDGCNSPIRV: {{".*llvm-spirv"}} "--spirv-max-version=1.6" "--spirv-ext=+all" {{.*}} "[[AMDGCNSPV_TMP]]" {{.*}}"-o" "[[AMDGCNSPV_CO:.*out]]"
+// AMDGCNSPIRV: "-cc1" "-triple" "amdgcn-amd-amdhsa" {{.*}}"-emit-obj" {{.*}}"-target-cpu" "gfx900"{{.*}} "-o" "[[GFX900_OBJ:.*o]]"
+// AMDGCNSPIRV: {{".*lld"}} {{.*}}"-plugin-opt=mcpu=gfx900" {{.*}} "-o" "[[GFX900_CO:.*out]]" {{.*}}"[[GFX900_OBJ]]"
+// AMDGCNSPIRV: {{".*clang-offload-bundler"}} "-type=o"
+// AMDGCNSPIRV-SAME: "-targets={{.*}}hipv4-spirv64-amd-amdhsa--amdgcnspirv,hipv4-amdgcn-amd-amdhsa--gfx900"
+// AMDGCNSPIRV-SAME: "-input=[[AMDGCNSPV_CO]]" "-input=[[GFX900_CO]]"
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
…v_and_concrete_are_friends
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So, I'm thinking that SPIR-V here fits better as a triple. I've been wanting to generalize the --offload
option to behave more like -fopenmp-targets
. I.e. we would have --offload=spirv64-amd-amdhsa,amdgcn-amd-amdhsa -Xarch_device_amdgcn-amd-amdhsa --offload-arch=gfx1030,gfx90a,gfx942
for a really verbose example. I'd like a separate option that simply targets the toolchain and then one that creates the BoundArch
s. If it's not set then we default to some value (whatever amdgpu-arch says?).
if (getTriple().isSPIRV() && | ||
!DriverArgs.hasArg(options::OPT_fembed_bitcode_marker)) | ||
CC1Args.push_back("-fembed-bitcode=marker"); | ||
if (getEffectiveTriple().isSPIRV()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I remember seeing an existing way to do this for LTO or something but I don't remember.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So, I'm thinking that SPIR-V here fits better as a triple. I've been wanting to generalize the
--offload
option to behave more like-fopenmp-targets
. I.e. we would have--offload=spirv64-amd-amdhsa,amdgcn-amd-amdhsa -Xarch_device_amdgcn-amd-amdhsa --offload-arch=gfx1030,gfx90a,gfx942
for a really verbose example. I'd like a separate option that simply targets the toolchain and then one that creates theBoundArch
s. If it's not set then we default to some value (whatever amdgpu-arch says?).
We don't use offload
at the moment, that's for HIPSPV. Of course, future, fancy work is more than welcome, but this merely slots into the existing infra and current use cases.
I'm not a huge fan of smuggling what is essentially a Toolchain behind what was intended as a |
It's how it works today, I believe: |
It's pretty ambiguous, right now it mostly works through a combination of the file type and good guessing because the targets people care about now have distinct names. For OpenMP, you can offload to x64 CPUs or whatever and then it doesn't work via I don't think this is a blocker, but I think it bears deep consideration on how we expect these toolchains to actually work. |
Gentle ping. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess this is good for now, but we really need to work with how to actually handle multiple toolchains at once.
…ffload-arch`s. (llvm#113509) This removes the temporary ban on mixing AMDGCN flavoured SPIR-V and concrete targets (e.g. `gfx900`) in the same HIPAMD compilation. This is done primarily by tweaking the effective / observable triple when the target is `amdgcnspirv`, which seamlessly composes with the existing infra. The test is stolen from llvm#75357.
Adds the following patches AMDGPU: Remove wavefrontsize64 feature from dummy target llvm#117410 [LLVM][NFC] Use used's element type if available llvm#116804 [llvm][AMDGPU] Fold llvm.amdgcn.wavefrontsize early llvm#114481 [clang][Driver][HIP] Add support for mixing AMDGCNSPIRV & concrete offload-archs. llvm#113509 [clang][llvm][SPIR-V] Explicitly encode native integer widths for SPIR-V llvm#110695 [llvm][opt][Transforms] Replacement calloc should match replaced malloc llvm#110524 [clang][HIP] Don't use the OpenCLKernel CC when targeting AMDGCNSPIRV llvm#110447 [cuda][HIP] constant should imply constant llvm#110182 [llvm][SPIRV] Expose fast popcnt support for SPIR-V targets llvm#109845 [clang][CodeGen][SPIR-V] Fix incorrect SYCL usage, implement missing interface llvm#109415 [SPIRV][RFC] Rework / extend support for memory scopes llvm#106429 [clang][CodeGen][SPIR-V][AMDGPU] Tweak AMDGCNSPIRV ABI to allow for the correct handling of aggregates passed to kernels / functions. llvm#102776 Change-Id: I2b9ab54aba1c9345b9b0eb84409e6ed6c3cdb6cd
This removes the temporary ban on mixing AMDGCN flavoured SPIR-V and concrete targets (e.g.
gfx900
) in the same HIPAMD compilation. This is done primarily by tweaking the effective / observable triple when the target isamdgcnspirv
, which seamlessly composes with the existing infra. The test is stolen from #75357.