Skip to content

Commit 740313c

Browse files
AlexVlx authored and AlexisPerry committed
[clang][Driver] Add HIPAMD Driver support for AMDGCN flavoured SPIR-V (llvm#95061)
This patch augments the HIPAMD driver to allow it to target AMDGCN flavoured SPIR-V compilation. It's mostly straightforward, as we re-use some of the existing SPIRV infra, however there are a few notable additions:
- we introduce an `amdgcnspirv` offload arch, rather than relying on using `generic` (this is already fairly overloaded) or simply using `spirv` or `spirv64` (we'll want to use these to denote unflavoured SPIRV, once we bring up that capability)
- initially it won't be possible to mix-in SPIR-V and concrete AMDGPU targets, as it would require some relatively intrusive surgery in the HIPAMD Toolchain and the Driver to deal with two triples (`spirv64-amd-amdhsa` and `amdgcn-amd-amdhsa`, respectively)
- in order to retain user provided compiler flags and have them available at JIT time, we rely on embedding the command line via `-fembed-bitcode=marker`, which the bitcode writer had previously not implemented for SPIRV; we only allow it conditionally for AMDGCN flavoured SPIRV, and it is handled correctly by the Translator (it ends up as a string literal)

Once the SPIRV BE is no longer experimental we'll switch to using that rather than the translator. There's some additional work that'll come via a separate PR around correctly piping through AMDGCN's implementation of `printf`; for now we merely handle its flags correctly.
1 parent ed81db5 commit 740313c

File tree

13 files changed

+87
-11
lines changed

13 files changed

+87
-11
lines changed

clang/include/clang/Basic/Cuda.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ enum class CudaArch {
128128
GFX12_GENERIC,
129129
GFX1200,
130130
GFX1201,
131+
AMDGCNSPIRV,
131132
Generic, // A processor model named 'generic' if the target backend defines a
132133
// public one.
133134
LAST,

clang/lib/Basic/Cuda.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ static const CudaArchToStringMap arch_names[] = {
148148
{CudaArch::GFX12_GENERIC, "gfx12-generic", "compute_amdgcn"},
149149
GFX(1200), // gfx1200
150150
GFX(1201), // gfx1201
151+
{CudaArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"},
151152
{CudaArch::Generic, "generic", ""},
152153
// clang-format on
153154
};

clang/lib/Basic/Targets/NVPTX.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
232232
case CudaArch::GFX12_GENERIC:
233233
case CudaArch::GFX1200:
234234
case CudaArch::GFX1201:
235+
case CudaArch::AMDGCNSPIRV:
235236
case CudaArch::Generic:
236237
case CudaArch::LAST:
237238
break;

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3541,6 +3541,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(
35413541
case CudaArch::GFX12_GENERIC:
35423542
case CudaArch::GFX1200:
35433543
case CudaArch::GFX1201:
3544+
case CudaArch::AMDGCNSPIRV:
35443545
case CudaArch::Generic:
35453546
case CudaArch::UNUSED:
35463547
case CudaArch::UNKNOWN:

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -907,7 +907,8 @@ void CodeGenModule::Release() {
907907
if (Context.getTargetInfo().getTriple().isWasm())
908908
EmitMainVoidAlias();
909909

910-
if (getTriple().isAMDGPU()) {
910+
if (getTriple().isAMDGPU() ||
911+
(getTriple().isSPIRV() && getTriple().getVendor() == llvm::Triple::AMD)) {
911912
// Emit amdhsa_code_object_version module flag, which is code object version
912913
// times 100.
913914
if (getTarget().getTargetOpts().CodeObjectVersion !=

clang/lib/Driver/Driver.cpp

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,14 @@ getNVIDIAOffloadTargetTriple(const Driver &D, const ArgList &Args,
147147
static std::optional<llvm::Triple>
148148
getHIPOffloadTargetTriple(const Driver &D, const ArgList &Args) {
149149
if (!Args.hasArg(options::OPT_offload_EQ)) {
150+
auto OffloadArchs = Args.getAllArgValues(options::OPT_offload_arch_EQ);
151+
if (llvm::find(OffloadArchs, "amdgcnspirv") != OffloadArchs.cend()) {
152+
if (OffloadArchs.size() == 1)
153+
return llvm::Triple("spirv64-amd-amdhsa");
154+
// Mixing specific & SPIR-V compilation is not supported for now.
155+
D.Diag(diag::err_drv_only_one_offload_target_supported);
156+
return std::nullopt;
157+
}
150158
return llvm::Triple("amdgcn-amd-amdhsa"); // Default HIP triple.
151159
}
152160
auto TT = getOffloadTargetTriple(D, Args);
@@ -3231,10 +3239,14 @@ class OffloadingActionBuilder final {
32313239
// supported GPUs. sm_20 code should work correctly, if
32323240
// suboptimally, on all newer GPUs.
32333241
if (GpuArchList.empty()) {
3234-
if (ToolChains.front()->getTriple().isSPIRV())
3235-
GpuArchList.push_back(CudaArch::Generic);
3236-
else
3242+
if (ToolChains.front()->getTriple().isSPIRV()) {
3243+
if (ToolChains.front()->getTriple().getVendor() == llvm::Triple::AMD)
3244+
GpuArchList.push_back(CudaArch::AMDGCNSPIRV);
3245+
else
3246+
GpuArchList.push_back(CudaArch::Generic);
3247+
} else {
32373248
GpuArchList.push_back(DefaultCudaArch);
3249+
}
32383250
}
32393251

32403252
return Error;
@@ -6501,9 +6513,11 @@ const ToolChain &Driver::getOffloadingDeviceToolChain(
65016513
// things.
65026514
switch (TargetDeviceOffloadKind) {
65036515
case Action::OFK_HIP: {
6504-
if (Target.getArch() == llvm::Triple::amdgcn &&
6505-
Target.getVendor() == llvm::Triple::AMD &&
6506-
Target.getOS() == llvm::Triple::AMDHSA)
6516+
if (((Target.getArch() == llvm::Triple::amdgcn ||
6517+
Target.getArch() == llvm::Triple::spirv64) &&
6518+
Target.getVendor() == llvm::Triple::AMD &&
6519+
Target.getOS() == llvm::Triple::AMDHSA) ||
6520+
!Args.hasArgNoClaim(options::OPT_offload_EQ))
65076521
TC = std::make_unique<toolchains::HIPAMDToolChain>(*this, Target,
65086522
HostTC, Args);
65096523
else if (Target.getArch() == llvm::Triple::spirv64 &&

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4939,7 +4939,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
49394939
CmdArgs.push_back(Args.MakeArgString(NormalizedTriple));
49404940

49414941
if (JA.isDeviceOffloading(Action::OFK_HIP) &&
4942-
getToolChain().getTriple().isAMDGPU()) {
4942+
(getToolChain().getTriple().isAMDGPU() ||
4943+
(getToolChain().getTriple().isSPIRV() &&
4944+
getToolChain().getTriple().getVendor() == llvm::Triple::AMD))) {
49434945
// Device side compilation printf
49444946
if (Args.getLastArg(options::OPT_mprintf_kind_EQ)) {
49454947
CmdArgs.push_back(Args.MakeArgString(

clang/lib/Driver/ToolChains/HIPAMD.cpp

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "AMDGPU.h"
1111
#include "CommonArgs.h"
1212
#include "HIPUtility.h"
13+
#include "SPIRV.h"
1314
#include "clang/Basic/Cuda.h"
1415
#include "clang/Basic/TargetID.h"
1516
#include "clang/Driver/Compilation.h"
@@ -193,6 +194,33 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
193194
Lld, LldArgs, Inputs, Output));
194195
}
195196

197+
// For SPIR-V the inputs for the job are device AMDGCN SPIR-V flavoured bitcode
198+
// and the output is either a compiled SPIR-V binary or bitcode (-emit-llvm). It
199+
// calls llvm-link and then the llvm-spirv translator. Once the SPIR-V BE will
200+
// be promoted from experimental, we will switch to using that. TODO: consider
201+
// if we want to run any targeted optimisations over IR here, over generic
202+
// SPIR-V.
203+
void AMDGCN::Linker::constructLinkAndEmitSpirvCommand(
204+
Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
205+
const InputInfo &Output, const llvm::opt::ArgList &Args) const {
206+
assert(!Inputs.empty() && "Must have at least one input.");
207+
208+
constructLlvmLinkCommand(C, JA, Inputs, Output, Args);
209+
210+
// Linked BC is now in Output
211+
212+
// Emit SPIR-V binary.
213+
llvm::opt::ArgStringList TrArgs{
214+
"--spirv-max-version=1.6",
215+
"--spirv-ext=+all",
216+
"--spirv-allow-extra-diexpressions",
217+
"--spirv-allow-unknown-intrinsics",
218+
"--spirv-lower-const-expr",
219+
"--spirv-preserve-auxdata",
220+
"--spirv-debug-info-version=nonsemantic-shader-200"};
221+
SPIRV::constructTranslateCommand(C, *this, JA, Output, Output, TrArgs);
222+
}
223+
196224
// For amdgcn the inputs of the linker job are device bitcode and output is
197225
// either an object file or bitcode (-emit-llvm). It calls llvm-link, opt,
198226
// llc, then lld steps.
@@ -214,6 +242,9 @@ void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
214242
if (JA.getType() == types::TY_LLVM_BC)
215243
return constructLlvmLinkCommand(C, JA, Inputs, Output, Args);
216244

245+
if (getToolChain().getTriple().isSPIRV())
246+
return constructLinkAndEmitSpirvCommand(C, JA, Inputs, Output, Args);
247+
217248
return constructLldCommand(C, JA, Inputs, Output, Args);
218249
}
219250

@@ -270,6 +301,13 @@ void HIPAMDToolChain::addClangTargetOptions(
270301
CC1Args.push_back("-fapply-global-visibility-to-externs");
271302
}
272303

304+
// For SPIR-V we embed the command-line into the generated binary, in order to
305+
// retrieve it at JIT time and be able to do target specific compilation with
306+
// options that match the user-supplied ones.
307+
if (getTriple().isSPIRV() &&
308+
!DriverArgs.hasArg(options::OPT_fembed_bitcode_marker))
309+
CC1Args.push_back("-fembed-bitcode=marker");
310+
273311
for (auto BCFile : getDeviceLibs(DriverArgs)) {
274312
CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode"
275313
: "-mlink-bitcode-file");
@@ -303,7 +341,8 @@ HIPAMDToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
303341
}
304342

305343
Tool *HIPAMDToolChain::buildLinker() const {
306-
assert(getTriple().getArch() == llvm::Triple::amdgcn);
344+
assert(getTriple().getArch() == llvm::Triple::amdgcn ||
345+
getTriple().getArch() == llvm::Triple::spirv64);
307346
return new tools::AMDGCN::Linker(*this);
308347
}
309348

@@ -358,7 +397,9 @@ VersionTuple HIPAMDToolChain::computeMSVCVersion(const Driver *D,
358397
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
359398
HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
360399
llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs;
361-
if (DriverArgs.hasArg(options::OPT_nogpulib))
400+
if (DriverArgs.hasArg(options::OPT_nogpulib) ||
401+
(getTriple().getArch() == llvm::Triple::spirv64 &&
402+
getTriple().getVendor() == llvm::Triple::AMD))
362403
return {};
363404
ArgStringList LibraryPaths;
364405

clang/lib/Driver/ToolChains/HIPAMD.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ class LLVM_LIBRARY_VISIBILITY Linker final : public Tool {
4040
const InputInfoList &Inputs,
4141
const InputInfo &Output,
4242
const llvm::opt::ArgList &Args) const;
43+
void constructLinkAndEmitSpirvCommand(Compilation &C, const JobAction &JA,
44+
const InputInfoList &Inputs,
45+
const InputInfo &Output,
46+
const llvm::opt::ArgList &Args) const;
4347
};
4448

4549
} // end namespace AMDGCN

clang/test/Driver/cuda-arch-translation.cu

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@
5959
// RUN: | FileCheck -check-prefixes=HIP,GFX900 %s
6060
// RUN: %clang -x hip -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=gfx902 -nogpuinc -nogpulib %s 2>&1 \
6161
// RUN: | FileCheck -check-prefixes=HIP,GFX902 %s
62+
// RUN: %clang -x hip -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=amdgcnspirv -nogpuinc -nogpulib %s 2>&1 \
63+
// RUN: | FileCheck -check-prefixes=HIP,SPIRV %s
6264

6365
// CUDA: ptxas
6466
// CUDA-SAME: -m64
@@ -95,3 +97,4 @@
9597
// GFX810:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx810
9698
// GFX900:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx900
9799
// GFX902:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx902
100+
// SPIRV:-targets=host-x86_64-unknown-linux,hip-spirv64-amd-amdhsa--amdgcnspirv

clang/test/Frontend/embed-bitcode.ll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
; RUN: %clang_cc1 -triple aarch64 -emit-llvm \
1111
; RUN: -fembed-bitcode=all -x ir %s -o - \
1212
; RUN: | FileCheck %s -check-prefix=CHECK-ELF
13+
; RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm \
14+
; RUN: -fembed-bitcode=all -x ir %s -o - \
15+
; RUN: | FileCheck %s -check-prefix=CHECK-ELF
1316

1417
; check .bc input
1518
; RUN: %clang_cc1 -triple thumbv7-apple-ios8.0.0 -emit-llvm-bc \

clang/test/Misc/target-invalid-cpu-note.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929

3030
// RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
3131
// NVPTX: error: unknown target CPU 'not-a-cpu'
32-
// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, sm_90a, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx9-generic, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx10-1-generic, gfx1010, gfx1011, gfx1012, gfx1013, gfx10-3-generic, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx11-generic, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1152, gfx12-generic, gfx1200, gfx1201{{$}}
32+
// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, sm_90a, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx9-generic, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx10-1-generic, gfx1010, gfx1011, gfx1012, gfx1013, gfx10-3-generic, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx11-generic, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1152, gfx12-generic, gfx1200, gfx1201, amdgcnspirv{{$}}
3333

3434
// RUN: not %clang_cc1 -triple r600--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600
3535
// R600: error: unknown target CPU 'not-a-cpu'

llvm/lib/Bitcode/Writer/BitcodeWriter.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5309,6 +5309,8 @@ static const char *getSectionNameForBitcode(const Triple &T) {
53095309
llvm_unreachable("GOFF is not yet implemented");
53105310
break;
53115311
case Triple::SPIRV:
5312+
if (T.getVendor() == Triple::AMD)
5313+
return ".llvmbc";
53125314
llvm_unreachable("SPIRV is not yet implemented");
53135315
break;
53145316
case Triple::XCOFF:
@@ -5334,6 +5336,8 @@ static const char *getSectionNameForCommandline(const Triple &T) {
53345336
llvm_unreachable("GOFF is not yet implemented");
53355337
break;
53365338
case Triple::SPIRV:
5339+
if (T.getVendor() == Triple::AMD)
5340+
return ".llvmcmd";
53375341
llvm_unreachable("SPIRV is not yet implemented");
53385342
break;
53395343
case Triple::XCOFF:

0 commit comments

Comments
 (0)