Skip to content

[AMDGPU] Add an asm directive to track code_object_version #76267

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions llvm/docs/AMDGPUUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15428,6 +15428,14 @@ command-line options such as ``-triple``, ``-mcpu``, and
The target ID syntax used for code object V2 to V3 for this directive differs
from that used elsewhere. See :ref:`amdgpu-target-id-v2-v3`.

.. _amdgpu-assembler-directive-amdhsa-code-object-version:

.amdhsa_code_object_version <version>
+++++++++++++++++++++++++++++++++++++

Optional directive that declares the code object version to be generated by the
assembler. If the directive is not present, the default code object version is
used.

.amdhsa_kernel <name>
+++++++++++++++++++++

Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/MC/MCObjectWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ class MCObjectWriter {
/// ELF only. Mark that we have seen GNU ABI usage (e.g. SHF_GNU_RETAIN).
virtual void markGnuAbi() {}

/// ELF only. Override the default ABIVersion in the ELF header.
virtual void setOverrideABIVersion(uint8_t ABIVersion) {}

/// Tell the object writer to emit an address-significance table during
/// writeObject(). If this function is not called, all symbols are treated as
/// address-significant.
Expand Down
10 changes: 2 additions & 8 deletions llvm/include/llvm/Support/AMDGPUMetadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,6 @@ namespace AMDGPU {
//===----------------------------------------------------------------------===//
namespace HSAMD {

/// HSA metadata major version for code object V2.
constexpr uint32_t VersionMajorV2 = 1;
/// HSA metadata minor version for code object V2.
constexpr uint32_t VersionMinorV2 = 0;

/// HSA metadata major version for code object V3.
constexpr uint32_t VersionMajorV3 = 1;
/// HSA metadata minor version for code object V3.
Expand All @@ -49,10 +44,9 @@ constexpr uint32_t VersionMajorV5 = 1;
/// HSA metadata minor version for code object V5.
constexpr uint32_t VersionMinorV5 = 2;

/// HSA metadata beginning assembler directive.
/// Old HSA metadata beginning assembler directive for V2. This is only used for
/// diagnostics now.
constexpr char AssemblerDirectiveBegin[] = ".amd_amdgpu_hsa_metadata";
/// HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveEnd[] = ".end_amd_amdgpu_hsa_metadata";

/// Access qualifiers.
enum class AccessQualifier : uint8_t {
Expand Down
11 changes: 10 additions & 1 deletion llvm/lib/MC/ELFObjectWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,8 @@ class ELFObjectWriter : public MCObjectWriter {

bool SeenGnuAbi = false;

std::optional<uint8_t> OverrideABIVersion;

bool hasRelocationAddend() const;

bool shouldRelocateWithSymbol(const MCAssembler &Asm, const MCValue &Val,
Expand All @@ -238,6 +240,7 @@ class ELFObjectWriter : public MCObjectWriter {

void reset() override {
SeenGnuAbi = false;
OverrideABIVersion.reset();
Relocations.clear();
Renames.clear();
MCObjectWriter::reset();
Expand All @@ -264,6 +267,10 @@ class ELFObjectWriter : public MCObjectWriter {
/// Records that GNU ABI usage was seen by setting the SeenGnuAbi flag.
void markGnuAbi() override { SeenGnuAbi = true; }
/// Returns the SeenGnuAbi flag: true once markGnuAbi() has been called and
/// reset() has not cleared it again.
bool seenGnuAbi() const { return SeenGnuAbi; }

/// Returns true if an ABI version override is currently stored, i.e. the
/// optional OverrideABIVersion holds a value.
bool seenOverrideABIVersion() const { return OverrideABIVersion.has_value(); }
/// Returns the stored ABI version override. This goes through
/// std::optional::value(), which does not yield a value when the optional is
/// empty, so check seenOverrideABIVersion() before calling.
uint8_t getOverrideABIVersion() const { return OverrideABIVersion.value(); }
/// Stores \p V as the ABI version override. ELFWriter::writeHeader() then
/// writes it to e_ident[EI_ABIVERSION] instead of the target object writer's
/// getABIVersion() value.
void setOverrideABIVersion(uint8_t V) override { OverrideABIVersion = V; }

friend struct ELFWriter;
};

Expand Down Expand Up @@ -417,7 +424,9 @@ void ELFWriter::writeHeader(const MCAssembler &Asm) {
? int(ELF::ELFOSABI_GNU)
: OSABI);
// e_ident[EI_ABIVERSION]
W.OS << char(OWriter.TargetObjectWriter->getABIVersion());
W.OS << char(OWriter.seenOverrideABIVersion()
? OWriter.getOverrideABIVersion()
: OWriter.TargetObjectWriter->getABIVersion());

W.OS.write_zeros(ELF::EI_NIDENT - ELF::EI_PAD);

Expand Down
14 changes: 8 additions & 6 deletions llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,11 @@ void AMDGPUAsmPrinter::initTargetStreamer(Module &M) {

getTargetStreamer()->EmitDirectiveAMDGCNTarget();

if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
getTargetStreamer()->EmitDirectiveAMDHSACodeObjectVersion(
CodeObjectVersion);
HSAMetadataStream->begin(M, *getTargetStreamer()->getTargetID());
}

if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
getTargetStreamer()->getPALMetadata()->readFromIR(M);
Expand Down Expand Up @@ -230,8 +233,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
IsaInfo::getNumExtraSGPRs(
&STM, CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
getTargetStreamer()->getTargetID()->isXnackOnOrAny()),
CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
CodeObjectVersion);
CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed);

Streamer.popSection();
}
Expand Down Expand Up @@ -323,7 +325,7 @@ void AMDGPUAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
}

bool AMDGPUAsmPrinter::doInitialization(Module &M) {
CodeObjectVersion = AMDGPU::getCodeObjectVersion(M);
CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(M);

if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
switch (CodeObjectVersion) {
Expand Down Expand Up @@ -631,8 +633,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
void AMDGPUAsmPrinter::initializeTargetID(const Module &M) {
// In the beginning all features are either 'Any' or 'NotSupported',
// depending on global target features. This will cover empty modules.
getTargetStreamer()->initializeTargetID(
*getGlobalSTI(), getGlobalSTI()->getFeatureString(), CodeObjectVersion);
getTargetStreamer()->initializeTargetID(*getGlobalSTI(),
getGlobalSTI()->getFeatureString());

// If module is empty, we are done.
if (M.empty())
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ class AMDGPUInformationCache : public InformationCache {
BumpPtrAllocator &Allocator,
SetVector<Function *> *CGSCC, TargetMachine &TM)
: InformationCache(M, AG, Allocator, CGSCC), TM(TM),
CodeObjectVersion(AMDGPU::getCodeObjectVersion(M)) {}
CodeObjectVersion(AMDGPU::getAMDHSACodeObjectVersion(M)) {}

TargetMachine &TM;

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,7 @@ static void allocateHSAUserSGPRs(CCState &CCInfo,

const Module *M = MF.getFunction().getParent();
if (UserSGPRInfo.hasQueuePtr() &&
AMDGPU::getCodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) {
AMDGPU::getAMDHSACodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) {
Register QueuePtrReg = Info.addQueuePtr(TRI);
MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(QueuePtrReg);
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -532,7 +532,8 @@ void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF,
Func.getCallingConv() != CallingConv::SPIR_KERNEL)
return;

auto CodeObjectVersion = AMDGPU::getCodeObjectVersion(*Func.getParent());
auto CodeObjectVersion =
AMDGPU::getAMDHSACodeObjectVersion(*Func.getParent());
auto Kern = getHSAKernelProps(MF, ProgramInfo, CodeObjectVersion);

auto Kernels =
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2139,7 +2139,7 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
// For code object version 5, private_base and shared_base are passed through
// implicit kernargs.
if (AMDGPU::getCodeObjectVersion(*MF.getFunction().getParent()) >=
if (AMDGPU::getAMDHSACodeObjectVersion(*MF.getFunction().getParent()) >=
AMDGPU::AMDHSA_COV5) {
AMDGPUTargetLowering::ImplicitParameter Param =
AS == AMDGPUAS::LOCAL_ADDRESS ? AMDGPUTargetLowering::SHARED_BASE
Expand Down Expand Up @@ -6582,7 +6582,7 @@ bool AMDGPULegalizerInfo::legalizeTrapHsaQueuePtr(

Register SGPR01(AMDGPU::SGPR0_SGPR1);
// For code object version 5, queue_ptr is passed through implicit kernarg.
if (AMDGPU::getCodeObjectVersion(*MF.getFunction().getParent()) >=
if (AMDGPU::getAMDHSACodeObjectVersion(*MF.getFunction().getParent()) >=
AMDGPU::AMDHSA_COV5) {
AMDGPUTargetLowering::ImplicitParameter Param =
AMDGPUTargetLowering::QUEUE_PTR;
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,8 @@ static bool processUse(CallInst *CI, bool IsV5OrAbove) {
// TargetPassConfig for subtarget.
bool AMDGPULowerKernelAttributes::runOnModule(Module &M) {
bool MadeChange = false;
bool IsV5OrAbove = AMDGPU::getCodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5;
bool IsV5OrAbove =
AMDGPU::getAMDHSACodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5;
Function *BasePtr = getBasePtrIntrinsic(M, IsV5OrAbove);

if (!BasePtr) // ImplicitArgPtr/DispatchPtr not used.
Expand Down Expand Up @@ -356,7 +357,7 @@ ModulePass *llvm::createAMDGPULowerKernelAttributesPass() {
PreservedAnalyses
AMDGPULowerKernelAttributesPass::run(Function &F, FunctionAnalysisManager &AM) {
bool IsV5OrAbove =
AMDGPU::getCodeObjectVersion(*F.getParent()) >= AMDGPU::AMDHSA_COV5;
AMDGPU::getAMDHSACodeObjectVersion(*F.getParent()) >= AMDGPU::AMDHSA_COV5;
Function *BasePtr = getBasePtrIntrinsic(*F.getParent(), IsV5OrAbove);

if (!BasePtr) // ImplicitArgPtr/DispatchPtr not used.
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {

// By default, for code object v5 and later, track only the minimum scratch
// size
if (AMDGPU::getCodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5 ||
if (AMDGPU::getAMDHSACodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5 ||
STI.getTargetTriple().getOS() == Triple::AMDPAL) {
if (!AssumedStackSizeForDynamicSizeObjects.getNumOccurrences())
AssumedStackSizeForDynamicSizeObjects = 0;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,7 @@ unsigned AMDGPUSubtarget::getImplicitArgNumBytes(const Function &F) const {
// Assume all implicit inputs are used by default
const Module *M = F.getParent();
unsigned NBytes =
AMDGPU::getCodeObjectVersion(*M) >= AMDGPU::AMDHSA_COV5 ? 256 : 56;
AMDGPU::getAMDHSACodeObjectVersion(*M) >= AMDGPU::AMDHSA_COV5 ? 256 : 56;
return F.getFnAttributeAsParsedInteger("amdgpu-implicitarg-num-bytes",
NBytes);
}
Expand Down
91 changes: 13 additions & 78 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1303,10 +1303,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
unsigned NextFreeSGPR, SMRange SGPRRange,
unsigned &VGPRBlocks, unsigned &SGPRBlocks);
bool ParseDirectiveAMDGCNTarget();
bool ParseDirectiveAMDHSACodeObjectVersion();
bool ParseDirectiveAMDHSAKernel();
bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
bool ParseDirectiveHSACodeObjectVersion();
bool ParseDirectiveHSACodeObjectISA();
bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
bool ParseDirectiveAMDKernelCodeT();
// TODO: Possibly make subtargetHasRegister const.
Expand Down Expand Up @@ -5133,20 +5131,6 @@ bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
return false;
}

/// Parses a "<major>, <minor>" version pair into \p Major and \p Minor.
///
/// Reads an absolute expression for the major version, requires a comma, and
/// then reads an absolute expression for the minor version. Each failure is
/// reported through TokError() with a message naming the invalid or missing
/// piece.
///
/// \returns false once both values have been parsed; otherwise whatever
/// TokError() returns (presumably true, the parser's error result - confirm).
bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
uint32_t &Minor) {
// A true result from ParseAsAbsoluteExpression() is treated as failure.
if (ParseAsAbsoluteExpression(Major))
return TokError("invalid major version");

// The two numbers must be separated by a comma.
if (!trySkipToken(AsmToken::Comma))
return TokError("minor version number required, comma expected");

if (ParseAsAbsoluteExpression(Minor))
return TokError("invalid minor version");

return false;
}

bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
return TokError("directive only supported for amdgcn architecture");
Expand Down Expand Up @@ -5612,63 +5596,18 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
}
}

getTargetStreamer().EmitAmdhsaKernelDescriptor(
getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
ReserveFlatScr, AMDGPU::getAmdhsaCodeObjectVersion());
return false;
}

bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
uint32_t Major;
uint32_t Minor;

if (ParseDirectiveMajorMinor(Major, Minor))
return true;

getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
NextFreeVGPR, NextFreeSGPR,
ReserveVCC, ReserveFlatScr);
return false;
}

bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
uint32_t Major;
uint32_t Minor;
uint32_t Stepping;
StringRef VendorName;
StringRef ArchName;

// If this directive has no arguments, then use the ISA version for the
// targeted GPU.
if (isToken(AsmToken::EndOfStatement)) {
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
ISA.Stepping,
"AMD", "AMDGPU");
return false;
}

if (ParseDirectiveMajorMinor(Major, Minor))
return true;

if (!trySkipToken(AsmToken::Comma))
return TokError("stepping version number required, comma expected");

if (ParseAsAbsoluteExpression(Stepping))
return TokError("invalid stepping version");

if (!trySkipToken(AsmToken::Comma))
return TokError("vendor name required, comma expected");

if (!parseString(VendorName, "invalid vendor name"))
return true;

if (!trySkipToken(AsmToken::Comma))
return TokError("arch name required, comma expected");

if (!parseString(ArchName, "invalid arch name"))
bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
uint32_t Version;
if (ParseAsAbsoluteExpression(Version))
return true;

getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
VendorName, ArchName);
getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
return false;
}

Expand Down Expand Up @@ -5955,16 +5894,13 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".amdhsa_kernel")
return ParseDirectiveAMDHSAKernel();

if (IDVal == ".amdhsa_code_object_version")
return ParseDirectiveAMDHSACodeObjectVersion();

// TODO: Restructure/combine with PAL metadata directive.
if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
return ParseDirectiveHSAMetadata();
} else {
if (IDVal == ".hsa_code_object_version")
return ParseDirectiveHSACodeObjectVersion();

if (IDVal == ".hsa_code_object_isa")
return ParseDirectiveHSACodeObjectISA();

if (IDVal == ".amd_kernel_code_t")
return ParseDirectiveAMDKernelCodeT();

Expand Down Expand Up @@ -8137,9 +8073,8 @@ void AMDGPUAsmParser::onBeginOfFile() {
return;

if (!getTargetStreamer().getTargetID())
getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString(),
// TODO: Should try to check code object version from directive???
AMDGPU::getAmdhsaCodeObjectVersion());
getTargetStreamer().initializeTargetID(getSTI(),
getSTI().getFeatureString());

if (isHsaAbi(getSTI()))
getTargetStreamer().EmitDirectiveAMDGCNTarget();
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2184,7 +2184,8 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective(
KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
}

if (AMDGPU::getAmdhsaCodeObjectVersion() >= AMDGPU::AMDHSA_COV5)
// FIXME: We should be looking at the ELF header ABI version for this.
if (AMDGPU::getDefaultAMDHSACodeObjectVersion() >= AMDGPU::AMDHSA_COV5)
PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);

Expand Down
14 changes: 5 additions & 9 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,13 +232,11 @@ class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend {
bool Is64Bit;
bool HasRelocationAddend;
uint8_t OSABI = ELF::ELFOSABI_NONE;
uint8_t ABIVersion = 0;

public:
ELFAMDGPUAsmBackend(const Target &T, const Triple &TT, uint8_t ABIVersion) :
AMDGPUAsmBackend(T), Is64Bit(TT.getArch() == Triple::amdgcn),
HasRelocationAddend(TT.getOS() == Triple::AMDHSA),
ABIVersion(ABIVersion) {
ELFAMDGPUAsmBackend(const Target &T, const Triple &TT)
: AMDGPUAsmBackend(T), Is64Bit(TT.getArch() == Triple::amdgcn),
HasRelocationAddend(TT.getOS() == Triple::AMDHSA) {
switch (TT.getOS()) {
case Triple::AMDHSA:
OSABI = ELF::ELFOSABI_AMDGPU_HSA;
Expand All @@ -256,8 +254,7 @@ class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend {

std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override {
return createAMDGPUELFObjectWriter(Is64Bit, OSABI, HasRelocationAddend,
ABIVersion);
return createAMDGPUELFObjectWriter(Is64Bit, OSABI, HasRelocationAddend);
}
};

Expand All @@ -267,6 +264,5 @@ MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options) {
return new ELFAMDGPUAsmBackend(T, STI.getTargetTriple(),
getHsaAbiVersion(&STI).value_or(0));
return new ELFAMDGPUAsmBackend(T, STI.getTargetTriple());
}
Loading