Skip to content

MCExpr-ify amd_kernel_code_t #91587

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits on
May 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 15 additions & 17 deletions llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#include "AMDGPU.h"
#include "AMDGPUHSAMetadataStreamer.h"
#include "AMDGPUResourceUsageAnalysis.h"
#include "AMDKernelCodeT.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
Expand All @@ -29,6 +28,7 @@
#include "SIMachineFunctionInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
Expand Down Expand Up @@ -205,8 +205,9 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
if (STM.isMesaKernel(F) &&
(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
amd_kernel_code_t KernelCode;
AMDGPUMCKernelCodeT KernelCode;
getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
KernelCode.validate(&STM, MF->getContext());
getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
}

Expand Down Expand Up @@ -1320,7 +1321,7 @@ static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
}
}

void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
void AMDGPUAsmPrinter::getAmdKernelCode(AMDGPUMCKernelCodeT &Out,
const SIProgramInfo &CurrentProgramInfo,
const MachineFunction &MF) const {
const Function &F = MF.getFunction();
Expand All @@ -1331,24 +1332,22 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
MCContext &Ctx = MF.getContext();

AMDGPU::initDefaultAMDKernelCodeT(Out, &STM);
Out.initDefault(&STM, Ctx, /*InitMCExpr=*/false);

Out.compute_pgm_resource_registers =
CurrentProgramInfo.getComputePGMRSrc1(STM) |
(CurrentProgramInfo.getComputePGMRSrc2() << 32);
Out.compute_pgm_resource1_registers =
CurrentProgramInfo.getComputePGMRSrc1(STM, Ctx);
Out.compute_pgm_resource2_registers =
CurrentProgramInfo.getComputePGMRSrc2(Ctx);
Out.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;

if (getMCExprValue(CurrentProgramInfo.DynamicCallStack, Ctx))
Out.code_properties |= AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK;
Out.is_dynamic_callstack = CurrentProgramInfo.DynamicCallStack;

AMD_HSA_BITS_SET(Out.code_properties,
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
AMD_HSA_BITS_SET(Out.code_properties, AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
getElementByteSizeValue(STM.getMaxPrivateElementSize(true)));

const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI->getUserSGPRInfo();
if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
Out.code_properties |=
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
}

if (UserSGPRInfo.hasDispatchPtr())
Expand All @@ -1374,10 +1373,9 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,

Align MaxKernArgAlign;
Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
Out.wavefront_sgpr_count = getMCExprValue(CurrentProgramInfo.NumSGPR, Ctx);
Out.workitem_vgpr_count = getMCExprValue(CurrentProgramInfo.NumVGPR, Ctx);
Out.workitem_private_segment_byte_size =
getMCExprValue(CurrentProgramInfo.ScratchSize, Ctx);
Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;

// kernarg_segment_alignment is specified as the log2 of the alignment.
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
#include "SIProgramInfo.h"
#include "llvm/CodeGen/AsmPrinter.h"

struct amd_kernel_code_t;

namespace llvm {

class AMDGPUMachineFunction;
Expand All @@ -29,6 +27,7 @@ class MCOperand;

namespace AMDGPU {
struct MCKernelDescriptor;
struct AMDGPUMCKernelCodeT;
namespace HSAMD {
class MetadataStreamer;
}
Expand All @@ -50,7 +49,8 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
uint64_t getFunctionCodeSize(const MachineFunction &MF) const;

void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF);
void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo,
void getAmdKernelCode(AMDGPU::AMDGPUMCKernelCodeT &Out,
const SIProgramInfo &KernelInfo,
const MachineFunction &MF) const;

/// Emit register usage information so that the GPU driver
Expand Down
51 changes: 11 additions & 40 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1340,7 +1340,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool ParseDirectiveAMDGCNTarget();
bool ParseDirectiveAMDHSACodeObjectVersion();
bool ParseDirectiveAMDHSAKernel();
bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
bool ParseDirectiveAMDKernelCodeT();
// TODO: Possibly make subtargetHasRegister const.
bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
Expand Down Expand Up @@ -5873,7 +5873,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
}

bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
amd_kernel_code_t &Header) {
AMDGPUMCKernelCodeT &C) {
// max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
// assembly for backwards compatibility.
if (ID == "max_scratch_backing_memory_byte_size") {
Expand All @@ -5883,25 +5883,13 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,

SmallString<40> ErrStr;
raw_svector_ostream Err(ErrStr);
if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
return TokError(Err.str());
}
Lex();

if (ID == "enable_dx10_clamp") {
if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) &&
isGFX12Plus())
return TokError("enable_dx10_clamp=1 is not allowed on GFX12+");
}

if (ID == "enable_ieee_mode") {
if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) &&
isGFX12Plus())
return TokError("enable_ieee_mode=1 is not allowed on GFX12+");
}

if (ID == "enable_wavefront_size32") {
if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
if (!isGFX10Plus())
return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
Expand All @@ -5913,41 +5901,23 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
}

if (ID == "wavefront_size") {
if (Header.wavefront_size == 5) {
if (C.wavefront_size == 5) {
if (!isGFX10Plus())
return TokError("wavefront_size=5 is only allowed on GFX10+");
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
return TokError("wavefront_size=5 requires +WavefrontSize32");
} else if (Header.wavefront_size == 6) {
} else if (C.wavefront_size == 6) {
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
return TokError("wavefront_size=6 requires +WavefrontSize64");
}
}

if (ID == "enable_wgp_mode") {
if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
!isGFX10Plus())
return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
}

if (ID == "enable_mem_ordered") {
if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
!isGFX10Plus())
return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
}

if (ID == "enable_fwd_progress") {
if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
!isGFX10Plus())
return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
}

return false;
}

bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
amd_kernel_code_t Header;
AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
AMDGPUMCKernelCodeT KernelCode;
KernelCode.initDefault(&getSTI(), getContext());

while (true) {
// Lex EndOfStatement. This is in a while loop, because lexing a comment
Expand All @@ -5961,11 +5931,12 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
if (ID == ".end_amd_kernel_code_t")
break;

if (ParseAMDKernelCodeTValue(ID, Header))
if (ParseAMDKernelCodeTValue(ID, KernelCode))
return true;
}

getTargetStreamer().EmitAMDKernelCodeT(Header);
KernelCode.validate(&getSTI(), getContext());
getTargetStreamer().EmitAMDKernelCodeT(KernelCode);

return false;
}
Expand Down
12 changes: 4 additions & 8 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
#include "AMDGPUTargetStreamer.h"
#include "AMDGPUMCKernelDescriptor.h"
#include "AMDGPUPTNote.h"
#include "AMDKernelCodeT.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
Expand Down Expand Up @@ -240,10 +239,9 @@ void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion(
OS << "\t.amdhsa_code_object_version " << COV << '\n';
}

void
AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
OS << "\t.amd_kernel_code_t\n";
dumpAmdKernelCode(&Header, OS, "\t\t");
Header.EmitKernelCodeT(OS, getContext());
OS << "\t.end_amd_kernel_code_t\n";
}

Expand Down Expand Up @@ -789,12 +787,10 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {

void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}

void
AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {

void AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
MCStreamer &OS = getStreamer();
OS.pushSection();
OS.emitBytes(StringRef((const char*)&Header, sizeof(Header)));
Header.EmitKernelCodeT(OS, getContext());
OS.popSection();
}

Expand Down
9 changes: 4 additions & 5 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
#include "Utils/AMDGPUPALMetadata.h"
#include "llvm/MC/MCStreamer.h"

struct amd_kernel_code_t;

namespace llvm {

class MCELFStreamer;
Expand All @@ -23,6 +21,7 @@ class formatted_raw_ostream;

namespace AMDGPU {

struct AMDGPUMCKernelCodeT;
struct MCKernelDescriptor;
namespace HSAMD {
struct Metadata;
Expand Down Expand Up @@ -54,7 +53,7 @@ class AMDGPUTargetStreamer : public MCTargetStreamer {
CodeObjectVersion = COV;
}

virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header){};
virtual void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) {};

virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type){};

Expand Down Expand Up @@ -130,7 +129,7 @@ class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {

void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV) override;

void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override;

void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;

Expand Down Expand Up @@ -186,7 +185,7 @@ class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {

void EmitDirectiveAMDGCNTarget() override;

void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override;

void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIDefines.h
Original file line number Diff line number Diff line change
Expand Up @@ -1111,7 +1111,7 @@ enum Type { TRAP = -2, WORKGROUP = -1 };
#define C_00B84C_LDS_SIZE 0xFF007FFF
#define S_00B84C_EXCP_EN(x) (((x) & 0x7F) << 24)
#define G_00B84C_EXCP_EN(x) (((x) >> 24) & 0x7F)
#define C_00B84C_EXCP_EN
#define C_00B84C_EXCP_EN 0x80FFFFFF

#define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC
#define R_0286D0_SPI_PS_INPUT_ADDR 0x0286D0
Expand Down
45 changes: 22 additions & 23 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
Expand Down Expand Up @@ -1218,39 +1219,37 @@ unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI,
}
} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode,
const MCSubtargetInfo *STI) {
IsaVersion Version = getIsaVersion(STI->getCPU());

memset(&Header, 0, sizeof(Header));

Header.amd_kernel_code_version_major = 1;
Header.amd_kernel_code_version_minor = 2;
Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
Header.amd_machine_version_major = Version.Major;
Header.amd_machine_version_minor = Version.Minor;
Header.amd_machine_version_stepping = Version.Stepping;
Header.kernel_code_entry_byte_offset = sizeof(Header);
Header.wavefront_size = 6;
KernelCode.amd_kernel_code_version_major = 1;
KernelCode.amd_kernel_code_version_minor = 2;
KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
KernelCode.amd_machine_version_major = Version.Major;
KernelCode.amd_machine_version_minor = Version.Minor;
KernelCode.amd_machine_version_stepping = Version.Stepping;
KernelCode.kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t);
if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
KernelCode.wavefront_size = 5;
KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
} else {
KernelCode.wavefront_size = 6;
}

// If the code object does not support indirect functions, then the value must
// be 0xffffffff.
Header.call_convention = -1;
KernelCode.call_convention = -1;

// These alignment values are specified in powers of two, so alignment =
// 2^n. The minimum alignment is 2^4 = 16.
Header.kernarg_segment_alignment = 4;
Header.group_segment_alignment = 4;
Header.private_segment_alignment = 4;
KernelCode.kernarg_segment_alignment = 4;
KernelCode.group_segment_alignment = 4;
KernelCode.private_segment_alignment = 4;

if (Version.Major >= 10) {
if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
Header.wavefront_size = 5;
Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
}
Header.compute_pgm_resource_registers |=
S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
S_00B848_MEM_ORDERED(1);
KernelCode.compute_pgm_resource_registers |=
S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
S_00B848_MEM_ORDERED(1);
}
}

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class raw_ostream;

namespace AMDGPU {

struct AMDGPUMCKernelCodeT;
struct IsaVersion;

/// Generic target versions emitted by this version of LLVM.
Expand Down Expand Up @@ -860,7 +861,7 @@ unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);
LLVM_READONLY
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &Header,
const MCSubtargetInfo *STI);

bool isGroupSegment(const GlobalValue *GV);
Expand Down
Loading
Loading