Skip to content

[AMDGPU] CodeGen for GFX12 VBUFFER instructions #75492

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUGISel.td
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@ def gi_global_saddr :
// GlobalISel equivalent of the MUBUFScratchOffset complex pattern: the s32
// operand is matched by the selector's selectMUBUFScratchOffset hook.
def gi_mubuf_scratch_offset :
GIComplexOperandMatcher<s32, "selectMUBUFScratchOffset">,
GIComplexPatternEquiv<MUBUFScratchOffset>;

// GlobalISel equivalent of the BUFSOffset complex pattern: the s32 operand is
// matched by the selector's selectBUFSOffset hook.
def gi_buf_soffset :
GIComplexOperandMatcher<s32, "selectBUFSOffset">,
GIComplexPatternEquiv<BUFSOffset>;

// GlobalISel equivalent of the MUBUFScratchOffen complex pattern: the s32
// operand is matched by the selector's selectMUBUFScratchOffen hook.
def gi_mubuf_scratch_offen :
GIComplexOperandMatcher<s32, "selectMUBUFScratchOffen">,
GIComplexPatternEquiv<MUBUFScratchOffen>;
Expand Down
35 changes: 27 additions & 8 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1319,7 +1319,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
SOffset = Subtarget->hasRestrictedSOffset()
? CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32)
: CurDAG->getTargetConstant(0, DL, MVT::i32);

ConstantSDNode *C1 = nullptr;
SDValue N0 = Addr;
Expand Down Expand Up @@ -1374,7 +1376,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
return true;
}

if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
const SIInstrInfo *TII = Subtarget->getInstrInfo();
if (TII->isLegalMUBUFImmOffset(C1->getZExtValue())) {
// Legal offset for instruction.
Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
return true;
Expand Down Expand Up @@ -1448,7 +1451,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
// Don't fold null pointer.
if (Imm != NullPtr) {
const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset();
const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset(*Subtarget);
SDValue HighBits =
CurDAG->getTargetConstant(Imm & ~MaxOffset, DL, MVT::i32);
MachineSDNode *MovHighBits = CurDAG->getMachineNode(
Expand Down Expand Up @@ -1482,8 +1485,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
// Therefore it should be safe to fold any VGPR offset on gfx9 into the
// MUBUF vaddr, but not on older subtargets which can only do this if the
// sign bit is known 0.
const SIInstrInfo *TII = Subtarget->getInstrInfo();
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
if (TII->isLegalMUBUFImmOffset(C1->getZExtValue()) &&
(!Subtarget->privateMemoryResourceIsRangeChecked() ||
CurDAG->SignBitIsZero(N0))) {
std::tie(VAddr, SOffset) = foldFrameIndex(N0);
Expand Down Expand Up @@ -1515,6 +1519,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
SDValue &Offset) const {
const SIRegisterInfo *TRI =
static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
const SIInstrInfo *TII = Subtarget->getInstrInfo();
MachineFunction &MF = CurDAG->getMachineFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
SDLoc DL(Addr);
Expand All @@ -1531,14 +1536,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
if (Addr.getOpcode() == ISD::ADD) {
// Add (CopyFromReg <sgpr>) <constant>
CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
if (!CAddr || !TII->isLegalMUBUFImmOffset(CAddr->getZExtValue()))
return false;
if (!IsCopyFromSGPR(*TRI, Addr.getOperand(0)))
return false;

SOffset = Addr.getOperand(0);
} else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
TII->isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
// <constant>
SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
} else {
Expand All @@ -1555,8 +1560,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &SOffset, SDValue &Offset
) const {
SDValue Ptr, VAddr, Offen, Idxen, Addr64;
const SIInstrInfo *TII =
static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
const SIInstrInfo *TII = Subtarget->getInstrInfo();

if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
return false;
Expand All @@ -1577,6 +1581,21 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
return false;
}

/// Pick the scalar-offset (soffset) operand for a buffer instruction.
///
/// \param ByteOffsetNode the offset value as it appears in the DAG.
/// \param [out] SOffset receives the SGPR_NULL register when the subtarget
///        reports a restricted soffset and \p ByteOffsetNode is a constant
///        zero; otherwise receives \p ByteOffsetNode unchanged.
/// \returns true unconditionally; this selector never rejects its input.
bool AMDGPUDAGToDAGISel::SelectBUFSOffset(SDValue ByteOffsetNode,
                                          SDValue &SOffset) const {
  // Only look for a constant when the subtarget restricts soffset.
  // NOTE(review): presumably such subtargets cannot encode a literal zero in
  // the soffset field, hence the null register - confirm against the ISA.
  const auto *ConstOffset = Subtarget->hasRestrictedSOffset()
                                ? dyn_cast<ConstantSDNode>(ByteOffsetNode)
                                : nullptr;
  const bool IsConstZero = ConstOffset && ConstOffset->isZero();

  SOffset = IsConstZero ? CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32)
                        : ByteOffsetNode;
  return true;
}

// Find a load or store from corresponding pattern root.
// Roots may be build_vector, bitconvert or their combinations.
static MemSDNode* findMemSDNode(SDNode *N) {
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {

bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset) const;
/// Complex-pattern selector for a buffer instruction's soffset operand; writes
/// the chosen operand to \p SOffset.
bool SelectBUFSOffset(SDValue Addr, SDValue &SOffset) const;

bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset, uint64_t FlatVariant) const;
Expand Down
40 changes: 32 additions & 8 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3229,6 +3229,7 @@ bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
}

bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
assert(!AMDGPU::isGFX12Plus(STI));
unsigned Opc;
unsigned Size = MI.getOperand(3).getImm();

Expand Down Expand Up @@ -3295,8 +3296,8 @@ bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
MIB.add(MI.getOperand(5 + OpOffset)); // soffset
MIB.add(MI.getOperand(6 + OpOffset)); // imm offset
unsigned Aux = MI.getOperand(7 + OpOffset).getImm();
MIB.addImm(Aux & AMDGPU::CPol::ALL); // cpol
MIB.addImm((Aux >> 3) & 1); // swz
MIB.addImm(Aux & AMDGPU::CPol::ALL); // cpol
MIB.addImm(Aux & AMDGPU::CPol::SWZ_pregfx12 ? 1 : 0); // swz

MachineMemOperand *LoadMMO = *MI.memoperands_begin();
MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo();
Expand Down Expand Up @@ -4436,7 +4437,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {

// TODO: Should this be inside the render function? The iterator seems to
// move.
const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset();
const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset(*Subtarget);
BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
HighBits)
.addImm(Offset & ~MaxOffset);
Expand Down Expand Up @@ -4468,7 +4469,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
int64_t ConstOffset;
std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(VAddr, *MRI);
if (ConstOffset != 0) {
if (SIInstrInfo::isLegalMUBUFImmOffset(ConstOffset) &&
if (TII.isLegalMUBUFImmOffset(ConstOffset) &&
(!STI.privateMemoryResourceIsRangeChecked() ||
KB->signBitIsZero(PtrBase))) {
const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
Expand Down Expand Up @@ -4547,6 +4548,11 @@ bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const {
if (isNoUnsignedWrap(AddrMI))
return true;

// Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative
// values.
if (AMDGPU::isGFX12Plus(STI))
return true;

Register LHS = AddrMI->getOperand(1).getReg();
Register RHS = AddrMI->getOperand(2).getReg();

Expand Down Expand Up @@ -4651,7 +4657,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffset(
if (mi_match(Reg, *MRI,
m_GPtrAdd(m_Reg(BasePtr),
m_any_of(m_ICst(Offset), m_Copy(m_ICst(Offset)))))) {
if (!SIInstrInfo::isLegalMUBUFImmOffset(Offset))
if (!TII.isLegalMUBUFImmOffset(Offset))
return {};
MachineInstr *BasePtrDef = getDefIgnoringCopies(BasePtr, *MRI);
Register WaveBase = getWaveAddress(BasePtrDef);
Expand All @@ -4670,7 +4676,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffset(
}

if (!mi_match(Root.getReg(), *MRI, m_ICst(Offset)) ||
!SIInstrInfo::isLegalMUBUFImmOffset(Offset))
!TII.isLegalMUBUFImmOffset(Offset))
return {};

return {{
Expand Down Expand Up @@ -4913,7 +4919,7 @@ bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
/// component.
void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
MachineIRBuilder &B, Register &SOffset, int64_t &ImmOffset) const {
if (SIInstrInfo::isLegalMUBUFImmOffset(ImmOffset))
if (TII.isLegalMUBUFImmOffset(ImmOffset))
return;

// Illegal offset, store it in soffset.
Expand Down Expand Up @@ -5022,6 +5028,8 @@ AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
[=](MachineInstrBuilder &MIB) { // soffset
if (SOffset)
MIB.addReg(SOffset);
else if (STI.hasRestrictedSOffset())
MIB.addReg(AMDGPU::SGPR_NULL);
else
MIB.addImm(0);
},
Expand Down Expand Up @@ -5050,6 +5058,8 @@ AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
[=](MachineInstrBuilder &MIB) { // soffset
if (SOffset)
MIB.addReg(SOffset);
else if (STI.hasRestrictedSOffset())
MIB.addReg(AMDGPU::SGPR_NULL);
else
MIB.addImm(0);
},
Expand All @@ -5060,6 +5070,17 @@ AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
}};
}

/// GlobalISel complex-operand matcher for a buffer instruction's soffset.
///
/// \param Root the register operand holding the scalar offset.
/// \returns a single renderer that adds the soffset register: SGPR_NULL when
///          the subtarget reports a restricted soffset and \p Root's register
///          matches an integer zero, otherwise \p Root's own register.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const {
  const Register RootReg = Root.getReg();
  const bool UseNullReg =
      STI.hasRestrictedSOffset() && mi_match(RootReg, *MRI, m_ZeroInt());

  // A known-zero offset on a restricted-soffset subtarget is rendered as the
  // null SGPR rather than the register that materializes the zero.
  Register SOffsetReg = UseNullReg ? Register(AMDGPU::SGPR_NULL) : RootReg;

  return {{[SOffsetReg](MachineInstrBuilder &MIB) { MIB.addReg(SOffsetReg); }}};
}

/// Get an immediate that must be 32-bits, and treated as zero extended.
static std::optional<uint64_t>
getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI) {
Expand Down Expand Up @@ -5463,7 +5484,10 @@ void AMDGPUInstructionSelector::renderExtractSWZ(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
assert(OpIdx >= 0 && "expected to match an immediate operand");
MIB.addImm((MI.getOperand(OpIdx).getImm() >> 3) & 1);
const bool Swizzle = MI.getOperand(OpIdx).getImm() &
(AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::SWZ
: AMDGPU::CPol::SWZ_pregfx12);
MIB.addImm(Swizzle);
}

void AMDGPUInstructionSelector::renderSetGLC(MachineInstrBuilder &MIB,
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,9 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
bool selectMUBUFOffsetImpl(MachineOperand &Root, Register &RSrcReg,
Register &SOffset, int64_t &Offset) const;

/// Complex-operand matcher for a buffer instruction's soffset operand; the
/// returned renderer adds the selected soffset register.
InstructionSelector::ComplexRendererFns
selectBUFSOffset(MachineOperand &Root) const;

InstructionSelector::ComplexRendererFns
selectMUBUFAddr64(MachineOperand &Root) const;

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5412,7 +5412,7 @@ bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI,
std::pair<Register, unsigned>
AMDGPULegalizerInfo::splitBufferOffsets(MachineIRBuilder &B,
Register OrigOffset) const {
const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset();
const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset(ST);
Register BaseReg;
unsigned ImmOffset;
const LLT S32 = LLT::scalar(32);
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1784,7 +1784,7 @@ getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
std::pair<Register, unsigned>
AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B,
Register OrigOffset) const {
const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset();
const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset(Subtarget);
Register BaseReg;
unsigned ImmOffset;
const LLT S32 = LLT::scalar(32);
Expand Down
Loading