Skip to content

Commit 5879162

Browse files
authored
[AMDGPU] CodeGen for GFX12 VBUFFER instructions (#75492)
1 parent f5e48fe commit 5879162

File tree

61 files changed

+16712
-4601
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

61 files changed

+16712
-4601
lines changed

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -105,6 +105,11 @@ def gi_global_saddr :
105105
def gi_mubuf_scratch_offset :
106106
GIComplexOperandMatcher<s32, "selectMUBUFScratchOffset">,
107107
GIComplexPatternEquiv<MUBUFScratchOffset>;
108+
109+
// GlobalISel equivalent of the BUFSOffset complex pattern. The operand is
// matched as an s32 value, and "selectBUFSOffset" is named as the matcher
// function.
def gi_buf_soffset :
110+
GIComplexOperandMatcher<s32, "selectBUFSOffset">,
111+
GIComplexPatternEquiv<BUFSOffset>;
112+
108113
def gi_mubuf_scratch_offen :
109114
GIComplexOperandMatcher<s32, "selectMUBUFScratchOffen">,
110115
GIComplexPatternEquiv<MUBUFScratchOffen>;

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 27 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1319,7 +1319,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
13191319
Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
13201320
Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
13211321
Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
1322-
SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1322+
SOffset = Subtarget->hasRestrictedSOffset()
1323+
? CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32)
1324+
: CurDAG->getTargetConstant(0, DL, MVT::i32);
13231325

13241326
ConstantSDNode *C1 = nullptr;
13251327
SDValue N0 = Addr;
@@ -1374,7 +1376,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
13741376
return true;
13751377
}
13761378

1377-
if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
1379+
const SIInstrInfo *TII = Subtarget->getInstrInfo();
1380+
if (TII->isLegalMUBUFImmOffset(C1->getZExtValue())) {
13781381
// Legal offset for instruction.
13791382
Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
13801383
return true;
@@ -1448,7 +1451,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
14481451
AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
14491452
// Don't fold null pointer.
14501453
if (Imm != NullPtr) {
1451-
const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset();
1454+
const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset(*Subtarget);
14521455
SDValue HighBits =
14531456
CurDAG->getTargetConstant(Imm & ~MaxOffset, DL, MVT::i32);
14541457
MachineSDNode *MovHighBits = CurDAG->getMachineNode(
@@ -1482,8 +1485,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
14821485
// Therefore it should be safe to fold any VGPR offset on gfx9 into the
14831486
// MUBUF vaddr, but not on older subtargets which can only do this if the
14841487
// sign bit is known 0.
1488+
const SIInstrInfo *TII = Subtarget->getInstrInfo();
14851489
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1486-
if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
1490+
if (TII->isLegalMUBUFImmOffset(C1->getZExtValue()) &&
14871491
(!Subtarget->privateMemoryResourceIsRangeChecked() ||
14881492
CurDAG->SignBitIsZero(N0))) {
14891493
std::tie(VAddr, SOffset) = foldFrameIndex(N0);
@@ -1515,6 +1519,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
15151519
SDValue &Offset) const {
15161520
const SIRegisterInfo *TRI =
15171521
static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
1522+
const SIInstrInfo *TII = Subtarget->getInstrInfo();
15181523
MachineFunction &MF = CurDAG->getMachineFunction();
15191524
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
15201525
SDLoc DL(Addr);
@@ -1531,14 +1536,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
15311536
if (Addr.getOpcode() == ISD::ADD) {
15321537
// Add (CopyFromReg <sgpr>) <constant>
15331538
CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
1534-
if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
1539+
if (!CAddr || !TII->isLegalMUBUFImmOffset(CAddr->getZExtValue()))
15351540
return false;
15361541
if (!IsCopyFromSGPR(*TRI, Addr.getOperand(0)))
15371542
return false;
15381543

15391544
SOffset = Addr.getOperand(0);
15401545
} else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
1541-
SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
1546+
TII->isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
15421547
// <constant>
15431548
SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
15441549
} else {
@@ -1555,8 +1560,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
15551560
SDValue &SOffset, SDValue &Offset
15561561
) const {
15571562
SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1558-
const SIInstrInfo *TII =
1559-
static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
1563+
const SIInstrInfo *TII = Subtarget->getInstrInfo();
15601564

15611565
if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
15621566
return false;
@@ -1577,6 +1581,21 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
15771581
return false;
15781582
}
15791583

1584+
// Selects the SOffset operand from the node \p ByteOffsetNode.
//
// \param ByteOffsetNode  the node supplying the offset value.
// \param SOffset         [out] the operand to use: either the SGPR_NULL
//                        register or \p ByteOffsetNode itself.
// \returns always true; every input node is accepted.
bool AMDGPUDAGToDAGISel::SelectBUFSOffset(SDValue ByteOffsetNode,
1585+
SDValue &SOffset) const {
1586+
// Only subtargets reporting hasRestrictedSOffset() get the special case: a
// constant-zero offset is replaced by the SGPR_NULL register.
if (Subtarget->hasRestrictedSOffset()) {
1587+
if (auto SOffsetConst = dyn_cast<ConstantSDNode>(ByteOffsetNode)) {
1588+
if (SOffsetConst->isZero()) {
1589+
SOffset = CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32);
1590+
return true;
1591+
}
1592+
}
1593+
}
1594+
1595+
// Every other case (unrestricted subtarget, non-constant node, or non-zero
// constant) passes the incoming node through unchanged.
SOffset = ByteOffsetNode;
1596+
return true;
1597+
}
1598+
15801599
// Find a load or store from corresponding pattern root.
15811600
// Roots may be build_vector, bitconvert or their combinations.
15821601
static MemSDNode* findMemSDNode(SDNode *N) {

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -179,6 +179,7 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
179179

180180
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
181181
SDValue &Offset) const;
182+
bool SelectBUFSOffset(SDValue Addr, SDValue &SOffset) const;
182183

183184
bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
184185
SDValue &Offset, uint64_t FlatVariant) const;

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 32 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3229,6 +3229,7 @@ bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
32293229
}
32303230

32313231
bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
3232+
assert(!AMDGPU::isGFX12Plus(STI));
32323233
unsigned Opc;
32333234
unsigned Size = MI.getOperand(3).getImm();
32343235

@@ -3295,8 +3296,8 @@ bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
32953296
MIB.add(MI.getOperand(5 + OpOffset)); // soffset
32963297
MIB.add(MI.getOperand(6 + OpOffset)); // imm offset
32973298
unsigned Aux = MI.getOperand(7 + OpOffset).getImm();
3298-
MIB.addImm(Aux & AMDGPU::CPol::ALL); // cpol
3299-
MIB.addImm((Aux >> 3) & 1); // swz
3299+
MIB.addImm(Aux & AMDGPU::CPol::ALL); // cpol
3300+
MIB.addImm(Aux & AMDGPU::CPol::SWZ_pregfx12 ? 1 : 0); // swz
33003301

33013302
MachineMemOperand *LoadMMO = *MI.memoperands_begin();
33023303
MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo();
@@ -4436,7 +4437,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
44364437

44374438
// TODO: Should this be inside the render function? The iterator seems to
44384439
// move.
4439-
const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset();
4440+
const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset(*Subtarget);
44404441
BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
44414442
HighBits)
44424443
.addImm(Offset & ~MaxOffset);
@@ -4468,7 +4469,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
44684469
int64_t ConstOffset;
44694470
std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(VAddr, *MRI);
44704471
if (ConstOffset != 0) {
4471-
if (SIInstrInfo::isLegalMUBUFImmOffset(ConstOffset) &&
4472+
if (TII.isLegalMUBUFImmOffset(ConstOffset) &&
44724473
(!STI.privateMemoryResourceIsRangeChecked() ||
44734474
KB->signBitIsZero(PtrBase))) {
44744475
const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
@@ -4547,6 +4548,11 @@ bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const {
45474548
if (isNoUnsignedWrap(AddrMI))
45484549
return true;
45494550

4551+
// Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative
4552+
// values.
4553+
if (AMDGPU::isGFX12Plus(STI))
4554+
return true;
4555+
45504556
Register LHS = AddrMI->getOperand(1).getReg();
45514557
Register RHS = AddrMI->getOperand(2).getReg();
45524558

@@ -4651,7 +4657,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffset(
46514657
if (mi_match(Reg, *MRI,
46524658
m_GPtrAdd(m_Reg(BasePtr),
46534659
m_any_of(m_ICst(Offset), m_Copy(m_ICst(Offset)))))) {
4654-
if (!SIInstrInfo::isLegalMUBUFImmOffset(Offset))
4660+
if (!TII.isLegalMUBUFImmOffset(Offset))
46554661
return {};
46564662
MachineInstr *BasePtrDef = getDefIgnoringCopies(BasePtr, *MRI);
46574663
Register WaveBase = getWaveAddress(BasePtrDef);
@@ -4670,7 +4676,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffset(
46704676
}
46714677

46724678
if (!mi_match(Root.getReg(), *MRI, m_ICst(Offset)) ||
4673-
!SIInstrInfo::isLegalMUBUFImmOffset(Offset))
4679+
!TII.isLegalMUBUFImmOffset(Offset))
46744680
return {};
46754681

46764682
return {{
@@ -4913,7 +4919,7 @@ bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
49134919
/// component.
49144920
void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
49154921
MachineIRBuilder &B, Register &SOffset, int64_t &ImmOffset) const {
4916-
if (SIInstrInfo::isLegalMUBUFImmOffset(ImmOffset))
4922+
if (TII.isLegalMUBUFImmOffset(ImmOffset))
49174923
return;
49184924

49194925
// Illegal offset, store it in soffset.
@@ -5022,6 +5028,8 @@ AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
50225028
[=](MachineInstrBuilder &MIB) { // soffset
50235029
if (SOffset)
50245030
MIB.addReg(SOffset);
5031+
else if (STI.hasRestrictedSOffset())
5032+
MIB.addReg(AMDGPU::SGPR_NULL);
50255033
else
50265034
MIB.addImm(0);
50275035
},
@@ -5050,6 +5058,8 @@ AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
50505058
[=](MachineInstrBuilder &MIB) { // soffset
50515059
if (SOffset)
50525060
MIB.addReg(SOffset);
5061+
else if (STI.hasRestrictedSOffset())
5062+
MIB.addReg(AMDGPU::SGPR_NULL);
50535063
else
50545064
MIB.addImm(0);
50555065
},
@@ -5060,6 +5070,17 @@ AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
50605070
}};
50615071
}
50625072

5073+
// Complex-operand selector that produces the soffset register operand.
//
// \param Root  the operand whose register is used as the starting soffset.
// \returns a single renderer that adds the chosen register to the
//          instruction being built. No failure path is present in this
//          function.
InstructionSelector::ComplexRendererFns
5074+
AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const {
5075+
5076+
Register SOffset = Root.getReg();
5077+
5078+
// When the subtarget has a restricted soffset and the register matches
// m_ZeroInt(), SGPR_NULL is substituted for it.
if (STI.hasRestrictedSOffset() && mi_match(SOffset, *MRI, m_ZeroInt()))
5079+
SOffset = AMDGPU::SGPR_NULL;
5080+
5081+
// The lambda captures SOffset by copy, so the renderer keeps the register
// chosen above.
return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
5082+
}
5083+
50635084
/// Get an immediate that must be 32-bits, and treated as zero extended.
50645085
static std::optional<uint64_t>
50655086
getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI) {
@@ -5463,7 +5484,10 @@ void AMDGPUInstructionSelector::renderExtractSWZ(MachineInstrBuilder &MIB,
54635484
const MachineInstr &MI,
54645485
int OpIdx) const {
54655486
assert(OpIdx >= 0 && "expected to match an immediate operand");
5466-
MIB.addImm((MI.getOperand(OpIdx).getImm() >> 3) & 1);
5487+
const bool Swizzle = MI.getOperand(OpIdx).getImm() &
5488+
(AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::SWZ
5489+
: AMDGPU::CPol::SWZ_pregfx12);
5490+
MIB.addImm(Swizzle);
54675491
}
54685492

54695493
void AMDGPUInstructionSelector::renderSetGLC(MachineInstrBuilder &MIB,

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -292,6 +292,9 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
292292
bool selectMUBUFOffsetImpl(MachineOperand &Root, Register &RSrcReg,
293293
Register &SOffset, int64_t &Offset) const;
294294

295+
InstructionSelector::ComplexRendererFns
296+
selectBUFSOffset(MachineOperand &Root) const;
297+
295298
InstructionSelector::ComplexRendererFns
296299
selectMUBUFAddr64(MachineOperand &Root) const;
297300

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5412,7 +5412,7 @@ bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI,
54125412
std::pair<Register, unsigned>
54135413
AMDGPULegalizerInfo::splitBufferOffsets(MachineIRBuilder &B,
54145414
Register OrigOffset) const {
5415-
const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset();
5415+
const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset(ST);
54165416
Register BaseReg;
54175417
unsigned ImmOffset;
54185418
const LLT S32 = LLT::scalar(32);

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1784,7 +1784,7 @@ getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
17841784
std::pair<Register, unsigned>
17851785
AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B,
17861786
Register OrigOffset) const {
1787-
const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset();
1787+
const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset(Subtarget);
17881788
Register BaseReg;
17891789
unsigned ImmOffset;
17901790
const LLT S32 = LLT::scalar(32);

0 commit comments

Comments
 (0)