Skip to content

Commit b78b36e

Browse files
Yashwant Singh authored and cdevadas committed
[AMDGPU] Implement whole wave register spill
To reduce register pressure during allocation, when the allocator spills a virtual register that corresponds to a whole wave mode operation, the spill loads and restores should be activated for all lanes by temporarily flipping all bits in the exec register to one just before the spills. This is not implemented in the compiler as of today, and this patch enables the necessary support. This is a pre-patch before the SGPR spill to virtual VGPR lanes that would eventually cause whole wave register spills during allocation. Reviewed By: arsenm, cdevadas Differential Revision: https://reviews.llvm.org/D143759
1 parent 564ff8f commit b78b36e

38 files changed

+1008
-781
lines changed

llvm/include/llvm/CodeGen/MachineRegisterInfo.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ class MachineRegisterInfo {
5757
virtual ~Delegate() = default;
5858

5959
virtual void MRI_NoteNewVirtualRegister(Register Reg) = 0;
60-
virtual void MRI_NotecloneVirtualRegister(Register NewReg,
60+
virtual void MRI_NoteCloneVirtualRegister(Register NewReg,
6161
Register SrcReg) {
6262
MRI_NoteNewVirtualRegister(NewReg);
6363
}
@@ -181,7 +181,7 @@ class MachineRegisterInfo {
181181

182182
void noteCloneVirtualRegister(Register NewReg, Register SrcReg) {
183183
for (auto *TheDelegate : TheDelegates)
184-
TheDelegate->MRI_NotecloneVirtualRegister(NewReg, SrcReg);
184+
TheDelegate->MRI_NoteCloneVirtualRegister(NewReg, SrcReg);
185185
}
186186

187187
//===--------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1427,6 +1427,12 @@ TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
14271427
return new GCNPassConfig(*this, PM);
14281428
}
14291429

1430+
void GCNTargetMachine::registerMachineRegisterInfoCallback(
1431+
MachineFunction &MF) const {
1432+
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1433+
MF.getRegInfo().addDelegate(MFI);
1434+
}
1435+
14301436
MachineFunctionInfo *GCNTargetMachine::createMachineFunctionInfo(
14311437
BumpPtrAllocator &Allocator, const Function &F,
14321438
const TargetSubtargetInfo *STI) const {
@@ -1481,6 +1487,9 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
14811487
if (parseOptionalRegister(YamlMFI.VGPRForAGPRCopy, MFI->VGPRForAGPRCopy))
14821488
return true;
14831489

1490+
if (parseOptionalRegister(YamlMFI.SGPRForEXECCopy, MFI->SGPRForEXECCopy))
1491+
return true;
1492+
14841493
if (parseOptionalRegister(YamlMFI.LongBranchReservedReg,
14851494
MFI->LongBranchReservedReg))
14861495
return true;

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ class GCNTargetMachine final : public AMDGPUTargetMachine {
9292
return true;
9393
}
9494

95+
void registerMachineRegisterInfoCallback(MachineFunction &MF) const override;
96+
9597
MachineFunctionInfo *
9698
createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F,
9799
const TargetSubtargetInfo *STI) const override;

llvm/lib/Target/AMDGPU/SIDefines.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -926,6 +926,17 @@ enum Offset_COV5 : unsigned {
926926
};
927927

928928
} // namespace ImplicitArg
929+
930+
namespace VirtRegFlag {
931+
// Virtual register flags used for various target specific handlings during
932+
// codegen.
933+
enum Register_Flag : uint8_t {
934+
// Register operand in a whole-wave mode operation.
935+
WWM_REG = 1 << 0,
936+
};
937+
938+
} // namespace VirtRegFlag
939+
929940
} // namespace AMDGPU
930941

931942
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 41 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,12 @@ static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
6464
return MCRegister();
6565
}
6666

67+
/// Query target location for spilling SGPRs
68+
/// \p IncludeScratchCopy : Also look for free scratch SGPRs
6769
static void getVGPRSpillLaneOrTempRegister(
6870
MachineFunction &MF, LivePhysRegs &LiveRegs, Register SGPR,
69-
const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass) {
71+
const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass,
72+
bool IncludeScratchCopy = true) {
7073
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
7174
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
7275

@@ -77,9 +80,12 @@ static void getVGPRSpillLaneOrTempRegister(
7780

7881
// We need to save and restore the given SGPR.
7982

83+
Register ScratchSGPR;
8084
// 1: Try to save the given register into an unused scratch SGPR. The LiveRegs
81-
// should have all the callee saved registers marked as used.
82-
Register ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveRegs, RC);
85+
// should have all the callee saved registers marked as used. For certain
86+
// cases we skip copy to scratch SGPR.
87+
if (IncludeScratchCopy)
88+
ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveRegs, RC);
8389

8490
if (!ScratchSGPR) {
8591
int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr,
@@ -935,8 +941,7 @@ void SIFrameLowering::emitCSRSpillStores(
935941
if (!WWMCalleeSavedRegs.empty()) {
936942
if (ScratchExecCopy) {
937943
unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
938-
MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
939-
BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Exec).addImm(-1);
944+
BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
940945
} else {
941946
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL,
942947
/*IsProlog*/ true,
@@ -948,8 +953,7 @@ void SIFrameLowering::emitCSRSpillStores(
948953
if (ScratchExecCopy) {
949954
// FIXME: Split block and make terminator.
950955
unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
951-
MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
952-
BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
956+
BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
953957
.addReg(ScratchExecCopy, RegState::Kill);
954958
LiveRegs.addReg(ScratchExecCopy);
955959
}
@@ -1040,8 +1044,7 @@ void SIFrameLowering::emitCSRSpillRestores(
10401044
if (!WWMCalleeSavedRegs.empty()) {
10411045
if (ScratchExecCopy) {
10421046
unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1043-
MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
1044-
BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Exec).addImm(-1);
1047+
BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
10451048
} else {
10461049
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL,
10471050
/*IsProlog*/ false,
@@ -1053,8 +1056,7 @@ void SIFrameLowering::emitCSRSpillRestores(
10531056
if (ScratchExecCopy) {
10541057
// FIXME: Split block and make terminator.
10551058
unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1056-
MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
1057-
BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
1059+
BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
10581060
.addReg(ScratchExecCopy, RegState::Kill);
10591061
}
10601062
}
@@ -1463,8 +1465,10 @@ void SIFrameLowering::processFunctionBeforeFrameIndicesReplaced(
14631465
// The special SGPR spills like the one needed for FP, BP or any reserved
14641466
// registers delayed until frame lowering.
14651467
void SIFrameLowering::determinePrologEpilogSGPRSaves(
1466-
MachineFunction &MF, BitVector &SavedVGPRs) const {
1468+
MachineFunction &MF, BitVector &SavedVGPRs,
1469+
bool NeedExecCopyReservedReg) const {
14671470
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1471+
MachineRegisterInfo &MRI = MF.getRegInfo();
14681472
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
14691473
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
14701474
const SIRegisterInfo *TRI = ST.getRegisterInfo();
@@ -1476,6 +1480,26 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves(
14761480
for (unsigned I = 0; CSRegs[I]; ++I)
14771481
LiveRegs.addReg(CSRegs[I]);
14781482

1483+
const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass();
1484+
1485+
if (NeedExecCopyReservedReg) {
1486+
Register ReservedReg = MFI->getSGPRForEXECCopy();
1487+
assert(ReservedReg && "Should have reserved an SGPR for EXEC copy.");
1488+
Register UnusedScratchReg = findUnusedRegister(MRI, LiveRegs, RC);
1489+
if (UnusedScratchReg) {
1490+
// If found any unused scratch SGPR, reserve the register itself for Exec
1491+
// copy and there is no need for any spills in that case.
1492+
MFI->setSGPRForEXECCopy(UnusedScratchReg);
1493+
LiveRegs.addReg(UnusedScratchReg);
1494+
} else {
1495+
// Needs spill.
1496+
assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedReg) &&
1497+
"Re-reserving spill slot for EXEC copy register");
1498+
getVGPRSpillLaneOrTempRegister(MF, LiveRegs, ReservedReg, RC,
1499+
/*IncludeScratchCopy=*/false);
1500+
}
1501+
}
1502+
14791503
// hasFP only knows about stack objects that already exist. We're now
14801504
// determining the stack slots that will be created, so we have to predict
14811505
// them. Stack objects force FP usage with calls.
@@ -1514,6 +1538,8 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
15141538

15151539
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
15161540
const SIRegisterInfo *TRI = ST.getRegisterInfo();
1541+
const SIInstrInfo *TII = ST.getInstrInfo();
1542+
bool NeedExecCopyReservedReg = false;
15171543

15181544
MachineInstr *ReturnMI = nullptr;
15191545
for (MachineBasicBlock &MBB : MF) {
@@ -1532,6 +1558,8 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
15321558
MFI->allocateWWMSpill(MF, MI.getOperand(0).getReg());
15331559
else if (MI.getOpcode() == AMDGPU::V_READLANE_B32)
15341560
MFI->allocateWWMSpill(MF, MI.getOperand(1).getReg());
1561+
else if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
1562+
NeedExecCopyReservedReg = true;
15351563
else if (MI.getOpcode() == AMDGPU::SI_RETURN ||
15361564
MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
15371565
// We expect all return to be the same size.
@@ -1561,7 +1589,7 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
15611589
if (!ST.hasGFX90AInsts())
15621590
SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());
15631591

1564-
determinePrologEpilogSGPRSaves(MF, SavedVGPRs);
1592+
determinePrologEpilogSGPRSaves(MF, SavedVGPRs, NeedExecCopyReservedReg);
15651593

15661594
// The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't
15671595
// allow the default insertion to handle them.

llvm/lib/Target/AMDGPU/SIFrameLowering.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
3434
RegScavenger *RS = nullptr) const override;
3535
void determineCalleeSavesSGPR(MachineFunction &MF, BitVector &SavedRegs,
3636
RegScavenger *RS = nullptr) const;
37-
void determinePrologEpilogSGPRSaves(MachineFunction &MF,
38-
BitVector &SavedRegs) const;
37+
void determinePrologEpilogSGPRSaves(MachineFunction &MF, BitVector &SavedRegs,
38+
bool NeedExecCopyReservedReg) const;
3939
void emitCSRSpillStores(MachineFunction &MF, MachineBasicBlock &MBB,
4040
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
4141
LivePhysRegs &LiveRegs, Register FrameReg,

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13395,6 +13395,15 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
1339513395
reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info);
1339613396
}
1339713397

13398+
// TODO: Move this logic to getReservedRegs()
13399+
// Reserve the SGPR(s) to save/restore EXEC for WWM spill/copy handling.
13400+
unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
13401+
Register SReg = ST.isWave32()
13402+
? AMDGPU::SGPR_32RegClass.getRegister(MaxNumSGPRs - 1)
13403+
: TRI->getAlignedHighSGPRForRC(MF, /*Align=*/2,
13404+
&AMDGPU::SGPR_64RegClass);
13405+
Info->setSGPRForEXECCopy(SReg);
13406+
1339813407
assert(!TRI->isSubRegister(Info->getScratchRSrcReg(),
1339913408
Info->getStackPtrOffsetReg()));
1340013409
if (Info->getStackPtrOffsetReg() != AMDGPU::SP_REG)

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 84 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1587,6 +1587,30 @@ static unsigned getAVSpillSaveOpcode(unsigned Size) {
15871587
}
15881588
}
15891589

1590+
static unsigned getWWMRegSpillSaveOpcode(unsigned Size) {
1591+
// Currently, there is only 32-bit WWM register spills needed.
1592+
if (Size != 4)
1593+
llvm_unreachable("unknown wwm register spill size");
1594+
1595+
return AMDGPU::SI_SPILL_WWM_V32_SAVE;
1596+
}
1597+
1598+
static unsigned getVectorRegSpillSaveOpcode(Register Reg,
1599+
const TargetRegisterClass *RC,
1600+
unsigned Size,
1601+
const SIRegisterInfo &TRI,
1602+
const SIMachineFunctionInfo &MFI) {
1603+
// Choose the right opcode if spilling a WWM register.
1604+
if (MFI.checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG))
1605+
return getWWMRegSpillSaveOpcode(Size);
1606+
1607+
if (TRI.isVectorSuperClass(RC))
1608+
return getAVSpillSaveOpcode(Size);
1609+
1610+
return TRI.isAGPRClass(RC) ? getAGPRSpillSaveOpcode(Size)
1611+
: getVGPRSpillSaveOpcode(Size);
1612+
}
1613+
15901614
void SIInstrInfo::storeRegToStackSlot(
15911615
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
15921616
bool isKill, int FrameIndex, const TargetRegisterClass *RC,
@@ -1631,11 +1655,8 @@ void SIInstrInfo::storeRegToStackSlot(
16311655
return;
16321656
}
16331657

1634-
unsigned Opcode = RI.isVectorSuperClass(RC)
1635-
? getAVSpillSaveOpcode(SpillSize)
1636-
: RI.isAGPRClass(RC)
1637-
? getAGPRSpillSaveOpcode(SpillSize)
1638-
: getVGPRSpillSaveOpcode(SpillSize);
1658+
unsigned Opcode = getVectorRegSpillSaveOpcode(VReg ? VReg : SrcReg, RC,
1659+
SpillSize, RI, *MFI);
16391660
MFI->setHasSpilledVGPRs();
16401661

16411662
BuildMI(MBB, MI, DL, get(Opcode))
@@ -1786,6 +1807,29 @@ static unsigned getAVSpillRestoreOpcode(unsigned Size) {
17861807
}
17871808
}
17881809

1810+
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size) {
1811+
// Currently, there is only 32-bit WWM register spills needed.
1812+
if (Size != 4)
1813+
llvm_unreachable("unknown wwm register spill size");
1814+
1815+
return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
1816+
}
1817+
1818+
static unsigned
1819+
getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC,
1820+
unsigned Size, const SIRegisterInfo &TRI,
1821+
const SIMachineFunctionInfo &MFI) {
1822+
// Choose the right opcode if restoring a WWM register.
1823+
if (MFI.checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG))
1824+
return getWWMRegSpillRestoreOpcode(Size);
1825+
1826+
if (TRI.isVectorSuperClass(RC))
1827+
return getAVSpillRestoreOpcode(Size);
1828+
1829+
return TRI.isAGPRClass(RC) ? getAGPRSpillRestoreOpcode(Size)
1830+
: getVGPRSpillRestoreOpcode(Size);
1831+
}
1832+
17891833
void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
17901834
MachineBasicBlock::iterator MI,
17911835
Register DestReg, int FrameIndex,
@@ -1829,11 +1873,8 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
18291873
return;
18301874
}
18311875

1832-
unsigned Opcode = RI.isVectorSuperClass(RC)
1833-
? getAVSpillRestoreOpcode(SpillSize)
1834-
: RI.isAGPRClass(RC)
1835-
? getAGPRSpillRestoreOpcode(SpillSize)
1836-
: getVGPRSpillRestoreOpcode(SpillSize);
1876+
unsigned Opcode = getVectorRegSpillRestoreOpcode(VReg ? VReg : DestReg, RC,
1877+
SpillSize, RI, *MFI);
18371878
BuildMI(MBB, MI, DL, get(Opcode), DestReg)
18381879
.addFrameIndex(FrameIndex) // vaddr
18391880
.addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
@@ -4924,6 +4965,39 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
49244965
"Unexpected scalar opcode without corresponding vector one!");
49254966
}
49264967

4968+
void SIInstrInfo::insertScratchExecCopy(MachineFunction &MF,
4969+
MachineBasicBlock &MBB,
4970+
MachineBasicBlock::iterator MBBI,
4971+
const DebugLoc &DL, Register Reg,
4972+
bool IsSCCLive) const {
4973+
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
4974+
const SIInstrInfo *TII = ST.getInstrInfo();
4975+
bool IsWave32 = ST.isWave32();
4976+
if (IsSCCLive) {
4977+
// Insert two move instructions, one to save the original value of EXEC and
4978+
// the other to turn on all bits in EXEC. This is required as we can't use
4979+
// the single instruction S_OR_SAVEEXEC that clobbers SCC.
4980+
unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
4981+
MCRegister Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
4982+
BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Reg).addReg(Exec, RegState::Kill);
4983+
BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Exec).addImm(-1);
4984+
} else {
4985+
const unsigned OrSaveExec =
4986+
IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
4987+
auto SaveExec =
4988+
BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), Reg).addImm(-1);
4989+
SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead.
4990+
}
4991+
}
4992+
4993+
void SIInstrInfo::restoreExec(MachineFunction &MF, MachineBasicBlock &MBB,
4994+
MachineBasicBlock::iterator MBBI,
4995+
const DebugLoc &DL, Register Reg) const {
4996+
unsigned ExecMov = isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
4997+
MCRegister Exec = isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
4998+
BuildMI(MBB, MBBI, DL, get(ExecMov), Exec).addReg(Reg, RegState::Kill);
4999+
}
5000+
49275001
static const TargetRegisterClass *
49285002
adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI,
49295003
const MachineRegisterInfo &MRI,

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -654,6 +654,11 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
654654
return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill;
655655
}
656656

657+
static bool isWWMRegSpillOpcode(uint16_t Opcode) {
658+
return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
659+
Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE;
660+
}
661+
657662
static bool isDPP(const MachineInstr &MI) {
658663
return MI.getDesc().TSFlags & SIInstrFlags::DPP;
659664
}
@@ -939,6 +944,15 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
939944

940945
unsigned getVALUOp(const MachineInstr &MI) const;
941946

947+
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB,
948+
MachineBasicBlock::iterator MBBI,
949+
const DebugLoc &DL, Register Reg,
950+
bool IsSCCLive) const;
951+
952+
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB,
953+
MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
954+
Register Reg) const;
955+
942956
/// Return the correct register class for \p OpNo. For target-specific
943957
/// instructions, this will return the register class that has been defined
944958
/// in tablegen. For generic instructions, like REG_SEQUENCE it will return

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -933,6 +933,9 @@ defm SI_SPILL_AV384 : SI_SPILL_VGPR <AV_384, 1>;
933933
defm SI_SPILL_AV512 : SI_SPILL_VGPR <AV_512, 1>;
934934
defm SI_SPILL_AV1024 : SI_SPILL_VGPR <AV_1024, 1>;
935935

936+
let isConvergent = 1 in
937+
defm SI_SPILL_WWM_V32 : SI_SPILL_VGPR <VGPR_32>;
938+
936939
def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
937940
(outs SReg_64:$dst),
938941
(ins si_ga:$ptr_lo, si_ga:$ptr_hi),

0 commit comments

Comments
 (0)