Skip to content

Commit f6e93b8

Browse files
authored
AMDGPU: Minor improvement and cleanup for waterfall loop generation (#111886)
First, ReadlanePieces should be scoped to each MachineOperand: declaring it in an outer scope without clearing it after each MachineOperand is processed is incorrect. Additionally, we do not need the OrigBB argument for emitLoadScalarOpsFromVGPRLoop, since the MachineFunction (its only use) can be obtained from LoopBB (or BodyBB).
1 parent 1252623 commit f6e93b8

File tree

1 file changed

+10
-8
lines changed

1 file changed

+10
-8
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6302,11 +6302,14 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
63026302
// Emit the actual waterfall loop, executing the wrapped instruction for each
63036303
// unique value of \p ScalarOps across all lanes. In the best case we execute 1
63046304
// iteration, in the worst case we execute 64 (once per lane).
6305-
static void emitLoadScalarOpsFromVGPRLoop(
6306-
const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &OrigBB,
6307-
MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL,
6308-
ArrayRef<MachineOperand *> ScalarOps) {
6309-
MachineFunction &MF = *OrigBB.getParent();
6305+
static void
6306+
emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII,
6307+
MachineRegisterInfo &MRI,
6308+
MachineBasicBlock &LoopBB,
6309+
MachineBasicBlock &BodyBB,
6310+
const DebugLoc &DL,
6311+
ArrayRef<MachineOperand *> ScalarOps) {
6312+
MachineFunction &MF = *LoopBB.getParent();
63106313
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
63116314
const SIRegisterInfo *TRI = ST.getRegisterInfo();
63126315
unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
@@ -6319,8 +6322,6 @@ static void emitLoadScalarOpsFromVGPRLoop(
63196322
const auto *BoolXExecRC = TRI->getWaveMaskRegClass();
63206323

63216324
MachineBasicBlock::iterator I = LoopBB.begin();
6322-
6323-
SmallVector<Register, 8> ReadlanePieces;
63246325
Register CondReg;
63256326

63266327
for (MachineOperand *ScalarOp : ScalarOps) {
@@ -6355,6 +6356,7 @@ static void emitLoadScalarOpsFromVGPRLoop(
63556356
ScalarOp->setReg(CurReg);
63566357
ScalarOp->setIsKill();
63576358
} else {
6359+
SmallVector<Register, 8> ReadlanePieces;
63586360
unsigned VScalarOpUndef = getUndefRegState(ScalarOp->isUndef());
63596361
assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
63606362
"Unhandled register size");
@@ -6535,7 +6537,7 @@ loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
65356537
}
65366538
}
65376539

6538-
emitLoadScalarOpsFromVGPRLoop(TII, MRI, MBB, *LoopBB, *BodyBB, DL, ScalarOps);
6540+
emitLoadScalarOpsFromVGPRLoop(TII, MRI, *LoopBB, *BodyBB, DL, ScalarOps);
65396541

65406542
MachineBasicBlock::iterator First = RemainderBB->begin();
65416543
// Restore SCC

0 commit comments

Comments
 (0)