Skip to content

Commit 0207ca9

Browse files
committed
AMDGPU: Handle folding frame indexes into s_add_i32
This only implements the lowering of frame indexes used by s_add_i32; it does not yet enable producing direct frame index references in s_add_i32.
1 parent ca40989 commit 0207ca9

File tree

2 files changed

+256
-163
lines changed

2 files changed

+256
-163
lines changed

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2268,7 +2268,70 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
22682268
MI->eraseFromParent();
22692269
return true;
22702270
}
2271+
case AMDGPU::S_ADD_I32: {
2272+
// TODO: Handle s_or_b32, s_and_b32.
2273+
MachineOperand &OtherOp = MI->getOperand(FIOperandNum == 1 ? 2 : 1);
22712274

2275+
assert(FrameReg || MFI->isBottomOfStack());
2276+
2277+
MachineOperand &DstOp = MI->getOperand(0);
2278+
const DebugLoc &DL = MI->getDebugLoc();
2279+
Register MaterializedReg = FrameReg;
2280+
2281+
// Defend against live scc, which should never happen in practice.
2282+
bool DeadSCC = MI->getOperand(3).isDead();
2283+
2284+
// Do an in-place scale of the wave offset to the lane offset.
2285+
if (FrameReg && !ST.enableFlatScratch()) {
2286+
BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_LSHR_B32))
2287+
.addDef(DstOp.getReg(), RegState::Renamable)
2288+
.addReg(FrameReg)
2289+
.addImm(ST.getWavefrontSizeLog2())
2290+
.setOperandDead(3); // Set SCC dead
2291+
MaterializedReg = DstOp.getReg();
2292+
}
2293+
2294+
// If we can't fold the other operand, do another increment.
2295+
if (!OtherOp.isImm() && MaterializedReg) {
2296+
auto AddI32 = BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_ADD_I32))
2297+
.addDef(DstOp.getReg(), RegState::Renamable)
2298+
.addReg(MaterializedReg)
2299+
.add(OtherOp);
2300+
if (DeadSCC)
2301+
AddI32.setOperandDead(3);
2302+
MaterializedReg = DstOp.getReg();
2303+
}
2304+
2305+
int64_t NewOffset = FrameInfo.getObjectOffset(Index);
2306+
2307+
// For the non-immediate case, we could fall through to the default
2308+
// handling, but we do an in-place update of the result register here to
2309+
// avoid scavenging another register.
2310+
if (OtherOp.isImm())
2311+
NewOffset += OtherOp.getImm();
2312+
2313+
if (NewOffset == 0 && DeadSCC) {
2314+
MI->eraseFromParent();
2315+
} else if (!MaterializedReg && OtherOp.isImm()) {
2316+
// In a kernel, the address should just be an immediate.
2317+
// SCC should really be dead, but preserve the def just in case it
2318+
// isn't.
2319+
if (DeadSCC)
2320+
MI->removeOperand(3);
2321+
else
2322+
MI->getOperand(3).setIsDef(true);
2323+
2324+
MI->removeOperand(2);
2325+
MI->getOperand(1).ChangeToImmediate(NewOffset);
2326+
MI->setDesc(TII->get(AMDGPU::S_MOV_B32));
2327+
} else {
2328+
if (MaterializedReg)
2329+
OtherOp.ChangeToRegister(MaterializedReg, false);
2330+
FIOp.ChangeToImmediate(NewOffset);
2331+
}
2332+
2333+
return true;
2334+
}
22722335
default: {
22732336
// Other access to frame index
22742337
const DebugLoc &DL = MI->getDebugLoc();

0 commit comments

Comments
 (0)