@@ -2268,7 +2268,70 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
2268
2268
MI->eraseFromParent ();
2269
2269
return true ;
2270
2270
}
2271
+ case AMDGPU::S_ADD_I32: {
2272
+ // TODO: Handle s_or_b32, s_and_b32.
2273
+ MachineOperand &OtherOp = MI->getOperand (FIOperandNum == 1 ? 2 : 1 );
2271
2274
2275
+ assert (FrameReg || MFI->isBottomOfStack ());
2276
+
2277
+ MachineOperand &DstOp = MI->getOperand (0 );
2278
+ const DebugLoc &DL = MI->getDebugLoc ();
2279
+ Register MaterializedReg = FrameReg;
2280
+
2281
+ // Defend against live scc, which should never happen in practice.
2282
+ bool DeadSCC = MI->getOperand (3 ).isDead ();
2283
+
2284
+ // Do an in-place scale of the wave offset to the lane offset.
2285
+ if (FrameReg && !ST.enableFlatScratch ()) {
2286
+ BuildMI (*MBB, *MI, DL, TII->get (AMDGPU::S_LSHR_B32))
2287
+ .addDef (DstOp.getReg (), RegState::Renamable)
2288
+ .addReg (FrameReg)
2289
+ .addImm (ST.getWavefrontSizeLog2 ())
2290
+ .setOperandDead (3 ); // Set SCC dead
2291
+ MaterializedReg = DstOp.getReg ();
2292
+ }
2293
+
2294
+ // If we can't fold the other operand, do another increment.
2295
+ if (!OtherOp.isImm () && MaterializedReg) {
2296
+ auto AddI32 = BuildMI (*MBB, *MI, DL, TII->get (AMDGPU::S_ADD_I32))
2297
+ .addDef (DstOp.getReg (), RegState::Renamable)
2298
+ .addReg (MaterializedReg)
2299
+ .add (OtherOp);
2300
+ if (DeadSCC)
2301
+ AddI32.setOperandDead (3 );
2302
+ MaterializedReg = DstOp.getReg ();
2303
+ }
2304
+
2305
+ int64_t NewOffset = FrameInfo.getObjectOffset (Index);
2306
+
2307
+ // For the non-immediate case, we could fall through to the default
2308
+ // handling, but we do an in-place update of the result register here to
2309
+ // avoid scavenging another register.
2310
+ if (OtherOp.isImm ())
2311
+ NewOffset += OtherOp.getImm ();
2312
+
2313
+ if (NewOffset == 0 && DeadSCC) {
2314
+ MI->eraseFromParent ();
2315
+ } else if (!MaterializedReg && OtherOp.isImm ()) {
2316
+ // In a kernel, the address should just be an immediate.
2317
+ // SCC should really be dead, but preserve the def just in case it
2318
+ // isn't.
2319
+ if (DeadSCC)
2320
+ MI->removeOperand (3 );
2321
+ else
2322
+ MI->getOperand (3 ).setIsDef (true );
2323
+
2324
+ MI->removeOperand (2 );
2325
+ MI->getOperand (1 ).ChangeToImmediate (NewOffset);
2326
+ MI->setDesc (TII->get (AMDGPU::S_MOV_B32));
2327
+ } else {
2328
+ if (MaterializedReg)
2329
+ OtherOp.ChangeToRegister (MaterializedReg, false );
2330
+ FIOp.ChangeToImmediate (NewOffset);
2331
+ }
2332
+
2333
+ return true ;
2334
+ }
2272
2335
default : {
2273
2336
// Other access to frame index
2274
2337
const DebugLoc &DL = MI->getDebugLoc ();
0 commit comments