@@ -78,6 +78,12 @@ class SIFoldOperandsImpl {
78
78
bool frameIndexMayFold (const MachineInstr &UseMI, int OpNo,
79
79
const MachineOperand &OpToFold) const ;
80
80
81
+ // / Fold %vgpr = COPY (S_ADD_I32 x, frameindex)
82
+ // /
83
+ // / => %vgpr = V_ADD_U32 x, frameindex
84
+ bool foldCopyToVGPROfScalarAddOfFrameIndex (Register DstReg, Register SrcReg,
85
+ MachineInstr &MI) const ;
86
+
81
87
bool updateOperand (FoldCandidate &Fold) const ;
82
88
83
89
bool canUseImmWithOpSel (FoldCandidate &Fold) const ;
@@ -224,6 +230,67 @@ bool SIFoldOperandsImpl::frameIndexMayFold(
224
230
return OpNo == VIdx && SIdx == -1 ;
225
231
}
226
232
233
+ // / Fold %vgpr = COPY (S_ADD_I32 x, frameindex)
234
+ // /
235
+ // / => %vgpr = V_ADD_U32 x, frameindex
236
+ bool SIFoldOperandsImpl::foldCopyToVGPROfScalarAddOfFrameIndex (
237
+ Register DstReg, Register SrcReg, MachineInstr &MI) const {
238
+ if (TRI->isVGPR (*MRI, DstReg) && TRI->isSGPRReg (*MRI, SrcReg) &&
239
+ MRI->hasOneNonDBGUse (SrcReg)) {
240
+ MachineInstr *Def = MRI->getVRegDef (SrcReg);
241
+ if (Def && Def->getOpcode () == AMDGPU::S_ADD_I32 &&
242
+ Def->getOperand (3 ).isDead ()) {
243
+ MachineOperand *Src0 = &Def->getOperand (1 );
244
+ MachineOperand *Src1 = &Def->getOperand (2 );
245
+
246
+ // TODO: This is profitable with more operand types, and for more
247
+ // opcodes. But ultimately this is working around poor / nonexistent
248
+ // regbankselect.
249
+ if (!Src0->isFI () && !Src1->isFI ())
250
+ return false ;
251
+
252
+ if (Src0->isFI ())
253
+ std::swap (Src0, Src1);
254
+
255
+ MachineBasicBlock *MBB = Def->getParent ();
256
+ const DebugLoc &DL = Def->getDebugLoc ();
257
+ if (ST->hasAddNoCarry ()) {
258
+ bool UseVOP3 = !Src0->isImm () || TII->isInlineConstant (*Src0);
259
+ MachineInstrBuilder Add =
260
+ BuildMI (*MBB, *Def, DL,
261
+ TII->get (UseVOP3 ? AMDGPU::V_ADD_U32_e64
262
+ : AMDGPU::V_ADD_U32_e32),
263
+ DstReg)
264
+ .add (*Src0)
265
+ .add (*Src1)
266
+ .setMIFlags (Def->getFlags ());
267
+ if (UseVOP3)
268
+ Add.addImm (0 );
269
+
270
+ Def->eraseFromParent ();
271
+ MI.eraseFromParent ();
272
+ return true ;
273
+ }
274
+
275
+ MachineBasicBlock::LivenessQueryResult Liveness =
276
+ MBB->computeRegisterLiveness (TRI, AMDGPU::VCC, *Def, 16 );
277
+ if (Liveness == MachineBasicBlock::LQR_Dead) {
278
+ // TODO: If src1 satisfies operand constraints, use vop3 version.
279
+ BuildMI (*MBB, *Def, DL, TII->get (AMDGPU::V_ADD_CO_U32_e32), DstReg)
280
+ .add (*Src0)
281
+ .add (*Src1)
282
+ .setOperandDead (3 ) // implicit-def $vcc
283
+ .setMIFlags (Def->getFlags ());
284
+ Def->eraseFromParent ();
285
+ MI.eraseFromParent ();
286
+ return true ;
287
+ }
288
+ }
289
+ }
290
+
291
+ return false ;
292
+ }
293
+
227
294
/// Factory for the legacy-pass-manager wrapper of the SIFoldOperands pass.
FunctionPass *llvm::createSIFoldOperandsLegacyPass() {
  return new SIFoldOperandsLegacy();
}
@@ -1470,9 +1537,10 @@ bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI,
1470
1537
1471
1538
bool SIFoldOperandsImpl::tryFoldFoldableCopy (
1472
1539
MachineInstr &MI, MachineOperand *&CurrentKnownM0Val) const {
1540
+ Register DstReg = MI.getOperand (0 ).getReg ();
1473
1541
// Specially track simple redefs of m0 to the same value in a block, so we
1474
1542
// can erase the later ones.
1475
- if (MI. getOperand ( 0 ). getReg () == AMDGPU::M0) {
1543
+ if (DstReg == AMDGPU::M0) {
1476
1544
MachineOperand &NewM0Val = MI.getOperand (1 );
1477
1545
if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo (NewM0Val)) {
1478
1546
MI.eraseFromParent ();
@@ -1504,13 +1572,17 @@ bool SIFoldOperandsImpl::tryFoldFoldableCopy(
1504
1572
if (OpToFold.isReg () && !OpToFold.getReg ().isVirtual ())
1505
1573
return false ;
1506
1574
1575
+ if (OpToFold.isReg () &&
1576
+ foldCopyToVGPROfScalarAddOfFrameIndex (DstReg, OpToFold.getReg (), MI))
1577
+ return true ;
1578
+
1507
1579
// Prevent folding operands backwards in the function. For example,
1508
1580
// the COPY opcode must not be replaced by 1 in this example:
1509
1581
//
1510
1582
// %3 = COPY %vgpr0; VGPR_32:%3
1511
1583
// ...
1512
1584
// %vgpr0 = V_MOV_B32_e32 1, implicit %exec
1513
- if (!MI. getOperand ( 0 ). getReg () .isVirtual ())
1585
+ if (!DstReg .isVirtual ())
1514
1586
return false ;
1515
1587
1516
1588
bool Changed = foldInstOperand (MI, OpToFold);
0 commit comments