@@ -62,17 +62,7 @@ struct FoldCandidate {
 
   bool isGlobal() const { return Kind == MachineOperand::MO_GlobalAddress; }
 
-  bool isCommuted() const {
-    return Commuted;
-  }
-
-  bool needsShrink() const {
-    return ShrinkOpcode != -1;
-  }
-
-  int getShrinkOpcode() const {
-    return ShrinkOpcode;
-  }
+  bool needsShrink() const { return ShrinkOpcode != -1; }
 };
 
 class SIFoldOperands : public MachineFunctionPass {
@@ -175,19 +165,17 @@ bool SIFoldOperands::frameIndexMayFold(const MachineInstr &UseMI, int OpNo,
   if (!OpToFold.isFI())
     return false;
 
+  const unsigned Opc = UseMI.getOpcode();
   if (TII->isMUBUF(UseMI))
-    return OpNo == AMDGPU::getNamedOperandIdx(UseMI.getOpcode(),
-                                              AMDGPU::OpName::vaddr);
+    return OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
   if (!TII->isFLATScratch(UseMI))
     return false;
 
-  int SIdx = AMDGPU::getNamedOperandIdx(UseMI.getOpcode(),
-                                        AMDGPU::OpName::saddr);
+  int SIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr);
   if (OpNo == SIdx)
     return true;
 
-  int VIdx = AMDGPU::getNamedOperandIdx(UseMI.getOpcode(),
-                                        AMDGPU::OpName::vaddr);
+  int VIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
   return OpNo == VIdx && SIdx == -1;
 }
 
@@ -200,11 +188,11 @@ bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const {
   MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
   assert(Old.isReg());
 
+
+  const uint64_t TSFlags = MI->getDesc().TSFlags;
   if (Fold.isImm()) {
-    if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked &&
-        !(MI->getDesc().TSFlags & SIInstrFlags::IsMAI) &&
-        (!ST->hasDOTOpSelHazard() ||
-         !(MI->getDesc().TSFlags & SIInstrFlags::IsDOT)) &&
+    if (TSFlags & SIInstrFlags::IsPacked && !(TSFlags & SIInstrFlags::IsMAI) &&
+        (!ST->hasDOTOpSelHazard() || !(TSFlags & SIInstrFlags::IsDOT)) &&
         AMDGPU::isFoldableLiteralV216(Fold.ImmToFold,
                                       ST->hasInv2PiInlineImm())) {
       // Set op_sel/op_sel_hi on this operand or bail out if op_sel is
@@ -258,7 +246,7 @@ bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const {
       return false;
     }
 
-    int Op32 = Fold.getShrinkOpcode();
+    int Op32 = Fold.ShrinkOpcode;
     MachineOperand &Dst0 = MI->getOperand(0);
     MachineOperand &Dst1 = MI->getOperand(1);
     assert(Dst0.isDef() && Dst1.isDef());
@@ -287,7 +275,7 @@ bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const {
       MI->removeOperand(I);
     MI->setDesc(TII->get(AMDGPU::IMPLICIT_DEF));
 
-    if (Fold.isCommuted())
+    if (Fold.Commuted)
       TII->commuteInstruction(*Inst32, false);
     return true;
   }
@@ -325,11 +313,7 @@ bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const {
 
 static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
                               const MachineInstr *MI) {
-  for (auto Candidate : FoldList) {
-    if (Candidate.UseMI == MI)
-      return true;
-  }
-  return false;
+  return any_of(FoldList, [&](const auto &C) { return C.UseMI == MI; });
 }
 
 static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
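
Aside (not part of the patch): llvm::any_of from llvm/ADT/STLExtras.h is the range-based form of std::any_of, so the one-liner above keeps the same early-exit behavior as the removed loop. A minimal standalone C++ sketch of the same pattern, with illustrative names only:

#include <algorithm>
#include <vector>

struct Candidate {
  const void *UseMI; // stand-in for the MachineInstr* stored in FoldCandidate
};

// Same shape as the rewritten isUseMIInFoldList: scan the list and stop at
// the first candidate whose UseMI matches.
static bool isUseInList(const std::vector<Candidate> &List, const void *MI) {
  return std::any_of(List.begin(), List.end(),
                     [MI](const Candidate &C) { return C.UseMI == MI; });
}

int main() {
  std::vector<Candidate> List{{nullptr}};
  return isUseInList(List, nullptr) ? 0 : 1;
}
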
@@ -488,7 +472,6 @@ bool SIFoldOperands::isUseSafeToFold(const MachineInstr &MI,
   }
 
   return true;
-  // return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
 }
 
 // Find a def of the UseReg, check if it is a reg_sequence and find initializers
@@ -608,10 +591,9 @@ void SIFoldOperands::foldOperand(
     return;
 
   // FIXME: Fold operands with subregs.
-  if (UseOp.isReg() && OpToFold.isReg()) {
-    if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)
-      return;
-  }
+  if (UseOp.isReg() && OpToFold.isReg() &&
+      (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister))
+    return;
 
   // Special case for REG_SEQUENCE: We can't fold literals into
   // REG_SEQUENCE instructions, so we have to fold them into the
@@ -661,12 +643,11 @@
     // safe to fold the addressing mode, even pre-GFX9.
     UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getIndex());
 
+    const unsigned Opc = UseMI->getOpcode();
     if (TII->isFLATScratch(*UseMI) &&
-        AMDGPU::getNamedOperandIdx(UseMI->getOpcode(),
-                                   AMDGPU::OpName::vaddr) != -1 &&
-        AMDGPU::getNamedOperandIdx(UseMI->getOpcode(),
-                                   AMDGPU::OpName::saddr) == -1) {
-      unsigned NewOpc = AMDGPU::getFlatScratchInstSSfromSV(UseMI->getOpcode());
+        AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) != -1 &&
+        AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr) == -1) {
+      unsigned NewOpc = AMDGPU::getFlatScratchInstSSfromSV(Opc);
       UseMI->setDesc(TII->get(NewOpc));
     }
 
@@ -702,8 +683,10 @@ void SIFoldOperands::foldOperand(
                               Use.getParent()->getOperandNo(&Use),
                               &UseMI->getOperand(1));
       }
+
       for (auto &F : CopyUses) {
-        foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo, FoldList, CopiesToReplace);
+        foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo, FoldList,
+                    CopiesToReplace);
       }
     }
 
@@ -828,15 +811,15 @@
 
     if (Size != 4)
       return;
-    if (TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg()) &&
-        TRI->isVGPR(*MRI, UseMI->getOperand(1).getReg()))
+
+    Register Reg0 = UseMI->getOperand(0).getReg();
+    Register Reg1 = UseMI->getOperand(1).getReg();
+    if (TRI->isAGPR(*MRI, Reg0) && TRI->isVGPR(*MRI, Reg1))
       UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64));
-    else if (TRI->isVGPR(*MRI, UseMI->getOperand(0).getReg()) &&
-             TRI->isAGPR(*MRI, UseMI->getOperand(1).getReg()))
+    else if (TRI->isVGPR(*MRI, Reg0) && TRI->isAGPR(*MRI, Reg1))
       UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64));
-    else if (ST->hasGFX90AInsts() &&
-             TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg()) &&
-             TRI->isAGPR(*MRI, UseMI->getOperand(1).getReg()))
+    else if (ST->hasGFX90AInsts() && TRI->isAGPR(*MRI, Reg0) &&
+             TRI->isAGPR(*MRI, Reg1))
       UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_MOV_B32));
     return;
   }
@@ -1020,10 +1003,12 @@ static unsigned getMovOpc(bool IsScalar) {
   return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
 }
 
-/// Remove any leftover implicit operands from mutating the instruction. e.g.
-/// if we replace an s_and_b32 with a copy, we don't need the implicit scc def
-/// anymore.
-static void stripExtraCopyOperands(MachineInstr &MI) {
+static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
+  MI.setDesc(NewDesc);
+
+  // Remove any leftover implicit operands from mutating the instruction. e.g.
+  // if we replace an s_and_b32 with a copy, we don't need the implicit scc def
+  // anymore.
   const MCInstrDesc &Desc = MI.getDesc();
   unsigned NumOps = Desc.getNumOperands() +
                     Desc.getNumImplicitUses() +
@@ -1033,24 +1018,18 @@ static void stripExtraCopyOperands(MachineInstr &MI) {
     MI.removeOperand(I);
 }
 
-static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
-  MI.setDesc(NewDesc);
-  stripExtraCopyOperands(MI);
-}
-
 MachineOperand *
 SIFoldOperands::getImmOrMaterializedImm(MachineOperand &Op) const {
-  if (Op.isReg()) {
-    // If this has a subregister, it obviously is a register source.
-    if (Op.getSubReg() != AMDGPU::NoSubRegister || !Op.getReg().isVirtual())
-      return &Op;
-
-    MachineInstr *Def = MRI->getVRegDef(Op.getReg());
-    if (Def && Def->isMoveImmediate()) {
-      MachineOperand &ImmSrc = Def->getOperand(1);
-      if (ImmSrc.isImm())
-        return &ImmSrc;
-    }
+  // If this has a subregister, it obviously is a register source.
+  if (!Op.isReg() || Op.getSubReg() != AMDGPU::NoSubRegister ||
+      !Op.getReg().isVirtual())
+    return &Op;
+
+  MachineInstr *Def = MRI->getVRegDef(Op.getReg());
+  if (Def && Def->isMoveImmediate()) {
+    MachineOperand &ImmSrc = Def->getOperand(1);
+    if (ImmSrc.isImm())
+      return &ImmSrc;
   }
 
   return &Op;
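
Aside (illustrative only, made-up types rather than LLVM's API): the point of folding stripExtraCopyOperands into mutateCopyOp is that swapping the descriptor and dropping the now-stale implicit operands always happen together, so callers make a single call; that is also why the separate stripExtraCopyOperands(*MI) call disappears from tryConstantFoldOp further down in this diff. A simplified standalone analogue:

#include <vector>

struct Desc {
  unsigned NumOps; // total operands the new opcode expects
};

struct Instr {
  const Desc *D;
  std::vector<int> Operands;
  void setDesc(const Desc &NewD) { D = &NewD; }
};

// Simplified analogue of the merged helper: swap the descriptor, then trim
// operands the new opcode no longer declares (e.g. a leftover implicit def).
static void mutateCopyOp(Instr &MI, const Desc &NewDesc) {
  MI.setDesc(NewDesc);
  while (MI.Operands.size() > MI.D->NumOps)
    MI.Operands.pop_back();
}

int main() {
  Desc CopyDesc{2};
  Instr MI{nullptr, {0, 1, 2}}; // third operand plays the stale implicit def
  mutateCopyOp(MI, CopyDesc);
  return MI.Operands.size() == 2 ? 0 : 1;
}
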
@@ -1127,9 +1106,8 @@ bool SIFoldOperands::tryConstantFoldOp(MachineInstr *MI) const {
     return true;
   }
 
-  if (MI->getOpcode() == AMDGPU::V_AND_B32_e64 ||
-      MI->getOpcode() == AMDGPU::V_AND_B32_e32 ||
-      MI->getOpcode() == AMDGPU::S_AND_B32) {
+  if (Opc == AMDGPU::V_AND_B32_e64 || Opc == AMDGPU::V_AND_B32_e32 ||
+      Opc == AMDGPU::S_AND_B32) {
    if (Src1Val == 0) {
      // y = and x, 0 => y = v_mov_b32 0
      MI->removeOperand(Src0Idx);
@@ -1138,16 +1116,14 @@ bool SIFoldOperands::tryConstantFoldOp(MachineInstr *MI) const {
       // y = and x, -1 => y = copy x
       MI->removeOperand(Src1Idx);
       mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
-      stripExtraCopyOperands(*MI);
     } else
       return false;
 
     return true;
   }
 
-  if (MI->getOpcode() == AMDGPU::V_XOR_B32_e64 ||
-      MI->getOpcode() == AMDGPU::V_XOR_B32_e32 ||
-      MI->getOpcode() == AMDGPU::S_XOR_B32) {
+  if (Opc == AMDGPU::V_XOR_B32_e64 || Opc == AMDGPU::V_XOR_B32_e32 ||
+      Opc == AMDGPU::S_XOR_B32) {
     if (Src1Val == 0) {
       // y = xor x, 0 => y = copy x
       MI->removeOperand(Src1Idx);
@@ -1210,14 +1186,13 @@ bool SIFoldOperands::tryFoldZeroHighBits(MachineInstr &MI) const {
 
   Register Src1 = MI.getOperand(2).getReg();
   MachineInstr *SrcDef = MRI->getVRegDef(Src1);
-  if (ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode())) {
-    Register Dst = MI.getOperand(0).getReg();
-    MRI->replaceRegWith(Dst, SrcDef->getOperand(0).getReg());
-    MI.eraseFromParent();
-    return true;
-  }
+  if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode()))
+    return false;
 
-  return false;
+  Register Dst = MI.getOperand(0).getReg();
+  MRI->replaceRegWith(Dst, SrcDef->getOperand(0).getReg());
+  MI.eraseFromParent();
+  return true;
 }
 
 bool SIFoldOperands::foldInstOperand(MachineInstr &MI,
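
Aside (plain C++ with uint32_t standing in for the MIR, not LLVM API): the reason tryFoldZeroHighBits may erase the AND at all is that when the defining instruction already zeroes bits 31:16, masking with 0xffff is an identity, so every user can read the source register directly:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Src = 0x0000ABCDu;      // producer guarantees bits [31:16] are zero
  uint32_t Masked = Src & 0xFFFFu; // the AND the pass wants to fold away
  assert(Masked == Src);           // identity, so the AND's result register
                                   // can simply be replaced with Src
  return 0;
}
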
@@ -1286,7 +1261,7 @@ bool SIFoldOperands::foldInstOperand(MachineInstr &MI,
       LLVM_DEBUG(dbgs() << "Folded source from " << MI << " into OpNo "
                         << static_cast<int>(Fold.UseOpNo) << " of "
                         << *Fold.UseMI);
-    } else if (Fold.isCommuted()) {
+    } else if (Fold.Commuted) {
       // Restoring instruction's original operand order if fold has failed.
       TII->commuteInstruction(*Fold.UseMI, false);
     }
@@ -1735,9 +1710,9 @@ bool SIFoldOperands::tryFoldLoad(MachineInstr &MI) {
 
   SmallVector<const MachineInstr *, 8> Users;
   SmallVector<Register, 8> MoveRegs;
-  for (const MachineInstr &I : MRI->use_nodbg_instructions(DefReg)) {
+  for (const MachineInstr &I : MRI->use_nodbg_instructions(DefReg))
     Users.push_back(&I);
-  }
+
   if (Users.empty())
     return false;
@@ -1750,9 +1725,8 @@ bool SIFoldOperands::tryFoldLoad(MachineInstr &MI) {
     if (TRI->isAGPR(*MRI, DstReg))
      continue;
     MoveRegs.push_back(DstReg);
-    for (const MachineInstr &U : MRI->use_nodbg_instructions(DstReg)) {
+    for (const MachineInstr &U : MRI->use_nodbg_instructions(DstReg))
      Users.push_back(&U);
-    }
   }
 
   const TargetRegisterClass *RC = MRI->getRegClass(DefReg);