8
8
// ===----------------------------------------------------------------------===//
9
9
//
10
10
11
+ #include " SIFoldOperands.h"
11
12
#include " AMDGPU.h"
12
13
#include " GCNSubtarget.h"
13
14
#include " MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -66,9 +67,8 @@ struct FoldCandidate {
66
67
bool needsShrink () const { return ShrinkOpcode != -1 ; }
67
68
};
68
69
69
- class SIFoldOperands : public MachineFunctionPass {
70
+ class SIFoldOperandsImpl {
70
71
public:
71
- static char ID;
72
72
MachineRegisterInfo *MRI;
73
73
const SIInstrInfo *TII;
74
74
const SIRegisterInfo *TRI;
@@ -121,11 +121,22 @@ class SIFoldOperands : public MachineFunctionPass {
121
121
bool tryOptimizeAGPRPhis (MachineBasicBlock &MBB);
122
122
123
123
public:
124
- SIFoldOperands () : MachineFunctionPass(ID) {
125
- initializeSIFoldOperandsPass (*PassRegistry::getPassRegistry ());
126
- }
124
+ SIFoldOperandsImpl () = default ;
125
+
126
+ bool run (MachineFunction &MF);
127
+ };
127
128
128
- bool runOnMachineFunction (MachineFunction &MF) override ;
129
+ class SIFoldOperandsLegacy : public MachineFunctionPass {
130
+ public:
131
+ static char ID;
132
+
133
+ SIFoldOperandsLegacy () : MachineFunctionPass(ID) {}
134
+
135
+ bool runOnMachineFunction (MachineFunction &MF) override {
136
+ if (skipFunction (MF.getFunction ()))
137
+ return false ;
138
+ return SIFoldOperandsImpl ().run (MF);
139
+ }
129
140
130
141
StringRef getPassName () const override { return " SI Fold Operands" ; }
131
142
@@ -137,12 +148,12 @@ class SIFoldOperands : public MachineFunctionPass {
137
148
138
149
} // End anonymous namespace.
139
150
140
- INITIALIZE_PASS (SIFoldOperands , DEBUG_TYPE,
141
- " SI Fold Operands " , false , false )
151
+ INITIALIZE_PASS (SIFoldOperandsLegacy , DEBUG_TYPE, " SI Fold Operands " , false ,
152
+ false )
142
153
143
- char SIFoldOperands ::ID = 0;
154
+ char SIFoldOperandsLegacy ::ID = 0;
144
155
145
- char &llvm::SIFoldOperandsID = SIFoldOperands ::ID;
156
+ char &llvm::SIFoldOperandsLegacyID = SIFoldOperandsLegacy ::ID;
146
157
147
158
static const TargetRegisterClass *getRegOpRC (const MachineRegisterInfo &MRI,
148
159
const TargetRegisterInfo &TRI,
@@ -177,8 +188,8 @@ static unsigned macToMad(unsigned Opc) {
177
188
178
189
// TODO: Add heuristic that the frame index might not fit in the addressing mode
179
190
// immediate offset to avoid materializing in loops.
180
- bool SIFoldOperands ::frameIndexMayFold (const MachineInstr &UseMI, int OpNo,
181
- const MachineOperand &OpToFold) const {
191
+ bool SIFoldOperandsImpl ::frameIndexMayFold (
192
+ const MachineInstr &UseMI, int OpNo, const MachineOperand &OpToFold) const {
182
193
if (!OpToFold.isFI ())
183
194
return false ;
184
195
@@ -196,11 +207,11 @@ bool SIFoldOperands::frameIndexMayFold(const MachineInstr &UseMI, int OpNo,
196
207
return OpNo == VIdx && SIdx == -1 ;
197
208
}
198
209
199
- FunctionPass *llvm::createSIFoldOperandsPass () {
200
- return new SIFoldOperands ();
210
+ FunctionPass *llvm::createSIFoldOperandsLegacyPass () {
211
+ return new SIFoldOperandsLegacy ();
201
212
}
202
213
203
- bool SIFoldOperands ::canUseImmWithOpSel (FoldCandidate &Fold) const {
214
+ bool SIFoldOperandsImpl ::canUseImmWithOpSel (FoldCandidate &Fold) const {
204
215
MachineInstr *MI = Fold.UseMI ;
205
216
MachineOperand &Old = MI->getOperand (Fold.UseOpNo );
206
217
const uint64_t TSFlags = MI->getDesc ().TSFlags ;
@@ -230,7 +241,7 @@ bool SIFoldOperands::canUseImmWithOpSel(FoldCandidate &Fold) const {
230
241
return true ;
231
242
}
232
243
233
- bool SIFoldOperands ::tryFoldImmWithOpSel (FoldCandidate &Fold) const {
244
+ bool SIFoldOperandsImpl ::tryFoldImmWithOpSel (FoldCandidate &Fold) const {
234
245
MachineInstr *MI = Fold.UseMI ;
235
246
MachineOperand &Old = MI->getOperand (Fold.UseOpNo );
236
247
unsigned Opcode = MI->getOpcode ();
@@ -354,7 +365,7 @@ bool SIFoldOperands::tryFoldImmWithOpSel(FoldCandidate &Fold) const {
354
365
return false ;
355
366
}
356
367
357
- bool SIFoldOperands ::updateOperand (FoldCandidate &Fold) const {
368
+ bool SIFoldOperandsImpl ::updateOperand (FoldCandidate &Fold) const {
358
369
MachineInstr *MI = Fold.UseMI ;
359
370
MachineOperand &Old = MI->getOperand (Fold.UseOpNo );
360
371
assert (Old.isReg ());
@@ -464,9 +475,9 @@ static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
464
475
FoldList.emplace_back (MI, OpNo, FoldOp, Commuted, ShrinkOp);
465
476
}
466
477
467
- bool SIFoldOperands ::tryAddToFoldList (SmallVectorImpl<FoldCandidate> &FoldList,
468
- MachineInstr *MI, unsigned OpNo,
469
- MachineOperand *OpToFold) const {
478
+ bool SIFoldOperandsImpl ::tryAddToFoldList (
479
+ SmallVectorImpl<FoldCandidate> &FoldList, MachineInstr *MI, unsigned OpNo,
480
+ MachineOperand *OpToFold) const {
470
481
const unsigned Opc = MI->getOpcode ();
471
482
472
483
auto tryToFoldAsFMAAKorMK = [&]() {
@@ -645,16 +656,16 @@ bool SIFoldOperands::tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
645
656
return true ;
646
657
}
647
658
648
- bool SIFoldOperands ::isUseSafeToFold (const MachineInstr &MI,
649
- const MachineOperand &UseMO) const {
659
+ bool SIFoldOperandsImpl ::isUseSafeToFold (const MachineInstr &MI,
660
+ const MachineOperand &UseMO) const {
650
661
// Operands of SDWA instructions must be registers.
651
662
return !TII->isSDWA (MI);
652
663
}
653
664
654
665
// Find a def of the UseReg, check if it is a reg_sequence and find initializers
655
666
// for each subreg, tracking it to foldable inline immediate if possible.
656
667
// Returns true on success.
657
- bool SIFoldOperands ::getRegSeqInit (
668
+ bool SIFoldOperandsImpl ::getRegSeqInit (
658
669
SmallVectorImpl<std::pair<MachineOperand *, unsigned >> &Defs,
659
670
Register UseReg, uint8_t OpTy) const {
660
671
MachineInstr *Def = MRI->getVRegDef (UseReg);
@@ -686,7 +697,7 @@ bool SIFoldOperands::getRegSeqInit(
686
697
return true ;
687
698
}
688
699
689
- bool SIFoldOperands ::tryToFoldACImm (
700
+ bool SIFoldOperandsImpl ::tryToFoldACImm (
690
701
const MachineOperand &OpToFold, MachineInstr *UseMI, unsigned UseOpIdx,
691
702
SmallVectorImpl<FoldCandidate> &FoldList) const {
692
703
const MCInstrDesc &Desc = UseMI->getDesc ();
@@ -752,12 +763,10 @@ bool SIFoldOperands::tryToFoldACImm(
752
763
return true ;
753
764
}
754
765
755
- void SIFoldOperands::foldOperand (
756
- MachineOperand &OpToFold,
757
- MachineInstr *UseMI,
758
- int UseOpIdx,
759
- SmallVectorImpl<FoldCandidate> &FoldList,
760
- SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
766
+ void SIFoldOperandsImpl::foldOperand (
767
+ MachineOperand &OpToFold, MachineInstr *UseMI, int UseOpIdx,
768
+ SmallVectorImpl<FoldCandidate> &FoldList,
769
+ SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
761
770
const MachineOperand *UseOp = &UseMI->getOperand (UseOpIdx);
762
771
763
772
if (!isUseSafeToFold (*UseMI, *UseOp))
@@ -1187,7 +1196,7 @@ static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
1187
1196
}
1188
1197
1189
1198
MachineOperand *
1190
- SIFoldOperands ::getImmOrMaterializedImm (MachineOperand &Op) const {
1199
+ SIFoldOperandsImpl ::getImmOrMaterializedImm (MachineOperand &Op) const {
1191
1200
// If this has a subregister, it obviously is a register source.
1192
1201
if (!Op.isReg () || Op.getSubReg () != AMDGPU::NoSubRegister ||
1193
1202
!Op.getReg ().isVirtual ())
@@ -1206,7 +1215,7 @@ SIFoldOperands::getImmOrMaterializedImm(MachineOperand &Op) const {
1206
1215
// Try to simplify operations with a constant that may appear after instruction
1207
1216
// selection.
1208
1217
// TODO: See if a frame index with a fixed offset can fold.
1209
- bool SIFoldOperands ::tryConstantFoldOp (MachineInstr *MI) const {
1218
+ bool SIFoldOperandsImpl ::tryConstantFoldOp (MachineInstr *MI) const {
1210
1219
if (!MI->allImplicitDefsAreDead ())
1211
1220
return false ;
1212
1221
@@ -1307,7 +1316,7 @@ bool SIFoldOperands::tryConstantFoldOp(MachineInstr *MI) const {
1307
1316
}
1308
1317
1309
1318
// Try to fold an instruction into a simpler one
1310
- bool SIFoldOperands ::tryFoldCndMask (MachineInstr &MI) const {
1319
+ bool SIFoldOperandsImpl ::tryFoldCndMask (MachineInstr &MI) const {
1311
1320
unsigned Opc = MI.getOpcode ();
1312
1321
if (Opc != AMDGPU::V_CNDMASK_B32_e32 && Opc != AMDGPU::V_CNDMASK_B32_e64 &&
1313
1322
Opc != AMDGPU::V_CNDMASK_B64_PSEUDO)
@@ -1346,7 +1355,7 @@ bool SIFoldOperands::tryFoldCndMask(MachineInstr &MI) const {
1346
1355
return true ;
1347
1356
}
1348
1357
1349
- bool SIFoldOperands ::tryFoldZeroHighBits (MachineInstr &MI) const {
1358
+ bool SIFoldOperandsImpl ::tryFoldZeroHighBits (MachineInstr &MI) const {
1350
1359
if (MI.getOpcode () != AMDGPU::V_AND_B32_e64 &&
1351
1360
MI.getOpcode () != AMDGPU::V_AND_B32_e32)
1352
1361
return false ;
@@ -1368,8 +1377,8 @@ bool SIFoldOperands::tryFoldZeroHighBits(MachineInstr &MI) const {
1368
1377
return true ;
1369
1378
}
1370
1379
1371
- bool SIFoldOperands ::foldInstOperand (MachineInstr &MI,
1372
- MachineOperand &OpToFold) const {
1380
+ bool SIFoldOperandsImpl ::foldInstOperand (MachineInstr &MI,
1381
+ MachineOperand &OpToFold) const {
1373
1382
// We need mutate the operands of new mov instructions to add implicit
1374
1383
// uses of EXEC, but adding them invalidates the use_iterator, so defer
1375
1384
// this.
@@ -1442,7 +1451,7 @@ bool SIFoldOperands::foldInstOperand(MachineInstr &MI,
1442
1451
return true ;
1443
1452
}
1444
1453
1445
- bool SIFoldOperands ::tryFoldFoldableCopy (
1454
+ bool SIFoldOperandsImpl ::tryFoldFoldableCopy (
1446
1455
MachineInstr &MI, MachineOperand *&CurrentKnownM0Val) const {
1447
1456
// Specially track simple redefs of m0 to the same value in a block, so we
1448
1457
// can erase the later ones.
@@ -1519,7 +1528,8 @@ bool SIFoldOperands::tryFoldFoldableCopy(
1519
1528
1520
1529
// Clamp patterns are canonically selected to v_max_* instructions, so only
1521
1530
// handle them.
1522
- const MachineOperand *SIFoldOperands::isClamp (const MachineInstr &MI) const {
1531
+ const MachineOperand *
1532
+ SIFoldOperandsImpl::isClamp (const MachineInstr &MI) const {
1523
1533
unsigned Op = MI.getOpcode ();
1524
1534
switch (Op) {
1525
1535
case AMDGPU::V_MAX_F32_e64:
@@ -1567,7 +1577,7 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
1567
1577
}
1568
1578
1569
1579
// FIXME: Clamp for v_mad_mixhi_f16 handled during isel.
1570
- bool SIFoldOperands ::tryFoldClamp (MachineInstr &MI) {
1580
+ bool SIFoldOperandsImpl ::tryFoldClamp (MachineInstr &MI) {
1571
1581
const MachineOperand *ClampSrc = isClamp (MI);
1572
1582
if (!ClampSrc || !MRI->hasOneNonDBGUser (ClampSrc->getReg ()))
1573
1583
return false ;
@@ -1662,7 +1672,7 @@ static int getOModValue(unsigned Opc, int64_t Val) {
1662
1672
// FIXME: Does this need to check IEEE mode bit? SNaNs are generally not
1663
1673
// handled, so will anything other than that break?
1664
1674
std::pair<const MachineOperand *, int >
1665
- SIFoldOperands ::isOMod (const MachineInstr &MI) const {
1675
+ SIFoldOperandsImpl ::isOMod (const MachineInstr &MI) const {
1666
1676
unsigned Op = MI.getOpcode ();
1667
1677
switch (Op) {
1668
1678
case AMDGPU::V_MUL_F64_e64:
@@ -1740,7 +1750,7 @@ SIFoldOperands::isOMod(const MachineInstr &MI) const {
1740
1750
}
1741
1751
1742
1752
// FIXME: Does this need to check IEEE bit on function?
1743
- bool SIFoldOperands ::tryFoldOMod (MachineInstr &MI) {
1753
+ bool SIFoldOperandsImpl ::tryFoldOMod (MachineInstr &MI) {
1744
1754
const MachineOperand *RegOp;
1745
1755
int OMod;
1746
1756
std::tie (RegOp, OMod) = isOMod (MI);
@@ -1779,7 +1789,7 @@ bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
1779
1789
1780
1790
// Try to fold a reg_sequence with vgpr output and agpr inputs into an
1781
1791
// instruction which can take an agpr. So far that means a store.
1782
- bool SIFoldOperands ::tryFoldRegSequence (MachineInstr &MI) {
1792
+ bool SIFoldOperandsImpl ::tryFoldRegSequence (MachineInstr &MI) {
1783
1793
assert (MI.isRegSequence ());
1784
1794
auto Reg = MI.getOperand (0 ).getReg ();
1785
1795
@@ -1926,7 +1936,7 @@ static bool isAGPRCopy(const SIRegisterInfo &TRI,
1926
1936
// loop:
1927
1937
// %3:areg = PHI %2:areg, %entry, %X:areg,
1928
1938
// %4:areg = (instr using %3:areg)
1929
- bool SIFoldOperands ::tryFoldPhiAGPR (MachineInstr &PHI) {
1939
+ bool SIFoldOperandsImpl ::tryFoldPhiAGPR (MachineInstr &PHI) {
1930
1940
assert (PHI.isPHI ());
1931
1941
1932
1942
Register PhiOut = PHI.getOperand (0 ).getReg ();
@@ -2030,7 +2040,7 @@ bool SIFoldOperands::tryFoldPhiAGPR(MachineInstr &PHI) {
2030
2040
}
2031
2041
2032
2042
// Attempt to convert VGPR load to an AGPR load.
2033
- bool SIFoldOperands ::tryFoldLoad (MachineInstr &MI) {
2043
+ bool SIFoldOperandsImpl ::tryFoldLoad (MachineInstr &MI) {
2034
2044
assert (MI.mayLoad ());
2035
2045
if (!ST->hasGFX90AInsts () || MI.getNumExplicitDefs () != 1 )
2036
2046
return false ;
@@ -2117,7 +2127,7 @@ bool SIFoldOperands::tryFoldLoad(MachineInstr &MI) {
2117
2127
// %0:areg = PHI %tmp_agpr, %a, %x, %c
2118
2128
// %1:areg = PHI %tmp_agpr, %a, %y, %c
2119
2129
// %2:areg = PHI %tmp_agpr, %a, %z, %c
2120
- bool SIFoldOperands ::tryOptimizeAGPRPhis (MachineBasicBlock &MBB) {
2130
+ bool SIFoldOperandsImpl ::tryOptimizeAGPRPhis (MachineBasicBlock &MBB) {
2121
2131
// This is only really needed on GFX908 where AGPR-AGPR copies are
2122
2132
// unreasonably difficult.
2123
2133
if (ST->hasGFX90AInsts ())
@@ -2182,10 +2192,7 @@ bool SIFoldOperands::tryOptimizeAGPRPhis(MachineBasicBlock &MBB) {
2182
2192
return Changed;
2183
2193
}
2184
2194
2185
- bool SIFoldOperands::runOnMachineFunction (MachineFunction &MF) {
2186
- if (skipFunction (MF.getFunction ()))
2187
- return false ;
2188
-
2195
+ bool SIFoldOperandsImpl::run (MachineFunction &MF) {
2189
2196
MRI = &MF.getRegInfo ();
2190
2197
ST = &MF.getSubtarget <GCNSubtarget>();
2191
2198
TII = ST->getInstrInfo ();
@@ -2246,3 +2253,14 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
2246
2253
2247
2254
return Changed;
2248
2255
}
2256
+
2257
+ PreservedAnalyses SIFoldOperandsPass::run (MachineFunction &MF,
2258
+ MachineFunctionAnalysisManager &) {
2259
+ bool Changed = SIFoldOperandsImpl ().run (MF);
2260
+ if (!Changed) {
2261
+ return PreservedAnalyses::all ();
2262
+ }
2263
+ auto PA = getMachineFunctionPassPreservedAnalyses ();
2264
+ PA.preserveSet <CFGAnalyses>();
2265
+ return PA;
2266
+ }
0 commit comments