Skip to content

Commit 2adc94c

Browse files
authored
AMDGPU/NewPM: Port SIFoldOperands to new pass manager (#105801)
1 parent 62c5de3 commit 2adc94c

File tree

7 files changed

+100
-55
lines changed

7 files changed

+100
-55
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ void initializeAMDGPURegBankSelectPass(PassRegistry &);
3535
// SI Passes
3636
FunctionPass *createGCNDPPCombinePass();
3737
FunctionPass *createSIAnnotateControlFlowLegacyPass();
38-
FunctionPass *createSIFoldOperandsPass();
38+
FunctionPass *createSIFoldOperandsLegacyPass();
3939
FunctionPass *createSIPeepholeSDWAPass();
4040
FunctionPass *createSILowerI1CopiesLegacyPass();
4141
FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass();
@@ -160,8 +160,8 @@ extern char &AMDGPURewriteOutArgumentsID;
160160
void initializeGCNDPPCombinePass(PassRegistry &);
161161
extern char &GCNDPPCombineID;
162162

163-
void initializeSIFoldOperandsPass(PassRegistry &);
164-
extern char &SIFoldOperandsID;
163+
void initializeSIFoldOperandsLegacyPass(PassRegistry &);
164+
extern char &SIFoldOperandsLegacyID;
165165

166166
void initializeSIPeepholeSDWAPass(PassRegistry &);
167167
extern char &SIPeepholeSDWAID;

llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -97,4 +97,5 @@ FUNCTION_PASS_WITH_PARAMS(
9797
MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this))
9898
MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
9999
MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass())
100+
// Registers the new-pass-manager SIFoldOperandsPass under "si-fold-operands".
MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass())
100101
#undef MACHINE_FUNCTION_PASS

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@
3434
#include "R600.h"
3535
#include "R600TargetMachine.h"
3636
#include "SIFixSGPRCopies.h"
37+
#include "SIFoldOperands.h"
3738
#include "SIMachineFunctionInfo.h"
3839
#include "SIMachineScheduler.h"
3940
#include "TargetInfo/AMDGPUTargetInfo.h"
@@ -410,7 +411,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
410411
initializeSILowerSGPRSpillsPass(*PR);
411412
initializeSIFixSGPRCopiesLegacyPass(*PR);
412413
initializeSIFixVGPRCopiesPass(*PR);
413-
initializeSIFoldOperandsPass(*PR);
414+
initializeSIFoldOperandsLegacyPass(*PR);
414415
initializeSIPeepholeSDWAPass(*PR);
415416
initializeSIShrinkInstructionsPass(*PR);
416417
initializeSIOptimizeExecMaskingPreRAPass(*PR);
@@ -1270,15 +1271,15 @@ void GCNPassConfig::addMachineSSAOptimization() {
12701271
// instructions leftover after the operands are folded as well.
12711272
//
12721273
// XXX - Can we get away without running DeadMachineInstructionElim again?
1273-
addPass(&SIFoldOperandsID);
1274+
addPass(&SIFoldOperandsLegacyID);
12741275
if (EnableDPPCombine)
12751276
addPass(&GCNDPPCombineID);
12761277
addPass(&SILoadStoreOptimizerID);
12771278
if (isPassEnabled(EnableSDWAPeephole)) {
12781279
addPass(&SIPeepholeSDWAID);
12791280
addPass(&EarlyMachineLICMID);
12801281
addPass(&MachineCSEID);
1281-
addPass(&SIFoldOperandsID);
1282+
addPass(&SIFoldOperandsLegacyID);
12821283
}
12831284
addPass(&DeadMachineInstructionElimID);
12841285
addPass(createSIShrinkInstructionsPass());

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 67 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
//===----------------------------------------------------------------------===//
99
//
1010

11+
#include "SIFoldOperands.h"
1112
#include "AMDGPU.h"
1213
#include "GCNSubtarget.h"
1314
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -66,9 +67,8 @@ struct FoldCandidate {
6667
bool needsShrink() const { return ShrinkOpcode != -1; }
6768
};
6869

69-
class SIFoldOperands : public MachineFunctionPass {
70+
class SIFoldOperandsImpl {
7071
public:
71-
static char ID;
7272
MachineRegisterInfo *MRI;
7373
const SIInstrInfo *TII;
7474
const SIRegisterInfo *TRI;
@@ -121,11 +121,22 @@ class SIFoldOperands : public MachineFunctionPass {
121121
bool tryOptimizeAGPRPhis(MachineBasicBlock &MBB);
122122

123123
public:
124-
SIFoldOperands() : MachineFunctionPass(ID) {
125-
initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
126-
}
124+
SIFoldOperandsImpl() = default;
125+
126+
bool run(MachineFunction &MF);
127+
};
127128

128-
bool runOnMachineFunction(MachineFunction &MF) override;
129+
class SIFoldOperandsLegacy : public MachineFunctionPass {
130+
public:
131+
static char ID;
132+
133+
SIFoldOperandsLegacy() : MachineFunctionPass(ID) {}
134+
135+
// Legacy pass-manager entry point. Returns false without touching MF when
// skipFunction() reports that the function should be skipped; otherwise
// forwards to a freshly constructed SIFoldOperandsImpl and returns its result.
bool runOnMachineFunction(MachineFunction &MF) override {
136+
if (skipFunction(MF.getFunction()))
137+
return false;
138+
// The implementation object is a temporary created per invocation.
return SIFoldOperandsImpl().run(MF);
139+
}
129140

130141
StringRef getPassName() const override { return "SI Fold Operands"; }
131142

@@ -137,12 +148,12 @@ class SIFoldOperands : public MachineFunctionPass {
137148

138149
} // End anonymous namespace.
139150

140-
INITIALIZE_PASS(SIFoldOperands, DEBUG_TYPE,
141-
"SI Fold Operands", false, false)
151+
INITIALIZE_PASS(SIFoldOperandsLegacy, DEBUG_TYPE, "SI Fold Operands", false,
152+
false)
142153

143-
char SIFoldOperands::ID = 0;
154+
char SIFoldOperandsLegacy::ID = 0;
144155

145-
char &llvm::SIFoldOperandsID = SIFoldOperands::ID;
156+
char &llvm::SIFoldOperandsLegacyID = SIFoldOperandsLegacy::ID;
146157

147158
static const TargetRegisterClass *getRegOpRC(const MachineRegisterInfo &MRI,
148159
const TargetRegisterInfo &TRI,
@@ -177,8 +188,8 @@ static unsigned macToMad(unsigned Opc) {
177188

178189
// TODO: Add heuristic that the frame index might not fit in the addressing mode
179190
// immediate offset to avoid materializing in loops.
180-
bool SIFoldOperands::frameIndexMayFold(const MachineInstr &UseMI, int OpNo,
181-
const MachineOperand &OpToFold) const {
191+
bool SIFoldOperandsImpl::frameIndexMayFold(
192+
const MachineInstr &UseMI, int OpNo, const MachineOperand &OpToFold) const {
182193
if (!OpToFold.isFI())
183194
return false;
184195

@@ -196,11 +207,11 @@ bool SIFoldOperands::frameIndexMayFold(const MachineInstr &UseMI, int OpNo,
196207
return OpNo == VIdx && SIdx == -1;
197208
}
198209

199-
FunctionPass *llvm::createSIFoldOperandsPass() {
200-
return new SIFoldOperands();
210+
// Factory for the legacy pass-manager wrapper: returns a newly allocated
// SIFoldOperandsLegacy instance.
// NOTE(review): ownership is presumably taken by the pass manager that the
// result is added to -- confirm against callers.
FunctionPass *llvm::createSIFoldOperandsLegacyPass() {
211+
return new SIFoldOperandsLegacy();
201212
}
202213

203-
bool SIFoldOperands::canUseImmWithOpSel(FoldCandidate &Fold) const {
214+
bool SIFoldOperandsImpl::canUseImmWithOpSel(FoldCandidate &Fold) const {
204215
MachineInstr *MI = Fold.UseMI;
205216
MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
206217
const uint64_t TSFlags = MI->getDesc().TSFlags;
@@ -230,7 +241,7 @@ bool SIFoldOperands::canUseImmWithOpSel(FoldCandidate &Fold) const {
230241
return true;
231242
}
232243

233-
bool SIFoldOperands::tryFoldImmWithOpSel(FoldCandidate &Fold) const {
244+
bool SIFoldOperandsImpl::tryFoldImmWithOpSel(FoldCandidate &Fold) const {
234245
MachineInstr *MI = Fold.UseMI;
235246
MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
236247
unsigned Opcode = MI->getOpcode();
@@ -354,7 +365,7 @@ bool SIFoldOperands::tryFoldImmWithOpSel(FoldCandidate &Fold) const {
354365
return false;
355366
}
356367

357-
bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const {
368+
bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
358369
MachineInstr *MI = Fold.UseMI;
359370
MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
360371
assert(Old.isReg());
@@ -464,9 +475,9 @@ static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
464475
FoldList.emplace_back(MI, OpNo, FoldOp, Commuted, ShrinkOp);
465476
}
466477

467-
bool SIFoldOperands::tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
468-
MachineInstr *MI, unsigned OpNo,
469-
MachineOperand *OpToFold) const {
478+
bool SIFoldOperandsImpl::tryAddToFoldList(
479+
SmallVectorImpl<FoldCandidate> &FoldList, MachineInstr *MI, unsigned OpNo,
480+
MachineOperand *OpToFold) const {
470481
const unsigned Opc = MI->getOpcode();
471482

472483
auto tryToFoldAsFMAAKorMK = [&]() {
@@ -645,16 +656,16 @@ bool SIFoldOperands::tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
645656
return true;
646657
}
647658

648-
bool SIFoldOperands::isUseSafeToFold(const MachineInstr &MI,
649-
const MachineOperand &UseMO) const {
659+
bool SIFoldOperandsImpl::isUseSafeToFold(const MachineInstr &MI,
660+
const MachineOperand &UseMO) const {
650661
// Operands of SDWA instructions must be registers.
651662
return !TII->isSDWA(MI);
652663
}
653664

654665
// Find a def of the UseReg, check if it is a reg_sequence and find initializers
655666
// for each subreg, tracking it to foldable inline immediate if possible.
656667
// Returns true on success.
657-
bool SIFoldOperands::getRegSeqInit(
668+
bool SIFoldOperandsImpl::getRegSeqInit(
658669
SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs,
659670
Register UseReg, uint8_t OpTy) const {
660671
MachineInstr *Def = MRI->getVRegDef(UseReg);
@@ -686,7 +697,7 @@ bool SIFoldOperands::getRegSeqInit(
686697
return true;
687698
}
688699

689-
bool SIFoldOperands::tryToFoldACImm(
700+
bool SIFoldOperandsImpl::tryToFoldACImm(
690701
const MachineOperand &OpToFold, MachineInstr *UseMI, unsigned UseOpIdx,
691702
SmallVectorImpl<FoldCandidate> &FoldList) const {
692703
const MCInstrDesc &Desc = UseMI->getDesc();
@@ -752,12 +763,10 @@ bool SIFoldOperands::tryToFoldACImm(
752763
return true;
753764
}
754765

755-
void SIFoldOperands::foldOperand(
756-
MachineOperand &OpToFold,
757-
MachineInstr *UseMI,
758-
int UseOpIdx,
759-
SmallVectorImpl<FoldCandidate> &FoldList,
760-
SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
766+
void SIFoldOperandsImpl::foldOperand(
767+
MachineOperand &OpToFold, MachineInstr *UseMI, int UseOpIdx,
768+
SmallVectorImpl<FoldCandidate> &FoldList,
769+
SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
761770
const MachineOperand *UseOp = &UseMI->getOperand(UseOpIdx);
762771

763772
if (!isUseSafeToFold(*UseMI, *UseOp))
@@ -1187,7 +1196,7 @@ static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
11871196
}
11881197

11891198
MachineOperand *
1190-
SIFoldOperands::getImmOrMaterializedImm(MachineOperand &Op) const {
1199+
SIFoldOperandsImpl::getImmOrMaterializedImm(MachineOperand &Op) const {
11911200
// If this has a subregister, it obviously is a register source.
11921201
if (!Op.isReg() || Op.getSubReg() != AMDGPU::NoSubRegister ||
11931202
!Op.getReg().isVirtual())
@@ -1206,7 +1215,7 @@ SIFoldOperands::getImmOrMaterializedImm(MachineOperand &Op) const {
12061215
// Try to simplify operations with a constant that may appear after instruction
12071216
// selection.
12081217
// TODO: See if a frame index with a fixed offset can fold.
1209-
bool SIFoldOperands::tryConstantFoldOp(MachineInstr *MI) const {
1218+
bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
12101219
if (!MI->allImplicitDefsAreDead())
12111220
return false;
12121221

@@ -1307,7 +1316,7 @@ bool SIFoldOperands::tryConstantFoldOp(MachineInstr *MI) const {
13071316
}
13081317

13091318
// Try to fold an instruction into a simpler one
1310-
bool SIFoldOperands::tryFoldCndMask(MachineInstr &MI) const {
1319+
bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const {
13111320
unsigned Opc = MI.getOpcode();
13121321
if (Opc != AMDGPU::V_CNDMASK_B32_e32 && Opc != AMDGPU::V_CNDMASK_B32_e64 &&
13131322
Opc != AMDGPU::V_CNDMASK_B64_PSEUDO)
@@ -1346,7 +1355,7 @@ bool SIFoldOperands::tryFoldCndMask(MachineInstr &MI) const {
13461355
return true;
13471356
}
13481357

1349-
bool SIFoldOperands::tryFoldZeroHighBits(MachineInstr &MI) const {
1358+
bool SIFoldOperandsImpl::tryFoldZeroHighBits(MachineInstr &MI) const {
13501359
if (MI.getOpcode() != AMDGPU::V_AND_B32_e64 &&
13511360
MI.getOpcode() != AMDGPU::V_AND_B32_e32)
13521361
return false;
@@ -1368,8 +1377,8 @@ bool SIFoldOperands::tryFoldZeroHighBits(MachineInstr &MI) const {
13681377
return true;
13691378
}
13701379

1371-
bool SIFoldOperands::foldInstOperand(MachineInstr &MI,
1372-
MachineOperand &OpToFold) const {
1380+
bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI,
1381+
MachineOperand &OpToFold) const {
13731382
// We need mutate the operands of new mov instructions to add implicit
13741383
// uses of EXEC, but adding them invalidates the use_iterator, so defer
13751384
// this.
@@ -1442,7 +1451,7 @@ bool SIFoldOperands::foldInstOperand(MachineInstr &MI,
14421451
return true;
14431452
}
14441453

1445-
bool SIFoldOperands::tryFoldFoldableCopy(
1454+
bool SIFoldOperandsImpl::tryFoldFoldableCopy(
14461455
MachineInstr &MI, MachineOperand *&CurrentKnownM0Val) const {
14471456
// Specially track simple redefs of m0 to the same value in a block, so we
14481457
// can erase the later ones.
@@ -1519,7 +1528,8 @@ bool SIFoldOperands::tryFoldFoldableCopy(
15191528

15201529
// Clamp patterns are canonically selected to v_max_* instructions, so only
15211530
// handle them.
1522-
const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
1531+
const MachineOperand *
1532+
SIFoldOperandsImpl::isClamp(const MachineInstr &MI) const {
15231533
unsigned Op = MI.getOpcode();
15241534
switch (Op) {
15251535
case AMDGPU::V_MAX_F32_e64:
@@ -1567,7 +1577,7 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
15671577
}
15681578

15691579
// FIXME: Clamp for v_mad_mixhi_f16 handled during isel.
1570-
bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
1580+
bool SIFoldOperandsImpl::tryFoldClamp(MachineInstr &MI) {
15711581
const MachineOperand *ClampSrc = isClamp(MI);
15721582
if (!ClampSrc || !MRI->hasOneNonDBGUser(ClampSrc->getReg()))
15731583
return false;
@@ -1662,7 +1672,7 @@ static int getOModValue(unsigned Opc, int64_t Val) {
16621672
// FIXME: Does this need to check IEEE mode bit? SNaNs are generally not
16631673
// handled, so will anything other than that break?
16641674
std::pair<const MachineOperand *, int>
1665-
SIFoldOperands::isOMod(const MachineInstr &MI) const {
1675+
SIFoldOperandsImpl::isOMod(const MachineInstr &MI) const {
16661676
unsigned Op = MI.getOpcode();
16671677
switch (Op) {
16681678
case AMDGPU::V_MUL_F64_e64:
@@ -1740,7 +1750,7 @@ SIFoldOperands::isOMod(const MachineInstr &MI) const {
17401750
}
17411751

17421752
// FIXME: Does this need to check IEEE bit on function?
1743-
bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
1753+
bool SIFoldOperandsImpl::tryFoldOMod(MachineInstr &MI) {
17441754
const MachineOperand *RegOp;
17451755
int OMod;
17461756
std::tie(RegOp, OMod) = isOMod(MI);
@@ -1779,7 +1789,7 @@ bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
17791789

17801790
// Try to fold a reg_sequence with vgpr output and agpr inputs into an
17811791
// instruction which can take an agpr. So far that means a store.
1782-
bool SIFoldOperands::tryFoldRegSequence(MachineInstr &MI) {
1792+
bool SIFoldOperandsImpl::tryFoldRegSequence(MachineInstr &MI) {
17831793
assert(MI.isRegSequence());
17841794
auto Reg = MI.getOperand(0).getReg();
17851795

@@ -1926,7 +1936,7 @@ static bool isAGPRCopy(const SIRegisterInfo &TRI,
19261936
// loop:
19271937
// %3:areg = PHI %2:areg, %entry, %X:areg,
19281938
// %4:areg = (instr using %3:areg)
1929-
bool SIFoldOperands::tryFoldPhiAGPR(MachineInstr &PHI) {
1939+
bool SIFoldOperandsImpl::tryFoldPhiAGPR(MachineInstr &PHI) {
19301940
assert(PHI.isPHI());
19311941

19321942
Register PhiOut = PHI.getOperand(0).getReg();
@@ -2030,7 +2040,7 @@ bool SIFoldOperands::tryFoldPhiAGPR(MachineInstr &PHI) {
20302040
}
20312041

20322042
// Attempt to convert VGPR load to an AGPR load.
2033-
bool SIFoldOperands::tryFoldLoad(MachineInstr &MI) {
2043+
bool SIFoldOperandsImpl::tryFoldLoad(MachineInstr &MI) {
20342044
assert(MI.mayLoad());
20352045
if (!ST->hasGFX90AInsts() || MI.getNumExplicitDefs() != 1)
20362046
return false;
@@ -2117,7 +2127,7 @@ bool SIFoldOperands::tryFoldLoad(MachineInstr &MI) {
21172127
// %0:areg = PHI %tmp_agpr, %a, %x, %c
21182128
// %1:areg = PHI %tmp_agpr, %a, %y, %c
21192129
// %2:areg = PHI %tmp_agpr, %a, %z, %c
2120-
bool SIFoldOperands::tryOptimizeAGPRPhis(MachineBasicBlock &MBB) {
2130+
bool SIFoldOperandsImpl::tryOptimizeAGPRPhis(MachineBasicBlock &MBB) {
21212131
// This is only really needed on GFX908 where AGPR-AGPR copies are
21222132
// unreasonably difficult.
21232133
if (ST->hasGFX90AInsts())
@@ -2182,10 +2192,7 @@ bool SIFoldOperands::tryOptimizeAGPRPhis(MachineBasicBlock &MBB) {
21822192
return Changed;
21832193
}
21842194

2185-
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
2186-
if (skipFunction(MF.getFunction()))
2187-
return false;
2188-
2195+
bool SIFoldOperandsImpl::run(MachineFunction &MF) {
21892196
MRI = &MF.getRegInfo();
21902197
ST = &MF.getSubtarget<GCNSubtarget>();
21912198
TII = ST->getInstrInfo();
@@ -2246,3 +2253,14 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
22462253

22472254
return Changed;
22482255
}
2256+
2257+
// New pass-manager entry point. Runs SIFoldOperandsImpl on MF and translates
// its "changed" flag into a PreservedAnalyses result. The analysis manager
// argument is unnamed and not used here.
PreservedAnalyses SIFoldOperandsPass::run(MachineFunction &MF,
2258+
MachineFunctionAnalysisManager &) {
2259+
bool Changed = SIFoldOperandsImpl().run(MF);
2260+
// Nothing was modified, so every analysis is reported as preserved.
if (!Changed) {
2261+
return PreservedAnalyses::all();
2262+
}
2263+
// Otherwise start from the default machine-function-pass preserved set and
// additionally mark the CFG analyses as preserved.
auto PA = getMachineFunctionPassPreservedAnalyses();
2264+
PA.preserveSet<CFGAnalyses>();
2265+
return PA;
2266+
}

0 commit comments

Comments
 (0)