
Commit b5f9972

[SIFoldOperands] Small code cleanups, NFC.
I've been trying to understand the backend better and decided to read the code of this pass. While doing so, I noticed parts that could be refactored to be a tiny bit clearer. I tried to keep the changes minimal; a non-exhaustive list of changes:

- Stylistic changes to better fit LLVM's coding style
- Removing dead/useless functions (e.g. FoldCandidate had getters, but it's a public struct!)
- Saving regs/opcodes in variables if they're going to be used multiple times in the same condition

Reviewed By: arsenm, foad

Differential Revision: https://reviews.llvm.org/D137539
1 parent 13f8336 commit b5f9972
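The isUseMIInFoldList hunk below illustrates the flavor of these cleanups. As a rough standalone sketch of that loop-to-algorithm rewrite (using std::any_of and simplified stand-ins for the LLVM types, not the real definitions):

#include <algorithm>
#include <vector>

struct MachineInstr {};                 // stand-in for llvm::MachineInstr

struct FoldCandidate {                  // stand-in; the real struct has more fields
  const MachineInstr *UseMI = nullptr;
};

// Before: a hand-written range-for with an early return.
// After: a single any_of call; the predicate takes each element by reference.
static bool isUseMIInFoldList(const std::vector<FoldCandidate> &FoldList,
                              const MachineInstr *MI) {
  return std::any_of(FoldList.begin(), FoldList.end(),
                     [&](const FoldCandidate &C) { return C.UseMI == MI; });
}

int main() {
  MachineInstr MI;
  std::vector<FoldCandidate> List{{&MI}};
  return isUseMIInFoldList(List, &MI) ? 0 : 1;
}

llvm::any_of from STLExtras.h wraps exactly this begin/end pair, which is why the in-tree version can simply write any_of(FoldList, ...).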


llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 59 additions & 85 deletions
@@ -62,17 +62,7 @@ struct FoldCandidate {
 
   bool isGlobal() const { return Kind == MachineOperand::MO_GlobalAddress; }
 
-  bool isCommuted() const {
-    return Commuted;
-  }
-
-  bool needsShrink() const {
-    return ShrinkOpcode != -1;
-  }
-
-  int getShrinkOpcode() const {
-    return ShrinkOpcode;
-  }
+  bool needsShrink() const { return ShrinkOpcode != -1; }
 };
 
 class SIFoldOperands : public MachineFunctionPass {
@@ -175,19 +165,17 @@ bool SIFoldOperands::frameIndexMayFold(const MachineInstr &UseMI, int OpNo,
   if (!OpToFold.isFI())
     return false;
 
+  const unsigned Opc = UseMI.getOpcode();
   if (TII->isMUBUF(UseMI))
-    return OpNo == AMDGPU::getNamedOperandIdx(UseMI.getOpcode(),
-                                              AMDGPU::OpName::vaddr);
+    return OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
   if (!TII->isFLATScratch(UseMI))
     return false;
 
-  int SIdx = AMDGPU::getNamedOperandIdx(UseMI.getOpcode(),
-                                        AMDGPU::OpName::saddr);
+  int SIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr);
   if (OpNo == SIdx)
     return true;
 
-  int VIdx = AMDGPU::getNamedOperandIdx(UseMI.getOpcode(),
-                                        AMDGPU::OpName::vaddr);
+  int VIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
   return OpNo == VIdx && SIdx == -1;
 }
 
@@ -200,11 +188,11 @@ bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const {
   MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
   assert(Old.isReg());
 
+
+  const uint64_t TSFlags = MI->getDesc().TSFlags;
   if (Fold.isImm()) {
-    if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked &&
-        !(MI->getDesc().TSFlags & SIInstrFlags::IsMAI) &&
-        (!ST->hasDOTOpSelHazard() ||
-         !(MI->getDesc().TSFlags & SIInstrFlags::IsDOT)) &&
+    if (TSFlags & SIInstrFlags::IsPacked && !(TSFlags & SIInstrFlags::IsMAI) &&
+        (!ST->hasDOTOpSelHazard() || !(TSFlags & SIInstrFlags::IsDOT)) &&
         AMDGPU::isFoldableLiteralV216(Fold.ImmToFold,
                                       ST->hasInv2PiInlineImm())) {
       // Set op_sel/op_sel_hi on this operand or bail out if op_sel is
@@ -258,7 +246,7 @@ bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const {
       return false;
     }
 
-    int Op32 = Fold.getShrinkOpcode();
+    int Op32 = Fold.ShrinkOpcode;
     MachineOperand &Dst0 = MI->getOperand(0);
     MachineOperand &Dst1 = MI->getOperand(1);
     assert(Dst0.isDef() && Dst1.isDef());
@@ -287,7 +275,7 @@ bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const {
       MI->removeOperand(I);
     MI->setDesc(TII->get(AMDGPU::IMPLICIT_DEF));
 
-    if (Fold.isCommuted())
+    if (Fold.Commuted)
      TII->commuteInstruction(*Inst32, false);
     return true;
   }
@@ -325,11 +313,7 @@ bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const {
 
 static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
                               const MachineInstr *MI) {
-  for (auto Candidate : FoldList) {
-    if (Candidate.UseMI == MI)
-      return true;
-  }
-  return false;
+  return any_of(FoldList, [&](const auto &C) { return C.UseMI == MI; });
 }
 
 static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
@@ -488,7 +472,6 @@ bool SIFoldOperands::isUseSafeToFold(const MachineInstr &MI,
   }
 
   return true;
-  //return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
 }
 
 // Find a def of the UseReg, check if it is a reg_sequence and find initializers
@@ -608,10 +591,9 @@ void SIFoldOperands::foldOperand(
     return;
 
   // FIXME: Fold operands with subregs.
-  if (UseOp.isReg() && OpToFold.isReg()) {
-    if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)
-      return;
-  }
+  if (UseOp.isReg() && OpToFold.isReg() &&
+      (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister))
+    return;
 
   // Special case for REG_SEQUENCE: We can't fold literals into
   // REG_SEQUENCE instructions, so we have to fold them into the
@@ -661,12 +643,11 @@ void SIFoldOperands::foldOperand(
     // safe to fold the addressing mode, even pre-GFX9.
     UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getIndex());
 
+    const unsigned Opc = UseMI->getOpcode();
    if (TII->isFLATScratch(*UseMI) &&
-        AMDGPU::getNamedOperandIdx(UseMI->getOpcode(),
-                                   AMDGPU::OpName::vaddr) != -1 &&
-        AMDGPU::getNamedOperandIdx(UseMI->getOpcode(),
-                                   AMDGPU::OpName::saddr) == -1) {
-      unsigned NewOpc = AMDGPU::getFlatScratchInstSSfromSV(UseMI->getOpcode());
+        AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) != -1 &&
+        AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr) == -1) {
+      unsigned NewOpc = AMDGPU::getFlatScratchInstSSfromSV(Opc);
       UseMI->setDesc(TII->get(NewOpc));
     }
 
@@ -702,8 +683,10 @@ void SIFoldOperands::foldOperand(
                               Use.getParent()->getOperandNo(&Use),
                               &UseMI->getOperand(1));
       }
+
       for (auto &F : CopyUses) {
-        foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo, FoldList, CopiesToReplace);
+        foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo, FoldList,
+                    CopiesToReplace);
       }
     }
 
@@ -828,15 +811,15 @@ void SIFoldOperands::foldOperand(
 
     if (Size != 4)
       return;
-    if (TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg()) &&
-        TRI->isVGPR(*MRI, UseMI->getOperand(1).getReg()))
+
+    Register Reg0 = UseMI->getOperand(0).getReg();
+    Register Reg1 = UseMI->getOperand(1).getReg();
+    if (TRI->isAGPR(*MRI, Reg0) && TRI->isVGPR(*MRI, Reg1))
       UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64));
-    else if (TRI->isVGPR(*MRI, UseMI->getOperand(0).getReg()) &&
-             TRI->isAGPR(*MRI, UseMI->getOperand(1).getReg()))
+    else if (TRI->isVGPR(*MRI, Reg0) && TRI->isAGPR(*MRI, Reg1))
      UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64));
-    else if (ST->hasGFX90AInsts() &&
-             TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg()) &&
-             TRI->isAGPR(*MRI, UseMI->getOperand(1).getReg()))
+    else if (ST->hasGFX90AInsts() && TRI->isAGPR(*MRI, Reg0) &&
+             TRI->isAGPR(*MRI, Reg1))
      UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_MOV_B32));
     return;
   }
@@ -1020,10 +1003,12 @@ static unsigned getMovOpc(bool IsScalar) {
   return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
 }
 
-/// Remove any leftover implicit operands from mutating the instruction. e.g.
-/// if we replace an s_and_b32 with a copy, we don't need the implicit scc def
-/// anymore.
-static void stripExtraCopyOperands(MachineInstr &MI) {
+static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
+  MI.setDesc(NewDesc);
+
+  // Remove any leftover implicit operands from mutating the instruction. e.g.
+  // if we replace an s_and_b32 with a copy, we don't need the implicit scc def
+  // anymore.
   const MCInstrDesc &Desc = MI.getDesc();
   unsigned NumOps = Desc.getNumOperands() +
                     Desc.getNumImplicitUses() +
@@ -1033,24 +1018,18 @@ static void stripExtraCopyOperands(MachineInstr &MI) {
    MI.removeOperand(I);
 }
 
-static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
-  MI.setDesc(NewDesc);
-  stripExtraCopyOperands(MI);
-}
-
 MachineOperand *
 SIFoldOperands::getImmOrMaterializedImm(MachineOperand &Op) const {
-  if (Op.isReg()) {
-    // If this has a subregister, it obviously is a register source.
-    if (Op.getSubReg() != AMDGPU::NoSubRegister || !Op.getReg().isVirtual())
-      return &Op;
-
-    MachineInstr *Def = MRI->getVRegDef(Op.getReg());
-    if (Def && Def->isMoveImmediate()) {
-      MachineOperand &ImmSrc = Def->getOperand(1);
-      if (ImmSrc.isImm())
-        return &ImmSrc;
-    }
+  // If this has a subregister, it obviously is a register source.
+  if (!Op.isReg() || Op.getSubReg() != AMDGPU::NoSubRegister ||
+      !Op.getReg().isVirtual())
+    return &Op;
+
+  MachineInstr *Def = MRI->getVRegDef(Op.getReg());
+  if (Def && Def->isMoveImmediate()) {
+    MachineOperand &ImmSrc = Def->getOperand(1);
+    if (ImmSrc.isImm())
+      return &ImmSrc;
   }
 
   return &Op;
@@ -1127,9 +1106,8 @@ bool SIFoldOperands::tryConstantFoldOp(MachineInstr *MI) const {
    return true;
   }
 
-  if (MI->getOpcode() == AMDGPU::V_AND_B32_e64 ||
-      MI->getOpcode() == AMDGPU::V_AND_B32_e32 ||
-      MI->getOpcode() == AMDGPU::S_AND_B32) {
+  if (Opc == AMDGPU::V_AND_B32_e64 || Opc == AMDGPU::V_AND_B32_e32 ||
+      Opc == AMDGPU::S_AND_B32) {
    if (Src1Val == 0) {
      // y = and x, 0 => y = v_mov_b32 0
      MI->removeOperand(Src0Idx);
@@ -1138,16 +1116,14 @@ bool SIFoldOperands::tryConstantFoldOp(MachineInstr *MI) const {
      // y = and x, -1 => y = copy x
      MI->removeOperand(Src1Idx);
      mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
-      stripExtraCopyOperands(*MI);
    } else
      return false;
 
    return true;
  }
 
-  if (MI->getOpcode() == AMDGPU::V_XOR_B32_e64 ||
-      MI->getOpcode() == AMDGPU::V_XOR_B32_e32 ||
-      MI->getOpcode() == AMDGPU::S_XOR_B32) {
+  if (Opc == AMDGPU::V_XOR_B32_e64 || Opc == AMDGPU::V_XOR_B32_e32 ||
+      Opc == AMDGPU::S_XOR_B32) {
    if (Src1Val == 0) {
      // y = xor x, 0 => y = copy x
      MI->removeOperand(Src1Idx);
@@ -1210,14 +1186,13 @@ bool SIFoldOperands::tryFoldZeroHighBits(MachineInstr &MI) const {
 
   Register Src1 = MI.getOperand(2).getReg();
   MachineInstr *SrcDef = MRI->getVRegDef(Src1);
-  if (ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode())) {
-    Register Dst = MI.getOperand(0).getReg();
-    MRI->replaceRegWith(Dst, SrcDef->getOperand(0).getReg());
-    MI.eraseFromParent();
-    return true;
-  }
+  if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode()))
+    return false;
 
-  return false;
+  Register Dst = MI.getOperand(0).getReg();
+  MRI->replaceRegWith(Dst, SrcDef->getOperand(0).getReg());
+  MI.eraseFromParent();
+  return true;
 }
 
 bool SIFoldOperands::foldInstOperand(MachineInstr &MI,
@@ -1286,7 +1261,7 @@ bool SIFoldOperands::foldInstOperand(MachineInstr &MI,
      LLVM_DEBUG(dbgs() << "Folded source from " << MI << " into OpNo "
                        << static_cast<int>(Fold.UseOpNo) << " of "
                        << *Fold.UseMI);
-    } else if (Fold.isCommuted()) {
+    } else if (Fold.Commuted) {
      // Restoring instruction's original operand order if fold has failed.
      TII->commuteInstruction(*Fold.UseMI, false);
    }
@@ -1735,9 +1710,9 @@ bool SIFoldOperands::tryFoldLoad(MachineInstr &MI) {
 
   SmallVector<const MachineInstr*, 8> Users;
   SmallVector<Register, 8> MoveRegs;
-  for (const MachineInstr &I : MRI->use_nodbg_instructions(DefReg)) {
+  for (const MachineInstr &I : MRI->use_nodbg_instructions(DefReg))
    Users.push_back(&I);
-  }
+
  if (Users.empty())
    return false;
 
@@ -1750,9 +1725,8 @@ bool SIFoldOperands::tryFoldLoad(MachineInstr &MI) {
    if (TRI->isAGPR(*MRI, DstReg))
      continue;
    MoveRegs.push_back(DstReg);
-    for (const MachineInstr &U : MRI->use_nodbg_instructions(DstReg)) {
+    for (const MachineInstr &U : MRI->use_nodbg_instructions(DstReg))
      Users.push_back(&U);
-    }
  }
 
  const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
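A second pattern repeats across the frameIndexMayFold, foldOperand and tryConstantFoldOp hunks: a value that used to be re-fetched in every clause (UseMI.getOpcode(), UseMI->getOperand(N).getReg()) is hoisted into a named local. A minimal standalone sketch of the idea, with placeholder opcode values rather than the TableGen-generated AMDGPU enums:

#include <iostream>

struct MachineInstr {
  unsigned Opcode = 0;
  unsigned getOpcode() const { return Opcode; }
};

// Placeholder opcode constants; the real values come from the generated
// AMDGPU instruction enum.
constexpr unsigned V_AND_B32_e64 = 1;
constexpr unsigned V_AND_B32_e32 = 2;
constexpr unsigned S_AND_B32 = 3;

static bool isAnd(const MachineInstr &MI) {
  // Fetch the opcode once and reuse it in every clause, instead of
  // calling MI.getOpcode() three times as the old code did.
  const unsigned Opc = MI.getOpcode();
  return Opc == V_AND_B32_e64 || Opc == V_AND_B32_e32 || Opc == S_AND_B32;
}

int main() {
  MachineInstr MI;
  MI.Opcode = S_AND_B32;
  std::cout << std::boolalpha << isAnd(MI) << '\n'; // prints "true"
}

Besides avoiding the repeated call, the shorter clauses are what let the conditions above collapse from four or five wrapped lines to two.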
