Skip to content

Commit 70c781f

Browse files
committed
[SIFoldOperands] Move isFoldableCopy into a separate helper, NFC.
There was quite a bit of logic that was just sitting in the middle of the core loop. I think it is easier to follow when it is split off into a separate helper like the others.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D137538
1 parent 1c35535 commit 70c781f

File tree

1 file changed

+80
-71
lines changed

1 file changed

+80
-71
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 80 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@ class SIFoldOperands : public MachineFunctionPass {
111111
bool tryFoldCndMask(MachineInstr &MI) const;
112112
bool tryFoldZeroHighBits(MachineInstr &MI) const;
113113
bool foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
114+
bool tryFoldFoldableCopy(MachineInstr &MI,
115+
MachineOperand *&CurrentKnownM0Val) const;
114116

115117
const MachineOperand *isClamp(const MachineInstr &MI) const;
116118
bool tryFoldClamp(MachineInstr &MI);
@@ -1292,6 +1294,73 @@ bool SIFoldOperands::foldInstOperand(MachineInstr &MI,
12921294
return true;
12931295
}
12941296

1297+
/// Attempt to fold a foldable copy \p MI into the uses of its destination,
/// and erase it (plus any newly dead copies feeding it) when successful.
///
/// \p CurrentKnownM0Val tracks the last known value written to m0 within the
/// current block; it is updated (or invalidated) here and consulted to drop
/// redundant m0 redefinitions.
/// \returns true if any instruction was changed or erased.
bool SIFoldOperands::tryFoldFoldableCopy(
    MachineInstr &MI, MachineOperand *&CurrentKnownM0Val) const {
  // Track trivial redefinitions of m0 inside a block: a later def writing a
  // value identical to the tracked one is redundant and can be erased.
  if (MI.getOperand(0).getReg() == AMDGPU::M0) {
    MachineOperand &M0Src = MI.getOperand(1);
    if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(M0Src)) {
      MI.eraseFromParent();
      return true;
    }

    // We aren't tracking other physical registers, so a physreg source
    // invalidates the known m0 value.
    if (M0Src.isReg() && M0Src.getReg().isPhysical())
      CurrentKnownM0Val = nullptr;
    else
      CurrentKnownM0Val = &M0Src;
    return false;
  }

  MachineOperand &SrcToFold = MI.getOperand(1);

  // FIXME: We could also be folding things like TargetIndexes.
  // Only immediates, frame indexes, globals and registers are candidates.
  if (!SrcToFold.isImm() && !SrcToFold.isFI() && !SrcToFold.isGlobal() &&
      !SrcToFold.isReg())
    return false;

  if (SrcToFold.isReg() && !SrcToFold.getReg().isVirtual())
    return false;

  // Prevent folding operands backwards in the function. For example,
  // the COPY opcode must not be replaced by 1 in this example:
  //
  //    %3 = COPY %vgpr0; VGPR_32:%3
  //    ...
  //    %vgpr0 = V_MOV_B32_e32 1, implicit %exec
  if (!MI.getOperand(0).getReg().isVirtual())
    return false;

  bool Changed = foldInstOperand(MI, SrcToFold);

  // If we managed to fold all uses of this copy then we might as well
  // delete it now.
  // The only reason we need to follow chains of copies here is that
  // tryFoldRegSequence looks forward through copies before folding a
  // REG_SEQUENCE into its eventual users.
  MachineInstr *DeadCopy = &MI;
  while (MRI->use_nodbg_empty(DeadCopy->getOperand(0).getReg())) {
    MachineOperand &ChainSrc = DeadCopy->getOperand(1);
    Register ChainSrcReg = ChainSrc.isReg() ? ChainSrc.getReg() : Register();
    DeadCopy->eraseFromParent();
    Changed = true;
    DeadCopy = nullptr;
    // Stop at non-register or physreg sources; their defs aren't chased.
    if (!ChainSrcReg || ChainSrcReg.isPhysical())
      break;
    DeadCopy = MRI->getVRegDef(ChainSrcReg);
    if (!DeadCopy || !TII->isFoldableCopy(*DeadCopy))
      break;
  }

  // A REG_SEQUENCE left at the head of the chain may itself be dead now.
  if (DeadCopy && DeadCopy->isRegSequence() &&
      MRI->use_nodbg_empty(DeadCopy->getOperand(0).getReg())) {
    DeadCopy->eraseFromParent();
    Changed = true;
  }

  return Changed;
}
1363+
12951364
// Clamp patterns are canonically selected to v_max_* instructions, so only
12961365
// handle them.
12971366
const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
@@ -1746,82 +1815,22 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
17461815
continue;
17471816
}
17481817

1749-
if (!TII->isFoldableCopy(MI)) {
1750-
// Saw an unknown clobber of m0, so we no longer know what it is.
1751-
if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI))
1752-
CurrentKnownM0Val = nullptr;
1753-
1754-
// TODO: Omod might be OK if there is NSZ only on the source
1755-
// instruction, and not the omod multiply.
1756-
if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
1757-
!tryFoldOMod(MI))
1758-
Changed |= tryFoldClamp(MI);
1759-
1818+
if (TII->isFoldableCopy(MI)) {
1819+
Changed |= tryFoldFoldableCopy(MI, CurrentKnownM0Val);
17601820
continue;
17611821
}
17621822

1763-
// Specially track simple redefs of m0 to the same value in a block, so we
1764-
// can erase the later ones.
1765-
if (MI.getOperand(0).getReg() == AMDGPU::M0) {
1766-
MachineOperand &NewM0Val = MI.getOperand(1);
1767-
if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) {
1768-
MI.eraseFromParent();
1769-
Changed = true;
1770-
continue;
1771-
}
1772-
1773-
// We aren't tracking other physical registers
1774-
CurrentKnownM0Val = (NewM0Val.isReg() && NewM0Val.getReg().isPhysical()) ?
1775-
nullptr : &NewM0Val;
1776-
continue;
1777-
}
1778-
1779-
MachineOperand &OpToFold = MI.getOperand(1);
1780-
bool FoldingImm =
1781-
OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
1782-
1783-
// FIXME: We could also be folding things like TargetIndexes.
1784-
if (!FoldingImm && !OpToFold.isReg())
1785-
continue;
1786-
1787-
if (OpToFold.isReg() && !OpToFold.getReg().isVirtual())
1788-
continue;
1823+
// Saw an unknown clobber of m0, so we no longer know what it is.
1824+
if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI))
1825+
CurrentKnownM0Val = nullptr;
17891826

1790-
// Prevent folding operands backwards in the function. For example,
1791-
// the COPY opcode must not be replaced by 1 in this example:
1792-
//
1793-
// %3 = COPY %vgpr0; VGPR_32:%3
1794-
// ...
1795-
// %vgpr0 = V_MOV_B32_e32 1, implicit %exec
1796-
if (!MI.getOperand(0).getReg().isVirtual())
1797-
continue;
1798-
1799-
Changed |= foldInstOperand(MI, OpToFold);
1800-
1801-
// If we managed to fold all uses of this copy then we might as well
1802-
// delete it now.
1803-
// The only reason we need to follow chains of copies here is that
1804-
// tryFoldRegSequence looks forward through copies before folding a
1805-
// REG_SEQUENCE into its eventual users.
1806-
auto *InstToErase = &MI;
1807-
while (MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
1808-
auto &SrcOp = InstToErase->getOperand(1);
1809-
auto SrcReg = SrcOp.isReg() ? SrcOp.getReg() : Register();
1810-
InstToErase->eraseFromParent();
1811-
Changed = true;
1812-
InstToErase = nullptr;
1813-
if (!SrcReg || SrcReg.isPhysical())
1814-
break;
1815-
InstToErase = MRI->getVRegDef(SrcReg);
1816-
if (!InstToErase || !TII->isFoldableCopy(*InstToErase))
1817-
break;
1818-
}
1819-
if (InstToErase && InstToErase->isRegSequence() &&
1820-
MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
1821-
InstToErase->eraseFromParent();
1822-
Changed = true;
1823-
}
1827+
// TODO: Omod might be OK if there is NSZ only on the source
1828+
// instruction, and not the omod multiply.
1829+
if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
1830+
!tryFoldOMod(MI))
1831+
Changed |= tryFoldClamp(MI);
18241832
}
18251833
}
1834+
18261835
return Changed;
18271836
}

0 commit comments

Comments
 (0)