45
45
#include " llvm/CodeGen/TargetInstrInfo.h"
46
46
#include " llvm/CodeGen/TargetPassConfig.h"
47
47
#include " llvm/CodeGen/TargetRegisterInfo.h"
48
+ #include " llvm/CodeGen/TargetSchedule.h"
48
49
#include " llvm/CodeGen/TargetSubtargetInfo.h"
49
50
#include " llvm/IR/BasicBlock.h"
50
51
#include " llvm/IR/DebugInfoMetadata.h"
@@ -113,6 +114,8 @@ STATISTIC(NumSplit, "Number of critical edges split");
113
114
STATISTIC (NumCoalesces, " Number of copies coalesced" );
114
115
STATISTIC (NumPostRACopySink, " Number of copies sunk after RA" );
115
116
117
// Shorthand for TargetInstrInfo's (register, sub-register index) pair type.
+ using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
118
+
116
119
namespace {
117
120
118
121
class MachineSinking : public MachineFunctionPass {
@@ -128,6 +131,7 @@ class MachineSinking : public MachineFunctionPass {
128
131
const MachineBranchProbabilityInfo *MBPI = nullptr ;
129
132
AliasAnalysis *AA = nullptr ;
130
133
RegisterClassInfo RegClassInfo;
134
+ TargetSchedModel SchedModel;
131
135
132
136
// Remember which edges have been considered for breaking.
133
137
SmallSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>, 8 >
@@ -161,6 +165,8 @@ class MachineSinking : public MachineFunctionPass {
161
165
// / would re-order assignments.
162
166
using SeenDbgUser = PointerIntPair<MachineInstr *, 1 >;
163
167
168
+ using SinkItem = std::pair<MachineInstr *, MachineBasicBlock *>;
169
+
164
170
// / Record of DBG_VALUE uses of vregs in a block, so that we can identify
165
171
// / debug instructions to sink.
166
172
SmallDenseMap<unsigned , TinyPtrVector<SeenDbgUser>> SeenDbgUsers;
@@ -255,7 +261,10 @@ class MachineSinking : public MachineFunctionPass {
255
261
256
262
void FindCycleSinkCandidates (MachineCycle *Cycle, MachineBasicBlock *BB,
257
263
SmallVectorImpl<MachineInstr *> &Candidates);
258
- bool SinkIntoCycle (MachineCycle *Cycle, MachineInstr &I);
264
+
265
+ bool
266
+ aggressivelySinkIntoCycle (MachineCycle *Cycle, MachineInstr &I,
267
+ DenseMap<SinkItem, MachineInstr *> &SunkInstrs);
259
268
260
269
bool isProfitableToSinkTo (Register Reg, MachineInstr &MI,
261
270
MachineBasicBlock *MBB,
@@ -271,11 +280,14 @@ class MachineSinking : public MachineFunctionPass {
271
280
GetAllSortedSuccessors (MachineInstr &MI, MachineBasicBlock *MBB,
272
281
AllSuccsCache &AllSuccessors) const ;
273
282
274
- std::vector<unsigned > &getBBRegisterPressure (const MachineBasicBlock &MBB);
283
+ std::vector<unsigned > &getBBRegisterPressure (const MachineBasicBlock &MBB,
284
+ bool UseCache = true );
275
285
276
286
bool registerPressureSetExceedsLimit (unsigned NRegs,
277
287
const TargetRegisterClass *RC,
278
288
const MachineBasicBlock &MBB);
289
+
290
+ bool registerPressureExceedsLimit (const MachineBasicBlock &MBB);
279
291
};
280
292
281
293
} // end anonymous namespace
@@ -680,6 +692,10 @@ void MachineSinking::FindCycleSinkCandidates(
680
692
SmallVectorImpl<MachineInstr *> &Candidates) {
681
693
for (auto &MI : *BB) {
682
694
LLVM_DEBUG (dbgs () << " CycleSink: Analysing candidate: " << MI);
695
+ if (MI.isMetaInstruction ()) {
696
+ LLVM_DEBUG (dbgs () << " CycleSink: not sinking meta instruction\n " );
697
+ continue ;
698
+ }
683
699
if (!TII->shouldSink (MI)) {
684
700
LLVM_DEBUG (dbgs () << " CycleSink: Instruction not a candidate for this "
685
701
" target\n " );
@@ -775,31 +791,62 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
775
791
776
792
if (SinkInstsIntoCycle) {
777
793
SmallVector<MachineCycle *, 8 > Cycles (CI->toplevel_cycles ());
778
- for (auto *Cycle : Cycles) {
779
- MachineBasicBlock *Preheader = Cycle->getCyclePreheader ();
780
- if (!Preheader) {
781
- LLVM_DEBUG (dbgs () << " CycleSink: Can't find preheader\n " );
782
- continue ;
783
- }
784
- SmallVector<MachineInstr *, 8 > Candidates;
785
- FindCycleSinkCandidates (Cycle, Preheader, Candidates);
786
-
787
- // Walk the candidates in reverse order so that we start with the use
788
- // of a def-use chain, if there is any.
789
- // TODO: Sort the candidates using a cost-model.
790
- unsigned i = 0 ;
791
- for (MachineInstr *I : llvm::reverse (Candidates)) {
792
- if (i++ == SinkIntoCycleLimit) {
793
- LLVM_DEBUG (dbgs () << " CycleSink: Limit reached of instructions to "
794
- " be analysed." );
795
- break ;
794
+ SchedModel.init (STI);
795
+ bool HasHighPressure;
796
+
797
+ DenseMap<SinkItem, MachineInstr *> SunkInstrs;
798
+
799
+ enum CycleSinkStage { COPY, LOW_LATENCY, AGGRESSIVE, END };
800
+ for (unsigned Stage = CycleSinkStage::COPY; Stage != CycleSinkStage::END;
801
+ ++Stage, SunkInstrs.clear ()) {
802
+ HasHighPressure = false ;
803
+
804
+ for (auto *Cycle : Cycles) {
805
+ MachineBasicBlock *Preheader = Cycle->getCyclePreheader ();
806
+ if (!Preheader) {
807
+ LLVM_DEBUG (dbgs () << " CycleSink: Can't find preheader\n " );
808
+ continue ;
796
809
}
810
+ SmallVector<MachineInstr *, 8 > Candidates;
811
+ FindCycleSinkCandidates (Cycle, Preheader, Candidates);
812
+
813
+ unsigned i = 0 ;
814
+
815
+ // Walk the candidates in reverse order so that we start with the use
816
+ // of a def-use chain, if there is any.
817
+ // TODO: Sort the candidates using a cost-model.
818
+ for (MachineInstr *I : llvm::reverse (Candidates)) {
819
+ // CycleSinkStage::COPY: Sink a limited number of copies
820
+ if (Stage == CycleSinkStage::COPY) {
821
+ if (i++ == SinkIntoCycleLimit) {
822
+ LLVM_DEBUG (dbgs ()
823
+ << " CycleSink: Limit reached of instructions to "
824
+ " be analyzed." );
825
+ break ;
826
+ }
827
+
828
+ if (!I->isCopy ())
829
+ continue ;
830
+ }
797
831
798
- if (!SinkIntoCycle (Cycle, *I))
799
- break ;
800
- EverMadeChange = true ;
801
- ++NumCycleSunk;
832
+ // CycleSinkStage::LOW_LATENCY: sink unlimited number of instructions
833
+ // which the target specifies as low-latency
834
+ if (Stage == CycleSinkStage::LOW_LATENCY &&
835
+ !TII->hasLowDefLatency (SchedModel, *I, 0 ))
836
+ continue ;
837
+
838
+ if (!aggressivelySinkIntoCycle (Cycle, *I, SunkInstrs))
839
+ break ;
840
+ EverMadeChange = true ;
841
+ ++NumCycleSunk;
842
+ }
843
+
844
+ // Recalculate the pressure after sinking
845
+ if (!HasHighPressure)
846
+ HasHighPressure = registerPressureExceedsLimit (*Preheader);
802
847
}
848
+ if (!HasHighPressure)
849
+ break ;
803
850
}
804
851
}
805
852
@@ -1055,13 +1102,15 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
1055
1102
}
1056
1103
1057
1104
std::vector<unsigned > &
1058
- MachineSinking::getBBRegisterPressure (const MachineBasicBlock &MBB) {
1105
+ MachineSinking::getBBRegisterPressure (const MachineBasicBlock &MBB,
1106
+ bool UseCache) {
1059
1107
// Currently to save compiling time, MBB's register pressure will not change
1060
1108
// in one ProcessBlock iteration because of CachedRegisterPressure. But MBB's
1061
1109
// register pressure is changed after sinking any instructions into it.
1062
1110
// FIXME: need an accurate and cheap register pressure estimate model here.
1111
+
1063
1112
auto RP = CachedRegisterPressure.find (&MBB);
1064
- if (RP != CachedRegisterPressure.end ())
1113
+ if (UseCache && RP != CachedRegisterPressure.end ())
1065
1114
return RP->second ;
1066
1115
1067
1116
RegionPressure Pressure;
@@ -1085,6 +1134,12 @@ MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) {
1085
1134
}
1086
1135
1087
1136
RPTracker.closeRegion ();
1137
+
1138
+ if (RP != CachedRegisterPressure.end ()) {
1139
+ CachedRegisterPressure[&MBB] = RPTracker.getPressure ().MaxSetPressure ;
1140
+ return CachedRegisterPressure[&MBB];
1141
+ }
1142
+
1088
1143
auto It = CachedRegisterPressure.insert (
1089
1144
std::make_pair (&MBB, RPTracker.getPressure ().MaxSetPressure ));
1090
1145
return It.first ->second ;
@@ -1103,6 +1158,21 @@ bool MachineSinking::registerPressureSetExceedsLimit(
1103
1158
return false ;
1104
1159
}
1105
1160
1161
+ // Recalculate RP and check if any pressure set exceeds the set limit.
1162
+ bool MachineSinking::registerPressureExceedsLimit (
1163
+ const MachineBasicBlock &MBB) {
1164
+ std::vector<unsigned > BBRegisterPressure = getBBRegisterPressure (MBB, false );
1165
+
1166
+ for (unsigned PS = 0 ; PS < BBRegisterPressure.size (); ++PS) {
1167
+ if (BBRegisterPressure[PS] >=
1168
+ TRI->getRegPressureSetLimit (*MBB.getParent (), PS)) {
1169
+ return true ;
1170
+ }
1171
+ }
1172
+
1173
+ return false ;
1174
+ }
1175
+
1106
1176
// / isProfitableToSinkTo - Return true if it is profitable to sink MI.
1107
1177
bool MachineSinking::isProfitableToSinkTo (Register Reg, MachineInstr &MI,
1108
1178
MachineBasicBlock *MBB,
@@ -1581,83 +1651,98 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
1581
1651
return HasAliasedStore;
1582
1652
}
1583
1653
1584
- // / Sink instructions into cycles if profitable. This especially tries to
1585
- // / prevent register spills caused by register pressure if there is little to no
1586
- // / overhead moving instructions into cycles.
1587
- bool MachineSinking::SinkIntoCycle (MachineCycle *Cycle, MachineInstr &I) {
1588
- LLVM_DEBUG (dbgs () << " CycleSink: Finding sink block for: " << I);
1654
+ // / Aggressively sink instructions into cycles. This will aggressively try to
1655
+ // / sink all instructions in the top-most preheaders in an attempt to reduce RP.
1656
+ // / In particular, it will sink into multiple successor blocks without limits
1657
+ // / based on the amount of sinking, or the type of ops being sunk (so long as
1658
+ // / they are safe to sink).
1659
+ bool MachineSinking::aggressivelySinkIntoCycle (
1660
+ MachineCycle *Cycle, MachineInstr &I,
1661
+ DenseMap<SinkItem, MachineInstr *> &SunkInstrs) {
1662
+ // TODO: support instructions with multiple defs
1663
+ if (I.getNumDefs () > 1 )
1664
+ return false ;
1665
+
1666
+ LLVM_DEBUG (dbgs () << " AggressiveCycleSink: Finding sink block for: " << I);
1589
1667
MachineBasicBlock *Preheader = Cycle->getCyclePreheader ();
1590
1668
assert (Preheader && " Cycle sink needs a preheader block" );
1591
- MachineBasicBlock *SinkBlock = nullptr ;
1592
- bool CanSink = true ;
1593
- const MachineOperand &MO = I.getOperand (0 );
1594
-
1595
- for (MachineInstr &MI : MRI->use_instructions (MO.getReg ())) {
1596
- LLVM_DEBUG (dbgs () << " CycleSink: Analysing use: " << MI);
1597
- if (!Cycle->contains (MI.getParent ())) {
1598
- LLVM_DEBUG (dbgs () << " CycleSink: Use not in cycle, can't sink.\n " );
1599
- CanSink = false ;
1600
- break ;
1601
- }
1669
+ SmallVector<std::pair<RegSubRegPair, MachineInstr *>> Uses;
1602
1670
1603
- // FIXME: Come up with a proper cost model that estimates whether sinking
1604
- // the instruction (and thus possibly executing it on every cycle
1605
- // iteration) is more expensive than a register.
1606
- // For now assumes that copies are cheap and thus almost always worth it.
1607
- if (!MI.isCopy ()) {
1608
- LLVM_DEBUG (dbgs () << " CycleSink: Use is not a copy\n " );
1609
- CanSink = false ;
1610
- break ;
1671
+ MachineOperand &DefMO = I.getOperand (0 );
1672
+ for (MachineInstr &MI : MRI->use_instructions (DefMO.getReg ())) {
1673
+ Uses.push_back ({{DefMO.getReg (), DefMO.getSubReg ()}, &MI});
1674
+ }
1675
+
1676
+ for (std::pair<RegSubRegPair, MachineInstr *> Entry : Uses) {
1677
+ MachineInstr *MI = Entry.second ;
1678
+ LLVM_DEBUG (dbgs () << " AggressiveCycleSink: Analysing use: " << MI);
1679
+ if (MI->isPHI ()) {
1680
+ LLVM_DEBUG (
1681
+ dbgs () << " AggressiveCycleSink: Not attempting to sink for PHI.\n " );
1682
+ continue ;
1611
1683
}
1612
- if (!SinkBlock) {
1613
- SinkBlock = MI. getParent ();
1614
- LLVM_DEBUG (dbgs () << " CycleSink : Setting sink block to: "
1615
- << printMBBReference (*SinkBlock) << " \n " );
1684
+ // We cannot sink before the prologue
1685
+ if (MI-> isPosition () || TII-> isBasicBlockPrologue (*MI)) {
1686
+ LLVM_DEBUG (dbgs () << " AggressiveCycleSink : Use is BasicBlock prologue, "
1687
+ " can't sink. \n " );
1616
1688
continue ;
1617
1689
}
1618
- SinkBlock = DT->findNearestCommonDominator (SinkBlock, MI.getParent ());
1619
- if (!SinkBlock) {
1620
- LLVM_DEBUG (dbgs () << " CycleSink: Can't find nearest dominator\n " );
1621
- CanSink = false ;
1622
- break ;
1690
+ if (!Cycle->contains (MI->getParent ())) {
1691
+ LLVM_DEBUG (
1692
+ dbgs () << " AggressiveCycleSink: Use not in cycle, can't sink.\n " );
1693
+ continue ;
1623
1694
}
1624
- LLVM_DEBUG (dbgs () << " CycleSink: Setting nearest common dom block: "
1625
- << printMBBReference (*SinkBlock) << " \n " );
1626
- }
1627
1695
1628
- if (!CanSink) {
1629
- LLVM_DEBUG (dbgs () << " CycleSink: Can't sink instruction.\n " );
1630
- return false ;
1631
- }
1632
- if (!SinkBlock) {
1633
- LLVM_DEBUG (dbgs () << " CycleSink: Not sinking, can't find sink block.\n " );
1634
- return false ;
1635
- }
1636
- if (SinkBlock == Preheader) {
1637
- LLVM_DEBUG (
1638
- dbgs () << " CycleSink: Not sinking, sink block is the preheader\n " );
1639
- return false ;
1640
- }
1641
- if (SinkBlock->sizeWithoutDebugLargerThan (SinkLoadInstsPerBlockThreshold)) {
1642
- LLVM_DEBUG (
1643
- dbgs () << " CycleSink: Not Sinking, block too large to analyse.\n " );
1644
- return false ;
1645
- }
1696
+ MachineBasicBlock *SinkBlock = MI->getParent ();
1697
+ MachineInstr *NewMI = nullptr ;
1698
+ SinkItem MapEntry (&I, SinkBlock);
1699
+
1700
+ auto SI = SunkInstrs.find (MapEntry);
1701
+
1702
+ // Check for the case in which we have already sunk a copy of this
1703
+ // instruction into the user block.
1704
+ if (SI != SunkInstrs.end ()) {
1705
+ LLVM_DEBUG (dbgs () << " AggressiveCycleSink: Already sunk to block: "
1706
+ << printMBBReference (*SinkBlock) << " \n " );
1707
+ NewMI = SI->second ;
1708
+ }
1646
1709
1647
- LLVM_DEBUG (dbgs () << " CycleSink: Sinking instruction!\n " );
1648
- SinkBlock->splice (SinkBlock->SkipPHIsAndLabels (SinkBlock->begin ()), Preheader,
1649
- I);
1710
+ // Create a copy of the instruction in the use block.
1711
+ if (!NewMI) {
1712
+ LLVM_DEBUG (dbgs () << " AggressiveCycleSink: Sinking instruction to block: "
1713
+ << printMBBReference (*SinkBlock) << " \n " );
1714
+
1715
+ NewMI = I.getMF ()->CloneMachineInstr (&I);
1716
+ if (DefMO.getReg ().isVirtual ()) {
1717
+ const TargetRegisterClass *TRC = MRI->getRegClass (DefMO.getReg ());
1718
+ Register DestReg = MRI->createVirtualRegister (TRC);
1719
+ NewMI->substituteRegister (DefMO.getReg (), DestReg, DefMO.getSubReg (),
1720
+ *TRI);
1721
+ }
1722
+ SinkBlock->insert (SinkBlock->SkipPHIsAndLabels (SinkBlock->begin ()),
1723
+ NewMI);
1724
+ SunkInstrs.insert ({MapEntry, NewMI});
1725
+ }
1650
1726
1651
- // Conservatively clear any kill flags on uses of sunk instruction
1652
- for (MachineOperand &MO : I. operands ()) {
1653
- if (MO.isReg () && MO.readsReg ())
1727
+ // Conservatively clear any kill flags on uses of sunk instruction
1728
+ for (MachineOperand &MO : NewMI-> all_uses ()) {
1729
+ assert (MO.isReg () && MO.isUse ());
1654
1730
RegsToClearKillFlags.insert (MO.getReg ());
1655
- }
1731
+ }
1656
1732
1657
- // The instruction is moved from its basic block, so do not retain the
1658
- // debug information.
1659
- assert (!I.isDebugInstr () && " Should not sink debug inst" );
1660
- I.setDebugLoc (DebugLoc ());
1733
+ // The instruction is moved from its basic block, so do not retain the
1734
+ // debug information.
1735
+ assert (!NewMI->isDebugInstr () && " Should not sink debug inst" );
1736
+ NewMI->setDebugLoc (DebugLoc ());
1737
+
1738
+ // Replace the use with the newly created virtual register.
1739
+ RegSubRegPair &UseReg = Entry.first ;
1740
+ MI->substituteRegister (UseReg.Reg , NewMI->getOperand (0 ).getReg (),
1741
+ UseReg.SubReg , *TRI);
1742
+ }
1743
+ // If we have replaced all uses, then delete the dead instruction
1744
+ if (I.isDead (*MRI))
1745
+ I.eraseFromParent ();
1661
1746
return true ;
1662
1747
}
1663
1748
0 commit comments