Skip to content

Commit 7e2f961

Browse files
authored
[MachineSink] Fix missing sinks along critical edges (#97618)
4e0bd3f improved early MachineLICM's capabilities to hoist COPY from physical registers out of a loop. However, it accidentally broke one of MachineSink's preconditions on sinking cheap instructions (in this case, COPY) which considered those instructions being profitable to sink only when there are at least two of them in the same def-use chain in the same basic block. So if early MachineLICM hoisted one of them out, MachineSink no longer sink rest of the cheap instructions. This results in redundant load immediate instructions from the motivating example we've seen on RISC-V. This patch fixes this by teaching MachineSink that if there is more than one demand to sink a register into the same block from different critical edges, it should be considered profitable as it increases the CSE opportunities. This change also improves two of the AArch64's cases.
1 parent 90d79e2 commit 7e2f961

File tree

4 files changed

+95
-57
lines changed

4 files changed

+95
-57
lines changed

llvm/lib/CodeGen/MachineSink.cpp

Lines changed: 66 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,14 @@ namespace {
130130
// Remember which edges have been considered for breaking.
131131
SmallSet<std::pair<MachineBasicBlock*, MachineBasicBlock*>, 8>
132132
CEBCandidates;
133+
// Memorize the register that also wanted to sink into the same block along
134+
// a different critical edge.
135+
// {register to sink, sink-to block} -> the first sink-from block.
136+
// We're recording the first sink-from block because that (critical) edge
137+
// was deferred until we see another register that's going to sink into the
138+
// same block.
139+
DenseMap<std::pair<Register, MachineBasicBlock *>, MachineBasicBlock *>
140+
CEMergeCandidates;
133141
// Remember which edges we are about to split.
134142
// This is different from CEBCandidates since those edges
135143
// will be split.
@@ -197,14 +205,17 @@ namespace {
197205

198206
void releaseMemory() override {
199207
CEBCandidates.clear();
208+
CEMergeCandidates.clear();
200209
}
201210

202211
private:
203212
bool ProcessBlock(MachineBasicBlock &MBB);
204213
void ProcessDbgInst(MachineInstr &MI);
205-
bool isWorthBreakingCriticalEdge(MachineInstr &MI,
206-
MachineBasicBlock *From,
207-
MachineBasicBlock *To);
214+
bool isLegalToBreakCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
215+
MachineBasicBlock *To, bool BreakPHIEdge);
216+
bool isWorthBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
217+
MachineBasicBlock *To,
218+
MachineBasicBlock *&DeferredFromBlock);
208219

209220
bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To,
210221
MachineInstr &MI);
@@ -725,6 +736,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
725736

726737
// Process all basic blocks.
727738
CEBCandidates.clear();
739+
CEMergeCandidates.clear();
728740
ToSplit.clear();
729741
for (auto &MBB: MF)
730742
MadeChange |= ProcessBlock(MBB);
@@ -873,9 +885,9 @@ void MachineSinking::ProcessDbgInst(MachineInstr &MI) {
873885
SeenDbgVars.insert(Var);
874886
}
875887

876-
bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
877-
MachineBasicBlock *From,
878-
MachineBasicBlock *To) {
888+
bool MachineSinking::isWorthBreakingCriticalEdge(
889+
MachineInstr &MI, MachineBasicBlock *From, MachineBasicBlock *To,
890+
MachineBasicBlock *&DeferredFromBlock) {
879891
// FIXME: Need much better heuristics.
880892

881893
// If the pass has already considered breaking this edge (during this pass
@@ -887,6 +899,27 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
887899
if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
888900
return true;
889901

902+
// Check and record the register and the destination block we want to sink
903+
// into. Note that we want to do the following before the next check on branch
904+
// probability. Because we want to record the initial candidate even if it's
905+
// on hot edge, so that other candidates that might not on hot edges can be
906+
// sinked as well.
907+
for (const auto &MO : MI.all_defs()) {
908+
Register Reg = MO.getReg();
909+
if (!Reg)
910+
continue;
911+
Register SrcReg = Reg.isVirtual() ? TRI->lookThruCopyLike(Reg, MRI) : Reg;
912+
auto Key = std::make_pair(SrcReg, To);
913+
auto Res = CEMergeCandidates.try_emplace(Key, From);
914+
// We wanted to sink the same register into the same block, consider it to
915+
// be profitable.
916+
if (!Res.second) {
917+
// Return the source block that was previously held off.
918+
DeferredFromBlock = Res.first->second;
919+
return true;
920+
}
921+
}
922+
890923
if (From->isSuccessor(To) && MBPI->getEdgeProbability(From, To) <=
891924
BranchProbability(SplitEdgeProbabilityThreshold, 100))
892925
return true;
@@ -921,13 +954,10 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
921954
return false;
922955
}
923956

924-
bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
925-
MachineBasicBlock *FromBB,
926-
MachineBasicBlock *ToBB,
927-
bool BreakPHIEdge) {
928-
if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
929-
return false;
930-
957+
bool MachineSinking::isLegalToBreakCriticalEdge(MachineInstr &MI,
958+
MachineBasicBlock *FromBB,
959+
MachineBasicBlock *ToBB,
960+
bool BreakPHIEdge) {
931961
// Avoid breaking back edge. From == To means backedge for single BB cycle.
932962
if (!SplitEdges || FromBB == ToBB)
933963
return false;
@@ -985,11 +1015,32 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
9851015
return false;
9861016
}
9871017

988-
ToSplit.insert(std::make_pair(FromBB, ToBB));
989-
9901018
return true;
9911019
}
9921020

1021+
bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
1022+
MachineBasicBlock *FromBB,
1023+
MachineBasicBlock *ToBB,
1024+
bool BreakPHIEdge) {
1025+
bool Status = false;
1026+
MachineBasicBlock *DeferredFromBB = nullptr;
1027+
if (isWorthBreakingCriticalEdge(MI, FromBB, ToBB, DeferredFromBB)) {
1028+
// If there is a DeferredFromBB, we consider FromBB only if _both_
1029+
// of them are legal to split.
1030+
if ((!DeferredFromBB ||
1031+
ToSplit.count(std::make_pair(DeferredFromBB, ToBB)) ||
1032+
isLegalToBreakCriticalEdge(MI, DeferredFromBB, ToBB, BreakPHIEdge)) &&
1033+
isLegalToBreakCriticalEdge(MI, FromBB, ToBB, BreakPHIEdge)) {
1034+
ToSplit.insert(std::make_pair(FromBB, ToBB));
1035+
if (DeferredFromBB)
1036+
ToSplit.insert(std::make_pair(DeferredFromBB, ToBB));
1037+
Status = true;
1038+
}
1039+
}
1040+
1041+
return Status;
1042+
}
1043+
9931044
std::vector<unsigned> &
9941045
MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) {
9951046
// Currently to save compiling time, MBB's register pressure will not change

llvm/test/CodeGen/AArch64/and-sink.ll

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,27 +46,24 @@ bb2:
4646
define dso_local i32 @and_sink2(i32 %a, i1 %c, i1 %c2) {
4747
; CHECK-LABEL: and_sink2:
4848
; CHECK: // %bb.0:
49-
; CHECK-NEXT: mov w8, wzr
50-
; CHECK-NEXT: adrp x9, A
51-
; CHECK-NEXT: str wzr, [x9, :lo12:A]
49+
; CHECK-NEXT: adrp x8, A
50+
; CHECK-NEXT: str wzr, [x8, :lo12:A]
5251
; CHECK-NEXT: tbz w1, #0, .LBB1_5
5352
; CHECK-NEXT: // %bb.1: // %bb0.preheader
5453
; CHECK-NEXT: adrp x8, B
5554
; CHECK-NEXT: adrp x9, C
5655
; CHECK-NEXT: .LBB1_2: // %bb0
5756
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
5857
; CHECK-NEXT: str wzr, [x8, :lo12:B]
59-
; CHECK-NEXT: tbz w2, #0, .LBB1_6
58+
; CHECK-NEXT: tbz w2, #0, .LBB1_5
6059
; CHECK-NEXT: // %bb.3: // %bb1
6160
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
6261
; CHECK-NEXT: str wzr, [x9, :lo12:C]
6362
; CHECK-NEXT: tbnz w0, #2, .LBB1_2
6463
; CHECK-NEXT: // %bb.4:
65-
; CHECK-NEXT: mov w8, #1 // =0x1
66-
; CHECK-NEXT: .LBB1_5: // %common.ret
67-
; CHECK-NEXT: mov w0, w8
64+
; CHECK-NEXT: mov w0, #1 // =0x1
6865
; CHECK-NEXT: ret
69-
; CHECK-NEXT: .LBB1_6:
66+
; CHECK-NEXT: .LBB1_5:
7067
; CHECK-NEXT: mov w0, wzr
7168
; CHECK-NEXT: ret
7269

llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,11 @@
44
define i64 @test_or(i32 %a, i32 %b) {
55
; CHECK-LABEL: test_or:
66
; CHECK: ; %bb.0: ; %bb1
7-
; CHECK-NEXT: mov w8, w0
7+
; CHECK-NEXT: cbnz w0, LBB0_2
8+
; CHECK-NEXT: LBB0_1:
89
; CHECK-NEXT: mov x0, xzr
9-
; CHECK-NEXT: cbnz w8, LBB0_2
10-
; CHECK-NEXT: LBB0_1: ; %common.ret
1110
; CHECK-NEXT: ret
1211
; CHECK-NEXT: LBB0_2: ; %bb1.cond.split
13-
; CHECK-NEXT: mov x0, xzr
1412
; CHECK-NEXT: cbz w1, LBB0_1
1513
; CHECK-NEXT: ; %bb.3: ; %bb4
1614
; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
@@ -37,13 +35,11 @@ bb4:
3735
define i64 @test_or_select(i32 %a, i32 %b) {
3836
; CHECK-LABEL: test_or_select:
3937
; CHECK: ; %bb.0: ; %bb1
40-
; CHECK-NEXT: mov w8, w0
38+
; CHECK-NEXT: cbnz w0, LBB1_2
39+
; CHECK-NEXT: LBB1_1:
4140
; CHECK-NEXT: mov x0, xzr
42-
; CHECK-NEXT: cbnz w8, LBB1_2
43-
; CHECK-NEXT: LBB1_1: ; %common.ret
4441
; CHECK-NEXT: ret
4542
; CHECK-NEXT: LBB1_2: ; %bb1.cond.split
46-
; CHECK-NEXT: mov x0, xzr
4743
; CHECK-NEXT: cbz w1, LBB1_1
4844
; CHECK-NEXT: ; %bb.3: ; %bb4
4945
; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
@@ -70,13 +66,11 @@ bb4:
7066
define i64 @test_and(i32 %a, i32 %b) {
7167
; CHECK-LABEL: test_and:
7268
; CHECK: ; %bb.0: ; %bb1
73-
; CHECK-NEXT: mov w8, w0
69+
; CHECK-NEXT: cbnz w0, LBB2_2
70+
; CHECK-NEXT: LBB2_1:
7471
; CHECK-NEXT: mov x0, xzr
75-
; CHECK-NEXT: cbnz w8, LBB2_2
76-
; CHECK-NEXT: LBB2_1: ; %common.ret
7772
; CHECK-NEXT: ret
7873
; CHECK-NEXT: LBB2_2: ; %bb1.cond.split
79-
; CHECK-NEXT: mov x0, xzr
8074
; CHECK-NEXT: cbz w1, LBB2_1
8175
; CHECK-NEXT: ; %bb.3: ; %bb4
8276
; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
@@ -103,13 +97,11 @@ bb4:
10397
define i64 @test_and_select(i32 %a, i32 %b) {
10498
; CHECK-LABEL: test_and_select:
10599
; CHECK: ; %bb.0: ; %bb1
106-
; CHECK-NEXT: mov w8, w0
100+
; CHECK-NEXT: cbnz w0, LBB3_2
101+
; CHECK-NEXT: LBB3_1:
107102
; CHECK-NEXT: mov x0, xzr
108-
; CHECK-NEXT: cbnz w8, LBB3_2
109-
; CHECK-NEXT: LBB3_1: ; %common.ret
110103
; CHECK-NEXT: ret
111104
; CHECK-NEXT: LBB3_2: ; %bb1.cond.split
112-
; CHECK-NEXT: mov x0, xzr
113105
; CHECK-NEXT: cbz w1, LBB3_1
114106
; CHECK-NEXT: ; %bb.3: ; %bb4
115107
; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill

llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ define i1 @sink_li(ptr %text, ptr %text.addr.0) nounwind {
1313
; CHECK-NEXT: mv s0, a0
1414
; CHECK-NEXT: call toupper
1515
; CHECK-NEXT: li a1, 0
16-
; CHECK-NEXT: beqz s0, .LBB0_25
16+
; CHECK-NEXT: beqz s0, .LBB0_26
1717
; CHECK-NEXT: # %bb.1: # %while.body.preheader
1818
; CHECK-NEXT: li a2, 1
1919
; CHECK-NEXT: li a3, 9
@@ -55,36 +55,34 @@ define i1 @sink_li(ptr %text, ptr %text.addr.0) nounwind {
5555
; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
5656
; CHECK-NEXT: beq a2, a3, .LBB0_2
5757
; CHECK-NEXT: # %bb.14: # %while.body.6
58-
; CHECK-NEXT: li a1, 0
59-
; CHECK-NEXT: beqz a2, .LBB0_25
58+
; CHECK-NEXT: beqz a2, .LBB0_24
6059
; CHECK-NEXT: # %bb.15: # %strdup.exit.split.loop.exit126
6160
; CHECK-NEXT: addi s0, s1, 7
62-
; CHECK-NEXT: j .LBB0_24
63-
; CHECK-NEXT: .LBB0_16: # %while.body
64-
; CHECK-NEXT: bnez a2, .LBB0_18
6561
; CHECK-NEXT: j .LBB0_25
62+
; CHECK-NEXT: .LBB0_16: # %while.body
63+
; CHECK-NEXT: beqz a2, .LBB0_26
64+
; CHECK-NEXT: j .LBB0_18
6665
; CHECK-NEXT: .LBB0_17: # %while.body.1
67-
; CHECK-NEXT: li a1, 0
68-
; CHECK-NEXT: beqz a2, .LBB0_25
66+
; CHECK-NEXT: beqz a2, .LBB0_24
6967
; CHECK-NEXT: .LBB0_18: # %strdup.exit.loopexit
7068
; CHECK-NEXT: li s0, 0
71-
; CHECK-NEXT: j .LBB0_24
69+
; CHECK-NEXT: j .LBB0_25
7270
; CHECK-NEXT: .LBB0_19: # %while.body.3
73-
; CHECK-NEXT: li a1, 0
74-
; CHECK-NEXT: beqz a2, .LBB0_25
71+
; CHECK-NEXT: beqz a2, .LBB0_24
7572
; CHECK-NEXT: # %bb.20: # %strdup.exit.split.loop.exit120
7673
; CHECK-NEXT: addi s0, s1, 4
77-
; CHECK-NEXT: j .LBB0_24
74+
; CHECK-NEXT: j .LBB0_25
7875
; CHECK-NEXT: .LBB0_21: # %while.body.4
79-
; CHECK-NEXT: li a1, 0
80-
; CHECK-NEXT: beqz a2, .LBB0_25
76+
; CHECK-NEXT: beqz a2, .LBB0_24
8177
; CHECK-NEXT: # %bb.22: # %strdup.exit.split.loop.exit122
8278
; CHECK-NEXT: addi s0, s1, 5
83-
; CHECK-NEXT: j .LBB0_24
79+
; CHECK-NEXT: j .LBB0_25
8480
; CHECK-NEXT: .LBB0_23: # %while.body.5
81+
; CHECK-NEXT: bnez a2, .LBB0_25
82+
; CHECK-NEXT: .LBB0_24:
8583
; CHECK-NEXT: li a1, 0
86-
; CHECK-NEXT: beqz a2, .LBB0_25
87-
; CHECK-NEXT: .LBB0_24: # %strdup.exit
84+
; CHECK-NEXT: j .LBB0_26
85+
; CHECK-NEXT: .LBB0_25: # %strdup.exit
8886
; CHECK-NEXT: li s1, 0
8987
; CHECK-NEXT: mv s2, a0
9088
; CHECK-NEXT: li a0, 0
@@ -95,7 +93,7 @@ define i1 @sink_li(ptr %text, ptr %text.addr.0) nounwind {
9593
; CHECK-NEXT: li a2, 0
9694
; CHECK-NEXT: jalr s1
9795
; CHECK-NEXT: li a1, 1
98-
; CHECK-NEXT: .LBB0_25: # %return
96+
; CHECK-NEXT: .LBB0_26: # %return
9997
; CHECK-NEXT: mv a0, a1
10098
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
10199
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload

0 commit comments

Comments
 (0)