Skip to content

Commit b8d6659

Browse files
authored
[CodeLayout] Do not flip branch condition when using optsize (#114607)
* Do not use profile data when flipping a branch condition when optimizing for size. This should improving outlining and ICF due to more uniform instruction sequences. * Refactor `optimizeBranches()` to use early `continue`s * Use the correct debug location for `insertBranch()`
1 parent 789de76 commit b8d6659

File tree

3 files changed

+119
-25
lines changed

3 files changed

+119
-25
lines changed

llvm/lib/CodeGen/MachineBlockPlacement.cpp

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2906,7 +2906,7 @@ void MachineBlockPlacement::buildCFGChains() {
29062906

29072907
void MachineBlockPlacement::optimizeBranches() {
29082908
BlockChain &FunctionChain = *BlockToChain[&F->front()];
2909-
SmallVector<MachineOperand, 4> Cond; // For analyzeBranch.
2909+
SmallVector<MachineOperand, 4> Cond;
29102910

29112911
// Now that all the basic blocks in the chain have the proper layout,
29122912
// make a final call to analyzeBranch with AllowModify set.
@@ -2916,24 +2916,30 @@ void MachineBlockPlacement::optimizeBranches() {
29162916
// a fallthrough when it occurs after predicated terminators.
29172917
for (MachineBasicBlock *ChainBB : FunctionChain) {
29182918
Cond.clear();
2919-
MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
2920-
if (!TII->analyzeBranch(*ChainBB, TBB, FBB, Cond, /*AllowModify*/ true)) {
2921-
// If PrevBB has a two-way branch, try to re-order the branches
2922-
// such that we branch to the successor with higher probability first.
2923-
if (TBB && !Cond.empty() && FBB &&
2924-
MBPI->getEdgeProbability(ChainBB, FBB) >
2925-
MBPI->getEdgeProbability(ChainBB, TBB) &&
2926-
!TII->reverseBranchCondition(Cond)) {
2927-
LLVM_DEBUG(dbgs() << "Reverse order of the two branches: "
2928-
<< getBlockName(ChainBB) << "\n");
2929-
LLVM_DEBUG(dbgs() << " Edge probability: "
2930-
<< MBPI->getEdgeProbability(ChainBB, FBB) << " vs "
2931-
<< MBPI->getEdgeProbability(ChainBB, TBB) << "\n");
2932-
DebugLoc dl; // FIXME: this is nowhere
2933-
TII->removeBranch(*ChainBB);
2934-
TII->insertBranch(*ChainBB, FBB, TBB, Cond, dl);
2935-
}
2936-
}
2919+
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2920+
if (TII->analyzeBranch(*ChainBB, TBB, FBB, Cond, /*AllowModify*/ true))
2921+
continue;
2922+
if (!TBB || !FBB || Cond.empty())
2923+
continue;
2924+
// If we are optimizing for size we do not consider the runtime performance.
2925+
// Instead, we retain the original branch condition so we have more uniform
2926+
// instructions which will benefit ICF.
2927+
if (llvm::shouldOptimizeForSize(ChainBB, PSI, MBFI.get()))
2928+
continue;
2929+
// If ChainBB has a two-way branch, try to re-order the branches
2930+
// such that we branch to the successor with higher probability first.
2931+
if (MBPI->getEdgeProbability(ChainBB, TBB) >=
2932+
MBPI->getEdgeProbability(ChainBB, FBB))
2933+
continue;
2934+
if (TII->reverseBranchCondition(Cond))
2935+
continue;
2936+
LLVM_DEBUG(dbgs() << "Reverse order of the two branches: "
2937+
<< getBlockName(ChainBB) << "\n");
2938+
LLVM_DEBUG(dbgs() << " " << getBlockName(TBB) << " < " << getBlockName(FBB)
2939+
<< "\n");
2940+
auto Dl = ChainBB->findBranchDebugLoc();
2941+
TII->removeBranch(*ChainBB);
2942+
TII->insertBranch(*ChainBB, FBB, TBB, Cond, Dl);
29372943
}
29382944
}
29392945

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
3+
4+
; When consuming profile data we sometimes flip a branch to improve runtime
5+
; performance. If we are optimizing for size, we avoid changing the branch to
6+
; improve outlining and ICF.
7+
8+
define i8 @foo_optsize(i32 %v4) optsize {
9+
; CHECK-LABEL: foo_optsize:
10+
; CHECK: // %bb.0: // %entry
11+
; CHECK-NEXT: cbz wzr, .LBB0_2
12+
; CHECK-NEXT: .LBB0_1:
13+
; CHECK-NEXT: mov w0, wzr
14+
; CHECK-NEXT: ret
15+
; CHECK-NEXT: .LBB0_2: // %b1
16+
; CHECK-NEXT: cbnz w0, .LBB0_4
17+
; CHECK-NEXT: .LBB0_3: // %b2
18+
; CHECK-NEXT: mov w0, #1 // =0x1
19+
; CHECK-NEXT: ret
20+
; CHECK-NEXT: .LBB0_4: // %b1
21+
; CHECK-NEXT: cmp w0, #1
22+
; CHECK-NEXT: b.ne .LBB0_1
23+
; CHECK-NEXT: // %bb.5: // %b3
24+
; CHECK-NEXT: cbz wzr, .LBB0_1
25+
; CHECK-NEXT: b .LBB0_3
26+
entry:
27+
%v2 = icmp eq i32 0, 0
28+
br i1 %v2, label %b1, label %b4
29+
30+
b1:
31+
switch i32 %v4, label %b4 [
32+
i32 1, label %b3
33+
i32 0, label %b2
34+
], !prof !0
35+
36+
b2:
37+
br label %b4
38+
39+
b3:
40+
%v3 = icmp eq i32 0, 0
41+
br i1 %v3, label %b4, label %b2
42+
43+
b4:
44+
%v16 = phi i8 [ 1, %b2 ], [ 0, %entry ], [ 0, %b3 ], [ 0, %b1 ]
45+
ret i8 %v16
46+
}
47+
48+
define i8 @foo_optspeed(i32 %v4) {
49+
; CHECK-LABEL: foo_optspeed:
50+
; CHECK: // %bb.0: // %entry
51+
; CHECK-NEXT: cbz wzr, .LBB1_2
52+
; CHECK-NEXT: .LBB1_1:
53+
; CHECK-NEXT: mov w0, wzr
54+
; CHECK-NEXT: ret
55+
; CHECK-NEXT: .LBB1_2: // %b1
56+
; CHECK-NEXT: cbnz w0, .LBB1_4
57+
; CHECK-NEXT: .LBB1_3: // %b2
58+
; CHECK-NEXT: mov w0, #1 // =0x1
59+
; CHECK-NEXT: ret
60+
; CHECK-NEXT: .LBB1_4: // %b1
61+
; CHECK-NEXT: cmp w0, #1
62+
; CHECK-NEXT: b.ne .LBB1_1
63+
; CHECK-NEXT: // %bb.5: // %b3
64+
; CHECK-NEXT: cbnz wzr, .LBB1_3
65+
; CHECK-NEXT: b .LBB1_1
66+
entry:
67+
%v2 = icmp eq i32 0, 0
68+
br i1 %v2, label %b1, label %b4
69+
70+
b1:
71+
switch i32 %v4, label %b4 [
72+
i32 1, label %b3
73+
i32 0, label %b2
74+
], !prof !0
75+
76+
b2:
77+
br label %b4
78+
79+
b3:
80+
%v3 = icmp eq i32 0, 0
81+
br i1 %v3, label %b4, label %b2
82+
83+
b4:
84+
%v16 = phi i8 [ 1, %b2 ], [ 0, %entry ], [ 0, %b3 ], [ 0, %b1 ]
85+
ret i8 %v16
86+
}
87+
88+
!0 = !{!"branch_weights", i32 5, i32 5, i32 100}

llvm/test/CodeGen/X86/conditional-tailcall.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -303,10 +303,10 @@ define zeroext i1 @pr31257(ptr nocapture readonly dereferenceable(8) %s) minsize
303303
; CHECK32-NEXT: .LBB3_8: # %if.else
304304
; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1
305305
; CHECK32-NEXT: movl %esi, %ebx # encoding: [0x89,0xf3]
306-
; CHECK32-NEXT: jb .LBB3_11 # encoding: [0x72,A]
307-
; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
308-
; CHECK32-NEXT: jmp .LBB3_9 # encoding: [0xeb,A]
306+
; CHECK32-NEXT: jae .LBB3_9 # encoding: [0x73,A]
309307
; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
308+
; CHECK32-NEXT: jmp .LBB3_11 # encoding: [0xeb,A]
309+
; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
310310
; CHECK32-NEXT: .LBB3_12: # %sw.bb22
311311
; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1
312312
; CHECK32-NEXT: movzbl (%eax), %ebx # encoding: [0x0f,0xb6,0x18]
@@ -483,10 +483,10 @@ define zeroext i1 @pr31257(ptr nocapture readonly dereferenceable(8) %s) minsize
483483
; WIN64-NEXT: # %bb.6: # %sw.bb
484484
; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1
485485
; WIN64-NEXT: cmpl $45, %r9d # encoding: [0x41,0x83,0xf9,0x2d]
486-
; WIN64-NEXT: je .LBB3_10 # encoding: [0x74,A]
487-
; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
488-
; WIN64-NEXT: jmp .LBB3_8 # encoding: [0xeb,A]
486+
; WIN64-NEXT: jne .LBB3_8 # encoding: [0x75,A]
489487
; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1
488+
; WIN64-NEXT: jmp .LBB3_10 # encoding: [0xeb,A]
489+
; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
490490
; WIN64-NEXT: .LBB3_7: # %sw.bb14
491491
; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1
492492
; WIN64-NEXT: movzbl (%rcx), %r9d # encoding: [0x44,0x0f,0xb6,0x09]

0 commit comments

Comments
 (0)