Skip to content

Commit f01d9e6

Browse files
committed
[SimplifyCFG] Fix inconsistency in block size assessment for threading
Sometimes SimplifyCFG may decide to perform jump threading. In order to do it, it follows the following algorithm: 1. Checks if the block is small enough for threading; 2. If yes, inserts a PR Phi relying that the next iteration will remove it by performing jump threading; 3. The next iteration checks the block again and performs the threading. This logic has a corner case: inserting the PR Phi increases block's size by 1. If the block size at first check was max possible, one more Phi will exceed this size, and we will neither perform threading nor remove the created Phi node. As result, we will end up with worse IR than before. This patch fixes this situation by excluding Phis from block size computation. Excluding Phis from size computation for threading also makes sense by itself because in case of threadign all those Phis will be removed. Differential Revision: https://reviews.llvm.org/D81835 Reviewed By: asbirlea, nikic
1 parent 11cd977 commit f01d9e6

File tree

2 files changed

+77
-10
lines changed

2 files changed

+77
-10
lines changed

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,11 @@ static cl::opt<unsigned> MaxSpeculationDepth(
133133
cl::desc("Limit maximum recursion depth when calculating costs of "
134134
"speculatively executed instructions"));
135135

136+
static cl::opt<int>
137+
MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10),
138+
cl::desc("Max size of a block which is still considered "
139+
"small enough to thread through"));
140+
136141
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
137142
STATISTIC(NumLinearMaps,
138143
"Number of switch instructions turned into linear mapping");
@@ -2189,12 +2194,15 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
21892194

21902195
/// Return true if we can thread a branch across this block.
21912196
static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
2192-
unsigned Size = 0;
2197+
int Size = 0;
21932198

21942199
for (Instruction &I : BB->instructionsWithoutDebug()) {
2195-
if (Size > 10)
2200+
if (Size > MaxSmallBlockSize)
21962201
return false; // Don't clone large BB's.
2197-
++Size;
2202+
// We will delete Phis while threading, so Phis should not be accounted in
2203+
// block's size
2204+
if (!isa<PHINode>(I))
2205+
++Size;
21982206

21992207
// We can only support instructions that do not define values that are
22002208
// live outside of the current basic block.

llvm/test/Transforms/SimplifyCFG/unprofitable-pr.ll

Lines changed: 66 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -simplifycfg -S < %s | FileCheck %s
3-
; RUN: opt -passes=simplify-cfg -S < %s | FileCheck %s
2+
; RUN: opt -simplifycfg -simplifycfg-max-small-block-size=10 -S < %s | FileCheck %s
3+
; RUN: opt -passes=simplify-cfg -simplifycfg-max-small-block-size=10 -S < %s | FileCheck %s
44

55
target datalayout = "e-p:64:64-p5:32:32-A5"
66

@@ -50,14 +50,71 @@ false2: ; preds = %true1
5050
ret void
5151
}
5252

53-
; FIXME: SimplifyCFG is doing something weird here. It should have split the
54-
; blocks like in the test above, but instead it creates .pr Phi node which
55-
; only complicates things.
53+
; Corner case: the block has max possible size for which we still do PRE.
5654
define void @test_02(i1 %c, i64* align 1 %ptr) local_unnamed_addr #0 {
5755
; CHECK-LABEL: @test_02(
56+
; CHECK-NEXT: br i1 [[C:%.*]], label [[TRUE2_CRITEDGE:%.*]], label [[FALSE1:%.*]]
57+
; CHECK: false1:
58+
; CHECK-NEXT: store volatile i64 1, i64* [[PTR:%.*]], align 4
59+
; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i64* [[PTR]] to i64
60+
; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 7
61+
; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0
62+
; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
63+
; CHECK-NEXT: store volatile i64 0, i64* [[PTR]], align 8
64+
; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
65+
; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
66+
; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
67+
; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
68+
; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
69+
; CHECK-NEXT: store volatile i64 3, i64* [[PTR]], align 8
70+
; CHECK-NEXT: ret void
71+
; CHECK: true2.critedge:
72+
; CHECK-NEXT: [[PTRINT_C:%.*]] = ptrtoint i64* [[PTR]] to i64
73+
; CHECK-NEXT: [[MASKEDPTR_C:%.*]] = and i64 [[PTRINT_C]], 7
74+
; CHECK-NEXT: [[MASKCOND_C:%.*]] = icmp eq i64 [[MASKEDPTR_C]], 0
75+
; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND_C]])
76+
; CHECK-NEXT: store volatile i64 0, i64* [[PTR]], align 8
77+
; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
78+
; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
79+
; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
80+
; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
81+
; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
82+
; CHECK-NEXT: store volatile i64 2, i64* [[PTR]], align 8
83+
; CHECK-NEXT: ret void
84+
;
85+
br i1 %c, label %true1, label %false1
86+
87+
true1: ; preds = %false1, %0
88+
%ptrint = ptrtoint i64* %ptr to i64
89+
%maskedptr = and i64 %ptrint, 7
90+
%maskcond = icmp eq i64 %maskedptr, 0
91+
tail call void @llvm.assume(i1 %maskcond)
92+
store volatile i64 0, i64* %ptr, align 8
93+
store volatile i64 -1, i64* %ptr, align 8
94+
store volatile i64 -1, i64* %ptr, align 8
95+
store volatile i64 -1, i64* %ptr, align 8
96+
store volatile i64 -1, i64* %ptr, align 8
97+
store volatile i64 -1, i64* %ptr, align 8
98+
br i1 %c, label %true2, label %false2
99+
100+
false1: ; preds = %0
101+
store volatile i64 1, i64* %ptr, align 4
102+
br label %true1
103+
104+
true2: ; preds = %true1
105+
store volatile i64 2, i64* %ptr, align 8
106+
ret void
107+
108+
false2: ; preds = %true1
109+
store volatile i64 3, i64* %ptr, align 8
110+
ret void
111+
}
112+
113+
; This block is too huge for PRE.
114+
define void @test_03(i1 %c, i64* align 1 %ptr) local_unnamed_addr #0 {
115+
; CHECK-LABEL: @test_03(
58116
; CHECK-NEXT: br i1 [[C:%.*]], label [[TRUE1:%.*]], label [[FALSE1:%.*]]
59117
; CHECK: true1:
60-
; CHECK-NEXT: [[C_PR:%.*]] = phi i1 [ [[C]], [[FALSE1]] ], [ true, [[TMP0:%.*]] ]
61118
; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i64* [[PTR:%.*]] to i64
62119
; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 7
63120
; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0
@@ -68,7 +125,8 @@ define void @test_02(i1 %c, i64* align 1 %ptr) local_unnamed_addr #0 {
68125
; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
69126
; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
70127
; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
71-
; CHECK-NEXT: br i1 [[C_PR]], label [[TRUE2:%.*]], label [[FALSE2:%.*]]
128+
; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
129+
; CHECK-NEXT: br i1 [[C]], label [[TRUE2:%.*]], label [[FALSE2:%.*]]
72130
; CHECK: false1:
73131
; CHECK-NEXT: store volatile i64 1, i64* [[PTR]], align 4
74132
; CHECK-NEXT: br label [[TRUE1]]
@@ -92,6 +150,7 @@ true1: ; preds = %false1, %0
92150
store volatile i64 -1, i64* %ptr, align 8
93151
store volatile i64 -1, i64* %ptr, align 8
94152
store volatile i64 -1, i64* %ptr, align 8
153+
store volatile i64 -1, i64* %ptr, align 8
95154
br i1 %c, label %true2, label %false2
96155

97156
false1: ; preds = %0

0 commit comments

Comments
 (0)