Skip to content

Commit ba45453

Browse files
authored
[SimplifyCFG] Skip threading if the target may have divergent branches
- This patch skips the threading on known values if the target has divergent branches. - So far, threading on known values is skipped when the basic block has convergent calls. However, even without convergent calls, if the branch condition is divergent, threading duplicates the execution of the threaded block and hence results in lower performance. E.g., ``` BB1: if (cond) BB3, BB2 BB2: // work2 br BB3 BB3: // work3 if (cond) BB5, BB4 BB4: // work4 br BB5 BB5: ``` after threading, ``` BB1: if (cond) BB3', BB2' BB2': // work3 br BB5 BB3': // work2 // work3 // work4 br BB5 BB5: ``` After threading, work3 is executed twice if 'cond' is divergent. Reviewers: yxsamliu, nikic Pull Request: #100185
1 parent b582b65 commit ba45453

File tree

3 files changed

+98
-12
lines changed

3 files changed

+98
-12
lines changed

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3246,7 +3246,12 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI,
32463246
}
32473247

32483248
/// Return true if we can thread a branch across this block.
3249-
static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
3249+
static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB,
3250+
const TargetTransformInfo &TTI) {
3251+
// Skip threading if the branch may be divergent.
3252+
if (TTI.hasBranchDivergence(BB->getParent()))
3253+
return false;
3254+
32503255
int Size = 0;
32513256
EphemeralValueTracker EphTracker;
32523257

@@ -3301,10 +3306,9 @@ static ConstantInt *getKnownValueOnEdge(Value *V, BasicBlock *From,
33013306
/// If we have a conditional branch on something for which we know the constant
33023307
/// value in predecessors (e.g. a phi node in the current block), thread edges
33033308
/// from the predecessor to their ultimate destination.
3304-
static std::optional<bool>
3305-
FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
3306-
const DataLayout &DL,
3307-
AssumptionCache *AC) {
3309+
static std::optional<bool> FoldCondBranchOnValueKnownInPredecessorImpl(
3310+
BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL,
3311+
const TargetTransformInfo &TTI, AssumptionCache *AC) {
33083312
SmallMapVector<ConstantInt *, SmallSetVector<BasicBlock *, 2>, 2> KnownValues;
33093313
BasicBlock *BB = BI->getParent();
33103314
Value *Cond = BI->getCondition();
@@ -3332,7 +3336,7 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
33323336
// Now we know that this block has multiple preds and two succs.
33333337
// Check that the block is small enough and values defined in the block are
33343338
// not used outside of it.
3335-
if (!BlockIsSimpleEnoughToThreadThrough(BB))
3339+
if (!BlockIsSimpleEnoughToThreadThrough(BB, TTI))
33363340
return false;
33373341

33383342
for (const auto &Pair : KnownValues) {
@@ -3459,15 +3463,14 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
34593463
return false;
34603464
}
34613465

3462-
static bool FoldCondBranchOnValueKnownInPredecessor(BranchInst *BI,
3463-
DomTreeUpdater *DTU,
3464-
const DataLayout &DL,
3465-
AssumptionCache *AC) {
3466+
static bool FoldCondBranchOnValueKnownInPredecessor(
3467+
BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL,
3468+
const TargetTransformInfo &TTI, AssumptionCache *AC) {
34663469
std::optional<bool> Result;
34673470
bool EverChanged = false;
34683471
do {
34693472
// Note that None means "we changed things, but recurse further."
3470-
Result = FoldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC);
3473+
Result = FoldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, TTI, AC);
34713474
EverChanged |= Result == std::nullopt || *Result;
34723475
} while (Result == std::nullopt);
34733476
return EverChanged;
@@ -7543,7 +7546,7 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
75437546
// If this is a branch on something for which we know the constant value in
75447547
// predecessors (e.g. a phi node in the current block), thread control
75457548
// through this block.
7546-
if (FoldCondBranchOnValueKnownInPredecessor(BI, DTU, DL, Options.AC))
7549+
if (FoldCondBranchOnValueKnownInPredecessor(BI, DTU, DL, TTI, Options.AC))
75477550
return requestResimplify();
75487551

75497552
// Scan predecessor blocks for conditional branches.
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -mtriple=amdgcn -S -passes=simplifycfg < %s | FileCheck %s
3+
4+
declare void @bar1()
5+
declare void @bar2()
6+
declare void @bar3()
7+
8+
define i32 @test_01a(i32 %a) {
9+
; CHECK-LABEL: define i32 @test_01a(
10+
; CHECK-SAME: i32 [[A:%.*]]) {
11+
; CHECK-NEXT: [[ENTRY:.*:]]
12+
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A]], 0
13+
; CHECK-NEXT: br i1 [[COND]], label %[[MERGE:.*]], label %[[IF_FALSE:.*]]
14+
; CHECK: [[IF_FALSE]]:
15+
; CHECK-NEXT: call void @bar1()
16+
; CHECK-NEXT: br label %[[MERGE]]
17+
; CHECK: [[MERGE]]:
18+
; CHECK-NEXT: call void @bar2()
19+
; CHECK-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[IF_FALSE_2:.*]]
20+
; CHECK: [[IF_FALSE_2]]:
21+
; CHECK-NEXT: call void @bar3()
22+
; CHECK-NEXT: br label %[[EXIT]]
23+
; CHECK: [[EXIT]]:
24+
; CHECK-NEXT: ret i32 [[A]]
25+
;
26+
entry:
27+
%cond = icmp eq i32 %a, 0
28+
br i1 %cond, label %merge, label %if.false
29+
30+
if.false:
31+
call void @bar1()
32+
br label %merge
33+
34+
merge:
35+
call void @bar2()
36+
br i1 %cond, label %exit, label %if.false.2
37+
38+
if.false.2:
39+
call void @bar3()
40+
br label %exit
41+
42+
exit:
43+
ret i32 %a
44+
}

llvm/test/Transforms/SimplifyCFG/convergent.ll

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
; RUN: opt -S -passes='simplifycfg<hoist-common-insts;sink-common-insts>' < %s | FileCheck -check-prefixes=CHECK,SINK %s
55

66
declare void @foo() convergent
7+
declare void @bar1()
8+
declare void @bar2()
9+
declare void @bar3()
710
declare i32 @tid()
811
declare i32 @mbcnt(i32 %a, i32 %b) convergent
912
declare i32 @bpermute(i32 %a, i32 %b) convergent
@@ -45,6 +48,42 @@ exit:
4548
ret i32 %a
4649
}
4750

51+
define i32 @test_01a(i32 %a) {
52+
; CHECK-LABEL: @test_01a(
53+
; CHECK-NEXT: entry:
54+
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
55+
; CHECK-NEXT: br i1 [[COND]], label [[EXIT_CRITEDGE:%.*]], label [[IF_FALSE:%.*]]
56+
; CHECK: if.false:
57+
; CHECK-NEXT: call void @bar1()
58+
; CHECK-NEXT: call void @bar2()
59+
; CHECK-NEXT: call void @bar3()
60+
; CHECK-NEXT: br label [[EXIT:%.*]]
61+
; CHECK: exit.critedge:
62+
; CHECK-NEXT: call void @bar2()
63+
; CHECK-NEXT: br label [[EXIT]]
64+
; CHECK: exit:
65+
; CHECK-NEXT: ret i32 [[A]]
66+
;
67+
entry:
68+
%cond = icmp eq i32 %a, 0
69+
br i1 %cond, label %merge, label %if.false
70+
71+
if.false:
72+
call void @bar1()
73+
br label %merge
74+
75+
merge:
76+
call void @bar2()
77+
br i1 %cond, label %exit, label %if.false.2
78+
79+
if.false.2:
80+
call void @bar3()
81+
br label %exit
82+
83+
exit:
84+
ret i32 %a
85+
}
86+
4887
define void @test_02(ptr %y.coerce) convergent {
4988
; NOSINK-LABEL: @test_02(
5089
; NOSINK-NEXT: entry:

0 commit comments

Comments
 (0)