Skip to content

Commit 0a39af0

Browse files
[llvm][CallBrPrepare] split critical edges
If we have a CallBrInst with output that's used, we need to split critical edges so that we have some place to insert COPYs for physregs to virtregs. Part 2a of https://discourse.llvm.org/t/rfc-syncing-asm-goto-with-outputs-with-gcc/65453/8. Test cases and logic re-purposed from D138078. Reviewed By: efriedma, void, jyknight Differential Revision: https://reviews.llvm.org/D139872
1 parent fb47115 commit 0a39af0

File tree

2 files changed

+266
-14
lines changed

2 files changed

+266
-14
lines changed

llvm/lib/CodeGen/CallBrPrepare.cpp

Lines changed: 69 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,17 @@
3131
//
3232
//===----------------------------------------------------------------------===//
3333

34+
#include "llvm/ADT/ArrayRef.h"
35+
#include "llvm/ADT/SmallVector.h"
36+
#include "llvm/Analysis/CFG.h"
3437
#include "llvm/CodeGen/Passes.h"
3538
#include "llvm/IR/BasicBlock.h"
39+
#include "llvm/IR/Dominators.h"
3640
#include "llvm/IR/Function.h"
3741
#include "llvm/IR/Instructions.h"
3842
#include "llvm/InitializePasses.h"
3943
#include "llvm/Pass.h"
44+
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
4045

4146
using namespace llvm;
4247

@@ -45,31 +50,85 @@ using namespace llvm;
4550
namespace {
4651

4752
class CallBrPrepare : public FunctionPass {
53+
bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT);
54+
4855
public:
4956
CallBrPrepare() : FunctionPass(ID) {}
50-
static char ID;
5157
void getAnalysisUsage(AnalysisUsage &AU) const override;
5258
bool runOnFunction(Function &Fn) override;
59+
static char ID;
5360
};
5461

5562
} // end anonymous namespace
5663

5764
char CallBrPrepare::ID = 0;
58-
INITIALIZE_PASS(CallBrPrepare, DEBUG_TYPE, "Prepare callbr", false, false)
65+
INITIALIZE_PASS_BEGIN(CallBrPrepare, DEBUG_TYPE, "Prepare callbr", false, false)
66+
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
67+
INITIALIZE_PASS_END(CallBrPrepare, DEBUG_TYPE, "Prepare callbr", false, false)
5968

6069
FunctionPass *llvm::createCallBrPass() { return new CallBrPrepare(); }
6170

6271
void CallBrPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
63-
AU.setPreservesAll();
72+
AU.addPreserved<DominatorTreeWrapperPass>();
73+
}
74+
75+
static SmallVector<CallBrInst *, 2> FindCallBrs(Function &Fn) {
76+
SmallVector<CallBrInst *, 2> CBRs;
77+
for (BasicBlock &BB : Fn)
78+
if (auto *CBR = dyn_cast<CallBrInst>(BB.getTerminator()))
79+
if (!CBR->getType()->isVoidTy() && !CBR->use_empty())
80+
CBRs.push_back(CBR);
81+
return CBRs;
82+
}
83+
84+
bool CallBrPrepare::SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs,
85+
DominatorTree &DT) {
86+
bool Changed = false;
87+
CriticalEdgeSplittingOptions Options(&DT);
88+
Options.setMergeIdenticalEdges();
89+
90+
// The indirect destination might be duplicated between another parameter...
91+
// %0 = callbr ... [label %x, label %x]
92+
// ...hence MergeIdenticalEdges and AllowIdenticalEdges, but we don't need
93+
// to split the default destination if it's duplicated between an indirect
94+
// destination...
95+
// %1 = callbr ... to label %x [label %x]
96+
// ...hence starting at 1 and checking against successor 0 (aka the default
97+
// destination).
98+
for (CallBrInst *CBR : CBRs)
99+
for (unsigned i = 1, e = CBR->getNumSuccessors(); i != e; ++i)
100+
if (CBR->getSuccessor(i) == CBR->getSuccessor(0) ||
101+
isCriticalEdge(CBR, i, /*AllowIdenticalEdges*/ true))
102+
if (SplitKnownCriticalEdge(CBR, i, Options))
103+
Changed = true;
104+
return Changed;
64105
}
65106

66107
bool CallBrPrepare::runOnFunction(Function &Fn) {
67-
for (BasicBlock &BB : Fn) {
68-
auto *CBR = dyn_cast<CallBrInst>(BB.getTerminator());
69-
if (!CBR)
70-
continue;
71-
// TODO: something interesting.
72-
// https://discourse.llvm.org/t/rfc-syncing-asm-goto-with-outputs-with-gcc/65453/8
108+
bool Changed = false;
109+
SmallVector<CallBrInst *, 2> CBRs = FindCallBrs(Fn);
110+
111+
if (CBRs.empty())
112+
return Changed;
113+
114+
// It's highly likely that most programs do not contain CallBrInsts. Follow a
115+
// similar pattern from SafeStackLegacyPass::runOnFunction to reuse previous
116+
// domtree analysis if available, otherwise compute it lazily. This avoids
117+
// forcing Dominator Tree Construction at -O0 for programs that likely do not
118+
// contain CallBrInsts. It does pessimize programs with callbr at higher
119+
// optimization levels, as the DominatorTree created here is not reused by
120+
// subsequent passes.
121+
DominatorTree *DT;
122+
std::optional<DominatorTree> LazilyComputedDomTree;
123+
if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
124+
DT = &DTWP->getDomTree();
125+
else {
126+
LazilyComputedDomTree.emplace(Fn);
127+
DT = &*LazilyComputedDomTree;
73128
}
74-
return false;
129+
130+
if (SplitCriticalEdges(CBRs, *DT))
131+
Changed = true;
132+
133+
return Changed;
75134
}

llvm/test/CodeGen/AArch64/callbr-prepare.ll

Lines changed: 197 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,22 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
22
; RUN: opt %s -callbrprepare -S -o - | FileCheck %s
33

4-
; TODO: update this test to split critical edges.
54
define i32 @test0() {
65
; CHECK-LABEL: @test0(
76
; CHECK-NEXT: entry:
87
; CHECK-NEXT: [[OUT:%.*]] = callbr i32 asm "# $0", "=r,!i"()
9-
; CHECK-NEXT: to label [[DIRECT:%.*]] [label %indirect]
8+
; CHECK-NEXT: to label [[DIRECT:%.*]] [label %entry.indirect_crit_edge]
9+
; CHECK: entry.indirect_crit_edge:
10+
; CHECK-NEXT: br label [[INDIRECT:%.*]]
1011
; CHECK: direct:
1112
; CHECK-NEXT: [[OUT2:%.*]] = callbr i32 asm "# $0", "=r,!i"()
12-
; CHECK-NEXT: to label [[DIRECT2:%.*]] [label %indirect]
13+
; CHECK-NEXT: to label [[DIRECT2:%.*]] [label %direct.indirect_crit_edge]
14+
; CHECK: direct.indirect_crit_edge:
15+
; CHECK-NEXT: br label [[INDIRECT]]
1316
; CHECK: direct2:
1417
; CHECK-NEXT: ret i32 0
1518
; CHECK: indirect:
16-
; CHECK-NEXT: [[OUT3:%.*]] = phi i32 [ [[OUT]], [[ENTRY:%.*]] ], [ [[OUT2]], [[DIRECT]] ]
19+
; CHECK-NEXT: [[OUT3:%.*]] = phi i32 [ [[OUT]], [[ENTRY_INDIRECT_CRIT_EDGE:%.*]] ], [ [[OUT2]], [[DIRECT_INDIRECT_CRIT_EDGE:%.*]] ]
1720
; CHECK-NEXT: ret i32 [[OUT3]]
1821
;
1922
entry:
@@ -28,3 +31,193 @@ indirect:
2831
%out3 = phi i32 [%out, %entry], [%out2, %direct]
2932
ret i32 %out3
3033
}
34+
35+
; Don't split edges unless they are critical, and callbr produces output, and
36+
; that output is used.
37+
; Here we have none of the above.
38+
define i32 @dont_split0() {
39+
; CHECK-LABEL: @dont_split0(
40+
; CHECK-NEXT: entry:
41+
; CHECK-NEXT: callbr void asm "", "!i"()
42+
; CHECK-NEXT: to label [[X:%.*]] [label %y]
43+
; CHECK: x:
44+
; CHECK-NEXT: ret i32 42
45+
; CHECK: y:
46+
; CHECK-NEXT: ret i32 0
47+
;
48+
entry:
49+
callbr void asm "", "!i"()
50+
to label %x [label %y]
51+
52+
x:
53+
ret i32 42
54+
55+
y:
56+
ret i32 0
57+
}
58+
59+
; Don't split edges unless they are critical, and callbr produces output, and
60+
; that output is used.
61+
; Here we have output, but no critical edge.
62+
define i32 @dont_split1() {
63+
; CHECK-LABEL: @dont_split1(
64+
; CHECK-NEXT: entry:
65+
; CHECK-NEXT: [[TMP0:%.*]] = callbr i32 asm "", "=r,!i"()
66+
; CHECK-NEXT: to label [[X:%.*]] [label %y]
67+
; CHECK: x:
68+
; CHECK-NEXT: ret i32 42
69+
; CHECK: y:
70+
; CHECK-NEXT: ret i32 [[TMP0]]
71+
;
72+
entry:
73+
%0 = callbr i32 asm "", "=r,!i"()
74+
to label %x [label %y]
75+
76+
x:
77+
ret i32 42
78+
79+
y:
80+
ret i32 %0
81+
}
82+
83+
; Don't split edges unless they are critical, and callbr produces output, and
84+
; that output is used.
85+
; Here we have a critical edge along an indirect branch, but no output.
86+
define i32 @dont_split2() {
87+
; CHECK-LABEL: @dont_split2(
88+
; CHECK-NEXT: entry:
89+
; CHECK-NEXT: callbr void asm "", "!i"()
90+
; CHECK-NEXT: to label [[X:%.*]] [label %y]
91+
; CHECK: x:
92+
; CHECK-NEXT: br label [[Y:%.*]]
93+
; CHECK: y:
94+
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ 0, [[X]] ], [ 42, [[ENTRY:%.*]] ]
95+
; CHECK-NEXT: ret i32 [[TMP0]]
96+
;
97+
entry:
98+
callbr void asm "", "!i"()
99+
to label %x [label %y]
100+
101+
x:
102+
br label %y
103+
104+
y:
105+
%0 = phi i32 [ 0, %x ], [ 42, %entry ]
106+
ret i32 %0
107+
}
108+
109+
; Don't split edges unless they are critical, and callbr produces output, and
110+
; that output is used.
111+
; Here we're missing a use.
112+
define i32 @dont_split3() {
113+
; CHECK-LABEL: @dont_split3(
114+
; CHECK-NEXT: entry:
115+
; CHECK-NEXT: [[TMP0:%.*]] = callbr i32 asm "", "=r,!i"()
116+
; CHECK-NEXT: to label [[X:%.*]] [label %v]
117+
; CHECK: x:
118+
; CHECK-NEXT: br label [[V:%.*]]
119+
; CHECK: v:
120+
; CHECK-NEXT: ret i32 42
121+
;
122+
entry:
123+
%0 = callbr i32 asm "", "=r,!i"() to label %x [label %v]
124+
125+
x:
126+
br label %v
127+
128+
v:
129+
ret i32 42
130+
}
131+
132+
; Don't split edges unless they are critical, and callbr produces output, and
133+
; that output is used.
134+
; Here we have output and a critical edge along an indirect branch.
135+
define i32 @split_me0() {
136+
; CHECK-LABEL: @split_me0(
137+
; CHECK-NEXT: entry:
138+
; CHECK-NEXT: [[TMP0:%.*]] = callbr i32 asm "", "=r,!i"()
139+
; CHECK-NEXT: to label [[X:%.*]] [label %entry.y_crit_edge]
140+
; CHECK: entry.y_crit_edge:
141+
; CHECK-NEXT: br label [[Y:%.*]]
142+
; CHECK: x:
143+
; CHECK-NEXT: br label [[Y]]
144+
; CHECK: y:
145+
; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[TMP0]], [[ENTRY_Y_CRIT_EDGE:%.*]] ], [ 42, [[X]] ]
146+
; CHECK-NEXT: ret i32 [[TMP1]]
147+
;
148+
entry:
149+
%0 = callbr i32 asm "", "=r,!i"()
150+
to label %x [label %y]
151+
152+
x:
153+
br label %y
154+
155+
y:
156+
%1 = phi i32 [ %0, %entry ], [ 42, %x ]
157+
ret i32 %1
158+
}
159+
160+
; Here we have output and a critical edge along an indirect branch.
161+
; Ensure that if we repeat the indirect destination, we only split it
162+
; once.
163+
define i32 @split_me1(i1 %z) {
164+
; CHECK-LABEL: @split_me1(
165+
; CHECK-NEXT: entry:
166+
; CHECK-NEXT: br i1 [[Z:%.*]], label [[W:%.*]], label [[V:%.*]]
167+
; CHECK: w:
168+
; CHECK-NEXT: [[TMP0:%.*]] = callbr i32 asm "", "=r,!i,!i"()
169+
; CHECK-NEXT: to label [[X:%.*]] [label [[W_V_CRIT_EDGE:%.*]], label %w.v_crit_edge]
170+
; CHECK: w.v_crit_edge:
171+
; CHECK-NEXT: br label [[V]]
172+
; CHECK: x:
173+
; CHECK-NEXT: ret i32 42
174+
; CHECK: v:
175+
; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[TMP0]], [[W_V_CRIT_EDGE]] ], [ undef, [[ENTRY:%.*]] ]
176+
; CHECK-NEXT: ret i32 [[TMP1]]
177+
;
178+
entry:
179+
br i1 %z, label %w, label %v
180+
181+
w:
182+
%0 = callbr i32 asm "", "=r,!i,!i"()
183+
to label %x [label %v, label %v]
184+
185+
x:
186+
ret i32 42
187+
188+
v:
189+
%1 = phi i32 [%0, %w], [%0, %w], [undef, %entry]
190+
ret i32 %1
191+
}
192+
193+
; A more interesting case of @split_me1. Check that we still only split the
194+
; critical edge from w to v once.
195+
define i32 @split_me2(i1 %z) {
196+
; CHECK-LABEL: @split_me2(
197+
; CHECK-NEXT: entry:
198+
; CHECK-NEXT: br i1 [[Z:%.*]], label [[W:%.*]], label [[V:%.*]]
199+
; CHECK: w:
200+
; CHECK-NEXT: [[TMP0:%.*]] = callbr i32 asm "", "=r,!i,!i"()
201+
; CHECK-NEXT: to label [[X:%.*]] [label [[W_V_CRIT_EDGE:%.*]], label %w.v_crit_edge]
202+
; CHECK: w.v_crit_edge:
203+
; CHECK-NEXT: br label [[V]]
204+
; CHECK: x:
205+
; CHECK-NEXT: ret i32 42
206+
; CHECK: v:
207+
; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[TMP0]], [[W_V_CRIT_EDGE]] ], [ 42, [[ENTRY:%.*]] ]
208+
; CHECK-NEXT: ret i32 [[TMP1]]
209+
;
210+
entry:
211+
br i1 %z, label %w, label %v
212+
213+
w:
214+
%0 = callbr i32 asm "", "=r,!i,!i"()
215+
to label %x [label %v, label %v]
216+
217+
x:
218+
ret i32 42
219+
220+
v:
221+
%1 = phi i32 [ %0, %w ], [ 42, %entry ], [ %0, %w ]
222+
ret i32 %1
223+
}

0 commit comments

Comments
 (0)