Skip to content

Commit 28d45c8

Browse files
[llvm][CallBrPrepare] use SSAUpdater to use intrinsic value
Now that we've inserted a call to an intrinsic, we need to update certain previous uses of CallBrInst values to use the value of this intrinsic instead. There are 3 cases to handle: 1. The @llvm.callbr.landingpad.<type>() intrinsic call is in the same BasicBlock as the use of the callbr we're replacing. 2. The use is dominated by the direct destination. 3. The use is not dominated by the direct destination, and may or may not be dominated by the indirect destination. Part 2c of https://discourse.llvm.org/t/rfc-syncing-asm-goto-with-outputs-with-gcc/65453/8. Reviewed By: efriedma, void, jyknight Differential Revision: https://reviews.llvm.org/D139970
1 parent 094190c commit 28d45c8

File tree

2 files changed

+284
-9
lines changed

2 files changed

+284
-9
lines changed

llvm/lib/CodeGen/CallBrPrepare.cpp

Lines changed: 76 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include "llvm/ADT/ArrayRef.h"
3535
#include "llvm/ADT/SmallPtrSet.h"
3636
#include "llvm/ADT/SmallVector.h"
37+
#include "llvm/ADT/iterator.h"
3738
#include "llvm/Analysis/CFG.h"
3839
#include "llvm/CodeGen/Passes.h"
3940
#include "llvm/IR/BasicBlock.h"
@@ -46,6 +47,7 @@
4647
#include "llvm/InitializePasses.h"
4748
#include "llvm/Pass.h"
4849
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
50+
#include "llvm/Transforms/Utils/SSAUpdater.h"
4951

5052
using namespace llvm;
5153

@@ -55,6 +57,10 @@ namespace {
5557

5658
class CallBrPrepare : public FunctionPass {
5759
bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT);
60+
bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs,
61+
DominatorTree &DT) const;
62+
void UpdateSSA(DominatorTree &DT, CallBrInst *CBR, CallInst *Intrinsic,
63+
SSAUpdater &SSAUpdate) const;
5864

5965
public:
6066
CallBrPrepare() : FunctionPass(ID) {}
@@ -108,23 +114,89 @@ bool CallBrPrepare::SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs,
108114
return Changed;
109115
}
110116

111-
static bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs) {
117+
/// Inserts a call to the callbr landing pad intrinsic at the start of each
/// indirect destination of every callbr in \p CBRs, then redirects uses of the
/// callbr's value to the intrinsic's value via UpdateSSA.
///
/// \param CBRs callbr instructions to process; may be empty.
/// \param DT   dominator tree forwarded to UpdateSSA.
/// \returns true if at least one intrinsic call was inserted.
bool CallBrPrepare::InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs,
                                         DominatorTree &DT) const {
  // Nothing to insert, and CBRs[0] below must not be evaluated on an empty
  // array.
  if (CBRs.empty())
    return false;

  bool Changed = false;
  // Indirect destinations that already received an intrinsic call. Shared
  // across all callbrs so that no block gets a second call.
  SmallPtrSet<const BasicBlock *, 4> Visited;
  IRBuilder<> Builder(CBRs[0]->getContext());
  for (CallBrInst *CBR : CBRs) {
    if (!CBR->getNumIndirectDests())
      continue;

    // Seed the updater with the callbr's own value for its parent block and
    // for the default destination; each indirect destination contributes the
    // intrinsic's value below.
    SSAUpdater SSAUpdate;
    SSAUpdate.Initialize(CBR->getType(), CBR->getName());
    SSAUpdate.AddAvailableValue(CBR->getParent(), CBR);
    SSAUpdate.AddAvailableValue(CBR->getDefaultDest(), CBR);

    for (BasicBlock *IndDest : CBR->getIndirectDests()) {
      if (!Visited.insert(IndDest).second)
        continue;
      // The intrinsic call goes at the very beginning of the destination.
      Builder.SetInsertPoint(&*IndDest->begin());
      CallInst *Intrinsic = Builder.CreateIntrinsic(
          CBR->getType(), Intrinsic::callbr_landingpad, {CBR});
      SSAUpdate.AddAvailableValue(IndDest, Intrinsic);
      UpdateSSA(DT, CBR, Intrinsic, SSAUpdate);
      Changed = true;
    }
  }
  return Changed;
}
127144

145+
static bool IsInSameBasicBlock(const Use &U, const BasicBlock *BB) {
146+
const auto *I = dyn_cast<Instruction>(U.getUser());
147+
return I && I->getParent() == BB;
148+
}
149+
150+
static void PrintDebugDomInfo(const DominatorTree &DT, const Use &U,
151+
const BasicBlock *BB, bool IsDefaultDest) {
152+
if (!isa<Instruction>(U.getUser()))
153+
return;
154+
const bool IsDominated = DT.dominates(BB, U);
155+
LLVM_DEBUG(dbgs() << "Use: " << *U.getUser() << ", in block "
156+
<< cast<Instruction>(U.getUser())->getParent()->getName()
157+
<< ", is " << (IsDominated ? "" : "NOT ") << "dominated by "
158+
<< BB->getName() << " (" << (IsDefaultDest ? "in" : "")
159+
<< "direct)\n");
160+
}
161+
162+
/// Redirects uses of \p CBR's value to the value of \p Intrinsic where needed.
///
/// Each use of \p CBR is handled as follows:
///  - a use by a callbr landing pad intrinsic call is left alone;
///  - a use in the same block as \p Intrinsic is set to \p Intrinsic;
///  - a use dominated by the default destination is left alone;
///  - any other use is rewritten through \p SSAUpdate.
void CallBrPrepare::UpdateSSA(DominatorTree &DT, CallBrInst *CBR,
                              CallInst *Intrinsic,
                              SSAUpdater &SSAUpdate) const {
  BasicBlock *DirectDest = CBR->getDefaultDest();
  BasicBlock *PadBlock = Intrinsic->getParent();

  // Walk a snapshot of the uses rather than CBR->uses() itself, since the loop
  // body reassigns uses while iterating.
  SmallVector<Use *, 4> Worklist(make_pointer_range(CBR->uses()));
  SmallPtrSet<Use *, 4> Seen;
  for (Use *CurUse : Worklist) {
    if (!Seen.insert(CurUse).second)
      continue;

#ifndef NDEBUG
    PrintDebugDomInfo(DT, *CurUse, PadBlock, /*IsDefaultDest*/ false);
    PrintDebugDomInfo(DT, *CurUse, DirectDest, /*IsDefaultDest*/ true);
#endif

    // The landing pad intrinsic call must keep consuming the callbr value.
    const auto *UserII = dyn_cast<IntrinsicInst>(CurUse->getUser());
    if (UserII && UserII->getIntrinsicID() == Intrinsic::callbr_landingpad)
      continue;

    if (IsInSameBasicBlock(*CurUse, PadBlock)) {
      // Same block as the intrinsic call: use the intrinsic's value directly.
      CurUse->set(Intrinsic);
    } else if (!DT.dominates(DirectDest, *CurUse)) {
      // Not dominated by the default destination: let the SSA updater pick
      // the value for this use.
      SSAUpdate.RewriteUse(*CurUse);
    }
    // Otherwise the use is dominated by the default destination and keeps the
    // callbr value.
  }
}
199+
128200
bool CallBrPrepare::runOnFunction(Function &Fn) {
129201
bool Changed = false;
130202
SmallVector<CallBrInst *, 2> CBRs = FindCallBrs(Fn);
@@ -151,7 +223,7 @@ bool CallBrPrepare::runOnFunction(Function &Fn) {
151223
if (SplitCriticalEdges(CBRs, *DT))
152224
Changed = true;
153225

154-
if (InsertIntrinsicCalls(CBRs))
226+
if (InsertIntrinsicCalls(CBRs, *DT))
155227
Changed = true;
156228

157229
return Changed;

llvm/test/CodeGen/AArch64/callbr-prepare.ll

Lines changed: 208 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ define i32 @test0() {
1818
; CHECK: direct2:
1919
; CHECK-NEXT: ret i32 0
2020
; CHECK: indirect:
21-
; CHECK-NEXT: [[OUT3:%.*]] = phi i32 [ [[OUT]], [[ENTRY_INDIRECT_CRIT_EDGE:%.*]] ], [ [[OUT2]], [[DIRECT_INDIRECT_CRIT_EDGE:%.*]] ]
21+
; CHECK-NEXT: [[OUT3:%.*]] = phi i32 [ [[TMP0]], [[ENTRY_INDIRECT_CRIT_EDGE:%.*]] ], [ [[TMP1]], [[DIRECT_INDIRECT_CRIT_EDGE:%.*]] ]
2222
; CHECK-NEXT: ret i32 [[OUT3]]
2323
;
2424
entry:
@@ -61,6 +61,8 @@ y:
6161
; Don't split edges unless they are critical, and callbr produces output, and
6262
; that output is used.
6363
; Here we have output, but no critical edge.
64+
; That said, we ought to insert a callbr landing pad intrinsic call and update
65+
; to use the correct SSA value.
6466
define i32 @dont_split1() {
6567
; CHECK-LABEL: @dont_split1(
6668
; CHECK-NEXT: entry:
@@ -70,7 +72,7 @@ define i32 @dont_split1() {
7072
; CHECK-NEXT: ret i32 42
7173
; CHECK: y:
7274
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.callbr.landingpad.i32(i32 [[TMP0]])
73-
; CHECK-NEXT: ret i32 [[TMP0]]
75+
; CHECK-NEXT: ret i32 [[TMP1]]
7476
;
7577
entry:
7678
%0 = callbr i32 asm "", "=r,!i"()
@@ -146,7 +148,7 @@ define i32 @split_me0() {
146148
; CHECK: x:
147149
; CHECK-NEXT: br label [[Y]]
148150
; CHECK: y:
149-
; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[TMP0]], [[ENTRY_Y_CRIT_EDGE:%.*]] ], [ 42, [[X]] ]
151+
; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[TMP1]], [[ENTRY_Y_CRIT_EDGE:%.*]] ], [ 42, [[X]] ]
150152
; CHECK-NEXT: ret i32 [[TMP2]]
151153
;
152154
entry:
@@ -177,7 +179,7 @@ define i32 @split_me1(i1 %z) {
177179
; CHECK: x:
178180
; CHECK-NEXT: ret i32 42
179181
; CHECK: v:
180-
; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[TMP0]], [[W_V_CRIT_EDGE]] ], [ undef, [[ENTRY:%.*]] ]
182+
; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[TMP1]], [[W_V_CRIT_EDGE]] ], [ undef, [[ENTRY:%.*]] ]
181183
; CHECK-NEXT: ret i32 [[TMP2]]
182184
;
183185
entry:
@@ -210,7 +212,7 @@ define i32 @split_me2(i1 %z) {
210212
; CHECK: x:
211213
; CHECK-NEXT: ret i32 42
212214
; CHECK: v:
213-
; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[TMP0]], [[W_V_CRIT_EDGE]] ], [ 42, [[ENTRY:%.*]] ]
215+
; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[TMP1]], [[W_V_CRIT_EDGE]] ], [ 42, [[ENTRY:%.*]] ]
214216
; CHECK-NEXT: ret i32 [[TMP2]]
215217
;
216218
entry:
@@ -227,3 +229,204 @@ v:
227229
%1 = phi i32 [ %0, %w ], [ 42, %entry ], [ %0, %w ]
228230
ret i32 %1
229231
}
232+
233+
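; Diamond with no phi: %0 is used in %out, which is reached from both the direct destination (%x) and the indirect destination (%y), so a phi merging the callbr value with the landingpad intrinsic value must be inserted.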
234+
define i32 @dont_split4() {
235+
; CHECK-LABEL: @dont_split4(
236+
; CHECK-NEXT: entry:
237+
; CHECK-NEXT: [[TMP0:%.*]] = callbr i32 asm "", "=r,!i"()
238+
; CHECK-NEXT: to label [[X:%.*]] [label %y]
239+
; CHECK: x:
240+
; CHECK-NEXT: br label [[OUT:%.*]]
241+
; CHECK: y:
242+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.callbr.landingpad.i32(i32 [[TMP0]])
243+
; CHECK-NEXT: br label [[OUT]]
244+
; CHECK: out:
245+
; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[TMP1]], [[Y:%.*]] ], [ [[TMP0]], [[X]] ]
246+
; CHECK-NEXT: ret i32 [[TMP2]]
247+
;
248+
entry:
249+
%0 = callbr i32 asm "", "=r,!i"()
250+
to label %x [label %y]
251+
252+
x:
253+
br label %out
254+
255+
y:
256+
br label %out
257+
258+
out:
259+
ret i32 %0
260+
}
261+
262+
; Triangle with no phi.
263+
define i32 @dont_split5() {
264+
; CHECK-LABEL: @dont_split5(
265+
; CHECK-NEXT: entry:
266+
; CHECK-NEXT: [[TMP0:%.*]] = callbr i32 asm "", "=r,!i"()
267+
; CHECK-NEXT: to label [[OUT:%.*]] [label %y]
268+
; CHECK: y:
269+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.callbr.landingpad.i32(i32 [[TMP0]])
270+
; CHECK-NEXT: br label [[OUT]]
271+
; CHECK: out:
272+
; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[TMP1]], [[Y:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
273+
; CHECK-NEXT: ret i32 [[TMP2]]
274+
;
275+
entry:
276+
%0 = callbr i32 asm "", "=r,!i"()
277+
to label %out [label %y]
278+
279+
y:
280+
br label %out
281+
282+
out:
283+
ret i32 %0
284+
}
285+
286+
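; Triangle with the roles swapped and no phi: the indirect destination (%out) is also reached from the direct destination (%y), so the edge from the callbr to %out is critical and must be split.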
287+
define i32 @split_me3() {
288+
; CHECK-LABEL: @split_me3(
289+
; CHECK-NEXT: entry:
290+
; CHECK-NEXT: [[TMP0:%.*]] = callbr i32 asm "", "=r,!i"()
291+
; CHECK-NEXT: to label [[Y:%.*]] [label %entry.out_crit_edge]
292+
; CHECK: entry.out_crit_edge:
293+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.callbr.landingpad.i32(i32 [[TMP0]])
294+
; CHECK-NEXT: br label [[OUT:%.*]]
295+
; CHECK: y:
296+
; CHECK-NEXT: br label [[OUT]]
297+
; CHECK: out:
298+
; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[TMP1]], [[ENTRY_OUT_CRIT_EDGE:%.*]] ], [ [[TMP0]], [[Y]] ]
299+
; CHECK-NEXT: ret i32 [[TMP2]]
300+
;
301+
entry:
302+
%0 = callbr i32 asm "", "=r,!i"()
303+
to label %y [label %out]
304+
305+
y:
306+
br label %out
307+
308+
out:
309+
ret i32 %0
310+
}
311+
312+
; Test callbr looping back on itself.
313+
define i32 @dont_split6(i32 %0) {
314+
; CHECK-LABEL: @dont_split6(
315+
; CHECK-NEXT: entry:
316+
; CHECK-NEXT: br label [[LOOP:%.*]]
317+
; CHECK: loop:
318+
; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[TMP0:%.*]], [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[LOOP_LOOP_CRIT_EDGE:%.*]] ]
319+
; CHECK-NEXT: [[TMP2:%.*]] = callbr i32 asm "", "=r,0,!i"(i32 [[TMP1]])
320+
; CHECK-NEXT: to label [[EXIT:%.*]] [label %loop.loop_crit_edge]
321+
; CHECK: loop.loop_crit_edge:
322+
; CHECK-NEXT: [[TMP3]] = call i32 @llvm.callbr.landingpad.i32(i32 [[TMP2]])
323+
; CHECK-NEXT: br label [[LOOP]]
324+
; CHECK: exit:
325+
; CHECK-NEXT: ret i32 0
326+
;
327+
entry:
328+
br label %loop
329+
loop:
330+
%1 = phi i32 [%0, %entry], [%2, %loop]
331+
%2 = callbr i32 asm "", "=r,0,!i"(i32 %1) to label %exit [label %loop]
332+
exit:
333+
ret i32 0
334+
}
335+
336+
; Test the same block used as both the direct and the indirect destination, with no phi.
337+
define i32 @split_me4() {
338+
; CHECK-LABEL: @split_me4(
339+
; CHECK-NEXT: entry:
340+
; CHECK-NEXT: [[TMP0:%.*]] = callbr i32 asm "", "=r,!i"()
341+
; CHECK-NEXT: to label [[SAME:%.*]] [label %entry.same_crit_edge]
342+
; CHECK: entry.same_crit_edge:
343+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.callbr.landingpad.i32(i32 [[TMP0]])
344+
; CHECK-NEXT: br label [[SAME]]
345+
; CHECK: same:
346+
; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[TMP1]], [[ENTRY_SAME_CRIT_EDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
347+
; CHECK-NEXT: ret i32 [[TMP2]]
348+
;
349+
entry:
350+
%0 = callbr i32 asm "", "=r,!i"() to label %same [label %same]
351+
same:
352+
ret i32 %0
353+
}
354+
355+
; Test the same block used as both the direct and the indirect destination, with a phi.
356+
define i32 @split_me5() {
357+
; CHECK-LABEL: @split_me5(
358+
; CHECK-NEXT: entry:
359+
; CHECK-NEXT: [[TMP0:%.*]] = callbr i32 asm "", "=r,!i"()
360+
; CHECK-NEXT: to label [[SAME:%.*]] [label %entry.same_crit_edge]
361+
; CHECK: entry.same_crit_edge:
362+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.callbr.landingpad.i32(i32 [[TMP0]])
363+
; CHECK-NEXT: br label [[SAME]]
364+
; CHECK: same:
365+
; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[TMP1]], [[ENTRY_SAME_CRIT_EDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
366+
; CHECK-NEXT: ret i32 [[TMP2]]
367+
;
368+
entry:
369+
%0 = callbr i32 asm "", "=r,!i"() to label %same [label %same]
370+
same:
371+
%1 = phi i32 [%0, %entry], [%0, %entry]
372+
ret i32 %1
373+
}
374+
375+
; "The Devil's cross" (i.e. two asm goto with conflicting physreg constraints
376+
; going to the same destination).
377+
define i64 @split_me6() {
378+
; CHECK-LABEL: @split_me6(
379+
; CHECK-NEXT: entry:
380+
; CHECK-NEXT: [[TMP0:%.*]] = callbr i64 asm "# $0 $1", "={dx},!i"()
381+
; CHECK-NEXT: to label [[ASM_FALLTHROUGH:%.*]] [label %entry.foo_crit_edge]
382+
; CHECK: entry.foo_crit_edge:
383+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.callbr.landingpad.i64(i64 [[TMP0]])
384+
; CHECK-NEXT: br label [[FOO:%.*]]
385+
; CHECK: asm.fallthrough:
386+
; CHECK-NEXT: [[TMP2:%.*]] = callbr i64 asm "# $0 $1", "={bx},!i"()
387+
; CHECK-NEXT: to label [[FOO]] [label %asm.fallthrough.foo_crit_edge]
388+
; CHECK: asm.fallthrough.foo_crit_edge:
389+
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.callbr.landingpad.i64(i64 [[TMP2]])
390+
; CHECK-NEXT: br label [[FOO]]
391+
; CHECK: foo:
392+
; CHECK-NEXT: [[X_0:%.*]] = phi i64 [ [[TMP1]], [[ENTRY_FOO_CRIT_EDGE:%.*]] ], [ [[TMP3]], [[ASM_FALLTHROUGH_FOO_CRIT_EDGE:%.*]] ], [ [[TMP2]], [[ASM_FALLTHROUGH]] ]
393+
; CHECK-NEXT: ret i64 [[X_0]]
394+
;
395+
entry:
396+
%0 = callbr i64 asm "# $0 $1", "={dx},!i"()
397+
to label %asm.fallthrough [label %foo]
398+
399+
asm.fallthrough:
400+
%1 = callbr i64 asm "# $0 $1", "={bx},!i"()
401+
to label %foo [label %foo]
402+
403+
foo:
404+
%x.0 = phi i64 [ %0, %entry ], [ %1, %asm.fallthrough ], [ %1, %asm.fallthrough ]
405+
ret i64 %x.0
406+
}
407+
408+
; Test the result of the callbr having multiple uses to avoid iterator
409+
; invalidation bugs in CallBrPrepare::UpdateSSA.
410+
define i32 @multiple_split() {
411+
; CHECK-LABEL: @multiple_split(
412+
; CHECK-NEXT: entry:
413+
; CHECK-NEXT: [[TMP0:%.*]] = callbr i32 asm "", "=r,!i"()
414+
; CHECK-NEXT: to label [[X:%.*]] [label %y]
415+
; CHECK: x:
416+
; CHECK-NEXT: ret i32 42
417+
; CHECK: y:
418+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.callbr.landingpad.i32(i32 [[TMP0]])
419+
; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[TMP1]], [[TMP1]]
420+
; CHECK-NEXT: ret i32 [[TMP2]]
421+
;
422+
entry:
423+
%0 = callbr i32 asm "", "=r,!i"()
424+
to label %x [label %y]
425+
426+
x:
427+
ret i32 42
428+
429+
y:
430+
%1 = add nsw i32 %0, %0
431+
ret i32 %1
432+
}

0 commit comments

Comments
 (0)