Skip to content

Commit eb35ebb

Browse files
committed
[LV] Update CFG before adding runtime checks.
addRuntimeChecks uses SCEVExpander, which relies on the DT/LoopInfo to be up-to-date. Changing the CFG afterwards may invalidate some inserted instructions, especially LCSSA phis. Reorder the code to first update the CFG and then create the runtime checks. This should not have any impact on the generated code, as we adjust the CFG and generate runtime checks together. Fixes PR47343.
1 parent 7d15ece commit eb35ebb

File tree

2 files changed

+114
-13
lines changed

2 files changed

+114
-13
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2950,31 +2950,32 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
29502950
});
29512951
}
29522952

2953-
Instruction *FirstCheckInst;
2954-
Instruction *MemRuntimeCheck;
2955-
std::tie(FirstCheckInst, MemRuntimeCheck) =
2956-
addRuntimeChecks(MemCheckBlock->getTerminator(), OrigLoop,
2957-
RtPtrChecking.getChecks(), RtPtrChecking.getSE());
2958-
assert(MemRuntimeCheck && "no RT checks generated although RtPtrChecking "
2959-
"claimed checks are required");
2960-
29612953
MemCheckBlock->setName("vector.memcheck");
29622954
// Create new preheader for vector loop.
29632955
LoopVectorPreHeader =
29642956
SplitBlock(MemCheckBlock, MemCheckBlock->getTerminator(), DT, LI, nullptr,
29652957
"vector.ph");
29662958

2959+
auto *CondBranch = cast<BranchInst>(
2960+
Builder.CreateCondBr(Builder.getTrue(), Bypass, LoopVectorPreHeader));
2961+
ReplaceInstWithInst(MemCheckBlock->getTerminator(), CondBranch);
2962+
LoopBypassBlocks.push_back(MemCheckBlock);
2963+
AddedSafetyChecks = true;
2964+
29672965
// Update dominator only if this is first RT check.
29682966
if (LoopBypassBlocks.empty()) {
29692967
DT->changeImmediateDominator(Bypass, MemCheckBlock);
29702968
DT->changeImmediateDominator(LoopExitBlock, MemCheckBlock);
29712969
}
29722970

2973-
ReplaceInstWithInst(
2974-
MemCheckBlock->getTerminator(),
2975-
BranchInst::Create(Bypass, LoopVectorPreHeader, MemRuntimeCheck));
2976-
LoopBypassBlocks.push_back(MemCheckBlock);
2977-
AddedSafetyChecks = true;
2971+
Instruction *FirstCheckInst;
2972+
Instruction *MemRuntimeCheck;
2973+
std::tie(FirstCheckInst, MemRuntimeCheck) =
2974+
addRuntimeChecks(MemCheckBlock->getTerminator(), OrigLoop,
2975+
RtPtrChecking.getChecks(), RtPtrChecking.getSE());
2976+
assert(MemRuntimeCheck && "no RT checks generated although RtPtrChecking "
2977+
"claimed checks are required");
2978+
CondBranch->setCondition(MemRuntimeCheck);
29782979

29792980
// We currently don't use LoopVersioning for the actual loop cloning but we
29802981
// still use it to add the noalias metadata.
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -loop-vectorize -force-vector-width=2 %s -S | FileCheck %s
3+
4+
; Test case for PR47343. Make sure LCSSA phis are created correctly when
5+
; expanding the memory runtime checks.
6+
7+
@f.e = external global i32, align 1
8+
@d = external global i8*, align 1
9+
10+
declare i1 @cond()
11+
12+
define void @f() {
13+
; CHECK-LABEL: @f(
14+
15+
; CHECK: outer.header:
16+
; CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** @d, align 1
17+
; CHECK-NEXT: [[C_0:%.*]] = call i1 @cond()
18+
; CHECK-NEXT: br i1 [[C_0]], label %outer.exit.0, label %inner.1.header.preheader
19+
20+
; CHECK: outer.exit.0:
21+
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i8* [ [[TMP0]], %outer.header ]
22+
; CHECK-NEXT: br label %loop.preheader
23+
24+
; CHECK: outer.exit.1:
25+
; CHECK-NEXT: [[DOTLCSSA1:%.*]] = phi i8* [ [[TMP0]], %inner.1.latch ]
26+
; CHECK-NEXT: br label %loop.preheader
27+
28+
; CHECK: loop.preheader:
29+
; CHECK-NEXT: [[TMP1:%.*]] = phi i8* [ [[DOTLCSSA]], %outer.exit.0 ], [ [[DOTLCSSA1]], %outer.exit.1 ]
30+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
31+
32+
; CHECK: vector.memcheck:
33+
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[TMP1]], i64 1
34+
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* bitcast (i32* @f.e to i8*), [[SCEVGEP]]
35+
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[TMP0]], getelementptr (i8, i8* bitcast (i32* @f.e to i8*), i64 1)
36+
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
37+
; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[FOUND_CONFLICT]], true
38+
; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
39+
40+
; CHECK: vector.ph:
41+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
42+
43+
; CHECK: vector.body:
44+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
45+
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 0
46+
; CHECK-NEXT: store i32 0, i32* @f.e, align 1, !alias.scope !0, !noalias !3
47+
; CHECK-NEXT: store i32 0, i32* @f.e, align 1, !alias.scope !0, !noalias !3
48+
; CHECK-NEXT: store i8 10, i8* [[TMP0]], align 1
49+
; CHECK-NEXT: store i8 10, i8* [[TMP0]], align 1
50+
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2
51+
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 500
52+
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]]
53+
54+
; CHECK: middle.block:
55+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 500, 500
56+
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
57+
58+
; CHECK: scalar.ph:
59+
; CHECK-NEXT: [[TMP4:%.*]] = phi i8* [ [[TMP1]], %vector.memcheck ], [ [[TMP1]], %loop.preheader ], [ [[TMP1]], %middle.block ]
60+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 500, %middle.block ], [ 0, %loop.preheader ], [ 0, %vector.memcheck ]
61+
; CHECK-NEXT: br label [[LOOP:%.*]]
62+
;
63+
entry:
64+
br label %outer.header
65+
66+
; %0 is loaded inside the outer loop and stored through in %loop below, so the
; expected output above carries an LCSSA phi for it at each outer-loop exit.
outer.header: ; preds = %outer.latch, %entry
67+
%0 = load i8*, i8** @d, align 1
68+
%c.0 = call i1 @cond()
69+
br i1 %c.0, label %outer.exit.0, label %inner.1.header
70+
71+
inner.1.header: ; preds = %inner.1.latch, %outer.header
72+
%c.1 = call i1 @cond()
73+
br i1 %c.1, label %inner.1.latch, label %outer.latch
74+
75+
inner.1.latch: ; preds = %inner.1.header
76+
%c.2 = call i1 @cond()
77+
br i1 %c.2, label %outer.exit.1, label %inner.1.header
78+
79+
outer.latch: ; preds = %inner.1.header
80+
br label %outer.header
81+
82+
83+
outer.exit.0: ; preds = %outer.header
84+
br label %loop
85+
86+
outer.exit.1: ; preds = %inner.1.latch
87+
br label %loop
88+
89+
; Loop expected to be vectorized (VF=2 is forced by the RUN line). It stores to
; @f.e and through %0; the vector.memcheck block in the expected output compares
; the bounds of those two accesses.
loop: ; preds = %loop, %outer.exit.0, %outer.exit.1
90+
%iv = phi i32 [ %iv.next, %loop ], [ 0, %outer.exit.0 ], [ 0, %outer.exit.1 ]
91+
%conv6.us.us.us = zext i1 false to i32
92+
store i32 %conv6.us.us.us, i32* @f.e, align 1
93+
store i8 10, i8* %0, align 1
94+
%iv.next = add nsw i32 %iv, 1
95+
%ec = icmp eq i32 %iv.next, 500
96+
br i1 %ec, label %exit, label %loop
97+
98+
exit:
99+
ret void
100+
}

0 commit comments

Comments
 (0)