Skip to content

Commit 13107cb

Browse files
authored
[LoopVectorize] Enable more early exit vectorisation tests (#117008)
PR #112138 introduced initial support for dispatching to multiple exit blocks via split middle blocks. This patch fixes a few issues so that we can enable more tests to use the new enable-early-exit-vectorization flag. Fixes are:
1. The code to bail out for any loop live-out values happens too late. This is because collectUsersInExitBlocks ignores induction variables, which get dealt with in fixupIVUsers. I've moved the check much earlier in processLoop by looking for outside users of loop-defined values.
2. We shouldn't yet be interleaving when vectorising loops with uncountable early exits, since we've not added support for this yet.
3. Similarly, we also shouldn't be creating vector epilogues.
4. Similarly, we shouldn't enable tail-folding.
5. The existing implementation doesn't yet support loops that require scalar epilogues, although I plan to add that as part of PR #88385.
6. The new split middle blocks weren't being added to the parent loop.
1 parent 96bb281 commit 13107cb

File tree

8 files changed

+306
-18
lines changed

8 files changed

+306
-18
lines changed

llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,15 @@ void reportVectorizationFailure(const StringRef DebugMsg,
170170
const StringRef OREMsg, const StringRef ORETag,
171171
OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr);
172172

173+
/// Same as above, but \p DebugMsg is used as both the debug message and the
/// optimization remark message.
174+
inline void reportVectorizationFailure(const StringRef DebugMsg,
175+
const StringRef ORETag,
176+
OptimizationRemarkEmitter *ORE,
177+
Loop *TheLoop,
178+
Instruction *I = nullptr) {
179+
reportVectorizationFailure(DebugMsg, DebugMsg, ORETag, ORE, TheLoop, I);
180+
}
181+
173182
/// A marker analysis to determine if extra passes should be run after loop
174183
/// vectorization.
175184
struct ShouldRunExtraVectorPasses

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 67 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3039,6 +3039,22 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
30393039
PSE.getSE()->forgetLoop(OrigLoop);
30403040
PSE.getSE()->forgetBlockAndLoopDispositions();
30413041

3042+
// When dealing with uncountable early exits we create middle.split blocks
3043+
// between the vector loop region and the exit block. These blocks need
3044+
// adding to any outer loop.
3045+
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
3046+
Loop *OuterLoop = OrigLoop->getParentLoop();
3047+
if (Legal->hasUncountableEarlyExit() && OuterLoop) {
3048+
VPBasicBlock *MiddleVPBB = State.Plan->getMiddleBlock();
3049+
VPBlockBase *PredVPBB = MiddleVPBB->getSinglePredecessor();
3050+
while (PredVPBB && PredVPBB != VectorRegion) {
3051+
BasicBlock *MiddleSplitBB =
3052+
State.CFG.VPBB2IRBB[cast<VPBasicBlock>(PredVPBB)];
3053+
OuterLoop->addBasicBlockToLoop(MiddleSplitBB, *LI);
3054+
PredVPBB = PredVPBB->getSinglePredecessor();
3055+
}
3056+
}
3057+
30423058
// After vectorization, the exit blocks of the original loop will have
30433059
// additional predecessors. Invalidate SCEVs for the exit phis in case SE
30443060
// looked through single-entry phis.
@@ -3069,7 +3085,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
30693085
for (Instruction *PI : PredicatedInstructions)
30703086
sinkScalarOperands(&*PI);
30713087

3072-
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
30733088
VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock();
30743089
BasicBlock *HeaderBB = State.CFG.VPBB2IRBB[HeaderVPBB];
30753090

@@ -4776,6 +4791,7 @@ bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
47764791
// Epilogue vectorization code has not been audited to ensure it handles
47774792
// non-latch exits properly. It may be fine, but it needs to be audited and
47784793
// tested.
4794+
// TODO: Add support for loops with an early exit.
47794795
if (OrigLoop->getExitingBlock() != OrigLoop->getLoopLatch())
47804796
return false;
47814797

@@ -5024,6 +5040,12 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
50245040
if (!Legal->isSafeForAnyVectorWidth())
50255041
return 1;
50265042

5043+
// We don't attempt to perform interleaving for loops with uncountable early
5044+
// exits because the VPInstruction::AnyOf code cannot currently handle
5045+
// multiple parts.
5046+
if (Legal->hasUncountableEarlyExit())
5047+
return 1;
5048+
50275049
auto BestKnownTC = getSmallBestKnownTC(PSE, TheLoop);
50285050
const bool HasReductions = !Legal->getReductionVars().empty();
50295051

@@ -7837,6 +7859,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
78377859
// 2.5 When vectorizing the epilogue, fix reduction and induction resume
78387860
// values from the additional bypass block.
78397861
if (VectorizingEpilogue) {
7862+
assert(!ILV.Legal->hasUncountableEarlyExit() &&
7863+
"Epilogue vectorisation not yet supported with early exits");
78407864
BasicBlock *BypassBlock = ILV.getAdditionalBypassBlock();
78417865
for (VPRecipeBase &R : *ExitVPBB) {
78427866
fixReductionScalarResumeWhenVectorizingEpilog(
@@ -10202,13 +10226,36 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1020210226
return false;
1020310227
}
1020410228

10205-
if (LVL.hasUncountableEarlyExit() && !EnableEarlyExitVectorization) {
10206-
reportVectorizationFailure("Auto-vectorization of loops with uncountable "
10207-
"early exit is not enabled",
10208-
"Auto-vectorization of loops with uncountable "
10209-
"early exit is not enabled",
10210-
"UncountableEarlyExitLoopsDisabled", ORE, L);
10211-
return false;
10229+
if (LVL.hasUncountableEarlyExit()) {
10230+
if (!EnableEarlyExitVectorization) {
10231+
reportVectorizationFailure("Auto-vectorization of loops with uncountable "
10232+
"early exit is not enabled",
10233+
"UncountableEarlyExitLoopsDisabled", ORE, L);
10234+
return false;
10235+
}
10236+
10237+
// In addUsersInExitBlocks we already bail out if there is an outside use
10238+
// of a loop-defined variable, but it ignores induction variables which are
10239+
// handled by InnerLoopVectorizer::fixupIVUsers. We need to bail out if we
10240+
// encounter outside uses of induction variables too, otherwise fixupIVUsers
// will crash.
10241+
BasicBlock *LoopLatch = L->getLoopLatch();
10242+
for (const auto &Induction : LVL.getInductionVars()) {
10243+
PHINode *Ind = Induction.first;
10244+
Instruction *IndUpdate =
10245+
cast<Instruction>(Ind->getIncomingValueForBlock(LoopLatch));
10246+
for (Instruction *I : {cast<Instruction>(Ind), IndUpdate}) {
10247+
for (User *U : I->users()) {
10248+
Instruction *UI = cast<Instruction>(U);
10249+
if (!L->contains(UI)) {
10250+
reportVectorizationFailure(
10251+
"Auto-vectorization of loops with uncountable early exits and "
10252+
"outside uses of induction variables unsupported",
10253+
"UncountableEarlyExitLoopIndLiveOutsUnsupported", ORE, L);
10254+
return false;
10255+
}
10256+
}
10257+
}
10258+
}
1021210259
}
1021310260

1021410261
// Entrance to the VPlan-native vectorization path. Outer loops are processed
@@ -10233,6 +10280,18 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1023310280
if (UseInterleaved)
1023410281
IAI.analyzeInterleaving(useMaskedInterleavedAccesses(*TTI));
1023510282

10283+
if (LVL.hasUncountableEarlyExit()) {
10284+
BasicBlock *LoopLatch = L->getLoopLatch();
10285+
if (IAI.requiresScalarEpilogue() ||
10286+
any_of(LVL.getCountableExitingBlocks(),
10287+
[LoopLatch](BasicBlock *BB) { return BB != LoopLatch; })) {
10288+
reportVectorizationFailure("Auto-vectorization of early exit loops "
10289+
"requiring a scalar epilogue is unsupported",
10290+
"UncountableEarlyExitUnsupported", ORE, L);
10291+
return false;
10292+
}
10293+
}
10294+
1023610295
// Check the function attributes and profiles to find out if this function
1023710296
// should be optimized for size.
1023810297
ScalarEpilogueLowering SEL =

llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll

Lines changed: 98 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2-
; RUN: opt -S < %s -p loop-vectorize | FileCheck %s --check-prefixes=CHECK
2+
; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization | FileCheck %s --check-prefixes=CHECK
33

44
target triple = "aarch64-unknown-linux-gnu"
55

@@ -272,22 +272,66 @@ define i32 @diff_exit_block_needs_scev_check(i32 %end) {
272272
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
273273
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
274274
; CHECK-NEXT: [[END_CLAMPED:%.*]] = and i32 [[END]], 1023
275+
; CHECK-NEXT: [[TMP19:%.*]] = trunc i32 [[END]] to i10
276+
; CHECK-NEXT: [[TMP20:%.*]] = zext i10 [[TMP19]] to i64
277+
; CHECK-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP20]], i64 1)
278+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 12
279+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
280+
; CHECK: vector.scevcheck:
281+
; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[END_CLAMPED]], i32 1)
282+
; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[UMAX]], -1
283+
; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
284+
; CHECK-NEXT: [[TMP4:%.*]] = add i8 1, [[TMP3]]
285+
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP4]], 1
286+
; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP2]], 255
287+
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
288+
; CHECK-NEXT: br i1 [[TMP7]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
289+
; CHECK: vector.ph:
290+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 4
291+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]]
292+
; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i8
275293
; CHECK-NEXT: br label [[FOR_BODY1:%.*]]
294+
; CHECK: vector.body:
295+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_BODY1]] ]
296+
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
297+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[TMP8]]
298+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
299+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
300+
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[TMP8]]
301+
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
302+
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP12]], align 4
303+
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD3]]
304+
; CHECK-NEXT: [[TMP14:%.*]] = xor <4 x i1> [[TMP13]], splat (i1 true)
305+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
306+
; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP14]], splat (i1 true)
307+
; CHECK-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP15]])
308+
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
309+
; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
310+
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_SPLIT:%.*]], label [[FOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]]
311+
; CHECK: middle.split:
312+
; CHECK-NEXT: br i1 [[TMP16]], label [[FOUND:%.*]], label [[MIDDLE_BLOCK:%.*]]
313+
; CHECK: middle.block:
314+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
315+
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
316+
; CHECK: scalar.ph:
317+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
318+
; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
319+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
276320
; CHECK: for.body:
277-
; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
278-
; CHECK-NEXT: [[GEP_IND:%.*]] = phi i64 [ [[GEP_IND_NEXT:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ]
321+
; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
322+
; CHECK-NEXT: [[GEP_IND:%.*]] = phi i64 [ [[GEP_IND_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
279323
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[GEP_IND]]
280324
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
281325
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[GEP_IND]]
282326
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
283327
; CHECK-NEXT: [[CMP_EARLY:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
284-
; CHECK-NEXT: br i1 [[CMP_EARLY]], label [[FOUND:%.*]], label [[FOR_INC]]
328+
; CHECK-NEXT: br i1 [[CMP_EARLY]], label [[FOUND]], label [[FOR_INC]]
285329
; CHECK: for.inc:
286330
; CHECK-NEXT: [[IND_NEXT]] = add i8 [[IND]], 1
287331
; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IND_NEXT]] to i32
288332
; CHECK-NEXT: [[GEP_IND_NEXT]] = add i64 [[GEP_IND]], 1
289333
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[CONV]], [[END_CLAMPED]]
290-
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY1]], label [[EXIT:%.*]]
334+
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
291335
; CHECK: found:
292336
; CHECK-NEXT: ret i32 1
293337
; CHECK: exit:
@@ -325,9 +369,58 @@ exit:
325369
ret i32 0
326370
}
327371

372+
%my.struct = type { i8, i8 }
373+
374+
define i64 @same_exit_block_requires_interleaving() {
375+
; CHECK-LABEL: define i64 @same_exit_block_requires_interleaving() {
376+
; CHECK-NEXT: entry:
377+
; CHECK-NEXT: [[P1:%.*]] = alloca [128 x %my.struct], align 8
378+
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 256)
379+
; CHECK-NEXT: br label [[LOOP:%.*]]
380+
; CHECK: loop:
381+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 3, [[ENTRY:%.*]] ]
382+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [128 x %my.struct], ptr [[P1]], i64 0, i64 [[INDEX]]
383+
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
384+
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
385+
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_LATCH]], label [[LOOP_END:%.*]]
386+
; CHECK: loop.latch:
387+
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
388+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 69
389+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
390+
; CHECK: loop.end:
391+
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 0, [[LOOP_LATCH]] ], [ 1, [[LOOP]] ]
392+
; CHECK-NEXT: ret i64 [[RETVAL]]
393+
;
394+
entry:
395+
%p1 = alloca [128 x %my.struct]
396+
call void @init_mem(ptr %p1, i64 256)
397+
br label %loop
398+
399+
loop:
400+
%index = phi i64 [ %index.next, %loop.latch ], [ 3, %entry ]
401+
%arrayidx = getelementptr inbounds [128 x %my.struct], ptr %p1, i64 0, i64 %index
402+
%ld1 = load i8, ptr %arrayidx, align 1
403+
%cmp3 = icmp eq i8 %ld1, 3
404+
br i1 %cmp3, label %loop.latch, label %loop.end
405+
406+
loop.latch:
407+
%index.next = add i64 %index, 1
408+
%exitcond = icmp ne i64 %index.next, 69
409+
br i1 %exitcond, label %loop, label %loop.end
410+
411+
loop.end:
412+
%retval = phi i64 [ 0, %loop.latch ], [ 1, %loop ]
413+
ret i64 %retval
414+
}
328415

329416
declare i32 @foo(i32) readonly
330417
declare <vscale x 4 x i32> @foo_vec(<vscale x 4 x i32>)
331418

332419
attributes #0 = { "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vec)" }
333420
attributes #1 = { "target-features"="+sve" vscale_range(1,16) }
421+
;.
422+
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
423+
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
424+
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
425+
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
426+
;.

llvm/test/Transforms/LoopVectorize/early_exit_legality.ll

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ define i64 @same_exit_block_pre_inc_use1() {
4949
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1'
5050
; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63
5151
; CHECK-NEXT: LV: We can vectorize this loop!
52-
; CHECK-NOT: LV: Not vectorizing
52+
; CHECK-NEXT: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exits and outside uses of induction variables unsupported
5353
entry:
5454
%p1 = alloca [1024 x i8]
5555
%p2 = alloca [1024 x i8]
@@ -141,7 +141,7 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(
141141
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_load_after_early_exit'
142142
; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63
143143
; CHECK-NEXT: LV: We can vectorize this loop!
144-
; CHECK: LV: Not vectorizing: Some exit values in loop with uncountable exit not supported yet.
144+
; CHECK: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exits and outside uses of induction variables unsupported
145145
entry:
146146
%p1 = alloca [1024 x i8]
147147
call void @init_mem(ptr %p1, i64 1024)
@@ -167,6 +167,42 @@ loop.end:
167167
}
168168

169169

170+
define i64 @one_uncountable_two_countable_same_exit_phi_of_consts() {
171+
; CHECK-LABEL: LV: Checking a loop in 'one_uncountable_two_countable_same_exit_phi_of_consts'
172+
; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 61
173+
; CHECK-NEXT: LV: We can vectorize this loop!
174+
; CHECK-NEXT: LV: Not vectorizing: Auto-vectorization of early exit loops requiring a scalar epilogue is unsupported.
175+
entry:
176+
%p1 = alloca [1024 x i8]
177+
%p2 = alloca [1024 x i8]
178+
call void @init_mem(ptr %p1, i64 1024)
179+
call void @init_mem(ptr %p2, i64 1024)
180+
br label %loop
181+
182+
loop:
183+
%index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
184+
%cmp1 = icmp ne i64 %index, 64
185+
br i1 %cmp1, label %search, label %loop.end
186+
187+
search:
188+
%arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
189+
%ld1 = load i8, ptr %arrayidx, align 1
190+
%arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
191+
%ld2 = load i8, ptr %arrayidx1, align 1
192+
%cmp3 = icmp eq i8 %ld1, %ld2
193+
br i1 %cmp3, label %loop.end, label %loop.inc
194+
195+
loop.inc:
196+
%index.next = add i64 %index, 1
197+
%exitcond = icmp ne i64 %index.next, 128
198+
br i1 %exitcond, label %loop, label %loop.end
199+
200+
loop.end:
201+
%retval = phi i64 [ 0, %loop ], [ 1, %search ], [ 0, %loop.inc ]
202+
ret i64 %retval
203+
}
204+
205+
170206
; == SOME ILLEGAL EXAMPLES ==
171207

172208

llvm/test/Transforms/LoopVectorize/multi_early_exit.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2-
; RUN: opt -S < %s -p loop-vectorize | FileCheck %s
2+
; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization | FileCheck %s
33

44
declare void @init_mem(ptr, i64);
55

llvm/test/Transforms/LoopVectorize/multi_early_exit_live_outs.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2-
; RUN: opt -S < %s -p loop-vectorize | FileCheck %s
2+
; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization | FileCheck %s
33

44
declare void @init_mem(ptr, i64);
55

llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2-
; RUN: opt -S < %s -p loop-vectorize | FileCheck %s
2+
; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization | FileCheck %s
33

44
declare void @init_mem(ptr, i64);
55

0 commit comments

Comments
 (0)