Skip to content

Commit 0e8d022

Browse files
authored
[VPlan] Handle exit phis with multiple operands in addUsersInExitBlocks. (llvm#120260)
Currently addUsersInExitBlocks incorrectly assumes exit phis only have a single operand, which may not be the case for loops with early exits when they share a common exit block. Also further relax the assertion in fixupIVUsers to allow exit values if they come from the loop latch/middle.block. PR: llvm#120260
1 parent fbc18b8 commit 0e8d022

File tree

3 files changed

+71
-54
lines changed

3 files changed

+71
-54
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 31 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -2905,8 +2905,17 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
29052905
}
29062906
}
29072907

2908-
assert((MissingVals.empty() || OrigLoop->getUniqueExitBlock()) &&
2909-
"Expected a single exit block for escaping values");
2908+
assert((MissingVals.empty() ||
2909+
all_of(MissingVals,
2910+
[MiddleBlock, this](const std::pair<Value *, Value *> &P) {
2911+
return all_of(
2912+
predecessors(cast<Instruction>(P.first)->getParent()),
2913+
[MiddleBlock, this](BasicBlock *Pred) {
2914+
return Pred == MiddleBlock ||
2915+
Pred == OrigLoop->getLoopLatch();
2916+
});
2917+
})) &&
2918+
"Expected escaping values from latch/middle.block only");
29102919

29112920
for (auto &I : MissingVals) {
29122921
PHINode *PHI = cast<PHINode>(I.first);
@@ -9049,22 +9058,23 @@ addUsersInExitBlocks(VPlan &Plan,
90499058
// Introduce extract for exiting values and update the VPIRInstructions
90509059
// modeling the corresponding LCSSA phis.
90519060
for (VPIRInstruction *ExitIRI : ExitUsersToFix) {
9052-
VPValue *V = ExitIRI->getOperand(0);
9053-
// Pass live-in values used by exit phis directly through to their users in
9054-
// the exit block.
9055-
if (V->isLiveIn())
9056-
continue;
9061+
for (const auto &[Idx, Op] : enumerate(ExitIRI->operands())) {
9062+
// Pass live-in values used by exit phis directly through to their users
9063+
// in the exit block.
9064+
if (Op->isLiveIn())
9065+
continue;
90579066

9058-
// Currently only live-ins can be used by exit values from blocks not
9059-
// exiting via the vector latch through to the middle block.
9060-
if (ExitIRI->getParent()->getSinglePredecessor() != MiddleVPBB)
9061-
return false;
9067+
// Currently only live-ins can be used by exit values from blocks not
9068+
// exiting via the vector latch through to the middle block.
9069+
if (ExitIRI->getParent()->getSinglePredecessor() != MiddleVPBB)
9070+
return false;
90629071

9063-
LLVMContext &Ctx = ExitIRI->getInstruction().getContext();
9064-
VPValue *Ext = B.createNaryOp(VPInstruction::ExtractFromEnd,
9065-
{V, Plan.getOrAddLiveIn(ConstantInt::get(
9066-
IntegerType::get(Ctx, 32), 1))});
9067-
ExitIRI->setOperand(0, Ext);
9072+
LLVMContext &Ctx = ExitIRI->getInstruction().getContext();
9073+
VPValue *Ext = B.createNaryOp(VPInstruction::ExtractFromEnd,
9074+
{Op, Plan.getOrAddLiveIn(ConstantInt::get(
9075+
IntegerType::get(Ctx, 32), 1))});
9076+
ExitIRI->setOperand(Idx, Ext);
9077+
}
90689078
}
90699079
return true;
90709080
}
@@ -10226,36 +10236,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1022610236
return false;
1022710237
}
1022810238

10229-
if (LVL.hasUncountableEarlyExit()) {
10230-
if (!EnableEarlyExitVectorization) {
10231-
reportVectorizationFailure("Auto-vectorization of loops with uncountable "
10232-
"early exit is not enabled",
10233-
"UncountableEarlyExitLoopsDisabled", ORE, L);
10234-
return false;
10235-
}
10236-
10237-
// In addUsersInExitBlocks we already bail out if there is an outside use
10238-
// of a loop-defined variable, but it ignores induction variables which are
10239-
// handled by InnerLoopVectorizer::fixupIVUsers. We need to bail out if we
10240-
// encounter induction variables too otherwise fixupIVUsers will crash.
10241-
BasicBlock *LoopLatch = L->getLoopLatch();
10242-
for (const auto &Induction : LVL.getInductionVars()) {
10243-
PHINode *Ind = Induction.first;
10244-
Instruction *IndUpdate =
10245-
cast<Instruction>(Ind->getIncomingValueForBlock(LoopLatch));
10246-
for (Instruction *I : {cast<Instruction>(Ind), IndUpdate}) {
10247-
for (User *U : I->users()) {
10248-
Instruction *UI = cast<Instruction>(U);
10249-
if (!L->contains(UI)) {
10250-
reportVectorizationFailure(
10251-
"Auto-vectorization of loops with uncountable early exits and "
10252-
"outside uses of induction variables unsupported",
10253-
"UncountableEarlyExitLoopIndLiveOutsUnsupported", ORE, L);
10254-
return false;
10255-
}
10256-
}
10257-
}
10258-
}
10239+
if (LVL.hasUncountableEarlyExit() && !EnableEarlyExitVectorization) {
10240+
reportVectorizationFailure("Auto-vectorization of loops with uncountable "
10241+
"early exit is not enabled",
10242+
"UncountableEarlyExitLoopsDisabled", ORE, L);
10243+
return false;
1025910244
}
1026010245

1026110246
// Entrance to the VPlan-native vectorization path. Outer loops are processed

llvm/test/Transforms/LoopVectorize/early_exit_legality.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ define i64 @same_exit_block_pre_inc_use1() {
4949
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1'
5050
; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63
5151
; CHECK-NEXT: LV: We can vectorize this loop!
52-
; CHECK-NEXT: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exits and outside uses of induction variables unsupported
52+
; CHECK: LV: Not vectorizing: Some exit values in loop with uncountable exit not supported yet.
5353
entry:
5454
%p1 = alloca [1024 x i8]
5555
%p2 = alloca [1024 x i8]
@@ -141,7 +141,7 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(
141141
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_load_after_early_exit'
142142
; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63
143143
; CHECK-NEXT: LV: We can vectorize this loop!
144-
; CHECK: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exits and outside uses of induction variables unsupported
144+
; CHECK: LV: Not vectorizing: Some exit values in loop with uncountable exit not supported yet.
145145
entry:
146146
%p1 = alloca [1024 x i8]
147147
call void @init_mem(ptr %p1, i64 1024)

llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2-
; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization | FileCheck %s
2+
; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -force-vector-width=4 | FileCheck %s
33

44
declare void @init_mem(ptr, i64);
55

@@ -527,24 +527,50 @@ define i64 @diff_exit_block_pre_inc_use2() {
527527
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
528528
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
529529
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
530+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
531+
; CHECK: vector.ph:
530532
; CHECK-NEXT: br label [[LOOP:%.*]]
533+
; CHECK: vector.body:
534+
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ]
535+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
536+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
537+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
538+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
539+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
540+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP0]]
541+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
542+
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
543+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
544+
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4
545+
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
546+
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
547+
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
548+
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
549+
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
550+
; CHECK: middle.split:
551+
; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
552+
; CHECK: middle.block:
553+
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
554+
; CHECK: scalar.ph:
555+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
556+
; CHECK-NEXT: br label [[LOOP1:%.*]]
531557
; CHECK: loop:
532-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
558+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
533559
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
534560
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
535561
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
536562
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
537563
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
538-
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]]
564+
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT]]
539565
; CHECK: loop.inc:
540566
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
541567
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
542-
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]]
568+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]]
543569
; CHECK: loop.early.exit:
544-
; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ 67, [[LOOP]] ]
570+
; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ 67, [[LOOP1]] ], [ 67, [[MIDDLE_SPLIT]] ]
545571
; CHECK-NEXT: ret i64 [[RETVAL1]]
546572
; CHECK: loop.end:
547-
; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ]
573+
; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ 66, [[MIDDLE_BLOCK]] ]
548574
; CHECK-NEXT: ret i64 [[RETVAL2]]
549575
;
550576
entry:
@@ -995,3 +1021,9 @@ declare i32 @foo(i32) readonly
9951021
declare <vscale x 4 x i32> @foo_vec(<vscale x 4 x i32>)
9961022

9971023
attributes #0 = { "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vec)" }
1024+
;.
1025+
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
1026+
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
1027+
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
1028+
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
1029+
;.

0 commit comments

Comments
 (0)