Skip to content

Commit eb04741

Browse files
committed
[VPlan] Retain exit conditions early
Step tmp
1 parent e086d7b commit eb04741

File tree

6 files changed

+117
-90
lines changed

6 files changed

+117
-90
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9158,6 +9158,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
91589158
// loop.
91599159
static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
91609160
DebugLoc DL) {
9161+
using namespace VPlanPatternMatch;
91619162
Value *StartIdx = ConstantInt::get(IdxTy, 0);
91629163
auto *StartV = Plan.getOrAddLiveIn(StartIdx);
91639164

@@ -9167,7 +9168,16 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
91679168
VPBasicBlock *Header = TopRegion->getEntryBasicBlock();
91689169
Header->insert(CanonicalIVPHI, Header->begin());
91699170

9170-
VPBuilder Builder(TopRegion->getExitingBasicBlock());
9171+
VPBasicBlock *LatchVPBB = TopRegion->getExitingBasicBlock();
9172+
// We are about to replace the branch to exit the region. Remove the original
9173+
// BranchOnCond, if there is one.
9174+
// TODO: Move canonical IV and BranchOnCount introduction to initial skeleton
9175+
// creation.
9176+
if (!LatchVPBB->empty() &&
9177+
match(&LatchVPBB->back(), m_BranchOnCond(m_VPValue())))
9178+
LatchVPBB->getTerminator()->eraseFromParent();
9179+
9180+
VPBuilder Builder(LatchVPBB);
91719181
// Add a VPInstruction to increment the scalar canonical IV by VF * UF.
91729182
auto *CanonicalIVIncrement = Builder.createOverflowingOp(
91739183
Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF()}, {HasNUW, false}, DL,
@@ -9469,6 +9479,23 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
94699479
bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None;
94709480
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, DL);
94719481

9482+
if (Legal->hasUncountableEarlyExit()) {
9483+
VPlanTransforms::runPass(VPlanTransforms::handleUncountableEarlyExit, *Plan,
9484+
Range);
9485+
} else {
9486+
SmallPtrSet<VPBlockBase *, 2> ExitBlocks(Plan->getExitBlocks().begin(),
9487+
Plan->getExitBlocks().end());
9488+
for (VPBlockBase *VPBB : to_vector(
9489+
vp_depth_first_shallow(Plan->getVectorLoopRegion()->getEntry()))) {
9490+
for (VPBlockBase *EB : ExitBlocks) {
9491+
if (is_contained(VPBB->getSuccessors(), EB)) {
9492+
cast<VPBasicBlock>(VPBB)->getTerminator()->eraseFromParent();
9493+
VPBlockUtils::disconnectBlocks(VPBB, EB);
9494+
}
9495+
}
9496+
}
9497+
}
9498+
94729499
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
94739500
Builder);
94749501

@@ -9639,12 +9666,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
96399666
R->setOperand(1, WideIV->getStepValue());
96409667
}
96419668

9642-
if (auto *UncountableExitingBlock =
9643-
Legal->getUncountableEarlyExitingBlock()) {
9644-
VPlanTransforms::runPass(VPlanTransforms::handleUncountableEarlyExit, *Plan,
9645-
OrigLoop, UncountableExitingBlock, RecipeBuilder,
9646-
Range);
9647-
}
96489669
DenseMap<VPValue *, VPValue *> IVEndValues;
96499670
addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
96509671
SetVector<VPIRInstruction *> ExitUsersToFix =
@@ -9742,6 +9763,17 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
97429763
auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
97439764
VPlanTransforms::createLoopRegions(*Plan, Legal->getWidestInductionType(),
97449765
PSE, true, false, OrigLoop);
9766+
SmallPtrSet<VPBlockBase *, 2> ExitBlocks(Plan->getExitBlocks().begin(),
9767+
Plan->getExitBlocks().end());
9768+
for (VPBlockBase *VPBB : to_vector(
9769+
vp_depth_first_shallow(Plan->getVectorLoopRegion()->getEntry()))) {
9770+
for (VPBlockBase *EB : ExitBlocks) {
9771+
if (is_contained(VPBB->getSuccessors(), EB)) {
9772+
cast<VPBasicBlock>(VPBB)->getTerminator()->eraseFromParent();
9773+
VPBlockUtils::disconnectBlocks(VPBB, EB);
9774+
}
9775+
}
9776+
}
97459777

97469778
for (ElementCount VF : Range)
97479779
Plan->addVF(VF);

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ class VPBlockBase {
117117
Predecessors.erase(Pos);
118118
}
119119

120+
public:
120121
/// Remove \p Successor from the successors of this block.
121122
void removeSuccessor(VPBlockBase *Successor) {
122123
auto Pos = find(Successors, Successor);
@@ -129,8 +130,6 @@ class VPBlockBase {
129130
void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
130131
auto I = find(Predecessors, Old);
131132
assert(I != Predecessors.end());
132-
assert(Old->getParent() == New->getParent() &&
133-
"replaced predecessor must have the same parent");
134133
*I = New;
135134
}
136135

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 21 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,9 @@ VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
112112
return VPBB;
113113
}
114114

115+
if (!TheLoop->contains(BB))
116+
return Plan->getExitBlock(BB);
117+
115118
// Create new VPBB.
116119
StringRef Name = BB->getName();
117120
LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
@@ -145,14 +148,6 @@ bool PlainCFGBuilder::isExternalDef(Value *Val) {
145148
// Instruction definition is in outermost loop PH.
146149
return false;
147150

148-
// Check whether Instruction definition is in a loop exit.
149-
SmallVector<BasicBlock *> ExitBlocks;
150-
TheLoop->getExitBlocks(ExitBlocks);
151-
if (is_contained(ExitBlocks, InstParent)) {
152-
// Instruction definition is in outermost loop exit.
153-
return false;
154-
}
155-
156151
// Check whether Instruction definition is in loop body.
157152
return !TheLoop->contains(Inst);
158153
}
@@ -201,11 +196,6 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
201196
"Instruction shouldn't have been visited.");
202197

203198
if (auto *Br = dyn_cast<BranchInst>(Inst)) {
204-
if (TheLoop->getLoopLatch() == BB ||
205-
any_of(successors(BB),
206-
[this](BasicBlock *Succ) { return !TheLoop->contains(Succ); }))
207-
continue;
208-
209199
// Conditional branch instructions are represented using BranchOnCond
210200
// recipes.
211201
if (Br->isConditional()) {
@@ -295,7 +285,6 @@ std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG(
295285
for (BasicBlock *BB : RPO) {
296286
// Create or retrieve the VPBasicBlock for this BB.
297287
VPBasicBlock *VPBB = getOrCreateVPBB(BB);
298-
Loop *LoopForBB = LI->getLoopFor(BB);
299288
// Set VPBB predecessors in the same order as they are in the incoming BB.
300289
setVPBBPredsFromBB(VPBB, BB);
301290

@@ -326,24 +315,12 @@ std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG(
326315
BasicBlock *IRSucc1 = BI->getSuccessor(1);
327316
VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
328317
VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
329-
330-
// Don't connect any blocks outside the current loop except the latches for
331-
// inner loops.
332-
// TODO: Also connect exit blocks during initial VPlan construction.
333-
if (LoopForBB == TheLoop || BB != LoopForBB->getLoopLatch()) {
334-
if (!LoopForBB->contains(IRSucc0)) {
335-
VPBB->setOneSuccessor(Successor1);
336-
continue;
337-
}
338-
if (!LoopForBB->contains(IRSucc1)) {
339-
VPBB->setOneSuccessor(Successor0);
340-
continue;
341-
}
342-
}
343-
344318
VPBB->setTwoSuccessors(Successor0, Successor1);
345319
}
346320

321+
for (auto *EB : Plan->getExitBlocks())
322+
setVPBBPredsFromBB(EB, EB->getIRBasicBlock());
323+
347324
// 2. The whole CFG has been built at this point so all the input Values must
348325
// have a VPlan counterpart. Fix VPlan header phi by adding their
349326
// corresponding VPlan operands.
@@ -392,6 +369,8 @@ std::unique_ptr<VPlan> VPlanTransforms::buildPlainCFG(
392369
/// return false.
393370
static bool canonicalHeaderAndLatch(VPBlockBase *HeaderVPB,
394371
const VPDominatorTree &VPDT) {
372+
if (isa<VPIRBasicBlock>(HeaderVPB))
373+
return false;
395374
ArrayRef<VPBlockBase *> Preds = HeaderVPB->getPredecessors();
396375
if (Preds.size() != 2)
397376
return false;
@@ -447,18 +426,23 @@ static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
447426
assert(LatchVPBB->getNumSuccessors() <= 1 &&
448427
"Latch has more than one successor");
449428
if (Succ)
450-
VPBlockUtils::disconnectBlocks(LatchVPBB, Succ);
429+
LatchVPBB->removeSuccessor(Succ);
451430

452431
auto *R = Plan.createVPRegionBlock(HeaderVPB, LatchVPBB, "",
453432
false /*isReplicator*/);
454433
// All VPBB's reachable shallowly from HeaderVPB belong to top level loop,
455434
// because VPlan is expected to end at top level latch disconnected above.
435+
SmallPtrSet<VPBlockBase *, 2> ExitBlocks(Plan.getExitBlocks().begin(),
436+
Plan.getExitBlocks().end());
456437
for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB))
457-
VPBB->setParent(R);
438+
if (!ExitBlocks.contains(VPBB))
439+
VPBB->setParent(R);
458440

459441
VPBlockUtils::insertBlockAfter(R, PreheaderVPBB);
460-
if (Succ)
461-
VPBlockUtils::connectBlocks(R, Succ);
442+
if (Succ) {
443+
R->setOneSuccessor(Succ);
444+
Succ->replacePredecessor(LatchVPBB, R);
445+
}
462446
}
463447

464448
void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy,
@@ -505,7 +489,11 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy,
505489
// remainder and we can set the condition to true.
506490
// 3) Otherwise, construct a runtime check.
507491

492+
BasicBlock *IRExitBlock = TheLoop->getUniqueLatchExitBlock();
493+
auto *VPExitBlock = IRExitBlock ? Plan.getExitBlock(IRExitBlock) : nullptr;
508494
if (!RequiresScalarEpilogueCheck) {
495+
if (VPExitBlock)
496+
VPBlockUtils::disconnectBlocks(MiddleVPBB, VPExitBlock);
509497
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
510498
// The exit blocks are unreachable, remove their recipes to make sure no
511499
// users remain that may pessimize transforms.
@@ -516,10 +504,7 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy,
516504
return;
517505
}
518506

519-
BasicBlock *IRExitBlock = TheLoop->getUniqueLatchExitBlock();
520-
auto *VPExitBlock = Plan.getExitBlock(IRExitBlock);
521507
// The connection order corresponds to the operands of the conditional branch.
522-
VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB);
523508
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
524509

525510
auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator();

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 50 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -2470,64 +2470,74 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
24702470
R->eraseFromParent();
24712471
}
24722472

2473-
void VPlanTransforms::handleUncountableEarlyExit(
2474-
VPlan &Plan, Loop *OrigLoop, BasicBlock *UncountableExitingBlock,
2475-
VPRecipeBuilder &RecipeBuilder, VFRange &Range) {
2473+
void VPlanTransforms::handleUncountableEarlyExit(VPlan &Plan, VFRange &Range) {
2474+
auto *MiddleVPBB = Plan.getMiddleBlock();
2475+
// First find the uncountable early exiting block by looking at the
2476+
// predecessors of the exit blocks.
2477+
VPBasicBlock *EarlyExitingVPBB = nullptr;
2478+
VPIRBasicBlock *EarlyExitVPBB = nullptr;
2479+
for (auto *EB : Plan.getExitBlocks()) {
2480+
for (VPBlockBase *Pred : EB->getPredecessors()) {
2481+
if (Pred != MiddleVPBB) {
2482+
EarlyExitingVPBB = cast<VPBasicBlock>(Pred);
2483+
EarlyExitVPBB = EB;
2484+
break;
2485+
}
2486+
}
2487+
}
2488+
24762489
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
24772490
auto *LatchVPBB = cast<VPBasicBlock>(LoopRegion->getExiting());
24782491
VPBuilder Builder(LatchVPBB->getTerminator());
2479-
auto *MiddleVPBB = Plan.getMiddleBlock();
2480-
VPValue *IsEarlyExitTaken = nullptr;
2481-
2482-
// Process the uncountable exiting block. Update IsEarlyExitTaken, which
2483-
// tracks if the uncountable early exit has been taken. Also split the middle
2484-
// block and have it conditionally branch to the early exit block if
2485-
// EarlyExitTaken.
2486-
auto *EarlyExitingBranch =
2487-
cast<BranchInst>(UncountableExitingBlock->getTerminator());
2488-
BasicBlock *TrueSucc = EarlyExitingBranch->getSuccessor(0);
2489-
BasicBlock *FalseSucc = EarlyExitingBranch->getSuccessor(1);
2490-
BasicBlock *EarlyExitIRBB =
2491-
!OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc;
2492-
VPIRBasicBlock *VPEarlyExitBlock = Plan.getExitBlock(EarlyExitIRBB);
2493-
2494-
VPValue *EarlyExitNotTakenCond = RecipeBuilder.getBlockInMask(
2495-
OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc);
2496-
auto *EarlyExitTakenCond = Builder.createNot(EarlyExitNotTakenCond);
2497-
IsEarlyExitTaken =
2498-
Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond});
2492+
VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0];
2493+
VPValue *EarlyExitCond = EarlyExitingVPBB->getTerminator()->getOperand(0);
2494+
auto *EarlyExitTakenCond = TrueSucc == EarlyExitVPBB
2495+
? EarlyExitCond
2496+
: Builder.createNot(EarlyExitCond);
2497+
2498+
if (!EarlyExitVPBB->getSinglePredecessor() &&
2499+
EarlyExitVPBB->getPredecessors()[0] != MiddleVPBB) {
2500+
for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
2501+
// Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
2502+
// a single predecessor and 1 if it has two.
2503+
// If EarlyExitVPBB has two predecessors, they are already ordered such
2504+
// that early exit is second (and latch exit is first), by construction.
2505+
// But its underlying IRBB (EarlyExitIRBB) may have its predecessors
2506+
// ordered the other way around, and it is the order of the latter which
2507+
// corresponds to the order of operands of EarlyExitVPBB's phi recipes.
2508+
// Therefore, if early exit (UncountableExitingBlock) is the first
2509+
// predecessor of EarlyExitIRBB, we swap the operands of phi recipes,
2510+
// thereby bringing them to match EarlyExitVPBB's predecessor order,
2511+
// with early exit being last (second). Otherwise they already match.
2512+
cast<VPIRPhi>(&R)->swapOperands();
2513+
}
2514+
}
24992515

2516+
EarlyExitingVPBB->getTerminator()->eraseFromParent();
2517+
VPBlockUtils::disconnectBlocks(EarlyExitingVPBB, EarlyExitVPBB);
2518+
2519+
// Split the middle block and have it conditionally branch to the early exit
2520+
// block if EarlyExitTaken.
2521+
VPValue *IsEarlyExitTaken =
2522+
Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond});
25002523
VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split");
25012524
VPBasicBlock *VectorEarlyExitVPBB =
25022525
Plan.createVPBasicBlock("vector.early.exit");
25032526
VPBlockUtils::insertOnEdge(LoopRegion, MiddleVPBB, NewMiddle);
25042527
VPBlockUtils::connectBlocks(NewMiddle, VectorEarlyExitVPBB);
25052528
NewMiddle->swapSuccessors();
25062529

2507-
VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, VPEarlyExitBlock);
2530+
VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, EarlyExitVPBB);
25082531

25092532
// Update the exit phis in the early exit block.
25102533
VPBuilder MiddleBuilder(NewMiddle);
25112534
VPBuilder EarlyExitB(VectorEarlyExitVPBB);
2512-
for (VPRecipeBase &R : VPEarlyExitBlock->phis()) {
2535+
for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
25132536
auto *ExitIRI = cast<VPIRPhi>(&R);
2514-
// Early exit operand should always be last, i.e., 0 if VPEarlyExitBlock has
2537+
// Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
25152538
// a single predecessor and 1 if it has two.
25162539
unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
2517-
if (!VPEarlyExitBlock->getSinglePredecessor()) {
2518-
// If VPEarlyExitBlock has two predecessors, they are already ordered such
2519-
// that early exit is second (and latch exit is first), by construction.
2520-
// But its underlying IRBB (EarlyExitIRBB) may have its predecessors
2521-
// ordered the other way around, and it is the order of the latter which
2522-
// corresponds to the order of operands of VPEarlyExitBlock's phi recipes.
2523-
// Therefore, if early exit (UncountableExitingBlock) is the first
2524-
// predecessor of EarlyExitIRBB, we swap the operands of phi recipes,
2525-
// thereby bringing them to match VPEarlyExitBlock's predecessor order,
2526-
// with early exit being last (second). Otherwise they already match.
2527-
if (*pred_begin(VPEarlyExitBlock->getIRBasicBlock()) ==
2528-
UncountableExitingBlock)
2529-
ExitIRI->swapOperands();
2530-
2540+
if (!EarlyExitVPBB->getSinglePredecessor()) {
25312541
// The first of two operands corresponds to the latch exit, via MiddleVPBB
25322542
// predecessor. Extract its last lane.
25332543
ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder);

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -175,10 +175,7 @@ struct VPlanTransforms {
175175
/// exit conditions
176176
/// * splitting the original middle block to branch to the early exit block
177177
/// if taken.
178-
static void handleUncountableEarlyExit(VPlan &Plan, Loop *OrigLoop,
179-
BasicBlock *UncountableExitingBlock,
180-
VPRecipeBuilder &RecipeBuilder,
181-
VFRange &Range);
178+
static void handleUncountableEarlyExit(VPlan &Plan, VFRange &Range);
182179

183180
/// Lower abstract recipes to concrete ones, that can be codegen'd. Use \p
184181
/// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.

llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,11 @@ define void @foo(i64 %n) {
3131
; CHECK-NEXT: outer.latch:
3232
; CHECK-NEXT: EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1>
3333
; CHECK-NEXT: EMIT ir<%outer.ec> = icmp ir<%outer.iv.next>, ir<8>
34-
; CHECK-NEXT: Successor(s): outer.header
34+
; CHECK-NEXT: EMIT branch-on-cond ir<%outer.ec>
35+
; CHECK-NEXT: Successor(s): ir-bb<exit>, outer.header
36+
; CHECK-EMPTY:
37+
; CHECK-NEXT: ir-bb<exit>:
38+
; CHECK-NEXT: No successors
3539
; CHECK-NEXT: }
3640
entry:
3741
br label %outer.header

0 commit comments

Comments
 (0)