-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[VPlan] Model FOR resume value extraction in VPlan. #93396
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -59,6 +59,7 @@ | |
#include "VPlan.h" | ||
#include "VPlanAnalysis.h" | ||
#include "VPlanHCFGBuilder.h" | ||
#include "VPlanPatternMatch.h" | ||
#include "VPlanTransforms.h" | ||
#include "VPlanVerifier.h" | ||
#include "llvm/ADT/APInt.h" | ||
|
@@ -606,10 +607,9 @@ class InnerLoopVectorizer { | |
BasicBlock *MiddleBlock, BasicBlock *VectorHeader, | ||
VPlan &Plan, VPTransformState &State); | ||
|
||
/// Create the exit value of first order recurrences in the middle block and | ||
/// update their users. | ||
void fixFixedOrderRecurrence(VPFirstOrderRecurrencePHIRecipe *PhiR, | ||
VPTransformState &State); | ||
/// Create the phi node for the resume value of first order recurrences in the | ||
/// scalar preheader and update the users in the scalar loop. | ||
void fixFixedOrderRecurrence(VPLiveOut *LO, VPTransformState &State); | ||
|
||
/// Iteratively sink the scalarized operands of a predicated instruction into | ||
/// the block that was created for it. | ||
|
@@ -3391,16 +3391,16 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, | |
fixNonInductionPHIs(Plan, State); | ||
|
||
// At this point every instruction in the original loop is widened to a | ||
// vector form. Now we need to fix the recurrences in the loop. These PHI | ||
// nodes are currently empty because we did not want to introduce cycles. | ||
// This is the second stage of vectorizing recurrences. Note that fixing | ||
// reduction phis is already modeled in VPlan. | ||
// TODO: Also model fixing fixed-order recurrence phis in VPlan. | ||
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion(); | ||
VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock(); | ||
for (VPRecipeBase &R : HeaderVPBB->phis()) { | ||
if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R)) | ||
fixFixedOrderRecurrence(FOR, State); | ||
// vector form. Note that fixing reduction phis, as well as extracting the | ||
// exit and resume values for fixed-order recurrences are already modeled in | ||
// VPlan. All that remains to do here is to create a phi in the scalar | ||
// pre-header for each fixed-order recurrence resume value. | ||
// TODO: Also model creating phis in the scalar pre-header in VPlan. | ||
for (const auto &[_, LO] : to_vector(Plan.getLiveOuts())) { | ||
if (!Legal->isFixedOrderRecurrence(LO->getPhi())) | ||
continue; | ||
fixFixedOrderRecurrence(LO, State); | ||
Plan.removeLiveOut(LO->getPhi()); | ||
Comment on lines
+3402
to
+3403
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Worth a comment, somewhere: Plan holds LOs that need fixing; any LO that is fixed is removed from Plan, and the remaining LOs receive a default LO->fixPhi() fix below. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Added to LiveOuts definition in VPlan, thanks! |
||
} | ||
|
||
// Forget the original basic block. | ||
|
@@ -3416,6 +3416,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, | |
for (PHINode &PN : Exit->phis()) | ||
PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN); | ||
|
||
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion(); | ||
VPBasicBlock *LatchVPBB = VectorRegion->getExitingBasicBlock(); | ||
Loop *VectorLoop = LI->getLoopFor(State.CFG.VPBB2IRBB[LatchVPBB]); | ||
if (Cost->requiresScalarEpilogue(VF.isVector())) { | ||
|
@@ -3469,85 +3470,31 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, | |
VF.getKnownMinValue() * UF); | ||
} | ||
|
||
void InnerLoopVectorizer::fixFixedOrderRecurrence( | ||
VPFirstOrderRecurrencePHIRecipe *PhiR, VPTransformState &State) { | ||
// This is the second phase of vectorizing first-order recurrences. An | ||
// overview of the transformation is described below. Suppose we have the | ||
// following loop. | ||
// | ||
// for (int i = 0; i < n; ++i) | ||
// b[i] = a[i] - a[i - 1]; | ||
// | ||
// There is a first-order recurrence on "a". For this loop, the shorthand | ||
// scalar IR looks like: | ||
// | ||
// scalar.ph: | ||
// s_init = a[-1] | ||
// br scalar.body | ||
// | ||
// scalar.body: | ||
// i = phi [0, scalar.ph], [i+1, scalar.body] | ||
// s1 = phi [s_init, scalar.ph], [s2, scalar.body] | ||
// s2 = a[i] | ||
// b[i] = s2 - s1 | ||
// br cond, scalar.body, ... | ||
// | ||
// In this example, s1 is a recurrence because its value depends on the | ||
// previous iteration. In the first phase of vectorization, we created a | ||
// vector phi v1 for s1. We now complete the vectorization and produce the | ||
// shorthand vector IR shown below (for VF = 4, UF = 1). | ||
// | ||
// vector.ph: | ||
// v_init = vector(..., ..., ..., a[-1]) | ||
// br vector.body | ||
// | ||
// vector.body | ||
// i = phi [0, vector.ph], [i+4, vector.body] | ||
// v1 = phi [v_init, vector.ph], [v2, vector.body] | ||
// v2 = a[i, i+1, i+2, i+3]; | ||
// v3 = vector(v1(3), v2(0, 1, 2)) | ||
// b[i, i+1, i+2, i+3] = v2 - v3 | ||
// br cond, vector.body, middle.block | ||
// | ||
// middle.block: | ||
// x = v2(3) | ||
// br scalar.ph | ||
// | ||
// scalar.ph: | ||
// s_init = phi [x, middle.block], [a[-1], otherwise] | ||
// br scalar.body | ||
// | ||
// After execution completes the vector loop, we extract the next value of | ||
// the recurrence (x) to use as the initial value in the scalar loop. | ||
|
||
void InnerLoopVectorizer::fixFixedOrderRecurrence(VPLiveOut *LO, | ||
VPTransformState &State) { | ||
// Extract the last vector element in the middle block. This will be the | ||
// initial value for the recurrence when jumping to the scalar loop. | ||
VPValue *PreviousDef = PhiR->getBackedgeValue(); | ||
Value *Incoming = State.get(PreviousDef, UF - 1); | ||
auto *ExtractForScalar = Incoming; | ||
auto *IdxTy = Builder.getInt32Ty(); | ||
Value *RuntimeVF = nullptr; | ||
if (VF.isVector()) { | ||
auto *One = ConstantInt::get(IdxTy, 1); | ||
Builder.SetInsertPoint(LoopMiddleBlock->getTerminator()); | ||
RuntimeVF = getRuntimeVF(Builder, IdxTy, VF); | ||
auto *LastIdx = Builder.CreateSub(RuntimeVF, One); | ||
ExtractForScalar = | ||
Builder.CreateExtractElement(Incoming, LastIdx, "vector.recur.extract"); | ||
} | ||
VPValue *VPExtract = LO->getOperand(0); | ||
using namespace llvm::VPlanPatternMatch; | ||
assert(match(VPExtract, m_VPInstruction<VPInstruction::ExtractFromEnd>( | ||
m_VPValue(), m_VPValue())) && | ||
"FOR LiveOut expects to use an extract from end."); | ||
Value *ResumeScalarFOR = State.get(VPExtract, UF - 1, true); | ||
|
||
// Fix the initial value of the original recurrence in the scalar loop. | ||
PHINode *ScalarHeaderPhi = LO->getPhi(); | ||
auto *InitScalarFOR = | ||
ScalarHeaderPhi->getIncomingValueForBlock(LoopScalarPreHeader); | ||
Builder.SetInsertPoint(LoopScalarPreHeader, LoopScalarPreHeader->begin()); | ||
PHINode *Phi = cast<PHINode>(PhiR->getUnderlyingValue()); | ||
auto *Start = Builder.CreatePHI(Phi->getType(), 2, "scalar.recur.init"); | ||
auto *ScalarInit = PhiR->getStartValue()->getLiveInIRValue(); | ||
auto *ScalarPreheaderPhi = | ||
Builder.CreatePHI(ScalarHeaderPhi->getType(), 2, "scalar.recur.init"); | ||
for (auto *BB : predecessors(LoopScalarPreHeader)) { | ||
auto *Incoming = BB == LoopMiddleBlock ? ExtractForScalar : ScalarInit; | ||
Start->addIncoming(Incoming, BB); | ||
auto *Incoming = BB == LoopMiddleBlock ? ResumeScalarFOR : InitScalarFOR; | ||
ScalarPreheaderPhi->addIncoming(Incoming, BB); | ||
} | ||
|
||
Phi->setIncomingValueForBlock(LoopScalarPreHeader, Start); | ||
Phi->setName("scalar.recur"); | ||
ScalarHeaderPhi->setIncomingValueForBlock(LoopScalarPreHeader, | ||
ScalarPreheaderPhi); | ||
ScalarHeaderPhi->setName("scalar.recur"); | ||
} | ||
|
||
void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) { | ||
|
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -847,14 +847,91 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan, | |||||||||||||||||||||
// all users. | ||||||||||||||||||||||
RecurSplice->setOperand(0, FOR); | ||||||||||||||||||||||
|
||||||||||||||||||||||
// This is the second phase of vectorizing first-order recurrences. An | ||||||||||||||||||||||
// overview of the transformation is described below. Suppose we have the | ||||||||||||||||||||||
// following loop with some use after the loop of the last a[i-1], | ||||||||||||||||||||||
// | ||||||||||||||||||||||
// for (int i = 0; i < n; ++i) { | ||||||||||||||||||||||
// t = a[i - 1]; | ||||||||||||||||||||||
// b[i] = a[i] - t; | ||||||||||||||||||||||
// } | ||||||||||||||||||||||
// use t; | ||||||||||||||||||||||
// | ||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Merged with the example loop, thanks! |
||||||||||||||||||||||
// There is a first-order recurrence on "a". For this loop, the shorthand | ||||||||||||||||||||||
// scalar IR looks like: | ||||||||||||||||||||||
// | ||||||||||||||||||||||
// scalar.ph: | ||||||||||||||||||||||
// s_init = a[-1] | ||||||||||||||||||||||
// br scalar.body | ||||||||||||||||||||||
// | ||||||||||||||||||||||
// scalar.body: | ||||||||||||||||||||||
// i = phi [0, scalar.ph], [i+1, scalar.body] | ||||||||||||||||||||||
// s1 = phi [s_init, scalar.ph], [s2, scalar.body] | ||||||||||||||||||||||
// s2 = a[i] | ||||||||||||||||||||||
// b[i] = s2 - s1 | ||||||||||||||||||||||
// br cond, scalar.body, exit.block | ||||||||||||||||||||||
// | ||||||||||||||||||||||
// exit.block: | ||||||||||||||||||||||
// use = lcssa.phi [s1, scalar.body] | ||||||||||||||||||||||
// | ||||||||||||||||||||||
// In this example, s1 is a recurrence because its value depends on the | ||||||||||||||||||||||
// previous iteration. In the first phase of vectorization, we created a | ||||||||||||||||||||||
// vector phi v1 for s1. We now complete the vectorization and produce the | ||||||||||||||||||||||
// shorthand vector IR shown below (for VF = 4, UF = 1). | ||||||||||||||||||||||
// | ||||||||||||||||||||||
// vector.ph: | ||||||||||||||||||||||
// v_init = vector(..., ..., ..., a[-1]) | ||||||||||||||||||||||
// br vector.body | ||||||||||||||||||||||
// | ||||||||||||||||||||||
// vector.body | ||||||||||||||||||||||
// i = phi [0, vector.ph], [i+4, vector.body] | ||||||||||||||||||||||
// v1 = phi [v_init, vector.ph], [v2, vector.body] | ||||||||||||||||||||||
// v2 = a[i, i+1, i+2, i+3]; | ||||||||||||||||||||||
// v3 = vector(v1(3), v2(0, 1, 2)) | ||||||||||||||||||||||
// b[i, i+1, i+2, i+3] = v2 - v3 | ||||||||||||||||||||||
// br cond, vector.body, middle.block | ||||||||||||||||||||||
// | ||||||||||||||||||||||
// middle.block: | ||||||||||||||||||||||
// s_penultimate = v2(2) = v3(3) | ||||||||||||||||||||||
// s_resume = v2(3) | ||||||||||||||||||||||
// br cond, scalar.ph, exit.block | ||||||||||||||||||||||
// | ||||||||||||||||||||||
// scalar.ph: | ||||||||||||||||||||||
// s_init' = phi [s_resume, middle.block], [s_init, otherwise] | ||||||||||||||||||||||
// br scalar.body | ||||||||||||||||||||||
// | ||||||||||||||||||||||
// scalar.body: | ||||||||||||||||||||||
// i = phi [0, scalar.ph], [i+1, scalar.body] | ||||||||||||||||||||||
// s1 = phi [s_init', scalar.ph], [s2, scalar.body] | ||||||||||||||||||||||
// s2 = a[i] | ||||||||||||||||||||||
// b[i] = s2 - s1 | ||||||||||||||||||||||
// br cond, scalar.body, exit.block | ||||||||||||||||||||||
// | ||||||||||||||||||||||
// exit.block: | ||||||||||||||||||||||
// lo = lcssa.phi [s1, scalar.body], [s_penultimate, middle.block] | ||||||||||||||||||||||
// | ||||||||||||||||||||||
// After execution completes the vector loop, we extract the next value of | ||||||||||||||||||||||
// the recurrence (s_resume) to use as the initial value in the scalar | ||||||||||||||||||||||
// loop. This is modeled by ExtractFromEnd. | ||||||||||||||||||||||
Type *IntTy = Plan.getCanonicalIV()->getScalarType(); | ||||||||||||||||||||||
auto *Result = cast<VPInstruction>(MiddleBuilder.createNaryOp( | ||||||||||||||||||||||
|
||||||||||||||||||||||
// Extract the penultimate value of the recurrence and update VPLiveOut | ||||||||||||||||||||||
// users of the recurrence splice. | ||||||||||||||||||||||
auto *Penultimate = cast<VPInstruction>(MiddleBuilder.createNaryOp( | ||||||||||||||||||||||
VPInstruction::ExtractFromEnd, | ||||||||||||||||||||||
{FOR->getBackedgeValue(), | ||||||||||||||||||||||
Plan.getOrAddLiveIn(ConstantInt::get(IntTy, 2))}, | ||||||||||||||||||||||
{}, "vector.recur.extract.for.phi")); | ||||||||||||||||||||||
RecurSplice->replaceUsesWithIf( | ||||||||||||||||||||||
Result, [](VPUser &U, unsigned) { return isa<VPLiveOut>(&U); }); | ||||||||||||||||||||||
Penultimate, [](VPUser &U, unsigned) { return isa<VPLiveOut>(&U); }); | ||||||||||||||||||||||
|
||||||||||||||||||||||
// Extract the resume value and create a new VPLiveOut for it. | ||||||||||||||||||||||
auto *Resume = MiddleBuilder.createNaryOp( | ||||||||||||||||||||||
VPInstruction::ExtractFromEnd, | ||||||||||||||||||||||
{FOR->getBackedgeValue(), | ||||||||||||||||||||||
Plan.getOrAddLiveIn(ConstantInt::get(IntTy, 1))}, | ||||||||||||||||||||||
{}, "vector.recur.extract"); | ||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These two names There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Preferably done in this PR or as follow-up? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Better retain names in this PR, and change them in a separate patch either before or after. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍🏻 |
||||||||||||||||||||||
Plan.addLiveOut(cast<PHINode>(FOR->getUnderlyingInstr()), Resume); | ||||||||||||||||||||||
} | ||||||||||||||||||||||
return true; | ||||||||||||||||||||||
} | ||||||||||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removing LiveOuts while iterating over them requires the
to_vector
? Would make_early_inc_range suffice? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unfortunately no, as getLiveOuts returns a reference to a MapVector and make_early_inc_range doesn't work for that type.