-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[VPlan] Introduce explicit broadcasts for live-ins. #124644
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
fb64c63
cf864b2
bcbcf3b
10610bc
9908be0
789eedc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -864,6 +864,7 @@ class VPInstruction : public VPRecipeWithIRFlags, | |
CanonicalIVIncrementForPart, | ||
BranchOnCount, | ||
BranchOnCond, | ||
Broadcast, | ||
ComputeReductionResult, | ||
// Takes the VPValue to extract from as first operand and the lane or part | ||
// to extract as second operand, counting from the end starting with 1 for | ||
|
@@ -1460,6 +1461,13 @@ struct VPWidenSelectRecipe : public VPRecipeWithIRFlags { | |
bool isInvariantCond() const { | ||
return getCond()->isDefinedOutsideLoopRegions(); | ||
} | ||
|
||
/// Returns true if the recipe only uses the first lane of operand \p Op. | ||
/// This holds only when \p Op is the condition operand and the condition is | ||
/// invariant (see isInvariantCond()); any other operand yields false. | ||
/// \p Op must be one of this recipe's operands (checked by assertion). | ||
bool onlyFirstLaneUsed(const VPValue *Op) const override { | ||
assert(is_contained(operands(), Op) && | ||
"Op must be an operand of the recipe"); | ||
return Op == getCond() && isInvariantCond(); | ||
} | ||
}; | ||
|
||
/// A recipe for handling GEP instructions. | ||
|
@@ -1507,6 +1515,13 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags { | |
void print(raw_ostream &O, const Twine &Indent, | ||
VPSlotTracker &SlotTracker) const override; | ||
#endif | ||
|
||
/// Returns true if the recipe only uses the first lane of operand \p Op. | ||
/// Only operand 0 can qualify, and only while isPointerLoopInvariant() | ||
/// holds; every other operand yields false. | ||
/// NOTE(review): operand 0 is presumably the GEP's pointer operand - confirm | ||
/// against the recipe's operand layout. | ||
/// \p Op must be one of this recipe's operands (checked by assertion). | ||
bool onlyFirstLaneUsed(const VPValue *Op) const override { | ||
assert(is_contained(operands(), Op) && | ||
"Op must be an operand of the recipe"); | ||
return Op == getOperand(0) && isPointerLoopInvariant(); | ||
} | ||
}; | ||
|
||
/// A recipe to compute the pointers for widened memory accesses of IndexTy | ||
|
@@ -1822,6 +1837,16 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { | |
VPValue *getLastUnrolledPartOperand() { | ||
return getNumOperands() == 5 ? getOperand(4) : this; | ||
} | ||
|
||
/// Returns true if the recipe only uses the first lane of operand \p Op. | ||
/// Only the start value operand qualifies; any other operand yields false. | ||
/// \p Op must be one of this recipe's operands (checked by assertion). | ||
bool onlyFirstLaneUsed(const VPValue *Op) const override { | ||
assert(is_contained(operands(), Op) && | ||
"Op must be an operand of the recipe"); | ||
// The recipe creates its own wide start value, so it only requests the | ||
// first lane of the operand. | ||
// TODO: Remove once creating the start value is modeled separately. | ||
return Op == getStartValue();
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this be the default for all VPHeaderPhiRecipes? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think so at the moment, VPWidenIntOrFpInductionRecipe is special in a way, it still produces its own vector start value in the preheader. To be cleaned up as followup? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can leave behind a TODO. |
||
} | ||
}; | ||
|
||
class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe, | ||
|
@@ -1854,6 +1879,13 @@ class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe, | |
/// Returns true if only scalar values will be generated. | ||
bool onlyScalarsGenerated(bool IsScalable); | ||
|
||
/// Returns true if the recipe only uses the first lane of operand \p Op. | ||
/// Only operand 0 qualifies; any other operand yields false. | ||
/// NOTE(review): operand 0 is presumably the induction's start value - | ||
/// confirm against the recipe's operand layout. | ||
/// \p Op must be one of this recipe's operands (checked by assertion). | ||
bool onlyFirstLaneUsed(const VPValue *Op) const override { | ||
assert(is_contained(operands(), Op) && | ||
"Op must be an operand of the recipe"); | ||
return Op == getOperand(0); | ||
} | ||
|
||
/// Returns the VPValue representing the value of this induction at | ||
/// the first unrolled part, if it exists. Returns itself if unrolling did not | ||
/// take place. | ||
|
@@ -1975,6 +2007,13 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe { | |
void print(raw_ostream &O, const Twine &Indent, | ||
VPSlotTracker &SlotTracker) const override; | ||
#endif | ||
|
||
/// Returns true if the recipe only uses the first lane of operand \p Op. | ||
/// Only the start value operand qualifies; any other operand yields false. | ||
/// \p Op must be one of this recipe's operands (checked by assertion). | ||
bool onlyFirstLaneUsed(const VPValue *Op) const override { | ||
assert(is_contained(operands(), Op) && | ||
"Op must be an operand of the recipe"); | ||
return Op == getStartValue(); | ||
} | ||
}; | ||
|
||
/// A recipe for handling reduction phis. The start value is the first operand | ||
|
@@ -2041,6 +2080,13 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe, | |
|
||
/// Returns true, if the phi is part of an in-loop reduction. | ||
bool isInLoop() const { return IsInLoop; } | ||
|
||
/// Returns true if the recipe only uses the first lane of operand \p Op. | ||
/// Only the start value operand qualifies; any other operand yields false. | ||
/// \p Op must be one of this recipe's operands (checked by assertion). | ||
bool onlyFirstLaneUsed(const VPValue *Op) const override { | ||
assert(is_contained(operands(), Op) && | ||
"Op must be an operand of the recipe"); | ||
return Op == getStartValue(); | ||
} | ||
}; | ||
|
||
/// A recipe for forming partial reductions. In the loop, an accumulator and | ||
|
@@ -3464,7 +3510,7 @@ class VPlan { | |
|
||
/// Contains all the external definitions created for this VPlan. External | ||
/// definitions are VPValues that hold a pointer to their underlying IR. | ||
SmallVector<VPValue *, 16> VPLiveInsToFree; | ||
SmallVector<VPValue *, 16> VPLiveIns; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Seems redundant to hold all live-ins in both the VPLiveIns SmallVector and the Value2VPValue map above? |
||
|
||
/// Mapping from SCEVs to the VPValues representing their expansions. | ||
/// NOTE: This mapping is temporary and will be removed once all users have | ||
|
@@ -3656,7 +3702,7 @@ class VPlan { | |
auto [It, Inserted] = Value2VPValue.try_emplace(V); | ||
if (Inserted) { | ||
VPValue *VPV = new VPValue(V); | ||
VPLiveInsToFree.push_back(VPV); | ||
VPLiveIns.push_back(VPV); | ||
assert(VPV->isLiveIn() && "VPV must be a live-in."); | ||
It->second = VPV; | ||
} | ||
|
@@ -3668,6 +3714,16 @@ class VPlan { | |
/// Return the live-in VPValue for \p V, if there is one or nullptr otherwise. | ||
VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); } | ||
|
||
/// Return the list of live-in VPValues available in the VPlan. | ||
/// The returned ArrayRef is a view of the VPLiveIns member, not a copy. | ||
ArrayRef<VPValue *> getLiveIns() const { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The list of live-in VPValues can be produced from the ValueToVPValue map on request. |
||
// Consistency check (inside assert only): every VPValue mapped in | ||
// Value2VPValue must also be recorded in VPLiveIns. Each map entry is | ||
// looked up in VPLiveIns with is_contained. | ||
assert(all_of(Value2VPValue, | ||
[this](const auto &P) { | ||
return is_contained(VPLiveIns, P.second); | ||
}) && | ||
"all VPValues in Value2VPValue must also be in VPLiveIns"); | ||
return VPLiveIns; | ||
} | ||
|
||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||
/// Print the live-ins of this VPlan to \p O. | ||
void printLiveIns(raw_ostream &O) const; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -587,6 +587,10 @@ Value *VPInstruction::generate(VPTransformState &State) { | |
Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); | ||
return CondBr; | ||
} | ||
case VPInstruction::Broadcast: { | ||
return Builder.CreateVectorSplat( | ||
State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast"); | ||
} | ||
case VPInstruction::ComputeReductionResult: { | ||
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary | ||
// and will be removed by breaking up the recipe further. | ||
|
@@ -837,7 +841,6 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { | |
case Instruction::ICmp: | ||
case Instruction::Select: | ||
case Instruction::Or: | ||
case VPInstruction::PtrAdd: | ||
// TODO: Cover additional opcodes. | ||
return vputils::onlyFirstLaneUsed(this); | ||
case VPInstruction::ActiveLaneMask: | ||
|
@@ -848,6 +851,8 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { | |
case VPInstruction::BranchOnCond: | ||
case VPInstruction::ResumePhi: | ||
return true; | ||
case VPInstruction::PtrAdd: | ||
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why restrict to first operand? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The pointer operand will always use the first lane; the index may use all lanes if other lanes are used. |
||
}; | ||
llvm_unreachable("switch should return"); | ||
} | ||
|
@@ -920,6 +925,10 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, | |
case VPInstruction::BranchOnCount: | ||
O << "branch-on-count"; | ||
break; | ||
case VPInstruction::Broadcast: | ||
O << "broadcast"; | ||
break; | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: redundant inconsistent empty line. |
||
case VPInstruction::ExtractFromEnd: | ||
O << "extract-from-end"; | ||
break; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2112,3 +2112,37 @@ void VPlanTransforms::handleUncountableEarlyExit( | |
Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken); | ||
LatchExitingBranch->eraseFromParent(); | ||
} | ||
|
||
/// Introduce explicit VPInstruction::Broadcast recipes for live-ins of | ||
/// \p Plan that have at least one user which does not use scalars. | ||
/// | ||
/// A live-in is skipped when all of its users only use scalars, when it has | ||
/// no underlying IR value, or when that IR value is a Constant. For the | ||
/// remaining live-ins, a broadcast is created through a builder set up on | ||
/// the vector preheader, but only if the vector preheader dominates, and is | ||
/// different from, the parent block of every user. Afterwards the users that | ||
/// do not use scalars are rewired to the broadcast. | ||
void VPlanTransforms::materializeLiveInBroadcasts(VPlan &Plan) { | ||
// Nothing to do for plans that only have a scalar VF. | ||
if (Plan.hasScalarVFOnly()) | ||
return; | ||
|
||
VPDominatorTree VPDT; | ||
VPDT.recalculate(Plan); | ||
auto *VectorPreheader = Plan.getVectorPreheader(); | ||
VPBuilder Builder(VectorPreheader); | ||
for (VPValue *LiveIn : Plan.getLiveIns()) { | ||
// Skip live-ins for which no broadcast is created: every user only uses | ||
// scalars, there is no underlying IR value, or the IR value is a Constant. | ||
if (all_of(LiveIn->users(), | ||
[LiveIn](VPUser *U) { | ||
return cast<VPRecipeBase>(U)->usesScalars(LiveIn); | ||
}) || | ||
Comment on lines
+2125
to
+2128
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I saw that this patch adds many implementations of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, it is to avoid introducing broadcasts that then won't be used. |
||
!LiveIn->getLiveInIRValue() || | ||
isa<Constant>(LiveIn->getLiveInIRValue())) | ||
continue; | ||
|
||
// Add explicit broadcast if the vector preheader dominates all users. | ||
// TODO: Find valid insert point for all users. | ||
// Users located in the vector preheader itself are rejected as well, via | ||
// the explicit inequality check below. | ||
if (all_of(LiveIn->users(), [&VPDT, VectorPreheader](VPUser *U) { | ||
return VectorPreheader != cast<VPRecipeBase>(U)->getParent() && | ||
VPDT.dominates(VectorPreheader, | ||
cast<VPRecipeBase>(U)->getParent()); | ||
})) { | ||
auto *Broadcast = | ||
Builder.createNaryOp(VPInstruction::Broadcast, {LiveIn}); | ||
// Redirect only users that do not use scalars; the broadcast itself and | ||
// scalar users keep the original live-in as their operand. | ||
LiveIn->replaceUsesWithIf(Broadcast, [LiveIn, Broadcast](VPUser &U, | ||
unsigned Idx) { | ||
return Broadcast != &U && !cast<VPRecipeBase>(&U)->usesScalars(LiveIn); | ||
}); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
runPass()?