Skip to content

Commit 308f820

Browse files
[LLVM][VPlan] Keep all VPBlend masks until VPlan transformation.
It's not possible to pick the best mask to remove when optimising VPBlend at construction and so this patch refactors the code to move the decision (and thus transformation) to VPlanTransforms. NOTE: This patch does not change the decision of which mask to pick. That will be done in a following PR to keep this patch as NFC from an output point of view.
1 parent 9e318ba commit 308f820

File tree

4 files changed

+50
-17
lines changed

4 files changed

+50
-17
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8170,8 +8170,6 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
81708170
// builder. At this point we generate the predication tree. There may be
81718171
// duplications since this is a simple recursive scan, but future
81728172
// optimizations will clean it up.
8173-
// TODO: At the moment the first mask is always skipped, but it would be
8174-
// better to skip the most expensive mask.
81758173
SmallVector<VPValue *, 2> OperandsWithMask;
81768174

81778175
for (unsigned In = 0; In < NumIncoming; In++) {
@@ -8184,8 +8182,6 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
81848182
"Distinct incoming values with one having a full mask");
81858183
break;
81868184
}
8187-
if (In == 0)
8188-
continue;
81898185
OperandsWithMask.push_back(EdgeMask);
81908186
}
81918187
return new VPBlendRecipe(Phi, OperandsWithMask);

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2033,12 +2033,12 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe {
20332033
class VPBlendRecipe : public VPSingleDefRecipe {
20342034
public:
20352035
/// The blend operation is a User of the incoming values and of their
2036-
/// respective masks, ordered [I0, I1, M1, I2, M2, ...]. Note that the first
2037-
/// incoming value does not have a mask associated.
2036+
/// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2037+
/// be omitted (implied by passing an odd number of operands) in which case
2038+
/// all other incoming values are merged into it.
20382039
VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
20392040
: VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
2040-
assert((Operands.size() + 1) % 2 == 0 &&
2041-
"Expected an odd number of operands");
2041+
assert(Operands.size() > 0 && "Expected at least one operand!");
20422042
}
20432043

20442044
VPBlendRecipe *clone() override {
@@ -2048,19 +2048,23 @@ class VPBlendRecipe : public VPSingleDefRecipe {
20482048

20492049
VP_CLASSOF_IMPL(VPDef::VPBlendSC)
20502050

2051+
/// A normalized blend is one that has an odd number of operands, whereby the
2052+
/// first operand does not have an associated mask.
2053+
bool isNormalized() const { return getNumOperands() % 2; }
2054+
20512055
/// Return the number of incoming values, taking into account that the first
20522056
/// incoming value has no mask.
20532057
unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }
20542058

20552059
/// Return incoming value number \p Idx.
20562060
VPValue *getIncomingValue(unsigned Idx) const {
2057-
return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - 1);
2061+
return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
20582062
}
20592063

20602064
/// Return mask number \p Idx.
20612065
VPValue *getMask(unsigned Idx) const {
2062-
assert(Idx > 0 && "First index has no mask associated.");
2063-
return getOperand(Idx * 2);
2066+
assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2067+
return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
20642068
}
20652069

20662070
/// Generate the phi/select nodes.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1635,6 +1635,7 @@ void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
16351635
#endif
16361636

16371637
void VPBlendRecipe::execute(VPTransformState &State) {
1638+
assert(isNormalized() && "Expected blend to be normalized!");
16381639
State.setDebugLocFrom(getDebugLoc());
16391640
// We know that all PHIs in non-header blocks are converted into
16401641
// selects, so we don't have to worry about the insertion order and we

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -989,15 +989,47 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
989989
/// Try to simplify recipe \p R.
990990
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
991991
using namespace llvm::VPlanPatternMatch;
992-
// Try to remove redundant blend recipes.
992+
993993
if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
994-
VPValue *Inc0 = Blend->getIncomingValue(0);
994+
// Try to remove redundant blend recipes.
995+
SmallSet<VPValue *, 4> UniqueValues;
996+
if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
997+
UniqueValues.insert(Blend->getIncomingValue(0));
995998
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
996-
if (Inc0 != Blend->getIncomingValue(I) &&
997-
!match(Blend->getMask(I), m_False()))
998-
return;
999-
Blend->replaceAllUsesWith(Inc0);
999+
if (!match(Blend->getMask(I), m_False()))
1000+
UniqueValues.insert(Blend->getIncomingValue(I));
1001+
1002+
if (UniqueValues.size() == 1) {
1003+
Blend->replaceAllUsesWith(*UniqueValues.begin());
1004+
Blend->eraseFromParent();
1005+
return;
1006+
}
1007+
1008+
if (Blend->isNormalized())
1009+
return;
1010+
1011+
// Normalize the blend so its first incomming value is used as the initial
1012+
// value with the others blended into it.
1013+
1014+
unsigned StartIndex = 0;
1015+
SmallVector<VPValue *, 4> OperandsWithMask;
1016+
OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
1017+
1018+
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
1019+
if (I == StartIndex)
1020+
continue;
1021+
OperandsWithMask.push_back(Blend->getIncomingValue(I));
1022+
OperandsWithMask.push_back(Blend->getMask(I));
1023+
}
1024+
1025+
auto *NewBlend = new VPBlendRecipe(
1026+
cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
1027+
NewBlend->insertBefore(&R);
1028+
1029+
VPValue *DeadMask = Blend->getMask(StartIndex);
1030+
Blend->replaceAllUsesWith(NewBlend);
10001031
Blend->eraseFromParent();
1032+
recursivelyDeleteDeadRecipes(DeadMask);
10011033
return;
10021034
}
10031035

0 commit comments

Comments
 (0)