Skip to content

Commit 4f07508

Browse files
[LLVM][VPlan] Keep all VPBlend masks until VPlan transformation. (#104015)
It's not possible to pick the best mask to remove when optimising VPBlend at construction and so this patch refactors the code to move the decision (and thus transformation) to VPlanTransforms. NOTE: This patch does not change the decision of which mask to pick. That will be done in a following PR to keep this patch as NFC from an output point of view.
1 parent 2644fe4 commit 4f07508

File tree

4 files changed

+55
-20
lines changed

4 files changed

+55
-20
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8149,8 +8149,6 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
81498149
// builder. At this point we generate the predication tree. There may be
81508150
// duplications since this is a simple recursive scan, but future
81518151
// optimizations will clean it up.
8152-
// TODO: At the moment the first mask is always skipped, but it would be
8153-
// better to skip the most expensive mask.
81548152
SmallVector<VPValue *, 2> OperandsWithMask;
81558153

81568154
for (unsigned In = 0; In < NumIncoming; In++) {
@@ -8163,8 +8161,6 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
81638161
"Distinct incoming values with one having a full mask");
81648162
break;
81658163
}
8166-
if (In == 0)
8167-
continue;
81688164
OperandsWithMask.push_back(EdgeMask);
81698165
}
81708166
return new VPBlendRecipe(Phi, OperandsWithMask);

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2041,12 +2041,12 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe {
20412041
class VPBlendRecipe : public VPSingleDefRecipe {
20422042
public:
20432043
/// The blend operation is a User of the incoming values and of their
2044-
/// respective masks, ordered [I0, I1, M1, I2, M2, ...]. Note that the first
2045-
/// incoming value does not have a mask associated.
2044+
/// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2045+
/// be omitted (implied by passing an odd number of operands) in which case
2046+
/// all other incoming values are merged into it.
20462047
VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
20472048
: VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
2048-
assert((Operands.size() + 1) % 2 == 0 &&
2049-
"Expected an odd number of operands");
2049+
assert(Operands.size() > 0 && "Expected at least one operand!");
20502050
}
20512051

20522052
VPBlendRecipe *clone() override {
@@ -2056,19 +2056,25 @@ class VPBlendRecipe : public VPSingleDefRecipe {
20562056

20572057
VP_CLASSOF_IMPL(VPDef::VPBlendSC)
20582058

2059-
/// Return the number of incoming values, taking into account that the first
2060-
/// incoming value has no mask.
2061-
unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }
2059+
/// A normalized blend is one that has an odd number of operands, whereby the
2060+
/// first operand does not have an associated mask.
2061+
bool isNormalized() const { return getNumOperands() % 2; }
2062+
2063+
/// Return the number of incoming values, taking into account that, when
2064+
/// normalized, the first incoming value has no mask.
2065+
unsigned getNumIncomingValues() const {
2066+
return (getNumOperands() + isNormalized()) / 2;
2067+
}
20622068

20632069
/// Return incoming value number \p Idx.
20642070
VPValue *getIncomingValue(unsigned Idx) const {
2065-
return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - 1);
2071+
return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
20662072
}
20672073

20682074
/// Return mask number \p Idx.
20692075
VPValue *getMask(unsigned Idx) const {
2070-
assert(Idx > 0 && "First index has no mask associated.");
2071-
return getOperand(Idx * 2);
2076+
assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2077+
return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
20722078
}
20732079

20742080
/// Generate the phi/select nodes.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1703,6 +1703,7 @@ void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
17031703
#endif
17041704

17051705
void VPBlendRecipe::execute(VPTransformState &State) {
1706+
assert(isNormalized() && "Expected blend to be normalized!");
17061707
State.setDebugLocFrom(getDebugLoc());
17071708
// We know that all PHIs in non-header blocks are converted into
17081709
// selects, so we don't have to worry about the insertion order and we

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -895,15 +895,47 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
895895
/// Try to simplify recipe \p R.
896896
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
897897
using namespace llvm::VPlanPatternMatch;
898-
// Try to remove redundant blend recipes.
898+
899899
if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
900-
VPValue *Inc0 = Blend->getIncomingValue(0);
900+
// Try to remove redundant blend recipes.
901+
SmallPtrSet<VPValue *, 4> UniqueValues;
902+
if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
903+
UniqueValues.insert(Blend->getIncomingValue(0));
901904
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
902-
if (Inc0 != Blend->getIncomingValue(I) &&
903-
!match(Blend->getMask(I), m_False()))
904-
return;
905-
Blend->replaceAllUsesWith(Inc0);
905+
if (!match(Blend->getMask(I), m_False()))
906+
UniqueValues.insert(Blend->getIncomingValue(I));
907+
908+
if (UniqueValues.size() == 1) {
909+
Blend->replaceAllUsesWith(*UniqueValues.begin());
910+
Blend->eraseFromParent();
911+
return;
912+
}
913+
914+
if (Blend->isNormalized())
915+
return;
916+
917+
// Normalize the blend so its first incoming value is used as the initial
918+
// value with the others blended into it.
919+
920+
unsigned StartIndex = 0;
921+
SmallVector<VPValue *, 4> OperandsWithMask;
922+
OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
923+
924+
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
925+
if (I == StartIndex)
926+
continue;
927+
OperandsWithMask.push_back(Blend->getIncomingValue(I));
928+
OperandsWithMask.push_back(Blend->getMask(I));
929+
}
930+
931+
auto *NewBlend = new VPBlendRecipe(
932+
cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
933+
NewBlend->insertBefore(&R);
934+
935+
VPValue *DeadMask = Blend->getMask(StartIndex);
936+
Blend->replaceAllUsesWith(NewBlend);
906937
Blend->eraseFromParent();
938+
recursivelyDeleteDeadRecipes(DeadMask);
907939
return;
908940
}
909941

0 commit comments

Comments
 (0)