Skip to content

Commit 43e9f29

Browse files
committed
[VPlan] Process simplifyRecipes as a worklist. NFCI
1 parent bdae91b commit 43e9f29

File tree

2 files changed

+110
-84
lines changed

2 files changed

+110
-84
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 107 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -923,85 +923,16 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
923923
}
924924

925925
/// Try to simplify recipe \p R; returns its replacement value, or nullptr.
926-
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
926+
static VPValue *simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
927927
using namespace llvm::VPlanPatternMatch;
928928

929-
if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
930-
// Try to remove redundant blend recipes.
931-
SmallPtrSet<VPValue *, 4> UniqueValues;
932-
if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
933-
UniqueValues.insert(Blend->getIncomingValue(0));
934-
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
935-
if (!match(Blend->getMask(I), m_False()))
936-
UniqueValues.insert(Blend->getIncomingValue(I));
937-
938-
if (UniqueValues.size() == 1) {
939-
Blend->replaceAllUsesWith(*UniqueValues.begin());
940-
Blend->eraseFromParent();
941-
return;
942-
}
943-
944-
if (Blend->isNormalized())
945-
return;
946-
947-
// Normalize the blend so its first incoming value is used as the initial
948-
// value with the others blended into it.
949-
950-
unsigned StartIndex = 0;
951-
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
952-
// If a value's mask is used only by the blend then it can be deadcoded.
953-
// TODO: Find the most expensive mask that can be deadcoded, or a mask
954-
// that's used by multiple blends where it can be removed from them all.
955-
VPValue *Mask = Blend->getMask(I);
956-
if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
957-
StartIndex = I;
958-
break;
959-
}
960-
}
961-
962-
SmallVector<VPValue *, 4> OperandsWithMask;
963-
OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
964-
965-
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
966-
if (I == StartIndex)
967-
continue;
968-
OperandsWithMask.push_back(Blend->getIncomingValue(I));
969-
OperandsWithMask.push_back(Blend->getMask(I));
970-
}
971-
972-
auto *NewBlend = new VPBlendRecipe(
973-
cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
974-
NewBlend->insertBefore(&R);
975-
976-
VPValue *DeadMask = Blend->getMask(StartIndex);
977-
Blend->replaceAllUsesWith(NewBlend);
978-
Blend->eraseFromParent();
979-
recursivelyDeleteDeadRecipes(DeadMask);
980-
981-
/// Simplify BLEND %a, %b, Not(%mask) -> BLEND %b, %a, %mask.
982-
VPValue *NewMask;
983-
if (NewBlend->getNumOperands() == 3 &&
984-
match(NewBlend->getMask(1), m_Not(m_VPValue(NewMask)))) {
985-
VPValue *Inc0 = NewBlend->getOperand(0);
986-
VPValue *Inc1 = NewBlend->getOperand(1);
987-
VPValue *OldMask = NewBlend->getOperand(2);
988-
NewBlend->setOperand(0, Inc1);
989-
NewBlend->setOperand(1, Inc0);
990-
NewBlend->setOperand(2, NewMask);
991-
if (OldMask->getNumUsers() == 0)
992-
cast<VPInstruction>(OldMask)->eraseFromParent();
993-
}
994-
return;
995-
}
996-
997929
// VPScalarIVSteps can only be simplified after unrolling. VPScalarIVSteps for
998930
// part 0 can be replaced by their start value, if only the first lane is
999931
// demanded.
1000932
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&R)) {
1001933
if (Steps->getParent()->getPlan()->isUnrolled() && Steps->isPart0() &&
1002934
vputils::onlyFirstLaneUsed(Steps)) {
1003-
Steps->replaceAllUsesWith(Steps->getOperand(0));
1004-
return;
935+
return Steps->getOperand(0);
1005936
}
1006937
}
1007938

@@ -1011,11 +942,11 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
1011942
Type *TruncTy = TypeInfo.inferScalarType(Trunc);
1012943
Type *ATy = TypeInfo.inferScalarType(A);
1013944
if (TruncTy == ATy) {
1014-
Trunc->replaceAllUsesWith(A);
945+
return A;
1015946
} else {
1016947
// Don't replace a scalarizing recipe with a widened cast.
1017948
if (isa<VPReplicateRecipe>(&R))
1018-
return;
949+
return nullptr;
1019950
if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
1020951

1021952
unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
@@ -1028,11 +959,11 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
1028959
VPC->setUnderlyingValue(UnderlyingExt);
1029960
}
1030961
VPC->insertBefore(&R);
1031-
Trunc->replaceAllUsesWith(VPC);
962+
return VPC;
1032963
} else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
1033964
auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
1034965
VPC->insertBefore(&R);
1035-
Trunc->replaceAllUsesWith(VPC);
966+
return VPC;
1036967
}
1037968
}
1038969
#ifndef NDEBUG
@@ -1056,17 +987,14 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
1056987
VPValue *X, *Y;
1057988
if (match(&R,
1058989
m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
1059-
m_LogicalAnd(m_Deferred(X), m_Not(m_Deferred(Y)))))) {
1060-
R.getVPSingleValue()->replaceAllUsesWith(X);
1061-
R.eraseFromParent();
1062-
return;
1063-
}
990+
m_LogicalAnd(m_Deferred(X), m_Not(m_Deferred(Y))))))
991+
return X;
1064992

1065993
if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
1066-
return R.getVPSingleValue()->replaceAllUsesWith(A);
994+
return A;
1067995

1068996
if (match(&R, m_Not(m_Not(m_VPValue(A)))))
1069-
return R.getVPSingleValue()->replaceAllUsesWith(A);
997+
return A;
1070998

1071999
// Remove redundant DerivedIVs, that is 0 + A * 1 -> A and 0 + 0 * x -> 0.
10721000
if ((match(&R,
@@ -1075,16 +1003,110 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
10751003
m_DerivedIV(m_SpecificInt(0), m_SpecificInt(0), m_VPValue()))) &&
10761004
TypeInfo.inferScalarType(R.getOperand(1)) ==
10771005
TypeInfo.inferScalarType(R.getVPSingleValue()))
1078-
return R.getVPSingleValue()->replaceAllUsesWith(R.getOperand(1));
1006+
return R.getOperand(1);
1007+
1008+
return nullptr;
10791009
}
10801010

10811011
void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
10821012
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
10831013
Plan.getEntry());
10841014
VPTypeAnalysis TypeInfo(&CanonicalIVTy);
1015+
SetVector<VPRecipeBase *> Worklist;
1016+
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))
1017+
for (VPRecipeBase &R : make_early_inc_range(*VPBB))
1018+
Worklist.insert(&R);
1019+
1020+
while (!Worklist.empty()) {
1021+
VPRecipeBase *R = Worklist.pop_back_val();
1022+
if (VPValue *Result = simplifyRecipe(*R, TypeInfo)) {
1023+
R->getVPSingleValue()->replaceAllUsesWith(Result);
1024+
R->eraseFromParent();
1025+
if (VPRecipeBase *ResultR = Result->getDefiningRecipe())
1026+
Worklist.insert(ResultR);
1027+
for (VPUser *U : Result->users())
1028+
if (auto *UR = dyn_cast<VPRecipeBase>(U))
1029+
if (UR != R)
1030+
Worklist.insert(UR);
1031+
}
1032+
}
1033+
}
1034+
1035+
void VPlanTransforms::simplifyBlends(VPlan &Plan) {
1036+
using namespace llvm::VPlanPatternMatch;
1037+
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
1038+
Plan.getEntry());
1039+
SetVector<VPRecipeBase *> Worklist;
10851040
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
10861041
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
1087-
simplifyRecipe(R, TypeInfo);
1042+
auto *Blend = dyn_cast<VPBlendRecipe>(&R);
1043+
if (!Blend)
1044+
continue;
1045+
1046+
// Try to remove redundant blend recipes.
1047+
SmallPtrSet<VPValue *, 4> UniqueValues;
1048+
if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
1049+
UniqueValues.insert(Blend->getIncomingValue(0));
1050+
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
1051+
if (!match(Blend->getMask(I), m_False()))
1052+
UniqueValues.insert(Blend->getIncomingValue(I));
1053+
1054+
if (UniqueValues.size() == 1) {
1055+
Blend->replaceAllUsesWith(*UniqueValues.begin());
1056+
Blend->eraseFromParent();
1057+
continue;
1058+
}
1059+
1060+
if (Blend->isNormalized())
1061+
continue;
1062+
1063+
// Normalize the blend so its first incoming value is used as the initial
1064+
// value with the others blended into it.
1065+
1066+
unsigned StartIndex = 0;
1067+
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
1068+
// If a value's mask is used only by the blend then it can be deadcoded.
1069+
// TODO: Find the most expensive mask that can be deadcoded, or a mask
1070+
// that's used by multiple blends where it can be removed from them all.
1071+
VPValue *Mask = Blend->getMask(I);
1072+
if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
1073+
StartIndex = I;
1074+
break;
1075+
}
1076+
}
1077+
1078+
SmallVector<VPValue *, 4> OperandsWithMask;
1079+
OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
1080+
1081+
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
1082+
if (I == StartIndex)
1083+
continue;
1084+
OperandsWithMask.push_back(Blend->getIncomingValue(I));
1085+
OperandsWithMask.push_back(Blend->getMask(I));
1086+
}
1087+
1088+
auto *NewBlend = new VPBlendRecipe(
1089+
cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
1090+
NewBlend->insertBefore(&R);
1091+
1092+
VPValue *DeadMask = Blend->getMask(StartIndex);
1093+
Blend->replaceAllUsesWith(NewBlend);
1094+
Blend->eraseFromParent();
1095+
recursivelyDeleteDeadRecipes(DeadMask);
1096+
1097+
/// Simplify BLEND %a, %b, Not(%mask) -> BLEND %b, %a, %mask.
1098+
VPValue *NewMask;
1099+
if (NewBlend->getNumOperands() == 3 &&
1100+
match(NewBlend->getMask(1), m_Not(m_VPValue(NewMask)))) {
1101+
VPValue *Inc0 = NewBlend->getOperand(0);
1102+
VPValue *Inc1 = NewBlend->getOperand(1);
1103+
VPValue *OldMask = NewBlend->getOperand(2);
1104+
NewBlend->setOperand(0, Inc1);
1105+
NewBlend->setOperand(1, Inc0);
1106+
NewBlend->setOperand(2, NewMask);
1107+
if (OldMask->getNumUsers() == 0)
1108+
cast<VPInstruction>(OldMask)->eraseFromParent();
1109+
}
10881110
}
10891111
}
10901112
}
@@ -1684,6 +1706,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
16841706
runPass(removeRedundantInductionCasts, Plan);
16851707

16861708
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
1709+
runPass(simplifyBlends, Plan);
16871710
runPass(removeDeadRecipes, Plan);
16881711
runPass(legalizeAndOptimizeInductions, Plan);
16891712
runPass(removeRedundantExpandSCEVRecipes, Plan);

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,9 @@ struct VPlanTransforms {
183183
/// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.
184184
static void simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy);
185185

186+
/// Normalize and simplify VPBlendRecipes.
187+
static void simplifyBlends(VPlan &Plan);
188+
186189
/// If there's a single exit block, optimize its phi recipes that use exiting
187190
/// IV values by feeding them precomputed end values instead, possibly taken
188191
/// one step backwards.

0 commit comments

Comments
 (0)