Skip to content

Commit ca30d43

Browse files
committed
[VPlan] Split out VPBlendRecipe simplifications from simplifyRecipes. NFC
This is split off from llvm#133977. VPBlendRecipe normalisation is sensitive to the number of users a mask has, so it should probably be run after the masks have been simplified as much as possible. Note that this could be run after removeDeadRecipes, but doing so causes test diffs, including some regressions, so that is left to a later patch.
1 parent 5bbcc76 commit ca30d43

File tree

2 files changed

+84
-68
lines changed

2 files changed

+84
-68
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 80 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -926,74 +926,6 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
926926
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
927927
using namespace llvm::VPlanPatternMatch;
928928

929-
if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
930-
// Try to remove redundant blend recipes.
931-
SmallPtrSet<VPValue *, 4> UniqueValues;
932-
if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
933-
UniqueValues.insert(Blend->getIncomingValue(0));
934-
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
935-
if (!match(Blend->getMask(I), m_False()))
936-
UniqueValues.insert(Blend->getIncomingValue(I));
937-
938-
if (UniqueValues.size() == 1) {
939-
Blend->replaceAllUsesWith(*UniqueValues.begin());
940-
Blend->eraseFromParent();
941-
return;
942-
}
943-
944-
if (Blend->isNormalized())
945-
return;
946-
947-
// Normalize the blend so its first incoming value is used as the initial
948-
// value with the others blended into it.
949-
950-
unsigned StartIndex = 0;
951-
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
952-
// If a value's mask is used only by the blend then is can be deadcoded.
953-
// TODO: Find the most expensive mask that can be deadcoded, or a mask
954-
// that's used by multiple blends where it can be removed from them all.
955-
VPValue *Mask = Blend->getMask(I);
956-
if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
957-
StartIndex = I;
958-
break;
959-
}
960-
}
961-
962-
SmallVector<VPValue *, 4> OperandsWithMask;
963-
OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
964-
965-
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
966-
if (I == StartIndex)
967-
continue;
968-
OperandsWithMask.push_back(Blend->getIncomingValue(I));
969-
OperandsWithMask.push_back(Blend->getMask(I));
970-
}
971-
972-
auto *NewBlend = new VPBlendRecipe(
973-
cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
974-
NewBlend->insertBefore(&R);
975-
976-
VPValue *DeadMask = Blend->getMask(StartIndex);
977-
Blend->replaceAllUsesWith(NewBlend);
978-
Blend->eraseFromParent();
979-
recursivelyDeleteDeadRecipes(DeadMask);
980-
981-
/// Simplify BLEND %a, %b, Not(%mask) -> BLEND %b, %a, %mask.
982-
VPValue *NewMask;
983-
if (NewBlend->getNumOperands() == 3 &&
984-
match(NewBlend->getMask(1), m_Not(m_VPValue(NewMask)))) {
985-
VPValue *Inc0 = NewBlend->getOperand(0);
986-
VPValue *Inc1 = NewBlend->getOperand(1);
987-
VPValue *OldMask = NewBlend->getOperand(2);
988-
NewBlend->setOperand(0, Inc1);
989-
NewBlend->setOperand(1, Inc0);
990-
NewBlend->setOperand(2, NewMask);
991-
if (OldMask->getNumUsers() == 0)
992-
cast<VPInstruction>(OldMask)->eraseFromParent();
993-
}
994-
return;
995-
}
996-
997929
// VPScalarIVSteps can only be simplified after unrolling. VPScalarIVSteps for
998930
// part 0 can be replaced by their start value, if only the first lane is
999931
// demanded.
@@ -1092,6 +1024,85 @@ void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
10921024
}
10931025
}
10941026

1027+
void VPlanTransforms::simplifyBlends(VPlan &Plan) { // Removes redundant VPBlendRecipes in Plan and normalizes the remaining ones.
1028+
using namespace llvm::VPlanPatternMatch;
1029+
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
1030+
Plan.getEntry());
1031+
SetVector<VPRecipeBase *> Worklist; // NOTE(review): declared but never referenced in this function; candidate for removal.
1032+
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
1033+
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
1034+
auto *Blend = dyn_cast<VPBlendRecipe>(&R);
1035+
if (!Blend)
1036+
continue;
1037+
1038+
// Try to remove redundant blend recipes: collect the incoming values whose mask is not known to be false; if only one distinct value remains, the blend is replaced by it.
1039+
SmallPtrSet<VPValue *, 4> UniqueValues;
1040+
if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
1041+
UniqueValues.insert(Blend->getIncomingValue(0));
1042+
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
1043+
if (!match(Blend->getMask(I), m_False()))
1044+
UniqueValues.insert(Blend->getIncomingValue(I));
1045+
1046+
if (UniqueValues.size() == 1) {
1047+
Blend->replaceAllUsesWith(*UniqueValues.begin());
1048+
Blend->eraseFromParent();
1049+
continue;
1050+
}
1051+
1052+
if (Blend->isNormalized())
1053+
continue;
1054+
1055+
// Normalize the blend so its first incoming value is used as the initial
1056+
// value with the others blended into it.
1057+
1058+
unsigned StartIndex = 0;
1059+
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
1060+
// If a value's mask is used only by the blend then it can be deadcoded.
1061+
// TODO: Find the most expensive mask that can be deadcoded, or a mask
1062+
// that's used by multiple blends where it can be removed from them all.
1063+
VPValue *Mask = Blend->getMask(I);
1064+
if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
1065+
StartIndex = I;
1066+
break;
1067+
}
1068+
}
1069+
1070+
SmallVector<VPValue *, 4> OperandsWithMask;
1071+
OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
1072+
1073+
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
1074+
if (I == StartIndex)
1075+
continue;
1076+
OperandsWithMask.push_back(Blend->getIncomingValue(I));
1077+
OperandsWithMask.push_back(Blend->getMask(I));
1078+
}
1079+
1080+
auto *NewBlend = new VPBlendRecipe(
1081+
cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
1082+
NewBlend->insertBefore(&R);
1083+
1084+
VPValue *DeadMask = Blend->getMask(StartIndex);
1085+
Blend->replaceAllUsesWith(NewBlend);
1086+
Blend->eraseFromParent();
1087+
recursivelyDeleteDeadRecipes(DeadMask);
1088+
1089+
/// Simplify BLEND %a, %b, Not(%mask) -> BLEND %b, %a, %mask.
1090+
VPValue *NewMask;
1091+
if (NewBlend->getNumOperands() == 3 &&
1092+
match(NewBlend->getMask(1), m_Not(m_VPValue(NewMask)))) {
1093+
VPValue *Inc0 = NewBlend->getOperand(0);
1094+
VPValue *Inc1 = NewBlend->getOperand(1);
1095+
VPValue *OldMask = NewBlend->getOperand(2);
1096+
NewBlend->setOperand(0, Inc1);
1097+
NewBlend->setOperand(1, Inc0);
1098+
NewBlend->setOperand(2, NewMask);
1099+
if (OldMask->getNumUsers() == 0)
1100+
cast<VPInstruction>(OldMask)->eraseFromParent();
1101+
}
1102+
}
1103+
}
1104+
}
1105+
10951106
/// Optimize the width of vector induction variables in \p Plan based on a known
10961107
/// constant Trip Count, \p BestVF and \p BestUF.
10971108
static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
@@ -1687,6 +1698,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
16871698
runPass(removeRedundantInductionCasts, Plan);
16881699

16891700
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
1701+
runPass(simplifyBlends, Plan);
16901702
runPass(removeDeadRecipes, Plan);
16911703
runPass(legalizeAndOptimizeInductions, Plan);
16921704
runPass(removeRedundantExpandSCEVRecipes, Plan);

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,10 @@ struct VPlanTransforms {
183183
/// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.
184184
static void simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy);
185185

186+
/// Normalize and simplify VPBlendRecipes. Should be run after simplifyRecipes
187+
/// to make sure the masks are simplified.
188+
static void simplifyBlends(VPlan &Plan);
189+
186190
/// If there's a single exit block, optimize its phi recipes that use exiting
187191
/// IV values by feeding them precomputed end values instead, possibly taken
188192
/// one step backwards.

0 commit comments

Comments
 (0)