Skip to content

Commit be6ccc9

Browse files
authored
[VPlan] Split out VPBlendRecipe simplifications from simplifyRecipes. NFC (#134073)
This is split off from #133977. VPBlendRecipe normalisation is sensitive to the number of users a mask has, so it should probably be run after the masks have been simplified as much as possible. Note that this could instead be run after removeDeadRecipes, but doing so causes test diffs, including some regressions, so that is left to a later patch.
1 parent f20cb3f commit be6ccc9

File tree

1 file changed

+80
-68
lines changed

1 file changed

+80
-68
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 80 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -926,74 +926,6 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
926926
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
927927
using namespace llvm::VPlanPatternMatch;
928928

929-
if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
930-
// Try to remove redundant blend recipes.
931-
SmallPtrSet<VPValue *, 4> UniqueValues;
932-
if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
933-
UniqueValues.insert(Blend->getIncomingValue(0));
934-
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
935-
if (!match(Blend->getMask(I), m_False()))
936-
UniqueValues.insert(Blend->getIncomingValue(I));
937-
938-
if (UniqueValues.size() == 1) {
939-
Blend->replaceAllUsesWith(*UniqueValues.begin());
940-
Blend->eraseFromParent();
941-
return;
942-
}
943-
944-
if (Blend->isNormalized())
945-
return;
946-
947-
// Normalize the blend so its first incoming value is used as the initial
948-
// value with the others blended into it.
949-
950-
unsigned StartIndex = 0;
951-
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
952-
// If a value's mask is used only by the blend then is can be deadcoded.
953-
// TODO: Find the most expensive mask that can be deadcoded, or a mask
954-
// that's used by multiple blends where it can be removed from them all.
955-
VPValue *Mask = Blend->getMask(I);
956-
if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
957-
StartIndex = I;
958-
break;
959-
}
960-
}
961-
962-
SmallVector<VPValue *, 4> OperandsWithMask;
963-
OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
964-
965-
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
966-
if (I == StartIndex)
967-
continue;
968-
OperandsWithMask.push_back(Blend->getIncomingValue(I));
969-
OperandsWithMask.push_back(Blend->getMask(I));
970-
}
971-
972-
auto *NewBlend = new VPBlendRecipe(
973-
cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
974-
NewBlend->insertBefore(&R);
975-
976-
VPValue *DeadMask = Blend->getMask(StartIndex);
977-
Blend->replaceAllUsesWith(NewBlend);
978-
Blend->eraseFromParent();
979-
recursivelyDeleteDeadRecipes(DeadMask);
980-
981-
/// Simplify BLEND %a, %b, Not(%mask) -> BLEND %b, %a, %mask.
982-
VPValue *NewMask;
983-
if (NewBlend->getNumOperands() == 3 &&
984-
match(NewBlend->getMask(1), m_Not(m_VPValue(NewMask)))) {
985-
VPValue *Inc0 = NewBlend->getOperand(0);
986-
VPValue *Inc1 = NewBlend->getOperand(1);
987-
VPValue *OldMask = NewBlend->getOperand(2);
988-
NewBlend->setOperand(0, Inc1);
989-
NewBlend->setOperand(1, Inc0);
990-
NewBlend->setOperand(2, NewMask);
991-
if (OldMask->getNumUsers() == 0)
992-
cast<VPInstruction>(OldMask)->eraseFromParent();
993-
}
994-
return;
995-
}
996-
997929
// VPScalarIVSteps can only be simplified after unrolling. VPScalarIVSteps for
998930
// part 0 can be replaced by their start value, if only the first lane is
999931
// demanded.
@@ -1092,6 +1024,85 @@ void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
10921024
}
10931025
}
10941026

1027+
/// Normalize and simplify VPBlendRecipes. Should be run after simplifyRecipes
1028+
/// to make sure the masks are simplified.
1029+
static void simplifyBlends(VPlan &Plan) {
1030+
using namespace llvm::VPlanPatternMatch;
1031+
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
1032+
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
1033+
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
1034+
auto *Blend = dyn_cast<VPBlendRecipe>(&R);
1035+
if (!Blend)
1036+
continue;
1037+
1038+
// Try to remove redundant blend recipes.
1039+
SmallPtrSet<VPValue *, 4> UniqueValues;
1040+
if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
1041+
UniqueValues.insert(Blend->getIncomingValue(0));
1042+
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
1043+
if (!match(Blend->getMask(I), m_False()))
1044+
UniqueValues.insert(Blend->getIncomingValue(I));
1045+
1046+
if (UniqueValues.size() == 1) {
1047+
Blend->replaceAllUsesWith(*UniqueValues.begin());
1048+
Blend->eraseFromParent();
1049+
continue;
1050+
}
1051+
1052+
if (Blend->isNormalized())
1053+
continue;
1054+
1055+
// Normalize the blend so its first incoming value is used as the initial
1056+
// value with the others blended into it.
1057+
1058+
unsigned StartIndex = 0;
1059+
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
1060+
// If a value's mask is used only by the blend then is can be deadcoded.
1061+
// TODO: Find the most expensive mask that can be deadcoded, or a mask
1062+
// that's used by multiple blends where it can be removed from them all.
1063+
VPValue *Mask = Blend->getMask(I);
1064+
if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
1065+
StartIndex = I;
1066+
break;
1067+
}
1068+
}
1069+
1070+
SmallVector<VPValue *, 4> OperandsWithMask;
1071+
OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
1072+
1073+
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
1074+
if (I == StartIndex)
1075+
continue;
1076+
OperandsWithMask.push_back(Blend->getIncomingValue(I));
1077+
OperandsWithMask.push_back(Blend->getMask(I));
1078+
}
1079+
1080+
auto *NewBlend = new VPBlendRecipe(
1081+
cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
1082+
NewBlend->insertBefore(&R);
1083+
1084+
VPValue *DeadMask = Blend->getMask(StartIndex);
1085+
Blend->replaceAllUsesWith(NewBlend);
1086+
Blend->eraseFromParent();
1087+
recursivelyDeleteDeadRecipes(DeadMask);
1088+
1089+
/// Simplify BLEND %a, %b, Not(%mask) -> BLEND %b, %a, %mask.
1090+
VPValue *NewMask;
1091+
if (NewBlend->getNumOperands() == 3 &&
1092+
match(NewBlend->getMask(1), m_Not(m_VPValue(NewMask)))) {
1093+
VPValue *Inc0 = NewBlend->getOperand(0);
1094+
VPValue *Inc1 = NewBlend->getOperand(1);
1095+
VPValue *OldMask = NewBlend->getOperand(2);
1096+
NewBlend->setOperand(0, Inc1);
1097+
NewBlend->setOperand(1, Inc0);
1098+
NewBlend->setOperand(2, NewMask);
1099+
if (OldMask->getNumUsers() == 0)
1100+
cast<VPInstruction>(OldMask)->eraseFromParent();
1101+
}
1102+
}
1103+
}
1104+
}
1105+
10951106
/// Optimize the width of vector induction variables in \p Plan based on a known
10961107
/// constant Trip Count, \p BestVF and \p BestUF.
10971108
static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
@@ -1733,6 +1744,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
17331744
runPass(removeRedundantInductionCasts, Plan);
17341745

17351746
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
1747+
runPass(simplifyBlends, Plan);
17361748
runPass(removeDeadRecipes, Plan);
17371749
runPass(legalizeAndOptimizeInductions, Plan);
17381750
runPass(removeRedundantExpandSCEVRecipes, Plan);

0 commit comments

Comments
 (0)