@@ -926,74 +926,6 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
926
926
static void simplifyRecipe (VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
927
927
using namespace llvm ::VPlanPatternMatch;
928
928
929
- if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
930
- // Try to remove redundant blend recipes.
931
- SmallPtrSet<VPValue *, 4 > UniqueValues;
932
- if (Blend->isNormalized () || !match (Blend->getMask (0 ), m_False ()))
933
- UniqueValues.insert (Blend->getIncomingValue (0 ));
934
- for (unsigned I = 1 ; I != Blend->getNumIncomingValues (); ++I)
935
- if (!match (Blend->getMask (I), m_False ()))
936
- UniqueValues.insert (Blend->getIncomingValue (I));
937
-
938
- if (UniqueValues.size () == 1 ) {
939
- Blend->replaceAllUsesWith (*UniqueValues.begin ());
940
- Blend->eraseFromParent ();
941
- return ;
942
- }
943
-
944
- if (Blend->isNormalized ())
945
- return ;
946
-
947
- // Normalize the blend so its first incoming value is used as the initial
948
- // value with the others blended into it.
949
-
950
- unsigned StartIndex = 0 ;
951
- for (unsigned I = 0 ; I != Blend->getNumIncomingValues (); ++I) {
952
- // If a value's mask is used only by the blend then is can be deadcoded.
953
- // TODO: Find the most expensive mask that can be deadcoded, or a mask
954
- // that's used by multiple blends where it can be removed from them all.
955
- VPValue *Mask = Blend->getMask (I);
956
- if (Mask->getNumUsers () == 1 && !match (Mask, m_False ())) {
957
- StartIndex = I;
958
- break ;
959
- }
960
- }
961
-
962
- SmallVector<VPValue *, 4 > OperandsWithMask;
963
- OperandsWithMask.push_back (Blend->getIncomingValue (StartIndex));
964
-
965
- for (unsigned I = 0 ; I != Blend->getNumIncomingValues (); ++I) {
966
- if (I == StartIndex)
967
- continue ;
968
- OperandsWithMask.push_back (Blend->getIncomingValue (I));
969
- OperandsWithMask.push_back (Blend->getMask (I));
970
- }
971
-
972
- auto *NewBlend = new VPBlendRecipe (
973
- cast<PHINode>(Blend->getUnderlyingValue ()), OperandsWithMask);
974
- NewBlend->insertBefore (&R);
975
-
976
- VPValue *DeadMask = Blend->getMask (StartIndex);
977
- Blend->replaceAllUsesWith (NewBlend);
978
- Blend->eraseFromParent ();
979
- recursivelyDeleteDeadRecipes (DeadMask);
980
-
981
- // / Simplify BLEND %a, %b, Not(%mask) -> BLEND %b, %a, %mask.
982
- VPValue *NewMask;
983
- if (NewBlend->getNumOperands () == 3 &&
984
- match (NewBlend->getMask (1 ), m_Not (m_VPValue (NewMask)))) {
985
- VPValue *Inc0 = NewBlend->getOperand (0 );
986
- VPValue *Inc1 = NewBlend->getOperand (1 );
987
- VPValue *OldMask = NewBlend->getOperand (2 );
988
- NewBlend->setOperand (0 , Inc1);
989
- NewBlend->setOperand (1 , Inc0);
990
- NewBlend->setOperand (2 , NewMask);
991
- if (OldMask->getNumUsers () == 0 )
992
- cast<VPInstruction>(OldMask)->eraseFromParent ();
993
- }
994
- return ;
995
- }
996
-
997
929
// VPScalarIVSteps can only be simplified after unrolling. VPScalarIVSteps for
998
930
// part 0 can be replaced by their start value, if only the first lane is
999
931
// demanded.
@@ -1092,6 +1024,85 @@ void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
1092
1024
}
1093
1025
}
1094
1026
1027
+ /// Normalize and simplify VPBlendRecipes. Should be run after simplifyRecipes
1028
+ /// to make sure the masks are simplified. Visits the VPBasicBlocks yielded by vp_depth_first_shallow from the vector loop region's entry.
1029
+ static void simplifyBlends (VPlan &Plan) {
1030
+ using namespace llvm ::VPlanPatternMatch;
1031
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
1032
+ vp_depth_first_shallow (Plan.getVectorLoopRegion ()->getEntry ()))) {
1033
+ for (VPRecipeBase &R : make_early_inc_range (*VPBB)) { // Early-inc range: the current recipe may be erased in the body.
1034
+ auto *Blend = dyn_cast<VPBlendRecipe>(&R);
1035
+ if (!Blend)
1036
+ continue ; // Only blend recipes are handled here.
1037
+
1038
+ // Try to remove redundant blend recipes: collect the distinct incoming values whose mask is not matched as false.
1039
+ SmallPtrSet<VPValue *, 4 > UniqueValues;
1040
+ if (Blend->isNormalized () || !match (Blend->getMask (0 ), m_False ())) // For a normalized blend, incoming value 0 is always counted and its mask is not queried.
1041
+ UniqueValues.insert (Blend->getIncomingValue (0 ));
1042
+ for (unsigned I = 1 ; I != Blend->getNumIncomingValues (); ++I)
1043
+ if (!match (Blend->getMask (I), m_False ()))
1044
+ UniqueValues.insert (Blend->getIncomingValue (I));
1045
+
1046
+ if (UniqueValues.size () == 1 ) { // Exactly one candidate value: forward it to all users and drop the blend.
1047
+ Blend->replaceAllUsesWith (*UniqueValues.begin ());
1048
+ Blend->eraseFromParent ();
1049
+ continue ;
1050
+ }
1051
+
1052
+ if (Blend->isNormalized ())
1053
+ continue ; // Already normalized; the rewrite below only applies to non-normalized blends.
1054
+
1055
+ // Normalize the blend so its first incoming value is used as the initial
1056
+ // value with the others blended into it.
1057
+
1058
+ unsigned StartIndex = 0 ; // Stays 0 when the search below finds no suitable mask.
1059
+ for (unsigned I = 0 ; I != Blend->getNumIncomingValues (); ++I) {
1060
+ // If a value's mask is used only by the blend then it can be dead-coded.
1061
+ // TODO: Find the most expensive mask that can be deadcoded, or a mask
1062
+ // that's used by multiple blends where it can be removed from them all.
1063
+ VPValue *Mask = Blend->getMask (I);
1064
+ if (Mask->getNumUsers () == 1 && !match (Mask, m_False ())) { // First single-user mask that is not matched as false wins.
1065
+ StartIndex = I;
1066
+ break ;
1067
+ }
1068
+ }
1069
+
1070
+ SmallVector<VPValue *, 4 > OperandsWithMask; // New operand list: the start value first, with no mask of its own.
1071
+ OperandsWithMask.push_back (Blend->getIncomingValue (StartIndex));
1072
+
1073
+ for (unsigned I = 0 ; I != Blend->getNumIncomingValues (); ++I) { // Then a (value, mask) pair for every other incoming value, in original order.
1074
+ if (I == StartIndex)
1075
+ continue ;
1076
+ OperandsWithMask.push_back (Blend->getIncomingValue (I));
1077
+ OperandsWithMask.push_back (Blend->getMask (I));
1078
+ }
1079
+
1080
+ auto *NewBlend = new VPBlendRecipe (
1081
+ cast<PHINode>(Blend->getUnderlyingValue ()), OperandsWithMask);
1082
+ NewBlend->insertBefore (&R); // Placed directly ahead of the blend it replaces.
1083
+
1084
+ VPValue *DeadMask = Blend->getMask (StartIndex); // Fetch the dropped mask before Blend is erased.
1085
+ Blend->replaceAllUsesWith (NewBlend);
1086
+ Blend->eraseFromParent ();
1087
+ recursivelyDeleteDeadRecipes (DeadMask); // Called only after the old blend, a user of the mask, has been erased.
1088
+
1089
+ // Simplify BLEND %a, %b, Not(%mask) -> BLEND %b, %a, %mask.
1090
+ VPValue *NewMask;
1091
+ if (NewBlend->getNumOperands () == 3 && // Three operands: start value, one further value, and that value's mask.
1092
+ match (NewBlend->getMask (1 ), m_Not (m_VPValue (NewMask)))) {
1093
+ VPValue *Inc0 = NewBlend->getOperand (0 );
1094
+ VPValue *Inc1 = NewBlend->getOperand (1 );
1095
+ VPValue *OldMask = NewBlend->getOperand (2 );
1096
+ NewBlend->setOperand (0 , Inc1); // Swap the two values and use the un-negated mask instead.
1097
+ NewBlend->setOperand (1 , Inc0);
1098
+ NewBlend->setOperand (2 , NewMask);
1099
+ if (OldMask->getNumUsers () == 0 ) // The negated mask is erased only if nothing else uses it.
1100
+ cast<VPInstruction>(OldMask)->eraseFromParent ();
1101
+ }
1102
+ }
1103
+ }
1104
+ }
1105
+
1095
1106
// / Optimize the width of vector induction variables in \p Plan based on a known
1096
1107
// / constant Trip Count, \p BestVF and \p BestUF.
1097
1108
static bool optimizeVectorInductionWidthForTCAndVFUF (VPlan &Plan,
@@ -1733,6 +1744,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
1733
1744
runPass (removeRedundantInductionCasts, Plan);
1734
1745
1735
1746
runPass (simplifyRecipes, Plan, *Plan.getCanonicalIV ()->getScalarType ());
1747
+ runPass (simplifyBlends, Plan);
1736
1748
runPass (removeDeadRecipes, Plan);
1737
1749
runPass (legalizeAndOptimizeInductions, Plan);
1738
1750
runPass (removeRedundantExpandSCEVRecipes, Plan);
0 commit comments