@@ -923,85 +923,16 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
923
923
}
924
924
925
925
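/// Try to simplify recipe \p R. Returns the value that should replace the
/// result of \p R, or nullptr if no simplification applies.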
926
- static void simplifyRecipe (VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
926
+ static VPValue * simplifyRecipe (VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
927
927
using namespace llvm ::VPlanPatternMatch;
928
928
929
- if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
930
- // Try to remove redundant blend recipes.
931
- SmallPtrSet<VPValue *, 4 > UniqueValues;
932
- if (Blend->isNormalized () || !match (Blend->getMask (0 ), m_False ()))
933
- UniqueValues.insert (Blend->getIncomingValue (0 ));
934
- for (unsigned I = 1 ; I != Blend->getNumIncomingValues (); ++I)
935
- if (!match (Blend->getMask (I), m_False ()))
936
- UniqueValues.insert (Blend->getIncomingValue (I));
937
-
938
- if (UniqueValues.size () == 1 ) {
939
- Blend->replaceAllUsesWith (*UniqueValues.begin ());
940
- Blend->eraseFromParent ();
941
- return ;
942
- }
943
-
944
- if (Blend->isNormalized ())
945
- return ;
946
-
947
- // Normalize the blend so its first incoming value is used as the initial
948
- // value with the others blended into it.
949
-
950
- unsigned StartIndex = 0 ;
951
- for (unsigned I = 0 ; I != Blend->getNumIncomingValues (); ++I) {
952
- // If a value's mask is used only by the blend then is can be deadcoded.
953
- // TODO: Find the most expensive mask that can be deadcoded, or a mask
954
- // that's used by multiple blends where it can be removed from them all.
955
- VPValue *Mask = Blend->getMask (I);
956
- if (Mask->getNumUsers () == 1 && !match (Mask, m_False ())) {
957
- StartIndex = I;
958
- break ;
959
- }
960
- }
961
-
962
- SmallVector<VPValue *, 4 > OperandsWithMask;
963
- OperandsWithMask.push_back (Blend->getIncomingValue (StartIndex));
964
-
965
- for (unsigned I = 0 ; I != Blend->getNumIncomingValues (); ++I) {
966
- if (I == StartIndex)
967
- continue ;
968
- OperandsWithMask.push_back (Blend->getIncomingValue (I));
969
- OperandsWithMask.push_back (Blend->getMask (I));
970
- }
971
-
972
- auto *NewBlend = new VPBlendRecipe (
973
- cast<PHINode>(Blend->getUnderlyingValue ()), OperandsWithMask);
974
- NewBlend->insertBefore (&R);
975
-
976
- VPValue *DeadMask = Blend->getMask (StartIndex);
977
- Blend->replaceAllUsesWith (NewBlend);
978
- Blend->eraseFromParent ();
979
- recursivelyDeleteDeadRecipes (DeadMask);
980
-
981
- // / Simplify BLEND %a, %b, Not(%mask) -> BLEND %b, %a, %mask.
982
- VPValue *NewMask;
983
- if (NewBlend->getNumOperands () == 3 &&
984
- match (NewBlend->getMask (1 ), m_Not (m_VPValue (NewMask)))) {
985
- VPValue *Inc0 = NewBlend->getOperand (0 );
986
- VPValue *Inc1 = NewBlend->getOperand (1 );
987
- VPValue *OldMask = NewBlend->getOperand (2 );
988
- NewBlend->setOperand (0 , Inc1);
989
- NewBlend->setOperand (1 , Inc0);
990
- NewBlend->setOperand (2 , NewMask);
991
- if (OldMask->getNumUsers () == 0 )
992
- cast<VPInstruction>(OldMask)->eraseFromParent ();
993
- }
994
- return ;
995
- }
996
-
997
929
// VPScalarIVSteps can only be simplified after unrolling. VPScalarIVSteps for
998
930
// part 0 can be replaced by their start value, if only the first lane is
999
931
// demanded.
1000
932
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&R)) {
1001
933
if (Steps->getParent ()->getPlan ()->isUnrolled () && Steps->isPart0 () &&
1002
934
vputils::onlyFirstLaneUsed (Steps)) {
1003
- Steps->replaceAllUsesWith (Steps->getOperand (0 ));
1004
- return ;
935
+ return Steps->getOperand (0 );
1005
936
}
1006
937
}
1007
938
@@ -1011,11 +942,11 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
1011
942
Type *TruncTy = TypeInfo.inferScalarType (Trunc);
1012
943
Type *ATy = TypeInfo.inferScalarType (A);
1013
944
if (TruncTy == ATy) {
1014
- Trunc-> replaceAllUsesWith (A) ;
945
+ return A ;
1015
946
} else {
1016
947
// Don't replace a scalarizing recipe with a widened cast.
1017
948
if (isa<VPReplicateRecipe>(&R))
1018
- return ;
949
+ return nullptr ;
1019
950
if (ATy->getScalarSizeInBits () < TruncTy->getScalarSizeInBits ()) {
1020
951
1021
952
unsigned ExtOpcode = match (R.getOperand (0 ), m_SExt (m_VPValue ()))
@@ -1028,11 +959,11 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
1028
959
VPC->setUnderlyingValue (UnderlyingExt);
1029
960
}
1030
961
VPC->insertBefore (&R);
1031
- Trunc-> replaceAllUsesWith ( VPC) ;
962
+ return VPC;
1032
963
} else if (ATy->getScalarSizeInBits () > TruncTy->getScalarSizeInBits ()) {
1033
964
auto *VPC = new VPWidenCastRecipe (Instruction::Trunc, A, TruncTy);
1034
965
VPC->insertBefore (&R);
1035
- Trunc-> replaceAllUsesWith ( VPC) ;
966
+ return VPC;
1036
967
}
1037
968
}
1038
969
#ifndef NDEBUG
@@ -1056,17 +987,14 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
1056
987
VPValue *X, *Y;
1057
988
if (match (&R,
1058
989
m_c_BinaryOr (m_LogicalAnd (m_VPValue (X), m_VPValue (Y)),
1059
- m_LogicalAnd (m_Deferred (X), m_Not (m_Deferred (Y)))))) {
1060
- R.getVPSingleValue ()->replaceAllUsesWith (X);
1061
- R.eraseFromParent ();
1062
- return ;
1063
- }
990
+ m_LogicalAnd (m_Deferred (X), m_Not (m_Deferred (Y))))))
991
+ return X;
1064
992
1065
993
if (match (&R, m_c_Mul (m_VPValue (A), m_SpecificInt (1 ))))
1066
- return R. getVPSingleValue ()-> replaceAllUsesWith (A) ;
994
+ return A ;
1067
995
1068
996
if (match (&R, m_Not (m_Not (m_VPValue (A)))))
1069
- return R. getVPSingleValue ()-> replaceAllUsesWith (A) ;
997
+ return A ;
1070
998
1071
999
// Remove redundant DerivedIVs, that is 0 + A * 1 -> A and 0 + 0 * x -> 0.
1072
1000
if ((match (&R,
@@ -1075,16 +1003,110 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
1075
1003
m_DerivedIV (m_SpecificInt (0 ), m_SpecificInt (0 ), m_VPValue ()))) &&
1076
1004
TypeInfo.inferScalarType (R.getOperand (1 )) ==
1077
1005
TypeInfo.inferScalarType (R.getVPSingleValue ()))
1078
- return R.getVPSingleValue ()->replaceAllUsesWith (R.getOperand (1 ));
1006
+ return R.getOperand (1 );
1007
+
1008
+ return nullptr ;
1079
1009
}
1080
1010
1081
1011
/// Apply simplifyRecipe to the recipes of \p Plan using a worklist.
///
/// Every recipe of every VPBasicBlock (blocks visited in reverse post-order
/// from the plan's entry) is queued once. Whenever simplifyRecipe returns a
/// replacement value for a recipe, all uses of that recipe's single result are
/// redirected to the replacement, the recipe is erased, and the replacement's
/// defining recipe and users are queued (again) so follow-up simplifications
/// get a chance to fire. \p CanonicalIVTy seeds the type analysis handed to
/// simplifyRecipe.
void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
  ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
      Plan.getEntry());
  VPTypeAnalysis TypeInfo(&CanonicalIVTy);

  // Seed the queue with every recipe in the plan.
  SetVector<VPRecipeBase *> Pending;
  for (VPBasicBlock *Block : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
    for (VPRecipeBase &Recipe : make_early_inc_range(*Block))
      Pending.insert(&Recipe);
  }

  // Entries are taken from the back of the queue until it drains.
  while (!Pending.empty()) {
    VPRecipeBase *Cur = Pending.pop_back_val();
    VPValue *Replacement = simplifyRecipe(*Cur, TypeInfo);
    if (!Replacement)
      continue;

    // Redirect all users to the simplified value, then drop the old recipe.
    Cur->getVPSingleValue()->replaceAllUsesWith(Replacement);
    Cur->eraseFromParent();

    // The replacement itself may now be simplifiable (if a recipe defines it).
    if (VPRecipeBase *Def = Replacement->getDefiningRecipe())
      Pending.insert(Def);

    // Revisit everything that now consumes the replacement. Cur has just been
    // erased; only its address is compared here, it is never dereferenced.
    for (VPUser *User : Replacement->users()) {
      auto *UserRecipe = dyn_cast<VPRecipeBase>(User);
      if (UserRecipe && UserRecipe != Cur)
        Pending.insert(UserRecipe);
    }
  }
}
1034
+
1035
+ void VPlanTransforms::simplifyBlends (VPlan &Plan) {
1036
+ using namespace llvm ::VPlanPatternMatch;
1037
+ ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT (
1038
+ Plan.getEntry ());
1039
+ SetVector<VPRecipeBase *> Worklist;
1085
1040
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
1086
1041
for (VPRecipeBase &R : make_early_inc_range (*VPBB)) {
1087
- simplifyRecipe (R, TypeInfo);
1042
+ auto *Blend = dyn_cast<VPBlendRecipe>(&R);
1043
+ if (!Blend)
1044
+ continue ;
1045
+
1046
+ // Try to remove redundant blend recipes.
1047
+ SmallPtrSet<VPValue *, 4 > UniqueValues;
1048
+ if (Blend->isNormalized () || !match (Blend->getMask (0 ), m_False ()))
1049
+ UniqueValues.insert (Blend->getIncomingValue (0 ));
1050
+ for (unsigned I = 1 ; I != Blend->getNumIncomingValues (); ++I)
1051
+ if (!match (Blend->getMask (I), m_False ()))
1052
+ UniqueValues.insert (Blend->getIncomingValue (I));
1053
+
1054
+ if (UniqueValues.size () == 1 ) {
1055
+ Blend->replaceAllUsesWith (*UniqueValues.begin ());
1056
+ Blend->eraseFromParent ();
1057
+ continue ;
1058
+ }
1059
+
1060
+ if (Blend->isNormalized ())
1061
+ continue ;
1062
+
1063
+ // Normalize the blend so its first incoming value is used as the initial
1064
+ // value with the others blended into it.
1065
+
1066
+ unsigned StartIndex = 0 ;
1067
+ for (unsigned I = 0 ; I != Blend->getNumIncomingValues (); ++I) {
1068
+ // If a value's mask is used only by the blend then is can be deadcoded.
1069
+ // TODO: Find the most expensive mask that can be deadcoded, or a mask
1070
+ // that's used by multiple blends where it can be removed from them all.
1071
+ VPValue *Mask = Blend->getMask (I);
1072
+ if (Mask->getNumUsers () == 1 && !match (Mask, m_False ())) {
1073
+ StartIndex = I;
1074
+ break ;
1075
+ }
1076
+ }
1077
+
1078
+ SmallVector<VPValue *, 4 > OperandsWithMask;
1079
+ OperandsWithMask.push_back (Blend->getIncomingValue (StartIndex));
1080
+
1081
+ for (unsigned I = 0 ; I != Blend->getNumIncomingValues (); ++I) {
1082
+ if (I == StartIndex)
1083
+ continue ;
1084
+ OperandsWithMask.push_back (Blend->getIncomingValue (I));
1085
+ OperandsWithMask.push_back (Blend->getMask (I));
1086
+ }
1087
+
1088
+ auto *NewBlend = new VPBlendRecipe (
1089
+ cast<PHINode>(Blend->getUnderlyingValue ()), OperandsWithMask);
1090
+ NewBlend->insertBefore (&R);
1091
+
1092
+ VPValue *DeadMask = Blend->getMask (StartIndex);
1093
+ Blend->replaceAllUsesWith (NewBlend);
1094
+ Blend->eraseFromParent ();
1095
+ recursivelyDeleteDeadRecipes (DeadMask);
1096
+
1097
+ // / Simplify BLEND %a, %b, Not(%mask) -> BLEND %b, %a, %mask.
1098
+ VPValue *NewMask;
1099
+ if (NewBlend->getNumOperands () == 3 &&
1100
+ match (NewBlend->getMask (1 ), m_Not (m_VPValue (NewMask)))) {
1101
+ VPValue *Inc0 = NewBlend->getOperand (0 );
1102
+ VPValue *Inc1 = NewBlend->getOperand (1 );
1103
+ VPValue *OldMask = NewBlend->getOperand (2 );
1104
+ NewBlend->setOperand (0 , Inc1);
1105
+ NewBlend->setOperand (1 , Inc0);
1106
+ NewBlend->setOperand (2 , NewMask);
1107
+ if (OldMask->getNumUsers () == 0 )
1108
+ cast<VPInstruction>(OldMask)->eraseFromParent ();
1109
+ }
1088
1110
}
1089
1111
}
1090
1112
}
@@ -1684,6 +1706,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
1684
1706
runPass (removeRedundantInductionCasts, Plan);
1685
1707
1686
1708
runPass (simplifyRecipes, Plan, *Plan.getCanonicalIV ()->getScalarType ());
1709
+ runPass (simplifyBlends, Plan);
1687
1710
runPass (removeDeadRecipes, Plan);
1688
1711
runPass (legalizeAndOptimizeInductions, Plan);
1689
1712
runPass (removeRedundantExpandSCEVRecipes, Plan);
0 commit comments