@@ -662,6 +662,151 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
}
}
+ /// Try to simplify recipe \p R.
+ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
+   using namespace llvm::VPlanPatternMatch;
+
+   if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
+     // Try to remove redundant blend recipes.
+     SmallPtrSet<VPValue *, 4> UniqueValues;
+     if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
+       UniqueValues.insert(Blend->getIncomingValue(0));
+     for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
+       if (!match(Blend->getMask(I), m_False()))
+         UniqueValues.insert(Blend->getIncomingValue(I));
+
+     if (UniqueValues.size() == 1) {
+       Blend->replaceAllUsesWith(*UniqueValues.begin());
+       Blend->eraseFromParent();
+       return;
+     }
+
+     if (Blend->isNormalized())
+       return;
+
+     // Normalize the blend so its first incoming value is used as the initial
+     // value with the others blended into it.
+
+     unsigned StartIndex = 0;
+     for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
+       // If a value's mask is used only by the blend then it can be deadcoded.
+       // TODO: Find the most expensive mask that can be deadcoded, or a mask
+       // that's used by multiple blends where it can be removed from them all.
+       VPValue *Mask = Blend->getMask(I);
+       if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
+         StartIndex = I;
+         break;
+       }
+     }
+
+     SmallVector<VPValue *, 4> OperandsWithMask;
+     OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
+
+     for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
+       if (I == StartIndex)
+         continue;
+       OperandsWithMask.push_back(Blend->getIncomingValue(I));
+       OperandsWithMask.push_back(Blend->getMask(I));
+     }
+
+     auto *NewBlend = new VPBlendRecipe(
+         cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
+     NewBlend->insertBefore(&R);
+
+     VPValue *DeadMask = Blend->getMask(StartIndex);
+     Blend->replaceAllUsesWith(NewBlend);
+     Blend->eraseFromParent();
+     recursivelyDeleteDeadRecipes(DeadMask);
+     return;
+   }
+
+   VPValue *A;
+   if (match(&R, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {
+     VPValue *Trunc = R.getVPSingleValue();
+     Type *TruncTy = TypeInfo.inferScalarType(Trunc);
+     Type *ATy = TypeInfo.inferScalarType(A);
+     if (TruncTy == ATy) {
+       Trunc->replaceAllUsesWith(A);
+     } else {
+       // Don't replace a scalarizing recipe with a widened cast.
+       if (isa<VPReplicateRecipe>(&R))
+         return;
+       if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
+
+         unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
+                                  ? Instruction::SExt
+                                  : Instruction::ZExt;
+         auto *VPC =
+             new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
+         if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
+           // UnderlyingExt has distinct return type, used to retain legacy cost.
+           VPC->setUnderlyingValue(UnderlyingExt);
+         }
+         VPC->insertBefore(&R);
+         Trunc->replaceAllUsesWith(VPC);
+       } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
+         auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
+         VPC->insertBefore(&R);
+         Trunc->replaceAllUsesWith(VPC);
+       }
+     }
+ #ifndef NDEBUG
+     // Verify that the cached type info for both A and its users is still
+     // accurate by comparing it to freshly computed types.
+     VPTypeAnalysis TypeInfo2(
+         R.getParent()->getPlan()->getCanonicalIV()->getScalarType());
+     assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A));
+     for (VPUser *U : A->users()) {
+       auto *R = cast<VPRecipeBase>(U);
+       for (VPValue *VPV : R->definedValues())
+         assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV));
+     }
+ #endif
+   }
+
+   // Simplify (X && Y) || (X && !Y) -> X.
+   // TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
+   // && (Y || Z) and (X || !X) into true. This requires queuing newly created
+   // recipes to be visited during simplification.
+   VPValue *X, *Y, *X1, *Y1;
+   if (match(&R,
+             m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
+                          m_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
+       X == X1 && Y == Y1) {
+     R.getVPSingleValue()->replaceAllUsesWith(X);
+     R.eraseFromParent();
+     return;
+   }
+
+   if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
+     return R.getVPSingleValue()->replaceAllUsesWith(A);
+
+   if (match(&R, m_Not(m_Not(m_VPValue(A)))))
+     return R.getVPSingleValue()->replaceAllUsesWith(A);
+
+   // Remove redundant DerivedIVs, that is 0 + A * 1 -> A and 0 + 0 * x -> 0.
+   if ((match(&R,
+              m_DerivedIV(m_SpecificInt(0), m_VPValue(A), m_SpecificInt(1))) ||
+        match(&R,
+              m_DerivedIV(m_SpecificInt(0), m_SpecificInt(0), m_VPValue()))) &&
+       TypeInfo.inferScalarType(R.getOperand(1)) ==
+           TypeInfo.inferScalarType(R.getVPSingleValue()))
+     return R.getVPSingleValue()->replaceAllUsesWith(R.getOperand(1));
+ }
+
+ /// Try to simplify the recipes in \p Plan.
+ static void simplifyRecipes(VPlan &Plan) {
+   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
+       Plan.getEntry());
+   Type *CanonicalIVType = Plan.getCanonicalIV()->getScalarType();
+   VPTypeAnalysis TypeInfo(CanonicalIVType);
+   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
+     for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
+       simplifyRecipe(R, TypeInfo);
+     }
+   }
+ }
+
void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
                                         unsigned BestUF,
                                         PredicatedScalarEvolution &PSE) {
@@ -942,138 +1087,6 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
}
}
- /// Try to simplify recipe \p R.
- static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
-   using namespace llvm::VPlanPatternMatch;
-
-   if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
-     // Try to remove redundant blend recipes.
-     SmallPtrSet<VPValue *, 4> UniqueValues;
-     if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
-       UniqueValues.insert(Blend->getIncomingValue(0));
-     for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
-       if (!match(Blend->getMask(I), m_False()))
-         UniqueValues.insert(Blend->getIncomingValue(I));
-
-     if (UniqueValues.size() == 1) {
-       Blend->replaceAllUsesWith(*UniqueValues.begin());
-       Blend->eraseFromParent();
-       return;
-     }
-
-     if (Blend->isNormalized())
-       return;
-
-     // Normalize the blend so its first incoming value is used as the initial
-     // value with the others blended into it.
-
-     unsigned StartIndex = 0;
-     for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
-       // If a value's mask is used only by the blend then it can be deadcoded.
-       // TODO: Find the most expensive mask that can be deadcoded, or a mask
-       // that's used by multiple blends where it can be removed from them all.
-       VPValue *Mask = Blend->getMask(I);
-       if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
-         StartIndex = I;
-         break;
-       }
-     }
-
-     SmallVector<VPValue *, 4> OperandsWithMask;
-     OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
-
-     for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
-       if (I == StartIndex)
-         continue;
-       OperandsWithMask.push_back(Blend->getIncomingValue(I));
-       OperandsWithMask.push_back(Blend->getMask(I));
-     }
-
-     auto *NewBlend = new VPBlendRecipe(
-         cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
-     NewBlend->insertBefore(&R);
-
-     VPValue *DeadMask = Blend->getMask(StartIndex);
-     Blend->replaceAllUsesWith(NewBlend);
-     Blend->eraseFromParent();
-     recursivelyDeleteDeadRecipes(DeadMask);
-     return;
-   }
-
-   VPValue *A;
-   if (match(&R, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {
-     VPValue *Trunc = R.getVPSingleValue();
-     Type *TruncTy = TypeInfo.inferScalarType(Trunc);
-     Type *ATy = TypeInfo.inferScalarType(A);
-     if (TruncTy == ATy) {
-       Trunc->replaceAllUsesWith(A);
-     } else {
-       // Don't replace a scalarizing recipe with a widened cast.
-       if (isa<VPReplicateRecipe>(&R))
-         return;
-       if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
-
-         unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
-                                  ? Instruction::SExt
-                                  : Instruction::ZExt;
-         auto *VPC =
-             new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
-         if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
-           // UnderlyingExt has distinct return type, used to retain legacy cost.
-           VPC->setUnderlyingValue(UnderlyingExt);
-         }
-         VPC->insertBefore(&R);
-         Trunc->replaceAllUsesWith(VPC);
-       } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
-         auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
-         VPC->insertBefore(&R);
-         Trunc->replaceAllUsesWith(VPC);
-       }
-     }
- #ifndef NDEBUG
-     // Verify that the cached type info for both A and its users is still
-     // accurate by comparing it to freshly computed types.
-     VPTypeAnalysis TypeInfo2(
-         R.getParent()->getPlan()->getCanonicalIV()->getScalarType());
-     assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A));
-     for (VPUser *U : A->users()) {
-       auto *R = cast<VPRecipeBase>(U);
-       for (VPValue *VPV : R->definedValues())
-         assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV));
-     }
- #endif
-   }
-
-   // Simplify (X && Y) || (X && !Y) -> X.
-   // TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
-   // && (Y || Z) and (X || !X) into true. This requires queuing newly created
-   // recipes to be visited during simplification.
-   VPValue *X, *Y, *X1, *Y1;
-   if (match(&R,
-             m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
-                          m_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
-       X == X1 && Y == Y1) {
-     R.getVPSingleValue()->replaceAllUsesWith(X);
-     R.eraseFromParent();
-     return;
-   }
-
-   if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
-     return R.getVPSingleValue()->replaceAllUsesWith(A);
-
-   if (match(&R, m_Not(m_Not(m_VPValue(A)))))
-     return R.getVPSingleValue()->replaceAllUsesWith(A);
-
-   // Remove redundant DerivedIVs, that is 0 + A * 1 -> A and 0 + 0 * x -> 0.
-   if ((match(&R,
-              m_DerivedIV(m_SpecificInt(0), m_VPValue(A), m_SpecificInt(1))) ||
-        match(&R,
-              m_DerivedIV(m_SpecificInt(0), m_SpecificInt(0), m_VPValue()))) &&
-       TypeInfo.inferScalarType(R.getOperand(1)) ==
-           TypeInfo.inferScalarType(R.getVPSingleValue()))
-     return R.getVPSingleValue()->replaceAllUsesWith(R.getOperand(1));
- }
-
/// Move loop-invariant recipes out of the vector loop region in \p Plan.
static void licm(VPlan &Plan) {
  VPBasicBlock *Preheader = Plan.getVectorPreheader();
@@ -1108,19 +1121,6 @@ static void licm(VPlan &Plan) {
}
}
- /// Try to simplify the recipes in \p Plan.
- static void simplifyRecipes(VPlan &Plan) {
-   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
-       Plan.getEntry());
-   Type *CanonicalIVType = Plan.getCanonicalIV()->getScalarType();
-   VPTypeAnalysis TypeInfo(CanonicalIVType);
-   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
-     for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
-       simplifyRecipe(R, TypeInfo);
-     }
-   }
- }
-
void VPlanTransforms::truncateToMinimalBitwidths(
    VPlan &Plan, const MapVector<Instruction *, uint64_t> &MinBWs) {
#ifndef NDEBUG