@@ -2740,100 +2740,114 @@ static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) {
   // rotate matching code under visitSelect and visitTrunc?
   unsigned Width = Or.getType()->getScalarSizeInBits();

-  // First, find an or'd pair of opposite shifts:
-  // or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)
-  BinaryOperator *Or0, *Or1;
-  if (!match(Or.getOperand(0), m_BinOp(Or0)) ||
-      !match(Or.getOperand(1), m_BinOp(Or1)))
-    return nullptr;
-
-  Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
-  if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
-      !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
-      Or0->getOpcode() == Or1->getOpcode())
+  Instruction *Or0, *Or1;
+  if (!match(Or.getOperand(0), m_Instruction(Or0)) ||
+      !match(Or.getOperand(1), m_Instruction(Or1)))
     return nullptr;

-  // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
-  if (Or0->getOpcode() == BinaryOperator::LShr) {
-    std::swap(Or0, Or1);
-    std::swap(ShVal0, ShVal1);
-    std::swap(ShAmt0, ShAmt1);
-  }
-  assert(Or0->getOpcode() == BinaryOperator::Shl &&
-         Or1->getOpcode() == BinaryOperator::LShr &&
-         "Illegal or(shift,shift) pair");
-
-  // Match the shift amount operands for a funnel shift pattern. This always
-  // matches a subtraction on the R operand.
-  auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
-    // Check for constant shift amounts that sum to the bitwidth.
-    const APInt *LI, *RI;
-    if (match(L, m_APIntAllowUndef(LI)) && match(R, m_APIntAllowUndef(RI)))
-      if (LI->ult(Width) && RI->ult(Width) && (*LI + *RI) == Width)
-        return ConstantInt::get(L->getType(), *LI);
-
-    Constant *LC, *RC;
-    if (match(L, m_Constant(LC)) && match(R, m_Constant(RC)) &&
-        match(L, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
-        match(R, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
-        match(ConstantExpr::getAdd(LC, RC), m_SpecificIntAllowUndef(Width)))
-      return ConstantExpr::mergeUndefsWith(LC, RC);
-
-    // (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width.
-    // We limit this to X < Width in case the backend re-expands the intrinsic,
-    // and has to reintroduce a shift modulo operation (InstCombine might remove
-    // it after this fold). This still doesn't guarantee that the final codegen
-    // will match this original pattern.
-    if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) {
-      KnownBits KnownL = IC.computeKnownBits(L, /*Depth*/ 0, &Or);
-      return KnownL.getMaxValue().ult(Width) ? L : nullptr;
-    }
+  bool IsFshl = true; // Sub on LSHR.
+  SmallVector<Value *, 3> FShiftArgs;

-    // For non-constant cases, the following patterns currently only work for
-    // rotation patterns.
-    // TODO: Add general funnel-shift compatible patterns.
-    if (ShVal0 != ShVal1)
+  // First, find an or'd pair of opposite shifts:
+  // or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)
+  if (isa<BinaryOperator>(Or0) && isa<BinaryOperator>(Or1)) {
+    Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
+    if (!match(Or0,
+               m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
+        !match(Or1,
+               m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
+        Or0->getOpcode() == Or1->getOpcode())
       return nullptr;

-    // For non-constant cases we don't support non-pow2 shift masks.
-    // TODO: Is it worth matching urem as well?
-    if (!isPowerOf2_32(Width))
-      return nullptr;
+    // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
+    if (Or0->getOpcode() == BinaryOperator::LShr) {
+      std::swap(Or0, Or1);
+      std::swap(ShVal0, ShVal1);
+      std::swap(ShAmt0, ShAmt1);
+    }
+    assert(Or0->getOpcode() == BinaryOperator::Shl &&
+           Or1->getOpcode() == BinaryOperator::LShr &&
+           "Illegal or(shift,shift) pair");
+
+    // Match the shift amount operands for a funnel shift pattern. This always
+    // matches a subtraction on the R operand.
+    auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
+      // Check for constant shift amounts that sum to the bitwidth.
+      const APInt *LI, *RI;
+      if (match(L, m_APIntAllowUndef(LI)) && match(R, m_APIntAllowUndef(RI)))
+        if (LI->ult(Width) && RI->ult(Width) && (*LI + *RI) == Width)
+          return ConstantInt::get(L->getType(), *LI);
+
+      Constant *LC, *RC;
+      if (match(L, m_Constant(LC)) && match(R, m_Constant(RC)) &&
+          match(L,
+                m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
+          match(R,
+                m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
+          match(ConstantExpr::getAdd(LC, RC), m_SpecificIntAllowUndef(Width)))
+        return ConstantExpr::mergeUndefsWith(LC, RC);
+
+      // (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width.
+      // We limit this to X < Width in case the backend re-expands the
+      // intrinsic, and has to reintroduce a shift modulo operation (InstCombine
+      // might remove it after this fold). This still doesn't guarantee that the
+      // final codegen will match this original pattern.
+      if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) {
+        KnownBits KnownL = IC.computeKnownBits(L, /*Depth*/ 0, &Or);
+        return KnownL.getMaxValue().ult(Width) ? L : nullptr;
+      }

-    // The shift amount may be masked with negation:
-    // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
-    Value *X;
-    unsigned Mask = Width - 1;
-    if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
-        match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
-      return X;
+      // For non-constant cases, the following patterns currently only work for
+      // rotation patterns.
+      // TODO: Add general funnel-shift compatible patterns.
+      if (ShVal0 != ShVal1)
+        return nullptr;

-    // Similar to above, but the shift amount may be extended after masking,
-    // so return the extended value as the parameter for the intrinsic.
-    if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
-        match(R, m_And(m_Neg(m_ZExt(m_And(m_Specific(X), m_SpecificInt(Mask)))),
-                       m_SpecificInt(Mask))))
-      return L;
+      // For non-constant cases we don't support non-pow2 shift masks.
+      // TODO: Is it worth matching urem as well?
+      if (!isPowerOf2_32(Width))
+        return nullptr;

-    if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
-        match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
-      return L;
+      // The shift amount may be masked with negation:
+      // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
+      Value *X;
+      unsigned Mask = Width - 1;
+      if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
+          match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
+        return X;
+
+      // Similar to above, but the shift amount may be extended after masking,
+      // so return the extended value as the parameter for the intrinsic.
+      if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
+          match(R,
+                m_And(m_Neg(m_ZExt(m_And(m_Specific(X), m_SpecificInt(Mask)))),
+                      m_SpecificInt(Mask))))
+        return L;
+
+      if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
+          match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
+        return L;

-    return nullptr;
-  };
+      return nullptr;
+    };

-  Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
-  bool IsFshl = true; // Sub on LSHR.
-  if (!ShAmt) {
-    ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width);
-    IsFshl = false; // Sub on SHL.
+    Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
+    if (!ShAmt) {
+      ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width);
+      IsFshl = false; // Sub on SHL.
+    }
+    if (!ShAmt)
+      return nullptr;
+
+    FShiftArgs = {ShVal0, ShVal1, ShAmt};
   }
-  if (!ShAmt)
+
+  if (FShiftArgs.empty())
     return nullptr;

   Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
   Function *F = Intrinsic::getDeclaration(Or.getModule(), IID, Or.getType());
-  return CallInst::Create(F, {ShVal0, ShVal1, ShAmt});
+  return CallInst::Create(F, FShiftArgs);
 }

 /// Attempt to combine or(zext(x),shl(zext(y),bw/2) concat packing patterns.
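For illustration, here is a minimal sketch of the constant rotate pattern this matcher folds into a funnel-shift intrinsic, before and after InstCombine (the function name and the i32 width are hypothetical, not taken from the commit):

```llvm
; Before: an or of opposite one-use shifts whose constant amounts
; sum to the scalar bit width (5 + 27 == 32).
define i32 @rotl_i32_by_5(i32 %x) {
  %shl = shl i32 %x, 5
  %shr = lshr i32 %x, 27
  %or = or i32 %shl, %shr
  ret i32 %or
}

; After: matchShiftAmount(ShAmt0, ShAmt1, 32) returns 5, IsFshl stays
; true, and FShiftArgs becomes {%x, %x, 5}, i.e. a rotate-left.
define i32 @rotl_i32_by_5(i32 %x) {
  %or = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 5)
  ret i32 %or
}

declare i32 @llvm.fshl.i32(i32, i32, i32)
```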
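And a sketch of the variable-amount rotation handled by the masked-negation path (again with hypothetical names; the and masks require Width to be a power of two, per the isPowerOf2_32 check):

```llvm
; Before: both shift amounts are masked by Width - 1, and the lshr
; amount is the negation of the shl amount.
define i32 @rotl_i32_var(i32 %x, i32 %s) {
  %amtl = and i32 %s, 31
  %neg = sub i32 0, %s
  %amtr = and i32 %neg, 31
  %shl = shl i32 %x, %amtl
  %shr = lshr i32 %x, %amtr
  %or = or i32 %shl, %shr
  ret i32 %or
}

; After: matchShiftAmount returns the unmasked %s; the masks are
; absorbed by fshl's modulo-Width shift-amount semantics.
define i32 @rotl_i32_var(i32 %x, i32 %s) {
  %or = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %s)
  ret i32 %or
}

declare i32 @llvm.fshl.i32(i32, i32, i32)
```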