@@ -2732,100 +2732,114 @@ static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) {
   // rotate matching code under visitSelect and visitTrunc?
   unsigned Width = Or.getType()->getScalarSizeInBits();
 
-  // First, find an or'd pair of opposite shifts:
-  // or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)
-  BinaryOperator *Or0, *Or1;
-  if (!match(Or.getOperand(0), m_BinOp(Or0)) ||
-      !match(Or.getOperand(1), m_BinOp(Or1)))
-    return nullptr;
-
-  Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
-  if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
-      !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
-      Or0->getOpcode() == Or1->getOpcode())
+  Instruction *Or0, *Or1;
+  if (!match(Or.getOperand(0), m_Instruction(Or0)) ||
+      !match(Or.getOperand(1), m_Instruction(Or1)))
     return nullptr;
 
-  // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
-  if (Or0->getOpcode() == BinaryOperator::LShr) {
-    std::swap(Or0, Or1);
-    std::swap(ShVal0, ShVal1);
-    std::swap(ShAmt0, ShAmt1);
-  }
-  assert(Or0->getOpcode() == BinaryOperator::Shl &&
-         Or1->getOpcode() == BinaryOperator::LShr &&
-         "Illegal or(shift,shift) pair");
-
-  // Match the shift amount operands for a funnel shift pattern. This always
-  // matches a subtraction on the R operand.
-  auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
-    // Check for constant shift amounts that sum to the bitwidth.
-    const APInt *LI, *RI;
-    if (match(L, m_APIntAllowUndef(LI)) && match(R, m_APIntAllowUndef(RI)))
-      if (LI->ult(Width) && RI->ult(Width) && (*LI + *RI) == Width)
-        return ConstantInt::get(L->getType(), *LI);
-
-    Constant *LC, *RC;
-    if (match(L, m_Constant(LC)) && match(R, m_Constant(RC)) &&
-        match(L, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
-        match(R, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
-        match(ConstantExpr::getAdd(LC, RC), m_SpecificIntAllowUndef(Width)))
-      return ConstantExpr::mergeUndefsWith(LC, RC);
-
-    // (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width.
-    // We limit this to X < Width in case the backend re-expands the intrinsic,
-    // and has to reintroduce a shift modulo operation (InstCombine might remove
-    // it after this fold). This still doesn't guarantee that the final codegen
-    // will match this original pattern.
-    if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) {
-      KnownBits KnownL = IC.computeKnownBits(L, /*Depth*/ 0, &Or);
-      return KnownL.getMaxValue().ult(Width) ? L : nullptr;
-    }
+  bool IsFshl = true; // Sub on LSHR.
+  SmallVector<Value *, 3> FShiftArgs;
 
-    // For non-constant cases, the following patterns currently only work for
-    // rotation patterns.
-    // TODO: Add general funnel-shift compatible patterns.
-    if (ShVal0 != ShVal1)
+  // First, find an or'd pair of opposite shifts:
+  // or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)
+  if (isa<BinaryOperator>(Or0) && isa<BinaryOperator>(Or1)) {
+    Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
+    if (!match(Or0,
+               m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
+        !match(Or1,
+               m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
+        Or0->getOpcode() == Or1->getOpcode())
       return nullptr;
 
-    // For non-constant cases we don't support non-pow2 shift masks.
-    // TODO: Is it worth matching urem as well?
-    if (!isPowerOf2_32(Width))
-      return nullptr;
+    // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
+    if (Or0->getOpcode() == BinaryOperator::LShr) {
+      std::swap(Or0, Or1);
+      std::swap(ShVal0, ShVal1);
+      std::swap(ShAmt0, ShAmt1);
+    }
+    assert(Or0->getOpcode() == BinaryOperator::Shl &&
+           Or1->getOpcode() == BinaryOperator::LShr &&
+           "Illegal or(shift,shift) pair");
+
+    // Match the shift amount operands for a funnel shift pattern. This always
+    // matches a subtraction on the R operand.
+    auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
+      // Check for constant shift amounts that sum to the bitwidth.
+      const APInt *LI, *RI;
+      if (match(L, m_APIntAllowUndef(LI)) && match(R, m_APIntAllowUndef(RI)))
+        if (LI->ult(Width) && RI->ult(Width) && (*LI + *RI) == Width)
+          return ConstantInt::get(L->getType(), *LI);
+
+      Constant *LC, *RC;
+      if (match(L, m_Constant(LC)) && match(R, m_Constant(RC)) &&
+          match(L,
+                m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
+          match(R,
+                m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
+          match(ConstantExpr::getAdd(LC, RC), m_SpecificIntAllowUndef(Width)))
+        return ConstantExpr::mergeUndefsWith(LC, RC);
+
+      // (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width.
+      // We limit this to X < Width in case the backend re-expands the
+      // intrinsic, and has to reintroduce a shift modulo operation (InstCombine
+      // might remove it after this fold). This still doesn't guarantee that the
+      // final codegen will match this original pattern.
+      if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) {
+        KnownBits KnownL = IC.computeKnownBits(L, /*Depth*/ 0, &Or);
+        return KnownL.getMaxValue().ult(Width) ? L : nullptr;
+      }
 
-    // The shift amount may be masked with negation:
-    // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
-    Value *X;
-    unsigned Mask = Width - 1;
-    if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
-        match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
-      return X;
+      // For non-constant cases, the following patterns currently only work for
+      // rotation patterns.
+      // TODO: Add general funnel-shift compatible patterns.
+      if (ShVal0 != ShVal1)
+        return nullptr;
 
-    // Similar to above, but the shift amount may be extended after masking,
-    // so return the extended value as the parameter for the intrinsic.
-    if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
-        match(R, m_And(m_Neg(m_ZExt(m_And(m_Specific(X), m_SpecificInt(Mask)))),
-                       m_SpecificInt(Mask))))
-      return L;
+      // For non-constant cases we don't support non-pow2 shift masks.
+      // TODO: Is it worth matching urem as well?
+      if (!isPowerOf2_32(Width))
+        return nullptr;
 
-    if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
-        match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
-      return L;
+      // The shift amount may be masked with negation:
+      // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
+      Value *X;
+      unsigned Mask = Width - 1;
+      if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
+          match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
+        return X;
+
+      // Similar to above, but the shift amount may be extended after masking,
+      // so return the extended value as the parameter for the intrinsic.
+      if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
+          match(R,
+                m_And(m_Neg(m_ZExt(m_And(m_Specific(X), m_SpecificInt(Mask)))),
+                      m_SpecificInt(Mask))))
+        return L;
+
+      if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
+          match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
+        return L;
 
-    return nullptr;
-  };
+      return nullptr;
+    };
 
-  Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
-  bool IsFshl = true; // Sub on LSHR.
-  if (!ShAmt) {
-    ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width);
-    IsFshl = false; // Sub on SHL.
+    Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
+    if (!ShAmt) {
+      ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width);
+      IsFshl = false; // Sub on SHL.
+    }
+    if (!ShAmt)
+      return nullptr;
+
+    FShiftArgs = {ShVal0, ShVal1, ShAmt};
   }
-  if (!ShAmt)
+
+  if (FShiftArgs.empty())
     return nullptr;
 
   Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
   Function *F = Intrinsic::getDeclaration(Or.getModule(), IID, Or.getType());
-  return CallInst::Create(F, {ShVal0, ShVal1, ShAmt});
+  return CallInst::Create(F, FShiftArgs);
 }
 
 /// Attempt to combine or(zext(x),shl(zext(y),bw/2) concat packing patterns.
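For reference, a minimal IR sketch of the two main folds matchFunnelShift performs (behavior is unchanged by this refactoring, which only routes the result through FShiftArgs); the function names and tests below are illustrative, not taken from this commit:

; Constant shift amounts that sum to the bitwidth (11 + 21 == 32) fold to a
; funnel shift even when the two shifted values differ.
define i32 @fshl_const(i32 %x, i32 %y) {
  %shl = shl i32 %x, 11
  %shr = lshr i32 %y, 21
  %or = or i32 %shl, %shr
  ; folds to: %or = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 11)
  ret i32 %or
}

; A variable rotate: the shift amount is masked with negation, matching
; (shl X, (A & 31)) | (lshr X, ((-A) & 31)) for the power-of-2 width 32.
define i32 @rotl_var(i32 %x, i32 %amt) {
  %lowbits = and i32 %amt, 31
  %shl = shl i32 %x, %lowbits
  %neg = sub i32 0, %amt
  %negbits = and i32 %neg, 31
  %shr = lshr i32 %x, %negbits
  %or = or i32 %shl, %shr
  ; folds to: %or = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %amt)
  ret i32 %or
}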