@@ -421,11 +421,12 @@ define i32 @shl_lshr_constants(i32 %x) {
ret i32 %r
}
+ ; Pre-shift a constant to eliminate lshr.
+
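+ ; Illustrative arithmetic: 40 == 0b0010_1000 == 5 << 3, so (40 << x) >> 3 keeps the
+ ; same low bits as 5 << x, and the high bits that could differ are overwritten by
+ ; the 'or ..., -32' below, so they are not demanded.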
define i8 @shl_lshr_demand1(i8 %x) {
; CHECK-LABEL: @shl_lshr_demand1(
- ; CHECK-NEXT: [[SHL:%.*]] = shl i8 40, [[X:%.*]]
- ; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i8 [[SHL]], 3
- ; CHECK-NEXT: [[R:%.*]] = or i8 [[LSHR]], -32
+ ; CHECK-NEXT: [[TMP1:%.*]] = shl i8 5, [[X:%.*]]
+ ; CHECK-NEXT: [[R:%.*]] = or i8 [[TMP1]], -32
; CHECK-NEXT: ret i8 [[R]]
;
%shl = shl i8 40, %x ; 0b0010_1000
@@ -434,11 +435,13 @@ define i8 @shl_lshr_demand1(i8 %x) {
ret i8 %r
}
+ ; Pre-shift a constant to eliminate disguised lshr.
+
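+ ; Note: the old checks show the ashr already canonicalized to 'lshr exact', and the
+ ; 'or ..., -32' masks the bits an ashr would fill differently, so the same 40 >> 3 == 5
+ ; pre-shift applies; the original shl survives because @use8 keeps it alive.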
define i8 @shl_ashr_demand2(i8 %x) {
; CHECK-LABEL: @shl_ashr_demand2(
; CHECK-NEXT: [[SHL:%.*]] = shl i8 40, [[X:%.*]]
; CHECK-NEXT: call void @use8(i8 [[SHL]])
- ; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i8 [[SHL]], 3
+ ; CHECK-NEXT: [[TMP1:%.*]] = shl i8 5, [[X]]
; CHECK-NEXT: [[R:%.*]] = or i8 [[TMP1]], -32
; CHECK-NEXT: ret i8 [[R]]
;
@@ -449,6 +452,8 @@ define i8 @shl_ashr_demand2(i8 %x) {
ret i8 %r
}
+ ; It is not safe to pre-shift because we demand an extra high bit.
+
define i8 @shl_lshr_demand3(i8 %x) {
; CHECK-LABEL: @shl_lshr_demand3(
; CHECK-NEXT: [[SHL:%.*]] = shl i8 40, [[X:%.*]]
@@ -462,6 +467,8 @@ define i8 @shl_lshr_demand3(i8 %x) {
ret i8 %r
}
+ ; It is not valid to pre-shift because we lose the low bit of 44.
+
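+ ; 44 == 0b0010_1100 and 44 >> 3 == 5, but 5 << 3 == 40 != 44: the dropped low bit
+ ; changes the result (e.g. for x == 1, (44 << 1) >> 3 == 11 while 5 << 1 == 10).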
define i8 @shl_lshr_demand4(i8 %x) {
; CHECK-LABEL: @shl_lshr_demand4(
; CHECK-NEXT: [[SHL:%.*]] = shl i8 44, [[X:%.*]]
@@ -475,11 +482,12 @@ define i8 @shl_lshr_demand4(i8 %x) {
ret i8 %r
}
+ ; Splat vectors work too, and we don't care what instruction reduces demand for high bits.
+
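+ ; -108 is the bit pattern 0b1001_0100 (148), 148 >> 2 == 37 with no low bits lost,
+ ; and the trunc to i6 drops the 2 high bits, so the splat constant can be pre-shifted.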
define <2 x i6> @shl_lshr_demand5(<2 x i8> %x) {
; CHECK-LABEL: @shl_lshr_demand5(
- ; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> <i8 -108, i8 -108>, [[X:%.*]]
- ; CHECK-NEXT: [[LSHR:%.*]] = lshr exact <2 x i8> [[SHL]], <i8 2, i8 2>
- ; CHECK-NEXT: [[R:%.*]] = trunc <2 x i8> [[LSHR]] to <2 x i6>
+ ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> <i8 37, i8 37>, [[X:%.*]]
+ ; CHECK-NEXT: [[R:%.*]] = trunc <2 x i8> [[TMP1]] to <2 x i6>
; CHECK-NEXT: ret <2 x i6> [[R]]
;
%shl = shl <2 x i8> <i8 148, i8 148>, %x ; 0b1001_0100
@@ -488,6 +496,8 @@ define <2 x i6> @shl_lshr_demand5(<2 x i8> %x) {
ret <2 x i6> %r
}
+ ; TODO: allow undef/poison elements for this transform.
+
define <2 x i6> @shl_lshr_demand5_undef_left(<2 x i8> %x) {
; CHECK-LABEL: @shl_lshr_demand5_undef_left(
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> <i8 undef, i8 -108>, [[X:%.*]]
@@ -501,6 +511,8 @@ define <2 x i6> @shl_lshr_demand5_undef_left(<2 x i8> %x) {
ret <2 x i6> %r
}
+ ; TODO: allow undef/poison elements for this transform.
+
define <2 x i6> @shl_lshr_demand5_undef_right(<2 x i8> %x) {
; CHECK-LABEL: @shl_lshr_demand5_undef_right(
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> <i8 -108, i8 -108>, [[X:%.*]]
@@ -514,6 +526,8 @@ define <2 x i6> @shl_lshr_demand5_undef_right(<2 x i8> %x) {
ret <2 x i6> %r
}
+ ; TODO: allow non-splat vector constants.
+
define <2 x i6> @shl_lshr_demand5_nonuniform_vec_left(<2 x i8> %x) {
; CHECK-LABEL: @shl_lshr_demand5_nonuniform_vec_left(
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> <i8 -108, i8 -108>, [[X:%.*]]
@@ -527,11 +541,12 @@ define <2 x i6> @shl_lshr_demand5_nonuniform_vec_left(<2 x i8> %x) {
ret <2 x i6> %r
}
+ ; A non-splat shl constant is ok.
+
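+ ; Both lanes have their low 2 bits clear, so 148 >> 2 == 37 and 144 >> 2 == 36 lose
+ ; nothing and the transform applies lane-wise despite the non-splat shl constant.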
define <2 x i6> @shl_lshr_demand5_nonuniform_vec_right(<2 x i8> %x) {
; CHECK-LABEL: @shl_lshr_demand5_nonuniform_vec_right(
- ; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> <i8 -108, i8 -112>, [[X:%.*]]
- ; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i8> [[SHL]], <i8 2, i8 2>
- ; CHECK-NEXT: [[R:%.*]] = trunc <2 x i8> [[LSHR]] to <2 x i6>
+ ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> <i8 37, i8 36>, [[X:%.*]]
+ ; CHECK-NEXT: [[R:%.*]] = trunc <2 x i8> [[TMP1]] to <2 x i6>
; CHECK-NEXT: ret <2 x i6> [[R]]
;
%shl = shl <2 x i8> <i8 148, i8 144>, %x ; 0b1001_0100, 0b1001_0000
@@ -540,6 +555,8 @@ define <2 x i6> @shl_lshr_demand5_nonuniform_vec_right(<2 x i8> %x) {
ret <2 x i6> %r
}
+ ; This is possible, but may require significant changes to the demanded bits framework.
+
define <2 x i6> @shl_lshr_demand5_nonuniform_vec_both(<2 x i8> %x) {
; CHECK-LABEL: @shl_lshr_demand5_nonuniform_vec_both(
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> <i8 -104, i8 -108>, [[X:%.*]]
@@ -553,11 +570,12 @@ define <2 x i6> @shl_lshr_demand5_nonuniform_vec_both(<2 x i8> %x) {
ret <2 x i6> %r
}
+ ; 'and' can reduce demand for high bits too.
+
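+ ; 32912 == 0b1000_0000_1001_0000 and 32912 >> 4 == 2057; the mask 4094
+ ; (0b0000_1111_1111_1110) does not demand the top 4 bits, so the lshr folds into
+ ; the shl constant here as well.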
define i16 @shl_lshr_demand6(i16 %x) {
; CHECK-LABEL: @shl_lshr_demand6(
- ; CHECK-NEXT: [[SHL:%.*]] = shl i16 -32624, [[X:%.*]]
- ; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i16 [[SHL]], 4
- ; CHECK-NEXT: [[R:%.*]] = and i16 [[LSHR]], 4094
+ ; CHECK-NEXT: [[TMP1:%.*]] = shl i16 2057, [[X:%.*]]
+ ; CHECK-NEXT: [[R:%.*]] = and i16 [[TMP1]], 4094
; CHECK-NEXT: ret i16 [[R]]
;
%shl = shl i16 32912, %x ; 0b1000_0000_1001_0000