llvm · goldsteinn · Mar 7, 2024 · Mar 7, 2024 · nikic · Mar 9, 2024
@@ -380,6 +380,10 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   Instruction *foldBitcastExtElt(ExtractElementInst &ExtElt);
   Instruction *foldCastedBitwiseLogic(BinaryOperator &I);
   Instruction *foldFBinOpOfIntCasts(BinaryOperator &I);
+  // Should only be called by `foldFBinOpOfIntCasts`.
+  Instruction *foldFBinOpOfIntCastsFromSign(
+      BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps,
+      Constant *Op1FpC, SmallVectorImpl<WithCache<const Value *>> &OpsKnown);
   Instruction *foldBinopOfSextBoolToSelect(BinaryOperator &I);
   Instruction *narrowBinOp(TruncInst &Trunc);
   Instruction *narrowMaskedBinOp(BinaryOperator &And);

@@ -1406,41 +1406,27 @@ Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
 //        -> ({s|u}itofp (int_binop x, y))
 //    2) (fp_binop ({s|u}itofp x), FpC)
 //        -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
-Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
-  Value *IntOps[2] = {nullptr, nullptr};
-  Constant *Op1FpC = nullptr;
-
-  // Check for:
-  //    1) (binop ({s|u}itofp x), ({s|u}itofp y))
-  //    2) (binop ({s|u}itofp x), FpC)
-  if (!match(BO.getOperand(0), m_SIToFP(m_Value(IntOps[0]))) &&
-      !match(BO.getOperand(0), m_UIToFP(m_Value(IntOps[0]))))
-    return nullptr;
-
-  if (!match(BO.getOperand(1), m_Constant(Op1FpC)) &&
-      !match(BO.getOperand(1), m_SIToFP(m_Value(IntOps[1]))) &&
-      !match(BO.getOperand(1), m_UIToFP(m_Value(IntOps[1]))))
-    return nullptr;
+//
+// Assuming the sign of the cast for x/y is `OpsFromSigned`.
+Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign(
+    BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps,
+    Constant *Op1FpC, SmallVectorImpl<WithCache<const Value *>> &OpsKnown) {
 
   Type *FPTy = BO.getType();
   Type *IntTy = IntOps[0]->getType();
 
-  // Do we have signed casts?
-  bool OpsFromSigned = isa<SIToFPInst>(BO.getOperand(0));
-
   unsigned IntSz = IntTy->getScalarSizeInBits();
   // This is the maximum number of inuse bits by the integer where the int -> fp
   // casts are exact.
   unsigned MaxRepresentableBits =
       APFloat::semanticsPrecision(FPTy->getScalarType()->getFltSemantics());
 
-  // Cache KnownBits a bit to potentially save some analysis.
-  WithCache<const Value *> OpsKnown[2] = {IntOps[0], IntOps[1]};
-
   // Preserve known number of leading bits. This can allow us to trivial nsw/nuw
   // checks later on.
   unsigned NumUsedLeadingBits[2] = {IntSz, IntSz};
 
+  // NB: This only comes up is OpsFromSigned is true, so there is no need to
-  // NB: This only comes up is OpsFromSigned is true, so there is no need to
+  // NB: This only comes up if OpsFromSigned is true, so there is no need to
-  // NB: This only comes up is OpsFromSigned is true, so there is no need to
+  // NB: This only comes up if OpsFromSigned is true, so there is no need to
+  // cache is between calls to `foldFBinOpOfIntCastsFromSign`.
-  // cache is between calls to `foldFBinOpOfIntCastsFromSign`.
+  // cache it between calls to `foldFBinOpOfIntCastsFromSign`.
-  // cache is between calls to `foldFBinOpOfIntCastsFromSign`.
+  // cache it between calls to `foldFBinOpOfIntCastsFromSign`.
   auto IsNonZero = [&](unsigned OpNo) -> bool {
     if (OpsKnown[OpNo].hasKnownBits() &&
         OpsKnown[OpNo].getKnownBits(SQ).isNonZero())
@@ -1449,14 +1435,19 @@ Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
   };
 
   auto IsNonNeg = [&](unsigned OpNo) -> bool {
-    if (OpsKnown[OpNo].hasKnownBits() &&
-        OpsKnown[OpNo].getKnownBits(SQ).isNonNegative())
-      return true;
-    return isKnownNonNegative(IntOps[OpNo], SQ);
+    // NB: This matches the impl in ValueTracking, we just try to use cached
+    // knownbits here. If we ever start supporting WithCache for
+    // `isKnownNonNegative`, change this to an explicitly call.
-    // `isKnownNonNegative`, change this to an explicitly call.
+    // `isKnownNonNegative`, change this to an explicit call.
-    // `isKnownNonNegative`, change this to an explicitly call.
+    // `isKnownNonNegative`, change this to an explicit call.
+    return OpsKnown[OpNo].getKnownBits(SQ).isNonNegative();
   };
 
   // Check if we know for certain that ({s|u}itofp op) is exact.
   auto IsValidPromotion = [&](unsigned OpNo) -> bool {
+    // Can we treat this operand as the desired sign?
+    if (OpsFromSigned != isa<SIToFPInst>(BO.getOperand(OpNo)) &&
+        !IsNonNeg(OpNo))
+      return false;
+
     // If fp precision >= bitwidth(op) then its exact.
     // NB: This is slightly conservative for `sitofp`. For signed conversion, we
     // can handle `MaxRepresentableBits == IntSz - 1` as the sign bit will be
@@ -1509,13 +1500,6 @@ Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
     return nullptr;
 
   if (Op1FpC == nullptr) {
-    if (OpsFromSigned != isa<SIToFPInst>(BO.getOperand(1))) {
-      // If we have a signed + unsigned, see if we can treat both as signed
-      // (uitofp nneg x) == (sitofp nneg x).
-      if (OpsFromSigned ? !IsNonNeg(1) : !IsNonNeg(0))
-        return nullptr;
-      OpsFromSigned = true;
-    }
     if (!IsValidPromotion(1))
       return nullptr;
   }
@@ -1574,6 +1558,39 @@ Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
   return new UIToFPInst(IntBinOp, FPTy);
 }
 
+// Try to fold:
+//    1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
+//        -> ({s|u}itofp (int_binop x, y))
+//    2) (fp_binop ({s|u}itofp x), FpC)
+//        -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
+Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
+  std::array<Value *, 2> IntOps = {nullptr, nullptr};
+  Constant *Op1FpC = nullptr;
+  // Check for:
+  //    1) (binop ({s|u}itofp x), ({s|u}itofp y))
+  //    2) (binop ({s|u}itofp x), FpC)
+  if (!match(BO.getOperand(0), m_SIToFP(m_Value(IntOps[0]))) &&
+      !match(BO.getOperand(0), m_UIToFP(m_Value(IntOps[0]))))
+    return nullptr;
+
+  if (!match(BO.getOperand(1), m_Constant(Op1FpC)) &&
+      !match(BO.getOperand(1), m_SIToFP(m_Value(IntOps[1]))) &&
+      !match(BO.getOperand(1), m_UIToFP(m_Value(IntOps[1]))))
+    return nullptr;
+
+  // Cache KnownBits a bit to potentially save some analysis.
+  SmallVector<WithCache<const Value *>, 2> OpsKnown = {IntOps[0], IntOps[1]};
+
+  // Try treating x/y as coming from both `uitofp` and `sitofp`. There are
+  // different constraints depending on the sign of the cast.
+  // NB: `(uitofp nneg X)` == `(sitofp nneg X)`.
+  if (Instruction *R = foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/false,
+                                                    IntOps, Op1FpC, OpsKnown))
+    return R;
+  return foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/true, IntOps,
+                                      Op1FpC, OpsKnown);
+}
+
 /// A binop with a constant operand and a sign-extended boolean operand may be
 /// converted into a select of constants by applying the binary operation to
 /// the constant with the two possible values of the extended boolean (0 or -1).

diff --git a/llvm/test/Transforms/InstCombine/add-sitofp.ll b/llvm/test/Transforms/InstCombine/add-sitofp.ll
@@ -6,7 +6,7 @@ define double @x(i32 %a, i32 %b) {
 ; CHECK-NEXT:    [[M:%.*]] = lshr i32 [[A:%.*]], 24
 ; CHECK-NEXT:    [[N:%.*]] = and i32 [[M]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i32 [[N]], 1
-; CHECK-NEXT:    [[P:%.*]] = sitofp i32 [[TMP1]] to double
+; CHECK-NEXT:    [[P:%.*]] = uitofp i32 [[TMP1]] to double
 ; CHECK-NEXT:    ret double [[P]]
 ;
   %m = lshr i32 %a, 24
@@ -20,7 +20,7 @@ define double @test(i32 %a) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:    [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i32 [[A_AND]], 1
-; CHECK-NEXT:    [[RES:%.*]] = sitofp i32 [[TMP1]] to double
+; CHECK-NEXT:    [[RES:%.*]] = uitofp i32 [[TMP1]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
   ; Drop two highest bits to guarantee that %a + 1 doesn't overflow
@@ -49,7 +49,7 @@ define double @test_2(i32 %a, i32 %b) {
 ; CHECK-NEXT:    [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823
 ; CHECK-NEXT:    [[B_AND:%.*]] = and i32 [[B:%.*]], 1073741823
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i32 [[A_AND]], [[B_AND]]
-; CHECK-NEXT:    [[RES:%.*]] = sitofp i32 [[TMP1]] to double
+; CHECK-NEXT:    [[RES:%.*]] = uitofp i32 [[TMP1]] to double
 ; CHECK-NEXT:    ret double [[RES]]
 ;
   ; Drop two highest bits to guarantee that %a + %b doesn't overflow
@@ -89,7 +89,7 @@ define float @test_3(i32 %a, i32 %b) {
 ; CHECK-NEXT:    [[M:%.*]] = lshr i32 [[A:%.*]], 24
 ; CHECK-NEXT:    [[N:%.*]] = and i32 [[M]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i32 [[N]], 1
-; CHECK-NEXT:    [[P:%.*]] = sitofp i32 [[TMP1]] to float
+; CHECK-NEXT:    [[P:%.*]] = uitofp i32 [[TMP1]] to float
 ; CHECK-NEXT:    ret float [[P]]
 ;
   %m = lshr i32 %a, 24
@@ -104,7 +104,7 @@ define <4 x double> @test_4(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-NEXT:    [[A_AND:%.*]] = and <4 x i32> [[A:%.*]], <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
 ; CHECK-NEXT:    [[B_AND:%.*]] = and <4 x i32> [[B:%.*]], <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw <4 x i32> [[A_AND]], [[B_AND]]
-; CHECK-NEXT:    [[RES:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x double>
+; CHECK-NEXT:    [[RES:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x double>
 ; CHECK-NEXT:    ret <4 x double> [[RES]]
 ;
   ; Drop two highest bits to guarantee that %a + %b doesn't overflow

diff --git a/llvm/test/Transforms/InstCombine/binop-itofp.ll b/llvm/test/Transforms/InstCombine/binop-itofp.ll
@@ -110,7 +110,7 @@ define half @test_ui_si_i8_add(i8 noundef %x_in, i8 noundef %y_in) {
 ; CHECK-NEXT:    [[X:%.*]] = and i8 [[X_IN:%.*]], 63
 ; CHECK-NEXT:    [[Y:%.*]] = and i8 [[Y_IN:%.*]], 63
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i8 [[X]], [[Y]]
-; CHECK-NEXT:    [[R:%.*]] = sitofp i8 [[TMP1]] to half
+; CHECK-NEXT:    [[R:%.*]] = uitofp i8 [[TMP1]] to half
 ; CHECK-NEXT:    ret half [[R]]
 ;
   %x = and i8 %x_in, 63
@@ -125,9 +125,8 @@ define half @test_ui_si_i8_add_overflow(i8 noundef %x_in, i8 noundef %y_in) {
 ; CHECK-LABEL: @test_ui_si_i8_add_overflow(
 ; CHECK-NEXT:    [[X:%.*]] = and i8 [[X_IN:%.*]], 63
 ; CHECK-NEXT:    [[Y:%.*]] = and i8 [[Y_IN:%.*]], 65
-; CHECK-NEXT:    [[XF:%.*]] = sitofp i8 [[X]] to half
-; CHECK-NEXT:    [[YF:%.*]] = uitofp i8 [[Y]] to half
-; CHECK-NEXT:    [[R:%.*]] = fadd half [[XF]], [[YF]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = uitofp i8 [[TMP1]] to half
 ; CHECK-NEXT:    ret half [[R]]
 ;
   %x = and i8 %x_in, 63
@@ -152,9 +151,8 @@ define half @test_ui_ui_i8_sub_C(i8 noundef %x_in) {
 
 define half @test_ui_ui_i8_sub_C_fail_overflow(i8 noundef %x_in) {
 ; CHECK-LABEL: @test_ui_ui_i8_sub_C_fail_overflow(
-; CHECK-NEXT:    [[X:%.*]] = and i8 [[X_IN:%.*]], 127
-; CHECK-NEXT:    [[XF:%.*]] = uitofp i8 [[X]] to half
-; CHECK-NEXT:    [[R:%.*]] = fadd half [[XF]], 0xHD800
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X_IN:%.*]], -128
+; CHECK-NEXT:    [[R:%.*]] = sitofp i8 [[TMP1]] to half
 ; CHECK-NEXT:    ret half [[R]]
 ;
   %x = and i8 %x_in, 127
@@ -212,8 +210,8 @@ define half @test_si_si_i8_sub_C(i8 noundef %x_in) {
 define half @test_si_si_i8_sub_C_fail_overflow(i8 noundef %x_in) {
 ; CHECK-LABEL: @test_si_si_i8_sub_C_fail_overflow(
 ; CHECK-NEXT:    [[X:%.*]] = and i8 [[X_IN:%.*]], 65
-; CHECK-NEXT:    [[XF:%.*]] = sitofp i8 [[X]] to half
-; CHECK-NEXT:    [[R:%.*]] = fadd half [[XF]], 0xH5400
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw i8 [[X]], 64
+; CHECK-NEXT:    [[R:%.*]] = uitofp i8 [[TMP1]] to half
 ; CHECK-NEXT:    ret half [[R]]
 ;
   %x = and i8 %x_in, 65
@@ -242,9 +240,8 @@ define half @test_ui_si_i8_sub_fail_maybe_sign(i8 noundef %x_in, i8 noundef %y_i
 ; CHECK-LABEL: @test_ui_si_i8_sub_fail_maybe_sign(
 ; CHECK-NEXT:    [[X:%.*]] = or i8 [[X_IN:%.*]], 64
 ; CHECK-NEXT:    [[Y:%.*]] = and i8 [[Y_IN:%.*]], 63
-; CHECK-NEXT:    [[XF:%.*]] = uitofp i8 [[X]] to half
-; CHECK-NEXT:    [[YF:%.*]] = sitofp i8 [[Y]] to half
-; CHECK-NEXT:    [[R:%.*]] = fsub half [[XF]], [[YF]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub nuw nsw i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = uitofp i8 [[TMP1]] to half
 ; CHECK-NEXT:    ret half [[R]]
 ;
   %x = or i8 %x_in, 64
@@ -273,8 +270,8 @@ define half @test_ui_ui_i8_mul(i8 noundef %x_in, i8 noundef %y_in) {
 
 define half @test_ui_ui_i8_mul_C(i8 noundef %x_in) {
 ; CHECK-LABEL: @test_ui_ui_i8_mul_C(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 [[X_IN:%.*]], 4
-; CHECK-NEXT:    [[R:%.*]] = uitofp i8 [[TMP1]] to half
+; CHECK-NEXT:    [[X:%.*]] = shl i8 [[X_IN:%.*]], 4
+; CHECK-NEXT:    [[R:%.*]] = uitofp i8 [[X]] to half
 ; CHECK-NEXT:    ret half [[R]]
 ;
   %x = and i8 %x_in, 15
@@ -368,7 +365,7 @@ define half @test_ui_si_i8_mul(i8 noundef %x_in, i8 noundef %y_in) {
 ; CHECK-NEXT:    [[YY:%.*]] = and i8 [[Y_IN:%.*]], 7
 ; CHECK-NEXT:    [[Y:%.*]] = add nuw nsw i8 [[YY]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw nsw i8 [[X]], [[Y]]
-; CHECK-NEXT:    [[R:%.*]] = sitofp i8 [[TMP1]] to half
+; CHECK-NEXT:    [[R:%.*]] = uitofp i8 [[TMP1]] to half
 ; CHECK-NEXT:    ret half [[R]]
 ;
   %xx = and i8 %x_in, 6
@@ -386,9 +383,8 @@ define half @test_ui_si_i8_mul_fail_maybe_zero(i8 noundef %x_in, i8 noundef %y_i
 ; CHECK-NEXT:    [[XX:%.*]] = and i8 [[X_IN:%.*]], 7
 ; CHECK-NEXT:    [[X:%.*]] = add nuw nsw i8 [[XX]], 1
 ; CHECK-NEXT:    [[Y:%.*]] = and i8 [[Y_IN:%.*]], 7
-; CHECK-NEXT:    [[XF:%.*]] = sitofp i8 [[X]] to half
-; CHECK-NEXT:    [[YF:%.*]] = uitofp i8 [[Y]] to half
-; CHECK-NEXT:    [[R:%.*]] = fmul half [[XF]], [[YF]]
+; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw nsw i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = uitofp i8 [[TMP1]] to half
 ; CHECK-NEXT:    ret half [[R]]
 ;
   %xx = and i8 %x_in, 7
@@ -694,7 +690,7 @@ define half @test_ui_si_i16_mul(i16 noundef %x_in, i16 noundef %y_in) {
 ; CHECK-NEXT:    [[YY:%.*]] = and i16 [[Y_IN:%.*]], 126
 ; CHECK-NEXT:    [[Y:%.*]] = or disjoint i16 [[YY]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw nsw i16 [[X]], [[Y]]
-; CHECK-NEXT:    [[R:%.*]] = sitofp i16 [[TMP1]] to half
+; CHECK-NEXT:    [[R:%.*]] = uitofp i16 [[TMP1]] to half
 ; CHECK-NEXT:    ret half [[R]]
 ;
   %xx = and i16 %x_in, 126
@@ -807,9 +803,8 @@ define half @test_ui_ui_i12_sub_fail_overflow(i12 noundef %x_in, i12 noundef %y_
 ; CHECK-LABEL: @test_ui_ui_i12_sub_fail_overflow(
 ; CHECK-NEXT:    [[X:%.*]] = and i12 [[X_IN:%.*]], 1023
 ; CHECK-NEXT:    [[Y:%.*]] = and i12 [[Y_IN:%.*]], 2047
-; CHECK-NEXT:    [[XF:%.*]] = uitofp i12 [[X]] to half
-; CHECK-NEXT:    [[YF:%.*]] = uitofp i12 [[Y]] to half
-; CHECK-NEXT:    [[R:%.*]] = fsub half [[XF]], [[YF]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub nsw i12 [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = sitofp i12 [[TMP1]] to half
 ; CHECK-NEXT:    ret half [[R]]
 ;
   %x = and i12 %x_in, 1023
@@ -984,7 +979,7 @@ define half @test_ui_si_i12_mul_nsw(i12 noundef %x_in, i12 noundef %y_in) {
 ; CHECK-NEXT:    [[YY:%.*]] = and i12 [[Y_IN:%.*]], 30
 ; CHECK-NEXT:    [[Y:%.*]] = or disjoint i12 [[YY]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw nsw i12 [[X]], [[Y]]
-; CHECK-NEXT:    [[R:%.*]] = sitofp i12 [[TMP1]] to half
+; CHECK-NEXT:    [[R:%.*]] = uitofp i12 [[TMP1]] to half
 ; CHECK-NEXT:    ret half [[R]]
 ;
   %xx = and i12 %x_in, 31
@@ -996,3 +991,16 @@ define half @test_ui_si_i12_mul_nsw(i12 noundef %x_in, i12 noundef %y_in) {
   %r = fmul half %xf, %yf
   ret half %r
 }
+
+define float @test_ui_add_with_signed_constant(i32 %shr.i) {
+; CHECK-LABEL: @test_ui_add_with_signed_constant(
+; CHECK-NEXT:    [[AND_I:%.*]] = and i32 [[SHR_I:%.*]], 32767
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i32 [[AND_I]], -16383
+; CHECK-NEXT:    [[ADD:%.*]] = sitofp i32 [[TMP1]] to float
+; CHECK-NEXT:    ret float [[ADD]]
+;
+  %and.i = and i32 %shr.i, 32767
+  %sub = uitofp i32 %and.i to float
+  %add = fadd float %sub, -16383.0
+  ret float %add
+}