InstCombine: Fold ldexp(ldexp(x, a), b) -> ldexp(x, a + b)

arsenm · arsenm · commit 4f9aad964f4a · 2023-07-07T08:15:09.000-04:00
The hazard is an overflow or underflow that would have occurred in the
inner ldexp but that the combined exponent avoids. The fold is safe if
both exponents are known to have the same sign, or if the reassoc flag
is present on both calls and so permits unsafe reassociation.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1026,6 +1026,19 @@ static std::optional<bool> getKnownSign(Value *Op, Instruction *CxtI,
       ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL);
 }
 
+/// Return true if two values \p Op0 and \p Op1 are known to have the same sign.
+static bool signBitMustBeTheSame(Value *Op0, Value *Op1, Instruction *CxtI,
+                                 const DataLayout &DL, AssumptionCache *AC,
+                                 DominatorTree *DT) {
+  std::optional<bool> Known1 = getKnownSign(Op1, CxtI, DL, AC, DT);
+  if (!Known1)
+    return false;
+  std::optional<bool> Known0 = getKnownSign(Op0, CxtI, DL, AC, DT);
+  if (!Known0)
+    return false;
+  return *Known0 == *Known1;
+}
+
 /// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
 /// can trigger other combines.
 static Instruction *moveAddAfterMinMax(IntrinsicInst *II,
@@ -2358,6 +2371,42 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     }
     break;
   }
+  case Intrinsic::ldexp: {
+    // ldexp(ldexp(x, a), b) -> ldexp(x, a + b)
+    //
+    // The danger is if the first ldexp would overflow to infinity or underflow
+    // to zero, but the combined exponent avoids it. We ignore this with
+    // reassoc.
+    //
+    // It's also safe to fold if we know both exponents are >= 0 or <= 0 since
+    // it would just double down on the overflow/underflow which would occur
+    // anyway.
+    //
+    // TODO: Could do better if we had range tracking for the input value
+    // exponent. Also could broaden sign check to cover == 0 case.
+    Value *Src = II->getArgOperand(0);
+    Value *Exp = II->getArgOperand(1);
+    Value *InnerSrc;
+    Value *InnerExp;
+    if (match(Src, m_OneUse(m_Intrinsic<Intrinsic::ldexp>(
+                       m_Value(InnerSrc), m_Value(InnerExp)))) &&
+        Exp->getType() == InnerExp->getType()) {
+      FastMathFlags FMF = II->getFastMathFlags();
+      FastMathFlags InnerFlags = cast<FPMathOperator>(Src)->getFastMathFlags();
+
+      if ((FMF.allowReassoc() && InnerFlags.allowReassoc()) ||
+          signBitMustBeTheSame(Exp, InnerExp, II, DL, &AC, &DT)) {
+        // TODO: Add nsw/nuw probably safe if integer type exceeds exponent
+        // width.
+        Value *NewExp = Builder.CreateAdd(InnerExp, Exp);
+        II->setArgOperand(1, NewExp);
+        II->setFastMathFlags(InnerFlags); // Or the inner flags.
+        return replaceOperand(*II, 0, InnerSrc);
+      }
+    }
+
+    break;
+  }
   case Intrinsic::ptrauth_auth:
   case Intrinsic::ptrauth_resign: {
     // (sign|resign) + (auth|resign) can be folded by omitting the middle
diff --git a/llvm/test/Transforms/InstCombine/ldexp.ll b/llvm/test/Transforms/InstCombine/ldexp.ll
@@ -368,8 +368,8 @@ define float @ldexp_ldexp_reassoc(float %x, i32 %a, i32 %b) {
 define float @ldexp_reassoc_ldexp_reassoc(float %x, i32 %a, i32 %b) {
 ; CHECK-LABEL: define float @ldexp_reassoc_ldexp_reassoc
 ; CHECK-SAME: (float [[X:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) {
-; CHECK-NEXT:    [[LDEXP0:%.*]] = call reassoc float @llvm.ldexp.f32.i32(float [[X]], i32 [[A]])
-; CHECK-NEXT:    [[LDEXP1:%.*]] = call reassoc float @llvm.ldexp.f32.i32(float [[LDEXP0]], i32 [[B]])
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT:    [[LDEXP1:%.*]] = call reassoc float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP1]])
 ; CHECK-NEXT:    ret float [[LDEXP1]]
 ;
   %ldexp0 = call reassoc float @llvm.ldexp.f32.i32(float %x, i32 %a)
@@ -381,8 +381,8 @@ define float @ldexp_reassoc_ldexp_reassoc(float %x, i32 %a, i32 %b) {
 define float @ldexp_reassoc_ldexp_reassoc_preserve_flags(float %x, i32 %a, i32 %b) {
 ; CHECK-LABEL: define float @ldexp_reassoc_ldexp_reassoc_preserve_flags
 ; CHECK-SAME: (float [[X:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) {
-; CHECK-NEXT:    [[LDEXP0:%.*]] = call reassoc ninf float @llvm.ldexp.f32.i32(float [[X]], i32 [[A]])
-; CHECK-NEXT:    [[LDEXP1:%.*]] = call reassoc nnan float @llvm.ldexp.f32.i32(float [[LDEXP0]], i32 [[B]])
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT:    [[LDEXP1:%.*]] = call reassoc nnan ninf float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP1]])
 ; CHECK-NEXT:    ret float [[LDEXP1]]
 ;
   %ldexp0 = call reassoc ninf float @llvm.ldexp.f32.i32(float %x, i32 %a)
@@ -393,8 +393,8 @@ define float @ldexp_reassoc_ldexp_reassoc_preserve_flags(float %x, i32 %a, i32 %
 define <2 x float> @ldexp_reassoc_ldexp_reassoc_vec(<2 x float> %x, <2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: define <2 x float> @ldexp_reassoc_ldexp_reassoc_vec
 ; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]]) {
-; CHECK-NEXT:    [[LDEXP0:%.*]] = call reassoc <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> [[X]], <2 x i32> [[A]])
-; CHECK-NEXT:    [[LDEXP1:%.*]] = call reassoc <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> [[LDEXP0]], <2 x i32> [[B]])
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[A]], [[B]]
+; CHECK-NEXT:    [[LDEXP1:%.*]] = call reassoc <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> [[X]], <2 x i32> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x float> [[LDEXP1]]
 ;
   %ldexp0 = call reassoc <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> %x, <2 x i32> %a)
@@ -432,8 +432,7 @@ define float @ldexp_ldexp_different_exp_type(float %x, i32 %a, i64 %b) {
 define float @ldexp_ldexp_constants(float %x) {
 ; CHECK-LABEL: define float @ldexp_ldexp_constants
 ; CHECK-SAME: (float [[X:%.*]]) {
-; CHECK-NEXT:    [[LDEXP0:%.*]] = call reassoc float @llvm.ldexp.f32.i32(float [[X]], i32 8)
-; CHECK-NEXT:    [[LDEXP1:%.*]] = call reassoc float @llvm.ldexp.f32.i32(float [[LDEXP0]], i32 24)
+; CHECK-NEXT:    [[LDEXP1:%.*]] = call reassoc float @llvm.ldexp.f32.i32(float [[X]], i32 32)
 ; CHECK-NEXT:    ret float [[LDEXP1]]
 ;
   %ldexp0 = call reassoc float @llvm.ldexp.f32.i32(float %x, i32 8)
@@ -444,8 +443,7 @@ define float @ldexp_ldexp_constants(float %x) {
 define float @ldexp_ldexp_opposite_constants(float %x) {
 ; CHECK-LABEL: define float @ldexp_ldexp_opposite_constants
 ; CHECK-SAME: (float [[X:%.*]]) {
-; CHECK-NEXT:    [[LDEXP0:%.*]] = call reassoc float @llvm.ldexp.f32.i32(float [[X]], i32 8)
-; CHECK-NEXT:    [[LDEXP1:%.*]] = call reassoc float @llvm.ldexp.f32.i32(float [[LDEXP0]], i32 -8)
+; CHECK-NEXT:    [[LDEXP1:%.*]] = call reassoc float @llvm.ldexp.f32.i32(float [[X]], i32 0)
 ; CHECK-NEXT:    ret float [[LDEXP1]]
 ;
   %ldexp0 = call reassoc float @llvm.ldexp.f32.i32(float %x, i32 8)
@@ -456,9 +454,7 @@ define float @ldexp_ldexp_opposite_constants(float %x) {
 define float @ldexp_ldexp_negated_variable_reassoc(float %x, i32 %a) {
 ; CHECK-LABEL: define float @ldexp_ldexp_negated_variable_reassoc
 ; CHECK-SAME: (float [[X:%.*]], i32 [[A:%.*]]) {
-; CHECK-NEXT:    [[LDEXP0:%.*]] = call reassoc float @llvm.ldexp.f32.i32(float [[X]], i32 [[A]])
-; CHECK-NEXT:    [[NEG_A:%.*]] = sub i32 0, [[A]]
-; CHECK-NEXT:    [[LDEXP1:%.*]] = call reassoc float @llvm.ldexp.f32.i32(float [[LDEXP0]], i32 [[NEG_A]])
+; CHECK-NEXT:    [[LDEXP1:%.*]] = call reassoc float @llvm.ldexp.f32.i32(float [[X]], i32 0)
 ; CHECK-NEXT:    ret float [[LDEXP1]]
 ;
   %ldexp0 = call reassoc float @llvm.ldexp.f32.i32(float %x, i32 %a)
@@ -514,8 +510,8 @@ define float @ldexp_ldexp_both_exp_known_positive(float %x, i32 %a.arg, i32 %b.a
 ; CHECK-SAME: (float [[X:%.*]], i32 [[A_ARG:%.*]], i32 [[B_ARG:%.*]]) {
 ; CHECK-NEXT:    [[A:%.*]] = and i32 [[A_ARG]], 127
 ; CHECK-NEXT:    [[B:%.*]] = and i32 [[B_ARG]], 127
-; CHECK-NEXT:    [[LDEXP0:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[A]])
-; CHECK-NEXT:    [[LDEXP1:%.*]] = call float @llvm.ldexp.f32.i32(float [[LDEXP0]], i32 [[B]])
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i32 [[A]], [[B]]
+; CHECK-NEXT:    [[LDEXP1:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP1]])
 ; CHECK-NEXT:    ret float [[LDEXP1]]
 ;
   %a = and i32 %a.arg, 127
@@ -530,8 +526,8 @@ define float @ldexp_ldexp_both_exp_known_negative(float %x, ptr %a.ptr, ptr %b.p
 ; CHECK-SAME: (float [[X:%.*]], ptr [[A_PTR:%.*]], ptr [[B_PTR:%.*]]) {
 ; CHECK-NEXT:    [[A:%.*]] = load i32, ptr [[A_PTR]], align 4, !range [[RNG0:![0-9]+]]
 ; CHECK-NEXT:    [[B:%.*]] = load i32, ptr [[B_PTR]], align 4, !range [[RNG0]]
-; CHECK-NEXT:    [[LDEXP0:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[A]])
-; CHECK-NEXT:    [[LDEXP1:%.*]] = call float @llvm.ldexp.f32.i32(float [[LDEXP0]], i32 [[B]])
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i32 [[A]], [[B]]
+; CHECK-NEXT:    [[LDEXP1:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP1]])
 ; CHECK-NEXT:    ret float [[LDEXP1]]
 ;
   %a = load i32, ptr %a.ptr, !range !0
@@ -576,8 +572,7 @@ define float @ldexp_ldexp_exp_known_positive_and_negative(float %x, ptr %a.ptr,
 define float @ldexp_reassoc_ldexp_reassoc_0(float %x, i32 %y) {
 ; CHECK-LABEL: define float @ldexp_reassoc_ldexp_reassoc_0
 ; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
-; CHECK-NEXT:    [[LDEXP0:%.*]] = call reassoc float @llvm.ldexp.f32.i32(float [[X]], i32 0)
-; CHECK-NEXT:    [[LDEXP1:%.*]] = call reassoc float @llvm.ldexp.f32.i32(float [[LDEXP0]], i32 [[Y]])
+; CHECK-NEXT:    [[LDEXP1:%.*]] = call reassoc float @llvm.ldexp.f32.i32(float [[X]], i32 [[Y]])
 ; CHECK-NEXT:    ret float [[LDEXP1]]
 ;
   %ldexp0 = call reassoc float @llvm.ldexp.f32.i32(float %x, i32 0)