Reapply "AMDGPU: Drop and auto-upgrade llvm.amdgcn.ldexp to llvm.ldexp"

arsenm · arsenm · commit edecb6048148 · 2023-09-13T08:38:48.000+03:00
This reverts commit d9333e3.
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -362,12 +362,6 @@ def int_amdgcn_rsq_legacy :  ClangBuiltin<"__builtin_amdgcn_rsq_legacy">,
 def int_amdgcn_rsq_clamp : DefaultAttrsIntrinsic<
   [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]>;
 
-// For int_amdgcn_ldexp_f16, only the low 16 bits of the i32 src1 operand will used.
-def int_amdgcn_ldexp : DefaultAttrsIntrinsic<
-  [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
-  [IntrNoMem, IntrSpeculatable]
->;
-
 def int_amdgcn_frexp_mant : DefaultAttrsIntrinsic<
   [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
 >;
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1564,7 +1564,6 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::amdgcn_fmul_legacy:
   case Intrinsic::amdgcn_fma_legacy:
   case Intrinsic::amdgcn_fract:
-  case Intrinsic::amdgcn_ldexp:
   case Intrinsic::amdgcn_sin:
   // The intrinsics below depend on rounding mode in MXCSR.
   case Intrinsic::x86_sse_cvtss2si:
@@ -2669,16 +2668,6 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
             Ty->getContext(),
             APFloat((double)std::pow(Op1V.convertToDouble(),
                                      (int)Op2C->getZExtValue())));
-
-      if (IntrinsicID == Intrinsic::amdgcn_ldexp) {
-        // FIXME: Should flush denorms depending on FP mode, but that's ignored
-        // everywhere else.
-
-        // scalbn is equivalent to ldexp with float radix 2
-        APFloat Result = scalbn(Op1->getValueAPF(), Op2C->getSExtValue(),
-                                APFloat::rmNearestTiesToEven);
-        return ConstantFP::get(Ty->getContext(), Result);
-      }
     }
     return nullptr;
   }
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
@@ -926,6 +926,14 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
         NewFn = nullptr;
         return true;
       }
+
+      if (Name.startswith("ldexp.")) {
+        // Target specific intrinsic became redundant
+        NewFn = Intrinsic::getDeclaration(
+          F->getParent(), Intrinsic::ldexp,
+          {F->getReturnType(), F->getArg(1)->getType()});
+        return true;
+      }
     }
 
     break;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3745,8 +3745,7 @@ SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
   case Intrinsic::amdgcn_rsq:
   case Intrinsic::amdgcn_rcp_legacy:
   case Intrinsic::amdgcn_rsq_legacy:
-  case Intrinsic::amdgcn_rsq_clamp:
-  case Intrinsic::amdgcn_ldexp: {
+  case Intrinsic::amdgcn_rsq_clamp: {
     // FIXME: This is probably wrong. If src is an sNaN, it won't be quieted
     SDValue Src = N->getOperand(1);
     return Src.isUndef() ? Src : SDValue();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1048,50 +1048,6 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
 
     break;
   }
-  case Intrinsic::amdgcn_ldexp: {
-    // FIXME: This doesn't introduce new instructions and belongs in
-    // InstructionSimplify.
-    Type *Ty = II.getType();
-    Value *Op0 = II.getArgOperand(0);
-    Value *Op1 = II.getArgOperand(1);
-
-    // Folding undef to qnan is safe regardless of the FP mode.
-    if (isa<UndefValue>(Op0)) {
-      auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
-      return IC.replaceInstUsesWith(II, QNaN);
-    }
-
-    const APFloat *C = nullptr;
-    match(Op0, PatternMatch::m_APFloat(C));
-
-    // FIXME: Should flush denorms depending on FP mode, but that's ignored
-    // everywhere else.
-    //
-    // These cases should be safe, even with strictfp.
-    // ldexp(0.0, x) -> 0.0
-    // ldexp(-0.0, x) -> -0.0
-    // ldexp(inf, x) -> inf
-    // ldexp(-inf, x) -> -inf
-    if (C && (C->isZero() || C->isInfinity())) {
-      return IC.replaceInstUsesWith(II, Op0);
-    }
-
-    // With strictfp, be more careful about possibly needing to flush denormals
-    // or not, and snan behavior depends on ieee_mode.
-    if (II.isStrictFP())
-      break;
-
-    if (C && C->isNaN())
-      return IC.replaceInstUsesWith(II, ConstantFP::get(Ty, C->makeQuiet()));
-
-    // ldexp(x, 0) -> x
-    // ldexp(x, undef) -> x
-    if (isa<UndefValue>(Op1) || match(Op1, PatternMatch::m_ZeroInt())) {
-      return IC.replaceInstUsesWith(II, Op0);
-    }
-
-    break;
-  }
   case Intrinsic::amdgcn_fmul_legacy: {
     Value *Op0 = II.getArgOperand(0);
     Value *Op1 = II.getArgOperand(1);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7459,9 +7459,6 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
 
     return emitRemovedIntrinsicError(DAG, DL, VT);
   }
-  case Intrinsic::amdgcn_ldexp:
-    return DAG.getNode(ISD::FLDEXP, DL, VT, Op.getOperand(1), Op.getOperand(2));
-
   case Intrinsic::amdgcn_fract:
     return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
 
@@ -11619,7 +11616,6 @@ bool SITargetLowering::isCanonicalized(Register Reg, MachineFunction &MF,
     case Intrinsic::amdgcn_div_fmas:
     case Intrinsic::amdgcn_div_fixup:
     case Intrinsic::amdgcn_fract:
-    case Intrinsic::amdgcn_ldexp:
     case Intrinsic::amdgcn_cvt_pkrtz:
     case Intrinsic::amdgcn_cubeid:
     case Intrinsic::amdgcn_cubema:
diff --git a/llvm/test/Bitcode/amdgcn-ldexp.ll b/llvm/test/Bitcode/amdgcn-ldexp.ll
@@ -0,0 +1,30 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+define float @f32(float %a, i32 %b) {
+  ; CHECK: %call = call float @llvm.ldexp.f32.i32(float %a, i32 %b)
+  ; CHECK-NOT: amdgcn.ldexp
+  %call = call float @llvm.amdgcn.ldexp.f32(float %a, i32 %b)
+  ret float %call
+}
+
+define double @f64(double %a, i32 %b) {
+  ; CHECK: %call = call double @llvm.ldexp.f64.i32(double %a, i32 %b)
+  ; CHECK-NOT: amdgcn.ldexp
+  %call = call double @llvm.amdgcn.ldexp.f64(double %a, i32 %b)
+  ret double %call
+}
+
+define half @f16(half %a, i32 %b) {
+  ; CHECK: %call = call half @llvm.ldexp.f16.i32(half %a, i32 %b)
+  ; CHECK-NOT: amdgcn.ldexp
+  %call = call half @llvm.amdgcn.ldexp.f16(half %a, i32 %b)
+  ret half %call
+}
+
+declare half @llvm.amdgcn.ldexp.f16(half, i32)
+declare float @llvm.amdgcn.ldexp.f32(float, i32)
+declare double @llvm.amdgcn.ldexp.f64(double, i32)
+; CHECK: declare half @llvm.ldexp.f16.i32(half, i32)
+; CHECK: declare float @llvm.ldexp.f32.i32(float, i32)
+; CHECK: declare double @llvm.ldexp.f64.i32(double, i32)
+; CHECK-NOT: amdgcn.ldexp
diff --git a/llvm/test/CodeGen/AMDGPU/known-never-snan.ll b/llvm/test/CodeGen/AMDGPU/known-never-snan.ll
@@ -516,7 +516,7 @@ define float @v_test_known_not_snan_ldexp_input_fmed3_r_i_i_f32(float %a, i32 %b
 ; GCN-NEXT:    v_ldexp_f32 v0, v0, v1
 ; GCN-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
-  %known.not.snan = call float @llvm.amdgcn.ldexp.f32(float %a, i32 %b)
+  %known.not.snan = call float @llvm.ldexp.f32.i32(float %a, i32 %b)
   %max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
   %med = call float @llvm.minnum.f32(float %max, float 4.0)
   ret float %med
@@ -658,7 +658,7 @@ declare float @llvm.maxnum.f32(float, float) #1
 declare float @llvm.copysign.f32(float, float) #1
 declare float @llvm.fma.f32(float, float, float) #1
 declare float @llvm.fmuladd.f32(float, float, float) #1
-declare float @llvm.amdgcn.ldexp.f32(float, i32) #1
+declare float @llvm.ldexp.f32.i32(float, i32) #1
 declare float @llvm.amdgcn.fmul.legacy(float, float) #1
 declare float @llvm.amdgcn.fmed3.f32(float, float, float) #1
 declare float @llvm.amdgcn.frexp.mant.f32(float) #1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.f16.ll
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.ll
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/ldexp.ll b/llvm/test/Transforms/InstCombine/AMDGPU/ldexp.ll

Original file line number	Diff line number	Diff line change
`@@ -926,6 +926,14 @@ static bool UpgradeIntrinsicFunction1(Function F, Function &NewFn) {`
`926`	`926`	`NewFn = nullptr;`
`927`	`927`	`return true;`
`928`	`928`	`}`
	`929`	`+`
	`930`	`+ if (Name.startswith("ldexp.")) {`
	`931`	`+ // Target specific intrinsic became redundant`
	`932`	`+ NewFn = Intrinsic::getDeclaration(`
	`933`	`+ F->getParent(), Intrinsic::ldexp,`
	`934`	`+ {F->getReturnType(), F->getArg(1)->getType()});`
	`935`	`+ return true;`
	`936`	`+ }`
`929`	`937`	`}`
`930`	`938`
`931`	`939`	`break;`