Skip to content

Commit edecb60

Browse files
committed
Reapply "AMDGPU: Drop and auto-upgrade llvm.amdgcn.ldexp to llvm.ldexp"
This reverts commit d9333e3.
1 parent 0006184 commit edecb60

File tree

11 files changed

+41
-675
lines changed

11 files changed

+41
-675
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -362,12 +362,6 @@ def int_amdgcn_rsq_legacy : ClangBuiltin<"__builtin_amdgcn_rsq_legacy">,
362362
def int_amdgcn_rsq_clamp : DefaultAttrsIntrinsic<
363363
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]>;
364364

365-
// For int_amdgcn_ldexp_f16, only the low 16 bits of the i32 src1 operand will be used.
366-
def int_amdgcn_ldexp : DefaultAttrsIntrinsic<
367-
[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
368-
[IntrNoMem, IntrSpeculatable]
369-
>;
370-
371365
def int_amdgcn_frexp_mant : DefaultAttrsIntrinsic<
372366
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
373367
>;

llvm/lib/Analysis/ConstantFolding.cpp

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1564,7 +1564,6 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
15641564
case Intrinsic::amdgcn_fmul_legacy:
15651565
case Intrinsic::amdgcn_fma_legacy:
15661566
case Intrinsic::amdgcn_fract:
1567-
case Intrinsic::amdgcn_ldexp:
15681567
case Intrinsic::amdgcn_sin:
15691568
// The intrinsics below depend on rounding mode in MXCSR.
15701569
case Intrinsic::x86_sse_cvtss2si:
@@ -2669,16 +2668,6 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
26692668
Ty->getContext(),
26702669
APFloat((double)std::pow(Op1V.convertToDouble(),
26712670
(int)Op2C->getZExtValue())));
2672-
2673-
if (IntrinsicID == Intrinsic::amdgcn_ldexp) {
2674-
// FIXME: Should flush denorms depending on FP mode, but that's ignored
2675-
// everywhere else.
2676-
2677-
// scalbn is equivalent to ldexp with float radix 2
2678-
APFloat Result = scalbn(Op1->getValueAPF(), Op2C->getSExtValue(),
2679-
APFloat::rmNearestTiesToEven);
2680-
return ConstantFP::get(Ty->getContext(), Result);
2681-
}
26822671
}
26832672
return nullptr;
26842673
}

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -926,6 +926,14 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
926926
NewFn = nullptr;
927927
return true;
928928
}
929+
930+
if (Name.startswith("ldexp.")) {
931+
// Target-specific intrinsic became redundant
932+
NewFn = Intrinsic::getDeclaration(
933+
F->getParent(), Intrinsic::ldexp,
934+
{F->getReturnType(), F->getArg(1)->getType()});
935+
return true;
936+
}
929937
}
930938

931939
break;

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3745,8 +3745,7 @@ SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
37453745
case Intrinsic::amdgcn_rsq:
37463746
case Intrinsic::amdgcn_rcp_legacy:
37473747
case Intrinsic::amdgcn_rsq_legacy:
3748-
case Intrinsic::amdgcn_rsq_clamp:
3749-
case Intrinsic::amdgcn_ldexp: {
3748+
case Intrinsic::amdgcn_rsq_clamp: {
37503749
// FIXME: This is probably wrong. If src is an sNaN, it won't be quieted
37513750
SDValue Src = N->getOperand(1);
37523751
return Src.isUndef() ? Src : SDValue();

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Lines changed: 0 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1048,50 +1048,6 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
10481048

10491049
break;
10501050
}
1051-
case Intrinsic::amdgcn_ldexp: {
1052-
// FIXME: This doesn't introduce new instructions and belongs in
1053-
// InstructionSimplify.
1054-
Type *Ty = II.getType();
1055-
Value *Op0 = II.getArgOperand(0);
1056-
Value *Op1 = II.getArgOperand(1);
1057-
1058-
// Folding undef to qnan is safe regardless of the FP mode.
1059-
if (isa<UndefValue>(Op0)) {
1060-
auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
1061-
return IC.replaceInstUsesWith(II, QNaN);
1062-
}
1063-
1064-
const APFloat *C = nullptr;
1065-
match(Op0, PatternMatch::m_APFloat(C));
1066-
1067-
// FIXME: Should flush denorms depending on FP mode, but that's ignored
1068-
// everywhere else.
1069-
//
1070-
// These cases should be safe, even with strictfp.
1071-
// ldexp(0.0, x) -> 0.0
1072-
// ldexp(-0.0, x) -> -0.0
1073-
// ldexp(inf, x) -> inf
1074-
// ldexp(-inf, x) -> -inf
1075-
if (C && (C->isZero() || C->isInfinity())) {
1076-
return IC.replaceInstUsesWith(II, Op0);
1077-
}
1078-
1079-
// With strictfp, be more careful about possibly needing to flush denormals
1080-
// or not, and snan behavior depends on ieee_mode.
1081-
if (II.isStrictFP())
1082-
break;
1083-
1084-
if (C && C->isNaN())
1085-
return IC.replaceInstUsesWith(II, ConstantFP::get(Ty, C->makeQuiet()));
1086-
1087-
// ldexp(x, 0) -> x
1088-
// ldexp(x, undef) -> x
1089-
if (isa<UndefValue>(Op1) || match(Op1, PatternMatch::m_ZeroInt())) {
1090-
return IC.replaceInstUsesWith(II, Op0);
1091-
}
1092-
1093-
break;
1094-
}
10951051
case Intrinsic::amdgcn_fmul_legacy: {
10961052
Value *Op0 = II.getArgOperand(0);
10971053
Value *Op1 = II.getArgOperand(1);

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7459,9 +7459,6 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
74597459

74607460
return emitRemovedIntrinsicError(DAG, DL, VT);
74617461
}
7462-
case Intrinsic::amdgcn_ldexp:
7463-
return DAG.getNode(ISD::FLDEXP, DL, VT, Op.getOperand(1), Op.getOperand(2));
7464-
74657462
case Intrinsic::amdgcn_fract:
74667463
return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
74677464

@@ -11619,7 +11616,6 @@ bool SITargetLowering::isCanonicalized(Register Reg, MachineFunction &MF,
1161911616
case Intrinsic::amdgcn_div_fmas:
1162011617
case Intrinsic::amdgcn_div_fixup:
1162111618
case Intrinsic::amdgcn_fract:
11622-
case Intrinsic::amdgcn_ldexp:
1162311619
case Intrinsic::amdgcn_cvt_pkrtz:
1162411620
case Intrinsic::amdgcn_cubeid:
1162511621
case Intrinsic::amdgcn_cubema:

llvm/test/Bitcode/amdgcn-ldexp.ll

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
2+
3+
define float @f32(float %a, i32 %b) {
4+
; CHECK: %call = call float @llvm.ldexp.f32.i32(float %a, i32 %b)
5+
; CHECK-NOT: amdgcn.ldexp
6+
%call = call float @llvm.amdgcn.ldexp.f32(float %a, i32 %b)
7+
ret float %call
8+
}
9+
10+
define double @f64(double %a, i32 %b) {
11+
; CHECK: %call = call double @llvm.ldexp.f64.i32(double %a, i32 %b)
12+
; CHECK-NOT: amdgcn.ldexp
13+
%call = call double @llvm.amdgcn.ldexp.f64(double %a, i32 %b)
14+
ret double %call
15+
}
16+
17+
define half @f16(half %a, i32 %b) {
18+
; CHECK: %call = call half @llvm.ldexp.f16.i32(half %a, i32 %b)
19+
; CHECK-NOT: amdgcn.ldexp
20+
%call = call half @llvm.amdgcn.ldexp.f16(half %a, i32 %b)
21+
ret half %call
22+
}
23+
24+
declare half @llvm.amdgcn.ldexp.f16(half, i32)
25+
declare float @llvm.amdgcn.ldexp.f32(float, i32)
26+
declare double @llvm.amdgcn.ldexp.f64(double, i32)
27+
; CHECK: declare half @llvm.ldexp.f16.i32(half, i32)
28+
; CHECK: declare float @llvm.ldexp.f32.i32(float, i32)
29+
; CHECK: declare double @llvm.ldexp.f64.i32(double, i32)
30+
; CHECK-NOT: amdgcn.ldexp

llvm/test/CodeGen/AMDGPU/known-never-snan.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -516,7 +516,7 @@ define float @v_test_known_not_snan_ldexp_input_fmed3_r_i_i_f32(float %a, i32 %b
516516
; GCN-NEXT: v_ldexp_f32 v0, v0, v1
517517
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
518518
; GCN-NEXT: s_setpc_b64 s[30:31]
519-
%known.not.snan = call float @llvm.amdgcn.ldexp.f32(float %a, i32 %b)
519+
%known.not.snan = call float @llvm.ldexp.f32.i32(float %a, i32 %b)
520520
%max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
521521
%med = call float @llvm.minnum.f32(float %max, float 4.0)
522522
ret float %med
@@ -658,7 +658,7 @@ declare float @llvm.maxnum.f32(float, float) #1
658658
declare float @llvm.copysign.f32(float, float) #1
659659
declare float @llvm.fma.f32(float, float, float) #1
660660
declare float @llvm.fmuladd.f32(float, float, float) #1
661-
declare float @llvm.amdgcn.ldexp.f32(float, i32) #1
661+
declare float @llvm.ldexp.f32.i32(float, i32) #1
662662
declare float @llvm.amdgcn.fmul.legacy(float, float) #1
663663
declare float @llvm.amdgcn.fmed3.f32(float, float, float) #1
664664
declare float @llvm.amdgcn.frexp.mant.f32(float) #1

0 commit comments

Comments
 (0)