AMDGPU: Use real copysign in fast pow #97152

Merged 1 commit on Jul 1, 2024
7 changes: 4 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -1131,17 +1131,18 @@ bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
   if (needcopysign) {
     Type* nTyS = B.getIntNTy(eltType->getPrimitiveSizeInBits());
     Type *nTy = FPOp->getType()->getWithNewType(nTyS);
-    unsigned size = nTy->getScalarSizeInBits();
     Value *opr_n = FPOp->getOperand(1);
     if (opr_n->getType()->getScalarType()->isIntegerTy())
       opr_n = B.CreateZExtOrTrunc(opr_n, nTy, "__ytou");
     else
       opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
 
+    unsigned size = nTy->getScalarSizeInBits();
     Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
     sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
-    nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
-    nval = B.CreateBitCast(nval, opr0->getType());
+
+    nval = B.CreateCopySign(nval, B.CreateBitCast(sign, nval->getType()),
+                            nullptr, "__pow_sign");
   }

LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
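In short: fold_pow still builds the sign word with shl/and on the integer type, but the final transfer onto the exp2 result now goes through a real @llvm.copysign call instead of the old bitcast/or/bitcast chain (which is why the `or disjoint` lines disappear from the tests below). Making the transfer an explicit copysign keeps the semantics visible to later passes instead of hiding it in integer bit manipulation. A minimal hand-written sketch of the emitted f32 sequence — value names are illustrative, not taken from the PR output:

```llvm
; Sign handling for pow(x, y) with y known integral, f32 case.
; %exp2 stands for the already-computed exp2(y * log2(|x|)).
define float @pow_sign_sketch(float %x, i32 %y, float %exp2) {
  %yeven = shl i32 %y, 31                ; low bit of y moved to the sign position
  %xbits = bitcast float %x to i32
  %sign = and i32 %yeven, %xbits         ; sign bit of x if y is odd, else 0
  %signf = bitcast i32 %sign to float
  %res = call float @llvm.copysign.f32(float %exp2, float %signf)
  ret float %res
}
declare float @llvm.copysign.f32(float, float)
```

Note that both the `and` and the copysign request the name "__pow_sign", so the builder uniques the second one to "__pow_sign1" in the updated CHECK lines.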
52 changes: 23 additions & 29 deletions llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
@@ -1783,7 +1783,8 @@ define float @test_pow_afn_f32_nnan_ninf__y_10(float %x) {
define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_poison(<2 x float> %x) {
; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_poison
; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT: ret <2 x float> poison
+; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> poison)
+; CHECK-NEXT: ret <2 x float> [[__EXP2]]
;
%pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> poison)
ret <2 x float> %pow
@@ -2215,10 +2216,9 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp(float %x, i32 %y)
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
-; CHECK-NEXT: ret float [[TMP5]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[__POW_SIGN]] to float
+; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn float @llvm.copysign.f32(float [[__EXP2]], float [[TMP3]])
+; CHECK-NEXT: ret float [[__POW_SIGN1]]
;
%y.cast = sitofp i32 %y to float
%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
@@ -2303,10 +2303,9 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp(float %x, i32 %y)
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
-; CHECK-NEXT: ret float [[TMP5]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[__POW_SIGN]] to float
+; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn float @llvm.copysign.f32(float [[__EXP2]], float [[TMP3]])
+; CHECK-NEXT: ret float [[__POW_SIGN1]]
;
%y.cast = uitofp i32 %y to float
%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
@@ -2352,10 +2351,9 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp_i256(float %x, i2
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
-; CHECK-NEXT: ret float [[TMP5]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[__POW_SIGN]] to float
+; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn float @llvm.copysign.f32(float [[__EXP2]], float [[TMP3]])
+; CHECK-NEXT: ret float [[__POW_SIGN1]]
;
%y.cast = uitofp i256 %y to float
%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
@@ -2375,10 +2373,9 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp_i256(float %x, i2
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
-; CHECK-NEXT: ret float [[TMP5]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[__POW_SIGN]] to float
+; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn float @llvm.copysign.f32(float [[__EXP2]], float [[TMP3]])
+; CHECK-NEXT: ret float [[__POW_SIGN1]]
;
%y.cast = sitofp i256 %y to float
%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
@@ -2398,10 +2395,9 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp(<2 x floa
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], <i32 31, i32 31>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]]
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
-; CHECK-NEXT: [[TMP4:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
-; CHECK-NEXT: ret <2 x float> [[TMP5]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[__POW_SIGN]] to <2 x float>
+; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn <2 x float> @llvm.copysign.v2f32(<2 x float> [[__EXP2]], <2 x float> [[TMP3]])
+; CHECK-NEXT: ret <2 x float> [[__POW_SIGN1]]
;
%y.cast = sitofp <2 x i32> %y to <2 x float>
%pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y.cast)
@@ -2447,10 +2443,9 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp(<2 x floa
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], <i32 31, i32 31>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]]
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
-; CHECK-NEXT: [[TMP4:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
-; CHECK-NEXT: ret <2 x float> [[TMP5]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[__POW_SIGN]] to <2 x float>
+; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn <2 x float> @llvm.copysign.v2f32(<2 x float> [[__EXP2]], <2 x float> [[TMP3]])
+; CHECK-NEXT: ret <2 x float> [[__POW_SIGN1]]
;
%y.cast = uitofp <2 x i32> %y to <2 x float>
%pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y.cast)
@@ -2559,10 +2554,9 @@ define float @test_pow_afn_f32_nnan_ninf__y_known_integral_trunc(float %x, float
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
-; CHECK-NEXT: ret float [[TMP5]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[__POW_SIGN]] to float
+; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn float @llvm.copysign.f32(float [[__EXP2]], float [[TMP3]])
+; CHECK-NEXT: ret float [[__POW_SIGN1]]
;
%y = call float @llvm.trunc.f32(float %y.arg)
%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y)
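A side effect visible in the poison tests above (and in the pown file below): the expansion no longer folds pow(x, poison) straight to poison; the rewritten call survives as exp2 of poison. A hand-written reproduction of the new CHECK pattern — function name illustrative:

```llvm
; pow with a poison exponent now leaves the exp2 expansion in place.
define <2 x float> @pow_y_poison_sketch(<2 x float> %x) {
  %exp2 = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> poison)
  ret <2 x float> %exp2
}
declare <2 x float> @llvm.exp2.v2f32(<2 x float>)
```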
59 changes: 26 additions & 33 deletions llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
@@ -679,10 +679,9 @@ define float @test_pown_afn_nnan_ninf_f32(float %x, i32 %y) {
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[Y]], 31
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
-; CHECK-NEXT: ret float [[TMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[__POW_SIGN]] to float
+; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn float @llvm.copysign.f32(float [[__EXP2]], float [[TMP1]])
+; CHECK-NEXT: ret float [[__POW_SIGN1]]
;
entry:
%call = tail call nnan ninf afn float @_Z4pownfi(float %x, i32 %y)
@@ -701,10 +700,9 @@ define <2 x float> @test_pown_afn_nnan_ninf_v2f32(<2 x float> %x, <2 x i32> %y)
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[Y]], <i32 31, i32 31>
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP0]]
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
-; CHECK-NEXT: [[TMP2:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>
-; CHECK-NEXT: ret <2 x float> [[TMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[__POW_SIGN]] to <2 x float>
+; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn <2 x float> @llvm.copysign.v2f32(<2 x float> [[__EXP2]], <2 x float> [[TMP1]])
+; CHECK-NEXT: ret <2 x float> [[__POW_SIGN1]]
;
entry:
%call = tail call nnan ninf afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> %x, <2 x i32> %y)
@@ -724,10 +722,9 @@ define double @test_pown_afn_nnan_ninf_f64(double %x, i32 %y) {
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i64 [[__YTOU]], 63
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double [[X]] to i64
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i64 [[__YEVEN]], [[TMP0]]
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[__EXP2]] to i64
-; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[__POW_SIGN]], [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[TMP2]] to double
-; CHECK-NEXT: ret double [[TMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[__POW_SIGN]] to double
+; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn double @llvm.copysign.f64(double [[__EXP2]], double [[TMP1]])
+; CHECK-NEXT: ret double [[__POW_SIGN1]]
;
entry:
%call = tail call nnan ninf afn double @_Z4powndi(double %x, i32 %y)
@@ -747,10 +744,9 @@ define <2 x double> @test_pown_afn_nnan_ninf_v2f64(<2 x double> %x, <2 x i32> %y
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i64> [[__YTOU]], <i64 63, i64 63>
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[X]] to <2 x i64>
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i64> [[__YEVEN]], [[TMP0]]
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[__EXP2]] to <2 x i64>
-; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i64> [[__POW_SIGN]], [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to <2 x double>
-; CHECK-NEXT: ret <2 x double> [[TMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[__POW_SIGN]] to <2 x double>
+; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn <2 x double> @llvm.copysign.v2f64(<2 x double> [[__EXP2]], <2 x double> [[TMP1]])
+; CHECK-NEXT: ret <2 x double> [[__POW_SIGN1]]
;
entry:
%call = tail call nnan ninf afn <2 x double> @_Z4pownDv2_dDv2_i(<2 x double> %x, <2 x i32> %y)
@@ -770,10 +766,9 @@ define half @test_pown_afn_nnan_ninf_f16(half %x, i32 %y) {
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i16 [[__YTOU]], 15
; CHECK-NEXT: [[TMP0:%.*]] = bitcast half [[X]] to i16
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i16 [[__YEVEN]], [[TMP0]]
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[__EXP2]] to i16
-; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i16 [[__POW_SIGN]], [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP2]] to half
-; CHECK-NEXT: ret half [[TMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[__POW_SIGN]] to half
+; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn half @llvm.copysign.f16(half [[__EXP2]], half [[TMP1]])
+; CHECK-NEXT: ret half [[__POW_SIGN1]]
;
entry:
%call = tail call nnan ninf afn half @_Z4pownDhi(half %x, i32 %y)
@@ -793,10 +788,9 @@ define <2 x half> @test_pown_afn_nnan_ninf_v2f16(<2 x half> %x, <2 x i32> %y) {
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i16> [[__YTOU]], <i16 15, i16 15>
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x half> [[X]] to <2 x i16>
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i16> [[__YEVEN]], [[TMP0]]
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x half> [[__EXP2]] to <2 x i16>
-; CHECK-NEXT: [[TMP2:%.*]] = or disjoint <2 x i16> [[__POW_SIGN]], [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to <2 x half>
-; CHECK-NEXT: ret <2 x half> [[TMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i16> [[__POW_SIGN]] to <2 x half>
+; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn <2 x half> @llvm.copysign.v2f16(<2 x half> [[__EXP2]], <2 x half> [[TMP1]])
+; CHECK-NEXT: ret <2 x half> [[__POW_SIGN1]]
;
entry:
%call = tail call nnan ninf afn <2 x half> @_Z4pownDv2_DhDv2_i(<2 x half> %x, <2 x i32> %y)
@@ -827,10 +821,9 @@ define float @test_pown_fast_f32_strictfp(float %x, i32 %y) #1 {
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[Y]], 31
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
-; CHECK-NEXT: ret float [[TMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[__POW_SIGN]] to float
+; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call fast float @llvm.copysign.f32(float [[__EXP2]], float [[TMP1]]) #[[ATTR0]]
+; CHECK-NEXT: ret float [[__POW_SIGN1]]
;
entry:
%call = tail call fast float @_Z4pownfi(float %x, i32 %y) #1
@@ -840,7 +833,8 @@ entry:
define float @test_pown_fast_f32__y_poison(float %x) {
; CHECK-LABEL: define float @test_pown_fast_f32__y_poison
; CHECK-SAME: (float [[X:%.*]]) {
-; CHECK-NEXT: ret float poison
+; CHECK-NEXT: [[__EXP2:%.*]] = call fast float @llvm.exp2.f32(float poison)
+; CHECK-NEXT: ret float [[__EXP2]]
;
%call = tail call fast float @_Z4pownfi(float %x, i32 poison)
ret float %call
@@ -1073,10 +1067,9 @@ define float @test_pown_afn_ninf_nnan_f32__x_known_positive(float nofpclass(ninf
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[Y]], 31
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
-; CHECK-NEXT: ret float [[TMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[__POW_SIGN]] to float
+; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn float @llvm.copysign.f32(float [[__EXP2]], float [[TMP1]])
+; CHECK-NEXT: ret float [[__POW_SIGN1]]
;
entry:
%call = tail call afn ninf nnan float @_Z4pownfi(float %x, i32 %y)
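The pown cases show the same pattern at every width: the i32 exponent is zero-extended or truncated to the integer type matching the float (the `__ytou` cast), shifted left by bit width minus one (31 for f32, 63 for f64, 15 for f16), and the copysign intrinsic is emitted at the corresponding type. A hand-written f16 sketch of that sequence, mirroring the updated CHECK lines above — names illustrative:

```llvm
; f16 pown sign handling: i32 exponent, i16 sign word, copysign.f16.
; %exp2 stands for the already-computed exp2 expansion of pown.
define half @pown_f16_sign_sketch(half %x, i32 %y, half %exp2) {
  %ytou = trunc i32 %y to i16        ; exponent narrowed to the fp bit width
  %yeven = shl i16 %ytou, 15         ; parity bit into the f16 sign position
  %xbits = bitcast half %x to i16
  %sign = and i16 %yeven, %xbits     ; sign(x) if y is odd, else 0
  %signh = bitcast i16 %sign to half
  %res = call half @llvm.copysign.f16(half %exp2, half %signh)
  ret half %res
}
declare half @llvm.copysign.f16(half, half)
```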
30 changes: 15 additions & 15 deletions llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
@@ -359,9 +359,9 @@ declare half @_Z4pownDhi(half, i32)
; GCN-NATIVE: %__yeven = shl i16 %__ytou, 15
; GCN-NATIVE: %0 = bitcast half %x to i16
; GCN-NATIVE: %__pow_sign = and i16 %__yeven, %0
-; GCN-NATIVE: %1 = bitcast half %__exp2 to i16
-; GCN-NATIVE: %2 = or disjoint i16 %__pow_sign, %1
-; GCN-NATIVE: %3 = bitcast i16 %2 to half
+; GCN-NATIVE: %1 = bitcast i16 %__pow_sign to half
+; GCN-NATIVE: %__pow_sign1 = tail call fast half @llvm.copysign.f16(half %__exp2, half %1)
+; GCN-NATIVE: ret half %__pow_sign1
define half @test_pown_f16(half %x, i32 %y) {
entry:
%call = call fast half @_Z4pownDhi(half %x, i32 %y)
@@ -377,9 +377,9 @@ declare float @_Z4pownfi(float, i32)
; GCN: %__exp2 = tail call fast float @llvm.exp2.f32(float %__ylogx)
; GCN: %[[r0:.*]] = bitcast float %tmp to i32
; GCN: %__pow_sign = and i32 %[[r0]], -2147483648
-; GCN: %[[r1:.*]] = bitcast float %__exp2 to i32
-; GCN: %[[r2:.*]] = or disjoint i32 %__pow_sign, %[[r1]]
-; GCN: store i32 %[[r2]], ptr addrspace(1) %a, align 4
+; GCN: %[[r1:.+]] = bitcast i32 %__pow_sign to float
+; GCN: %[[r2:.+]] = tail call fast float @llvm.copysign.f32(float %__exp2, float %[[r1]])
+; GCN: store float %[[r2]], ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_pow(ptr addrspace(1) nocapture %a) {
entry:
%tmp = load float, ptr addrspace(1) %a, align 4
@@ -413,9 +413,9 @@ entry:
; GCN: %__yeven = shl i32 %conv, 31
; GCN: %[[r0:.*]] = bitcast float %tmp to i32
; GCN: %__pow_sign = and i32 %__yeven, %[[r0]]
-; GCN: %[[r1:.*]] = bitcast float %__exp2 to i32
-; GCN: %[[r2:.*]] = or disjoint i32 %__pow_sign, %[[r1]]
-; GCN: store i32 %[[r2]], ptr addrspace(1) %a, align 4
+; GCN: %[[r1:.*]] = bitcast i32 %__pow_sign to float
+; GCN: %[[r2:.*]] = tail call fast float @llvm.copysign.f32(float %__exp2, float %[[r1]])
+; GCN: store float %[[r2]], ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_pown(ptr addrspace(1) nocapture %a) {
entry:
%tmp = load float, ptr addrspace(1) %a, align 4
@@ -437,9 +437,9 @@ declare <2 x half> @_Z3powDv2_DhS_(<2 x half>, <2 x half>)
; GCN: %__exp2 = tail call fast half @llvm.exp2.f16(half %__ylogx)
; GCN: %1 = bitcast half %x to i16
; GCN: %__pow_sign = and i16 %1, -32768
-; GCN: %2 = bitcast half %__exp2 to i16
-; GCN: %3 = or disjoint i16 %__pow_sign, %2
-; GCN: %4 = bitcast i16 %3 to half
+; GCN: %2 = bitcast i16 %__pow_sign to half
+; GCN: %__pow_sign1 = tail call fast half @llvm.copysign.f16(half %__exp2, half %2)
+; GCN: ret half %__pow_sign1
define half @test_pow_fast_f16__y_13(half %x) {
%powr = tail call fast half @_Z3powDhDh(half %x, half 13.0)
ret half %powr
@@ -452,9 +452,9 @@ define half @test_pow_fast_f16__y_13(half %x) {
; GCN: %__exp2 = tail call fast <2 x half> @llvm.exp2.v2f16(<2 x half> %__ylogx)
; GCN: %1 = bitcast <2 x half> %x to <2 x i16>
; GCN: %__pow_sign = and <2 x i16> %1, <i16 -32768, i16 -32768>
-; GCN: %2 = bitcast <2 x half> %__exp2 to <2 x i16>
-; GCN: %3 = or disjoint <2 x i16> %__pow_sign, %2
-; GCN: %4 = bitcast <2 x i16> %3 to <2 x half>
+; GCN: %2 = bitcast <2 x i16> %__pow_sign to <2 x half>
+; GCN: %__pow_sign1 = tail call fast <2 x half> @llvm.copysign.v2f16(<2 x half> %__exp2, <2 x half> %2)
+; GCN: ret <2 x half> %__pow_sign1
define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x) {
%powr = tail call fast <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> <half 13.0, half 13.0>)
ret <2 x half> %powr
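For a quick local look at the new output, a standalone reproducer along these lines should work. The RUN line is an assumption — it is not part of this diff — based on the pass these test files exercise:

```llvm
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-simplify-libcall %s | FileCheck %s
; Hypothetical reproducer, not part of the PR.
declare float @_Z4pownfi(float, i32)

define float @reproducer(float %x, i32 %y) {
; CHECK: call fast float @llvm.copysign.f32(
  %call = tail call fast float @_Z4pownfi(float %x, i32 %y)
  ret float %call
}
```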