Skip to content

Commit 3810635

Browse files
committed
AMDGPU: Use real copysign in fast pow
Previously this transform introduced some codegen regressions; those have since been avoided by simplifying demanded bits on copysign operations.
1 parent 1c0e722 commit 3810635

File tree

4 files changed

+68
-80
lines changed

4 files changed

+68
-80
lines changed

llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1131,17 +1131,18 @@ bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
11311131
if (needcopysign) {
11321132
Type* nTyS = B.getIntNTy(eltType->getPrimitiveSizeInBits());
11331133
Type *nTy = FPOp->getType()->getWithNewType(nTyS);
1134-
unsigned size = nTy->getScalarSizeInBits();
11351134
Value *opr_n = FPOp->getOperand(1);
11361135
if (opr_n->getType()->getScalarType()->isIntegerTy())
11371136
opr_n = B.CreateZExtOrTrunc(opr_n, nTy, "__ytou");
11381137
else
11391138
opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
11401139

1140+
unsigned size = nTy->getScalarSizeInBits();
11411141
Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
11421142
sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
1143-
nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
1144-
nval = B.CreateBitCast(nval, opr0->getType());
1143+
1144+
nval = B.CreateCopySign(nval, B.CreateBitCast(sign, nval->getType()),
1145+
nullptr, "__pow_sign");
11451146
}
11461147

11471148
LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll

Lines changed: 23 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1783,7 +1783,8 @@ define float @test_pow_afn_f32_nnan_ninf__y_10(float %x) {
17831783
define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_poison(<2 x float> %x) {
17841784
; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_poison
17851785
; CHECK-SAME: (<2 x float> [[X:%.*]]) {
1786-
; CHECK-NEXT: ret <2 x float> poison
1786+
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> poison)
1787+
; CHECK-NEXT: ret <2 x float> [[__EXP2]]
17871788
;
17881789
%pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> poison)
17891790
ret <2 x float> %pow
@@ -2215,10 +2216,9 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp(float %x, i32 %y)
22152216
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
22162217
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
22172218
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
2218-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
2219-
; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
2220-
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
2221-
; CHECK-NEXT: ret float [[TMP5]]
2219+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[__POW_SIGN]] to float
2220+
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn float @llvm.copysign.f32(float [[__EXP2]], float [[TMP3]])
2221+
; CHECK-NEXT: ret float [[__POW_SIGN1]]
22222222
;
22232223
%y.cast = sitofp i32 %y to float
22242224
%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
@@ -2303,10 +2303,9 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp(float %x, i32 %y)
23032303
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
23042304
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
23052305
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
2306-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
2307-
; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
2308-
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
2309-
; CHECK-NEXT: ret float [[TMP5]]
2306+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[__POW_SIGN]] to float
2307+
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn float @llvm.copysign.f32(float [[__EXP2]], float [[TMP3]])
2308+
; CHECK-NEXT: ret float [[__POW_SIGN1]]
23102309
;
23112310
%y.cast = uitofp i32 %y to float
23122311
%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
@@ -2352,10 +2351,9 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp_i256(float %x, i2
23522351
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
23532352
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
23542353
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
2355-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
2356-
; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
2357-
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
2358-
; CHECK-NEXT: ret float [[TMP5]]
2354+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[__POW_SIGN]] to float
2355+
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn float @llvm.copysign.f32(float [[__EXP2]], float [[TMP3]])
2356+
; CHECK-NEXT: ret float [[__POW_SIGN1]]
23592357
;
23602358
%y.cast = uitofp i256 %y to float
23612359
%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
@@ -2375,10 +2373,9 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp_i256(float %x, i2
23752373
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
23762374
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
23772375
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
2378-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
2379-
; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
2380-
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
2381-
; CHECK-NEXT: ret float [[TMP5]]
2376+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[__POW_SIGN]] to float
2377+
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn float @llvm.copysign.f32(float [[__EXP2]], float [[TMP3]])
2378+
; CHECK-NEXT: ret float [[__POW_SIGN1]]
23822379
;
23832380
%y.cast = sitofp i256 %y to float
23842381
%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
@@ -2398,10 +2395,9 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp(<2 x floa
23982395
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], <i32 31, i32 31>
23992396
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
24002397
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]]
2401-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
2402-
; CHECK-NEXT: [[TMP4:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP3]]
2403-
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
2404-
; CHECK-NEXT: ret <2 x float> [[TMP5]]
2398+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[__POW_SIGN]] to <2 x float>
2399+
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn <2 x float> @llvm.copysign.v2f32(<2 x float> [[__EXP2]], <2 x float> [[TMP3]])
2400+
; CHECK-NEXT: ret <2 x float> [[__POW_SIGN1]]
24052401
;
24062402
%y.cast = sitofp <2 x i32> %y to <2 x float>
24072403
%pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y.cast)
@@ -2447,10 +2443,9 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp(<2 x floa
24472443
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], <i32 31, i32 31>
24482444
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
24492445
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]]
2450-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
2451-
; CHECK-NEXT: [[TMP4:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP3]]
2452-
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
2453-
; CHECK-NEXT: ret <2 x float> [[TMP5]]
2446+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[__POW_SIGN]] to <2 x float>
2447+
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn <2 x float> @llvm.copysign.v2f32(<2 x float> [[__EXP2]], <2 x float> [[TMP3]])
2448+
; CHECK-NEXT: ret <2 x float> [[__POW_SIGN1]]
24542449
;
24552450
%y.cast = uitofp <2 x i32> %y to <2 x float>
24562451
%pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y.cast)
@@ -2559,10 +2554,9 @@ define float @test_pow_afn_f32_nnan_ninf__y_known_integral_trunc(float %x, float
25592554
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
25602555
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
25612556
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
2562-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
2563-
; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
2564-
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
2565-
; CHECK-NEXT: ret float [[TMP5]]
2557+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[__POW_SIGN]] to float
2558+
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn float @llvm.copysign.f32(float [[__EXP2]], float [[TMP3]])
2559+
; CHECK-NEXT: ret float [[__POW_SIGN1]]
25662560
;
25672561
%y = call float @llvm.trunc.f32(float %y.arg)
25682562
%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y)

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll

Lines changed: 26 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -679,10 +679,9 @@ define float @test_pown_afn_nnan_ninf_f32(float %x, i32 %y) {
679679
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[Y]], 31
680680
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
681681
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
682-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
683-
; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP1]]
684-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
685-
; CHECK-NEXT: ret float [[TMP3]]
682+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[__POW_SIGN]] to float
683+
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn float @llvm.copysign.f32(float [[__EXP2]], float [[TMP1]])
684+
; CHECK-NEXT: ret float [[__POW_SIGN1]]
686685
;
687686
entry:
688687
%call = tail call nnan ninf afn float @_Z4pownfi(float %x, i32 %y)
@@ -701,10 +700,9 @@ define <2 x float> @test_pown_afn_nnan_ninf_v2f32(<2 x float> %x, <2 x i32> %y)
701700
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[Y]], <i32 31, i32 31>
702701
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
703702
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP0]]
704-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
705-
; CHECK-NEXT: [[TMP2:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP1]]
706-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>
707-
; CHECK-NEXT: ret <2 x float> [[TMP3]]
703+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[__POW_SIGN]] to <2 x float>
704+
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn <2 x float> @llvm.copysign.v2f32(<2 x float> [[__EXP2]], <2 x float> [[TMP1]])
705+
; CHECK-NEXT: ret <2 x float> [[__POW_SIGN1]]
708706
;
709707
entry:
710708
%call = tail call nnan ninf afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> %x, <2 x i32> %y)
@@ -724,10 +722,9 @@ define double @test_pown_afn_nnan_ninf_f64(double %x, i32 %y) {
724722
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i64 [[__YTOU]], 63
725723
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double [[X]] to i64
726724
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i64 [[__YEVEN]], [[TMP0]]
727-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[__EXP2]] to i64
728-
; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[__POW_SIGN]], [[TMP1]]
729-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[TMP2]] to double
730-
; CHECK-NEXT: ret double [[TMP3]]
725+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[__POW_SIGN]] to double
726+
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn double @llvm.copysign.f64(double [[__EXP2]], double [[TMP1]])
727+
; CHECK-NEXT: ret double [[__POW_SIGN1]]
731728
;
732729
entry:
733730
%call = tail call nnan ninf afn double @_Z4powndi(double %x, i32 %y)
@@ -747,10 +744,9 @@ define <2 x double> @test_pown_afn_nnan_ninf_v2f64(<2 x double> %x, <2 x i32> %y
747744
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i64> [[__YTOU]], <i64 63, i64 63>
748745
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[X]] to <2 x i64>
749746
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i64> [[__YEVEN]], [[TMP0]]
750-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[__EXP2]] to <2 x i64>
751-
; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i64> [[__POW_SIGN]], [[TMP1]]
752-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to <2 x double>
753-
; CHECK-NEXT: ret <2 x double> [[TMP3]]
747+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[__POW_SIGN]] to <2 x double>
748+
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn <2 x double> @llvm.copysign.v2f64(<2 x double> [[__EXP2]], <2 x double> [[TMP1]])
749+
; CHECK-NEXT: ret <2 x double> [[__POW_SIGN1]]
754750
;
755751
entry:
756752
%call = tail call nnan ninf afn <2 x double> @_Z4pownDv2_dDv2_i(<2 x double> %x, <2 x i32> %y)
@@ -770,10 +766,9 @@ define half @test_pown_afn_nnan_ninf_f16(half %x, i32 %y) {
770766
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i16 [[__YTOU]], 15
771767
; CHECK-NEXT: [[TMP0:%.*]] = bitcast half [[X]] to i16
772768
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i16 [[__YEVEN]], [[TMP0]]
773-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[__EXP2]] to i16
774-
; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i16 [[__POW_SIGN]], [[TMP1]]
775-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP2]] to half
776-
; CHECK-NEXT: ret half [[TMP3]]
769+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[__POW_SIGN]] to half
770+
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn half @llvm.copysign.f16(half [[__EXP2]], half [[TMP1]])
771+
; CHECK-NEXT: ret half [[__POW_SIGN1]]
777772
;
778773
entry:
779774
%call = tail call nnan ninf afn half @_Z4pownDhi(half %x, i32 %y)
@@ -793,10 +788,9 @@ define <2 x half> @test_pown_afn_nnan_ninf_v2f16(<2 x half> %x, <2 x i32> %y) {
793788
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i16> [[__YTOU]], <i16 15, i16 15>
794789
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x half> [[X]] to <2 x i16>
795790
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i16> [[__YEVEN]], [[TMP0]]
796-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x half> [[__EXP2]] to <2 x i16>
797-
; CHECK-NEXT: [[TMP2:%.*]] = or disjoint <2 x i16> [[__POW_SIGN]], [[TMP1]]
798-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to <2 x half>
799-
; CHECK-NEXT: ret <2 x half> [[TMP3]]
791+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i16> [[__POW_SIGN]] to <2 x half>
792+
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn <2 x half> @llvm.copysign.v2f16(<2 x half> [[__EXP2]], <2 x half> [[TMP1]])
793+
; CHECK-NEXT: ret <2 x half> [[__POW_SIGN1]]
800794
;
801795
entry:
802796
%call = tail call nnan ninf afn <2 x half> @_Z4pownDv2_DhDv2_i(<2 x half> %x, <2 x i32> %y)
@@ -827,10 +821,9 @@ define float @test_pown_fast_f32_strictfp(float %x, i32 %y) #1 {
827821
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[Y]], 31
828822
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
829823
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
830-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
831-
; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP1]]
832-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
833-
; CHECK-NEXT: ret float [[TMP3]]
824+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[__POW_SIGN]] to float
825+
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call fast float @llvm.copysign.f32(float [[__EXP2]], float [[TMP1]]) #[[ATTR0]]
826+
; CHECK-NEXT: ret float [[__POW_SIGN1]]
834827
;
835828
entry:
836829
%call = tail call fast float @_Z4pownfi(float %x, i32 %y) #1
@@ -840,7 +833,8 @@ entry:
840833
define float @test_pown_fast_f32__y_poison(float %x) {
841834
; CHECK-LABEL: define float @test_pown_fast_f32__y_poison
842835
; CHECK-SAME: (float [[X:%.*]]) {
843-
; CHECK-NEXT: ret float poison
836+
; CHECK-NEXT: [[__EXP2:%.*]] = call fast float @llvm.exp2.f32(float poison)
837+
; CHECK-NEXT: ret float [[__EXP2]]
844838
;
845839
%call = tail call fast float @_Z4pownfi(float %x, i32 poison)
846840
ret float %call
@@ -1073,10 +1067,9 @@ define float @test_pown_afn_ninf_nnan_f32__x_known_positive(float nofpclass(ninf
10731067
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[Y]], 31
10741068
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
10751069
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
1076-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
1077-
; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP1]]
1078-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
1079-
; CHECK-NEXT: ret float [[TMP3]]
1070+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[__POW_SIGN]] to float
1071+
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn float @llvm.copysign.f32(float [[__EXP2]], float [[TMP1]])
1072+
; CHECK-NEXT: ret float [[__POW_SIGN1]]
10801073
;
10811074
entry:
10821075
%call = tail call afn ninf nnan float @_Z4pownfi(float %x, i32 %y)

llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -359,9 +359,9 @@ declare half @_Z4pownDhi(half, i32)
359359
; GCN-NATIVE: %__yeven = shl i16 %__ytou, 15
360360
; GCN-NATIVE: %0 = bitcast half %x to i16
361361
; GCN-NATIVE: %__pow_sign = and i16 %__yeven, %0
362-
; GCN-NATIVE: %1 = bitcast half %__exp2 to i16
363-
; GCN-NATIVE: %2 = or disjoint i16 %__pow_sign, %1
364-
; GCN-NATIVE: %3 = bitcast i16 %2 to half
362+
; GCN-NATIVE: %1 = bitcast i16 %__pow_sign to half
363+
; GCN-NATIVE: %__pow_sign1 = tail call fast half @llvm.copysign.f16(half %__exp2, half %1)
364+
; GCN-NATIVE: ret half %__pow_sign1
365365
define half @test_pown_f16(half %x, i32 %y) {
366366
entry:
367367
%call = call fast half @_Z4pownDhi(half %x, i32 %y)
@@ -377,9 +377,9 @@ declare float @_Z4pownfi(float, i32)
377377
; GCN: %__exp2 = tail call fast float @llvm.exp2.f32(float %__ylogx)
378378
; GCN: %[[r0:.*]] = bitcast float %tmp to i32
379379
; GCN: %__pow_sign = and i32 %[[r0]], -2147483648
380-
; GCN: %[[r1:.*]] = bitcast float %__exp2 to i32
381-
; GCN: %[[r2:.*]] = or disjoint i32 %__pow_sign, %[[r1]]
382-
; GCN: store i32 %[[r2]], ptr addrspace(1) %a, align 4
380+
; GCN: %[[r1:.+]] = bitcast i32 %__pow_sign to float
381+
; GCN: %[[r2:.+]] = tail call fast float @llvm.copysign.f32(float %__exp2, float %[[r1]])
382+
; GCN: store float %[[r2]], ptr addrspace(1) %a, align 4
383383
define amdgpu_kernel void @test_pow(ptr addrspace(1) nocapture %a) {
384384
entry:
385385
%tmp = load float, ptr addrspace(1) %a, align 4
@@ -413,9 +413,9 @@ entry:
413413
; GCN: %__yeven = shl i32 %conv, 31
414414
; GCN: %[[r0:.*]] = bitcast float %tmp to i32
415415
; GCN: %__pow_sign = and i32 %__yeven, %[[r0]]
416-
; GCN: %[[r1:.*]] = bitcast float %__exp2 to i32
417-
; GCN: %[[r2:.*]] = or disjoint i32 %__pow_sign, %[[r1]]
418-
; GCN: store i32 %[[r2]], ptr addrspace(1) %a, align 4
416+
; GCN: %[[r1:.*]] = bitcast i32 %__pow_sign to float
417+
; GCN: %[[r2:.*]] = tail call fast float @llvm.copysign.f32(float %__exp2, float %[[r1]])
418+
; GCN: store float %[[r2]], ptr addrspace(1) %a, align 4
419419
define amdgpu_kernel void @test_pown(ptr addrspace(1) nocapture %a) {
420420
entry:
421421
%tmp = load float, ptr addrspace(1) %a, align 4
@@ -437,9 +437,9 @@ declare <2 x half> @_Z3powDv2_DhS_(<2 x half>, <2 x half>)
437437
; GCN: %__exp2 = tail call fast half @llvm.exp2.f16(half %__ylogx)
438438
; GCN: %1 = bitcast half %x to i16
439439
; GCN: %__pow_sign = and i16 %1, -32768
440-
; GCN: %2 = bitcast half %__exp2 to i16
441-
; GCN: %3 = or disjoint i16 %__pow_sign, %2
442-
; GCN: %4 = bitcast i16 %3 to half
440+
; GCN: %2 = bitcast i16 %__pow_sign to half
441+
; GCN: %__pow_sign1 = tail call fast half @llvm.copysign.f16(half %__exp2, half %2)
442+
; GCN: ret half %__pow_sign1
443443
define half @test_pow_fast_f16__y_13(half %x) {
444444
%powr = tail call fast half @_Z3powDhDh(half %x, half 13.0)
445445
ret half %powr
@@ -452,9 +452,9 @@ define half @test_pow_fast_f16__y_13(half %x) {
452452
; GCN: %__exp2 = tail call fast <2 x half> @llvm.exp2.v2f16(<2 x half> %__ylogx)
453453
; GCN: %1 = bitcast <2 x half> %x to <2 x i16>
454454
; GCN: %__pow_sign = and <2 x i16> %1, <i16 -32768, i16 -32768>
455-
; GCN: %2 = bitcast <2 x half> %__exp2 to <2 x i16>
456-
; GCN: %3 = or disjoint <2 x i16> %__pow_sign, %2
457-
; GCN: %4 = bitcast <2 x i16> %3 to <2 x half>
455+
; GCN: %2 = bitcast <2 x i16> %__pow_sign to <2 x half>
456+
; GCN: %__pow_sign1 = tail call fast <2 x half> @llvm.copysign.v2f16(<2 x half> %__exp2, <2 x half> %2)
457+
; GCN: ret <2 x half> %__pow_sign1
458458
define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x) {
459459
%powr = tail call fast <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> <half 13.0, half 13.0>)
460460
ret <2 x half> %powr

0 commit comments

Comments (0)