Skip to content

Commit bff619f

Browse files
committed
Revert "AMDGPU: Use real copysign in fast pow (#97152)"
This reverts commit d3e7c4c.
1 parent 8477ca6 commit bff619f

File tree

4 files changed

+80
-68
lines changed

4 files changed

+80
-68
lines changed

llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1131,18 +1131,17 @@ bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
11311131
if (needcopysign) {
11321132
Type* nTyS = B.getIntNTy(eltType->getPrimitiveSizeInBits());
11331133
Type *nTy = FPOp->getType()->getWithNewType(nTyS);
1134+
unsigned size = nTy->getScalarSizeInBits();
11341135
Value *opr_n = FPOp->getOperand(1);
11351136
if (opr_n->getType()->getScalarType()->isIntegerTy())
11361137
opr_n = B.CreateZExtOrTrunc(opr_n, nTy, "__ytou");
11371138
else
11381139
opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
11391140

1140-
unsigned size = nTy->getScalarSizeInBits();
11411141
Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
11421142
sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
1143-
1144-
nval = B.CreateCopySign(nval, B.CreateBitCast(sign, nval->getType()),
1145-
nullptr, "__pow_sign");
1143+
nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
1144+
nval = B.CreateBitCast(nval, opr0->getType());
11461145
}
11471146

11481147
LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll

Lines changed: 29 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -1783,8 +1783,7 @@ define float @test_pow_afn_f32_nnan_ninf__y_10(float %x) {
17831783
define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_poison(<2 x float> %x) {
17841784
; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_poison
17851785
; CHECK-SAME: (<2 x float> [[X:%.*]]) {
1786-
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> poison)
1787-
; CHECK-NEXT: ret <2 x float> [[__EXP2]]
1786+
; CHECK-NEXT: ret <2 x float> poison
17881787
;
17891788
%pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> poison)
17901789
ret <2 x float> %pow
@@ -2216,9 +2215,10 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp(float %x, i32 %y)
22162215
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
22172216
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
22182217
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
2219-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[__POW_SIGN]] to float
2220-
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn float @llvm.copysign.f32(float [[__EXP2]], float [[TMP3]])
2221-
; CHECK-NEXT: ret float [[__POW_SIGN1]]
2218+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
2219+
; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
2220+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
2221+
; CHECK-NEXT: ret float [[TMP5]]
22222222
;
22232223
%y.cast = sitofp i32 %y to float
22242224
%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
@@ -2303,9 +2303,10 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp(float %x, i32 %y)
23032303
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
23042304
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
23052305
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
2306-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[__POW_SIGN]] to float
2307-
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn float @llvm.copysign.f32(float [[__EXP2]], float [[TMP3]])
2308-
; CHECK-NEXT: ret float [[__POW_SIGN1]]
2306+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
2307+
; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
2308+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
2309+
; CHECK-NEXT: ret float [[TMP5]]
23092310
;
23102311
%y.cast = uitofp i32 %y to float
23112312
%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
@@ -2351,9 +2352,10 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp_i256(float %x, i2
23512352
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
23522353
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
23532354
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
2354-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[__POW_SIGN]] to float
2355-
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn float @llvm.copysign.f32(float [[__EXP2]], float [[TMP3]])
2356-
; CHECK-NEXT: ret float [[__POW_SIGN1]]
2355+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
2356+
; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
2357+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
2358+
; CHECK-NEXT: ret float [[TMP5]]
23572359
;
23582360
%y.cast = uitofp i256 %y to float
23592361
%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
@@ -2373,9 +2375,10 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp_i256(float %x, i2
23732375
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
23742376
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
23752377
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
2376-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[__POW_SIGN]] to float
2377-
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn float @llvm.copysign.f32(float [[__EXP2]], float [[TMP3]])
2378-
; CHECK-NEXT: ret float [[__POW_SIGN1]]
2378+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
2379+
; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
2380+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
2381+
; CHECK-NEXT: ret float [[TMP5]]
23792382
;
23802383
%y.cast = sitofp i256 %y to float
23812384
%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast)
@@ -2395,9 +2398,10 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp(<2 x floa
23952398
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], <i32 31, i32 31>
23962399
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
23972400
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]]
2398-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[__POW_SIGN]] to <2 x float>
2399-
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn <2 x float> @llvm.copysign.v2f32(<2 x float> [[__EXP2]], <2 x float> [[TMP3]])
2400-
; CHECK-NEXT: ret <2 x float> [[__POW_SIGN1]]
2401+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
2402+
; CHECK-NEXT: [[TMP4:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP3]]
2403+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
2404+
; CHECK-NEXT: ret <2 x float> [[TMP5]]
24012405
;
24022406
%y.cast = sitofp <2 x i32> %y to <2 x float>
24032407
%pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y.cast)
@@ -2443,9 +2447,10 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp(<2 x floa
24432447
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], <i32 31, i32 31>
24442448
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
24452449
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]]
2446-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[__POW_SIGN]] to <2 x float>
2447-
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn <2 x float> @llvm.copysign.v2f32(<2 x float> [[__EXP2]], <2 x float> [[TMP3]])
2448-
; CHECK-NEXT: ret <2 x float> [[__POW_SIGN1]]
2450+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
2451+
; CHECK-NEXT: [[TMP4:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP3]]
2452+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float>
2453+
; CHECK-NEXT: ret <2 x float> [[TMP5]]
24492454
;
24502455
%y.cast = uitofp <2 x i32> %y to <2 x float>
24512456
%pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y.cast)
@@ -2554,9 +2559,10 @@ define float @test_pow_afn_f32_nnan_ninf__y_known_integral_trunc(float %x, float
25542559
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31
25552560
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32
25562561
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]]
2557-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[__POW_SIGN]] to float
2558-
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn float @llvm.copysign.f32(float [[__EXP2]], float [[TMP3]])
2559-
; CHECK-NEXT: ret float [[__POW_SIGN1]]
2562+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32
2563+
; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]]
2564+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
2565+
; CHECK-NEXT: ret float [[TMP5]]
25602566
;
25612567
%y = call float @llvm.trunc.f32(float %y.arg)
25622568
%pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y)

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll

Lines changed: 33 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -679,9 +679,10 @@ define float @test_pown_afn_nnan_ninf_f32(float %x, i32 %y) {
679679
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[Y]], 31
680680
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
681681
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
682-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[__POW_SIGN]] to float
683-
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn float @llvm.copysign.f32(float [[__EXP2]], float [[TMP1]])
684-
; CHECK-NEXT: ret float [[__POW_SIGN1]]
682+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
683+
; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP1]]
684+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
685+
; CHECK-NEXT: ret float [[TMP3]]
685686
;
686687
entry:
687688
%call = tail call nnan ninf afn float @_Z4pownfi(float %x, i32 %y)
@@ -700,9 +701,10 @@ define <2 x float> @test_pown_afn_nnan_ninf_v2f32(<2 x float> %x, <2 x i32> %y)
700701
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[Y]], <i32 31, i32 31>
701702
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
702703
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP0]]
703-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[__POW_SIGN]] to <2 x float>
704-
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn <2 x float> @llvm.copysign.v2f32(<2 x float> [[__EXP2]], <2 x float> [[TMP1]])
705-
; CHECK-NEXT: ret <2 x float> [[__POW_SIGN1]]
704+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
705+
; CHECK-NEXT: [[TMP2:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP1]]
706+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <2 x float>
707+
; CHECK-NEXT: ret <2 x float> [[TMP3]]
706708
;
707709
entry:
708710
%call = tail call nnan ninf afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> %x, <2 x i32> %y)
@@ -722,9 +724,10 @@ define double @test_pown_afn_nnan_ninf_f64(double %x, i32 %y) {
722724
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i64 [[__YTOU]], 63
723725
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double [[X]] to i64
724726
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i64 [[__YEVEN]], [[TMP0]]
725-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[__POW_SIGN]] to double
726-
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn double @llvm.copysign.f64(double [[__EXP2]], double [[TMP1]])
727-
; CHECK-NEXT: ret double [[__POW_SIGN1]]
727+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[__EXP2]] to i64
728+
; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[__POW_SIGN]], [[TMP1]]
729+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[TMP2]] to double
730+
; CHECK-NEXT: ret double [[TMP3]]
728731
;
729732
entry:
730733
%call = tail call nnan ninf afn double @_Z4powndi(double %x, i32 %y)
@@ -744,9 +747,10 @@ define <2 x double> @test_pown_afn_nnan_ninf_v2f64(<2 x double> %x, <2 x i32> %y
744747
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i64> [[__YTOU]], <i64 63, i64 63>
745748
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[X]] to <2 x i64>
746749
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i64> [[__YEVEN]], [[TMP0]]
747-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[__POW_SIGN]] to <2 x double>
748-
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn <2 x double> @llvm.copysign.v2f64(<2 x double> [[__EXP2]], <2 x double> [[TMP1]])
749-
; CHECK-NEXT: ret <2 x double> [[__POW_SIGN1]]
750+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[__EXP2]] to <2 x i64>
751+
; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i64> [[__POW_SIGN]], [[TMP1]]
752+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to <2 x double>
753+
; CHECK-NEXT: ret <2 x double> [[TMP3]]
750754
;
751755
entry:
752756
%call = tail call nnan ninf afn <2 x double> @_Z4pownDv2_dDv2_i(<2 x double> %x, <2 x i32> %y)
@@ -766,9 +770,10 @@ define half @test_pown_afn_nnan_ninf_f16(half %x, i32 %y) {
766770
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i16 [[__YTOU]], 15
767771
; CHECK-NEXT: [[TMP0:%.*]] = bitcast half [[X]] to i16
768772
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i16 [[__YEVEN]], [[TMP0]]
769-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[__POW_SIGN]] to half
770-
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn half @llvm.copysign.f16(half [[__EXP2]], half [[TMP1]])
771-
; CHECK-NEXT: ret half [[__POW_SIGN1]]
773+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[__EXP2]] to i16
774+
; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i16 [[__POW_SIGN]], [[TMP1]]
775+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP2]] to half
776+
; CHECK-NEXT: ret half [[TMP3]]
772777
;
773778
entry:
774779
%call = tail call nnan ninf afn half @_Z4pownDhi(half %x, i32 %y)
@@ -788,9 +793,10 @@ define <2 x half> @test_pown_afn_nnan_ninf_v2f16(<2 x half> %x, <2 x i32> %y) {
788793
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i16> [[__YTOU]], <i16 15, i16 15>
789794
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x half> [[X]] to <2 x i16>
790795
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i16> [[__YEVEN]], [[TMP0]]
791-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i16> [[__POW_SIGN]] to <2 x half>
792-
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn <2 x half> @llvm.copysign.v2f16(<2 x half> [[__EXP2]], <2 x half> [[TMP1]])
793-
; CHECK-NEXT: ret <2 x half> [[__POW_SIGN1]]
796+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x half> [[__EXP2]] to <2 x i16>
797+
; CHECK-NEXT: [[TMP2:%.*]] = or disjoint <2 x i16> [[__POW_SIGN]], [[TMP1]]
798+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to <2 x half>
799+
; CHECK-NEXT: ret <2 x half> [[TMP3]]
794800
;
795801
entry:
796802
%call = tail call nnan ninf afn <2 x half> @_Z4pownDv2_DhDv2_i(<2 x half> %x, <2 x i32> %y)
@@ -821,9 +827,10 @@ define float @test_pown_fast_f32_strictfp(float %x, i32 %y) #1 {
821827
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[Y]], 31
822828
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
823829
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
824-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[__POW_SIGN]] to float
825-
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call fast float @llvm.copysign.f32(float [[__EXP2]], float [[TMP1]]) #[[ATTR0]]
826-
; CHECK-NEXT: ret float [[__POW_SIGN1]]
830+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
831+
; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP1]]
832+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
833+
; CHECK-NEXT: ret float [[TMP3]]
827834
;
828835
entry:
829836
%call = tail call fast float @_Z4pownfi(float %x, i32 %y) #1
@@ -833,8 +840,7 @@ entry:
833840
define float @test_pown_fast_f32__y_poison(float %x) {
834841
; CHECK-LABEL: define float @test_pown_fast_f32__y_poison
835842
; CHECK-SAME: (float [[X:%.*]]) {
836-
; CHECK-NEXT: [[__EXP2:%.*]] = call fast float @llvm.exp2.f32(float poison)
837-
; CHECK-NEXT: ret float [[__EXP2]]
843+
; CHECK-NEXT: ret float poison
838844
;
839845
%call = tail call fast float @_Z4pownfi(float %x, i32 poison)
840846
ret float %call
@@ -1067,9 +1073,10 @@ define float @test_pown_afn_ninf_nnan_f32__x_known_positive(float nofpclass(ninf
10671073
; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[Y]], 31
10681074
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[X]] to i32
10691075
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP0]]
1070-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[__POW_SIGN]] to float
1071-
; CHECK-NEXT: [[__POW_SIGN1:%.*]] = call nnan ninf afn float @llvm.copysign.f32(float [[__EXP2]], float [[TMP1]])
1072-
; CHECK-NEXT: ret float [[__POW_SIGN1]]
1076+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[__EXP2]] to i32
1077+
; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP1]]
1078+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
1079+
; CHECK-NEXT: ret float [[TMP3]]
10731080
;
10741081
entry:
10751082
%call = tail call afn ninf nnan float @_Z4pownfi(float %x, i32 %y)

llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -359,9 +359,9 @@ declare half @_Z4pownDhi(half, i32)
359359
; GCN-NATIVE: %__yeven = shl i16 %__ytou, 15
360360
; GCN-NATIVE: %0 = bitcast half %x to i16
361361
; GCN-NATIVE: %__pow_sign = and i16 %__yeven, %0
362-
; GCN-NATIVE: %1 = bitcast i16 %__pow_sign to half
363-
; GCN-NATIVE: %__pow_sign1 = tail call fast half @llvm.copysign.f16(half %__exp2, half %1)
364-
; GCN-NATIVE: ret half %__pow_sign1
362+
; GCN-NATIVE: %1 = bitcast half %__exp2 to i16
363+
; GCN-NATIVE: %2 = or disjoint i16 %__pow_sign, %1
364+
; GCN-NATIVE: %3 = bitcast i16 %2 to half
365365
define half @test_pown_f16(half %x, i32 %y) {
366366
entry:
367367
%call = call fast half @_Z4pownDhi(half %x, i32 %y)
@@ -377,9 +377,9 @@ declare float @_Z4pownfi(float, i32)
377377
; GCN: %__exp2 = tail call fast float @llvm.exp2.f32(float %__ylogx)
378378
; GCN: %[[r0:.*]] = bitcast float %tmp to i32
379379
; GCN: %__pow_sign = and i32 %[[r0]], -2147483648
380-
; GCN: %[[r1:.+]] = bitcast i32 %__pow_sign to float
381-
; GCN: %[[r2:.+]] = tail call fast float @llvm.copysign.f32(float %__exp2, float %[[r1]])
382-
; GCN: store float %[[r2]], ptr addrspace(1) %a, align 4
380+
; GCN: %[[r1:.*]] = bitcast float %__exp2 to i32
381+
; GCN: %[[r2:.*]] = or disjoint i32 %__pow_sign, %[[r1]]
382+
; GCN: store i32 %[[r2]], ptr addrspace(1) %a, align 4
383383
define amdgpu_kernel void @test_pow(ptr addrspace(1) nocapture %a) {
384384
entry:
385385
%tmp = load float, ptr addrspace(1) %a, align 4
@@ -413,9 +413,9 @@ entry:
413413
; GCN: %__yeven = shl i32 %conv, 31
414414
; GCN: %[[r0:.*]] = bitcast float %tmp to i32
415415
; GCN: %__pow_sign = and i32 %__yeven, %[[r0]]
416-
; GCN: %[[r1:.*]] = bitcast i32 %__pow_sign to float
417-
; GCN: %[[r2:.*]] = tail call fast float @llvm.copysign.f32(float %__exp2, float %[[r1]])
418-
; GCN: store float %[[r2]], ptr addrspace(1) %a, align 4
416+
; GCN: %[[r1:.*]] = bitcast float %__exp2 to i32
417+
; GCN: %[[r2:.*]] = or disjoint i32 %__pow_sign, %[[r1]]
418+
; GCN: store i32 %[[r2]], ptr addrspace(1) %a, align 4
419419
define amdgpu_kernel void @test_pown(ptr addrspace(1) nocapture %a) {
420420
entry:
421421
%tmp = load float, ptr addrspace(1) %a, align 4
@@ -437,9 +437,9 @@ declare <2 x half> @_Z3powDv2_DhS_(<2 x half>, <2 x half>)
437437
; GCN: %__exp2 = tail call fast half @llvm.exp2.f16(half %__ylogx)
438438
; GCN: %1 = bitcast half %x to i16
439439
; GCN: %__pow_sign = and i16 %1, -32768
440-
; GCN: %2 = bitcast i16 %__pow_sign to half
441-
; GCN: %__pow_sign1 = tail call fast half @llvm.copysign.f16(half %__exp2, half %2)
442-
; GCN: ret half %__pow_sign1
440+
; GCN: %2 = bitcast half %__exp2 to i16
441+
; GCN: %3 = or disjoint i16 %__pow_sign, %2
442+
; GCN: %4 = bitcast i16 %3 to half
443443
define half @test_pow_fast_f16__y_13(half %x) {
444444
%powr = tail call fast half @_Z3powDhDh(half %x, half 13.0)
445445
ret half %powr
@@ -452,9 +452,9 @@ define half @test_pow_fast_f16__y_13(half %x) {
452452
; GCN: %__exp2 = tail call fast <2 x half> @llvm.exp2.v2f16(<2 x half> %__ylogx)
453453
; GCN: %1 = bitcast <2 x half> %x to <2 x i16>
454454
; GCN: %__pow_sign = and <2 x i16> %1, <i16 -32768, i16 -32768>
455-
; GCN: %2 = bitcast <2 x i16> %__pow_sign to <2 x half>
456-
; GCN: %__pow_sign1 = tail call fast <2 x half> @llvm.copysign.v2f16(<2 x half> %__exp2, <2 x half> %2)
457-
; GCN: ret <2 x half> %__pow_sign1
455+
; GCN: %2 = bitcast <2 x half> %__exp2 to <2 x i16>
456+
; GCN: %3 = or disjoint <2 x i16> %__pow_sign, %2
457+
; GCN: %4 = bitcast <2 x i16> %3 to <2 x half>
458458
define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x) {
459459
%powr = tail call fast <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> <half 13.0, half 13.0>)
460460
ret <2 x half> %powr

0 commit comments

Comments
 (0)