Skip to content

Commit ca040d6

Browse files
arsenmAlexisPerry
authored andcommitted
AMDGPU: Fix vector handling in pown libcall simplification (llvm#95832)
The isIntegerTy check would not work as you would hope in the vector case.
1 parent 4e771a8 commit ca040d6

File tree

2 files changed

+7
-12
lines changed

2 files changed

+7
-12
lines changed

llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1129,15 +1129,11 @@ bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
11291129
nval = CreateCallEx(B,ExpExpr, nval, "__exp2");
11301130

11311131
if (needcopysign) {
1132-
Value *opr_n;
1133-
Type* rTy = opr0->getType();
11341132
Type* nTyS = B.getIntNTy(eltType->getPrimitiveSizeInBits());
1135-
Type *nTy = nTyS;
1136-
if (const auto *vTy = dyn_cast<FixedVectorType>(rTy))
1137-
nTy = FixedVectorType::get(nTyS, vTy);
1133+
Type *nTy = FPOp->getType()->getWithNewType(nTyS);
11381134
unsigned size = nTy->getScalarSizeInBits();
1139-
opr_n = FPOp->getOperand(1);
1140-
if (opr_n->getType()->isIntegerTy())
1135+
Value *opr_n = FPOp->getOperand(1);
1136+
if (opr_n->getType()->getScalarType()->isIntegerTy())
11411137
opr_n = B.CreateZExtOrTrunc(opr_n, nTy, "__ytou");
11421138
else
11431139
opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -698,8 +698,7 @@ define <2 x float> @test_pown_afn_nnan_ninf_v2f32(<2 x float> %x, <2 x i32> %y)
698698
; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[Y]] to <2 x float>
699699
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]]
700700
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
701-
; CHECK-NEXT: [[__YTOU:%.*]] = fptosi <2 x float> [[POWNI2F]] to <2 x i32>
702-
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[__YTOU]], <i32 31, i32 31>
701+
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[Y]], <i32 31, i32 31>
703702
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
704703
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP0]]
705704
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
@@ -744,8 +743,8 @@ define <2 x double> @test_pown_afn_nnan_ninf_v2f64(<2 x double> %x, <2 x i32> %y
744743
; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[Y]] to <2 x double>
745744
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x double> [[__LOG2]], [[POWNI2F]]
746745
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x double> @_Z4exp2Dv2_d(<2 x double> [[__YLOGX]])
747-
; CHECK-NEXT: [[__YTOU1:%.*]] = zext <2 x i32> [[Y]] to <2 x i64>
748-
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i64> [[__YTOU1]], <i64 63, i64 63>
746+
; CHECK-NEXT: [[__YTOU:%.*]] = zext <2 x i32> [[Y]] to <2 x i64>
747+
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i64> [[__YTOU]], <i64 63, i64 63>
749748
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[X]] to <2 x i64>
750749
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i64> [[__YEVEN]], [[TMP0]]
751750
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[__EXP2]] to <2 x i64>
@@ -790,7 +789,7 @@ define <2 x half> @test_pown_afn_nnan_ninf_v2f16(<2 x half> %x, <2 x i32> %y) {
790789
; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[Y]] to <2 x half>
791790
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x half> [[__LOG2]], [[POWNI2F]]
792791
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x half> @llvm.exp2.v2f16(<2 x half> [[__YLOGX]])
793-
; CHECK-NEXT: [[__YTOU:%.*]] = fptosi <2 x half> [[POWNI2F]] to <2 x i16>
792+
; CHECK-NEXT: [[__YTOU:%.*]] = trunc <2 x i32> [[Y]] to <2 x i16>
794793
; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i16> [[__YTOU]], <i16 15, i16 15>
795794
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x half> [[X]] to <2 x i16>
796795
; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i16> [[__YEVEN]], [[TMP0]]

0 commit comments

Comments
 (0)