-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU] Fold llvm.amdgcn.cvt.pkrtz when either operand is fpext #108237
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This also generalizes the Undef handling and adds Poison handling.
@llvm/pr-subscribers-backend-amdgpu Author: Jay Foad (jayfoad) Changes: This also generalizes the Undef handling and adds Poison handling. Full diff: https://github.com/llvm/llvm-project/pull/108237.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 4da3618357c420..389cc35b51f9da 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -643,27 +643,38 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
break;
}
case Intrinsic::amdgcn_cvt_pkrtz: {
- Value *Src0 = II.getArgOperand(0);
- Value *Src1 = II.getArgOperand(1);
- if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
- if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
- const fltSemantics &HalfSem =
- II.getType()->getScalarType()->getFltSemantics();
+ auto foldFPTruncToF16RTZ = [](Value *Arg) -> Value * {
+ Type *HalfTy = Type::getHalfTy(Arg->getContext());
+
+ if (isa<PoisonValue>(Arg))
+ return PoisonValue::get(HalfTy);
+ if (isa<UndefValue>(Arg))
+ return UndefValue::get(HalfTy);
+
+ ConstantFP *CFP = nullptr;
+ if (match(Arg, m_ConstantFP(CFP))) {
bool LosesInfo;
- APFloat Val0 = C0->getValueAPF();
- APFloat Val1 = C1->getValueAPF();
- Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
- Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
-
- Constant *Folded =
- ConstantVector::get({ConstantFP::get(II.getContext(), Val0),
- ConstantFP::get(II.getContext(), Val1)});
- return IC.replaceInstUsesWith(II, Folded);
+ APFloat Val(CFP->getValueAPF());
+ Val.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero, &LosesInfo);
+ return ConstantFP::get(HalfTy, Val);
}
- }
- if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
- return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
+ Value *Src = nullptr;
+ if (match(Arg, m_OneUse(m_FPExt(m_Value(Src))))) {
+ if (Src->getType()->isHalfTy())
+ return Src;
+ }
+
+ return nullptr;
+ };
+
+ if (Value *Src0 = foldFPTruncToF16RTZ(II.getArgOperand(0))) {
+ if (Value *Src1 = foldFPTruncToF16RTZ(II.getArgOperand(1))) {
+ Value *V = PoisonValue::get(II.getType());
+ V = IC.Builder.CreateInsertElement(V, Src0, (uint64_t)0);
+ V = IC.Builder.CreateInsertElement(V, Src1, (uint64_t)1);
+ return IC.replaceInstUsesWith(II, V);
+ }
}
break;
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index f3a3b8c1dc5d8a..176b3560b4064d 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -1161,6 +1161,48 @@ define <2 x half> @constant_rtz_pkrtz() {
ret <2 x half> %cvt
}
+define <2 x half> @fpext_const_cvt_pkrtz(half %x) {
+; CHECK-LABEL: @fpext_const_cvt_pkrtz(
+; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> <half poison, half 0xH4200>, half [[X:%.*]], i64 0
+; CHECK-NEXT: ret <2 x half> [[CVT]]
+;
+ %ext = fpext half %x to float
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %ext, float 3.0)
+ ret <2 x half> %cvt
+}
+
+define <2 x half> @const_fpext_cvt_pkrtz(half %y) {
+; CHECK-LABEL: @const_fpext_cvt_pkrtz(
+; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> <half 0xH4500, half poison>, half [[Y:%.*]], i64 1
+; CHECK-NEXT: ret <2 x half> [[CVT]]
+;
+ %ext = fpext half %y to float
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 5.0, float %ext)
+ ret <2 x half> %cvt
+}
+
+define <2 x half> @fpext_fpext_cvt_pkrtz(half %x, half %y) {
+; CHECK-LABEL: @fpext_fpext_cvt_pkrtz(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> poison, half [[X:%.*]], i64 0
+; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> [[TMP1]], half [[Y:%.*]], i64 1
+; CHECK-NEXT: ret <2 x half> [[CVT]]
+;
+ %extx = fpext half %x to float
+ %exty = fpext half %y to float
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %extx, float %exty)
+ ret <2 x half> %cvt
+}
+
+define <2 x half> @poison_fpext_cvt_pkrtz(half %y) {
+; CHECK-LABEL: @poison_fpext_cvt_pkrtz(
+; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> poison, half [[Y:%.*]], i64 1
+; CHECK-NEXT: ret <2 x half> [[CVT]]
+;
+ %ext = fpext half %y to float
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float poison, float %ext)
+ ret <2 x half> %cvt
+}
+
; --------------------------------------------------------------------
; llvm.amdgcn.cvt.pknorm.i16
; --------------------------------------------------------------------
|
@llvm/pr-subscribers-llvm-transforms Author: Jay Foad (jayfoad) Changes: This also generalizes the Undef handling and adds Poison handling. Full diff: https://github.com/llvm/llvm-project/pull/108237.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 4da3618357c420..389cc35b51f9da 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -643,27 +643,38 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
break;
}
case Intrinsic::amdgcn_cvt_pkrtz: {
- Value *Src0 = II.getArgOperand(0);
- Value *Src1 = II.getArgOperand(1);
- if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
- if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
- const fltSemantics &HalfSem =
- II.getType()->getScalarType()->getFltSemantics();
+ auto foldFPTruncToF16RTZ = [](Value *Arg) -> Value * {
+ Type *HalfTy = Type::getHalfTy(Arg->getContext());
+
+ if (isa<PoisonValue>(Arg))
+ return PoisonValue::get(HalfTy);
+ if (isa<UndefValue>(Arg))
+ return UndefValue::get(HalfTy);
+
+ ConstantFP *CFP = nullptr;
+ if (match(Arg, m_ConstantFP(CFP))) {
bool LosesInfo;
- APFloat Val0 = C0->getValueAPF();
- APFloat Val1 = C1->getValueAPF();
- Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
- Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
-
- Constant *Folded =
- ConstantVector::get({ConstantFP::get(II.getContext(), Val0),
- ConstantFP::get(II.getContext(), Val1)});
- return IC.replaceInstUsesWith(II, Folded);
+ APFloat Val(CFP->getValueAPF());
+ Val.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero, &LosesInfo);
+ return ConstantFP::get(HalfTy, Val);
}
- }
- if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
- return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
+ Value *Src = nullptr;
+ if (match(Arg, m_OneUse(m_FPExt(m_Value(Src))))) {
+ if (Src->getType()->isHalfTy())
+ return Src;
+ }
+
+ return nullptr;
+ };
+
+ if (Value *Src0 = foldFPTruncToF16RTZ(II.getArgOperand(0))) {
+ if (Value *Src1 = foldFPTruncToF16RTZ(II.getArgOperand(1))) {
+ Value *V = PoisonValue::get(II.getType());
+ V = IC.Builder.CreateInsertElement(V, Src0, (uint64_t)0);
+ V = IC.Builder.CreateInsertElement(V, Src1, (uint64_t)1);
+ return IC.replaceInstUsesWith(II, V);
+ }
}
break;
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index f3a3b8c1dc5d8a..176b3560b4064d 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -1161,6 +1161,48 @@ define <2 x half> @constant_rtz_pkrtz() {
ret <2 x half> %cvt
}
+define <2 x half> @fpext_const_cvt_pkrtz(half %x) {
+; CHECK-LABEL: @fpext_const_cvt_pkrtz(
+; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> <half poison, half 0xH4200>, half [[X:%.*]], i64 0
+; CHECK-NEXT: ret <2 x half> [[CVT]]
+;
+ %ext = fpext half %x to float
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %ext, float 3.0)
+ ret <2 x half> %cvt
+}
+
+define <2 x half> @const_fpext_cvt_pkrtz(half %y) {
+; CHECK-LABEL: @const_fpext_cvt_pkrtz(
+; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> <half 0xH4500, half poison>, half [[Y:%.*]], i64 1
+; CHECK-NEXT: ret <2 x half> [[CVT]]
+;
+ %ext = fpext half %y to float
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 5.0, float %ext)
+ ret <2 x half> %cvt
+}
+
+define <2 x half> @fpext_fpext_cvt_pkrtz(half %x, half %y) {
+; CHECK-LABEL: @fpext_fpext_cvt_pkrtz(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> poison, half [[X:%.*]], i64 0
+; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> [[TMP1]], half [[Y:%.*]], i64 1
+; CHECK-NEXT: ret <2 x half> [[CVT]]
+;
+ %extx = fpext half %x to float
+ %exty = fpext half %y to float
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %extx, float %exty)
+ ret <2 x half> %cvt
+}
+
+define <2 x half> @poison_fpext_cvt_pkrtz(half %y) {
+; CHECK-LABEL: @poison_fpext_cvt_pkrtz(
+; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> poison, half [[Y:%.*]], i64 1
+; CHECK-NEXT: ret <2 x half> [[CVT]]
+;
+ %ext = fpext half %y to float
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float poison, float %ext)
+ ret <2 x half> %cvt
+}
+
; --------------------------------------------------------------------
; llvm.amdgcn.cvt.pknorm.i16
; --------------------------------------------------------------------
|
if (isa<PoisonValue>(Arg)) | ||
return PoisonValue::get(HalfTy); | ||
if (isa<UndefValue>(Arg)) | ||
return UndefValue::get(HalfTy); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Usually we do undef -> qnan for FP folds (although I think this is overly conservative, and I assume it is only done to exclude snan bit patterns, which aren't guaranteed to be quieted anyway).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was following ConstantFoldCastInstruction which will fold (fptrunc undef) to undef.
%cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float poison, float %ext) | ||
ret <2 x half> %cvt | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add some tests with bfloat sources. Also add a negative multi-use test.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
bfloat tests would be negative since the intrinsic only supports half.
As for multi use tests, I'm not sure if they should be negative. I know I used m_OneUse
in the implementation, but now I think the optimization might be beneficial even with multiple uses.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, negative
; CHECK-NEXT: ret <2 x half> [[CVT]] | ||
; | ||
%ext = fpext half %y to float | ||
%cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float poison, float %ext) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also test poison on RHS
…m#108237) This also generalizes the Undef handling and adds Poison handling.
This also generalizes the Undef handling and adds Poison handling.