Skip to content

Commit 06cc9b7

Browse files
committed
AMDGPU: Cost model for minimumnum/maximumnum
1 parent 63d221a commit 06cc9b7

File tree

4 files changed

+695
-528
lines changed

4 files changed

+695
-528
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -685,6 +685,8 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) {
685685
case Intrinsic::fma:
686686
case Intrinsic::fmuladd:
687687
case Intrinsic::copysign:
688+
case Intrinsic::minimumnum:
689+
case Intrinsic::maximumnum:
688690
case Intrinsic::canonicalize:
689691
// There's a small benefit to using vector ops in the legalized code.
690692
case Intrinsic::round:
@@ -742,6 +744,23 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
742744
break;
743745
case Intrinsic::copysign:
744746
return NElts * getFullRateInstrCost();
747+
case Intrinsic::minimumnum:
748+
case Intrinsic::maximumnum: {
749+
// Instruction + 2 canonicalizes. For cases that need type promotion, the
750+
// promotion takes the place of the canonicalize.
751+
unsigned NumOps = 3;
752+
if (const IntrinsicInst *II = ICA.getInst()) {
753+
// Directly legal with ieee=0
754+
// TODO: Not directly legal with strictfp
755+
if (fpenvIEEEMode(*II) == KnownIEEEMode::Off)
756+
NumOps = 1;
757+
}
758+
759+
unsigned BaseRate =
760+
SLT == MVT::f64 ? get64BitInstrCost(CostKind) : getFullRateInstrCost();
761+
InstRate = BaseRate * NumOps;
762+
break;
763+
}
745764
case Intrinsic::canonicalize: {
746765
InstRate =
747766
SLT == MVT::f64 ? get64BitInstrCost(CostKind) : getFullRateInstrCost();

0 commit comments

Comments
 (0)