Skip to content

Commit 5bb2cad

Browse files
committed
TTI: Check legalization cost of abs nodes
Also adjust the AMDGPU cost.
1 parent a2900f1 commit 5bb2cad

File tree

5 files changed: 242 additions and 231 deletions.

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 18 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
21162116
case Intrinsic::vector_reduce_fminimum:
21172117
return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID),
21182118
VecOpTy, ICA.getFlags(), CostKind);
2119-
case Intrinsic::abs: {
2120-
// abs(X) = select(icmp(X,0),X,sub(0,X))
2121-
Type *CondTy = RetTy->getWithNewBitWidth(1);
2122-
CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
2123-
InstructionCost Cost = 0;
2124-
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
2125-
Pred, CostKind);
2126-
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
2127-
Pred, CostKind);
2128-
// TODO: Should we add an OperandValueProperties::OP_Zero property?
2129-
Cost += thisT()->getArithmeticInstrCost(
2130-
BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, TTI::OP_None});
2131-
return Cost;
2132-
}
2119+
case Intrinsic::abs:
2120+
ISD = ISD::ABS;
2121+
break;
21332122
case Intrinsic::smax:
21342123
ISD = ISD::SMAX;
21352124
break;
@@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
23982387
Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind);
23992388
return Cost;
24002389
}
2390+
case Intrinsic::abs: {
2391+
// abs(X) = select(icmp(X,0),X,sub(0,X))
2392+
Type *CondTy = RetTy->getWithNewBitWidth(1);
2393+
CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
2394+
InstructionCost Cost = 0;
2395+
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
2396+
Pred, CostKind);
2397+
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
2398+
Pred, CostKind);
2399+
// TODO: Should we add an OperandValueProperties::OP_Zero property?
2400+
Cost += thisT()->getArithmeticInstrCost(
2401+
BinaryOperator::Sub, RetTy, CostKind,
2402+
{TTI::OK_UniformConstantValue, TTI::OP_None});
2403+
return Cost;
2404+
}
24012405
case Intrinsic::fptosi_sat:
24022406
case Intrinsic::fptoui_sat: {
24032407
if (Tys.empty())

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -696,6 +696,7 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) {
696696
case Intrinsic::usub_sat:
697697
case Intrinsic::sadd_sat:
698698
case Intrinsic::ssub_sat:
699+
case Intrinsic::abs:
699700
return true;
700701
default:
701702
return false;
@@ -724,7 +725,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
724725
if (SLT == MVT::f64)
725726
return LT.first * NElts * get64BitInstrCost(CostKind);
726727

727-
if ((ST->has16BitInsts() && SLT == MVT::f16) ||
728+
if ((ST->has16BitInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) ||
728729
(ST->hasPackedFP32Ops() && SLT == MVT::f32))
729730
NElts = (NElts + 1) / 2;
730731

@@ -752,11 +753,17 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
752753
case Intrinsic::usub_sat:
753754
case Intrinsic::sadd_sat:
754755
case Intrinsic::ssub_sat: {
756+
// TODO: Full rate for i32/i16
755757
static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16};
756758
if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))
757759
NElts = 1;
758760
break;
759761
}
762+
case Intrinsic::abs:
763+
// Expansion takes 2 instructions for VALU
764+
if (SLT == MVT::i16 || SLT == MVT::i32)
765+
InstRate = 2 * getFullRateInstrCost();
766+
break;
760767
default:
761768
break;
762769
}

0 commit comments

Comments
 (0)