Skip to content

Commit b448d7d

Browse files
committed
TTI: Check legalization cost of abs nodes
Also adjust the AMDGPU cost.
1 parent 45aed42 commit b448d7d

File tree

5 files changed

+242
-231
lines changed

5 files changed

+242
-231
lines changed

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
21162116
case Intrinsic::vector_reduce_fminimum:
21172117
return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID),
21182118
VecOpTy, ICA.getFlags(), CostKind);
2119-
case Intrinsic::abs: {
2120-
// abs(X) = select(icmp(X,0),X,sub(0,X))
2121-
Type *CondTy = RetTy->getWithNewBitWidth(1);
2122-
CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
2123-
InstructionCost Cost = 0;
2124-
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
2125-
Pred, CostKind);
2126-
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
2127-
Pred, CostKind);
2128-
// TODO: Should we add an OperandValueProperties::OP_Zero property?
2129-
Cost += thisT()->getArithmeticInstrCost(
2130-
BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, TTI::OP_None});
2131-
return Cost;
2132-
}
2119+
case Intrinsic::abs:
2120+
ISD = ISD::ABS;
2121+
break;
21332122
case Intrinsic::smax:
21342123
ISD = ISD::SMAX;
21352124
break;
@@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
23982387
Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind);
23992388
return Cost;
24002389
}
2390+
case Intrinsic::abs: {
2391+
// abs(X) = select(icmp sgt(X, 0), X, sub(0, X))
2392+
Type *CondTy = RetTy->getWithNewBitWidth(1);
2393+
CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
2394+
InstructionCost Cost = 0;
2395+
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
2396+
Pred, CostKind);
2397+
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
2398+
Pred, CostKind);
2399+
// TODO: Should we add an OperandValueProperties::OP_Zero property?
2400+
Cost += thisT()->getArithmeticInstrCost(
2401+
BinaryOperator::Sub, RetTy, CostKind,
2402+
{TTI::OK_UniformConstantValue, TTI::OP_None});
2403+
return Cost;
2404+
}
24012405
case Intrinsic::fptosi_sat:
24022406
case Intrinsic::fptoui_sat: {
24032407
if (Tys.empty())

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -693,6 +693,7 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) {
693693
case Intrinsic::usub_sat:
694694
case Intrinsic::sadd_sat:
695695
case Intrinsic::ssub_sat:
696+
case Intrinsic::abs:
696697
return true;
697698
default:
698699
return false;
@@ -721,7 +722,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
721722
if (SLT == MVT::f64)
722723
return LT.first * NElts * get64BitInstrCost(CostKind);
723724

724-
if ((ST->has16BitInsts() && SLT == MVT::f16) ||
725+
if ((ST->has16BitInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) ||
725726
(ST->hasPackedFP32Ops() && SLT == MVT::f32))
726727
NElts = (NElts + 1) / 2;
727728

@@ -737,10 +738,16 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
737738
case Intrinsic::usub_sat:
738739
case Intrinsic::sadd_sat:
739740
case Intrinsic::ssub_sat:
741+
// TODO: Full rate for i32/i16
740742
static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16};
741743
if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))
742744
NElts = 1;
743745
break;
746+
case Intrinsic::abs:
747+
// Expansion takes 2 instructions for VALU
748+
if (SLT == MVT::i16 || SLT == MVT::i32)
749+
InstRate = 2 * getFullRateInstrCost();
750+
break;
744751
}
745752

746753
return LT.first * NElts * InstRate;

0 commit comments

Comments
 (0)