@@ -1888,7 +1888,8 @@ ARMTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
1888
1888
InstructionCost
1889
1889
ARMTTIImpl::getIntrinsicInstrCost (const IntrinsicCostAttributes &ICA,
1890
1890
TTI::TargetCostKind CostKind) {
1891
- switch (ICA.getID ()) {
1891
+ unsigned Opc = ICA.getID ();
1892
+ switch (Opc) {
1892
1893
case Intrinsic::get_active_lane_mask:
1893
1894
// Currently we make a somewhat optimistic assumption that
1894
1895
// active_lane_mask's are always free. In reality it may be freely folded
@@ -1904,17 +1905,37 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1904
1905
case Intrinsic::ssub_sat:
1905
1906
case Intrinsic::uadd_sat:
1906
1907
case Intrinsic::usub_sat: {
1908
+ bool IsAdd = (Opc == Intrinsic::sadd_sat || Opc == Intrinsic::ssub_sat);
1909
+ bool IsSigned = (Opc == Intrinsic::sadd_sat || Opc == Intrinsic::ssub_sat);
1910
+ Type *RetTy = ICA.getReturnType ();
1911
+
1912
+ if (RetTy->isIntegerTy ()) {
1913
+ if (IsSigned && ST->hasDSP () && RetTy->isIntegerTy (32 ))
1914
+ return 1 ; // qadd / qsub
1915
+ if (ST->hasDSP () && (RetTy->isIntegerTy (8 ) || RetTy->isIntegerTy (16 )))
1916
+ return 2 ; // uqadd16 / qadd16 / uqsub16 / qsub16 + possible extend.
1917
+ // Otherwise return the cost of expanding the node: add/sub + 2 icmp + 2 select.
1918
+ CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
1919
+ Type *CondTy = RetTy->getWithNewBitWidth (1 );
1920
+ return getArithmeticInstrCost (IsAdd ? Instruction::Add : Instruction::Sub,
1921
+ RetTy, CostKind) +
1922
+ 2 * getCmpSelInstrCost (BinaryOperator::ICmp, RetTy, CondTy, Pred,
1923
+ CostKind) +
1924
+ 2 * getCmpSelInstrCost (BinaryOperator::Select, RetTy, CondTy, Pred,
1925
+ CostKind);
1926
+ }
1927
+
1907
1928
if (!ST->hasMVEIntegerOps ())
1908
1929
break ;
1909
- Type *VT = ICA.getReturnType ();
1910
1930
1911
- std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost (VT );
1931
+ std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost (RetTy );
1912
1932
if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
1913
1933
LT.second == MVT::v16i8) {
1914
1934
// This is a base cost of 1 for the vqadd, plus 3 extra shifts if we
1915
1935
// need to extend the type, as it uses shr(qadd(shl, shl)).
1916
1936
unsigned Instrs =
1917
- LT.second .getScalarSizeInBits () == VT->getScalarSizeInBits () ? 1 : 4 ;
1937
+ LT.second .getScalarSizeInBits () == RetTy->getScalarSizeInBits () ? 1
1938
+ : 4 ;
1918
1939
return LT.first * ST->getMVEVectorCostFactor (CostKind) * Instrs;
1919
1940
}
1920
1941
break ;
@@ -1948,7 +1969,7 @@ ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1948
1969
case Intrinsic::fptoui_sat: {
1949
1970
if (ICA.getArgTypes ().empty ())
1950
1971
break ;
1951
- bool IsSigned = ICA. getID () == Intrinsic::fptosi_sat;
1972
+ bool IsSigned = Opc == Intrinsic::fptosi_sat;
1952
1973
auto LT = getTypeLegalizationCost (ICA.getArgTypes ()[0 ]);
1953
1974
EVT MTy = TLI->getValueType (DL, ICA.getReturnType ());
1954
1975
// Check for the legal types, with the correct subtarget features.
0 commit comments