@@ -426,7 +426,8 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
        (Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc) &&
        IsLegalFPType(SrcTy) && IsLegalFPType(DstTy)))
     if (CCH == TTI::CastContextHint::Masked && DstTy.getSizeInBits() > 128)
-      return 2 * DstTy.getVectorNumElements() * ST->getMVEVectorCostFactor();
+      return 2 * DstTy.getVectorNumElements() *
+             ST->getMVEVectorCostFactor(CostKind);
 
   // The extend of other kinds of load is free
   if (CCH == TTI::CastContextHint::Normal ||
@@ -470,7 +471,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
       if (const auto *Entry =
               ConvertCostTableLookup(MVELoadConversionTbl, ISD,
                                      DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
-        return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor());
+        return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
     }
 
     static const TypeConversionCostTblEntry MVEFLoadConversionTbl[] = {
@@ -482,7 +483,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
       if (const auto *Entry =
               ConvertCostTableLookup(MVEFLoadConversionTbl, ISD,
                                      DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
-        return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor());
+        return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
     }
 
     // The truncate of a store is free. This is the mirror of extends above.
@@ -499,7 +500,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
       if (const auto *Entry =
               ConvertCostTableLookup(MVEStoreConversionTbl, ISD,
                                      SrcTy.getSimpleVT(), DstTy.getSimpleVT()))
-        return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor());
+        return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
     }
 
     static const TypeConversionCostTblEntry MVEFStoreConversionTbl[] = {
@@ -510,7 +511,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
       if (const auto *Entry =
               ConvertCostTableLookup(MVEFStoreConversionTbl, ISD,
                                      SrcTy.getSimpleVT(), DstTy.getSimpleVT()))
-        return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor());
+        return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
     }
   }
 
@@ -734,7 +735,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
     if (const auto *Entry = ConvertCostTableLookup(MVEVectorConversionTbl,
                                                    ISD, DstTy.getSimpleVT(),
                                                    SrcTy.getSimpleVT()))
-      return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor());
+      return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
   }
 
   if (ISD == ISD::FP_ROUND || ISD == ISD::FP_EXTEND) {
@@ -784,7 +785,7 @@ int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
   }
 
   int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
-                     ? ST->getMVEVectorCostFactor()
+                     ? ST->getMVEVectorCostFactor(CostKind)
                      : 1;
   return AdjustCost(
       BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
@@ -819,7 +820,7 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
     // vector, to prevent vectorising where we end up just scalarising the
     // result anyway.
     return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index),
-                    ST->getMVEVectorCostFactor()) *
+                    ST->getMVEVectorCostFactor(TTI::TCK_RecipThroughput)) *
            cast<FixedVectorType>(ValTy)->getNumElements() / 2;
   }
 
@@ -881,9 +882,8 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
   // Default to cheap (throughput/size of 1 instruction) but adjust throughput
   // for "multiple beats" potentially needed by MVE instructions.
   int BaseCost = 1;
-  if (CostKind != TTI::TCK_CodeSize && ST->hasMVEIntegerOps() &&
-      ValTy->isVectorTy())
-    BaseCost = ST->getMVEVectorCostFactor();
+  if (ST->hasMVEIntegerOps() && ValTy->isVectorTy())
+    BaseCost = ST->getMVEVectorCostFactor(CostKind);
 
   return BaseCost *
          BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
@@ -1132,11 +1132,12 @@ int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
 
       if (const auto *Entry = CostTableLookup(MVEDupTbl, ISD::VECTOR_SHUFFLE,
                                               LT.second))
-        return LT.first * Entry->Cost * ST->getMVEVectorCostFactor();
+        return LT.first * Entry->Cost *
+               ST->getMVEVectorCostFactor(TTI::TCK_RecipThroughput);
     }
   }
   int BaseCost = ST->hasMVEIntegerOps() && Tp->isVectorTy()
-                     ? ST->getMVEVectorCostFactor()
+                     ? ST->getMVEVectorCostFactor(TTI::TCK_RecipThroughput)
                      : 1;
   return BaseCost * BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
 }
@@ -1262,9 +1263,8 @@ int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
   // Default to cheap (throughput/size of 1 instruction) but adjust throughput
   // for "multiple beats" potentially needed by MVE instructions.
   int BaseCost = 1;
-  if (CostKind != TTI::TCK_CodeSize && ST->hasMVEIntegerOps() &&
-      Ty->isVectorTy())
-    BaseCost = ST->getMVEVectorCostFactor();
+  if (ST->hasMVEIntegerOps() && Ty->isVectorTy())
+    BaseCost = ST->getMVEVectorCostFactor(CostKind);
 
   // The rest of this mostly follows what is done in BaseT::getArithmeticInstrCost,
   // without treating floats as more expensive that scalars or increasing the
@@ -1321,11 +1321,11 @@ int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
             : cast<Instruction>(I->getOperand(0))->getOperand(0)->getType();
     if (SrcVTy->getNumElements() == 4 && SrcVTy->getScalarType()->isHalfTy() &&
         DstTy->getScalarType()->isFloatTy())
-      return ST->getMVEVectorCostFactor();
+      return ST->getMVEVectorCostFactor(CostKind);
   }
 
   int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
-                     ? ST->getMVEVectorCostFactor()
+                     ? ST->getMVEVectorCostFactor(CostKind)
                      : 1;
   return BaseCost * BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                            CostKind, I);
@@ -1337,9 +1337,9 @@ unsigned ARMTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                            TTI::TargetCostKind CostKind) {
   if (ST->hasMVEIntegerOps()) {
     if (Opcode == Instruction::Load && isLegalMaskedLoad(Src, Alignment))
-      return ST->getMVEVectorCostFactor();
+      return ST->getMVEVectorCostFactor(CostKind);
     if (Opcode == Instruction::Store && isLegalMaskedStore(Src, Alignment))
-      return ST->getMVEVectorCostFactor();
+      return ST->getMVEVectorCostFactor(CostKind);
   }
   if (!isa<FixedVectorType>(Src))
     return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
@@ -1368,7 +1368,8 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(
     // vldN/vstN only support legal vector types of size 64 or 128 in bits.
     // Accesses having vector types that are a multiple of 128 bits can be
     // matched to more than one vldN/vstN instruction.
-    int BaseCost = ST->hasMVEIntegerOps() ? ST->getMVEVectorCostFactor() : 1;
+    int BaseCost =
+        ST->hasMVEIntegerOps() ? ST->getMVEVectorCostFactor(CostKind) : 1;
     if (NumElts % Factor == 0 &&
         TLI->isLegalInterleavedAccessType(Factor, SubVecTy, Alignment, DL))
       return Factor * BaseCost * TLI->getNumInterleavedAccesses(SubVecTy, DL);
@@ -1413,7 +1414,8 @@ unsigned ARMTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
   // multiplied by the number of elements being loaded. This is possibly very
   // conservative, but even so we still end up vectorising loops because the
   // cost per iteration for many loops is lower than for scalar loops.
-  unsigned VectorCost = NumElems * LT.first * ST->getMVEVectorCostFactor();
+  unsigned VectorCost =
+      NumElems * LT.first * ST->getMVEVectorCostFactor(CostKind);
   // The scalarization cost should be a lot higher. We use the number of vector
   // elements plus the scalarization overhead.
   unsigned ScalarCost = NumElems * LT.first +
@@ -1506,7 +1508,7 @@ int ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
       {ISD::ADD, MVT::v4i32, 1},
   };
   if (const auto *Entry = CostTableLookup(CostTblAdd, ISD, LT.second))
-    return Entry->Cost * ST->getMVEVectorCostFactor() * LT.first;
+    return Entry->Cost * ST->getMVEVectorCostFactor(CostKind) * LT.first;
 
   return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
                                            CostKind);
@@ -1524,7 +1526,7 @@ ARMTTIImpl::getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
         (LT.second == MVT::v8i16 &&
          ResVT.getSizeInBits() <= (IsMLA ? 64 : 32)) ||
         (LT.second == MVT::v4i32 && ResVT.getSizeInBits() <= 64))
-      return ST->getMVEVectorCostFactor() * LT.first;
+      return ST->getMVEVectorCostFactor(CostKind) * LT.first;
   }
 
   return BaseT::getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, ValTy,
@@ -1566,7 +1568,7 @@ int ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                 ICA.getReturnType()->getScalarSizeInBits()
                             ? 1
                             : 4;
-      return LT.first * ST->getMVEVectorCostFactor() * Instrs;
+      return LT.first * ST->getMVEVectorCostFactor(CostKind) * Instrs;
     }
     break;
   }
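
Every hunk in this patch makes the same change: instead of guarding individual call sites with `CostKind != TTI::TCK_CodeSize`, the caller's `TTI::TargetCostKind` is threaded straight into `ST->getMVEVectorCostFactor()`, with `TTI::TCK_RecipThroughput` hard-coded in the two places (`getVectorInstrCost`, `getShuffleCost`) where no `CostKind` is in scope. Below is a minimal sketch of the helper shape the call sites imply; the wrapper struct name is hypothetical, and the body (factor of 2, code-size special case) is inferred from the deleted guards rather than quoted from `ARMSubtarget.h`:

```cpp
#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

// Hypothetical stand-in for the ARMSubtarget helper that the hunks above
// now call with an explicit cost kind.
struct MVECostModelSketch {
  unsigned
  getMVEVectorCostFactor(TargetTransformInfo::TargetCostKind CostKind) const {
    // Code-size costing counts instructions, not beats, so no scaling.
    // This mirrors the "CostKind != TTI::TCK_CodeSize" checks that the
    // call sites used to carry themselves.
    if (CostKind == TargetTransformInfo::TCK_CodeSize)
      return 1;
    // For throughput/latency-style kinds, assume the MVE beat factor of 2:
    // a 128-bit MVE operation may execute over multiple beats.
    return 2;
  }
};
```

Under this shape, a multiplier such as `Entry->Cost * ST->getMVEVectorCostFactor(CostKind)` degrades automatically to a plain instruction count when costing for code size, which is why the three-line guarded forms in `getCmpSelInstrCost` and `getArithmeticInstrCost` could collapse to two lines.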