@@ -2022,6 +2022,30 @@ void VPReductionEVLRecipe::execute(VPTransformState &State) {
2022
2022
State.set (this , NewRed, /* IsScalar*/ true );
2023
2023
}
2024
2024
2025
+ InstructionCost VPReductionRecipe::computeCost (ElementCount VF,
2026
+ VPCostContext &Ctx) const {
2027
+ RecurKind RdxKind = RdxDesc.getRecurrenceKind ();
2028
+ Type *ElementTy = RdxDesc.getRecurrenceType ();
2029
+ auto *VectorTy = dyn_cast<VectorType>(ToVectorTy (ElementTy, VF));
2030
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
2031
+ unsigned Opcode = RdxDesc.getOpcode ();
2032
+
2033
+ if (VectorTy == nullptr )
2034
+ return InstructionCost::getInvalid ();
2035
+
2036
+ // Cost = Reduction cost + BinOp cost
2037
+ InstructionCost Cost =
2038
+ Ctx.TTI .getArithmeticInstrCost (Opcode, ElementTy, CostKind);
2039
+ if (RecurrenceDescriptor::isMinMaxRecurrenceKind (RdxKind)) {
2040
+ Intrinsic::ID Id = getMinMaxReductionIntrinsicOp (RdxKind);
2041
+ return Cost + Ctx.TTI .getMinMaxReductionCost (
2042
+ Id, VectorTy, RdxDesc.getFastMathFlags (), CostKind);
2043
+ }
2044
+
2045
+ return Cost + Ctx.TTI .getArithmeticReductionCost (
2046
+ Opcode, VectorTy, RdxDesc.getFastMathFlags (), CostKind);
2047
+ }
2048
+
2025
2049
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2026
2050
void VPReductionRecipe::print (raw_ostream &O, const Twine &Indent,
2027
2051
VPSlotTracker &SlotTracker) const {
0 commit comments