@@ -2200,6 +2200,24 @@ void VPCSAHeaderPHIRecipe::execute(VPTransformState &State) {
2200
2200
State.set (this , DataPhi, Part);
2201
2201
}
2202
2202
2203
+ InstructionCost VPCSAHeaderPHIRecipe::computeCost (ElementCount VF,
2204
+ VPCostContext &Ctx) const {
2205
+ if (VF.isScalar ())
2206
+ return 0 ;
2207
+
2208
+ InstructionCost C = 0 ;
2209
+ auto *VTy = VectorType::get (getUnderlyingValue ()->getType (), VF);
2210
+ const TargetTransformInfo &TTI = Ctx.TTI ;
2211
+
2212
+ // FIXME: These costs should be moved into VPInstruction::computeCost. We put
2213
+ // them here for now since there is no VPInstruction::computeCost support.
2214
+ // CSAInitMask
2215
+ C += TTI.getShuffleCost (TargetTransformInfo::SK_Broadcast, VTy);
2216
+ // CSAInitData
2217
+ C += TTI.getShuffleCost (TargetTransformInfo::SK_Broadcast, VTy);
2218
+ return C;
2219
+ }
2220
+
2203
2221
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2204
2222
void VPCSADataUpdateRecipe::print (raw_ostream &O, const Twine &Indent,
2205
2223
VPSlotTracker &SlotTracker) const {
@@ -2228,6 +2246,34 @@ void VPCSADataUpdateRecipe::execute(VPTransformState &State) {
2228
2246
}
2229
2247
}
2230
2248
2249
+ InstructionCost VPCSADataUpdateRecipe::computeCost (ElementCount VF,
2250
+ VPCostContext &Ctx) const {
2251
+ if (VF.isScalar ())
2252
+ return 0 ;
2253
+
2254
+ InstructionCost C = 0 ;
2255
+ auto *VTy = VectorType::get (getUnderlyingValue ()->getType (), VF);
2256
+ auto *MaskTy = VectorType::get (IntegerType::getInt1Ty (VTy->getContext ()), VF);
2257
+ constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
2258
+ const TargetTransformInfo &TTI = Ctx.TTI ;
2259
+
2260
+ // Data Update
2261
+ C += TTI.getArithmeticInstrCost (Instruction::Select, VTy, CostKind);
2262
+
2263
+ // FIXME: These costs should be moved into VPInstruction::computeCost. We put
2264
+ // them here for now since they are related to updating the data and there is
2265
+ // no VPInstruction::computeCost support at the moment. CSAInitMask AnyActive
2266
+ C += TTI.getArithmeticInstrCost (Instruction::Select, VTy, CostKind);
2267
+ // vp.reduce.or
2268
+ C += TTI.getArithmeticReductionCost (Instruction::Or, VTy, std::nullopt,
2269
+ CostKind);
2270
+ // VPVLSel
2271
+ C += TTI.getArithmeticInstrCost (Instruction::Select, VTy, CostKind);
2272
+ // MaskUpdate
2273
+ C += TTI.getArithmeticInstrCost (Instruction::Select, MaskTy, CostKind);
2274
+ return C;
2275
+ }
2276
+
2231
2277
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2232
2278
void VPCSAExtractScalarRecipe::print (raw_ostream &O, const Twine &Indent,
2233
2279
VPSlotTracker &SlotTracker) const {
@@ -2288,6 +2334,60 @@ void VPCSAExtractScalarRecipe::execute(VPTransformState &State) {
2288
2334
State.set (this , ChooseFromVecOrInit, 0 , /* IsScalar=*/ true );
2289
2335
}
2290
2336
2337
+ InstructionCost
2338
+ VPCSAExtractScalarRecipe::computeCost (ElementCount VF,
2339
+ VPCostContext &Ctx) const {
2340
+ if (VF.isScalar ())
2341
+ return 0 ;
2342
+
2343
+ InstructionCost C = 0 ;
2344
+ auto *VTy = VectorType::get (getUnderlyingValue ()->getType (), VF);
2345
+ auto *Int32VTy =
2346
+ VectorType::get (IntegerType::getInt32Ty (VTy->getContext ()), VF);
2347
+ auto *MaskTy = VectorType::get (IntegerType::getInt1Ty (VTy->getContext ()), VF);
2348
+ constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
2349
+ const TargetTransformInfo &TTI = Ctx.TTI ;
2350
+
2351
+ // StepVector
2352
+ ArrayRef<Value *> Args;
2353
+ IntrinsicCostAttributes CostAttrs (Intrinsic::stepvector, Int32VTy, Args);
2354
+ C += TTI.getIntrinsicInstrCost (CostAttrs, CostKind);
2355
+ // NegOneSplat
2356
+ C += TTI.getShuffleCost (TargetTransformInfo::SK_Broadcast, Int32VTy);
2357
+ // LastIdx
2358
+ if (usesEVL ()) {
2359
+ C += TTI.getMinMaxReductionCost (Intrinsic::smax, Int32VTy, FastMathFlags (),
2360
+ CostKind);
2361
+ } else {
2362
+ // ActiveLaneIdxs
2363
+ C += TTI.getArithmeticInstrCost (Instruction::Select,
2364
+ MaskTy->getScalarType (), CostKind);
2365
+ // MaybeLastIdx
2366
+ C += TTI.getMinMaxReductionCost (Intrinsic::smax, Int32VTy, FastMathFlags (),
2367
+ CostKind);
2368
+ // IsLaneZeroActive
2369
+ C += TTI.getArithmeticInstrCost (Instruction::ExtractElement, MaskTy,
2370
+ CostKind);
2371
+ // MaybeLastIdxEQZero
2372
+ C += TTI.getArithmeticInstrCost (Instruction::ICmp, MaskTy->getScalarType (),
2373
+ CostKind);
2374
+ // And
2375
+ C += TTI.getArithmeticInstrCost (Instruction::And, MaskTy->getScalarType (),
2376
+ CostKind);
2377
+ // LastIdx
2378
+ C += TTI.getArithmeticInstrCost (Instruction::Select, VTy->getScalarType (),
2379
+ CostKind);
2380
+ }
2381
+ // ExtractFromVec
2382
+ C += TTI.getArithmeticInstrCost (Instruction::ExtractElement, VTy, CostKind);
2383
+ // LastIdxGeZero
2384
+ C += TTI.getArithmeticInstrCost (Instruction::ICmp, Int32VTy, CostKind);
2385
+ // ChooseFromVecOrInit
2386
+ C += TTI.getArithmeticInstrCost (Instruction::Select, VTy->getScalarType (),
2387
+ CostKind);
2388
+ return C;
2389
+ }
2390
+
2291
2391
void VPBranchOnMaskRecipe::execute (VPTransformState &State) {
2292
2392
assert (State.Instance && " Branch on Mask works only on single instance." );
2293
2393
0 commit comments