Skip to content

Commit edc764b

Browse files
committed
[RFC][LV] VPlan-based cost model
This patch follows D89322 to add an initial skeleton of a VPlan-based cost model. The difference is that instead of adding a cost() interface to VPRecipes, all cost implementations are put together in VPlanCostModel. This allows VPlanCostModel to concentrate on assigning costs to a VPlan, thus separating the cost model code from the VPlan IR, similar to LLVM IR cost modeling. During the transition, it will still use the legacy model to obtain costs until the cost calculations for all recipes are implemented. Please let me know if you agree with the main idea of this patch. If there is a general consensus, I'll proceed to implement the cost for the other recipes for review. Differential Revision: https://reviews.llvm.org/D158716 - Address comments - Move VPCM object outside of the loop - Add getElementType() and getReturnElementType()
1 parent a0c0d43 commit edc764b

File tree

5 files changed

+390
-1
lines changed

5 files changed

+390
-1
lines changed

llvm/lib/Transforms/Vectorize/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ add_llvm_component_library(LLVMVectorize
66
Vectorize.cpp
77
VectorCombine.cpp
88
VPlan.cpp
9+
VPlanCostModel.cpp
910
VPlanHCFGBuilder.cpp
1011
VPlanRecipes.cpp
1112
VPlanSLP.cpp

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
#include "LoopVectorizationPlanner.h"
5858
#include "VPRecipeBuilder.h"
5959
#include "VPlan.h"
60+
#include "VPlanCostModel.h"
6061
#include "VPlanHCFGBuilder.h"
6162
#include "VPlanTransforms.h"
6263
#include "llvm/ADT/APInt.h"
@@ -363,6 +364,11 @@ cl::opt<bool> EnableVPlanNativePath(
363364
"support for outer loop vectorization."));
364365
}
365366

367+
// Selects how each VPlan is costed in buildVPlansWithVPRecipes: when set, the
// VPlan-based VPlanCostModel::expectedCost is used instead of the legacy
// LoopVectorizationCostModel::expectedCost. Hidden and off by default.
cl::opt<bool> CostUsingVPlan("vplan-use-vplan-cost-model", cl::init(false),
368+
cl::Hidden,
369+
cl::desc("Enable VPlan based costing path. To "
370+
"become the default in the future."));
371+
366372
// This flag enables the stress testing of the VPlan H-CFG construction in the
367373
// VPlan-native vectorization path. It must be used in conjunction with
368374
// -enable-vplan-native-path. -vplan-verify-hcfg can also be used to enable the
@@ -1171,6 +1177,8 @@ using VectorizationCostTy = std::pair<InstructionCost, bool>;
11711177
/// TargetTransformInfo to query the different backends for the cost of
11721178
/// different operations.
11731179
class LoopVectorizationCostModel {
1180+
friend class VPlanCostModel;
1181+
11741182
public:
11751183
LoopVectorizationCostModel(ScalarEpilogueLowering SEL, Loop *L,
11761184
PredicatedScalarEvolution &PSE, LoopInfo *LI,
@@ -8648,6 +8656,20 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
86488656
return toVPRecipeResult(tryToWiden(Instr, Operands, VPBB, Plan));
86498657
}
86508658

8659+
/// Narrow \p ValTy to the minimal bitwidth recorded for \p I, if any.
///
/// Looks \p I up in the legacy cost model's minimal-bitwidth map. When an
/// entry exists, returns an integer type of that width created in the context
/// of \p ValTy; otherwise returns \p ValTy unchanged.
///
/// \param ValTy the type to return when no entry is recorded for \p I.
/// \param I     the instruction to look up.
Type *VPlanCostModel::truncateToMinimalBitwidth(Type *ValTy,
                                                Instruction *I) const {
  // Bind by reference and look up once with find(): copying the map for each
  // query is wasteful, and operator[] is a mutating lookup that is not needed
  // for a read-only query.
  const auto &MinBWs = CM.getMinimalBitwidths();
  auto It = MinBWs.find(I);
  if (It == MinBWs.end())
    return ValTy;
  return IntegerType::get(ValTy->getContext(), It->second);
}
8666+
8667+
/// Cost \p I at vectorization factor \p VF by delegating to the legacy
/// LoopVectorizationCostModel.
///
/// Only the cost component of the legacy (cost, flag) pair is returned; the
/// boolean component is discarded.
InstructionCost VPlanCostModel::getLegacyInstructionCost(Instruction *I,
                                                         ElementCount VF) {
  return CM.getInstructionCost(I, VF).first;
}
8672+
86518673
void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
86528674
ElementCount MaxVF) {
86538675
assert(OrigLoop->isInnermost() && "Inner loop expected.");
@@ -8677,10 +8699,16 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
86778699
VF = SubRange.End;
86788700
}
86798701

8702+
VPlanCostModel VPCM(*TTI, PSE.getSE()->getContext(), CM);
86808703
for (const VPlanPtr &Plan : VPlans) {
86818704
SmallVector<VectorizationFactor> Costs;
86828705
for (ElementCount CostVF : Plan->getVFs()) {
8683-
auto [VecCost, IsVec] = CM.expectedCost(CostVF, &InvalidCosts);
8706+
VectorizationCostTy C;
8707+
if (CostUsingVPlan) {
8708+
C.first = VPCM.expectedCost(*Plan, CostVF, C.second);
8709+
} else
8710+
C = CM.expectedCost(CostVF, &InvalidCosts);
8711+
auto [VecCost, IsVec] = C;
86848712
#ifndef NDEBUG
86858713
unsigned AssumedMinimumVscale = 1;
86868714
if (std::optional<unsigned> VScale = getVScaleForTuning())

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -756,6 +756,11 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
756756
return cast<Instruction>(getVPSingleValue()->getUnderlyingValue());
757757
}
758758

759+
bool hasUnderlyingInstr() const {
760+
return getNumDefinedValues() == 1 &&
761+
getVPSingleValue()->getUnderlyingValue() != nullptr;
762+
}
763+
759764
/// Method to support type inquiry through isa, cast, and dyn_cast.
760765
static inline bool classof(const VPDef *D) {
761766
// All VPDefs are also VPRecipeBases.
Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
//===- VPlanCostModel.cpp - VPlan-based Vectorizer Cost Model -------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
///
9+
/// \file
10+
/// VPlan-based cost model
11+
///
12+
//===----------------------------------------------------------------------===//
13+
14+
#include "llvm/ADT/TypeSwitch.h"
15+
#include "llvm/Analysis/TargetTransformInfo.h"
16+
#include "llvm/Analysis/VectorUtils.h"
17+
#include "llvm/IR/Instruction.h"
18+
#include "llvm/IR/Operator.h"
19+
#include "llvm/Support/Debug.h"
20+
21+
#include "VPlan.h"
22+
#include "VPlanCFG.h"
23+
#include "VPlanCostModel.h"
24+
#include "VPlanValue.h"
25+
26+
using namespace llvm;
27+
28+
#define DEBUG_TYPE "vplan-cost-model"
29+
30+
namespace llvm {
31+
/// Sum the cost of every block visited from the entry of \p Plan for the
/// vectorization factor \p VF.
///
/// \p IsVec is forwarded to the per-block costing, which may set it to true;
/// it is never cleared here, so the caller is expected to initialize it.
InstructionCost VPlanCostModel::expectedCost(const VPlan &Plan, ElementCount VF,
                                             bool &IsVec) {
  InstructionCost Total = 0;
  for (const VPBlockBase *VPB : vp_depth_first_deep(Plan.getEntry())) {
    const InstructionCost BlockCost = getCost(VPB, VF, IsVec);
    Total += BlockCost;
  }
  return Total;
}
39+
40+
/// Cost a single VPlan block at vectorization factor \p VF.
///
/// A VPBasicBlock costs the sum of its recipes; every other kind of block
/// contributes zero here.
InstructionCost VPlanCostModel::getCost(const VPBlockBase *Block,
                                        ElementCount VF, bool &IsVec) {
  const auto *VPBB = dyn_cast<VPBasicBlock>(Block);
  if (!VPBB)
    return 0;

  InstructionCost Sum = 0;
  for (const VPRecipeBase &R : *VPBB)
    Sum += getCost(&R, VF, IsVec);
  return Sum;
}
51+
52+
/// Cost a single recipe at vectorization factor \p VF.
///
/// VPInstructions and widened memory recipes are costed directly through
/// TargetTransformInfo. Any other recipe is costed by the legacy cost model
/// when it has an underlying instruction; otherwise it is reported as
/// unsupported in debug output and costed as zero.
///
/// \param Recipe the recipe to cost.
/// \param VF     the vectorization factor to cost for.
/// \param IsVec  set to true when a vector operation is costed here; never
///               cleared. The legacy fallback path does not update it.
/// \returns the cost assigned to \p Recipe.
InstructionCost VPlanCostModel::getCost(const VPRecipeBase *Recipe,
                                        ElementCount VF, bool &IsVec) {
  auto *ScCondTy = Type::getInt1Ty(Context);
  auto *VecCondTy = VectorType::get(ScCondTy, VF);
  InstructionCost Cost =
      TypeSwitch<const VPRecipeBase *, InstructionCost>(Recipe)
          .Case<VPInstruction>([&](const VPInstruction *VPI)
                                   -> InstructionCost {
            unsigned Opcode = VPI->getOpcode();
            if (Instruction::isBinaryOp(Opcode)) {
              // Operands: A, B
              IsVec = true;
              Type *VectorTy = VectorType::get(getReturnElementType(VPI), VF);
              return TTI.getArithmeticInstrCost(Opcode, VectorTy, CostKind);
            }
            switch (Opcode) {
            case VPInstruction::Not: {
              // Operands: A
              // Costed as a vector xor on the operand type.
              IsVec = true;
              Type *VectorTy = VectorType::get(getElementType(VPI, 0), VF);
              return TTI.getArithmeticInstrCost(Instruction::Xor, VectorTy,
                                                CostKind);
            }
            case VPInstruction::ICmpULE: {
              // Operands: IV, TripCount
              IsVec = true;
              Type *VectorTy = VectorType::get(getElementType(VPI, 0), VF);
              return TTI.getCmpSelInstrCost(Instruction::ICmp, VectorTy,
                                            VecCondTy, CmpInst::ICMP_ULE,
                                            CostKind);
            }
            case Instruction::Select: {
              // Operands: Cond, Op1, Op2
              IsVec = true;
              Type *VectorTy = VectorType::get(getReturnElementType(VPI), VF);
              return TTI.getCmpSelInstrCost(
                  Instruction::Select, VectorTy, VecCondTy,
                  CmpInst::BAD_ICMP_PREDICATE, CostKind);
            }
            case VPInstruction::ActiveLaneMask: {
              // Operands: IV, TripCount
              // Costed as the get_active_lane_mask intrinsic taking two
              // scalar integers of the IV's width and producing a vector of
              // i1.
              IsVec = true;
              Type *OpTy = Type::getIntNTy(
                  Context, getElementType(VPI, 0)->getScalarSizeInBits());
              IntrinsicCostAttributes ICA(Intrinsic::get_active_lane_mask,
                                          VecCondTy, {OpTy, OpTy});
              return TTI.getIntrinsicInstrCost(ICA, CostKind);
            }
            case VPInstruction::FirstOrderRecurrenceSplice: {
              // Operands: FOR, FOR.backedge
              // Costed as a splice shuffle whose mask starts at the last lane
              // (known-minimum VF - 1) and increases by one per element.
              IsVec = true;
              Type *VectorTy = VectorType::get(getReturnElementType(VPI), VF);
              const unsigned LastLane = VF.getKnownMinValue() - 1;
              SmallVector<int> Mask(VF.getKnownMinValue());
              std::iota(Mask.begin(), Mask.end(), LastLane);
              return TTI.getShuffleCost(TargetTransformInfo::SK_Splice,
                                        cast<VectorType>(VectorTy), Mask,
                                        CostKind, LastLane);
            }
            case VPInstruction::CalculateTripCountMinusVF: {
              // Operands: TripCount
              // Costed as a scalar sub + icmp ugt + select sequence.
              Type *ScalarTy = getReturnElementType(VPI);
              return TTI.getArithmeticInstrCost(Instruction::Sub, ScalarTy,
                                                CostKind) +
                     TTI.getCmpSelInstrCost(Instruction::ICmp, ScalarTy,
                                            ScCondTy, CmpInst::ICMP_UGT,
                                            CostKind) +
                     TTI.getCmpSelInstrCost(
                         Instruction::Select, ScalarTy, ScCondTy,
                         CmpInst::BAD_ICMP_PREDICATE, CostKind);
            }
            case VPInstruction::CanonicalIVIncrement:
            case VPInstruction::CanonicalIVIncrementNUW:
              // Operands: IVPhi, CanonicalIVIncrement
            case VPInstruction::CanonicalIVIncrementForPart:
            case VPInstruction::CanonicalIVIncrementForPartNUW: {
              // Operands: StartV
              // All canonical IV increments are costed as one scalar add.
              Type *ScalarTy = getReturnElementType(VPI);
              return TTI.getArithmeticInstrCost(Instruction::Add, ScalarTy,
                                                CostKind);
            }
            case VPInstruction::BranchOnCond:
              // Operands: Cond
              // NOTE(review): this shares the compare + branch cost with
              // BranchOnCount although only a condition operand is listed;
              // confirm the extra compare is intended.
            case VPInstruction::BranchOnCount: {
              // Operands: IV, TripCount
              Type *ScalarTy = getElementType(VPI, 0);
              return TTI.getCmpSelInstrCost(Instruction::ICmp, ScalarTy,
                                            ScCondTy, CmpInst::ICMP_EQ,
                                            CostKind) +
                     TTI.getCFInstrCost(Instruction::Br, CostKind);
            }
            default:
              llvm_unreachable("Unsupported opcode for VPInstruction");
            } // end of switch
          })
          .Case<VPWidenMemoryInstructionRecipe>(
              [&](const VPWidenMemoryInstructionRecipe *VPWMIR) {
                IsVec = true;
                return getMemoryOpCost(VPWMIR, VF);
              })
          .Default([&](const VPRecipeBase *R) -> InstructionCost {
            if (!R->hasUnderlyingInstr()) {
              // Nothing to hand to the legacy model: report and cost as zero.
              LLVM_DEBUG(
                  dbgs() << "VPlanCM: unsupported recipe ";
                  VPSlotTracker SlotTracker((R->getParent())
                                                ? R->getParent()->getPlan()
                                                : nullptr);
                  R->print(dbgs(), Twine(), SlotTracker); dbgs() << '\n');
              return 0;
            }
            // The legacy interface takes a mutable Instruction pointer.
            Instruction *I = const_cast<Instruction *>(R->getUnderlyingInstr());
            return getLegacyInstructionCost(I, VF);
          });

  // This runs for every kind of recipe, so the message uses a generic label.
  LLVM_DEBUG(dbgs() << "VPlanCM: cost " << Cost << " for VF " << VF
                    << " for recipe: ";
             VPSlotTracker SlotTracker((Recipe->getParent())
                                           ? Recipe->getParent()->getPlan()
                                           : nullptr);
             Recipe->print(dbgs(), Twine(), SlotTracker); dbgs() << '\n');
  return Cost;
}
173+
174+
/// Cost the widened form of the load/store \p I that accesses type \p Ty.
///
/// Non-consecutive accesses are costed as address computation plus a
/// gather/scatter. Consecutive accesses are costed as a masked or plain
/// memory operation, plus a reverse shuffle when \p IsReverse is set.
InstructionCost VPlanCostModel::getMemoryOpCost(const Instruction *I, Type *Ty,
                                                bool IsConsecutive,
                                                bool IsMasked, bool IsReverse) {
  // The load/store helpers used below take a mutable pointer.
  auto *MutableI = const_cast<Instruction *>(I);
  const Align Alignment = getLoadStoreAlignment(MutableI);
  const unsigned Opcode = I->getOpcode();

  if (!IsConsecutive) {
    const Value *Ptr = getLoadStorePointerOperand(I);
    return TTI.getAddressComputationCost(Ty) +
           TTI.getGatherScatterOpCost(Opcode, Ty, Ptr, IsMasked, Alignment,
                                      CostKind, I);
  }

  const unsigned AS = getLoadStoreAddressSpace(MutableI);
  InstructionCost Cost =
      IsMasked ? TTI.getMaskedMemoryOpCost(Opcode, Ty, Alignment, AS, CostKind)
               : TTI.getMemoryOpCost(Opcode, Ty, Alignment, AS, CostKind,
                                     TTI::getOperandInfo(I->getOperand(0)), I);
  if (IsReverse)
    Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
                               cast<VectorType>(Ty), std::nullopt, CostKind, 0);
  return Cost;
}
200+
201+
/// Cost the widened memory recipe \p VPWMIR at vectorization factor \p VF.
///
/// Builds the vector type from the recipe's element type and forwards the
/// recipe's ingredient together with its consecutive/masked/reverse
/// properties to the instruction-based overload.
InstructionCost
VPlanCostModel::getMemoryOpCost(const VPWidenMemoryInstructionRecipe *VPWMIR,
                                ElementCount VF) {
  Type *WideTy = VectorType::get(getReturnElementType(VPWMIR), VF);
  const bool HasMask = VPWMIR->getMask() != nullptr;
  return getMemoryOpCost(&VPWMIR->getIngredient(), WideTy,
                         VPWMIR->isConsecutive(), HasMask,
                         VPWMIR->isReverse());
}
211+
212+
// Return element type the recipe processes since VF is not carried in VPlan
213+
Type *VPlanCostModel::getElementType(const VPRecipeBase *Recipe,
214+
unsigned N) const {
215+
auto TruncatedType = [&](Value *V) -> Type * {
216+
Type *ValTy = V->getType();
217+
;
218+
if (llvm::Instruction *Inst = llvm::dyn_cast<llvm::Instruction>(V))
219+
ValTy = truncateToMinimalBitwidth(V->getType(), Inst);
220+
return ValTy;
221+
};
222+
Value *V = Recipe->getOperand(N)->getUnderlyingValue();
223+
if (V)
224+
return TruncatedType(V);
225+
assert(Recipe->getOperand(N)->hasDefiningRecipe() &&
226+
"VPValue has no live-in and defining recipe");
227+
return getReturnElementType(Recipe->getOperand(N)->getDefiningRecipe());
228+
}
229+
230+
/// Return the element type of the value produced by \p Recipe.
///
/// Only VPInstruction and VPWidenMemoryInstructionRecipe are handled; any
/// other recipe kind, and the branch VPInstructions (which produce no value),
/// hit llvm_unreachable.
Type *VPlanCostModel::getReturnElementType(const VPRecipeBase *Recipe) const {
  // Widened memory access: the accessed type of the ingredient, narrowed to
  // its recorded minimal bitwidth if there is one.
  if (const auto *MemR = dyn_cast<VPWidenMemoryInstructionRecipe>(Recipe)) {
    Instruction *Ingredient = &MemR->getIngredient();
    return truncateToMinimalBitwidth(getLoadStoreType(Ingredient), Ingredient);
  }

  const auto *VPI = dyn_cast<VPInstruction>(Recipe);
  if (!VPI)
    llvm_unreachable("Unsupported VPRecipe");

  const unsigned Opcode = VPI->getOpcode();
  // Binary operators (operands: A, B) produce the type of their first operand.
  if (Instruction::isBinaryOp(Opcode))
    return getElementType(VPI, 0);

  switch (Opcode) {
  // These produce i1: Not (operands: A), ICmpULE and ActiveLaneMask
  // (operands: IV, TripCount).
  case VPInstruction::Not:
  case VPInstruction::ICmpULE:
  case VPInstruction::ActiveLaneMask:
    return Type::getInt1Ty(Context);

  // Select (operands: Cond, Op1, Op2) produces the type of Op1.
  case Instruction::Select:
    return getElementType(VPI, 1);

  // These produce the type of their first operand.
  case VPInstruction::FirstOrderRecurrenceSplice:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrement:
  case VPInstruction::CanonicalIVIncrementNUW:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::CanonicalIVIncrementForPartNUW:
    return getElementType(VPI, 0);

  // Branches do not produce a value.
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnCount:
    llvm_unreachable("Operation doesn't have return type");

  default:
    llvm_unreachable("Unsupported opcode for VPInstruction");
  }
}
283+
284+
} // namespace llvm

0 commit comments

Comments
 (0)