Skip to content

Commit df856e4

Browse files
[SLP] Add GEP cost estimation for gathered loads.
When estimating the cost of vectorizing gathered loads, we also need to estimate the cost of the pointer operands (their vectorization), as is already done for the actual vectorized loads. Otherwise, the estimate may be too optimistic about the cost of the gathered loads. Reviewers: preames. Reviewed By: preames. Pull Request: #80867.
1 parent af6656c commit df856e4

File tree

2 files changed

+394
-91
lines changed

2 files changed

+394
-91
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 36 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7089,6 +7089,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
70897089
}))) &&
70907090
!all_of(Gathers, [&](Value *V) { return R.getTreeEntry(V); }) &&
70917091
!isSplat(Gathers)) {
7092+
InstructionCost BaseCost = R.getGatherCost(Gathers, !Root);
70927093
SetVector<Value *> VectorizedLoads;
70937094
SmallVector<unsigned> VectorizedStarts;
70947095
SmallVector<unsigned> ScatterVectorized;
@@ -7170,14 +7171,46 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
71707171
TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment,
71717172
LI->getPointerAddressSpace(), CostKind,
71727173
TTI::OperandValueInfo(), LI);
7174+
// Estimate GEP cost.
7175+
SmallVector<Value *> PointerOps(VF);
7176+
for (auto [I, V] : enumerate(VL.slice(P, VF)))
7177+
PointerOps[I] = cast<LoadInst>(V)->getPointerOperand();
7178+
auto [ScalarGEPCost, VectorGEPCost] =
7179+
getGEPCosts(TTI, PointerOps, LI->getPointerOperand(),
7180+
Instruction::Load, CostKind, LI->getType(), LoadTy);
7181+
GatherCost += VectorGEPCost - ScalarGEPCost;
71737182
}
71747183
for (unsigned P : ScatterVectorized) {
71757184
auto *LI0 = cast<LoadInst>(VL[P]);
7176-
Align CommonAlignment =
7177-
computeCommonAlignment<LoadInst>(VL.slice(P, VF));
7185+
ArrayRef<Value *> Slice = VL.slice(P, VF);
7186+
Align CommonAlignment = computeCommonAlignment<LoadInst>(Slice);
71787187
GatherCost += TTI.getGatherScatterOpCost(
71797188
Instruction::Load, LoadTy, LI0->getPointerOperand(),
71807189
/*VariableMask=*/false, CommonAlignment, CostKind, LI0);
7190+
// Estimate GEP cost.
7191+
SmallVector<Value *> PointerOps(VF);
7192+
for (auto [I, V] : enumerate(Slice))
7193+
PointerOps[I] = cast<LoadInst>(V)->getPointerOperand();
7194+
OrdersType Order;
7195+
if (sortPtrAccesses(PointerOps, LI0->getType(), *R.DL, *R.SE,
7196+
Order)) {
7197+
// TODO: improve checks if GEPs can be vectorized.
7198+
Value *Ptr0 = PointerOps.front();
7199+
Type *ScalarTy = Ptr0->getType();
7200+
auto *VecTy = FixedVectorType::get(ScalarTy, VF);
7201+
auto [ScalarGEPCost, VectorGEPCost] =
7202+
getGEPCosts(TTI, PointerOps, Ptr0, Instruction::GetElementPtr,
7203+
CostKind, ScalarTy, VecTy);
7204+
GatherCost += VectorGEPCost - ScalarGEPCost;
7205+
if (!Order.empty()) {
7206+
SmallVector<int> Mask;
7207+
inversePermutation(Order, Mask);
7208+
GatherCost += ::getShuffleCost(TTI, TTI::SK_PermuteSingleSrc,
7209+
VecTy, Mask, CostKind);
7210+
}
7211+
} else {
7212+
GatherCost += R.getGatherCost(PointerOps, /*ForPoisonSrc=*/true);
7213+
}
71817214
}
71827215
if (NeedInsertSubvectorAnalysis) {
71837216
// Add the cost for the subvectors insert.
@@ -7187,6 +7220,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
71877220
}
71887221
GatherCost -= ScalarsCost;
71897222
}
7223+
GatherCost = std::min(BaseCost, GatherCost);
71907224
} else if (!Root && isSplat(VL)) {
71917225
// Found the broadcasting of the single scalar, calculate the cost as
71927226
// the broadcast.

0 commit comments

Comments
 (0)