[SLP][NFC]Extract a check for strided loads into separate function, NFC #134876

Merged
115 changes: 69 additions & 46 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5597,6 +5597,71 @@ static bool isMaskedLoadCompress(
   return TotalVecCost < GatherCost;
 }
 
+/// Checks if strided loads can be generated out of \p VL loads with pointers \p
+/// PointerOps:
+/// 1. Target with strided load support is detected.
+/// 2. The number of loads is greater than MinProfitableStridedLoads, or the
+/// potential stride <= MaxProfitableLoadStride and the potential stride is
+/// power-of-2 (to avoid perf regressions for the very small number of loads)
+/// and max distance > number of loads, or potential stride is -1.
+/// 3. The loads are ordered, or number of unordered loads <=
+/// MaxProfitableUnorderedLoads, or loads are in reversed order. (this check is
+/// to avoid extra costs for very expensive shuffles).
+/// 4. Any pointer operand is an instruction with the users outside of the
+/// current graph (for masked gathers extra extractelement instructions
+/// might be required).
+static bool isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
+                          ArrayRef<unsigned> Order,
+                          const TargetTransformInfo &TTI, const DataLayout &DL,
+                          ScalarEvolution &SE,
+                          const bool IsAnyPointerUsedOutGraph, const int Diff) {
+  const unsigned Sz = VL.size();
+  const unsigned AbsoluteDiff = std::abs(Diff);
+  Type *ScalarTy = VL.front()->getType();
+  auto *VecTy = getWidenedType(ScalarTy, Sz);
+  if (IsAnyPointerUsedOutGraph ||
+      (AbsoluteDiff > Sz &&
+       (Sz > MinProfitableStridedLoads ||
+        (AbsoluteDiff <= MaxProfitableLoadStride * Sz &&
+         AbsoluteDiff % Sz == 0 && has_single_bit(AbsoluteDiff / Sz)))) ||
+      Diff == -(static_cast<int>(Sz) - 1)) {
+    int Stride = Diff / static_cast<int>(Sz - 1);
+    if (Diff != Stride * static_cast<int>(Sz - 1))
+      return false;
+    Align Alignment =
+        cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
+            ->getAlign();
+    if (!TTI.isLegalStridedLoadStore(VecTy, Alignment))
+      return false;
+    Value *Ptr0;
+    Value *PtrN;
+    if (Order.empty()) {
+      Ptr0 = PointerOps.front();
+      PtrN = PointerOps.back();
+    } else {
+      Ptr0 = PointerOps[Order.front()];
+      PtrN = PointerOps[Order.back()];
+    }
+    // Iterate through all pointers and check if all distances are
+    // unique multiple of Dist.
+    SmallSet<int, 4> Dists;
+    for (Value *Ptr : PointerOps) {
+      int Dist = 0;
+      if (Ptr == PtrN)
+        Dist = Diff;
+      else if (Ptr != Ptr0)
+        Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, DL, SE);
+      // If the strides are not the same or repeated, we can't
+      // vectorize.
+      if (((Dist / Stride) * Stride) != Dist || !Dists.insert(Dist).second)
+        break;
+    }
+    if (Dists.size() == Sz)
+      return true;
+  }
+  return false;
+}
+
 BoUpSLP::LoadsState
 BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
                            SmallVectorImpl<unsigned> &Order,
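
The heart of the extracted helper is the distance walk at its end: every pointer's element distance from Ptr0 must be an exact, previously unseen multiple of Stride, and all Sz loads must contribute one. Below is a minimal standalone model of that loop, not part of the patch: std::set stands in for LLVM's SmallSet, precomputed distances stand in for getPointersDiff, and the helper name hasUniqueStridedDists is illustrative only.

// Standalone sketch of the uniqueness check in isStridedLoad (assumed
// helper name, not LLVM API). Dists[i] is the element distance of load i
// from the first pointer, as getPointersDiff would compute it.
#include <iostream>
#include <set>
#include <vector>

static bool hasUniqueStridedDists(const std::vector<int> &Dists, int Stride) {
  std::set<int> Seen;
  for (int Dist : Dists) {
    // Mirrors the patch's bail-out: a distance that is not an exact
    // multiple of Stride, or one already seen, kills the strided pattern.
    if ((Dist / Stride) * Stride != Dist || !Seen.insert(Dist).second)
      return false;
  }
  // The patch instead breaks out of the loop and then requires
  // Dists.size() == Sz; reaching this point is the equivalent condition.
  return true;
}

int main() {
  std::cout << hasUniqueStridedDists({0, 3, 6, 9}, 3) << '\n'; // 1: stride 3
  std::cout << hasUniqueStridedDists({0, 3, 3, 9}, 3) << '\n'; // 0: repeated slot
  std::cout << hasUniqueStridedDists({0, 3, 4, 9}, 3) << '\n'; // 0: 4 not a multiple
}
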
@@ -5670,59 +5735,17 @@ BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
         return LoadsState::Vectorize;
       // Simple check if not a strided access - clear order.
       bool IsPossibleStrided = *Diff % (Sz - 1) == 0;
-      // Try to generate strided load node if:
-      // 1. Target with strided load support is detected.
-      // 2. The number of loads is greater than MinProfitableStridedLoads,
-      // or the potential stride <= MaxProfitableLoadStride and the
-      // potential stride is power-of-2 (to avoid perf regressions for the very
-      // small number of loads) and max distance > number of loads, or potential
-      // stride is -1.
-      // 3. The loads are ordered, or number of unordered loads <=
-      // MaxProfitableUnorderedLoads, or loads are in reversed order.
-      // (this check is to avoid extra costs for very expensive shuffles).
-      // 4. Any pointer operand is an instruction with the users outside of the
-      // current graph (for masked gathers extra extractelement instructions
-      // might be required).
+      // Try to generate strided load node.
       auto IsAnyPointerUsedOutGraph =
           IsPossibleStrided && any_of(PointerOps, [&](Value *V) {
             return isa<Instruction>(V) && any_of(V->users(), [&](User *U) {
                      return !isVectorized(U) && !MustGather.contains(U);
                    });
           });
-      const unsigned AbsoluteDiff = std::abs(*Diff);
       if (IsPossibleStrided &&
-          (IsAnyPointerUsedOutGraph ||
-           (AbsoluteDiff > Sz &&
-            (Sz > MinProfitableStridedLoads ||
-             (AbsoluteDiff <= MaxProfitableLoadStride * Sz &&
-              AbsoluteDiff % Sz == 0 && has_single_bit(AbsoluteDiff / Sz)))) ||
-           *Diff == -(static_cast<int>(Sz) - 1))) {
-        int Stride = *Diff / static_cast<int>(Sz - 1);
-        if (*Diff == Stride * static_cast<int>(Sz - 1)) {
-          Align Alignment =
-              cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
-                  ->getAlign();
-          if (TTI->isLegalStridedLoadStore(VecTy, Alignment)) {
-            // Iterate through all pointers and check if all distances are
-            // unique multiple of Dist.
-            SmallSet<int, 4> Dists;
-            for (Value *Ptr : PointerOps) {
-              int Dist = 0;
-              if (Ptr == PtrN)
-                Dist = *Diff;
-              else if (Ptr != Ptr0)
-                Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
-              // If the strides are not the same or repeated, we can't
-              // vectorize.
-              if (((Dist / Stride) * Stride) != Dist ||
-                  !Dists.insert(Dist).second)
-                break;
-            }
-            if (Dists.size() == Sz)
-              return LoadsState::StridedVectorize;
-          }
-        }
-      }
+          isStridedLoad(VL, PointerOps, Order, *TTI, *DL, *SE,
+                        IsAnyPointerUsedOutGraph, *Diff))
+        return LoadsState::StridedVectorize;
       bool IsMasked;
       unsigned InterleaveFactor;
       SmallVector<int> CompressMask;
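
The shape conditions that gate the distance walk (item 2 of the helper's comment, plus the exact-span requirement) reduce to a small predicate on Diff and Sz. The sketch below models them with the IsAnyPointerUsedOutGraph escape hatch omitted; MinLoads and MaxStride stand in for the MinProfitableStridedLoads and MaxProfitableLoadStride cl::opt knobs, and the default value 8 is an assumption for the example, not the real option default.

// Model of the shape checks in isStridedLoad: the profitability gate plus
// the requirement that the total span factors into an integral stride.
// MinLoads/MaxStride and their defaults are illustrative assumptions.
#include <bit>      // std::has_single_bit (C++20)
#include <cstdlib>
#include <iostream>

static bool stridedShapeOK(int Diff, unsigned Sz, unsigned MinLoads = 8,
                           unsigned MaxStride = 8) {
  const unsigned AbsoluteDiff = std::abs(Diff);
  // Gate: reversed order (Diff == -(Sz - 1)) is always worth trying;
  // otherwise the span must exceed Sz and either there are many loads or
  // the stride is a small power of two.
  bool Profitable =
      Diff == -(static_cast<int>(Sz) - 1) ||
      (AbsoluteDiff > Sz &&
       (Sz > MinLoads || (AbsoluteDiff <= MaxStride * Sz &&
                          AbsoluteDiff % Sz == 0 &&
                          std::has_single_bit(AbsoluteDiff / Sz))));
  if (!Profitable)
    return false;
  // The span must divide evenly over the Sz - 1 gaps between loads.
  int Stride = Diff / static_cast<int>(Sz - 1);
  return Diff == Stride * static_cast<int>(Sz - 1);
}

int main() {
  std::cout << stridedShapeOK(20, 5) << '\n'; // 1: stride 5, 20/5 = 4 is 2^2
  std::cout << stridedShapeOK(6, 4) << '\n';  // 0: 6 is not a multiple of 4
  std::cout << stridedShapeOK(-3, 4) << '\n'; // 1: reversed order, stride -1
}

Note that the power-of-two test divides the span by Sz while the exact-span test divides it by Sz - 1, so the profitability gate and the integral-stride check are independent filters; both must pass (along with TTI.isLegalStridedLoadStore) before the per-pointer distance walk ever runs.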