Skip to content

[SLP]Fix the cost of the adjusted extracts in per-register analysis. #96808

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 46 additions & 15 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8304,35 +8304,57 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
});
// FIXME: this must be moved to TTI for better estimation.
unsigned EltsPerVector = getPartNumElems(VL.size(), NumParts);
auto CheckPerRegistersShuffle =
[&](MutableArrayRef<int> Mask,
SmallVector<int> Indices) -> std::optional<TTI::ShuffleKind> {
auto CheckPerRegistersShuffle = [&](MutableArrayRef<int> Mask,
SmallVectorImpl<unsigned> &Indices)
-> std::optional<TTI::ShuffleKind> {
if (NumElts <= EltsPerVector)
return std::nullopt;
int OffsetReg0 =
alignDown(std::accumulate(Mask.begin(), Mask.end(), INT_MAX,
[](int S, int I) {
if (I == PoisonMaskElem)
return S;
return std::min(S, I);
}),
EltsPerVector);
int OffsetReg1 = OffsetReg0;
DenseSet<int> RegIndices;
// Check that if trying to permute same single/2 input vectors.
TTI::ShuffleKind ShuffleKind = TTI::SK_PermuteSingleSrc;
int FirstRegId = -1;
Indices.assign(1, -1);
for (int &I : Mask) {
Indices.assign(1, OffsetReg0);
for (auto [Pos, I] : enumerate(Mask)) {
if (I == PoisonMaskElem)
continue;
int RegId = (I / NumElts) * NumParts + (I % NumElts) / EltsPerVector;
int Idx = I - OffsetReg0;
int RegId =
(Idx / NumElts) * NumParts + (Idx % NumElts) / EltsPerVector;
if (FirstRegId < 0)
FirstRegId = RegId;
RegIndices.insert(RegId);
if (RegIndices.size() > 2)
return std::nullopt;
if (RegIndices.size() == 2) {
ShuffleKind = TTI::SK_PermuteTwoSrc;
if (Indices.size() == 1)
Indices.push_back(-1);
if (Indices.size() == 1) {
OffsetReg1 = alignDown(
std::accumulate(
std::next(Mask.begin(), Pos), Mask.end(), INT_MAX,
[&](int S, int I) {
if (I == PoisonMaskElem)
return S;
int RegId = ((I - OffsetReg0) / NumElts) * NumParts +
((I - OffsetReg0) % NumElts) / EltsPerVector;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is this doing? Don't we just need the PartIdx?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We analyze the remaining part of the mask, trying to find the indices from the second register. But we still may have the indices from the first register. So, it calculates the register id for the remaining indices to find the minimal index only(!) from this second register.

if (RegId == FirstRegId)
return S;
return std::min(S, I);
}),
EltsPerVector);
Indices.push_back(OffsetReg1);
}
Idx = I - OffsetReg1;
}
if (RegId == FirstRegId)
Indices.front() = I % NumElts;
else
Indices.back() = I % NumElts;
I = (I % NumElts) % EltsPerVector +
I = (Idx % NumElts) % EltsPerVector +
(RegId == FirstRegId ? 0 : EltsPerVector);
}
return ShuffleKind;
Expand All @@ -8349,7 +8371,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
Part * EltsPerVector, getNumElems(Mask.size(), EltsPerVector, Part));
SmallVector<int> SubMask(EltsPerVector, PoisonMaskElem);
copy(MaskSlice, SubMask.begin());
SmallVector<int> Indices;
SmallVector<unsigned, 2> Indices;
std::optional<TTI::ShuffleKind> RegShuffleKind =
CheckPerRegistersShuffle(SubMask, Indices);
if (!RegShuffleKind) {
Expand All @@ -8367,12 +8389,21 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
FixedVectorType::get(ScalarTy, EltsPerVector),
SubMask);
}
for (int Idx : Indices) {
for (unsigned Idx : Indices) {
Cost += ::getShuffleCost(TTI, TTI::SK_ExtractSubvector,
FixedVectorType::get(ScalarTy, NumElts),
std::nullopt, CostKind, Idx,
FixedVectorType::get(ScalarTy, EltsPerVector));
}
// Second attempt to check, if just a permute is better estimated than
// subvector extract.
SubMask.assign(NumElts, PoisonMaskElem);
copy(MaskSlice, SubMask.begin());
InstructionCost OriginalCost =
::getShuffleCost(TTI, *ShuffleKinds[Part],
FixedVectorType::get(ScalarTy, NumElts), SubMask);
if (OriginalCost < Cost)
Cost = OriginalCost;
}
return Cost;
}
Expand Down
Loading
Loading