-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[SLP]Buildvector for alternate instructions with non-profitable gather operands. #84978
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ea1918b
b1ad1e5
7c582a3
ce32678
f92ebf3
85981df
a63213d
a2a714d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2995,6 +2995,15 @@ class BoUpSLP { | |
return ScalarToTreeEntry.lookup(V); | ||
} | ||
|
||
/// Checks whether the operand nodes of an alternate node would end up as | ||
/// buildvector sequences. If they would, building the alternate shuffle is | ||
/// probably not worthwhile when the number of buildvector operands plus the | ||
/// alternate instructions exceeds the number of buildvector instructions. | ||
/// \param S the instructions state of the analyzed values. | ||
/// \param VL list of the instructions with alternate opcodes. | ||
/// \returns true if vectorizing \p VL as an alternate node is considered | ||
/// profitable. | ||
bool areAltOperandsProfitable(const InstructionsState &S, | ||
ArrayRef<Value *> VL) const; | ||
|
||
/// Checks if the specified list of the instructions/values can be vectorized | ||
/// and fills required data before actual scheduling of the instructions. | ||
TreeEntry::EntryState getScalarsVectorizationState( | ||
|
@@ -5777,6 +5786,117 @@ static bool isAlternateInstruction(const Instruction *I, | |
const Instruction *AltOp, | ||
const TargetLibraryInfo &TLI); | ||
|
||
// Decides whether it is worth vectorizing \p VL as an alternate-opcode node. | ||
// Returns true right away when the target reports the main/alternate opcode | ||
// mix as legal; otherwise gathers the per-lane operands and compares an | ||
// estimated vector instruction count against the buildvector estimate. | ||
bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S, | ||
ArrayRef<Value *> VL) const { | ||
unsigned Opcode0 = S.getOpcode(); | ||
unsigned Opcode1 = S.getAltOpcode(); | ||
// The opcode mask selects between the two opcodes: a set bit marks a lane | ||
// whose instruction uses the alternate opcode (Opcode1). | ||
SmallBitVector OpcodeMask(VL.size(), false); | ||
for (unsigned Lane : seq<unsigned>(0, VL.size())) | ||
if (cast<Instruction>(VL[Lane])->getOpcode() == Opcode1) | ||
OpcodeMask.set(Lane); | ||
// If this pattern is supported by the target then consider it profitable. | ||
if (TTI->isLegalAltInstr(FixedVectorType::get(S.MainOp->getType(), VL.size()), | ||
Opcode0, Opcode1, OpcodeMask)) | ||
return true; | ||
// Operands[I][Lane] is operand number I of the instruction in lane Lane. | ||
SmallVector<ValueList> Operands; | ||
for (unsigned I : seq<unsigned>(0, S.MainOp->getNumOperands())) { | ||
Operands.emplace_back(); | ||
// Prepare the operand vector. | ||
for (Value *V : VL) | ||
Operands.back().push_back(cast<Instruction>(V)->getOperand(I)); | ||
} | ||
// Only for two-operand instructions: walk adjacent lane pairs and, based on | ||
// the candidate pair chosen by findBestRootPair, swap the two operands of | ||
// one of the lanes. | ||
if (Operands.size() == 2) { | ||
// Try to find the best operand candidates. | ||
for (unsigned I : seq<unsigned>(0, VL.size() - 1)) { | ||
SmallVector<std::pair<Value *, Value *>> Candidates(3); | ||
Candidates[0] = std::make_pair(Operands[0][I], Operands[0][I + 1]); | ||
Candidates[1] = std::make_pair(Operands[0][I], Operands[1][I + 1]); | ||
Candidates[2] = std::make_pair(Operands[1][I], Operands[0][I + 1]); | ||
std::optional<int> Res = findBestRootPair(Candidates); | ||
// A missing result is handled like candidate 0, i.e. nothing is swapped. | ||
switch (Res.value_or(0)) { | ||
case 0: | ||
break; | ||
case 1: | ||
// Candidate 1 paired Operands[1][I + 1]: swap the operands of lane I + 1. | ||
std::swap(Operands[0][I + 1], Operands[1][I + 1]); | ||
break; | ||
case 2: | ||
// Candidate 2 paired Operands[1][I]: swap the operands of lane I. | ||
std::swap(Operands[0][I], Operands[1][I]); | ||
break; | ||
default: | ||
llvm_unreachable("Unexpected index."); | ||
} | ||
} | ||
} | ||
// Opcodes of the instruction operands visited by the scan below. | ||
DenseSet<unsigned> UniqueOpcodes; | ||
constexpr unsigned NumAltInsts = 3; // main + alt + shuffle. | ||
// Non-instruction operand values, counted on their first occurrence within | ||
// an operand list by the scan below. | ||
unsigned NonInstCnt = 0; | ||
// Estimate the number of instructions required for the vectorized node and | ||
// for the buildvector node. | ||
unsigned UndefCnt = 0; | ||
// Count the number of extra shuffles, required for vector nodes. | ||
unsigned ExtraShuffleInsts = 0; | ||
// Check that the operands do not contain the same values and create either | ||
// a perfect diamond match or a shuffled match. | ||
if (Operands.size() == 2) { | ||
// Do not count same operands twice. | ||
if (Operands.front() == Operands.back()) { | ||
Operands.erase(Operands.begin()); | ||
} else if (!allConstant(Operands.front()) && | ||
all_of(Operands.front(), [&](Value *V) { | ||
return is_contained(Operands.back(), V); | ||
})) { | ||
// Every value of the first operand list also occurs in the second one: | ||
// keep only the second list and account for one extra shuffle. | ||
Operands.erase(Operands.begin()); | ||
++ExtraShuffleInsts; | ||
} | ||
} | ||
const Loop *L = LI->getLoopFor(S.MainOp->getParent()); | ||
// Vectorize node, if: | ||
// 1. At least one operand is constant or splat. | ||
// 2. Operands have many loop invariants (the instructions are not loop | ||
// invariants). | ||
// 3. At least one unique operand is expected to be vectorized. | ||
return none_of(Operands, | ||
[&](ArrayRef<Value *> Op) { | ||
// All-constant operands, and non-splat operands from one block with | ||
// one type and a common opcode, never make the predicate true. | ||
if (allConstant(Op) || | ||
(!isSplat(Op) && allSameBlock(Op) && allSameType(Op) && | ||
getSameOpcode(Op, *TLI).MainOp)) | ||
return false; | ||
// Maps each counted value to its number of occurrences in Op. | ||
DenseMap<Value *, unsigned> Uniques; | ||
for (Value *V : Op) { | ||
// Constants, extractelements, values that already have a tree entry | ||
// and loop invariants are not counted; undefs are only tallied. | ||
if (isa<Constant, ExtractElementInst>(V) || | ||
getTreeEntry(V) || (L && L->isLoopInvariant(V))) { | ||
if (isa<UndefValue>(V)) | ||
++UndefCnt; | ||
continue; | ||
} | ||
auto Res = Uniques.try_emplace(V, 0); | ||
// Found first duplicate - need to add shuffle. | ||
if (!Res.second && Res.first->second == 1) | ||
++ExtraShuffleInsts; | ||
++Res.first->getSecond(); | ||
if (auto *I = dyn_cast<Instruction>(V)) | ||
UniqueOpcodes.insert(I->getOpcode()); | ||
else if (Res.second) | ||
++NonInstCnt; | ||
} | ||
// True when no counted value both has more uses than its occurrences | ||
// in Op and has no user that is in the tree or among the counted | ||
// values. | ||
return none_of(Uniques, [&](const auto &P) { | ||
return P.first->hasNUsesOrMore(P.second + 1) && | ||
none_of(P.first->users(), [&](User *U) { | ||
return getTreeEntry(U) || Uniques.contains(U); | ||
}); | ||
}); | ||
}) || | ||
// Do not vectorize node, if estimated number of vector instructions is | ||
// more than estimated number of buildvector instructions. Number of | ||
// vector operands is number of vector instructions + number of vector | ||
// instructions for operands (buildvectors). Number of buildvector | ||
// instructions is just number_of_operands * number_of_scalars. | ||
(UndefCnt < (VL.size() - 1) * S.MainOp->getNumOperands() && | ||
(UniqueOpcodes.size() + NonInstCnt + ExtraShuffleInsts + | ||
NumAltInsts) < S.MainOp->getNumOperands() * VL.size()); | ||
} | ||
|
||
BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState( | ||
InstructionsState &S, ArrayRef<Value *> VL, bool IsScatterVectorizeUserTE, | ||
OrdersType &CurrentOrder, SmallVectorImpl<Value *> &PointerOps) const { | ||
|
@@ -6074,6 +6194,14 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState( | |
LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n"); | ||
return TreeEntry::NeedToGather; | ||
} | ||
if (!areAltOperandsProfitable(S, VL)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hi Alexey. Can you please put this under an option (let it be true by default)? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hi Valery, you can add this option in the downstream compiler, should not be a very big change. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Of course we can. That will just add another piece of divergence for things that should not really have existed from the very beginning. I believe a similar problem may exist in llvm-project. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Here is the example (in this patch): gather-move-out-of-loop. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Feel free to prepare the patch. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'll prepare it. Thanks. |
||
LLVM_DEBUG( | ||
dbgs() | ||
<< "SLP: ShuffleVector not vectorized, operands are buildvector and " | ||
"the whole alt sequence is not profitable.\n"); | ||
return TreeEntry::NeedToGather; | ||
} | ||
|
||
return TreeEntry::Vectorize; | ||
} | ||
default: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a comment describing this