Skip to content

Commit d4cec1c

Browse files
committed
[SLP][NFCI]Improve compile time by using SmallBitVector and filtering
trees with phis/buildvectors only.
1 parent 1256d1d commit d4cec1c

File tree

1 file changed

+25
-5
lines changed

1 file changed

+25
-5
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4166,24 +4166,27 @@ static bool areTwoInsertFromSameBuildVector(
41664166
// Go through the vector operand of insertelement instructions trying to find
41674167
// either VU as the original vector for IE2 or V as the original vector for
41684168
// IE1.
4169-
SmallSet<int, 8> ReusedIdx;
4169+
SmallBitVector ReusedIdx(
4170+
cast<VectorType>(VU->getType())->getElementCount().getKnownMinValue());
41704171
bool IsReusedIdx = false;
41714172
do {
41724173
if (IE2 == VU && !IE1)
41734174
return VU->hasOneUse();
41744175
if (IE1 == V && !IE2)
41754176
return V->hasOneUse();
41764177
if (IE1 && IE1 != V) {
4177-
IsReusedIdx |=
4178-
!ReusedIdx.insert(getInsertIndex(IE1).value_or(*Idx2)).second;
4178+
unsigned Idx1 = getInsertIndex(IE1).value_or(*Idx2);
4179+
IsReusedIdx |= ReusedIdx.test(Idx1);
4180+
ReusedIdx.set(Idx1);
41794181
if ((IE1 != VU && !IE1->hasOneUse()) || IsReusedIdx)
41804182
IE1 = nullptr;
41814183
else
41824184
IE1 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE1));
41834185
}
41844186
if (IE2 && IE2 != VU) {
4185-
IsReusedIdx |=
4186-
!ReusedIdx.insert(getInsertIndex(IE2).value_or(*Idx1)).second;
4187+
unsigned Idx2 = getInsertIndex(IE2).value_or(*Idx1);
4188+
IsReusedIdx |= ReusedIdx.test(Idx2);
4189+
ReusedIdx.set(Idx2);
41874190
if ((IE2 != V && !IE2->hasOneUse()) || IsReusedIdx)
41884191
IE2 = nullptr;
41894192
else
@@ -8630,6 +8633,23 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
86308633
allConstant(VectorizableTree[1]->Scalars))))
86318634
return true;
86328635

8636+
// If the graph includes only PHI nodes and gathers, it is definitely not
8637+
// profitable for the vectorization, we can skip it, if the cost threshold is
8638+
// default. The cost of vectorized PHI nodes is almost always 0 + the cost of
8639+
// gathers/buildvectors.
8640+
constexpr unsigned Limit = 4;
8641+
if (!ForReduction && !SLPCostThreshold.getNumOccurrences() &&
8642+
!VectorizableTree.empty() &&
8643+
all_of(VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
8644+
return (TE->State == TreeEntry::NeedToGather &&
8645+
TE->getOpcode() != Instruction::ExtractElement &&
8646+
count_if(TE->Scalars,
8647+
[](Value *V) { return isa<ExtractElementInst>(V); }) <=
8648+
Limit) ||
8649+
TE->getOpcode() == Instruction::PHI;
8650+
}))
8651+
return true;
8652+
86338653
// We can vectorize the tree if its size is greater than or equal to the
86348654
// minimum size specified by the MinTreeSize command line option.
86358655
if (VectorizableTree.size() >= MinTreeSize)

0 commit comments

Comments
 (0)