@@ -4166,24 +4166,27 @@ static bool areTwoInsertFromSameBuildVector(
4166
4166
// Go through the vector operand of insertelement instructions trying to find
4167
4167
// either VU as the original vector for IE2 or V as the original vector for
4168
4168
// IE1.
4169
- SmallSet<int, 8> ReusedIdx;
4169
+ SmallBitVector ReusedIdx(
4170
+ cast<VectorType>(VU->getType())->getElementCount().getKnownMinValue());
4170
4171
bool IsReusedIdx = false;
4171
4172
do {
4172
4173
if (IE2 == VU && !IE1)
4173
4174
return VU->hasOneUse();
4174
4175
if (IE1 == V && !IE2)
4175
4176
return V->hasOneUse();
4176
4177
if (IE1 && IE1 != V) {
4177
- IsReusedIdx |=
4178
- !ReusedIdx.insert(getInsertIndex(IE1).value_or(*Idx2)).second;
4178
+ unsigned Idx1 = getInsertIndex(IE1).value_or(*Idx2);
4179
+ IsReusedIdx |= ReusedIdx.test(Idx1);
4180
+ ReusedIdx.set(Idx1);
4179
4181
if ((IE1 != VU && !IE1->hasOneUse()) || IsReusedIdx)
4180
4182
IE1 = nullptr;
4181
4183
else
4182
4184
IE1 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE1));
4183
4185
}
4184
4186
if (IE2 && IE2 != VU) {
4185
- IsReusedIdx |=
4186
- !ReusedIdx.insert(getInsertIndex(IE2).value_or(*Idx1)).second;
4187
+ unsigned Idx2 = getInsertIndex(IE2).value_or(*Idx1);
4188
+ IsReusedIdx |= ReusedIdx.test(Idx2);
4189
+ ReusedIdx.set(Idx2);
4187
4190
if ((IE2 != V && !IE2->hasOneUse()) || IsReusedIdx)
4188
4191
IE2 = nullptr;
4189
4192
else
@@ -8630,6 +8633,23 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
8630
8633
allConstant(VectorizableTree[1]->Scalars))))
8631
8634
return true;
8632
8635
8636
+ // If the graph includes only PHI nodes and gathers, it is definitely not
8637
+ // profitable for the vectorization, we can skip it, if the cost threshold is
8638
+ // default. The cost of vectorized PHI nodes is almost always 0 + the cost of
8639
+ // gathers/buildvectors.
8640
+ constexpr unsigned Limit = 4;
8641
+ if (!ForReduction && !SLPCostThreshold.getNumOccurrences() &&
8642
+ !VectorizableTree.empty() &&
8643
+ all_of(VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
8644
+ return (TE->State == TreeEntry::NeedToGather &&
8645
+ TE->getOpcode() != Instruction::ExtractElement &&
8646
+ count_if(TE->Scalars,
8647
+ [](Value *V) { return isa<ExtractElementInst>(V); }) <=
8648
+ Limit) ||
8649
+ TE->getOpcode() == Instruction::PHI;
8650
+ }))
8651
+ return true;
8652
+
8633
8653
// We can vectorize the tree if its size is greater than or equal to the
8634
8654
// minimum size specified by the MinTreeSize command line option.
8635
8655
if (VectorizableTree.size() >= MinTreeSize)
0 commit comments