@@ -7659,16 +7659,24 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
7659
7659
<< "SLP: perfect diamond match for gather bundle "
7660
7660
<< shortBundleName(VL) << ".\n");
7661
7661
// Restore the mask for previous partially matched values.
7662
- for (auto [I, V] : enumerate(E->Scalars)) {
7663
- if (isa<PoisonValue>(V)) {
7664
- Mask[I] = PoisonMaskElem;
7665
- continue;
7662
+ Mask.resize(E->Scalars.size());
7663
+ const TreeEntry *FrontTE = Entries.front().front();
7664
+ if (FrontTE->ReorderIndices.empty() &&
7665
+ ((FrontTE->ReuseShuffleIndices.empty() &&
7666
+ E->Scalars.size() == FrontTE->Scalars.size()) ||
7667
+ (E->Scalars.size() == FrontTE->ReuseShuffleIndices.size()))) {
7668
+ std::iota(Mask.begin(), Mask.end(), 0);
7669
+ } else {
7670
+ for (auto [I, V] : enumerate(E->Scalars)) {
7671
+ if (isa<PoisonValue>(V)) {
7672
+ Mask[I] = PoisonMaskElem;
7673
+ continue;
7674
+ }
7675
+ Mask[I] = FrontTE->findLaneForValue(V);
7666
7676
}
7667
- if (Mask[I] == PoisonMaskElem)
7668
- Mask[I] = Entries.front().front()->findLaneForValue(V);
7669
7677
}
7670
- Estimator.add(*Entries.front().front() , Mask);
7671
- return Estimator.finalize(E->ReuseShuffleIndices );
7678
+ Estimator.add(*FrontTE , Mask);
7679
+ return Estimator.finalize(E->getCommonMask() );
7672
7680
}
7673
7681
if (!Resized) {
7674
7682
if (GatheredScalars.size() != VF &&
@@ -9460,10 +9468,19 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
9460
9468
auto *It = find_if(FirstEntries, [=](const TreeEntry *EntryPtr) {
9461
9469
return EntryPtr->isSame(VL) || EntryPtr->isSame(TE->Scalars);
9462
9470
});
9463
- if (It != FirstEntries.end() && (*It)->getVectorFactor() == VL.size()) {
9471
+ if (It != FirstEntries.end() &&
9472
+ ((*It)->getVectorFactor() == VL.size() ||
9473
+ ((*It)->getVectorFactor() == TE->Scalars.size() &&
9474
+ TE->ReuseShuffleIndices.size() == VL.size() &&
9475
+ (*It)->isSame(TE->Scalars)))) {
9464
9476
Entries.push_back(*It);
9465
- std::iota(std::next(Mask.begin(), Part * VL.size()),
9466
- std::next(Mask.begin(), (Part + 1) * VL.size()), 0);
9477
+ if ((*It)->getVectorFactor() == VL.size()) {
9478
+ std::iota(std::next(Mask.begin(), Part * VL.size()),
9479
+ std::next(Mask.begin(), (Part + 1) * VL.size()), 0);
9480
+ } else {
9481
+ SmallVector<int> CommonMask = TE->getCommonMask();
9482
+ copy(CommonMask, Mask.begin());
9483
+ }
9467
9484
// Clear undef scalars.
9468
9485
for (int I = 0, Sz = VL.size(); I < Sz; ++I)
9469
9486
if (isa<PoisonValue>(VL[I]))
@@ -10657,6 +10674,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
10657
10674
<< "SLP: perfect diamond match for gather bundle "
10658
10675
<< shortBundleName(E->Scalars) << ".\n");
10659
10676
// Restore the mask for previous partially matched values.
10677
+ Mask.resize(E->Scalars.size());
10660
10678
const TreeEntry *FrontTE = Entries.front().front();
10661
10679
if (FrontTE->ReorderIndices.empty() &&
10662
10680
((FrontTE->ReuseShuffleIndices.empty() &&
0 commit comments