@@ -13445,14 +13445,15 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
13445
13445
for_each(SubMask, [&](int &Idx) {
13446
13446
if (Idx == PoisonMaskElem)
13447
13447
return;
13448
- Idx = (Idx % VF) - (MinElement % VF) +
13448
+ Idx = (( Idx % VF) - ((( MinElement % VF) / NewVF) * NewVF)) % NewVF +
13449
13449
(Idx >= static_cast<int>(VF) ? NewVF : 0);
13450
13450
});
13451
- VF = NewVF;
13451
+ } else {
13452
+ NewVF = VF;
13452
13453
}
13453
13454
13454
13455
constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
13455
- auto *VecTy = getWidenedType(VL.front()->getType(), VF );
13456
+ auto *VecTy = getWidenedType(VL.front()->getType(), NewVF );
13456
13457
auto *MaskVecTy = getWidenedType(VL.front()->getType(), SubMask.size());
13457
13458
auto GetShuffleCost = [&,
13458
13459
&TTI = *TTI](ArrayRef<int> Mask,
@@ -13477,7 +13478,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
13477
13478
APInt DemandedElts = APInt::getAllOnes(SubMask.size());
13478
13479
bool IsIdentity = true;
13479
13480
for (auto [I, Idx] : enumerate(FirstMask)) {
13480
- if (Idx >= static_cast<int>(VF )) {
13481
+ if (Idx >= static_cast<int>(NewVF )) {
13481
13482
Idx = PoisonMaskElem;
13482
13483
} else {
13483
13484
DemandedElts.clearBit(I);
@@ -13500,12 +13501,12 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
13500
13501
APInt DemandedElts = APInt::getAllOnes(SubMask.size());
13501
13502
bool IsIdentity = true;
13502
13503
for (auto [I, Idx] : enumerate(SecondMask)) {
13503
- if (Idx < static_cast<int>(VF ) && Idx >= 0) {
13504
+ if (Idx < static_cast<int>(NewVF ) && Idx >= 0) {
13504
13505
Idx = PoisonMaskElem;
13505
13506
} else {
13506
13507
DemandedElts.clearBit(I);
13507
13508
if (Idx != PoisonMaskElem) {
13508
- Idx -= VF ;
13509
+ Idx -= NewVF ;
13509
13510
IsIdentity &= static_cast<int>(I) == Idx;
13510
13511
}
13511
13512
}
@@ -13525,12 +13526,24 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
13525
13526
/*Extract=*/false, CostKind);
13526
13527
const TreeEntry *BestEntry = nullptr;
13527
13528
if (FirstShuffleCost < ShuffleCost) {
13528
- copy(FirstMask, std::next(Mask.begin(), Part * VL.size()));
13529
+ std::for_each(std::next(Mask.begin(), Part * VL.size()),
13530
+ std::next(Mask.begin(), (Part + 1) * VL.size()),
13531
+ [&](int &Idx) {
13532
+ if (Idx >= static_cast<int>(VF))
13533
+ Idx = PoisonMaskElem;
13534
+ });
13529
13535
BestEntry = Entries.front();
13530
13536
ShuffleCost = FirstShuffleCost;
13531
13537
}
13532
13538
if (SecondShuffleCost < ShuffleCost) {
13533
- copy(SecondMask, std::next(Mask.begin(), Part * VL.size()));
13539
+ std::for_each(std::next(Mask.begin(), Part * VL.size()),
13540
+ std::next(Mask.begin(), (Part + 1) * VL.size()),
13541
+ [&](int &Idx) {
13542
+ if (Idx < static_cast<int>(VF))
13543
+ Idx = PoisonMaskElem;
13544
+ else
13545
+ Idx -= VF;
13546
+ });
13534
13547
BestEntry = Entries[1];
13535
13548
ShuffleCost = SecondShuffleCost;
13536
13549
}
0 commit comments