@@ -13443,14 +13443,15 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
13443
13443
for_each(SubMask, [&](int &Idx) {
13444
13444
if (Idx == PoisonMaskElem)
13445
13445
return;
13446
- Idx = (Idx % VF) - (MinElement % VF ) +
13446
+ Idx = (Idx % VF) - (( MinElement / NewVF) * NewVF ) +
13447
13447
(Idx >= static_cast<int>(VF) ? NewVF : 0);
13448
13448
});
13449
- VF = NewVF;
13449
+ } else {
13450
+ NewVF = VF;
13450
13451
}
13451
13452
13452
13453
constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
13453
- auto *VecTy = getWidenedType(VL.front()->getType(), VF );
13454
+ auto *VecTy = getWidenedType(VL.front()->getType(), NewVF );
13454
13455
auto *MaskVecTy = getWidenedType(VL.front()->getType(), SubMask.size());
13455
13456
auto GetShuffleCost = [&,
13456
13457
&TTI = *TTI](ArrayRef<int> Mask,
@@ -13475,7 +13476,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
13475
13476
APInt DemandedElts = APInt::getAllOnes(SubMask.size());
13476
13477
bool IsIdentity = true;
13477
13478
for (auto [I, Idx] : enumerate(FirstMask)) {
13478
- if (Idx >= static_cast<int>(VF )) {
13479
+ if (Idx >= static_cast<int>(NewVF )) {
13479
13480
Idx = PoisonMaskElem;
13480
13481
} else {
13481
13482
DemandedElts.clearBit(I);
@@ -13498,12 +13499,12 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
13498
13499
APInt DemandedElts = APInt::getAllOnes(SubMask.size());
13499
13500
bool IsIdentity = true;
13500
13501
for (auto [I, Idx] : enumerate(SecondMask)) {
13501
- if (Idx < static_cast<int>(VF ) && Idx >= 0) {
13502
+ if (Idx < static_cast<int>(NewVF ) && Idx >= 0) {
13502
13503
Idx = PoisonMaskElem;
13503
13504
} else {
13504
13505
DemandedElts.clearBit(I);
13505
13506
if (Idx != PoisonMaskElem) {
13506
- Idx -= VF ;
13507
+ Idx -= NewVF ;
13507
13508
IsIdentity &= static_cast<int>(I) == Idx;
13508
13509
}
13509
13510
}
@@ -13523,12 +13524,24 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
13523
13524
/*Extract=*/false, CostKind);
13524
13525
const TreeEntry *BestEntry = nullptr;
13525
13526
if (FirstShuffleCost < ShuffleCost) {
13526
- copy(FirstMask, std::next(Mask.begin(), Part * VL.size()));
13527
+ std::for_each(std::next(Mask.begin(), Part * VL.size()),
13528
+ std::next(Mask.begin(), (Part + 1) * VL.size()),
13529
+ [&](int &Idx) {
13530
+ if (Idx >= static_cast<int>(VF))
13531
+ Idx = PoisonMaskElem;
13532
+ });
13527
13533
BestEntry = Entries.front();
13528
13534
ShuffleCost = FirstShuffleCost;
13529
13535
}
13530
13536
if (SecondShuffleCost < ShuffleCost) {
13531
- copy(SecondMask, std::next(Mask.begin(), Part * VL.size()));
13537
+ std::for_each(std::next(Mask.begin(), Part * VL.size()),
13538
+ std::next(Mask.begin(), (Part + 1) * VL.size()),
13539
+ [&](int &Idx) {
13540
+ if (Idx < static_cast<int>(VF))
13541
+ Idx = PoisonMaskElem;
13542
+ else
13543
+ Idx -= VF;
13544
+ });
13532
13545
BestEntry = Entries[1];
13533
13546
ShuffleCost = SecondShuffleCost;
13534
13547
}
0 commit comments