Skip to content

Commit 681c83a

Browse files
committed
[SLP]Fix mask generation after cost estimation
When estimating the cost of entry shuffles for buildvectors, the original mask must be rebuilt, rather than reusing the generated submask, which is used for subregister analysis. Fixes #122430
1 parent cc88a5e commit 681c83a

File tree

2 files changed

+23
-10
lines changed

2 files changed

+23
-10
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13445,14 +13445,15 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1344513445
for_each(SubMask, [&](int &Idx) {
1344613446
if (Idx == PoisonMaskElem)
1344713447
return;
13448-
Idx = (Idx % VF) - (MinElement % VF) +
13448+
Idx = ((Idx % VF) - (((MinElement % VF) / NewVF) * NewVF)) % NewVF +
1344913449
(Idx >= static_cast<int>(VF) ? NewVF : 0);
1345013450
});
13451-
VF = NewVF;
13451+
} else {
13452+
NewVF = VF;
1345213453
}
1345313454

1345413455
constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
13455-
auto *VecTy = getWidenedType(VL.front()->getType(), VF);
13456+
auto *VecTy = getWidenedType(VL.front()->getType(), NewVF);
1345613457
auto *MaskVecTy = getWidenedType(VL.front()->getType(), SubMask.size());
1345713458
auto GetShuffleCost = [&,
1345813459
&TTI = *TTI](ArrayRef<int> Mask,
@@ -13477,7 +13478,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1347713478
APInt DemandedElts = APInt::getAllOnes(SubMask.size());
1347813479
bool IsIdentity = true;
1347913480
for (auto [I, Idx] : enumerate(FirstMask)) {
13480-
if (Idx >= static_cast<int>(VF)) {
13481+
if (Idx >= static_cast<int>(NewVF)) {
1348113482
Idx = PoisonMaskElem;
1348213483
} else {
1348313484
DemandedElts.clearBit(I);
@@ -13500,12 +13501,12 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1350013501
APInt DemandedElts = APInt::getAllOnes(SubMask.size());
1350113502
bool IsIdentity = true;
1350213503
for (auto [I, Idx] : enumerate(SecondMask)) {
13503-
if (Idx < static_cast<int>(VF) && Idx >= 0) {
13504+
if (Idx < static_cast<int>(NewVF) && Idx >= 0) {
1350413505
Idx = PoisonMaskElem;
1350513506
} else {
1350613507
DemandedElts.clearBit(I);
1350713508
if (Idx != PoisonMaskElem) {
13508-
Idx -= VF;
13509+
Idx -= NewVF;
1350913510
IsIdentity &= static_cast<int>(I) == Idx;
1351013511
}
1351113512
}
@@ -13525,12 +13526,24 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1352513526
/*Extract=*/false, CostKind);
1352613527
const TreeEntry *BestEntry = nullptr;
1352713528
if (FirstShuffleCost < ShuffleCost) {
13528-
copy(FirstMask, std::next(Mask.begin(), Part * VL.size()));
13529+
std::for_each(std::next(Mask.begin(), Part * VL.size()),
13530+
std::next(Mask.begin(), (Part + 1) * VL.size()),
13531+
[&](int &Idx) {
13532+
if (Idx >= static_cast<int>(VF))
13533+
Idx = PoisonMaskElem;
13534+
});
1352913535
BestEntry = Entries.front();
1353013536
ShuffleCost = FirstShuffleCost;
1353113537
}
1353213538
if (SecondShuffleCost < ShuffleCost) {
13533-
copy(SecondMask, std::next(Mask.begin(), Part * VL.size()));
13539+
std::for_each(std::next(Mask.begin(), Part * VL.size()),
13540+
std::next(Mask.begin(), (Part + 1) * VL.size()),
13541+
[&](int &Idx) {
13542+
if (Idx < static_cast<int>(VF))
13543+
Idx = PoisonMaskElem;
13544+
else
13545+
Idx -= VF;
13546+
});
1353413547
BestEntry = Entries[1];
1353513548
ShuffleCost = SecondShuffleCost;
1353613549
}

llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ define i16 @test(i16 %v1, i16 %v2) {
1010
; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i16> [[TMP0]], [[TMP1]]
1111
; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i16> [[TMP0]], [[TMP1]]
1212
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
13-
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <2 x i32> <i32 0, i32 poison>
14-
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i16> [[TMP5]], i16 [[V2]], i32 1
13+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 poison, i32 3>
14+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i16> [[TMP5]], i16 [[V1]], i32 0
1515
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i16> [[TMP6]], <2 x i16> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
1616
; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
1717
; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i16> [[TMP4]], zeroinitializer

0 commit comments

Comments
 (0)