@@ -7351,6 +7351,32 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
7351
7351
V2 = getAllOnesValue(
7352
7352
*R.DL,
7353
7353
FixedVectorType::get(E2->Scalars.front()->getType(), CommonVF));
7354
+ } else if (!V1 && V2) {
7355
+ // Shuffle vector and tree node.
7356
+ unsigned VF = cast<FixedVectorType>(V2->getType())->getNumElements();
7357
+ const TreeEntry *E1 = P1.get<const TreeEntry *>();
7358
+ CommonVF = std::max(VF, E1->getVectorFactor());
7359
+ assert(all_of(Mask,
7360
+ [=](int Idx) {
7361
+ return Idx < 2 * static_cast<int>(CommonVF);
7362
+ }) &&
7363
+ "All elements in mask must be less than 2 * CommonVF.");
7364
+ if (E1->Scalars.size() == VF && VF != CommonVF) {
7365
+ SmallVector<int> E1Mask = E1->getCommonMask();
7366
+ assert(!E1Mask.empty() && "Expected non-empty common mask.");
7367
+ for (int &Idx : CommonMask) {
7368
+ if (Idx == PoisonMaskElem)
7369
+ continue;
7370
+ if (Idx >= static_cast<int>(CommonVF))
7371
+ Idx = E1Mask[Idx - CommonVF] + VF;
7372
+ }
7373
+ CommonVF = VF;
7374
+ }
7375
+ V1 = Constant::getNullValue(
7376
+ FixedVectorType::get(E1->Scalars.front()->getType(), CommonVF));
7377
+ V2 = getAllOnesValue(
7378
+ *R.DL,
7379
+ FixedVectorType::get(E1->Scalars.front()->getType(), CommonVF));
7354
7380
} else {
7355
7381
assert(V1 && V2 && "Expected both vectors.");
7356
7382
unsigned VF = cast<FixedVectorType>(V1->getType())->getNumElements();
@@ -7387,7 +7413,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
7387
7413
R(R), CheckedExtracts(CheckedExtracts) {}
7388
7414
Value *adjustExtracts(const TreeEntry *E, MutableArrayRef<int> Mask,
7389
7415
ArrayRef<std::optional<TTI::ShuffleKind>> ShuffleKinds,
7390
- unsigned NumParts) {
7416
+ unsigned NumParts, bool &UseVecBaseAsInput) {
7417
+ UseVecBaseAsInput = false;
7391
7418
if (Mask.empty())
7392
7419
return nullptr;
7393
7420
Value *VecBase = nullptr;
@@ -7410,6 +7437,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
7410
7437
Data.value() == VL[Data.index()]);
7411
7438
});
7412
7439
});
7440
+ SmallPtrSet<Value *, 4> UniqueBases;
7413
7441
unsigned SliceSize = VL.size() / NumParts;
7414
7442
for (unsigned Part = 0; Part < NumParts; ++Part) {
7415
7443
ArrayRef<int> SubMask = Mask.slice(Part * SliceSize, SliceSize);
@@ -7424,13 +7452,14 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
7424
7452
// vectorized tree.
7425
7453
// Also, avoid adjusting the cost for extractelements with multiple uses
7426
7454
// in different graph entries.
7455
+ auto *EE = cast<ExtractElementInst>(V);
7456
+ VecBase = EE->getVectorOperand();
7457
+ UniqueBases.insert(VecBase);
7427
7458
const TreeEntry *VE = R.getTreeEntry(V);
7428
7459
if (!CheckedExtracts.insert(V).second ||
7429
7460
!R.areAllUsersVectorized(cast<Instruction>(V), &VectorizedVals) ||
7430
7461
(VE && VE != E))
7431
7462
continue;
7432
- auto *EE = cast<ExtractElementInst>(V);
7433
- VecBase = EE->getVectorOperand();
7434
7463
std::optional<unsigned> EEIdx = getExtractIndex(EE);
7435
7464
if (!EEIdx)
7436
7465
continue;
@@ -7469,6 +7498,11 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
7469
7498
CommonMask.assign(Mask.begin(), Mask.end());
7470
7499
transformMaskAfterShuffle(CommonMask, CommonMask);
7471
7500
SameNodesEstimated = false;
7501
+ if (NumParts != 1 && UniqueBases.size() != 1) {
7502
+ UseVecBaseAsInput = true;
7503
+ VecBase = Constant::getNullValue(
7504
+ FixedVectorType::get(VL.front()->getType(), CommonMask.size()));
7505
+ }
7472
7506
return VecBase;
7473
7507
}
7474
7508
void add(const TreeEntry &E1, const TreeEntry &E2, ArrayRef<int> Mask) {
@@ -7518,19 +7552,70 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
7518
7552
if (!SameNodesEstimated && InVectors.size() == 1)
7519
7553
InVectors.emplace_back(&E1);
7520
7554
}
7555
+ /// Adds 2 input vectors and the mask for their shuffling.
7556
+ void add(Value *V1, Value *V2, ArrayRef<int> Mask) {
7557
+ // May come only for shuffling of 2 vectors with extractelements, already
7558
+ // handled in adjustExtracts.
7559
+ assert(InVectors.size() == 1 &&
7560
+ all_of(enumerate(CommonMask),
7561
+ [&](auto P) {
7562
+ if (P.value() == PoisonMaskElem)
7563
+ return Mask[P.index()] == PoisonMaskElem;
7564
+ auto *EI =
7565
+ cast<ExtractElementInst>(InVectors.front()
7566
+ .get<const TreeEntry *>()
7567
+ ->Scalars[P.index()]);
7568
+ return EI->getVectorOperand() == V1 ||
7569
+ EI->getVectorOperand() == V2;
7570
+ }) &&
7571
+ "Expected extractelement vectors.");
7572
+ }
7521
7573
/// Adds another one input vector and the mask for the shuffling.
7522
- void add(Value *V1, ArrayRef<int> Mask) {
7574
+ void add(Value *V1, ArrayRef<int> Mask, bool ForExtracts = false ) {
7523
7575
if (InVectors.empty()) {
7524
- assert(CommonMask.empty() && "Expected empty input mask/vectors.");
7576
+ assert(CommonMask.empty() && !ForExtracts &&
7577
+ "Expected empty input mask/vectors.");
7525
7578
CommonMask.assign(Mask.begin(), Mask.end());
7526
7579
InVectors.assign(1, V1);
7527
7580
return;
7528
7581
}
7529
- assert(InVectors.size() == 1 && InVectors.front().is<const TreeEntry *>() &&
7530
- !CommonMask.empty() && "Expected only single entry from extracts.");
7582
+ if (ForExtracts) {
7583
+ // No need to add vectors here, already handled them in adjustExtracts.
7584
+ assert(InVectors.size() == 1 &&
7585
+ InVectors.front().is<const TreeEntry *>() && !CommonMask.empty() &&
7586
+ all_of(enumerate(CommonMask),
7587
+ [&](auto P) {
7588
+ Value *Scalar = InVectors.front()
7589
+ .get<const TreeEntry *>()
7590
+ ->Scalars[P.index()];
7591
+ if (P.value() == PoisonMaskElem)
7592
+ return P.value() == Mask[P.index()] ||
7593
+ isa<UndefValue>(Scalar);
7594
+ if (isa<Constant>(V1))
7595
+ return true;
7596
+ auto *EI = cast<ExtractElementInst>(Scalar);
7597
+ return EI->getVectorOperand() == V1;
7598
+ }) &&
7599
+ "Expected only tree entry for extractelement vectors.");
7600
+ return;
7601
+ }
7602
+ assert(!InVectors.empty() && !CommonMask.empty() &&
7603
+ "Expected only tree entries from extracts/reused buildvectors.");
7604
+ unsigned VF = cast<FixedVectorType>(V1->getType())->getNumElements();
7605
+ if (InVectors.size() == 2) {
7606
+ Cost += createShuffle(InVectors.front(), InVectors.back(), CommonMask);
7607
+ transformMaskAfterShuffle(CommonMask, CommonMask);
7608
+ VF = std::max<unsigned>(VF, CommonMask.size());
7609
+ } else if (const auto *InTE =
7610
+ InVectors.front().dyn_cast<const TreeEntry *>()) {
7611
+ VF = std::max(VF, InTE->getVectorFactor());
7612
+ } else {
7613
+ VF = std::max(
7614
+ VF, cast<FixedVectorType>(InVectors.front().get<Value *>()->getType())
7615
+ ->getNumElements());
7616
+ }
7531
7617
InVectors.push_back(V1);
7532
- unsigned VF = CommonMask.size();
7533
- for (unsigned Idx = 0; Idx < VF; ++Idx)
7618
+ for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
7534
7619
if (Mask[Idx] != PoisonMaskElem && CommonMask[Idx] == PoisonMaskElem)
7535
7620
CommonMask[Idx] = Mask[Idx] + VF;
7536
7621
}
@@ -7666,6 +7751,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
7666
7751
reorderScalars(GatheredScalars, ReorderMask);
7667
7752
SmallVector<int> Mask;
7668
7753
SmallVector<int> ExtractMask;
7754
+ Value *ExtractVecBase = nullptr;
7755
+ bool UseVecBaseAsInput = false;
7669
7756
SmallVector<std::optional<TargetTransformInfo::ShuffleKind>> GatherShuffles;
7670
7757
SmallVector<SmallVector<const TreeEntry *>> Entries;
7671
7758
SmallVector<std::optional<TTI::ShuffleKind>> ExtractShuffles;
@@ -7679,7 +7766,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
7679
7766
tryToGatherExtractElements(GatheredScalars, ExtractMask, NumParts);
7680
7767
if (!ExtractShuffles.empty()) {
7681
7768
if (Value *VecBase = Estimator.adjustExtracts(
7682
- E, ExtractMask, ExtractShuffles, NumParts)) {
7769
+ E, ExtractMask, ExtractShuffles, NumParts, UseVecBaseAsInput )) {
7683
7770
if (auto *VecBaseTy = dyn_cast<FixedVectorType>(VecBase->getType()))
7684
7771
if (VF == VecBaseTy->getNumElements() &&
7685
7772
GatheredScalars.size() != VF) {
@@ -7774,6 +7861,48 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
7774
7861
ScalarTy, GatheredScalars.size())));
7775
7862
});
7776
7863
}
7864
+ if (!ExtractShuffles.empty()) {
7865
+ Value *Vec1 = nullptr;
7866
+ // Gather of extractelements can be represented as just a shuffle of
7867
+ // a single/two vectors the scalars are extracted from.
7868
+ // Find input vectors.
7869
+ Value *Vec2 = nullptr;
7870
+ for (unsigned I = 0, Sz = ExtractMask.size(); I < Sz; ++I) {
7871
+ if (!Mask.empty() && Mask[I] != PoisonMaskElem)
7872
+ ExtractMask[I] = PoisonMaskElem;
7873
+ }
7874
+ if (UseVecBaseAsInput) {
7875
+ Vec1 = ExtractVecBase;
7876
+ } else {
7877
+ for (unsigned I = 0, Sz = ExtractMask.size(); I < Sz; ++I) {
7878
+ if (ExtractMask[I] == PoisonMaskElem)
7879
+ continue;
7880
+ if (isa<UndefValue>(E->Scalars[I]))
7881
+ continue;
7882
+ auto *EI = cast<ExtractElementInst>(E->Scalars[I]);
7883
+ Value *VecOp = EI->getVectorOperand();
7884
+ if (const auto *TE = getTreeEntry(VecOp))
7885
+ if (TE->VectorizedValue)
7886
+ VecOp = TE->VectorizedValue;
7887
+ if (!Vec1) {
7888
+ Vec1 = VecOp;
7889
+ } else if (Vec1 != EI->getVectorOperand()) {
7890
+ assert((!Vec2 || Vec2 == EI->getVectorOperand()) &&
7891
+ "Expected only 1 or 2 vectors shuffle.");
7892
+ Vec2 = VecOp;
7893
+ }
7894
+ }
7895
+ }
7896
+ if (Vec2) {
7897
+ Estimator.add(Vec1, Vec2, ExtractMask);
7898
+ } else if (Vec1) {
7899
+ Estimator.add(Vec1, ExtractMask, /*ForExtracts=*/true);
7900
+ } else {
7901
+ Estimator.add(PoisonValue::get(FixedVectorType::get(
7902
+ ScalarTy, GatheredScalars.size())),
7903
+ ExtractMask, /*ForExtracts=*/true);
7904
+ }
7905
+ }
7777
7906
if (!all_of(GatheredScalars, PoisonValue::classof)) {
7778
7907
auto Gathers = ArrayRef(GatheredScalars).take_front(VL.size());
7779
7908
bool SameGathers = VL.equals(Gathers);
@@ -10367,7 +10496,7 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
10367
10496
InVectors.push_back(V1);
10368
10497
}
10369
10498
/// Adds another one input vector and the mask for the shuffling.
10370
- void add(Value *V1, ArrayRef<int> Mask) {
10499
+ void add(Value *V1, ArrayRef<int> Mask, bool = false ) {
10371
10500
if (InVectors.empty()) {
10372
10501
if (!isa<FixedVectorType>(V1->getType())) {
10373
10502
V1 = createShuffle(V1, nullptr, CommonMask);
@@ -10906,13 +11035,13 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
10906
11035
IsUsedInExpr &= FindReusedSplat(
10907
11036
ExtractMask,
10908
11037
cast<FixedVectorType>(Vec1->getType())->getNumElements());
10909
- ShuffleBuilder.add(Vec1, ExtractMask);
11038
+ ShuffleBuilder.add(Vec1, ExtractMask, /*ForExtracts=*/true );
10910
11039
IsNonPoisoned &= isGuaranteedNotToBePoison(Vec1);
10911
11040
} else {
10912
11041
IsUsedInExpr = false;
10913
11042
ShuffleBuilder.add(PoisonValue::get(FixedVectorType::get(
10914
11043
ScalarTy, GatheredScalars.size())),
10915
- ExtractMask);
11044
+ ExtractMask, /*ForExtracts=*/true );
10916
11045
}
10917
11046
}
10918
11047
if (!GatherShuffles.empty()) {
0 commit comments