@@ -3760,87 +3760,61 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
3760
3760
OrdersType CurrentOrder(NumScalars, NumScalars);
3761
3761
SmallVector<int> Positions;
3762
3762
SmallBitVector UsedPositions(NumScalars);
3763
- DenseMap<const TreeEntry *, unsigned> UsedEntries;
3764
- DenseMap<Value *, std::pair<const TreeEntry *, unsigned>> ValueToEntryPos;
3765
- for (Value *V : TE.Scalars) {
3766
- if (!isa<LoadInst, ExtractElementInst, ExtractValueInst>(V))
3767
- continue;
3768
- const auto *LocalSTE = getTreeEntry(V);
3769
- if (!LocalSTE)
3770
- continue;
3771
- unsigned Lane =
3772
- std::distance(LocalSTE->Scalars.begin(), find(LocalSTE->Scalars, V));
3773
- if (Lane >= NumScalars)
3774
- continue;
3775
- ++UsedEntries.try_emplace(LocalSTE, 0).first->getSecond();
3776
- ValueToEntryPos.try_emplace(V, LocalSTE, Lane);
3777
- }
3778
- if (UsedEntries.empty())
3779
- return std::nullopt;
3780
- const TreeEntry &BestSTE =
3781
- *std::max_element(UsedEntries.begin(), UsedEntries.end(),
3782
- [](const std::pair<const TreeEntry *, unsigned> &P1,
3783
- const std::pair<const TreeEntry *, unsigned> &P2) {
3784
- return P1.second < P2.second;
3785
- })
3786
- ->first;
3787
- UsedEntries.erase(&BestSTE);
3788
- const TreeEntry *SecondBestSTE = nullptr;
3789
- if (!UsedEntries.empty())
3790
- SecondBestSTE =
3791
- std::max_element(UsedEntries.begin(), UsedEntries.end(),
3792
- [](const std::pair<const TreeEntry *, unsigned> &P1,
3793
- const std::pair<const TreeEntry *, unsigned> &P2) {
3794
- return P1.second < P2.second;
3795
- })
3796
- ->first;
3763
+ const TreeEntry *STE = nullptr;
3797
3764
// Try to find all gathered scalars that are gets vectorized in other
3798
3765
// vectorize node. Here we can have only one single tree vector node to
3799
3766
// correctly identify order of the gathered scalars.
3800
3767
for (unsigned I = 0; I < NumScalars; ++I) {
3801
3768
Value *V = TE.Scalars[I];
3802
3769
if (!isa<LoadInst, ExtractElementInst, ExtractValueInst>(V))
3803
3770
continue;
3804
- const auto [LocalSTE, Lane] = ValueToEntryPos.lookup(V);
3805
- if (!LocalSTE || (LocalSTE != &BestSTE && LocalSTE != SecondBestSTE))
3806
- continue;
3807
- if (CurrentOrder[Lane] != NumScalars) {
3808
- if ((CurrentOrder[Lane] >= BestSTE.Scalars.size() ||
3809
- BestSTE.Scalars[CurrentOrder[Lane]] == V) &&
3810
- (Lane != I || LocalSTE == SecondBestSTE))
3811
- continue;
3812
- UsedPositions.reset(CurrentOrder[Lane]);
3771
+ if (const auto *LocalSTE = getTreeEntry(V)) {
3772
+ if (!STE)
3773
+ STE = LocalSTE;
3774
+ else if (STE != LocalSTE)
3775
+ // Take the order only from the single vector node.
3776
+ return std::nullopt;
3777
+ unsigned Lane =
3778
+ std::distance(STE->Scalars.begin(), find(STE->Scalars, V));
3779
+ if (Lane >= NumScalars)
3780
+ return std::nullopt;
3781
+ if (CurrentOrder[Lane] != NumScalars) {
3782
+ if (Lane != I)
3783
+ continue;
3784
+ UsedPositions.reset(CurrentOrder[Lane]);
3785
+ }
3786
+ // The partial identity (where only some elements of the gather node are
3787
+ // in the identity order) is good.
3788
+ CurrentOrder[Lane] = I;
3789
+ UsedPositions.set(I);
3813
3790
}
3814
- // The partial identity (where only some elements of the gather node are
3815
- // in the identity order) is good.
3816
- CurrentOrder[Lane] = I;
3817
- UsedPositions.set(I);
3818
3791
}
3819
3792
// Need to keep the order if we have a vector entry and at least 2 scalars or
3820
3793
// the vectorized entry has just 2 scalars.
3821
- if (BestSTE.Scalars.size() != 2 && UsedPositions.count() <= 1)
3822
- return std::nullopt;
3823
- auto IsIdentityOrder = [&](ArrayRef<unsigned> CurrentOrder) {
3824
- for (unsigned I = 0; I < NumScalars; ++I)
3825
- if (CurrentOrder[I] != I && CurrentOrder[I] != NumScalars)
3826
- return false;
3827
- return true;
3828
- };
3829
- if (IsIdentityOrder(CurrentOrder))
3830
- return OrdersType();
3831
- auto *It = CurrentOrder.begin();
3832
- for (unsigned I = 0; I < NumScalars;) {
3833
- if (UsedPositions.test(I)) {
3834
- ++I;
3835
- continue;
3836
- }
3837
- if (*It == NumScalars) {
3838
- *It = I;
3839
- ++I;
3794
+ if (STE && (UsedPositions.count() > 1 || STE->Scalars.size() == 2)) {
3795
+ auto &&IsIdentityOrder = [NumScalars](ArrayRef<unsigned> CurrentOrder) {
3796
+ for (unsigned I = 0; I < NumScalars; ++I)
3797
+ if (CurrentOrder[I] != I && CurrentOrder[I] != NumScalars)
3798
+ return false;
3799
+ return true;
3800
+ };
3801
+ if (IsIdentityOrder(CurrentOrder))
3802
+ return OrdersType();
3803
+ auto *It = CurrentOrder.begin();
3804
+ for (unsigned I = 0; I < NumScalars;) {
3805
+ if (UsedPositions.test(I)) {
3806
+ ++I;
3807
+ continue;
3808
+ }
3809
+ if (*It == NumScalars) {
3810
+ *It = I;
3811
+ ++I;
3812
+ }
3813
+ ++It;
3840
3814
}
3841
- ++It ;
3815
+ return std::move(CurrentOrder) ;
3842
3816
}
3843
- return std::move(CurrentOrder) ;
3817
+ return std::nullopt ;
3844
3818
}
3845
3819
3846
3820
namespace {
0 commit comments