@@ -766,12 +766,6 @@ class BoUpSLP {
766
766
/// Perform LICM and CSE on the newly generated gather sequences.
767
767
void optimizeGatherSequence ();
768
768
769
- // / Checks if the specified gather tree entry \p TE can be represented as a
770
- // / shuffled vector entry + (possibly) permutation with other gathers. It
771
- // / implements the checks only for possibly ordered scalars (Loads,
772
- // / ExtractElement, ExtractValue), which can be part of the graph.
773
- Optional<OrdersType> findReusedOrderedScalars (const TreeEntry &TE);
774
-
775
769
/// Reorders the current graph to the most profitable order starting from the
776
770
/// root node to the leaf nodes. The best order is chosen only from the nodes
777
771
/// of the same size (vectorization factor). Smaller nodes are considered
@@ -2676,72 +2670,6 @@ static void reorderOrder(SmallVectorImpl<unsigned> &Order, ArrayRef<int> Mask) {
2676
2670
fixupOrderingIndices (Order);
2677
2671
}
2678
2672
2679
- Optional<BoUpSLP::OrdersType>
2680
- BoUpSLP::findReusedOrderedScalars (const BoUpSLP::TreeEntry &TE) {
2681
- assert (TE.State == TreeEntry::NeedToGather && " Expected gather node only." );
2682
- unsigned NumScalars = TE.Scalars .size ();
2683
- OrdersType CurrentOrder (NumScalars, NumScalars);
2684
- SmallVector<int > Positions;
2685
- SmallBitVector UsedPositions (NumScalars);
2686
- const TreeEntry *STE = nullptr ;
2687
- // Try to find all gathered scalars that are gets vectorized in other
2688
- // vectorize node. Here we can have only one single tree vector node to
2689
- // correctly identify order of the gathered scalars.
2690
- for (unsigned I = 0 ; I < NumScalars; ++I) {
2691
- Value *V = TE.Scalars [I];
2692
- if (!isa<LoadInst, ExtractElementInst, ExtractValueInst>(V))
2693
- continue ;
2694
- if (const auto *LocalSTE = getTreeEntry (V)) {
2695
- if (!STE)
2696
- STE = LocalSTE;
2697
- else if (STE != LocalSTE)
2698
- // Take the order only from the single vector node.
2699
- return None;
2700
- unsigned Lane =
2701
- std::distance (STE->Scalars .begin (), find (STE->Scalars , V));
2702
- if (Lane >= NumScalars)
2703
- return None;
2704
- if (CurrentOrder[Lane] != NumScalars) {
2705
- if (Lane != I)
2706
- continue ;
2707
- UsedPositions.reset (CurrentOrder[Lane]);
2708
- }
2709
- // The partial identity (where only some elements of the gather node are
2710
- // in the identity order) is good.
2711
- CurrentOrder[Lane] = I;
2712
- UsedPositions.set (I);
2713
- }
2714
- }
2715
- // Need to keep the order if we have a vector entry and at least 2 scalars or
2716
- // the vectorized entry has just 2 scalars.
2717
- if (STE && (UsedPositions.count () > 1 || STE->Scalars .size () == 2 )) {
2718
- auto &&IsIdentityOrder = [NumScalars](ArrayRef<unsigned > CurrentOrder) {
2719
- for (unsigned I = 0 ; I < NumScalars; ++I)
2720
- if (CurrentOrder[I] != I && CurrentOrder[I] != NumScalars)
2721
- return false ;
2722
- return true ;
2723
- };
2724
- if (IsIdentityOrder (CurrentOrder)) {
2725
- CurrentOrder.clear ();
2726
- return CurrentOrder;
2727
- }
2728
- auto *It = CurrentOrder.begin ();
2729
- for (unsigned I = 0 ; I < NumScalars;) {
2730
- if (UsedPositions.test (I)) {
2731
- ++I;
2732
- continue ;
2733
- }
2734
- if (*It == NumScalars) {
2735
- *It = I;
2736
- ++I;
2737
- }
2738
- ++It;
2739
- }
2740
- return CurrentOrder;
2741
- }
2742
- return None;
2743
- }
2744
-
2745
2673
void BoUpSLP::reorderTopToBottom () {
2746
2674
// Maps VF to the graph nodes.
2747
2675
DenseMap<unsigned , SmallPtrSet<TreeEntry *, 4 >> VFToOrderedEntries;
@@ -2761,29 +2689,19 @@ void BoUpSLP::reorderTopToBottom() {
2761
2689
InsertElementInst>(TE->getMainOp ()) &&
2762
2690
!TE->isAltShuffle ()) {
2763
2691
VFToOrderedEntries[TE->Scalars .size ()].insert (TE.get ());
2764
- return ;
2765
- }
2766
- if (TE->State == TreeEntry::NeedToGather) {
2767
- if (TE->getOpcode () == Instruction::ExtractElement &&
2768
- !TE->isAltShuffle () &&
2769
- isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp ())
2770
- ->getVectorOperandType ()) &&
2771
- allSameType (TE->Scalars ) && allSameBlock (TE->Scalars )) {
2772
- // Check that gather of extractelements can be represented as
2773
- // just a shuffle of a single vector.
2774
- OrdersType CurrentOrder;
2775
- bool Reuse =
2776
- canReuseExtract (TE->Scalars , TE->getMainOp (), CurrentOrder);
2777
- if (Reuse || !CurrentOrder.empty ()) {
2778
- VFToOrderedEntries[TE->Scalars .size ()].insert (TE.get ());
2779
- GathersToOrders.try_emplace (TE.get (), CurrentOrder);
2780
- return ;
2781
- }
2782
- }
2783
- if (Optional<OrdersType> CurrentOrder =
2784
- findReusedOrderedScalars (*TE.get ())) {
2692
+ } else if (TE->State == TreeEntry::NeedToGather &&
2693
+ TE->getOpcode () == Instruction::ExtractElement &&
2694
+ !TE->isAltShuffle () &&
2695
+ isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp ())
2696
+ ->getVectorOperandType ()) &&
2697
+ allSameType (TE->Scalars ) && allSameBlock (TE->Scalars )) {
2698
+ // Check that gather of extractelements can be represented as
2699
+ // just a shuffle of a single vector.
2700
+ OrdersType CurrentOrder;
2701
+ bool Reuse = canReuseExtract (TE->Scalars , TE->getMainOp (), CurrentOrder);
2702
+ if (Reuse || !CurrentOrder.empty ()) {
2785
2703
VFToOrderedEntries[TE->Scalars .size ()].insert (TE.get ());
2786
- GathersToOrders.try_emplace (TE.get (), * CurrentOrder);
2704
+ GathersToOrders.try_emplace (TE.get (), CurrentOrder);
2787
2705
}
2788
2706
}
2789
2707
});
@@ -2835,7 +2753,7 @@ void BoUpSLP::reorderTopToBottom() {
2835
2753
// Choose the most used order.
2836
2754
ArrayRef<unsigned > BestOrder = OrdersUses.begin ()->first ;
2837
2755
unsigned Cnt = OrdersUses.begin ()->second ;
2838
- for (const auto &Pair : drop_begin (OrdersUses)) {
2756
+ for (const auto &Pair : llvm:: drop_begin (OrdersUses)) {
2839
2757
if (Cnt < Pair.second || (Cnt == Pair.second && Pair.first .empty ())) {
2840
2758
BestOrder = Pair.first ;
2841
2759
Cnt = Pair.second ;
@@ -2912,8 +2830,6 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
2912
2830
for_each (VectorizableTree, [this , &OrderedEntries, &GathersToOrders,
2913
2831
&NonVectorized](
2914
2832
const std::unique_ptr<TreeEntry> &TE) {
2915
- if (TE->State != TreeEntry::Vectorize)
2916
- NonVectorized.push_back (TE.get ());
2917
2833
// No need to reorder if need to shuffle reuses, still need to shuffle the
2918
2834
// node.
2919
2835
if (!TE->ReuseShuffleIndices .empty ())
@@ -2922,37 +2838,28 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
2922
2838
isa<LoadInst, ExtractElementInst, ExtractValueInst>(TE->getMainOp ()) &&
2923
2839
!TE->isAltShuffle ()) {
2924
2840
OrderedEntries.insert (TE.get ());
2925
- return ;
2926
- }
2927
- if (TE->State == TreeEntry::NeedToGather) {
2928
- if (TE->getOpcode () == Instruction::ExtractElement &&
2929
- !TE->isAltShuffle () &&
2930
- isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp ())
2931
- ->getVectorOperandType ()) &&
2932
- allSameType (TE->Scalars ) && allSameBlock (TE->Scalars )) {
2933
- // Check that gather of extractelements can be represented as
2934
- // just a shuffle of a single vector with a single user only.
2935
- OrdersType CurrentOrder;
2936
- bool Reuse =
2937
- canReuseExtract (TE->Scalars , TE->getMainOp (), CurrentOrder);
2938
- if ((Reuse || !CurrentOrder.empty ()) &&
2939
- !any_of (VectorizableTree,
2940
- [&TE](const std::unique_ptr<TreeEntry> &Entry) {
2941
- return Entry->State == TreeEntry::NeedToGather &&
2942
- Entry.get () != TE.get () &&
2943
- Entry->isSame (TE->Scalars );
2944
- })) {
2945
- OrderedEntries.insert (TE.get ());
2946
- GathersToOrders.try_emplace (TE.get (), CurrentOrder);
2947
- return ;
2948
- }
2949
- }
2950
- if (Optional<OrdersType> CurrentOrder =
2951
- findReusedOrderedScalars (*TE.get ())) {
2841
+ } else if (TE->State == TreeEntry::NeedToGather &&
2842
+ TE->getOpcode () == Instruction::ExtractElement &&
2843
+ !TE->isAltShuffle () &&
2844
+ isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp ())
2845
+ ->getVectorOperandType ()) &&
2846
+ allSameType (TE->Scalars ) && allSameBlock (TE->Scalars )) {
2847
+ // Check that gather of extractelements can be represented as
2848
+ // just a shuffle of a single vector with a single user only.
2849
+ OrdersType CurrentOrder;
2850
+ bool Reuse = canReuseExtract (TE->Scalars , TE->getMainOp (), CurrentOrder);
2851
+ if ((Reuse || !CurrentOrder.empty ()) &&
2852
+ !any_of (
2853
+ VectorizableTree, [&TE](const std::unique_ptr<TreeEntry> &Entry) {
2854
+ return Entry->State == TreeEntry::NeedToGather &&
2855
+ Entry.get () != TE.get () && Entry->isSame (TE->Scalars );
2856
+ })) {
2952
2857
OrderedEntries.insert (TE.get ());
2953
- GathersToOrders.try_emplace (TE.get (), * CurrentOrder);
2858
+ GathersToOrders.try_emplace (TE.get (), CurrentOrder);
2954
2859
}
2955
2860
}
2861
+ if (TE->State != TreeEntry::Vectorize)
2862
+ NonVectorized.push_back (TE.get ());
2956
2863
});
2957
2864
2958
2865
// Checks if the operands of the users are reorderable and have only single
@@ -3004,7 +2911,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
3004
2911
for (TreeEntry *TE : OrderedEntries) {
3005
2912
if (!(TE->State == TreeEntry::Vectorize ||
3006
2913
(TE->State == TreeEntry::NeedToGather &&
3007
- GathersToOrders. count (TE) )) ||
2914
+ TE-> getOpcode () == Instruction::ExtractElement )) ||
3008
2915
TE->UserTreeIndices .empty () || !TE->ReuseShuffleIndices .empty () ||
3009
2916
!all_of (drop_begin (TE->UserTreeIndices ),
3010
2917
[TE](const EdgeInfo &EI) {
@@ -3082,7 +2989,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
3082
2989
// Choose the best order.
3083
2990
ArrayRef<unsigned > BestOrder = OrdersUses.begin ()->first ;
3084
2991
unsigned Cnt = OrdersUses.begin ()->second ;
3085
- for (const auto &Pair : drop_begin (OrdersUses)) {
2992
+ for (const auto &Pair : llvm:: drop_begin (OrdersUses)) {
3086
2993
if (Cnt < Pair.second || (Cnt == Pair.second && Pair.first .empty ())) {
3087
2994
BestOrder = Pair.first ;
3088
2995
Cnt = Pair.second ;
@@ -3125,13 +3032,10 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
3125
3032
}
3126
3033
// For gathers just need to reorder its scalars.
3127
3034
for (TreeEntry *Gather : GatherOps) {
3035
+ if (!Gather->ReuseShuffleIndices .empty ())
3036
+ continue ;
3128
3037
assert (Gather->ReorderIndices .empty () &&
3129
3038
" Unexpected reordering of gathers." );
3130
- if (!Gather->ReuseShuffleIndices .empty ()) {
3131
- // Just reorder reuses indices.
3132
- reorderReuses (Gather->ReuseShuffleIndices , Mask);
3133
- continue ;
3134
- }
3135
3039
reorderScalars (Gather->Scalars , Mask);
3136
3040
OrderedEntries.remove (Gather);
3137
3041
}
@@ -7465,7 +7369,9 @@ struct SLPVectorizer : public FunctionPass {
7465
7369
initializeSLPVectorizerPass (*PassRegistry::getPassRegistry ());
7466
7370
}
7467
7371
7468
- bool doInitialization (Module &M) override { return false ; }
7372
+ bool doInitialization (Module &M) override {
7373
+ return false ;
7374
+ }
7469
7375
7470
7376
bool runOnFunction (Function &F) override {
7471
7377
if (skipFunction (F))
0 commit comments