@@ -766,6 +766,12 @@ class BoUpSLP {
766
766
/// Perform LICM and CSE on the newly generated gather sequences.
767
767
void optimizeGatherSequence ();
768
768
769
+ /// Checks if the specified gather tree entry \p TE can be represented as a
770
+ /// shuffled vector entry plus (possibly) a permutation with other gathers. It
771
+ /// implements the checks only for possibly ordered scalars (Loads,
772
+ /// ExtractElement, ExtractValue), which can be part of the graph.
773
+ Optional<OrdersType> findReusedOrderedScalars (const TreeEntry &TE);
774
+
769
775
/// Reorders the current graph to the most profitable order starting from the
770
776
/// root node to the leaf nodes. The best order is chosen only from the nodes
771
777
/// of the same size (vectorization factor). Smaller nodes are considered
@@ -2670,6 +2676,72 @@ static void reorderOrder(SmallVectorImpl<unsigned> &Order, ArrayRef<int> Mask) {
2670
2676
fixupOrderingIndices (Order);
2671
2677
}
2672
2678
2679
// Tries to derive an ordering for the gather node TE from one other tree entry
// that already contains its load / extractelement / extractvalue scalars.
//
// Returns:
//   - None if the matched scalars belong to more than one tree entry, if a
//     matched scalar sits in a lane >= TE.Scalars.size() of that entry, or if
//     the final "enough matches" condition below does not hold;
//   - an empty order if every assigned slot is at its identity position;
//   - otherwise an order where slot Lane holds the index in TE.Scalars of the
//     scalar found at lane Lane of the matched entry, with the remaining slots
//     filled by the still-unused positions in increasing order.
+ Optional<BoUpSLP::OrdersType>
2680
+ BoUpSLP::findReusedOrderedScalars (const BoUpSLP::TreeEntry &TE) {
2681
+ assert (TE.State == TreeEntry::NeedToGather && " Expected gather node only." );
2682
+ unsigned NumScalars = TE.Scalars .size ();
2683
+ OrdersType CurrentOrder (NumScalars, NumScalars); // The value NumScalars marks a slot as not assigned yet.
2684
+ SmallVector<int > Positions; // NOTE(review): never referenced in this function - candidate for removal.
2685
+ SmallBitVector UsedPositions (NumScalars); // Bit I is set once position I has been stored in CurrentOrder.
2686
+ const TreeEntry *STE = nullptr ;
2687
+ // Try to find all gathered scalars that are also part of another tree
2688
+ // entry. All of them must come from one single tree entry; otherwise the
2689
+ // order of the gathered scalars cannot be identified and None is returned.
2690
+ for (unsigned I = 0 ; I < NumScalars; ++I) {
2691
+ Value *V = TE.Scalars [I];
2692
+ if (!isa<LoadInst, ExtractElementInst, ExtractValueInst>(V)) // Only loads and extracts are considered.
2693
+ continue ;
2694
+ if (const auto *LocalSTE = getTreeEntry (V)) { // presumably the tree entry that holds V - confirm in getTreeEntry.
2695
+ if (!STE)
2696
+ STE = LocalSTE;
2697
+ else if (STE != LocalSTE)
2698
+ // Take the order only from the single vector node.
2699
+ return None;
2700
+ unsigned Lane =
2701
+ std::distance (STE->Scalars .begin (), find (STE->Scalars , V));
2702
+ if (Lane >= NumScalars) // The lane does not fit into this gather node.
2703
+ return None;
2704
+ if (CurrentOrder[Lane] != NumScalars) { // Slot already assigned by an earlier scalar.
2705
+ if (Lane != I) // Keep the earlier assignment unless this one is an identity placement.
2706
+ continue ;
2707
+ UsedPositions.reset (CurrentOrder[Lane]); // Release the position that was stored in this slot.
2708
+ }
2709
+ // The partial identity (where only some elements of the gather node are
2710
+ // in the identity order) is good.
2711
+ CurrentOrder[Lane] = I;
2712
+ UsedPositions.set (I);
2713
+ }
2714
+ }
2715
+ // Need to keep the order if we have a vector entry and at least 2 scalars or
2716
+ // the vectorized entry has just 2 scalars.
2717
+ if (STE && (UsedPositions.count () > 1 || STE->Scalars .size () == 2 )) {
2718
+ auto &&IsIdentityOrder = [NumScalars](ArrayRef<unsigned > CurrentOrder) { // Unassigned slots are ignored by this check.
2719
+ for (unsigned I = 0 ; I < NumScalars; ++I)
2720
+ if (CurrentOrder[I] != I && CurrentOrder[I] != NumScalars)
2721
+ return false ;
2722
+ return true ;
2723
+ };
2724
+ if (IsIdentityOrder (CurrentOrder)) {
2725
+ CurrentOrder.clear (); // An identity order is reported as an empty order.
2726
+ return CurrentOrder;
2727
+ }
2728
+ auto *It = CurrentOrder.begin (); // Fill the unassigned slots with the unused positions, in increasing order.
2729
+ for (unsigned I = 0 ; I < NumScalars;) {
2730
+ if (UsedPositions.test (I)) { // Position I is already placed somewhere.
2731
+ ++I;
2732
+ continue ;
2733
+ }
2734
+ if (*It == NumScalars) { // Free slot found: store the unused position I here.
2735
+ *It = I;
2736
+ ++I;
2737
+ }
2738
+ ++It;
2739
+ }
2740
+ return CurrentOrder;
2741
+ }
2742
+ return None;
2743
+ }
2744
+
2673
2745
void BoUpSLP::reorderTopToBottom () {
2674
2746
// Maps VF to the graph nodes.
2675
2747
DenseMap<unsigned , SmallPtrSet<TreeEntry *, 4 >> VFToOrderedEntries;
@@ -2689,19 +2761,29 @@ void BoUpSLP::reorderTopToBottom() {
2689
2761
InsertElementInst>(TE->getMainOp ()) &&
2690
2762
!TE->isAltShuffle ()) {
2691
2763
VFToOrderedEntries[TE->Scalars .size ()].insert (TE.get ());
2692
- } else if (TE->State == TreeEntry::NeedToGather &&
2693
- TE->getOpcode () == Instruction::ExtractElement &&
2694
- !TE->isAltShuffle () &&
2695
- isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp ())
2696
- ->getVectorOperandType ()) &&
2697
- allSameType (TE->Scalars ) && allSameBlock (TE->Scalars )) {
2698
- // Check that gather of extractelements can be represented as
2699
- // just a shuffle of a single vector.
2700
- OrdersType CurrentOrder;
2701
- bool Reuse = canReuseExtract (TE->Scalars , TE->getMainOp (), CurrentOrder);
2702
- if (Reuse || !CurrentOrder.empty ()) {
2764
+ return ;
2765
+ }
2766
+ if (TE->State == TreeEntry::NeedToGather) {
2767
+ if (TE->getOpcode () == Instruction::ExtractElement &&
2768
+ !TE->isAltShuffle () &&
2769
+ isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp ())
2770
+ ->getVectorOperandType ()) &&
2771
+ allSameType (TE->Scalars ) && allSameBlock (TE->Scalars )) {
2772
+ // Check that gather of extractelements can be represented as
2773
+ // just a shuffle of a single vector.
2774
+ OrdersType CurrentOrder;
2775
+ bool Reuse =
2776
+ canReuseExtract (TE->Scalars , TE->getMainOp (), CurrentOrder);
2777
+ if (Reuse || !CurrentOrder.empty ()) {
2778
+ VFToOrderedEntries[TE->Scalars .size ()].insert (TE.get ());
2779
+ GathersToOrders.try_emplace (TE.get (), CurrentOrder);
2780
+ return ;
2781
+ }
2782
+ }
2783
+ if (Optional<OrdersType> CurrentOrder =
2784
+ findReusedOrderedScalars (*TE.get ())) {
2703
2785
VFToOrderedEntries[TE->Scalars .size ()].insert (TE.get ());
2704
- GathersToOrders.try_emplace (TE.get (), CurrentOrder);
2786
+ GathersToOrders.try_emplace (TE.get (), * CurrentOrder);
2705
2787
}
2706
2788
}
2707
2789
});
@@ -2753,7 +2835,7 @@ void BoUpSLP::reorderTopToBottom() {
2753
2835
// Choose the most used order.
2754
2836
ArrayRef<unsigned > BestOrder = OrdersUses.begin ()->first ;
2755
2837
unsigned Cnt = OrdersUses.begin ()->second ;
2756
- for (const auto &Pair : llvm:: drop_begin (OrdersUses)) {
2838
+ for (const auto &Pair : drop_begin (OrdersUses)) {
2757
2839
if (Cnt < Pair.second || (Cnt == Pair.second && Pair.first .empty ())) {
2758
2840
BestOrder = Pair.first ;
2759
2841
Cnt = Pair.second ;
@@ -2830,6 +2912,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
2830
2912
for_each (VectorizableTree, [this , &OrderedEntries, &GathersToOrders,
2831
2913
&NonVectorized](
2832
2914
const std::unique_ptr<TreeEntry> &TE) {
2915
+ if (TE->State != TreeEntry::Vectorize)
2916
+ NonVectorized.push_back (TE.get ());
2833
2917
// No need to reorder if need to shuffle reuses, still need to shuffle the
2834
2918
// node.
2835
2919
if (!TE->ReuseShuffleIndices .empty ())
@@ -2838,28 +2922,37 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
2838
2922
isa<LoadInst, ExtractElementInst, ExtractValueInst>(TE->getMainOp ()) &&
2839
2923
!TE->isAltShuffle ()) {
2840
2924
OrderedEntries.insert (TE.get ());
2841
- } else if (TE->State == TreeEntry::NeedToGather &&
2842
- TE->getOpcode () == Instruction::ExtractElement &&
2843
- !TE->isAltShuffle () &&
2844
- isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp ())
2845
- ->getVectorOperandType ()) &&
2846
- allSameType (TE->Scalars ) && allSameBlock (TE->Scalars )) {
2847
- // Check that gather of extractelements can be represented as
2848
- // just a shuffle of a single vector with a single user only.
2849
- OrdersType CurrentOrder;
2850
- bool Reuse = canReuseExtract (TE->Scalars , TE->getMainOp (), CurrentOrder);
2851
- if ((Reuse || !CurrentOrder.empty ()) &&
2852
- !any_of (
2853
- VectorizableTree, [&TE](const std::unique_ptr<TreeEntry> &Entry) {
2854
- return Entry->State == TreeEntry::NeedToGather &&
2855
- Entry.get () != TE.get () && Entry->isSame (TE->Scalars );
2856
- })) {
2925
+ return ;
2926
+ }
2927
+ if (TE->State == TreeEntry::NeedToGather) {
2928
+ if (TE->getOpcode () == Instruction::ExtractElement &&
2929
+ !TE->isAltShuffle () &&
2930
+ isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp ())
2931
+ ->getVectorOperandType ()) &&
2932
+ allSameType (TE->Scalars ) && allSameBlock (TE->Scalars )) {
2933
+ // Check that gather of extractelements can be represented as
2934
+ // just a shuffle of a single vector with a single user only.
2935
+ OrdersType CurrentOrder;
2936
+ bool Reuse =
2937
+ canReuseExtract (TE->Scalars , TE->getMainOp (), CurrentOrder);
2938
+ if ((Reuse || !CurrentOrder.empty ()) &&
2939
+ !any_of (VectorizableTree,
2940
+ [&TE](const std::unique_ptr<TreeEntry> &Entry) {
2941
+ return Entry->State == TreeEntry::NeedToGather &&
2942
+ Entry.get () != TE.get () &&
2943
+ Entry->isSame (TE->Scalars );
2944
+ })) {
2945
+ OrderedEntries.insert (TE.get ());
2946
+ GathersToOrders.try_emplace (TE.get (), CurrentOrder);
2947
+ return ;
2948
+ }
2949
+ }
2950
+ if (Optional<OrdersType> CurrentOrder =
2951
+ findReusedOrderedScalars (*TE.get ())) {
2857
2952
OrderedEntries.insert (TE.get ());
2858
- GathersToOrders.try_emplace (TE.get (), CurrentOrder);
2953
+ GathersToOrders.try_emplace (TE.get (), * CurrentOrder);
2859
2954
}
2860
2955
}
2861
- if (TE->State != TreeEntry::Vectorize)
2862
- NonVectorized.push_back (TE.get ());
2863
2956
});
2864
2957
2865
2958
// Checks if the operands of the users are reorderable and have only single
@@ -2911,7 +3004,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
2911
3004
for (TreeEntry *TE : OrderedEntries) {
2912
3005
if (!(TE->State == TreeEntry::Vectorize ||
2913
3006
(TE->State == TreeEntry::NeedToGather &&
2914
- TE-> getOpcode () == Instruction::ExtractElement )) ||
3007
+ GathersToOrders. count (TE) )) ||
2915
3008
TE->UserTreeIndices .empty () || !TE->ReuseShuffleIndices .empty () ||
2916
3009
!all_of (drop_begin (TE->UserTreeIndices ),
2917
3010
[TE](const EdgeInfo &EI) {
@@ -2989,7 +3082,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
2989
3082
// Choose the best order.
2990
3083
ArrayRef<unsigned > BestOrder = OrdersUses.begin ()->first ;
2991
3084
unsigned Cnt = OrdersUses.begin ()->second ;
2992
- for (const auto &Pair : llvm:: drop_begin (OrdersUses)) {
3085
+ for (const auto &Pair : drop_begin (OrdersUses)) {
2993
3086
if (Cnt < Pair.second || (Cnt == Pair.second && Pair.first .empty ())) {
2994
3087
BestOrder = Pair.first ;
2995
3088
Cnt = Pair.second ;
@@ -3032,10 +3125,13 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
3032
3125
}
3033
3126
// For gathers just need to reorder its scalars.
3034
3127
for (TreeEntry *Gather : GatherOps) {
3035
- if (!Gather->ReuseShuffleIndices .empty ())
3036
- continue ;
3037
3128
assert (Gather->ReorderIndices .empty () &&
3038
3129
" Unexpected reordering of gathers." );
3130
+ if (!Gather->ReuseShuffleIndices .empty ()) {
3131
+ // Just reorder reuses indices.
3132
+ reorderReuses (Gather->ReuseShuffleIndices , Mask);
3133
+ continue ;
3134
+ }
3039
3135
reorderScalars (Gather->Scalars , Mask);
3040
3136
OrderedEntries.remove (Gather);
3041
3137
}
@@ -7369,9 +7465,7 @@ struct SLPVectorizer : public FunctionPass {
7369
7465
initializeSLPVectorizerPass (*PassRegistry::getPassRegistry ());
7370
7466
}
7371
7467
7372
- bool doInitialization (Module &M) override {
7373
- return false ;
7374
- }
7468
+ bool doInitialization (Module &M) override { return false ; } // Performs no per-module setup; always returns false.
7375
7469
7376
7470
bool runOnFunction (Function &F) override {
7377
7471
if (skipFunction (F))
0 commit comments