@@ -776,6 +776,57 @@ static void reorderScalars(SmallVectorImpl<Value *> &Scalars,
776
776
Scalars[Mask[I]] = Prev[I];
777
777
}
778
778
779
+ // / Checks if the provided value does not require scheduling. It does not
780
+ // / require scheduling if this is not an instruction or it is an instruction
781
+ // / that does not read/write memory and all operands are either not instructions
782
+ // / or phi nodes or instructions from different blocks.
783
+ static bool areAllOperandsNonInsts (Value *V) {
784
+ auto *I = dyn_cast<Instruction>(V);
785
+ if (!I)
786
+ return true ;
787
+ return !I->mayReadOrWriteMemory () && all_of (I->operands (), [I](Value *V) {
788
+ auto *IO = dyn_cast<Instruction>(V);
789
+ if (!IO)
790
+ return true ;
791
+ return isa<PHINode>(IO) || IO->getParent () != I->getParent ();
792
+ });
793
+ }
794
+
795
+ // / Checks if the provided value does not require scheduling. It does not
796
+ // / require scheduling if this is not an instruction or it is an instruction
797
+ // / that does not read/write memory and all users are phi nodes or instructions
798
+ // / from the different blocks.
799
+ static bool isUsedOutsideBlock (Value *V) {
800
+ auto *I = dyn_cast<Instruction>(V);
801
+ if (!I)
802
+ return true ;
803
+ // Limits the number of uses to save compile time.
804
+ constexpr int UsesLimit = 8 ;
805
+ return !I->mayReadOrWriteMemory () && !I->hasNUsesOrMore (UsesLimit) &&
806
+ all_of (I->users (), [I](User *U) {
807
+ auto *IU = dyn_cast<Instruction>(U);
808
+ if (!IU)
809
+ return true ;
810
+ return IU->getParent () != I->getParent () || isa<PHINode>(IU);
811
+ });
812
+ }
813
+
814
+ // / Checks if the specified value does not require scheduling. It does not
815
+ // / require scheduling if all operands and all users do not need to be scheduled
816
+ // / in the current basic block.
817
+ static bool doesNotNeedToBeScheduled (Value *V) {
818
+ return areAllOperandsNonInsts (V) && isUsedOutsideBlock (V);
819
+ }
820
+
821
+ // / Checks if the specified array of instructions does not require scheduling.
822
+ // / It is so if all either instructions have operands that do not require
823
+ // / scheduling or their users do not require scheduling since they are phis or
824
+ // / in other basic blocks.
825
+ static bool doesNotNeedToSchedule (ArrayRef<Value *> VL) {
826
+ return !VL.empty () &&
827
+ (all_of (VL, isUsedOutsideBlock) || all_of (VL, areAllOperandsNonInsts));
828
+ }
829
+
779
830
namespace slpvectorizer {
780
831
781
832
// / Bottom Up SLP Vectorizer.
@@ -2359,15 +2410,21 @@ class BoUpSLP {
2359
2410
ScalarToTreeEntry[V] = Last;
2360
2411
}
2361
2412
// Update the scheduler bundle to point to this TreeEntry.
2362
- unsigned Lane = 0 ;
2363
- for (ScheduleData *BundleMember = Bundle.getValue (); BundleMember;
2364
- BundleMember = BundleMember->NextInBundle ) {
2365
- BundleMember->TE = Last;
2366
- BundleMember->Lane = Lane;
2367
- ++Lane;
2368
- }
2369
- assert ((!Bundle.getValue () || Lane == VL.size ()) &&
2413
+ ScheduleData *BundleMember = Bundle.getValue ();
2414
+ assert ((BundleMember || isa<PHINode>(S.MainOp ) ||
2415
+ isVectorLikeInstWithConstOps (S.MainOp ) ||
2416
+ doesNotNeedToSchedule (VL)) &&
2370
2417
" Bundle and VL out of sync" );
2418
+ if (BundleMember) {
2419
+ for (Value *V : VL) {
2420
+ if (doesNotNeedToBeScheduled (V))
2421
+ continue ;
2422
+ assert (BundleMember && " Unexpected end of bundle." );
2423
+ BundleMember->TE = Last;
2424
+ BundleMember = BundleMember->NextInBundle ;
2425
+ }
2426
+ }
2427
+ assert (!BundleMember && " Bundle and VL out of sync" );
2371
2428
} else {
2372
2429
MustGather.insert (VL.begin (), VL.end ());
2373
2430
}
@@ -2504,7 +2561,6 @@ class BoUpSLP {
2504
2561
clearDependencies ();
2505
2562
OpValue = OpVal;
2506
2563
TE = nullptr ;
2507
- Lane = -1 ;
2508
2564
}
2509
2565
2510
2566
// / Verify basic self consistency properties
@@ -2544,7 +2600,7 @@ class BoUpSLP {
2544
2600
// / Returns true if it represents an instruction bundle and not only a
2545
2601
// / single instruction.
2546
2602
bool isPartOfBundle () const {
2547
- return NextInBundle != nullptr || FirstInBundle != this ;
2603
+ return NextInBundle != nullptr || FirstInBundle != this || TE ;
2548
2604
}
2549
2605
2550
2606
// / Returns true if it is ready for scheduling, i.e. it has no more
@@ -2649,9 +2705,6 @@ class BoUpSLP {
2649
2705
// / Note that this is negative as long as Dependencies is not calculated.
2650
2706
int UnscheduledDeps = InvalidDeps;
2651
2707
2652
- // / The lane of this node in the TreeEntry.
2653
- int Lane = -1 ;
2654
-
2655
2708
// / True if this instruction is scheduled (or considered as scheduled in the
2656
2709
// / dry-run).
2657
2710
bool IsScheduled = false ;
@@ -2669,6 +2722,21 @@ class BoUpSLP {
2669
2722
friend struct DOTGraphTraits <BoUpSLP *>;
2670
2723
2671
2724
// / Contains all scheduling data for a basic block.
2725
+ // / It does not schedule instructions, which are not memory read/write
2726
+ // / instructions and their operands are either constants, or arguments, or
2727
+ // / phis, or instructions from other blocks, or their users are phis or from
2728
+ // / the other blocks. The resulting vector instructions can be placed at the
2729
+ // / beginning of the basic block without scheduling (if operands do not need
2730
+ // / to be scheduled) or at the end of the block (if users are outside of the
2731
+ // / block). This saves some compile time and memory used by the
2732
+ // / compiler.
2733
+ // / ScheduleData is assigned for each instruction in between the boundaries of
2734
+ // / the tree entry, even for those, which are not part of the graph. It is
2735
+ // / required to correctly follow the dependencies between the instructions and
2736
+ // / their correct scheduling. The ScheduleData is not allocated for the
2737
+ // / instructions, which do not require scheduling, like phis, nodes with
2738
+ // / extractelements/insertelements only or nodes with instructions, with
2739
+ // / uses/operands outside of the block.
2672
2740
struct BlockScheduling {
2673
2741
BlockScheduling (BasicBlock *BB)
2674
2742
: BB(BB), ChunkSize(BB->size ()), ChunkPos(ChunkSize) {}
@@ -2696,7 +2764,7 @@ class BoUpSLP {
2696
2764
if (BB != I->getParent ())
2697
2765
// Avoid lookup if can't possibly be in map.
2698
2766
return nullptr ;
2699
- ScheduleData *SD = ScheduleDataMap[I] ;
2767
+ ScheduleData *SD = ScheduleDataMap. lookup (I) ;
2700
2768
if (SD && isInSchedulingRegion (SD))
2701
2769
return SD;
2702
2770
return nullptr ;
@@ -2713,7 +2781,7 @@ class BoUpSLP {
2713
2781
return getScheduleData (V);
2714
2782
auto I = ExtraScheduleDataMap.find (V);
2715
2783
if (I != ExtraScheduleDataMap.end ()) {
2716
- ScheduleData *SD = I->second [ Key] ;
2784
+ ScheduleData *SD = I->second . lookup ( Key) ;
2717
2785
if (SD && isInSchedulingRegion (SD))
2718
2786
return SD;
2719
2787
}
@@ -2735,7 +2803,7 @@ class BoUpSLP {
2735
2803
BundleMember = BundleMember->NextInBundle ) {
2736
2804
if (BundleMember->Inst != BundleMember->OpValue )
2737
2805
continue ;
2738
-
2806
+
2739
2807
// Handle the def-use chain dependencies.
2740
2808
2741
2809
// Decrement the unscheduled counter and insert to ready list if ready.
@@ -2760,7 +2828,9 @@ class BoUpSLP {
2760
2828
// reordered during buildTree(). We therefore need to get its operands
2761
2829
// through the TreeEntry.
2762
2830
if (TreeEntry *TE = BundleMember->TE ) {
2763
- int Lane = BundleMember->Lane ;
2831
+ // Need to search for the lane since the tree entry can be reordered.
2832
+ int Lane = std::distance (TE->Scalars .begin (),
2833
+ find (TE->Scalars , BundleMember->Inst ));
2764
2834
assert (Lane >= 0 && " Lane not set" );
2765
2835
2766
2836
// Since vectorization tree is being built recursively this assertion
@@ -2769,7 +2839,7 @@ class BoUpSLP {
2769
2839
// where their second (immediate) operand is not added. Since
2770
2840
// immediates do not affect scheduler behavior this is considered
2771
2841
// okay.
2772
- auto *In = TE-> getMainOp () ;
2842
+ auto *In = BundleMember-> Inst ;
2773
2843
assert (In &&
2774
2844
(isa<ExtractValueInst>(In) || isa<ExtractElementInst>(In) ||
2775
2845
In->getNumOperands () == TE->getNumOperands ()) &&
@@ -2814,7 +2884,8 @@ class BoUpSLP {
2814
2884
2815
2885
for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode ()) {
2816
2886
auto *SD = getScheduleData (I);
2817
- assert (SD && " primary scheduledata must exist in window" );
2887
+ if (!SD)
2888
+ continue ;
2818
2889
assert (isInSchedulingRegion (SD) &&
2819
2890
" primary schedule data not in window?" );
2820
2891
assert (isInSchedulingRegion (SD->FirstInBundle ) &&
@@ -3856,6 +3927,22 @@ static LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
3856
3927
return LoadsState::Gather;
3857
3928
}
3858
3929
3930
+ // / \return true if the specified list of values has only one instruction that
3931
+ // / requires scheduling, false otherwise.
3932
+ static bool needToScheduleSingleInstruction (ArrayRef<Value *> VL) {
3933
+ Value *NeedsScheduling = nullptr ;
3934
+ for (Value *V : VL) {
3935
+ if (doesNotNeedToBeScheduled (V))
3936
+ continue ;
3937
+ if (!NeedsScheduling) {
3938
+ NeedsScheduling = V;
3939
+ continue ;
3940
+ }
3941
+ return false ;
3942
+ }
3943
+ return NeedsScheduling;
3944
+ }
3945
+
3859
3946
void BoUpSLP::buildTree_rec (ArrayRef<Value *> VL, unsigned Depth,
3860
3947
const EdgeInfo &UserTreeIdx) {
3861
3948
assert ((allConstant (VL) || allSameType (VL)) && " Invalid types!" );
@@ -6398,6 +6485,20 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
6398
6485
return !E->isOpcodeOrAlt (I) || I->getParent () == BB;
6399
6486
}));
6400
6487
6488
+ // Set the insert point to the beginning of the basic block if the entry
6489
+ // should not be scheduled, or before the block terminator if all scalars are used outside the block.
6490
+ if (E->State != TreeEntry::NeedToGather &&
6491
+ doesNotNeedToSchedule (E->Scalars )) {
6492
+ BasicBlock::iterator InsertPt;
6493
+ if (all_of (E->Scalars , isUsedOutsideBlock))
6494
+ InsertPt = BB->getTerminator ()->getIterator ();
6495
+ else
6496
+ InsertPt = BB->getFirstInsertionPt ();
6497
+ Builder.SetInsertPoint (BB, InsertPt);
6498
+ Builder.SetCurrentDebugLocation (Front->getDebugLoc ());
6499
+ return ;
6500
+ }
6501
+
6401
6502
// The last instruction in the bundle in program order.
6402
6503
Instruction *LastInst = nullptr ;
6403
6504
@@ -6406,8 +6507,10 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
6406
6507
// VL.back() and iterate over schedule data until we reach the end of the
6407
6508
// bundle. The end of the bundle is marked by null ScheduleData.
6408
6509
if (BlocksSchedules.count (BB)) {
6409
- auto *Bundle =
6410
- BlocksSchedules[BB]->getScheduleData (E->isOneOf (E->Scalars .back ()));
6510
+ Value *V = E->isOneOf (E->Scalars .back ());
6511
+ if (doesNotNeedToBeScheduled (V))
6512
+ V = *find_if_not (E->Scalars , doesNotNeedToBeScheduled);
6513
+ auto *Bundle = BlocksSchedules[BB]->getScheduleData (V);
6411
6514
if (Bundle && Bundle->isPartOfBundle ())
6412
6515
for (; Bundle; Bundle = Bundle->NextInBundle )
6413
6516
if (Bundle->OpValue == Bundle->Inst )
@@ -7633,9 +7736,11 @@ void BoUpSLP::optimizeGatherSequence() {
7633
7736
7634
7737
BoUpSLP::ScheduleData *
7635
7738
BoUpSLP::BlockScheduling::buildBundle (ArrayRef<Value *> VL) {
7636
- ScheduleData *Bundle = nullptr ;
7739
+ ScheduleData *Bundle = nullptr ;
7637
7740
ScheduleData *PrevInBundle = nullptr ;
7638
7741
for (Value *V : VL) {
7742
+ if (doesNotNeedToBeScheduled (V))
7743
+ continue ;
7639
7744
ScheduleData *BundleMember = getScheduleData (V);
7640
7745
assert (BundleMember &&
7641
7746
" no ScheduleData for bundle member "
@@ -7663,7 +7768,8 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
7663
7768
const InstructionsState &S) {
7664
7769
// No need to schedule PHIs, insertelement, extractelement and extractvalue
7665
7770
// instructions.
7666
- if (isa<PHINode>(S.OpValue ) || isVectorLikeInstWithConstOps (S.OpValue ))
7771
+ if (isa<PHINode>(S.OpValue ) || isVectorLikeInstWithConstOps (S.OpValue ) ||
7772
+ doesNotNeedToSchedule (VL))
7667
7773
return nullptr ;
7668
7774
7669
7775
// Initialize the instruction bundle.
@@ -7709,6 +7815,8 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
7709
7815
// Make sure that the scheduling region contains all
7710
7816
// instructions of the bundle.
7711
7817
for (Value *V : VL) {
7818
+ if (doesNotNeedToBeScheduled (V))
7819
+ continue ;
7712
7820
if (!extendSchedulingRegion (V, S)) {
7713
7821
// If the scheduling region got new instructions at the lower end (or it
7714
7822
// is a new region for the first bundle). This makes it necessary to
@@ -7723,6 +7831,8 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
7723
7831
7724
7832
bool ReSchedule = false ;
7725
7833
for (Value *V : VL) {
7834
+ if (doesNotNeedToBeScheduled (V))
7835
+ continue ;
7726
7836
ScheduleData *BundleMember = getScheduleData (V);
7727
7837
assert (BundleMember &&
7728
7838
" no ScheduleData for bundle member (maybe not in same basic block)" );
@@ -7752,14 +7862,18 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
7752
7862
7753
7863
void BoUpSLP::BlockScheduling::cancelScheduling (ArrayRef<Value *> VL,
7754
7864
Value *OpValue) {
7755
- if (isa<PHINode>(OpValue) || isVectorLikeInstWithConstOps (OpValue))
7865
+ if (isa<PHINode>(OpValue) || isVectorLikeInstWithConstOps (OpValue) ||
7866
+ doesNotNeedToSchedule (VL))
7756
7867
return ;
7757
7868
7869
+ if (doesNotNeedToBeScheduled (OpValue))
7870
+ OpValue = *find_if_not (VL, doesNotNeedToBeScheduled);
7758
7871
ScheduleData *Bundle = getScheduleData (OpValue);
7759
7872
LLVM_DEBUG (dbgs () << " SLP: cancel scheduling of " << *Bundle << " \n " );
7760
7873
assert (!Bundle->IsScheduled &&
7761
7874
" Can't cancel bundle which is already scheduled" );
7762
- assert (Bundle->isSchedulingEntity () && Bundle->isPartOfBundle () &&
7875
+ assert (Bundle->isSchedulingEntity () &&
7876
+ (Bundle->isPartOfBundle () || needToScheduleSingleInstruction (VL)) &&
7763
7877
" tried to unbundle something which is not a bundle" );
7764
7878
7765
7879
// Remove the bundle from the ready list.
@@ -7773,6 +7887,7 @@ void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL,
7773
7887
BundleMember->FirstInBundle = BundleMember;
7774
7888
ScheduleData *Next = BundleMember->NextInBundle ;
7775
7889
BundleMember->NextInBundle = nullptr ;
7890
+ BundleMember->TE = nullptr ;
7776
7891
if (BundleMember->unscheduledDepsInBundle () == 0 ) {
7777
7892
ReadyInsts.insert (BundleMember);
7778
7893
}
@@ -7796,6 +7911,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
7796
7911
Instruction *I = dyn_cast<Instruction>(V);
7797
7912
assert (I && " bundle member must be an instruction" );
7798
7913
assert (!isa<PHINode>(I) && !isVectorLikeInstWithConstOps (I) &&
7914
+ !doesNotNeedToBeScheduled (I) &&
7799
7915
" phi nodes/insertelements/extractelements/extractvalues don't need to "
7800
7916
" be scheduled" );
7801
7917
auto &&CheckScheduleForI = [this , &S](Instruction *I) -> bool {
@@ -7872,7 +7988,10 @@ void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
7872
7988
ScheduleData *NextLoadStore) {
7873
7989
ScheduleData *CurrentLoadStore = PrevLoadStore;
7874
7990
for (Instruction *I = FromI; I != ToI; I = I->getNextNode ()) {
7875
- ScheduleData *SD = ScheduleDataMap[I];
7991
+ // No need to allocate data for non-schedulable instructions.
7992
+ if (doesNotNeedToBeScheduled (I))
7993
+ continue ;
7994
+ ScheduleData *SD = ScheduleDataMap.lookup (I);
7876
7995
if (!SD) {
7877
7996
SD = allocateScheduleDataChunks ();
7878
7997
ScheduleDataMap[I] = SD;
@@ -8056,8 +8175,10 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
8056
8175
for (auto *I = BS->ScheduleStart ; I != BS->ScheduleEnd ;
8057
8176
I = I->getNextNode ()) {
8058
8177
BS->doForAllOpcodes (I, [this , &Idx, &NumToSchedule, BS](ScheduleData *SD) {
8178
+ TreeEntry *SDTE = getTreeEntry (SD->Inst );
8059
8179
assert ((isVectorLikeInstWithConstOps (SD->Inst ) ||
8060
- SD->isPartOfBundle () == (getTreeEntry (SD->Inst ) != nullptr )) &&
8180
+ SD->isPartOfBundle () ==
8181
+ (SDTE && !doesNotNeedToSchedule (SDTE->Scalars ))) &&
8061
8182
" scheduler and vectorizer bundle mismatch" );
8062
8183
SD->FirstInBundle ->SchedulingPriority = Idx++;
8063
8184
if (SD->isSchedulingEntity ()) {
0 commit comments