Skip to content

Commit 1eeb2bf

Browse files
committed
[SLP]Do not schedule instructions with constants/argument/phi operands and external users.
No need to schedule entry nodes where all instructions are not memory read/write instructions and their operands are either constants, or arguments, or phis, or instructions from other blocks, or their users are phis or from other blocks. The resulting vector instructions can be placed at the beginning of the basic block without scheduling (if the operands do not need to be scheduled) or at the end of the block (if the users are outside of the block). It may save some compile time and scheduling resources. Differential Revision: https://reviews.llvm.org/D121121
1 parent 5977dfb commit 1eeb2bf

28 files changed

+389
-311
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 148 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -776,6 +776,57 @@ static void reorderScalars(SmallVectorImpl<Value *> &Scalars,
776776
Scalars[Mask[I]] = Prev[I];
777777
}
778778

779+
/// Checks if the provided value does not require scheduling. It does not
780+
/// require scheduling if this is not an instruction or it is an instruction
781+
/// that does not read/write memory and all operands are either not instructions
782+
/// or phi nodes or instructions from different blocks.
783+
static bool areAllOperandsNonInsts(Value *V) {
784+
auto *I = dyn_cast<Instruction>(V);
785+
if (!I)
786+
return true;
787+
return !I->mayReadOrWriteMemory() && all_of(I->operands(), [I](Value *V) {
788+
auto *IO = dyn_cast<Instruction>(V);
789+
if (!IO)
790+
return true;
791+
return isa<PHINode>(IO) || IO->getParent() != I->getParent();
792+
});
793+
}
794+
795+
/// Checks if the provided value does not require scheduling. It does not
796+
/// require scheduling if this is not an instruction or it is an instruction
797+
/// that does not read/write memory and all users are phi nodes or instructions
798+
/// from the different blocks.
799+
static bool isUsedOutsideBlock(Value *V) {
800+
auto *I = dyn_cast<Instruction>(V);
801+
if (!I)
802+
return true;
803+
// Limits the number of uses to save compile time.
804+
constexpr int UsesLimit = 8;
805+
return !I->mayReadOrWriteMemory() && !I->hasNUsesOrMore(UsesLimit) &&
806+
all_of(I->users(), [I](User *U) {
807+
auto *IU = dyn_cast<Instruction>(U);
808+
if (!IU)
809+
return true;
810+
return IU->getParent() != I->getParent() || isa<PHINode>(IU);
811+
});
812+
}
813+
814+
/// Checks if the specified value does not require scheduling. It does not
815+
/// require scheduling if all operands and all users do not need to be scheduled
816+
/// in the current basic block.
817+
static bool doesNotNeedToBeScheduled(Value *V) {
818+
return areAllOperandsNonInsts(V) && isUsedOutsideBlock(V);
819+
}
820+
821+
/// Checks if the specified array of instructions does not require scheduling.
822+
/// It is so if all either instructions have operands that do not require
823+
/// scheduling or their users do not require scheduling since they are phis or
824+
/// in other basic blocks.
825+
static bool doesNotNeedToSchedule(ArrayRef<Value *> VL) {
826+
return !VL.empty() &&
827+
(all_of(VL, isUsedOutsideBlock) || all_of(VL, areAllOperandsNonInsts));
828+
}
829+
779830
namespace slpvectorizer {
780831

781832
/// Bottom Up SLP Vectorizer.
@@ -2359,15 +2410,21 @@ class BoUpSLP {
23592410
ScalarToTreeEntry[V] = Last;
23602411
}
23612412
// Update the scheduler bundle to point to this TreeEntry.
2362-
unsigned Lane = 0;
2363-
for (ScheduleData *BundleMember = Bundle.getValue(); BundleMember;
2364-
BundleMember = BundleMember->NextInBundle) {
2365-
BundleMember->TE = Last;
2366-
BundleMember->Lane = Lane;
2367-
++Lane;
2368-
}
2369-
assert((!Bundle.getValue() || Lane == VL.size()) &&
2413+
ScheduleData *BundleMember = Bundle.getValue();
2414+
assert((BundleMember || isa<PHINode>(S.MainOp) ||
2415+
isVectorLikeInstWithConstOps(S.MainOp) ||
2416+
doesNotNeedToSchedule(VL)) &&
23702417
"Bundle and VL out of sync");
2418+
if (BundleMember) {
2419+
for (Value *V : VL) {
2420+
if (doesNotNeedToBeScheduled(V))
2421+
continue;
2422+
assert(BundleMember && "Unexpected end of bundle.");
2423+
BundleMember->TE = Last;
2424+
BundleMember = BundleMember->NextInBundle;
2425+
}
2426+
}
2427+
assert(!BundleMember && "Bundle and VL out of sync");
23712428
} else {
23722429
MustGather.insert(VL.begin(), VL.end());
23732430
}
@@ -2504,7 +2561,6 @@ class BoUpSLP {
25042561
clearDependencies();
25052562
OpValue = OpVal;
25062563
TE = nullptr;
2507-
Lane = -1;
25082564
}
25092565

25102566
/// Verify basic self consistency properties
@@ -2544,7 +2600,7 @@ class BoUpSLP {
25442600
/// Returns true if it represents an instruction bundle and not only a
25452601
/// single instruction.
25462602
bool isPartOfBundle() const {
2547-
return NextInBundle != nullptr || FirstInBundle != this;
2603+
return NextInBundle != nullptr || FirstInBundle != this || TE;
25482604
}
25492605

25502606
/// Returns true if it is ready for scheduling, i.e. it has no more
@@ -2649,9 +2705,6 @@ class BoUpSLP {
26492705
/// Note that this is negative as long as Dependencies is not calculated.
26502706
int UnscheduledDeps = InvalidDeps;
26512707

2652-
/// The lane of this node in the TreeEntry.
2653-
int Lane = -1;
2654-
26552708
/// True if this instruction is scheduled (or considered as scheduled in the
26562709
/// dry-run).
26572710
bool IsScheduled = false;
@@ -2669,6 +2722,21 @@ class BoUpSLP {
26692722
friend struct DOTGraphTraits<BoUpSLP *>;
26702723

26712724
/// Contains all scheduling data for a basic block.
2725+
/// It does not schedule instructions, which are not memory read/write
2726+
/// instructions and their operands are either constants, or arguments, or
2727+
/// phis, or instructions from other blocks, or their users are phis or from
2728+
/// the other blocks. The resulting vector instructions can be placed at the
2729+
/// beginning of the basic block without scheduling (if operands do not need
2730+
/// to be scheduled) or at the end of the block (if users are outside of the
2731+
/// block). It allows to save some compile time and memory used by the
2732+
/// compiler.
2733+
/// ScheduleData is assigned for each instruction in between the boundaries of
2734+
/// the tree entry, even for those, which are not part of the graph. It is
2735+
/// required to correctly follow the dependencies between the instructions and
2736+
/// their correct scheduling. The ScheduleData is not allocated for the
2737+
/// instructions, which do not require scheduling, like phis, nodes with
2738+
/// extractelements/insertelements only or nodes with instructions, with
2739+
/// uses/operands outside of the block.
26722740
struct BlockScheduling {
26732741
BlockScheduling(BasicBlock *BB)
26742742
: BB(BB), ChunkSize(BB->size()), ChunkPos(ChunkSize) {}
@@ -2696,7 +2764,7 @@ class BoUpSLP {
26962764
if (BB != I->getParent())
26972765
// Avoid lookup if can't possibly be in map.
26982766
return nullptr;
2699-
ScheduleData *SD = ScheduleDataMap[I];
2767+
ScheduleData *SD = ScheduleDataMap.lookup(I);
27002768
if (SD && isInSchedulingRegion(SD))
27012769
return SD;
27022770
return nullptr;
@@ -2713,7 +2781,7 @@ class BoUpSLP {
27132781
return getScheduleData(V);
27142782
auto I = ExtraScheduleDataMap.find(V);
27152783
if (I != ExtraScheduleDataMap.end()) {
2716-
ScheduleData *SD = I->second[Key];
2784+
ScheduleData *SD = I->second.lookup(Key);
27172785
if (SD && isInSchedulingRegion(SD))
27182786
return SD;
27192787
}
@@ -2735,7 +2803,7 @@ class BoUpSLP {
27352803
BundleMember = BundleMember->NextInBundle) {
27362804
if (BundleMember->Inst != BundleMember->OpValue)
27372805
continue;
2738-
2806+
27392807
// Handle the def-use chain dependencies.
27402808

27412809
// Decrement the unscheduled counter and insert to ready list if ready.
@@ -2760,7 +2828,9 @@ class BoUpSLP {
27602828
// reordered during buildTree(). We therefore need to get its operands
27612829
// through the TreeEntry.
27622830
if (TreeEntry *TE = BundleMember->TE) {
2763-
int Lane = BundleMember->Lane;
2831+
// Need to search for the lane since the tree entry can be reordered.
2832+
int Lane = std::distance(TE->Scalars.begin(),
2833+
find(TE->Scalars, BundleMember->Inst));
27642834
assert(Lane >= 0 && "Lane not set");
27652835

27662836
// Since vectorization tree is being built recursively this assertion
@@ -2769,7 +2839,7 @@ class BoUpSLP {
27692839
// where their second (immediate) operand is not added. Since
27702840
// immediates do not affect scheduler behavior this is considered
27712841
// okay.
2772-
auto *In = TE->getMainOp();
2842+
auto *In = BundleMember->Inst;
27732843
assert(In &&
27742844
(isa<ExtractValueInst>(In) || isa<ExtractElementInst>(In) ||
27752845
In->getNumOperands() == TE->getNumOperands()) &&
@@ -2814,7 +2884,8 @@ class BoUpSLP {
28142884

28152885
for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) {
28162886
auto *SD = getScheduleData(I);
2817-
assert(SD && "primary scheduledata must exist in window");
2887+
if (!SD)
2888+
continue;
28182889
assert(isInSchedulingRegion(SD) &&
28192890
"primary schedule data not in window?");
28202891
assert(isInSchedulingRegion(SD->FirstInBundle) &&
@@ -3856,6 +3927,22 @@ static LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
38563927
return LoadsState::Gather;
38573928
}
38583929

3930+
/// \return true if the specified list of values has only one instruction that
3931+
/// requires scheduling, false otherwise.
3932+
static bool needToScheduleSingleInstruction(ArrayRef<Value *> VL) {
3933+
Value *NeedsScheduling = nullptr;
3934+
for (Value *V : VL) {
3935+
if (doesNotNeedToBeScheduled(V))
3936+
continue;
3937+
if (!NeedsScheduling) {
3938+
NeedsScheduling = V;
3939+
continue;
3940+
}
3941+
return false;
3942+
}
3943+
return NeedsScheduling;
3944+
}
3945+
38593946
void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
38603947
const EdgeInfo &UserTreeIdx) {
38613948
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
@@ -6398,6 +6485,20 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
63986485
return !E->isOpcodeOrAlt(I) || I->getParent() == BB;
63996486
}));
64006487

6488+
// Set the insert point to the beginning of the basic block if the entry
6489+
// should not be scheduled.
6490+
if (E->State != TreeEntry::NeedToGather &&
6491+
doesNotNeedToSchedule(E->Scalars)) {
6492+
BasicBlock::iterator InsertPt;
6493+
if (all_of(E->Scalars, isUsedOutsideBlock))
6494+
InsertPt = BB->getTerminator()->getIterator();
6495+
else
6496+
InsertPt = BB->getFirstInsertionPt();
6497+
Builder.SetInsertPoint(BB, InsertPt);
6498+
Builder.SetCurrentDebugLocation(Front->getDebugLoc());
6499+
return;
6500+
}
6501+
64016502
// The last instruction in the bundle in program order.
64026503
Instruction *LastInst = nullptr;
64036504

@@ -6406,8 +6507,10 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
64066507
// VL.back() and iterate over schedule data until we reach the end of the
64076508
// bundle. The end of the bundle is marked by null ScheduleData.
64086509
if (BlocksSchedules.count(BB)) {
6409-
auto *Bundle =
6410-
BlocksSchedules[BB]->getScheduleData(E->isOneOf(E->Scalars.back()));
6510+
Value *V = E->isOneOf(E->Scalars.back());
6511+
if (doesNotNeedToBeScheduled(V))
6512+
V = *find_if_not(E->Scalars, doesNotNeedToBeScheduled);
6513+
auto *Bundle = BlocksSchedules[BB]->getScheduleData(V);
64116514
if (Bundle && Bundle->isPartOfBundle())
64126515
for (; Bundle; Bundle = Bundle->NextInBundle)
64136516
if (Bundle->OpValue == Bundle->Inst)
@@ -7633,9 +7736,11 @@ void BoUpSLP::optimizeGatherSequence() {
76337736

76347737
BoUpSLP::ScheduleData *
76357738
BoUpSLP::BlockScheduling::buildBundle(ArrayRef<Value *> VL) {
7636-
ScheduleData *Bundle = nullptr;
7739+
ScheduleData *Bundle = nullptr;
76377740
ScheduleData *PrevInBundle = nullptr;
76387741
for (Value *V : VL) {
7742+
if (doesNotNeedToBeScheduled(V))
7743+
continue;
76397744
ScheduleData *BundleMember = getScheduleData(V);
76407745
assert(BundleMember &&
76417746
"no ScheduleData for bundle member "
@@ -7663,7 +7768,8 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
76637768
const InstructionsState &S) {
76647769
// No need to schedule PHIs, insertelement, extractelement and extractvalue
76657770
// instructions.
7666-
if (isa<PHINode>(S.OpValue) || isVectorLikeInstWithConstOps(S.OpValue))
7771+
if (isa<PHINode>(S.OpValue) || isVectorLikeInstWithConstOps(S.OpValue) ||
7772+
doesNotNeedToSchedule(VL))
76677773
return nullptr;
76687774

76697775
// Initialize the instruction bundle.
@@ -7709,6 +7815,8 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
77097815
// Make sure that the scheduling region contains all
77107816
// instructions of the bundle.
77117817
for (Value *V : VL) {
7818+
if (doesNotNeedToBeScheduled(V))
7819+
continue;
77127820
if (!extendSchedulingRegion(V, S)) {
77137821
// If the scheduling region got new instructions at the lower end (or it
77147822
// is a new region for the first bundle). This makes it necessary to
@@ -7723,6 +7831,8 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
77237831

77247832
bool ReSchedule = false;
77257833
for (Value *V : VL) {
7834+
if (doesNotNeedToBeScheduled(V))
7835+
continue;
77267836
ScheduleData *BundleMember = getScheduleData(V);
77277837
assert(BundleMember &&
77287838
"no ScheduleData for bundle member (maybe not in same basic block)");
@@ -7752,14 +7862,18 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
77527862

77537863
void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL,
77547864
Value *OpValue) {
7755-
if (isa<PHINode>(OpValue) || isVectorLikeInstWithConstOps(OpValue))
7865+
if (isa<PHINode>(OpValue) || isVectorLikeInstWithConstOps(OpValue) ||
7866+
doesNotNeedToSchedule(VL))
77567867
return;
77577868

7869+
if (doesNotNeedToBeScheduled(OpValue))
7870+
OpValue = *find_if_not(VL, doesNotNeedToBeScheduled);
77587871
ScheduleData *Bundle = getScheduleData(OpValue);
77597872
LLVM_DEBUG(dbgs() << "SLP: cancel scheduling of " << *Bundle << "\n");
77607873
assert(!Bundle->IsScheduled &&
77617874
"Can't cancel bundle which is already scheduled");
7762-
assert(Bundle->isSchedulingEntity() && Bundle->isPartOfBundle() &&
7875+
assert(Bundle->isSchedulingEntity() &&
7876+
(Bundle->isPartOfBundle() || needToScheduleSingleInstruction(VL)) &&
77637877
"tried to unbundle something which is not a bundle");
77647878

77657879
// Remove the bundle from the ready list.
@@ -7773,6 +7887,7 @@ void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL,
77737887
BundleMember->FirstInBundle = BundleMember;
77747888
ScheduleData *Next = BundleMember->NextInBundle;
77757889
BundleMember->NextInBundle = nullptr;
7890+
BundleMember->TE = nullptr;
77767891
if (BundleMember->unscheduledDepsInBundle() == 0) {
77777892
ReadyInsts.insert(BundleMember);
77787893
}
@@ -7796,6 +7911,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
77967911
Instruction *I = dyn_cast<Instruction>(V);
77977912
assert(I && "bundle member must be an instruction");
77987913
assert(!isa<PHINode>(I) && !isVectorLikeInstWithConstOps(I) &&
7914+
!doesNotNeedToBeScheduled(I) &&
77997915
"phi nodes/insertelements/extractelements/extractvalues don't need to "
78007916
"be scheduled");
78017917
auto &&CheckScheduleForI = [this, &S](Instruction *I) -> bool {
@@ -7872,7 +7988,10 @@ void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
78727988
ScheduleData *NextLoadStore) {
78737989
ScheduleData *CurrentLoadStore = PrevLoadStore;
78747990
for (Instruction *I = FromI; I != ToI; I = I->getNextNode()) {
7875-
ScheduleData *SD = ScheduleDataMap[I];
7991+
// No need to allocate data for non-schedulable instructions.
7992+
if (doesNotNeedToBeScheduled(I))
7993+
continue;
7994+
ScheduleData *SD = ScheduleDataMap.lookup(I);
78767995
if (!SD) {
78777996
SD = allocateScheduleDataChunks();
78787997
ScheduleDataMap[I] = SD;
@@ -8056,8 +8175,10 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
80568175
for (auto *I = BS->ScheduleStart; I != BS->ScheduleEnd;
80578176
I = I->getNextNode()) {
80588177
BS->doForAllOpcodes(I, [this, &Idx, &NumToSchedule, BS](ScheduleData *SD) {
8178+
TreeEntry *SDTE = getTreeEntry(SD->Inst);
80598179
assert((isVectorLikeInstWithConstOps(SD->Inst) ||
8060-
SD->isPartOfBundle() == (getTreeEntry(SD->Inst) != nullptr)) &&
8180+
SD->isPartOfBundle() ==
8181+
(SDTE && !doesNotNeedToSchedule(SDTE->Scalars))) &&
80618182
"scheduler and vectorizer bundle mismatch");
80628183
SD->FirstInBundle->SchedulingPriority = Idx++;
80638184
if (SD->isSchedulingEntity()) {

0 commit comments

Comments
 (0)