
Commit 61d04f1

[SLP][NFC]Extract preliminary checks from buildTree_rec, NFC
Moved check from buildTree_rec function to a separate isLegalToVectorizeScalars function.

Reviewers: RKSimon, hiraditya

Reviewed By: RKSimon

Pull Request: #134132
Parent: e762baf
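For readers who are not deep in the SLP vectorizer, the sketch below illustrates the refactoring pattern this commit applies, under simplified assumptions: the early bailout checks move out of the recursive tree builder into a standalone predicate that returns false on failure and reports, through out-flags, whether the caller should still pack duplicates into a gather node or attempt a split node. The types and check bodies used here (Scalars, hasCommonOpcode, MaxDepth) are placeholders invented for the example, not the actual LLVM data structures or checks.

#include <iostream>
#include <string>
#include <vector>

using Scalars = std::vector<std::string>; // stand-in for ArrayRef<Value *>

constexpr unsigned MaxDepth = 12; // placeholder recursion limit

// Placeholder for getSameOpcode(): true if every scalar "looks the same".
static bool hasCommonOpcode(const Scalars &VL) {
  for (const std::string &V : VL)
    if (V != VL.front())
      return false;
  return true;
}

// Shaped like the new isLegalToVectorizeScalars(): run the preliminary checks,
// return false on the first failure, and tell the caller how it may still fall
// back (pack duplicates into a gather node, or try a split node).
static bool isLegalToVectorizeScalars(const Scalars &VL, unsigned Depth,
                                      bool &TryToFindDuplicates,
                                      bool &TrySplitVectorize) {
  TryToFindDuplicates = true;
  TrySplitVectorize = false;
  if (VL.empty()) {
    // Do not try to pack to avoid extra instructions here.
    TryToFindDuplicates = false;
    return false;
  }
  if (Depth >= MaxDepth)
    return false; // gathering with duplicate packing is still allowed
  if (!hasCommonOpcode(VL)) {
    TrySplitVectorize = true; // caller may try an alternate/split node
    return false;
  }
  return true;
}

// Shaped like buildTree_rec() after the refactoring: one predicate call, then
// a single fallback block instead of bailouts scattered through the function.
static void buildTree_rec(const Scalars &VL, unsigned Depth) {
  bool TryToPackDuplicates, TrySplitVectorize;
  if (!isLegalToVectorizeScalars(VL, Depth, TryToPackDuplicates,
                                 TrySplitVectorize)) {
    if (TrySplitVectorize) {
      std::cout << "try to build a split (alternate) node\n";
      return;
    }
    if (TryToPackDuplicates)
      std::cout << "build a gather node with reuse shuffle indices\n";
    return;
  }
  std::cout << "continue with per-opcode vectorization checks\n";
}

int main() {
  buildTree_rec({"add", "add", "add", "add"}, /*Depth=*/0);
  buildTree_rec({"add", "sub", "add", "sub"}, /*Depth=*/0);
}

In the real change, the fallback handling (duplicate packing and the last-chance split attempt) stays in one place at the call site in buildTree_rec, which is what the out-parameters make possible.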

1 file changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 113 additions & 108 deletions
@@ -3913,6 +3913,14 @@ class BoUpSLP {
   bool areAltOperandsProfitable(const InstructionsState &S,
                                 ArrayRef<Value *> VL) const;
 
+  /// Checks if the specified list of the instructions/values can be vectorized
+  /// in general.
+  bool isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
+                                 const EdgeInfo &UserTreeIdx,
+                                 InstructionsState &S,
+                                 bool &TryToFindDuplicates,
+                                 bool &TrySplitVectorize) const;
+
   /// Checks if the specified list of the instructions/values can be vectorized
   /// and fills required data before actual scheduling of the instructions.
   TreeEntry::EntryState
@@ -9329,35 +9337,25 @@ bool BoUpSLP::canBuildSplitNode(ArrayRef<Value *> VL,
   return true;
 }
 
-void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
-                            const EdgeInfo &UserTreeIdx,
-                            unsigned InterleaveFactor) {
+bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
+                                        const EdgeInfo &UserTreeIdx,
+                                        InstructionsState &S,
+                                        bool &TryToFindDuplicates,
+                                        bool &TrySplitVectorize) const {
   assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
 
-  SmallVector<int> ReuseShuffleIndices;
-  SmallVector<Value *> NonUniqueValueVL(VL.begin(), VL.end());
-  auto TryToFindDuplicates = [&](const InstructionsState &S,
-                                 bool DoNotFail = false) {
-    if (tryToFindDuplicates(NonUniqueValueVL, ReuseShuffleIndices, *TTI, *TLI,
-                            S, UserTreeIdx, DoNotFail)) {
-      VL = NonUniqueValueVL;
-      return true;
-    }
-    auto Invalid = ScheduleBundle::invalid();
-    newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx);
-    return false;
-  };
-
-  InstructionsState S = getSameOpcode(VL, *TLI);
+  S = getSameOpcode(VL, *TLI);
+  TryToFindDuplicates = true;
+  TrySplitVectorize = false;
 
   // Don't go into catchswitch blocks, which can happen with PHIs.
   // Such blocks can only have PHIs and the catchswitch. There is no
   // place to insert a shuffle if we need to, so just avoid that issue.
   if (S && isa<CatchSwitchInst>(S.getMainOp()->getParent()->getTerminator())) {
     LLVM_DEBUG(dbgs() << "SLP: bundle in catchswitch block.\n");
-    auto Invalid = ScheduleBundle::invalid();
-    newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx);
-    return;
+    // Do not try to pack to avoid extra instructions here.
+    TryToFindDuplicates = false;
+    return false;
   }
 
   // Check if this is a duplicate of another entry.
@@ -9367,24 +9365,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       if (E->isSame(VL)) {
         LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.getMainOp()
                           << ".\n");
-        if (TryToFindDuplicates(S)) {
-          auto Invalid = ScheduleBundle::invalid();
-          newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
-                       ReuseShuffleIndices);
-        }
-        return;
+        return false;
       }
       SmallPtrSet<Value *, 8> Values(llvm::from_range, E->Scalars);
       if (all_of(VL, [&](Value *V) {
            return isa<PoisonValue>(V) || Values.contains(V);
          })) {
         LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
-        if (TryToFindDuplicates(S)) {
-          auto Invalid = ScheduleBundle::invalid();
-          newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
-                       ReuseShuffleIndices);
-        }
-        return;
+        return false;
       }
     }
   }
@@ -9401,75 +9389,31 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
                  cast<Instruction>(I)->getOpcode() == S.getOpcode();
          })))) {
     LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
-    if (TryToFindDuplicates(S)) {
-      auto Invalid = ScheduleBundle::invalid();
-      newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
-                   ReuseShuffleIndices);
-    }
-    return;
+    return false;
   }
 
   // Don't handle scalable vectors
   if (S && S.getOpcode() == Instruction::ExtractElement &&
       isa<ScalableVectorType>(
          cast<ExtractElementInst>(S.getMainOp())->getVectorOperandType())) {
     LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
-    if (TryToFindDuplicates(S)) {
-      auto Invalid = ScheduleBundle::invalid();
-      newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
-                   ReuseShuffleIndices);
-    }
-    return;
+    return false;
   }
 
   // Don't handle vectors.
   if (!SLPReVec && getValueType(VL.front())->isVectorTy()) {
     LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
-    auto Invalid = ScheduleBundle::invalid();
-    newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx);
-    return;
+    // Do not try to pack to avoid extra instructions here.
+    TryToFindDuplicates = false;
+    return false;
  }
 
-  // Tries to build split node.
-  auto TrySplitNode = [&](const InstructionsState &LocalState) {
-    SmallVector<Value *> Op1, Op2;
-    OrdersType ReorderIndices;
-    if (!canBuildSplitNode(VL, LocalState, Op1, Op2, ReorderIndices))
-      return false;
-
-    SmallVector<Value *> NewVL(VL.size());
-    copy(Op1, NewVL.begin());
-    copy(Op2, std::next(NewVL.begin(), Op1.size()));
-    auto Invalid = ScheduleBundle::invalid();
-    auto *TE = newTreeEntry(VL, TreeEntry::SplitVectorize, Invalid, LocalState,
-                            UserTreeIdx, {}, ReorderIndices);
-    LLVM_DEBUG(dbgs() << "SLP: split alternate node.\n"; TE->dump());
-    auto AddNode = [&](ArrayRef<Value *> Op, unsigned Idx) {
-      InstructionsState S = getSameOpcode(Op, *TLI);
-      if (S && (isa<LoadInst>(S.getMainOp()) ||
-                getSameValuesTreeEntry(S.getMainOp(), Op, /*SameVF=*/true))) {
-        // Build gather node for loads, they will be gathered later.
-        TE->CombinedEntriesWithIndices.emplace_back(VectorizableTree.size(),
-                                                    Idx == 0 ? 0 : Op1.size());
-        (void)newTreeEntry(Op, TreeEntry::NeedToGather, Invalid, S, {TE, Idx});
-      } else {
-        TE->CombinedEntriesWithIndices.emplace_back(VectorizableTree.size(),
-                                                    Idx == 0 ? 0 : Op1.size());
-        buildTree_rec(Op, Depth, {TE, Idx});
-      }
-    };
-    AddNode(Op1, 0);
-    AddNode(Op2, 1);
-    return true;
-  };
-
   // If all of the operands are identical or constant we have a simple solution.
   // If we deal with insert/extract instructions, they all must have constant
   // indices, otherwise we should gather them, not try to vectorize.
   // If alternate op node with 2 elements with gathered operands - do not
   // vectorize.
-  auto &&NotProfitableForVectorization = [&S, this,
-                                          Depth](ArrayRef<Value *> VL) {
+  auto NotProfitableForVectorization = [&S, this, Depth](ArrayRef<Value *> VL) {
     if (!S || !S.isAltShuffle() || VL.size() > 2)
       return false;
     if (VectorizableTree.size() < MinTreeSize)
@@ -9549,18 +9493,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
        !all_of(VL, isVectorLikeInstWithConstOps)) ||
       NotProfitableForVectorization(VL)) {
     if (!S) {
-      auto [MainOp, AltOp] = getMainAltOpsNoStateVL(VL);
-      // Last chance to try to vectorize alternate node.
-      if (MainOp && AltOp && TrySplitNode(InstructionsState(MainOp, AltOp)))
-        return;
+      LLVM_DEBUG(dbgs() << "SLP: Try split and if failed, gathering due to "
+                           "C,S,B,O, small shuffle. \n");
+      TrySplitVectorize = true;
+      return false;
     }
     LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O, small shuffle. \n");
-    if (TryToFindDuplicates(S)) {
-      auto Invalid = ScheduleBundle::invalid();
-      newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
-                   ReuseShuffleIndices);
-    }
-    return;
+    return false;
   }
 
   // Don't vectorize ephemeral values.
@@ -9569,9 +9508,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       if (EphValues.count(V)) {
         LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
                           << ") is ephemeral.\n");
-        auto Invalid = ScheduleBundle::invalid();
-        newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx);
-        return;
+        // Do not try to pack to avoid extra instructions here.
+        TryToFindDuplicates = false;
+        return false;
       }
     }
   }
@@ -9620,12 +9559,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
     if (PreferScalarize) {
       LLVM_DEBUG(dbgs() << "SLP: The instructions are in tree and alternate "
                            "node is not profitable.\n");
-      if (TryToFindDuplicates(S)) {
-        auto Invalid = ScheduleBundle::invalid();
-        newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
-                     ReuseShuffleIndices);
-      }
-      return;
+      return false;
     }
   }
 
@@ -9634,12 +9568,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
     for (Value *V : VL) {
       if (UserIgnoreList->contains(V)) {
         LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
-        if (TryToFindDuplicates(S)) {
-          auto Invalid = ScheduleBundle::invalid();
-          newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
-                       ReuseShuffleIndices);
-        }
-        return;
+        return false;
       }
     }
   }
@@ -9669,8 +9598,79 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
     // Do not vectorize EH and non-returning blocks, not profitable in most
     // cases.
     LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
+    return false;
+  }
+  return true;
+}
+
+void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
+                            const EdgeInfo &UserTreeIdx,
+                            unsigned InterleaveFactor) {
+  assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
+
+  SmallVector<int> ReuseShuffleIndices;
+  SmallVector<Value *> NonUniqueValueVL(VL.begin(), VL.end());
+  auto TryToFindDuplicates = [&](const InstructionsState &S,
+                                 bool DoNotFail = false) {
+    if (tryToFindDuplicates(NonUniqueValueVL, ReuseShuffleIndices, *TTI, *TLI,
+                            S, UserTreeIdx, DoNotFail)) {
+      VL = NonUniqueValueVL;
+      return true;
+    }
     auto Invalid = ScheduleBundle::invalid();
     newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx);
+    return false;
+  };
+
+  InstructionsState S = InstructionsState::invalid();
+  // Tries to build split node.
+  auto TrySplitNode = [&](const InstructionsState &LocalState) {
+    SmallVector<Value *> Op1, Op2;
+    OrdersType ReorderIndices;
+    if (!canBuildSplitNode(VL, LocalState, Op1, Op2, ReorderIndices))
+      return false;
+
+    SmallVector<Value *> NewVL(VL.size());
+    copy(Op1, NewVL.begin());
+    copy(Op2, std::next(NewVL.begin(), Op1.size()));
+    auto Invalid = ScheduleBundle::invalid();
+    auto *TE = newTreeEntry(VL, TreeEntry::SplitVectorize, Invalid, LocalState,
+                            UserTreeIdx, {}, ReorderIndices);
+    LLVM_DEBUG(dbgs() << "SLP: split alternate node.\n"; TE->dump());
+    auto AddNode = [&](ArrayRef<Value *> Op, unsigned Idx) {
+      InstructionsState S = getSameOpcode(Op, *TLI);
+      if (S && (isa<LoadInst>(S.getMainOp()) ||
+                getSameValuesTreeEntry(S.getMainOp(), Op, /*SameVF=*/true))) {
+        // Build gather node for loads, they will be gathered later.
+        TE->CombinedEntriesWithIndices.emplace_back(VectorizableTree.size(),
+                                                    Idx == 0 ? 0 : Op1.size());
+        (void)newTreeEntry(Op, TreeEntry::NeedToGather, Invalid, S, {TE, Idx});
+      } else {
+        TE->CombinedEntriesWithIndices.emplace_back(VectorizableTree.size(),
+                                                    Idx == 0 ? 0 : Op1.size());
+        buildTree_rec(Op, Depth, {TE, Idx});
+      }
+    };
+    AddNode(Op1, 0);
+    AddNode(Op2, 1);
+    return true;
+  };
+
+  bool TryToPackDuplicates;
+  bool TrySplitVectorize;
+  if (!isLegalToVectorizeScalars(VL, Depth, UserTreeIdx, S, TryToPackDuplicates,
+                                 TrySplitVectorize)) {
+    if (TrySplitVectorize) {
+      auto [MainOp, AltOp] = getMainAltOpsNoStateVL(VL);
+      // Last chance to try to vectorize alternate node.
+      if (MainOp && AltOp && TrySplitNode(InstructionsState(MainOp, AltOp)))
+        return;
+    }
+    if (!TryToPackDuplicates || TryToFindDuplicates(S)) {
+      auto Invalid = ScheduleBundle::invalid();
+      newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
+                   ReuseShuffleIndices);
+    }
     return;
   }
 
@@ -9683,6 +9683,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
     return;
 
   // Perform specific checks for each particular instruction kind.
+  bool IsScatterVectorizeUserTE =
+      UserTreeIdx.UserTE &&
+      UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize;
   OrdersType CurrentOrder;
   SmallVector<Value *> PointerOps;
   TreeEntry::EntryState State = getScalarsVectorizationState(
@@ -9694,6 +9697,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
     return;
   }
 
+  Instruction *VL0 = S.getMainOp();
+  BasicBlock *BB = VL0->getParent();
   auto &BSRef = BlocksSchedules[BB];
   if (!BSRef)
     BSRef = std::make_unique<BlockScheduling>(BB);
