@@ -4063,6 +4063,15 @@ class BoUpSLP {
4063
4063
}
4064
4064
#endif
4065
4065
4066
+ /// Create a new gather TreeEntry
4067
+ TreeEntry *newGatherTreeEntry(ArrayRef<Value *> VL,
4068
+ const InstructionsState &S,
4069
+ const EdgeInfo &UserTreeIdx,
4070
+ ArrayRef<int> ReuseShuffleIndices = {}) {
4071
+ auto Invalid = ScheduleBundle::invalid();
4072
+ return newTreeEntry(VL, Invalid, S, UserTreeIdx, ReuseShuffleIndices);
4073
+ }
4074
+
4066
4075
/// Create a new VectorizableTree entry.
4067
4076
TreeEntry *newTreeEntry(ArrayRef<Value *> VL, ScheduleBundle &Bundle,
4068
4077
const InstructionsState &S,
@@ -4251,13 +4260,34 @@ class BoUpSLP {
4251
4260
bool areAltOperandsProfitable(const InstructionsState &S,
4252
4261
ArrayRef<Value *> VL) const;
4253
4262
4263
+ /// Contains all the outputs of legality analysis for a list of values to
4264
+ /// vectorize.
4265
+ class ScalarsVectorizationLegality {
4266
+ InstructionsState S;
4267
+ bool IsLegal;
4268
+ bool TryToFindDuplicates;
4269
+ bool TrySplitVectorize;
4270
+
4271
+ public:
4272
+ ScalarsVectorizationLegality(InstructionsState S, bool IsLegal,
4273
+ bool TryToFindDuplicates = true,
4274
+ bool TrySplitVectorize = false)
4275
+ : S(S), IsLegal(IsLegal), TryToFindDuplicates(TryToFindDuplicates),
4276
+ TrySplitVectorize(TrySplitVectorize) {
4277
+ assert((!IsLegal || (S.valid() && TryToFindDuplicates)) &&
4278
+ "Inconsistent state");
4279
+ }
4280
+ const InstructionsState &getInstructionsState() const { return S; };
4281
+ bool isLegal() const { return IsLegal; }
4282
+ bool tryToFindDuplicates() const { return TryToFindDuplicates; }
4283
+ bool trySplitVectorize() const { return TrySplitVectorize; }
4284
+ };
4285
+
4254
4286
/// Checks if the specified list of the instructions/values can be vectorized
4255
4287
/// in general.
4256
- bool isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
4257
- const EdgeInfo &UserTreeIdx,
4258
- InstructionsState &S,
4259
- bool &TryToFindDuplicates,
4260
- bool &TrySplitVectorize) const;
4288
+ ScalarsVectorizationLegality
4289
+ getScalarsVectorizationLegality(ArrayRef<Value *> VL, unsigned Depth,
4290
+ const EdgeInfo &UserTreeIdx) const;
4261
4291
4262
4292
/// Checks if the specified list of the instructions/values can be vectorized
4263
4293
/// and fills required data before actual scheduling of the instructions.
@@ -9734,25 +9764,21 @@ bool BoUpSLP::canBuildSplitNode(ArrayRef<Value *> VL,
9734
9764
return true;
9735
9765
}
9736
9766
9737
- bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
9738
- const EdgeInfo &UserTreeIdx,
9739
- InstructionsState &S,
9740
- bool &TryToFindDuplicates,
9741
- bool &TrySplitVectorize) const {
9767
+ BoUpSLP::ScalarsVectorizationLegality
9768
+ BoUpSLP::getScalarsVectorizationLegality(ArrayRef<Value *> VL, unsigned Depth,
9769
+ const EdgeInfo &UserTreeIdx) const {
9742
9770
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
9743
9771
9744
- S = getSameOpcode(VL, *TLI);
9745
- TryToFindDuplicates = true;
9746
- TrySplitVectorize = false;
9772
+ InstructionsState S = getSameOpcode(VL, *TLI);
9747
9773
9748
9774
// Don't go into catchswitch blocks, which can happen with PHIs.
9749
9775
// Such blocks can only have PHIs and the catchswitch. There is no
9750
9776
// place to insert a shuffle if we need to, so just avoid that issue.
9751
9777
if (S && isa<CatchSwitchInst>(S.getMainOp()->getParent()->getTerminator())) {
9752
9778
LLVM_DEBUG(dbgs() << "SLP: bundle in catchswitch block.\n");
9753
9779
// Do not try to pack to avoid extra instructions here.
9754
- TryToFindDuplicates = false;
9755
- return false;
9780
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/ false,
9781
+ /*TryToFindDuplicates=*/ false) ;
9756
9782
}
9757
9783
9758
9784
// Check if this is a duplicate of another entry.
@@ -9762,14 +9788,14 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
9762
9788
if (E->isSame(VL)) {
9763
9789
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.getMainOp()
9764
9790
<< ".\n");
9765
- return false;
9791
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/ false) ;
9766
9792
}
9767
9793
SmallPtrSet<Value *, 8> Values(llvm::from_range, E->Scalars);
9768
9794
if (all_of(VL, [&](Value *V) {
9769
9795
return isa<PoisonValue>(V) || Values.contains(V);
9770
9796
})) {
9771
9797
LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
9772
- return false;
9798
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/ false) ;
9773
9799
}
9774
9800
}
9775
9801
}
@@ -9786,23 +9812,23 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
9786
9812
cast<Instruction>(I)->getOpcode() == S.getOpcode();
9787
9813
})))) {
9788
9814
LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
9789
- return false;
9815
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/ false) ;
9790
9816
}
9791
9817
9792
9818
// Don't handle scalable vectors
9793
9819
if (S && S.getOpcode() == Instruction::ExtractElement &&
9794
9820
isa<ScalableVectorType>(
9795
9821
cast<ExtractElementInst>(S.getMainOp())->getVectorOperandType())) {
9796
9822
LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
9797
- return false;
9823
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/ false) ;
9798
9824
}
9799
9825
9800
9826
// Don't handle vectors.
9801
9827
if (!SLPReVec && getValueType(VL.front())->isVectorTy()) {
9802
9828
LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
9803
9829
// Do not try to pack to avoid extra instructions here.
9804
- TryToFindDuplicates = false;
9805
- return false;
9830
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/ false,
9831
+ /*TryToFindDuplicates=*/ false) ;
9806
9832
}
9807
9833
9808
9834
// If all of the operands are identical or constant we have a simple solution.
@@ -9892,11 +9918,12 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
9892
9918
if (!S) {
9893
9919
LLVM_DEBUG(dbgs() << "SLP: Try split and if failed, gathering due to "
9894
9920
"C,S,B,O, small shuffle. \n");
9895
- TrySplitVectorize = true;
9896
- return false;
9921
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
9922
+ /*TryToFindDuplicates=*/true,
9923
+ /*TrySplitVectorize=*/true);
9897
9924
}
9898
9925
LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O, small shuffle. \n");
9899
- return false;
9926
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/ false) ;
9900
9927
}
9901
9928
9902
9929
// Don't vectorize ephemeral values.
@@ -9906,8 +9933,8 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
9906
9933
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
9907
9934
<< ") is ephemeral.\n");
9908
9935
// Do not try to pack to avoid extra instructions here.
9909
- TryToFindDuplicates = false;
9910
- return false;
9936
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/ false,
9937
+ /*TryToFindDuplicates=*/ false) ;
9911
9938
}
9912
9939
}
9913
9940
}
@@ -9956,7 +9983,7 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
9956
9983
if (PreferScalarize) {
9957
9984
LLVM_DEBUG(dbgs() << "SLP: The instructions are in tree and alternate "
9958
9985
"node is not profitable.\n");
9959
- return false;
9986
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/ false) ;
9960
9987
}
9961
9988
}
9962
9989
@@ -9965,7 +9992,7 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
9965
9992
for (Value *V : VL) {
9966
9993
if (UserIgnoreList->contains(V)) {
9967
9994
LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
9968
- return false;
9995
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/ false) ;
9969
9996
}
9970
9997
}
9971
9998
}
@@ -9995,9 +10022,9 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
9995
10022
// Do not vectorize EH and non-returning blocks, not profitable in most
9996
10023
// cases.
9997
10024
LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
9998
- return false;
10025
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/ false) ;
9999
10026
}
10000
- return true;
10027
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/ true) ;
10001
10028
}
10002
10029
10003
10030
void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
@@ -10008,7 +10035,6 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
10008
10035
SmallVector<int> ReuseShuffleIndices;
10009
10036
SmallVector<Value *> VL(VLRef.begin(), VLRef.end());
10010
10037
10011
- InstructionsState S = InstructionsState::invalid();
10012
10038
// Tries to build split node.
10013
10039
auto TrySplitNode = [&](const InstructionsState &LocalState) {
10014
10040
SmallVector<Value *> Op1, Op2;
@@ -10042,22 +10068,20 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
10042
10068
return true;
10043
10069
};
10044
10070
10045
- bool TryToPackDuplicates;
10046
- bool TrySplitVectorize ;
10047
- if (!isLegalToVectorizeScalars(VL, Depth, UserTreeIdx, S, TryToPackDuplicates,
10048
- TrySplitVectorize )) {
10049
- if (TrySplitVectorize ) {
10071
+ ScalarsVectorizationLegality Legality =
10072
+ getScalarsVectorizationLegality(VL, Depth, UserTreeIdx) ;
10073
+ const InstructionsState &S = Legality.getInstructionsState();
10074
+ if (!Legality.isLegal( )) {
10075
+ if (Legality.trySplitVectorize() ) {
10050
10076
auto [MainOp, AltOp] = getMainAltOpsNoStateVL(VL);
10051
10077
// Last chance to try to vectorize alternate node.
10052
10078
if (MainOp && AltOp && TrySplitNode(InstructionsState(MainOp, AltOp)))
10053
10079
return;
10054
10080
}
10055
- if (TryToPackDuplicates )
10081
+ if (Legality.tryToFindDuplicates() )
10056
10082
tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S, UserTreeIdx);
10057
10083
10058
- auto Invalid = ScheduleBundle::invalid();
10059
- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
10060
- ReuseShuffleIndices);
10084
+ newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices);
10061
10085
return;
10062
10086
}
10063
10087
@@ -10068,9 +10092,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
10068
10092
// Check that every instruction appears once in this bundle.
10069
10093
if (!tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S, UserTreeIdx,
10070
10094
/*TryPad=*/true)) {
10071
- auto Invalid = ScheduleBundle::invalid();
10072
- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
10073
- ReuseShuffleIndices);
10095
+ newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices);
10074
10096
return;
10075
10097
}
10076
10098
@@ -10083,9 +10105,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
10083
10105
TreeEntry::EntryState State = getScalarsVectorizationState(
10084
10106
S, VL, IsScatterVectorizeUserTE, CurrentOrder, PointerOps);
10085
10107
if (State == TreeEntry::NeedToGather) {
10086
- auto Invalid = ScheduleBundle::invalid();
10087
- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
10088
- ReuseShuffleIndices);
10108
+ newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices);
10089
10109
return;
10090
10110
}
10091
10111
@@ -10109,9 +10129,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
10109
10129
// Last chance to try to vectorize alternate node.
10110
10130
if (S.isAltShuffle() && ReuseShuffleIndices.empty() && TrySplitNode(S))
10111
10131
return;
10112
- auto Invalid = ScheduleBundle::invalid();
10113
- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
10114
- ReuseShuffleIndices);
10132
+ newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices);
10115
10133
NonScheduledFirst.insert(VL.front());
10116
10134
if (S.getOpcode() == Instruction::Load &&
10117
10135
BS.ScheduleRegionSize < BS.ScheduleRegionSizeLimit)
0 commit comments