@@ -9531,21 +9531,25 @@ getMainAltOpsNoStateVL(ArrayRef<Value *> VL) {
9531
9531
}
9532
9532
9533
9533
/// Checks that every instruction appears once in the list and if not, packs
9534
- /// them, building \p ReuseShuffleIndices mask. The list of unique scalars is
9535
- /// extended by poison values to the whole register size.
9534
+ /// them, building \p ReuseShuffleIndices mask and mutating \p VL. The list of
9535
+ /// unique scalars is extended by poison values to the whole register size.
9536
+ ///
9537
+ /// \returns false if \p VL could not be uniquified, in which case \p VL is
9538
+ /// unchanged and \p ReuseShuffleIndices is empty.
9536
9539
static bool tryToFindDuplicates(SmallVectorImpl<Value *> &VL,
9537
9540
SmallVectorImpl<int> &ReuseShuffleIndices,
9538
9541
const TargetTransformInfo &TTI,
9539
9542
const TargetLibraryInfo &TLI,
9540
9543
const InstructionsState &S,
9541
9544
const BoUpSLP::EdgeInfo &UserTreeIdx,
9542
- bool DoNotFail ) {
9545
+ bool TryPad = false ) {
9543
9546
// Check that every instruction appears once in this bundle.
9544
9547
SmallVector<Value *> UniqueValues;
9545
- SmallVector<Value *> NonUniqueValueVL;
9546
9548
SmallDenseMap<Value *, unsigned, 16> UniquePositions(VL.size());
9547
9549
for (Value *V : VL) {
9548
9550
if (isConstant(V)) {
9551
+ // Constants are always considered distinct, even if the same constant
9552
+ // appears multiple times in VL.
9549
9553
ReuseShuffleIndices.emplace_back(
9550
9554
isa<PoisonValue>(V) ? PoisonMaskElem : UniqueValues.size());
9551
9555
UniqueValues.emplace_back(V);
@@ -9556,55 +9560,67 @@ static bool tryToFindDuplicates(SmallVectorImpl<Value *> &VL,
9556
9560
if (Res.second)
9557
9561
UniqueValues.emplace_back(V);
9558
9562
}
9563
+
9564
+ // Easy case: VL has unique values and a "natural" size
9559
9565
size_t NumUniqueScalarValues = UniqueValues.size();
9560
9566
bool IsFullVectors = hasFullVectorsOrPowerOf2(
9561
9567
TTI, getValueType(UniqueValues.front()), NumUniqueScalarValues);
9562
9568
if (NumUniqueScalarValues == VL.size() &&
9563
9569
(VectorizeNonPowerOf2 || IsFullVectors)) {
9564
9570
ReuseShuffleIndices.clear();
9565
- } else {
9566
- // FIXME: Reshuffing scalars is not supported yet for non-power-of-2 ops.
9567
- if ((UserTreeIdx.UserTE &&
9568
- UserTreeIdx.UserTE->hasNonWholeRegisterOrNonPowerOf2Vec(TTI)) ||
9569
- !hasFullVectorsOrPowerOf2(TTI, getValueType(VL.front()), VL.size())) {
9570
- LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported "
9571
- "for nodes with padding.\n");
9572
- return false;
9573
- }
9574
- LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
9575
- if (NumUniqueScalarValues <= 1 || !IsFullVectors ||
9576
- (UniquePositions.size() == 1 && all_of(UniqueValues, [](Value *V) {
9577
- return isa<UndefValue>(V) || !isConstant(V);
9578
- }))) {
9579
- if (DoNotFail && UniquePositions.size() > 1 &&
9580
- NumUniqueScalarValues > 1 && S.getMainOp()->isSafeToRemove() &&
9581
- all_of(UniqueValues, IsaPred<Instruction, PoisonValue>)) {
9582
- // Find the number of elements, which forms full vectors.
9583
- unsigned PWSz = getFullVectorNumberOfElements(
9584
- TTI, UniqueValues.front()->getType(), UniqueValues.size());
9585
- PWSz = std::min<unsigned>(PWSz, VL.size());
9586
- if (PWSz == VL.size()) {
9571
+ return true;
9572
+ }
9573
+
9574
+ // FIXME: Reshuffling scalars is not supported yet for non-power-of-2 ops.
9575
+ if ((UserTreeIdx.UserTE &&
9576
+ UserTreeIdx.UserTE->hasNonWholeRegisterOrNonPowerOf2Vec(TTI)) ||
9577
+ !hasFullVectorsOrPowerOf2(TTI, getValueType(VL.front()), VL.size())) {
9578
+ LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported "
9579
+ "for nodes with padding.\n");
9580
+ ReuseShuffleIndices.clear();
9581
+ return false;
9582
+ }
9583
+
9584
+ LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
9585
+ if (NumUniqueScalarValues <= 1 || !IsFullVectors ||
9586
+ (UniquePositions.size() == 1 && all_of(UniqueValues, [](Value *V) {
9587
+ return isa<UndefValue>(V) || !isConstant(V);
9588
+ }))) {
9589
+ if (TryPad && UniquePositions.size() > 1 && NumUniqueScalarValues > 1 &&
9590
+ S.getMainOp()->isSafeToRemove() &&
9591
+ all_of(UniqueValues, IsaPred<Instruction, PoisonValue>)) {
9592
+ // Find the number of elements, which forms full vectors.
9593
+ unsigned PWSz = getFullVectorNumberOfElements(
9594
+ TTI, UniqueValues.front()->getType(), UniqueValues.size());
9595
+ PWSz = std::min<unsigned>(PWSz, VL.size());
9596
+ if (PWSz == VL.size()) {
9597
+ // We ended up with the same size after removing duplicates and
9598
+ // upgrading the resulting vector size to a "nice size". Just keep
9599
+ // the initial VL then.
9600
+ ReuseShuffleIndices.clear();
9601
+ } else {
9602
+ // Pad unique values with poison to grow the vector to a "nice" size
9603
+ SmallVector<Value *> PaddedUniqueValues(UniqueValues.begin(),
9604
+ UniqueValues.end());
9605
+ PaddedUniqueValues.append(
9606
+ PWSz - UniqueValues.size(),
9607
+ PoisonValue::get(UniqueValues.front()->getType()));
9608
+ // Check that the poison-padded operations are still valid for
9609
+ // vectorization (div/rem are not allowed).
9610
+ if (!getSameOpcode(PaddedUniqueValues, TLI).valid()) {
9611
+ LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
9587
9612
ReuseShuffleIndices.clear();
9588
- } else {
9589
- NonUniqueValueVL.assign(UniqueValues.begin(), UniqueValues.end());
9590
- NonUniqueValueVL.append(
9591
- PWSz - UniqueValues.size(),
9592
- PoisonValue::get(UniqueValues.front()->getType()));
9593
- // Check that extended with poisons operations are still valid for
9594
- // vectorization (div/rem are not allowed).
9595
- if (!getSameOpcode(NonUniqueValueVL, TLI).valid()) {
9596
- LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
9597
- return false;
9598
- }
9599
- VL = NonUniqueValueVL;
9613
+ return false;
9600
9614
}
9601
- return true ;
9615
+ VL = std::move(PaddedUniqueValues) ;
9602
9616
}
9603
- LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
9604
- return false;
9617
+ return true;
9605
9618
}
9606
- VL = UniqueValues;
9619
+ LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
9620
+ ReuseShuffleIndices.clear();
9621
+ return false;
9607
9622
}
9623
+ VL = std::move(UniqueValues);
9608
9624
return true;
9609
9625
}
9610
9626
@@ -10005,24 +10021,13 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
10005
10021
return true;
10006
10022
}
10007
10023
10008
- void BoUpSLP::buildTreeRec(ArrayRef<Value *> VL , unsigned Depth,
10024
+ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef , unsigned Depth,
10009
10025
const EdgeInfo &UserTreeIdx,
10010
10026
unsigned InterleaveFactor) {
10011
- assert((allConstant(VL ) || allSameType(VL )) && "Invalid types!");
10027
+ assert((allConstant(VLRef ) || allSameType(VLRef )) && "Invalid types!");
10012
10028
10013
10029
SmallVector<int> ReuseShuffleIndices;
10014
- SmallVector<Value *> NonUniqueValueVL(VL.begin(), VL.end());
10015
- auto TryToFindDuplicates = [&](const InstructionsState &S,
10016
- bool DoNotFail = false) {
10017
- if (tryToFindDuplicates(NonUniqueValueVL, ReuseShuffleIndices, *TTI, *TLI,
10018
- S, UserTreeIdx, DoNotFail)) {
10019
- VL = NonUniqueValueVL;
10020
- return true;
10021
- }
10022
- auto Invalid = ScheduleBundle::invalid();
10023
- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx);
10024
- return false;
10025
- };
10030
+ SmallVector<Value *> VL(VLRef.begin(), VLRef.end());
10026
10031
10027
10032
InstructionsState S = InstructionsState::invalid();
10028
10033
// Tries to build split node.
@@ -10068,11 +10073,12 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VL, unsigned Depth,
10068
10073
if (MainOp && AltOp && TrySplitNode(InstructionsState(MainOp, AltOp)))
10069
10074
return;
10070
10075
}
10071
- if (!TryToPackDuplicates || TryToFindDuplicates(S)) {
10072
- auto Invalid = ScheduleBundle::invalid();
10073
- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
10074
- ReuseShuffleIndices);
10075
- }
10076
+ if (TryToPackDuplicates)
10077
+ tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S, UserTreeIdx);
10078
+
10079
+ auto Invalid = ScheduleBundle::invalid();
10080
+ newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
10081
+ ReuseShuffleIndices);
10076
10082
return;
10077
10083
}
10078
10084
@@ -10081,8 +10087,13 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VL, unsigned Depth,
10081
10087
return;
10082
10088
10083
10089
// Check that every instruction appears once in this bundle.
10084
- if (!TryToFindDuplicates(S, /*DoNotFail=*/true))
10090
+ if (!tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S, UserTreeIdx,
10091
+ /*TryPad=*/true)) {
10092
+ auto Invalid = ScheduleBundle::invalid();
10093
+ newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
10094
+ ReuseShuffleIndices);
10085
10095
return;
10096
+ }
10086
10097
10087
10098
// Perform specific checks for each particular instruction kind.
10088
10099
bool IsScatterVectorizeUserTE =
@@ -10125,7 +10136,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VL, unsigned Depth,
10125
10136
NonScheduledFirst.insert(VL.front());
10126
10137
if (S.getOpcode() == Instruction::Load &&
10127
10138
BS.ScheduleRegionSize < BS.ScheduleRegionSizeLimit)
10128
- registerNonVectorizableLoads(VL );
10139
+ registerNonVectorizableLoads(ArrayRef(VL) );
10129
10140
return;
10130
10141
}
10131
10142
ScheduleBundle Empty;
0 commit comments