@@ -9062,87 +9062,101 @@ getMainAltOpsNoStateVL(ArrayRef<Value *> VL) {
9062
9062
return std::make_pair(MainOp, AltOp);
9063
9063
}
9064
9064
9065
+ /// Checks that every instruction appears once in the list and if not, packs
9066
+ /// them, building \p ReuseShuffleIndices mask. The list of unique scalars is
9067
+ /// extended by poison values to the whole register size.
9068
+ static bool tryToFindDuplicates(SmallVectorImpl<Value *> &VL,
9069
+ SmallVectorImpl<int> &ReuseShuffleIndices,
9070
+ const TargetTransformInfo &TTI,
9071
+ const TargetLibraryInfo &TLI,
9072
+ const InstructionsState &S,
9073
+ const BoUpSLP::EdgeInfo &UserTreeIdx,
9074
+ bool DoNotFail) {
9075
+ // Check that every instruction appears once in this bundle.
9076
+ SmallVector<Value *> UniqueValues;
9077
+ SmallVector<Value *> NonUniqueValueVL;
9078
+ SmallDenseMap<Value *, unsigned, 16> UniquePositions(VL.size());
9079
+ for (Value *V : VL) {
9080
+ if (isConstant(V)) {
9081
+ ReuseShuffleIndices.emplace_back(
9082
+ isa<PoisonValue>(V) ? PoisonMaskElem : UniqueValues.size());
9083
+ UniqueValues.emplace_back(V);
9084
+ continue;
9085
+ }
9086
+ auto Res = UniquePositions.try_emplace(V, UniqueValues.size());
9087
+ ReuseShuffleIndices.emplace_back(Res.first->second);
9088
+ if (Res.second)
9089
+ UniqueValues.emplace_back(V);
9090
+ }
9091
+ size_t NumUniqueScalarValues = UniqueValues.size();
9092
+ bool IsFullVectors = hasFullVectorsOrPowerOf2(
9093
+ TTI, getValueType(UniqueValues.front()), NumUniqueScalarValues);
9094
+ if (NumUniqueScalarValues == VL.size() &&
9095
+ (VectorizeNonPowerOf2 || IsFullVectors)) {
9096
+ ReuseShuffleIndices.clear();
9097
+ } else {
9098
+ // FIXME: Reshuffing scalars is not supported yet for non-power-of-2 ops.
9099
+ if ((UserTreeIdx.UserTE &&
9100
+ UserTreeIdx.UserTE->hasNonWholeRegisterOrNonPowerOf2Vec(TTI)) ||
9101
+ !hasFullVectorsOrPowerOf2(TTI, getValueType(VL.front()), VL.size())) {
9102
+ LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported "
9103
+ "for nodes with padding.\n");
9104
+ return false;
9105
+ }
9106
+ LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
9107
+ if (NumUniqueScalarValues <= 1 || !IsFullVectors ||
9108
+ (UniquePositions.size() == 1 && all_of(UniqueValues, [](Value *V) {
9109
+ return isa<UndefValue>(V) || !isConstant(V);
9110
+ }))) {
9111
+ if (DoNotFail && UniquePositions.size() > 1 &&
9112
+ NumUniqueScalarValues > 1 && S.getMainOp()->isSafeToRemove() &&
9113
+ all_of(UniqueValues, IsaPred<Instruction, PoisonValue>)) {
9114
+ // Find the number of elements, which forms full vectors.
9115
+ unsigned PWSz = getFullVectorNumberOfElements(
9116
+ TTI, UniqueValues.front()->getType(), UniqueValues.size());
9117
+ PWSz = std::min<unsigned>(PWSz, VL.size());
9118
+ if (PWSz == VL.size()) {
9119
+ ReuseShuffleIndices.clear();
9120
+ } else {
9121
+ NonUniqueValueVL.assign(UniqueValues.begin(), UniqueValues.end());
9122
+ NonUniqueValueVL.append(
9123
+ PWSz - UniqueValues.size(),
9124
+ PoisonValue::get(UniqueValues.front()->getType()));
9125
+ // Check that extended with poisons operations are still valid for
9126
+ // vectorization (div/rem are not allowed).
9127
+ if (!getSameOpcode(NonUniqueValueVL, TLI).valid()) {
9128
+ LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
9129
+ return false;
9130
+ }
9131
+ VL = NonUniqueValueVL;
9132
+ }
9133
+ return true;
9134
+ }
9135
+ LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
9136
+ return false;
9137
+ }
9138
+ VL = UniqueValues;
9139
+ }
9140
+ return true;
9141
+ }
9142
+
9065
9143
void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
9066
9144
const EdgeInfo &UserTreeIdx,
9067
9145
unsigned InterleaveFactor) {
9068
9146
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
9069
9147
9070
9148
SmallVector<int> ReuseShuffleIndices;
9071
- SmallVector<Value *> UniqueValues;
9072
- SmallVector<Value *> NonUniqueValueVL;
9149
+ SmallVector<Value *> NonUniqueValueVL(VL.begin(), VL.end());
9073
9150
auto TryToFindDuplicates = [&](const InstructionsState &S,
9074
9151
bool DoNotFail = false) {
9075
- // Check that every instruction appears once in this bundle.
9076
- SmallDenseMap<Value *, unsigned, 16> UniquePositions(VL.size());
9077
- for (Value *V : VL) {
9078
- if (isConstant(V)) {
9079
- ReuseShuffleIndices.emplace_back(
9080
- isa<PoisonValue>(V) ? PoisonMaskElem : UniqueValues.size());
9081
- UniqueValues.emplace_back(V);
9082
- continue;
9083
- }
9084
- auto Res = UniquePositions.try_emplace(V, UniqueValues.size());
9085
- ReuseShuffleIndices.emplace_back(Res.first->second);
9086
- if (Res.second)
9087
- UniqueValues.emplace_back(V);
9088
- }
9089
- size_t NumUniqueScalarValues = UniqueValues.size();
9090
- bool IsFullVectors = hasFullVectorsOrPowerOf2(
9091
- *TTI, getValueType(UniqueValues.front()), NumUniqueScalarValues);
9092
- if (NumUniqueScalarValues == VL.size() &&
9093
- (VectorizeNonPowerOf2 || IsFullVectors)) {
9094
- ReuseShuffleIndices.clear();
9095
- } else {
9096
- // FIXME: Reshuffing scalars is not supported yet for non-power-of-2 ops.
9097
- if ((UserTreeIdx.UserTE &&
9098
- UserTreeIdx.UserTE->hasNonWholeRegisterOrNonPowerOf2Vec(*TTI)) ||
9099
- !hasFullVectorsOrPowerOf2(*TTI, getValueType(VL.front()),
9100
- VL.size())) {
9101
- LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported "
9102
- "for nodes with padding.\n");
9103
- auto Invalid = ScheduleBundle::invalid();
9104
- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx);
9105
- return false;
9106
- }
9107
- LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
9108
- if (NumUniqueScalarValues <= 1 || !IsFullVectors ||
9109
- (UniquePositions.size() == 1 && all_of(UniqueValues, [](Value *V) {
9110
- return isa<UndefValue>(V) || !isConstant(V);
9111
- }))) {
9112
- if (DoNotFail && UniquePositions.size() > 1 &&
9113
- NumUniqueScalarValues > 1 && S.getMainOp()->isSafeToRemove() &&
9114
- all_of(UniqueValues, IsaPred<Instruction, PoisonValue>)) {
9115
- // Find the number of elements, which forms full vectors.
9116
- unsigned PWSz = getFullVectorNumberOfElements(
9117
- *TTI, UniqueValues.front()->getType(), UniqueValues.size());
9118
- PWSz = std::min<unsigned>(PWSz, VL.size());
9119
- if (PWSz == VL.size()) {
9120
- ReuseShuffleIndices.clear();
9121
- } else {
9122
- NonUniqueValueVL.assign(UniqueValues.begin(), UniqueValues.end());
9123
- NonUniqueValueVL.append(
9124
- PWSz - UniqueValues.size(),
9125
- PoisonValue::get(UniqueValues.front()->getType()));
9126
- // Check that extended with poisons operations are still valid for
9127
- // vectorization (div/rem are not allowed).
9128
- if (!getSameOpcode(NonUniqueValueVL, *TLI).valid()) {
9129
- LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
9130
- auto Invalid = ScheduleBundle::invalid();
9131
- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx);
9132
- return false;
9133
- }
9134
- VL = NonUniqueValueVL;
9135
- }
9136
- return true;
9137
- }
9138
- LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
9139
- auto Invalid = ScheduleBundle::invalid();
9140
- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx);
9141
- return false;
9142
- }
9143
- VL = UniqueValues;
9152
+ if (tryToFindDuplicates(NonUniqueValueVL, ReuseShuffleIndices, *TTI, *TLI,
9153
+ S, UserTreeIdx, DoNotFail)) {
9154
+ VL = NonUniqueValueVL;
9155
+ return true;
9144
9156
}
9145
- return true;
9157
+ auto Invalid = ScheduleBundle::invalid();
9158
+ newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx);
9159
+ return false;
9146
9160
};
9147
9161
9148
9162
InstructionsState S = getSameOpcode(VL, *TLI);
@@ -9610,8 +9624,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
9610
9624
9611
9625
BlockScheduling &BS = *BSRef;
9612
9626
9627
+ SetVector<Value *> UniqueValues(VL.begin(), VL.end());
9613
9628
std::optional<ScheduleBundle *> BundlePtr =
9614
- BS.tryScheduleBundle(UniqueValues, this, S);
9629
+ BS.tryScheduleBundle(UniqueValues.getArrayRef() , this, S);
9615
9630
#ifdef EXPENSIVE_CHECKS
9616
9631
// Make sure we didn't break any internal invariants
9617
9632
BS.verify();
0 commit comments