Skip to content

Commit 22c5ea4

Browse files
gbossu authored and GeorgeARM committed
[SLP] Simplify tryToFindDuplicates() (NFC) (llvm#135766)
This NFC aims to simplify the control flow and interfaces used in tryToFindDuplicates(). The point is to make it easier to understand where decisions for scalar de-duplication are made. In particular:
- Limit indentation.
- Rename some variables to better match their use case.
- Always give consistent outputs for VL and ReuseShuffleIndices. This makes it possible to use the same code for building the gather TreeEntry everywhere. This also allows removing the TryToFindDuplicates lambda.
1 parent 2b17868 commit 22c5ea4

File tree

1 file changed

+74
-63
lines changed

1 file changed

+74
-63
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 74 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -9531,21 +9531,25 @@ getMainAltOpsNoStateVL(ArrayRef<Value *> VL) {
95319531
}
95329532

95339533
/// Checks that every instruction appears once in the list and if not, packs
9534-
/// them, building \p ReuseShuffleIndices mask. The list of unique scalars is
9535-
/// extended by poison values to the whole register size.
9534+
/// them, building \p ReuseShuffleIndices mask and mutating \p VL. The list of
9535+
/// unique scalars is extended by poison values to the whole register size.
9536+
///
9537+
/// \returns false if \p VL could not be uniquified, in which case \p VL is
9538+
/// unchanged and \p ReuseShuffleIndices is empty.
95369539
static bool tryToFindDuplicates(SmallVectorImpl<Value *> &VL,
95379540
SmallVectorImpl<int> &ReuseShuffleIndices,
95389541
const TargetTransformInfo &TTI,
95399542
const TargetLibraryInfo &TLI,
95409543
const InstructionsState &S,
95419544
const BoUpSLP::EdgeInfo &UserTreeIdx,
9542-
bool DoNotFail) {
9545+
bool TryPad = false) {
95439546
// Check that every instruction appears once in this bundle.
95449547
SmallVector<Value *> UniqueValues;
9545-
SmallVector<Value *> NonUniqueValueVL;
95469548
SmallDenseMap<Value *, unsigned, 16> UniquePositions(VL.size());
95479549
for (Value *V : VL) {
95489550
if (isConstant(V)) {
9551+
// Constants are always considered distinct, even if the same constant
9552+
// appears multiple times in VL.
95499553
ReuseShuffleIndices.emplace_back(
95509554
isa<PoisonValue>(V) ? PoisonMaskElem : UniqueValues.size());
95519555
UniqueValues.emplace_back(V);
@@ -9556,55 +9560,67 @@ static bool tryToFindDuplicates(SmallVectorImpl<Value *> &VL,
95569560
if (Res.second)
95579561
UniqueValues.emplace_back(V);
95589562
}
9563+
9564+
// Easy case: VL has unique values and a "natural" size
95599565
size_t NumUniqueScalarValues = UniqueValues.size();
95609566
bool IsFullVectors = hasFullVectorsOrPowerOf2(
95619567
TTI, getValueType(UniqueValues.front()), NumUniqueScalarValues);
95629568
if (NumUniqueScalarValues == VL.size() &&
95639569
(VectorizeNonPowerOf2 || IsFullVectors)) {
95649570
ReuseShuffleIndices.clear();
9565-
} else {
9566-
// FIXME: Reshuffling scalars is not supported yet for non-power-of-2 ops.
9567-
if ((UserTreeIdx.UserTE &&
9568-
UserTreeIdx.UserTE->hasNonWholeRegisterOrNonPowerOf2Vec(TTI)) ||
9569-
!hasFullVectorsOrPowerOf2(TTI, getValueType(VL.front()), VL.size())) {
9570-
LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported "
9571-
"for nodes with padding.\n");
9572-
return false;
9573-
}
9574-
LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
9575-
if (NumUniqueScalarValues <= 1 || !IsFullVectors ||
9576-
(UniquePositions.size() == 1 && all_of(UniqueValues, [](Value *V) {
9577-
return isa<UndefValue>(V) || !isConstant(V);
9578-
}))) {
9579-
if (DoNotFail && UniquePositions.size() > 1 &&
9580-
NumUniqueScalarValues > 1 && S.getMainOp()->isSafeToRemove() &&
9581-
all_of(UniqueValues, IsaPred<Instruction, PoisonValue>)) {
9582-
// Find the number of elements, which forms full vectors.
9583-
unsigned PWSz = getFullVectorNumberOfElements(
9584-
TTI, UniqueValues.front()->getType(), UniqueValues.size());
9585-
PWSz = std::min<unsigned>(PWSz, VL.size());
9586-
if (PWSz == VL.size()) {
9571+
return true;
9572+
}
9573+
9574+
// FIXME: Reshuffling scalars is not supported yet for non-power-of-2 ops.
9575+
if ((UserTreeIdx.UserTE &&
9576+
UserTreeIdx.UserTE->hasNonWholeRegisterOrNonPowerOf2Vec(TTI)) ||
9577+
!hasFullVectorsOrPowerOf2(TTI, getValueType(VL.front()), VL.size())) {
9578+
LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported "
9579+
"for nodes with padding.\n");
9580+
ReuseShuffleIndices.clear();
9581+
return false;
9582+
}
9583+
9584+
LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
9585+
if (NumUniqueScalarValues <= 1 || !IsFullVectors ||
9586+
(UniquePositions.size() == 1 && all_of(UniqueValues, [](Value *V) {
9587+
return isa<UndefValue>(V) || !isConstant(V);
9588+
}))) {
9589+
if (TryPad && UniquePositions.size() > 1 && NumUniqueScalarValues > 1 &&
9590+
S.getMainOp()->isSafeToRemove() &&
9591+
all_of(UniqueValues, IsaPred<Instruction, PoisonValue>)) {
9592+
// Find the number of elements, which forms full vectors.
9593+
unsigned PWSz = getFullVectorNumberOfElements(
9594+
TTI, UniqueValues.front()->getType(), UniqueValues.size());
9595+
PWSz = std::min<unsigned>(PWSz, VL.size());
9596+
if (PWSz == VL.size()) {
9597+
// We ended up with the same size after removing duplicates and
9598+
// upgrading the resulting vector size to a "nice size". Just keep
9599+
// the initial VL then.
9600+
ReuseShuffleIndices.clear();
9601+
} else {
9602+
// Pad unique values with poison to grow the vector to a "nice" size
9603+
SmallVector<Value *> PaddedUniqueValues(UniqueValues.begin(),
9604+
UniqueValues.end());
9605+
PaddedUniqueValues.append(
9606+
PWSz - UniqueValues.size(),
9607+
PoisonValue::get(UniqueValues.front()->getType()));
9608+
// Check that extended with poisons operations are still valid for
9609+
// vectorization (div/rem are not allowed).
9610+
if (!getSameOpcode(PaddedUniqueValues, TLI).valid()) {
9611+
LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
95879612
ReuseShuffleIndices.clear();
9588-
} else {
9589-
NonUniqueValueVL.assign(UniqueValues.begin(), UniqueValues.end());
9590-
NonUniqueValueVL.append(
9591-
PWSz - UniqueValues.size(),
9592-
PoisonValue::get(UniqueValues.front()->getType()));
9593-
// Check that extended with poisons operations are still valid for
9594-
// vectorization (div/rem are not allowed).
9595-
if (!getSameOpcode(NonUniqueValueVL, TLI).valid()) {
9596-
LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
9597-
return false;
9598-
}
9599-
VL = NonUniqueValueVL;
9613+
return false;
96009614
}
9601-
return true;
9615+
VL = std::move(PaddedUniqueValues);
96029616
}
9603-
LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
9604-
return false;
9617+
return true;
96059618
}
9606-
VL = UniqueValues;
9619+
LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
9620+
ReuseShuffleIndices.clear();
9621+
return false;
96079622
}
9623+
VL = std::move(UniqueValues);
96089624
return true;
96099625
}
96109626

@@ -10005,24 +10021,13 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
1000510021
return true;
1000610022
}
1000710023

10008-
void BoUpSLP::buildTreeRec(ArrayRef<Value *> VL, unsigned Depth,
10024+
void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
1000910025
const EdgeInfo &UserTreeIdx,
1001010026
unsigned InterleaveFactor) {
10011-
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
10027+
assert((allConstant(VLRef) || allSameType(VLRef)) && "Invalid types!");
1001210028

1001310029
SmallVector<int> ReuseShuffleIndices;
10014-
SmallVector<Value *> NonUniqueValueVL(VL.begin(), VL.end());
10015-
auto TryToFindDuplicates = [&](const InstructionsState &S,
10016-
bool DoNotFail = false) {
10017-
if (tryToFindDuplicates(NonUniqueValueVL, ReuseShuffleIndices, *TTI, *TLI,
10018-
S, UserTreeIdx, DoNotFail)) {
10019-
VL = NonUniqueValueVL;
10020-
return true;
10021-
}
10022-
auto Invalid = ScheduleBundle::invalid();
10023-
newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx);
10024-
return false;
10025-
};
10030+
SmallVector<Value *> VL(VLRef.begin(), VLRef.end());
1002610031

1002710032
InstructionsState S = InstructionsState::invalid();
1002810033
// Tries to build split node.
@@ -10068,11 +10073,12 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VL, unsigned Depth,
1006810073
if (MainOp && AltOp && TrySplitNode(InstructionsState(MainOp, AltOp)))
1006910074
return;
1007010075
}
10071-
if (!TryToPackDuplicates || TryToFindDuplicates(S)) {
10072-
auto Invalid = ScheduleBundle::invalid();
10073-
newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
10074-
ReuseShuffleIndices);
10075-
}
10076+
if (TryToPackDuplicates)
10077+
tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S, UserTreeIdx);
10078+
10079+
auto Invalid = ScheduleBundle::invalid();
10080+
newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
10081+
ReuseShuffleIndices);
1007610082
return;
1007710083
}
1007810084

@@ -10081,8 +10087,13 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VL, unsigned Depth,
1008110087
return;
1008210088

1008310089
// Check that every instruction appears once in this bundle.
10084-
if (!TryToFindDuplicates(S, /*DoNotFail=*/true))
10090+
if (!tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S, UserTreeIdx,
10091+
/*TryPad=*/true)) {
10092+
auto Invalid = ScheduleBundle::invalid();
10093+
newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
10094+
ReuseShuffleIndices);
1008510095
return;
10096+
}
1008610097

1008710098
// Perform specific checks for each particular instruction kind.
1008810099
bool IsScatterVectorizeUserTE =
@@ -10125,7 +10136,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VL, unsigned Depth,
1012510136
NonScheduledFirst.insert(VL.front());
1012610137
if (S.getOpcode() == Instruction::Load &&
1012710138
BS.ScheduleRegionSize < BS.ScheduleRegionSizeLimit)
10128-
registerNonVectorizableLoads(VL);
10139+
registerNonVectorizableLoads(ArrayRef(VL));
1012910140
return;
1013010141
}
1013110142
ScheduleBundle Empty;

0 commit comments

Comments
 (0)