Skip to content

[SLP]Improve reordering for consts, splats and ops from same nodes + improved analysis. #87091

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 76 additions & 17 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1392,12 +1392,19 @@ class BoUpSLP {
return LookAheadHeuristics::ScoreSplat;
}

auto CheckSameEntryOrFail = [&]() {
if (const TreeEntry *TE1 = R.getTreeEntry(V1);
TE1 && TE1 == R.getTreeEntry(V2))
return LookAheadHeuristics::ScoreSplatLoads;
return LookAheadHeuristics::ScoreFail;
};

auto *LI1 = dyn_cast<LoadInst>(V1);
auto *LI2 = dyn_cast<LoadInst>(V2);
if (LI1 && LI2) {
if (LI1->getParent() != LI2->getParent() || !LI1->isSimple() ||
!LI2->isSimple())
return LookAheadHeuristics::ScoreFail;
return CheckSameEntryOrFail();

std::optional<int> Dist = getPointersDiff(
LI1->getType(), LI1->getPointerOperand(), LI2->getType(),
Expand All @@ -1409,7 +1416,7 @@ class BoUpSLP {
FixedVectorType::get(LI1->getType(), NumLanes),
LI1->getAlign()))
return LookAheadHeuristics::ScoreMaskedGatherCandidate;
return LookAheadHeuristics::ScoreFail;
return CheckSameEntryOrFail();
}
// The distance is too large - still may be profitable to use masked
// loads/gathers.
Expand Down Expand Up @@ -1466,14 +1473,14 @@ class BoUpSLP {
}
return LookAheadHeuristics::ScoreAltOpcodes;
}
return LookAheadHeuristics::ScoreFail;
return CheckSameEntryOrFail();
}

auto *I1 = dyn_cast<Instruction>(V1);
auto *I2 = dyn_cast<Instruction>(V2);
if (I1 && I2) {
if (I1->getParent() != I2->getParent())
return LookAheadHeuristics::ScoreFail;
return CheckSameEntryOrFail();
SmallVector<Value *, 4> Ops(MainAltOps.begin(), MainAltOps.end());
Ops.push_back(I1);
Ops.push_back(I2);
Expand All @@ -1494,7 +1501,7 @@ class BoUpSLP {
if (isa<UndefValue>(V2))
return LookAheadHeuristics::ScoreUndef;

return LookAheadHeuristics::ScoreFail;
return CheckSameEntryOrFail();
}

/// Go through the operands of \p LHS and \p RHS recursively until
Expand Down Expand Up @@ -1657,6 +1664,7 @@ class BoUpSLP {
const DataLayout &DL;
ScalarEvolution &SE;
const BoUpSLP &R;
const Loop *L = nullptr;

/// \returns the operand data at \p OpIdx and \p Lane.
OperandData &getData(unsigned OpIdx, unsigned Lane) {
Expand Down Expand Up @@ -1825,8 +1833,9 @@ class BoUpSLP {
// Track if the operand must be marked as used. If the operand is set to
// Score 1 explicitly (because of non power-of-2 unique scalars, we may
// want to reestimate the operands again on the following iterations).
bool IsUsed =
RMode == ReorderingMode::Splat || RMode == ReorderingMode::Constant;
bool IsUsed = RMode == ReorderingMode::Splat ||
RMode == ReorderingMode::Constant ||
RMode == ReorderingMode::Load;
// Iterate through all unused operands and look for the best.
for (unsigned Idx = 0; Idx != NumOperands; ++Idx) {
// Get the operand at Idx and Lane.
Expand All @@ -1847,23 +1856,44 @@ class BoUpSLP {
// Look for an operand that matches the current mode.
switch (RMode) {
case ReorderingMode::Load:
case ReorderingMode::Constant:
case ReorderingMode::Opcode: {
bool LeftToRight = Lane > LastLane;
Value *OpLeft = (LeftToRight) ? OpLastLane : Op;
Value *OpRight = (LeftToRight) ? Op : OpLastLane;
int Score = getLookAheadScore(OpLeft, OpRight, MainAltOps, Lane,
OpIdx, Idx, IsUsed);
if (Score > static_cast<int>(BestOp.Score)) {
if (Score > static_cast<int>(BestOp.Score) ||
(Score > 0 && Score == static_cast<int>(BestOp.Score) &&
Copy link
Contributor

@ElvinaYakubova ElvinaYakubova Jun 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm trying to understand this change. Could you please explain the idea behind this == check?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

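If the scores are equal, prefer the original operand, i.e. keep the operand that is already at this position (`Idx == OpIdx`) instead of swapping in another one with the same score.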

Idx == OpIdx)) {
BestOp.Idx = Idx;
BestOp.Score = Score;
BestScoresPerLanes[std::make_pair(OpIdx, Lane)] = Score;
}
break;
}
case ReorderingMode::Constant:
if (isa<Constant>(Op) ||
(!BestOp.Score && L && L->isLoopInvariant(Op))) {
BestOp.Idx = Idx;
if (isa<Constant>(Op)) {
BestOp.Score = LookAheadHeuristics::ScoreConstants;
BestScoresPerLanes[std::make_pair(OpIdx, Lane)] =
LookAheadHeuristics::ScoreConstants;
}
if (isa<UndefValue>(Op) || !isa<Constant>(Op))
IsUsed = false;
}
break;
case ReorderingMode::Splat:
if (Op == OpLastLane)
if (Op == OpLastLane || (!BestOp.Score && isa<Constant>(Op))) {
IsUsed = Op == OpLastLane;
if (Op == OpLastLane) {
BestOp.Score = LookAheadHeuristics::ScoreSplat;
BestScoresPerLanes[std::make_pair(OpIdx, Lane)] =
LookAheadHeuristics::ScoreSplat;
}
BestOp.Idx = Idx;
}
break;
case ReorderingMode::Failed:
llvm_unreachable("Not expected Failed reordering mode.");
Expand Down Expand Up @@ -2056,10 +2086,12 @@ class BoUpSLP {
void clear() { OpsVec.clear(); }

/// \returns true if there are enough operands identical to \p Op to fill
/// the whole vector.
/// the whole vector (it is mixed with constants or loop invariant values).
/// Note: This modifies the 'IsUsed' flag, so a cleanUsed() must follow.
bool shouldBroadcast(Value *Op, unsigned OpIdx, unsigned Lane) {
bool OpAPO = getData(OpIdx, Lane).APO;
bool IsInvariant = L && L->isLoopInvariant(Op);
unsigned Cnt = 0;
for (unsigned Ln = 0, Lns = getNumLanes(); Ln != Lns; ++Ln) {
if (Ln == Lane)
continue;
Expand All @@ -2069,22 +2101,51 @@ class BoUpSLP {
OperandData &Data = getData(OpI, Ln);
if (Data.APO != OpAPO || Data.IsUsed)
continue;
if (Data.V == Op) {
Value *OpILane = getValue(OpI, Lane);
bool IsConstantOp = isa<Constant>(OpILane);
// Consider the broadcast candidate if:
// 1. Same value is found in one of the operands.
if (Data.V == Op ||
// 2. The operand in the given lane is not constant but there is a
// constant operand in another lane (which can be moved to the
// given lane). In this case we can represent it as a simple
// permutation of constant and broadcast.
(!IsConstantOp &&
((Lns > 2 && isa<Constant>(Data.V)) ||
// 2.1. If we have only 2 lanes, we need to check that the value in the
// next lane does not build the same opcode sequence.
(Lns == 2 &&
!getSameOpcode({Op, getValue((OpI + 1) % OpE, Ln)}, TLI)
.getOpcode() &&
isa<Constant>(Data.V)))) ||
// 3. The operand in the current lane is loop invariant (can be
// hoisted out) and another operand is also a loop invariant
// (though not a constant). In this case the whole vector can be
// hoisted out.
// FIXME: need to teach the cost model about this case for better
// estimation.
(IsInvariant && !isa<Constant>(Data.V) &&
!getSameOpcode({Op, Data.V}, TLI).getOpcode() &&
L->isLoopInvariant(Data.V))) {
FoundCandidate = true;
Data.IsUsed = true;
Data.IsUsed = Data.V == Op;
if (Data.V == Op)
++Cnt;
break;
}
}
if (!FoundCandidate)
return false;
}
return true;
return getNumLanes() == 2 || Cnt > 1;
}

public:
/// Initialize with all the operands of the instruction vector \p RootVL.
VLOperands(ArrayRef<Value *> RootVL, const BoUpSLP &R)
: TLI(*R.TLI), DL(*R.DL), SE(*R.SE), R(R) {
: TLI(*R.TLI), DL(*R.DL), SE(*R.SE), R(R),
L(R.LI->getLoopFor(
(cast<Instruction>(RootVL.front())->getParent()))) {
// Append all the operands of RootVL.
appendOperandsOfVL(RootVL);
}
Expand Down Expand Up @@ -2216,8 +2277,6 @@ class BoUpSLP {
// getBestOperand().
swap(OpIdx, *BestIdx, Lane);
} else {
// We failed to find a best operand, set mode to 'Failed'.
ReorderingModes[OpIdx] = ReorderingMode::Failed;
// Enable the second pass.
StrategyFailed = true;
}
Expand Down
Loading