Skip to content

[SLP]Try to keep operand of external casts as scalars, if profitable #110537

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 43 additions & 4 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11571,6 +11571,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
DenseSet<std::pair<const TreeEntry *, Type *>> VectorCasts;
std::optional<DenseMap<Value *, unsigned>> ValueToExtUses;
DenseMap<const TreeEntry *, DenseSet<Value *>> ExtractsCount;
SmallPtrSet<Value *, 4> ScalarOpsFromCasts;
for (ExternalUser &EU : ExternalUses) {
// Uses by ephemeral values are free (because the ephemeral value will be
// removed prior to code generation, and so the extraction will be
Expand Down Expand Up @@ -11706,7 +11707,8 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
// The original instruction can be used if none of its operands are
// vectorized or they are already marked as externally used.
auto *Inst = cast<Instruction>(EU.Scalar);
bool CanBeUsedAsScalar = all_of(Inst->operands(), [&](Value *V) {
InstructionCost ScalarCost = TTI->getInstructionCost(Inst, CostKind);
auto OperandIsScalar = [&](Value *V) {
if (!getTreeEntry(V)) {
// Some extractelements might not be vectorized, but instead
// transformed into a shuffle and removed from the function,
Expand All @@ -11716,9 +11718,23 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
return true;
}
return ValueToExtUses->contains(V);
});
};
bool CanBeUsedAsScalar = all_of(Inst->operands(), OperandIsScalar);
bool CanBeUsedAsScalarCast = false;
if (auto *CI = dyn_cast<CastInst>(Inst); CI && !CanBeUsedAsScalar) {
if (auto *Op = dyn_cast<Instruction>(CI->getOperand(0));
Op && all_of(Op->operands(), OperandIsScalar)) {
InstructionCost OpCost =
(getTreeEntry(Op) && !ValueToExtUses->contains(Op))
? TTI->getInstructionCost(Op, CostKind)
: 0;
if (ScalarCost + OpCost <= ExtraCost) {
CanBeUsedAsScalar = CanBeUsedAsScalarCast = true;
ScalarCost += OpCost;
}
}
}
if (CanBeUsedAsScalar) {
InstructionCost ScalarCost = TTI->getInstructionCost(Inst, CostKind);
bool KeepScalar = ScalarCost <= ExtraCost;
// Try to keep original scalar if the user is the phi node from the same
// block as the root phis, currently vectorized. It allows to keep
Expand Down Expand Up @@ -11774,12 +11790,34 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
ExtraCost = ScalarCost;
if (!IsPhiInLoop(EU))
ExtractsCount[Entry].insert(Inst);
if (CanBeUsedAsScalarCast) {
ScalarOpsFromCasts.insert(Inst->getOperand(0));
// Update the users of the operands of the cast operand to avoid a
// compiler crash.
if (auto *IOp = dyn_cast<Instruction>(Inst->getOperand(0))) {
for_each(IOp->operands(), [&](Value *V) {
auto It = ValueToExtUses->find(V);
if (It != ValueToExtUses->end()) {
// Replace all uses to avoid compiler crash.
ExternalUses[It->second].User = nullptr;
}
});
}
}
}
}
}

ExtractCost += ExtraCost;
}
// Record external uses for the operands of casts that are to be emitted as
// scalars instead of extractelement instructions.
for (Value *V : ScalarOpsFromCasts) {
ExternalUsesAsOriginalScalar.insert(V);
if (const TreeEntry *E = getTreeEntry(V)) {
ExternalUses.emplace_back(V, nullptr, E->findLaneForValue(V));
}
}
// Add reduced value cost, if resized.
if (!VectorizedVals.empty()) {
const TreeEntry &Root = *VectorizableTree.front();
Expand Down Expand Up @@ -13095,7 +13133,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
UniqueBases.insert(VecBase);
// If the only use is vectorized, the extractelement itself can be
// deleted.
if (!EI->hasOneUse() || (NumParts != 1 && count(E->Scalars, EI) > 1) ||
if (!EI->hasOneUse() || R.ExternalUsesAsOriginalScalar.contains(EI) ||
(NumParts != 1 && count(E->Scalars, EI) > 1) ||
any_of(EI->users(), [&](User *U) {
const TreeEntry *UTE = R.getTreeEntry(U);
return !UTE || R.MultiNodeScalars.contains(U) ||
Expand Down
Loading
Loading