Skip to content

Commit b16e694

Browse files
[SLP] Try to keep operands of external casts as scalars, if profitable
If the cost of the original scalar instruction plus the cast is no greater than the cost of the extractelement from the vectorized cast instruction, it is better to keep the original scalar instructions where possible. Reviewers: RKSimon. Reviewed By: RKSimon. Pull Request: #110537
1 parent e7edd53 commit b16e694

File tree

4 files changed

+247
-217
lines changed

4 files changed

+247
-217
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11571,6 +11571,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
1157111571
DenseSet<std::pair<const TreeEntry *, Type *>> VectorCasts;
1157211572
std::optional<DenseMap<Value *, unsigned>> ValueToExtUses;
1157311573
DenseMap<const TreeEntry *, DenseSet<Value *>> ExtractsCount;
11574+
SmallPtrSet<Value *, 4> ScalarOpsFromCasts;
1157411575
for (ExternalUser &EU : ExternalUses) {
1157511576
// Uses by ephemeral values are free (because the ephemeral value will be
1157611577
// removed prior to code generation, and so the extraction will be
@@ -11706,7 +11707,8 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
1170611707
// Can use original instruction, if no operands vectorized or they are
1170711708
// marked as externally used already.
1170811709
auto *Inst = cast<Instruction>(EU.Scalar);
11709-
bool CanBeUsedAsScalar = all_of(Inst->operands(), [&](Value *V) {
11710+
InstructionCost ScalarCost = TTI->getInstructionCost(Inst, CostKind);
11711+
auto OperandIsScalar = [&](Value *V) {
1171011712
if (!getTreeEntry(V)) {
1171111713
// Some extractelements might be not vectorized, but
1171211714
// transformed into shuffle and removed from the function,
@@ -11716,9 +11718,23 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
1171611718
return true;
1171711719
}
1171811720
return ValueToExtUses->contains(V);
11719-
});
11721+
};
11722+
bool CanBeUsedAsScalar = all_of(Inst->operands(), OperandIsScalar);
11723+
bool CanBeUsedAsScalarCast = false;
11724+
if (auto *CI = dyn_cast<CastInst>(Inst); CI && !CanBeUsedAsScalar) {
11725+
if (auto *Op = dyn_cast<Instruction>(CI->getOperand(0));
11726+
Op && all_of(Op->operands(), OperandIsScalar)) {
11727+
InstructionCost OpCost =
11728+
(getTreeEntry(Op) && !ValueToExtUses->contains(Op))
11729+
? TTI->getInstructionCost(Op, CostKind)
11730+
: 0;
11731+
if (ScalarCost + OpCost <= ExtraCost) {
11732+
CanBeUsedAsScalar = CanBeUsedAsScalarCast = true;
11733+
ScalarCost += OpCost;
11734+
}
11735+
}
11736+
}
1172011737
if (CanBeUsedAsScalar) {
11721-
InstructionCost ScalarCost = TTI->getInstructionCost(Inst, CostKind);
1172211738
bool KeepScalar = ScalarCost <= ExtraCost;
1172311739
// Try to keep original scalar if the user is the phi node from the same
1172411740
// block as the root phis, currently vectorized. It allows to keep
@@ -11774,12 +11790,34 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
1177411790
ExtraCost = ScalarCost;
1177511791
if (!IsPhiInLoop(EU))
1177611792
ExtractsCount[Entry].insert(Inst);
11793+
if (CanBeUsedAsScalarCast) {
11794+
ScalarOpsFromCasts.insert(Inst->getOperand(0));
11795+
// Update the users of the operands of the cast operand to avoid
11796+
// compiler crash.
11797+
if (auto *IOp = dyn_cast<Instruction>(Inst->getOperand(0))) {
11798+
for_each(IOp->operands(), [&](Value *V) {
11799+
auto It = ValueToExtUses->find(V);
11800+
if (It != ValueToExtUses->end()) {
11801+
// Replace all uses to avoid compiler crash.
11802+
ExternalUses[It->second].User = nullptr;
11803+
}
11804+
});
11805+
}
11806+
}
1177711807
}
1177811808
}
1177911809
}
1178011810

1178111811
ExtractCost += ExtraCost;
1178211812
}
11813+
// Insert externals for extract of operands of casts to be emitted as scalars
11814+
// instead of extractelement.
11815+
for (Value *V : ScalarOpsFromCasts) {
11816+
ExternalUsesAsOriginalScalar.insert(V);
11817+
if (const TreeEntry *E = getTreeEntry(V)) {
11818+
ExternalUses.emplace_back(V, nullptr, E->findLaneForValue(V));
11819+
}
11820+
}
1178311821
// Add reduced value cost, if resized.
1178411822
if (!VectorizedVals.empty()) {
1178511823
const TreeEntry &Root = *VectorizableTree.front();
@@ -13095,7 +13133,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
1309513133
UniqueBases.insert(VecBase);
1309613134
// If the only one use is vectorized - can delete the extractelement
1309713135
// itself.
13098-
if (!EI->hasOneUse() || (NumParts != 1 && count(E->Scalars, EI) > 1) ||
13136+
if (!EI->hasOneUse() || R.ExternalUsesAsOriginalScalar.contains(EI) ||
13137+
(NumParts != 1 && count(E->Scalars, EI) > 1) ||
1309913138
any_of(EI->users(), [&](User *U) {
1310013139
const TreeEntry *UTE = R.getTreeEntry(U);
1310113140
return !UTE || R.MultiNodeScalars.contains(U) ||

0 commit comments

Comments
 (0)