Skip to content

[SLP] Fix PoisonValue in the argument VL of setOperand. #118949

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Dec 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
216 changes: 67 additions & 149 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2019,6 +2019,9 @@ class BoUpSLP {

/// A vector of operand vectors.
SmallVector<OperandDataVec, 4> OpsVec;
/// When VL[0] is IntrinsicInst, ArgSize is CallBase::arg_size. When VL[0]
/// is not IntrinsicInst, ArgSize is User::getNumOperands.
unsigned ArgSize = 0;

const TargetLibraryInfo &TLI;
const DataLayout &DL;
Expand Down Expand Up @@ -2402,14 +2405,15 @@ class BoUpSLP {
}

/// Go through the instructions in VL and append their operands.
void appendOperandsOfVL(ArrayRef<Value *> VL) {
void appendOperandsOfVL(ArrayRef<Value *> VL, Instruction *VL0) {
assert(!VL.empty() && "Bad VL");
assert((empty() || VL.size() == getNumLanes()) &&
"Expected same number of lanes");
// IntrinsicInst::isCommutative returns true if swapping the first "two"
// arguments to the intrinsic produces the same result.
constexpr unsigned IntrinsicNumOperands = 2;
auto *VL0 = cast<Instruction>(*find_if(VL, IsaPred<Instruction>));
unsigned NumOperands = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands
: VL0->getNumOperands();
unsigned NumOperands = VL0->getNumOperands();
ArgSize = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands : NumOperands;
OpsVec.resize(NumOperands);
unsigned NumLanes = VL.size();
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
Expand Down Expand Up @@ -2442,7 +2446,7 @@ class BoUpSLP {
}

/// \returns the number of operands.
unsigned getNumOperands() const { return OpsVec.size(); }
unsigned getNumOperands() const { return ArgSize; }

/// \returns the number of lanes.
unsigned getNumLanes() const { return OpsVec[0].size(); }
Expand Down Expand Up @@ -2543,13 +2547,11 @@ class BoUpSLP {

public:
/// Initialize with all the operands of the instruction vector \p RootVL.
VLOperands(ArrayRef<Value *> RootVL, const BoUpSLP &R)
VLOperands(ArrayRef<Value *> RootVL, Instruction *VL0, const BoUpSLP &R)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make VL0 a member of the сlass, it is used in too many cases

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

VL0 is only used by the constructor of VLOperands. I think you mean "Make VL0 a member of TreeEntry"?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we need to put VL0 into TreeEntry, I suggest we open the original PR (#113880 (comment)) and review there.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, no, no, need it.

: TLI(*R.TLI), DL(*R.DL), SE(*R.SE), R(R),
L(R.LI->getLoopFor(
(cast<Instruction>(*find_if(RootVL, IsaPred<Instruction>))
->getParent()))) {
L(R.LI->getLoopFor((VL0->getParent()))) {
// Append all the operands of RootVL.
appendOperandsOfVL(RootVL);
appendOperandsOfVL(RootVL, VL0);
}

/// \Returns a value vector with the operands across all lanes for the
Expand Down Expand Up @@ -2623,7 +2625,8 @@ class BoUpSLP {
ArrayRef<OperandData> Op0 = OpsVec.front();
for (const OperandData &Data : Op0)
UniqueValues.insert(Data.V);
for (ArrayRef<OperandData> Op : drop_begin(OpsVec, 1)) {
for (ArrayRef<OperandData> Op :
ArrayRef(OpsVec).slice(1, getNumOperands() - 1)) {
if (any_of(Op, [&UniqueValues](const OperandData &Data) {
return !UniqueValues.contains(Data.V);
}))
Expand Down Expand Up @@ -3144,13 +3147,6 @@ class BoUpSLP {
SmallVector<SmallVector<std::pair<LoadInst *, int>>>,
8> &GatheredLoads);

/// Reorder commutative or alt operands to get better probability of
/// generating vectorized code.
static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
SmallVectorImpl<Value *> &Left,
SmallVectorImpl<Value *> &Right,
const BoUpSLP &R);

/// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
/// users of \p TE and collects the stores. It returns the map from the store
/// pointers to the collected stores.
Expand Down Expand Up @@ -3345,27 +3341,13 @@ class BoUpSLP {
copy(OpVL, Operands[OpIdx].begin());
}

/// Set the operands of this bundle in their original order.
void setOperandsInOrder() {
assert(Operands.empty() && "Already initialized?");
auto *I0 = cast<Instruction>(*find_if(Scalars, IsaPred<Instruction>));
Operands.resize(I0->getNumOperands());
unsigned NumLanes = Scalars.size();
for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
OpIdx != NumOperands; ++OpIdx) {
Operands[OpIdx].resize(NumLanes);
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
if (isa<PoisonValue>(Scalars[Lane])) {
Operands[OpIdx][Lane] =
PoisonValue::get(I0->getOperand(OpIdx)->getType());
continue;
}
auto *I = cast<Instruction>(Scalars[Lane]);
assert(I->getNumOperands() == NumOperands &&
"Expected same number of operands");
Operands[OpIdx][Lane] = I->getOperand(OpIdx);
}
}
/// Set this bundle's operand from Scalars.
void setOperand(const BoUpSLP &R, bool RequireReorder = false) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better to pass first instruction as an argument rather than redo the search for it

VLOperands Ops(Scalars, MainOp, R);
if (RequireReorder)
Ops.reorder();
for (unsigned I : seq<unsigned>(MainOp->getNumOperands()))
setOperand(I, Ops.getVL(I));
}

/// Reorders operands of the node to the given mask \p Mask.
Expand Down Expand Up @@ -8471,7 +8453,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
{}, CurrentOrder);
LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");

TE->setOperandsInOrder();
TE->setOperand(*this);
buildTree_rec(TE->getOperand(1), Depth + 1, {TE, 1});
return;
}
Expand All @@ -8492,27 +8474,26 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
else
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
TE->setOperandsInOrder();
break;
case TreeEntry::StridedVectorize:
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S,
UserTreeIdx, ReuseShuffleIndices, CurrentOrder);
TE->setOperandsInOrder();
LLVM_DEBUG(dbgs() << "SLP: added a vector of strided loads.\n");
break;
case TreeEntry::ScatterVectorize:
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
UserTreeIdx, ReuseShuffleIndices);
TE->setOperandsInOrder();
buildTree_rec(PointerOps, Depth + 1, {TE, 0});
LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
break;
case TreeEntry::CombinedVectorize:
case TreeEntry::NeedToGather:
llvm_unreachable("Unexpected loads state.");
}
TE->setOperand(*this);
if (State == TreeEntry::ScatterVectorize)
buildTree_rec(PointerOps, Depth + 1, {TE, 0});
return;
}
case Instruction::ZExt:
Expand Down Expand Up @@ -8550,8 +8531,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");

TE->setOperandsInOrder();
for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
TE->setOperand(*this);
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
if (ShuffleOrOp == Instruction::Trunc) {
ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
Expand All @@ -8578,12 +8559,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");

ValueList Left, Right;
VLOperands Ops(VL, VL0, *this);
if (cast<CmpInst>(VL0)->isCommutative()) {
// Commutative predicate - collect + sort operands of the instructions
// so that each side is more likely to have the same opcode.
assert(P0 == CmpInst::getSwappedPredicate(P0) &&
"Commutative Predicate mismatch");
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
Ops.reorder();
Left = Ops.getVL(0);
Right = Ops.getVL(1);
} else {
// Collect operands - commute if it uses the swapped predicate.
for (Value *V : VL) {
Expand Down Expand Up @@ -8644,20 +8628,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");

// Sort operands of the instructions so that each side is more likely to
// have the same opcode.
if (isa<BinaryOperator>(VL0) && isCommutative(VL0)) {
ValueList Left, Right;
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
TE->setOperand(0, Left);
TE->setOperand(1, Right);
buildTree_rec(Left, Depth + 1, {TE, 0});
buildTree_rec(Right, Depth + 1, {TE, 1});
return;
}

TE->setOperandsInOrder();
for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
TE->setOperand(*this, isa<BinaryOperator>(VL0) && isCommutative(VL0));
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
return;
}
Expand Down Expand Up @@ -8722,7 +8694,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
fixupOrderingIndices(CurrentOrder);
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices, CurrentOrder);
TE->setOperandsInOrder();
TE->setOperand(*this);
buildTree_rec(TE->getOperand(0), Depth + 1, {TE, 0});
if (Consecutive)
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
Expand All @@ -8738,46 +8710,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,

TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices);
// Sort operands of the instructions so that each side is more likely to
// have the same opcode.
if (isCommutative(VL0)) {
ValueList Left, Right;
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
TE->setOperand(0, Left);
TE->setOperand(1, Right);
SmallVector<ValueList> Operands;
for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
Operands.emplace_back();
if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
continue;
for (Value *V : VL) {
auto *CI2 = cast<CallInst>(V);
Operands.back().push_back(CI2->getArgOperand(I));
}
TE->setOperand(I, Operands.back());
}
buildTree_rec(Left, Depth + 1, {TE, 0});
buildTree_rec(Right, Depth + 1, {TE, 1});
for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
if (Operands[I - 2].empty())
continue;
buildTree_rec(Operands[I - 2], Depth + 1, {TE, I});
}
return;
}
TE->setOperandsInOrder();
for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
TE->setOperand(*this, isCommutative(VL0));
for (unsigned I : seq<unsigned>(CI->arg_size())) {
// For scalar operands no need to create an entry since no need to
// vectorize it.
if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
continue;
ValueList Operands;
// Prepare the operand vector.
for (Value *V : VL) {
auto *CI2 = cast<CallInst>(V);
Operands.push_back(CI2->getArgOperand(I));
}
buildTree_rec(Operands, Depth + 1, {TE, I});
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
}
return;
}
Expand All @@ -8788,43 +8727,37 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,

// Reorder operands if reordering would enable vectorization.
auto *CI = dyn_cast<CmpInst>(VL0);
if (isa<BinaryOperator>(VL0) || CI) {
if (CI && any_of(VL, [](Value *V) {
return !isa<PoisonValue>(V) && !cast<CmpInst>(V)->isCommutative();
})) {
auto *MainCI = cast<CmpInst>(S.getMainOp());
auto *AltCI = cast<CmpInst>(S.getAltOp());
CmpInst::Predicate MainP = MainCI->getPredicate();
CmpInst::Predicate AltP = AltCI->getPredicate();
assert(MainP != AltP &&
"Expected different main/alternate predicates.");
ValueList Left, Right;
if (!CI || all_of(VL, [](Value *V) {
return isa<PoisonValue>(V) || cast<CmpInst>(V)->isCommutative();
})) {
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
} else {
auto *MainCI = cast<CmpInst>(S.getMainOp());
auto *AltCI = cast<CmpInst>(S.getAltOp());
CmpInst::Predicate MainP = MainCI->getPredicate();
CmpInst::Predicate AltP = AltCI->getPredicate();
assert(MainP != AltP &&
"Expected different main/alternate predicates.");
// Collect operands - commute if it uses the swapped predicate or
// alternate operation.
for (Value *V : VL) {
if (isa<PoisonValue>(V)) {
Left.push_back(
PoisonValue::get(MainCI->getOperand(0)->getType()));
Right.push_back(
PoisonValue::get(MainCI->getOperand(1)->getType()));
continue;
}
auto *Cmp = cast<CmpInst>(V);
Value *LHS = Cmp->getOperand(0);
Value *RHS = Cmp->getOperand(1);
// Collect operands - commute if it uses the swapped predicate or
// alternate operation.
for (Value *V : VL) {
if (isa<PoisonValue>(V)) {
Left.push_back(PoisonValue::get(MainCI->getOperand(0)->getType()));
Right.push_back(PoisonValue::get(MainCI->getOperand(1)->getType()));
continue;
}
auto *Cmp = cast<CmpInst>(V);
Value *LHS = Cmp->getOperand(0);
Value *RHS = Cmp->getOperand(1);

if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
std::swap(LHS, RHS);
} else {
if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
std::swap(LHS, RHS);
}
Left.push_back(LHS);
Right.push_back(RHS);
if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
std::swap(LHS, RHS);
} else {
if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
std::swap(LHS, RHS);
}
Left.push_back(LHS);
Right.push_back(RHS);
}
TE->setOperand(0, Left);
TE->setOperand(1, Right);
Expand All @@ -8833,8 +8766,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return;
}

TE->setOperandsInOrder();
for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
TE->setOperand(*this, isa<BinaryOperator>(VL0) || CI);
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
return;
}
Expand Down Expand Up @@ -13539,21 +13472,6 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
return Cost;
}

// Perform operand reordering on the instructions in VL and return the reordered
// operands in Left and Right.
void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
SmallVectorImpl<Value *> &Left,
SmallVectorImpl<Value *> &Right,
const BoUpSLP &R) {
if (VL.empty())
return;
VLOperands Ops(VL, R);
// Reorder the operands in place.
Ops.reorder();
Left = Ops.getVL(0);
Right = Ops.getVL(1);
}

Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
auto &Res = EntryToLastInstruction.try_emplace(E).first->second;
if (Res)
Expand Down
19 changes: 19 additions & 0 deletions llvm/test/Transforms/SLPVectorizer/fix-113880.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -S -slp-max-reg-size=1024 %s | FileCheck %s

define ptr @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT: store <4 x double> <double poison, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, ptr null, align 8
; CHECK-NEXT: ret ptr null
;
store double poison, ptr null, align 8
%1 = getelementptr i8, ptr null, i64 8
%2 = fmul double 0.000000e+00, 0.000000e+00
store double %2, ptr %1, align 8
%3 = getelementptr i8, ptr null, i64 16
%4 = fmul double 0.000000e+00, 0.000000e+00
store double %4, ptr %3, align 8
%5 = getelementptr i8, ptr null, i64 24
store double %2, ptr %5, align 8
ret ptr null
}
Loading