[SLP] NFC. Replace TreeEntry::setOperandsInOrder with VLOperands. #113880
Conversation
@llvm/pr-subscribers-llvm-transforms, @llvm/pr-subscribers-vectorizers
Author: Han-Kuan Chen (HanKuanChen)
Changes: To reduce repeated code, TreeEntry::setOperandsInOrder will be replaced by VLOperands.
Full diff: https://github.com/llvm/llvm-project/pull/113880.diff
1 file affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2afd02dae3a8b8..14220efb65a195 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1948,6 +1948,9 @@ class BoUpSLP {
/// A vector of operand vectors.
SmallVector<OperandDataVec, 4> OpsVec;
+ /// When VL[0] is IntrinsicInst, Arg_size is CallBase::arg_size. When VL[0]
+ /// is not IntrinsicInst, Arg_size is User::getNumOperands.
+ unsigned Arg_size;
const TargetLibraryInfo &TLI;
const DataLayout &DL;
@@ -2337,10 +2340,11 @@ class BoUpSLP {
assert((empty() || VL.size() == getNumLanes()) &&
"Expected same number of lanes");
assert(isa<Instruction>(VL[0]) && "Expected instruction");
+ unsigned NumOperands = cast<Instruction>(VL[0])->getNumOperands();
+ // IntrinsicInst::isCommutative returns true if swapping the first "two"
+ // arguments to the intrinsic produces the same result.
constexpr unsigned IntrinsicNumOperands = 2;
- unsigned NumOperands = isa<IntrinsicInst>(VL[0])
- ? IntrinsicNumOperands
- : cast<Instruction>(VL[0])->getNumOperands();
+ Arg_size = isa<IntrinsicInst>(VL[0]) ? IntrinsicNumOperands : NumOperands;
OpsVec.resize(NumOperands);
unsigned NumLanes = VL.size();
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
@@ -2366,7 +2370,7 @@ class BoUpSLP {
}
/// \returns the number of operands.
- unsigned getNumOperands() const { return OpsVec.size(); }
+ unsigned getNumOperands() const { return Arg_size; }
/// \returns the number of lanes.
unsigned getNumLanes() const { return OpsVec[0].size(); }
@@ -3059,13 +3063,6 @@ class BoUpSLP {
SmallVector<SmallVector<std::pair<LoadInst *, int>>>,
8> &GatheredLoads);
- /// Reorder commutative or alt operands to get better probability of
- /// generating vectorized code.
- static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
- SmallVectorImpl<Value *> &Left,
- SmallVectorImpl<Value *> &Right,
- const BoUpSLP &R);
-
/// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
/// users of \p TE and collects the stores. It returns the map from the store
/// pointers to the collected stores.
@@ -3256,24 +3253,6 @@ class BoUpSLP {
copy(OpVL, Operands[OpIdx].begin());
}
- /// Set the operands of this bundle in their original order.
- void setOperandsInOrder() {
- assert(Operands.empty() && "Already initialized?");
- auto *I0 = cast<Instruction>(Scalars[0]);
- Operands.resize(I0->getNumOperands());
- unsigned NumLanes = Scalars.size();
- for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
- OpIdx != NumOperands; ++OpIdx) {
- Operands[OpIdx].resize(NumLanes);
- for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
- auto *I = cast<Instruction>(Scalars[Lane]);
- assert(I->getNumOperands() == NumOperands &&
- "Expected same number of operands");
- Operands[OpIdx][Lane] = I->getOperand(OpIdx);
- }
- }
- }
-
/// Reorders operands of the node to the given mask \p Mask.
void reorderOperands(ArrayRef<int> Mask) {
for (ValueList &Operand : Operands)
@@ -8294,7 +8273,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
{}, CurrentOrder);
LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
- TE->setOperandsInOrder();
+ VLOperands Ops(VL, *this);
+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
+ TE->setOperand(I, Ops.getVL(I));
buildTree_rec(TE->getOperand(1), Depth + 1, {TE, 1});
return;
}
@@ -8315,27 +8296,28 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
else
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
- TE->setOperandsInOrder();
break;
case TreeEntry::StridedVectorize:
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S,
UserTreeIdx, ReuseShuffleIndices, CurrentOrder);
- TE->setOperandsInOrder();
LLVM_DEBUG(dbgs() << "SLP: added a vector of strided loads.\n");
break;
case TreeEntry::ScatterVectorize:
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
UserTreeIdx, ReuseShuffleIndices);
- TE->setOperandsInOrder();
- buildTree_rec(PointerOps, Depth + 1, {TE, 0});
LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
break;
case TreeEntry::CombinedVectorize:
case TreeEntry::NeedToGather:
llvm_unreachable("Unexpected loads state.");
}
+ VLOperands Ops(VL, *this);
+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
+ TE->setOperand(I, Ops.getVL(I));
+ if (State == TreeEntry::ScatterVectorize)
+ buildTree_rec(PointerOps, Depth + 1, {TE, 0});
return;
}
case Instruction::ZExt:
@@ -8373,8 +8355,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
- TE->setOperandsInOrder();
- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
+ VLOperands Ops(VL, *this);
+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
+ TE->setOperand(I, Ops.getVL(I));
+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
if (ShuffleOrOp == Instruction::Trunc) {
ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
@@ -8401,12 +8385,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
ValueList Left, Right;
+ VLOperands Ops(VL, *this);
if (cast<CmpInst>(VL0)->isCommutative()) {
// Commutative predicate - collect + sort operands of the instructions
// so that each side is more likely to have the same opcode.
assert(P0 == CmpInst::getSwappedPredicate(P0) &&
"Commutative Predicate mismatch");
- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
+ Ops.reorder();
+ Left = Ops.getVL(0);
+ Right = Ops.getVL(1);
} else {
// Collect operands - commute if it uses the swapped predicate.
for (Value *V : VL) {
@@ -8462,20 +8449,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
+ VLOperands Ops(VL, *this);
// Sort operands of the instructions so that each side is more likely to
// have the same opcode.
- if (isa<BinaryOperator>(VL0) && isCommutative(VL0)) {
- ValueList Left, Right;
- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
- TE->setOperand(0, Left);
- TE->setOperand(1, Right);
- buildTree_rec(Left, Depth + 1, {TE, 0});
- buildTree_rec(Right, Depth + 1, {TE, 1});
- return;
- }
-
- TE->setOperandsInOrder();
- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
+ if (isa<BinaryOperator>(VL0) && isCommutative(VL0))
+ Ops.reorder();
+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
+ TE->setOperand(I, Ops.getVL(I));
+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
return;
}
@@ -8540,7 +8521,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
fixupOrderingIndices(CurrentOrder);
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices, CurrentOrder);
- TE->setOperandsInOrder();
+ VLOperands Ops(VL, *this);
+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
+ TE->setOperand(I, Ops.getVL(I));
buildTree_rec(TE->getOperand(0), Depth + 1, {TE, 0});
if (Consecutive)
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
@@ -8556,46 +8539,19 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices);
+ VLOperands Ops(VL, *this);
// Sort operands of the instructions so that each side is more likely to
// have the same opcode.
- if (isCommutative(VL0)) {
- ValueList Left, Right;
- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
- TE->setOperand(0, Left);
- TE->setOperand(1, Right);
- SmallVector<ValueList> Operands;
- for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
- Operands.emplace_back();
- if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
- continue;
- for (Value *V : VL) {
- auto *CI2 = cast<CallInst>(V);
- Operands.back().push_back(CI2->getArgOperand(I));
- }
- TE->setOperand(I, Operands.back());
- }
- buildTree_rec(Left, Depth + 1, {TE, 0});
- buildTree_rec(Right, Depth + 1, {TE, 1});
- for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
- if (Operands[I - 2].empty())
- continue;
- buildTree_rec(Operands[I - 2], Depth + 1, {TE, I});
- }
- return;
- }
- TE->setOperandsInOrder();
- for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
+ if (isCommutative(VL0))
+ Ops.reorder();
+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
+ TE->setOperand(I, Ops.getVL(I));
+ for (unsigned I : seq<unsigned>(CI->arg_size())) {
// For scalar operands no need to create an entry since no need to
// vectorize it.
if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
continue;
- ValueList Operands;
- // Prepare the operand vector.
- for (Value *V : VL) {
- auto *CI2 = cast<CallInst>(V);
- Operands.push_back(CI2->getArgOperand(I));
- }
- buildTree_rec(Operands, Depth + 1, {TE, I});
+ buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
}
return;
}
@@ -8604,14 +8560,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
+ VLOperands Ops(VL, *this);
// Reorder operands if reordering would enable vectorization.
auto *CI = dyn_cast<CmpInst>(VL0);
if (isa<BinaryOperator>(VL0) || CI) {
- ValueList Left, Right;
if (!CI || all_of(VL, [](Value *V) {
return cast<CmpInst>(V)->isCommutative();
})) {
- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
+ Ops.reorder();
} else {
auto *MainCI = cast<CmpInst>(S.MainOp);
auto *AltCI = cast<CmpInst>(S.AltOp);
@@ -8619,6 +8575,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
CmpInst::Predicate AltP = AltCI->getPredicate();
assert(MainP != AltP &&
"Expected different main/alternate predicates.");
+ ValueList Left, Right;
// Collect operands - commute if it uses the swapped predicate or
// alternate operation.
for (Value *V : VL) {
@@ -8636,16 +8593,17 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
Left.push_back(LHS);
Right.push_back(RHS);
}
+ TE->setOperand(0, Left);
+ TE->setOperand(1, Right);
+ buildTree_rec(Left, Depth + 1, {TE, 0});
+ buildTree_rec(Right, Depth + 1, {TE, 1});
+ return;
}
- TE->setOperand(0, Left);
- TE->setOperand(1, Right);
- buildTree_rec(Left, Depth + 1, {TE, 0});
- buildTree_rec(Right, Depth + 1, {TE, 1});
- return;
}
- TE->setOperandsInOrder();
- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
+ TE->setOperand(I, Ops.getVL(I));
+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
return;
}
@@ -13024,21 +12982,6 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
return Cost;
}
-// Perform operand reordering on the instructions in VL and return the reordered
-// operands in Left and Right.
-void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
- SmallVectorImpl<Value *> &Left,
- SmallVectorImpl<Value *> &Right,
- const BoUpSLP &R) {
- if (VL.empty())
- return;
- VLOperands Ops(VL, R);
- // Reorder the operands in place.
- Ops.reorder();
- Left = Ops.getVL(0);
- Right = Ops.getVL(1);
-}
-
Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
auto &Res = EntryToLastInstruction.try_emplace(E).first->second;
if (Res)
@@ -1948,6 +1948,9 @@ class BoUpSLP {
/// A vector of operand vectors.
SmallVector<OperandDataVec, 4> OpsVec;
/// When VL[0] is IntrinsicInst, Arg_size is CallBase::arg_size. When VL[0]
/// is not IntrinsicInst, Arg_size is User::getNumOperands.
unsigned Arg_size;
Why can't the original code be reused? Why do we need an extra data member here? Why doesn't just getNumOperands() or 2 work?
For an IntrinsicInst with more than 2 operands, VLOperands::appendOperandsOfVL stores the first two operands but TreeEntry::setOperandsInOrder stores all of the operands. Only the first two operands should be reordered, but all of the operands can be passed into buildTree_rec. If we don't use a new data member (Arg_size), either reorder swaps non-swappable operands or we need to keep TreeEntry::setOperandsInOrder.
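As a concrete illustration (a hypothetical example of mine, not taken from the patch or its tests): llvm.fmuladd is a commutative intrinsic with three operands, but only its first two arguments may be swapped, so reordering has to be limited to the first two while all three operands are still recorded for buildTree_rec.

declare double @llvm.fmuladd.f64(double, double, double)

define double @fmuladd_example(double %a, double %b, double %c) {
  ; Only %a and %b are interchangeable; %c must stay in place.
  %r = call double @llvm.fmuladd.f64(double %a, double %b, double %c)
  ret double %r
}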
Just mark such operands as non-reorderable.
You are right. We can use 2 here.
Just mark such operands as non-reorderable.
Marking operands as non-reorderable also requires an additional data member to store the information.
/// reorder can only work on instructions with 2 operands. If we use
/// OpsVec.size() here, reorder will swap non-swappable operands (because
/// APO is a boolean value).
unsigned getNumOperands() const { return 2; }
I don't like this approach. I plan to extend the reordering to support many operands, so better to avoid a constant value here.
Then we still need arg_size here.
On the other hand, I am curious how reorder can support more operands. Only the first two operands of BinaryOperator, CmpInst and IntrinsicInst are commutative. What kinds of instructions can be supported if we enhance reorder?
If you have add i32 (add p1, p2), (add p3, p4), we can model it as add p1, p2, p3, p4 and reorder the operands.
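A rough IR sketch of that shape (a hypothetical example, not from the patch), where the two nested commutative adds could conceptually be treated as one four-operand add for reordering purposes:

define i32 @flattened(i32 %p1, i32 %p2, i32 %p3, i32 %p4) {
  ; add (add p1, p2), (add p3, p4) viewed as add p1, p2, p3, p4
  %lhs = add i32 %p1, %p2
  %rhs = add i32 %p3, %p4
  %sum = add i32 %lhs, %rhs
  ret i32 %sum
}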
unsigned NumOperands = isa<IntrinsicInst>(VL[0])
                           ? IntrinsicNumOperands
                           : cast<Instruction>(VL[0])->getNumOperands();
Arg_size = isa<IntrinsicInst>(VL[0]) ? IntrinsicNumOperands : NumOperands;
I still do not understand why we need Arg_size here. BTW, better to use the name NumOperands or ArgSize.
We need ArgSize to make sure reorder only swaps the first two operands. After this PR, OpsVec also contains non-swappable operands (because we want appendOperandsOfVL to be reusable even when the operands of VL are not commutative).
if (isa<BinaryOperator>(VL0) && isCommutative(VL0))
  Ops.reorder();
Better to hide it in the setOperand() function.
I don't think it is a good idea. The current reorder usage is:
if (cast<CmpInst>(VL0)->isCommutative()) {
// Commutative predicate - collect + sort operands of the instructions
// so that each side is more likely to have the same opcode.
assert(P0 == CmpInst::getSwappedPredicate(P0) &&
"Commutative Predicate mismatch");
Ops.reorder();
if (isa<BinaryOperator>(VL0) && isCommutative(VL0))
Ops.reorder();
if (isCommutative(VL0))
Ops.reorder();
auto *CI = dyn_cast<CmpInst>(VL0);
if (isa<BinaryOperator>(VL0) || CI) {
if (!CI || all_of(VL, [](Value *V) {
return cast<CmpInst>(V)->isCommutative();
})) {
Ops.reorder();
They use reorder when VL0 is commutative, but the last one determines the condition in a different way. Moving reorder somewhere else does not make the code neater (the last one still exists).
Pass a flag to do the reordering or not.
auto *CI2 = cast<CallInst>(V);
Operands.back().push_back(CI2->getArgOperand(I));
}
TE->setOperand(I, Operands.back());
What about these operands, >= 2?
What do you mean? They are still in TE. appendOperandsOfVL did not forget them.
if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
  continue;
There is no such check in the new code in setOperand. What if one of the first 2 operands is isVectorIntrinsicWithScalarOpAtArg()?
for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
It starts at 2. The only difference is that the new code will store such operands. In the old code, for other arguments which are isVectorIntrinsicWithScalarOpAtArg, setOperand will not store them; in the new code, setOperand will store them. But neither the old code nor the new code will execute buildTree_rec if the argument is isVectorIntrinsicWithScalarOpAtArg.
Better to avoid calling setOperand for isVectorIntrinsicWithScalarOpAtArg args
But for an IntrinsicInst which is NOT commutative, all arguments will be stored into a TreeEntry (and SLP will not call buildTree_rec for those arguments). If we have to "avoid calling setOperand for isVectorIntrinsicWithScalarOpAtArg args" for a commutative IntrinsicInst, we have to do something like the following patch.
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d8c596eb875d..54210cc0b149 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3267,8 +3267,18 @@ private:
void setOperand(ArrayRef<Value *> VL, const BoUpSLP &R,
bool RequireReorder = false) {
VLOperands Ops(VL, R);
- if (RequireReorder)
+ if (RequireReorder) {
Ops.reorder();
+ if (auto *CI = dyn_cast<CallInst>(VL[0])) {
+ Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, R.TLI);
+ for (unsigned I : seq<unsigned>(CI->arg_size())) {
+ if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
+ continue;
+ setOperand(I, Ops.getVL(I));
+ }
+ return;
+ }
+ }
for (unsigned I :
seq<unsigned>(cast<Instruction>(VL[0])->getNumOperands()))
setOperand(I, Ops.getVL(I));
But it does not look elegant.
We should maintain correctness where possible and should not produce "noise" entries in the graph. They may cause unexpected side effects and cause some extra compiler crashes.
Ops.reorder();
if (auto *CI = dyn_cast<CallInst>(VL[0])) {
  Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, R.TLI);
Need to add a check that ID is actually an intrinsic and avoid this check for non-intrinsics
Also, why is this processing enabled only if RequireReorder is true?
Need to add a check that ID is actually an intrinsic and avoid this check for non-intrinsics
Why do we need to check the ID while the old code does not have to?
Also, why is this processing enabled only if RequireReorder is true?
Because only the commutative intrinsics check it in the old code.
I think we should NOT add an isVectorIntrinsicWithScalarOpAtArg check for setOperand (for commutative and non-commutative intrinsics). The reason is that it will cause some bugs. I will use llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll as an example here.
If we make non-commutative intrinsics check isVectorIntrinsicWithScalarOpAtArg, then:
- %add1 will not be added into the TreeEntry (because operand 1 of powi is a scalar operand).
- While SLP is trying to schedule, it cannot call DecrUnsched because the operand of llvm.powi is not inside the TreeEntry (it is isVectorIntrinsicWithScalarOpAtArg).
for (unsigned OpIdx = 0, NumOperands = TE->getNumOperands();
OpIdx != NumOperands; ++OpIdx)
if (auto *I = dyn_cast<Instruction>(TE->getOperand(OpIdx)[Lane]))
DecrUnsched(I);
- Eventually it triggers the assert
#if !defined(NDEBUG) || defined(EXPENSIVE_CHECKS)
// Check that all schedulable entities got scheduled
for (auto *I = BS->ScheduleStart; I != BS->ScheduleEnd; I = I->getNextNode()) {
ScheduleData *SD = BS->getScheduleData(I);
if (SD && SD->isSchedulingEntity() && SD->hasValidDependencies())
assert(SD->IsScheduled && "must be scheduled at this point");
}
#endif
Only smul_fix, smul_fix_sat, umul_fix and umul_fix_sat are non-commutative intrinsics that are also isVectorIntrinsicWithScalarOpAtArg. However, the third operand of these intrinsics is a Constant, which will not trigger the assert.
I think we should revert 664f2d3.
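For reference, a minimal IR shape of the powi situation described above (a hypothetical sketch, not the actual contents of extract_in_tree_user.ll): the exponent %add1 feeds operand 1 of llvm.powi, which is isVectorIntrinsicWithScalarOpAtArg, so it stays scalar even though both calls share it.

declare double @llvm.powi.f64.i32(double, i32)

define void @powi_pair(ptr %p, double %a, double %b, i32 %n) {
  ; Scalar exponent shared by both powi calls.
  %add1 = add i32 %n, 1
  %r0 = call double @llvm.powi.f64.i32(double %a, i32 %add1)
  %r1 = call double @llvm.powi.f64.i32(double %b, i32 %add1)
  store double %r0, ptr %p
  %q = getelementptr inbounds double, ptr %p, i64 1
  store double %r1, ptr %q
  ret void
}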
Because the load code explicitly worked only for CallInst.
The non-commutative intrinsics also need to have a check for isVectorIntrinsicWithScalarOpAtArg.
The check should be there or this whole patch should be dropped. We should not create pseudo tree nodes for the scalar instructions.
But we have a
If we add
If LLVM has an intrinsic in the future and
SLP will fail because the operand is NOT in
Having 664f2d3 will cause a potential bug in the future. In addition, the current code already has scalar instructions in
It is not in the tree, it is just set as the operand of the TreeEntry, but there is no TreeEntry for such operands. Such operands will end up as just broadcast gather TreeEntries and won't be scheduled anyway.
SLP will call
After that, it will call
Then the assert will be triggered because the
Maybe we need to modify
To reduce repeated code, TreeEntry::setOperandsInOrder will be replaced by VLOperands. ArgSize will be provided to make sure other operands will not be reordered when VL[0] is IntrinsicInst (because APO is a boolean value). In addition, BoUpSLP::reorderInputsAccordingToOpcode will also be removed since it is simple.
LG, please do not use force push, use stacked PRs
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/73/builds/9720. Here is the relevant piece of the build log for reference.
Hi, I'm getting a crash after this when building 510.parest_r from SPEC CPU 2017:
This is with an LTO build at -O3 on rva23u64.
Do you have a test?
No, it's with an LTO build so there's no single .ll or .c file I can pick out to reduce yet. Do you have access to SPEC CPU 2017?
Cannot work with
Oh thanks, I was able to get the whole module out. Running llvm-reduce on it now.
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "riscv64-unknown-linux-gnu"
define ptr @_ZNK6dealii7FE_PolyINS_24TensorProductPolynomialsILi3EEELi3ELi3EE8get_dataENS_11UpdateFlagsERKNS_7MappingILi3ELi3EEERKNS_10QuadratureILi3EEE() #0 personality ptr null {
store double poison, ptr null, align 8
%1 = getelementptr i8, ptr null, i64 8
%2 = fmul double 0.000000e+00, 0.000000e+00
store double %2, ptr %1, align 8
%3 = getelementptr i8, ptr null, i64 16
%4 = fmul double 0.000000e+00, 0.000000e+00
store double %4, ptr %3, align 8
%5 = getelementptr i8, ptr null, i64 24
store double %2, ptr %5, align 8
ret ptr null
}
; uselistorder directives
uselistorder ptr null, { 1, 2, 3, 4, 5, 6, 7, 0 }
attributes #0 = { "target-features"="+64bit,+a,+b,+c,+d,+experimental,+f,+m,+relax,+supm,+v,+za64rs,+zaamo,+zalrsc,+zawrs,+zba,+zbb,+zbs,+zca,+zcb,+zcmop,+zfa,+zfhmin,+zic64b,+zicbom,+zicbop,+zicboz,+ziccamoa,+ziccif,+zicclsm,+ziccrse,+zicntr,+zicond,+zicsr,+zihintntl,+zihintpause,+zihpm,+zimop,+zkt,+zmmul,+zvbb,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvfhmin,+zvkb,+zvkt,+zvl128b,+zvl32b,+zvl64b,-e,-experimental-smctr,-experimental-ssctr,-experimental-svukte,-experimental-xqcia,-experimental-xqcicsr,-experimental-xqcisls,-experimental-zalasr,-experimental-zicfilp,-experimental-zicfiss,-experimental-zvbc32e,-experimental-zvkgs,-h,-sha,-shcounterenw,-shgatpa,-shtvala,-shvsatpa,-shvstvala,-shvstvecd,-smaia,-smcdeleg,-smcsrind,-smdbltrp,-smepmp,-smmpm,-smnpm,-smrnmi,-smstateen,-ssaia,-ssccfg,-ssccptr,-sscofpmf,-sscounterenw,-sscsrind,-ssdbltrp,-ssnpm,-sspm,-ssqosid,-ssstateen,-ssstrict,-sstc,-sstvala,-sstvecd,-ssu64xl,-svade,-svadu,-svbare,-svinval,-svnapot,-svpbmt,-svvptc,-xcvalu,-xcvbi,-xcvbitmanip,-xcvelw,-xcvmac,-xcvmem,-xcvsimd,-xsfcease,-xsfvcp,-xsfvfnrclipxfqf,-xsfvfwmaccqqq,-xsfvqmaccdod,-xsfvqmaccqoq,-xsifivecdiscarddlone,-xsifivecflushdlone,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-xwchc,-za128rs,-zabha,-zacas,-zama16b,-zbc,-zbkb,-zbkc,-zbkx,-zcd,-zce,-zcf,-zcmp,-zcmt,-zdinx,-zfbfmin,-zfh,-zfinx,-zhinx,-zhinxmin,-zifencei,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-ztso,-zvbc,-zvfbfmin,-zvfbfwma,-zvfh,-zvkg,-zvkn,-zvknc,-zvkned,-zvkng,-zvknha,-zvknhb,-zvks,-zvksc,-zvksed,-zvksg,-zvksh,-zvl1024b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl4096b,-zvl512b,-zvl65536b,-zvl8192b" }
Can reproduce the crash with
Reverted this because it breaks the llvm-test-suite build in ReleaseLTO-g configuration.
To reduce repeated code, TreeEntry::setOperandsInOrder will be replaced by VLOperands.
Arg_size will be provided to make sure other operands will not be reordered when VL[0] is IntrinsicInst (because APO is a boolean value).
In addition, BoUpSLP::reorderInputsAccordingToOpcode will also be removed since it is simple.