Skip to content

Commit 87b6e09

Browse files
committed
[SLP] NFC. Replace TreeEntry::setOperandsInOrder with VLOperands.
To reduce repeated code, TreeEntry::setOperandsInOrder is replaced by VLOperands. ArgSize is provided to make sure the other operands are not reordered when VL[0] is an IntrinsicInst (because APO is a boolean value). In addition, BoUpSLP::reorderInputsAccordingToOpcode is also removed, since it is simple.
1 parent 9270328 commit 87b6e09

File tree

1 file changed

+50
-107
lines changed

1 file changed

+50
-107
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 50 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -1949,6 +1949,9 @@ class BoUpSLP {
19491949

19501950
/// A vector of operand vectors.
19511951
SmallVector<OperandDataVec, 4> OpsVec;
1952+
/// When VL[0] is IntrinsicInst, ArgSize is IntrinsicNumOperands (2), i.e.
1953+
/// the first two arguments. Otherwise, ArgSize is User::getNumOperands.
1954+
unsigned ArgSize;
19521955

19531956
const TargetLibraryInfo &TLI;
19541957
const DataLayout &DL;
@@ -2337,10 +2340,11 @@ class BoUpSLP {
23372340
assert((empty() || VL.size() == getNumLanes()) &&
23382341
"Expected same number of lanes");
23392342
assert(isa<Instruction>(VL[0]) && "Expected instruction");
2343+
unsigned NumOperands = cast<Instruction>(VL[0])->getNumOperands();
2344+
// IntrinsicInst::isCommutative returns true if swapping the first "two"
2345+
// arguments to the intrinsic produces the same result.
23402346
constexpr unsigned IntrinsicNumOperands = 2;
2341-
unsigned NumOperands = isa<IntrinsicInst>(VL[0])
2342-
? IntrinsicNumOperands
2343-
: cast<Instruction>(VL[0])->getNumOperands();
2347+
ArgSize = isa<IntrinsicInst>(VL[0]) ? IntrinsicNumOperands : NumOperands;
23442348
OpsVec.resize(NumOperands);
23452349
unsigned NumLanes = VL.size();
23462350
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
@@ -2366,7 +2370,7 @@ class BoUpSLP {
23662370
}
23672371

23682372
/// \returns the number of operands.
2369-
unsigned getNumOperands() const { return OpsVec.size(); }
2373+
unsigned getNumOperands() const { return ArgSize; }
23702374

23712375
/// \returns the number of lanes.
23722376
unsigned getNumLanes() const { return OpsVec[0].size(); }
@@ -2542,7 +2546,8 @@ class BoUpSLP {
25422546
ArrayRef<OperandData> Op0 = OpsVec.front();
25432547
for (const OperandData &Data : Op0)
25442548
UniqueValues.insert(Data.V);
2545-
for (ArrayRef<OperandData> Op : drop_begin(OpsVec, 1)) {
2549+
for (ArrayRef<OperandData> Op : make_range(
2550+
OpsVec.begin() + 1, OpsVec.begin() + getNumOperands())) {
25462551
if (any_of(Op, [&UniqueValues](const OperandData &Data) {
25472552
return !UniqueValues.contains(Data.V);
25482553
}))
@@ -3064,13 +3069,6 @@ class BoUpSLP {
30643069
SmallVector<SmallVector<std::pair<LoadInst *, int>>>,
30653070
8> &GatheredLoads);
30663071

3067-
/// Reorder commutative or alt operands to get better probability of
3068-
/// generating vectorized code.
3069-
static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
3070-
SmallVectorImpl<Value *> &Left,
3071-
SmallVectorImpl<Value *> &Right,
3072-
const BoUpSLP &R);
3073-
30743072
/// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
30753073
/// users of \p TE and collects the stores. It returns the map from the store
30763074
/// pointers to the collected stores.
@@ -3265,22 +3263,10 @@ class BoUpSLP {
32653263
copy(OpVL, Operands[OpIdx].begin());
32663264
}
32673265

3268-
/// Set the operands of this bundle in their original order.
3269-
void setOperandsInOrder() {
3270-
assert(Operands.empty() && "Already initialized?");
3271-
auto *I0 = cast<Instruction>(Scalars[0]);
3272-
Operands.resize(I0->getNumOperands());
3273-
unsigned NumLanes = Scalars.size();
3274-
for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
3275-
OpIdx != NumOperands; ++OpIdx) {
3276-
Operands[OpIdx].resize(NumLanes);
3277-
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3278-
auto *I = cast<Instruction>(Scalars[Lane]);
3279-
assert(I->getNumOperands() == NumOperands &&
3280-
"Expected same number of operands");
3281-
Operands[OpIdx][Lane] = I->getOperand(OpIdx);
3282-
}
3283-
}
3266+
/// Set the first \p NumOperands operands of this bundle from \p Ops.
3267+
void setOperand(const VLOperands &Ops, unsigned NumOperands) {
3268+
for (unsigned I : seq(NumOperands))
3269+
setOperand(I, Ops.getVL(I));
32843270
}
32853271

32863272
/// Reorders operands of the node to the given mask \p Mask.
@@ -8329,7 +8315,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
83298315
{}, CurrentOrder);
83308316
LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
83318317

8332-
TE->setOperandsInOrder();
8318+
VLOperands Ops(VL, *this);
8319+
TE->setOperand(Ops, VL0->getNumOperands());
83338320
buildTree_rec(TE->getOperand(1), Depth + 1, {TE, 1});
83348321
return;
83358322
}
@@ -8350,27 +8337,27 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
83508337
LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
83518338
else
83528339
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
8353-
TE->setOperandsInOrder();
83548340
break;
83558341
case TreeEntry::StridedVectorize:
83568342
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
83578343
TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S,
83588344
UserTreeIdx, ReuseShuffleIndices, CurrentOrder);
8359-
TE->setOperandsInOrder();
83608345
LLVM_DEBUG(dbgs() << "SLP: added a vector of strided loads.\n");
83618346
break;
83628347
case TreeEntry::ScatterVectorize:
83638348
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
83648349
TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
83658350
UserTreeIdx, ReuseShuffleIndices);
8366-
TE->setOperandsInOrder();
8367-
buildTree_rec(PointerOps, Depth + 1, {TE, 0});
83688351
LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
83698352
break;
83708353
case TreeEntry::CombinedVectorize:
83718354
case TreeEntry::NeedToGather:
83728355
llvm_unreachable("Unexpected loads state.");
83738356
}
8357+
VLOperands Ops(VL, *this);
8358+
TE->setOperand(Ops, VL0->getNumOperands());
8359+
if (State == TreeEntry::ScatterVectorize)
8360+
buildTree_rec(PointerOps, Depth + 1, {TE, 0});
83748361
return;
83758362
}
83768363
case Instruction::ZExt:
@@ -8408,8 +8395,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
84088395
ReuseShuffleIndices);
84098396
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
84108397

8411-
TE->setOperandsInOrder();
8412-
for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8398+
VLOperands Ops(VL, *this);
8399+
TE->setOperand(Ops, VL0->getNumOperands());
8400+
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
84138401
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
84148402
if (ShuffleOrOp == Instruction::Trunc) {
84158403
ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
@@ -8436,12 +8424,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
84368424
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
84378425

84388426
ValueList Left, Right;
8427+
VLOperands Ops(VL, *this);
84398428
if (cast<CmpInst>(VL0)->isCommutative()) {
84408429
// Commutative predicate - collect + sort operands of the instructions
84418430
// so that each side is more likely to have the same opcode.
84428431
assert(P0 == CmpInst::getSwappedPredicate(P0) &&
84438432
"Commutative Predicate mismatch");
8444-
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8433+
Ops.reorder();
8434+
Left = Ops.getVL(0);
8435+
Right = Ops.getVL(1);
84458436
} else {
84468437
// Collect operands - commute if it uses the swapped predicate.
84478438
for (Value *V : VL) {
@@ -8497,20 +8488,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
84978488
ReuseShuffleIndices);
84988489
LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
84998490

8491+
VLOperands Ops(VL, *this);
85008492
// Sort operands of the instructions so that each side is more likely to
85018493
// have the same opcode.
8502-
if (isa<BinaryOperator>(VL0) && isCommutative(VL0)) {
8503-
ValueList Left, Right;
8504-
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8505-
TE->setOperand(0, Left);
8506-
TE->setOperand(1, Right);
8507-
buildTree_rec(Left, Depth + 1, {TE, 0});
8508-
buildTree_rec(Right, Depth + 1, {TE, 1});
8509-
return;
8510-
}
8511-
8512-
TE->setOperandsInOrder();
8513-
for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8494+
if (isa<BinaryOperator>(VL0) && isCommutative(VL0))
8495+
Ops.reorder();
8496+
TE->setOperand(Ops, VL0->getNumOperands());
8497+
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
85148498
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
85158499
return;
85168500
}
@@ -8575,7 +8559,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
85758559
fixupOrderingIndices(CurrentOrder);
85768560
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
85778561
ReuseShuffleIndices, CurrentOrder);
8578-
TE->setOperandsInOrder();
8562+
VLOperands Ops(VL, *this);
8563+
TE->setOperand(Ops, VL0->getNumOperands());
85798564
buildTree_rec(TE->getOperand(0), Depth + 1, {TE, 0});
85808565
if (Consecutive)
85818566
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
@@ -8591,46 +8576,18 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
85918576

85928577
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
85938578
ReuseShuffleIndices);
8579+
VLOperands Ops(VL, *this);
85948580
// Sort operands of the instructions so that each side is more likely to
85958581
// have the same opcode.
8596-
if (isCommutative(VL0)) {
8597-
ValueList Left, Right;
8598-
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8599-
TE->setOperand(0, Left);
8600-
TE->setOperand(1, Right);
8601-
SmallVector<ValueList> Operands;
8602-
for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
8603-
Operands.emplace_back();
8604-
if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
8605-
continue;
8606-
for (Value *V : VL) {
8607-
auto *CI2 = cast<CallInst>(V);
8608-
Operands.back().push_back(CI2->getArgOperand(I));
8609-
}
8610-
TE->setOperand(I, Operands.back());
8611-
}
8612-
buildTree_rec(Left, Depth + 1, {TE, 0});
8613-
buildTree_rec(Right, Depth + 1, {TE, 1});
8614-
for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
8615-
if (Operands[I - 2].empty())
8616-
continue;
8617-
buildTree_rec(Operands[I - 2], Depth + 1, {TE, I});
8618-
}
8619-
return;
8620-
}
8621-
TE->setOperandsInOrder();
8622-
for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
8582+
if (isCommutative(VL0))
8583+
Ops.reorder();
8584+
TE->setOperand(Ops, VL0->getNumOperands());
8585+
for (unsigned I : seq<unsigned>(CI->arg_size())) {
86238586
// For scalar operands no need to create an entry since no need to
86248587
// vectorize it.
86258588
if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
86268589
continue;
8627-
ValueList Operands;
8628-
// Prepare the operand vector.
8629-
for (Value *V : VL) {
8630-
auto *CI2 = cast<CallInst>(V);
8631-
Operands.push_back(CI2->getArgOperand(I));
8632-
}
8633-
buildTree_rec(Operands, Depth + 1, {TE, I});
8590+
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
86348591
}
86358592
return;
86368593
}
@@ -8639,21 +8596,22 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
86398596
ReuseShuffleIndices);
86408597
LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
86418598

8599+
VLOperands Ops(VL, *this);
86428600
// Reorder operands if reordering would enable vectorization.
86438601
auto *CI = dyn_cast<CmpInst>(VL0);
86448602
if (isa<BinaryOperator>(VL0) || CI) {
8645-
ValueList Left, Right;
86468603
if (!CI || all_of(VL, [](Value *V) {
86478604
return cast<CmpInst>(V)->isCommutative();
86488605
})) {
8649-
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8606+
Ops.reorder();
86508607
} else {
86518608
auto *MainCI = cast<CmpInst>(S.MainOp);
86528609
auto *AltCI = cast<CmpInst>(S.AltOp);
86538610
CmpInst::Predicate MainP = MainCI->getPredicate();
86548611
CmpInst::Predicate AltP = AltCI->getPredicate();
86558612
assert(MainP != AltP &&
86568613
"Expected different main/alternate predicates.");
8614+
ValueList Left, Right;
86578615
// Collect operands - commute if it uses the swapped predicate or
86588616
// alternate operation.
86598617
for (Value *V : VL) {
@@ -8671,16 +8629,16 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
86718629
Left.push_back(LHS);
86728630
Right.push_back(RHS);
86738631
}
8632+
TE->setOperand(0, Left);
8633+
TE->setOperand(1, Right);
8634+
buildTree_rec(Left, Depth + 1, {TE, 0});
8635+
buildTree_rec(Right, Depth + 1, {TE, 1});
8636+
return;
86748637
}
8675-
TE->setOperand(0, Left);
8676-
TE->setOperand(1, Right);
8677-
buildTree_rec(Left, Depth + 1, {TE, 0});
8678-
buildTree_rec(Right, Depth + 1, {TE, 1});
8679-
return;
86808638
}
86818639

8682-
TE->setOperandsInOrder();
8683-
for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8640+
TE->setOperand(Ops, VL0->getNumOperands());
8641+
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
86848642
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
86858643
return;
86868644
}
@@ -13300,21 +13258,6 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
1330013258
return Cost;
1330113259
}
1330213260

13303-
// Perform operand reordering on the instructions in VL and return the reordered
13304-
// operands in Left and Right.
13305-
void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
13306-
SmallVectorImpl<Value *> &Left,
13307-
SmallVectorImpl<Value *> &Right,
13308-
const BoUpSLP &R) {
13309-
if (VL.empty())
13310-
return;
13311-
VLOperands Ops(VL, R);
13312-
// Reorder the operands in place.
13313-
Ops.reorder();
13314-
Left = Ops.getVL(0);
13315-
Right = Ops.getVL(1);
13316-
}
13317-
1331813261
Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
1331913262
auto &Res = EntryToLastInstruction.try_emplace(E).first->second;
1332013263
if (Res)

0 commit comments

Comments
 (0)