Skip to content

Commit 51a0c1b

Browse files
authored
[SLP] NFC. Replace TreeEntry::setOperandsInOrder with VLOperands. (#118949)
To reduce repeated code, TreeEntry::setOperandsInOrder will be replaced by VLOperands. Arg_size will be provided to make sure other operands will not be reordered when VL[0] is IntrinsicInst (because APO is a boolean value). In addition, BoUpSLP::reorderInputsAccordingToOpcode will also be removed since it is simple.
1 parent b759020 commit 51a0c1b

File tree

2 files changed

+86
-149
lines changed

2 files changed

+86
-149
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 67 additions & 149 deletions
Original file line numberDiff line numberDiff line change
@@ -2019,6 +2019,9 @@ class BoUpSLP {
20192019

20202020
/// A vector of operand vectors.
20212021
SmallVector<OperandDataVec, 4> OpsVec;
2022+
/// When VL[0] is IntrinsicInst, ArgSize is CallBase::arg_size. When VL[0]
2023+
/// is not IntrinsicInst, ArgSize is User::getNumOperands.
2024+
unsigned ArgSize = 0;
20222025

20232026
const TargetLibraryInfo &TLI;
20242027
const DataLayout &DL;
@@ -2402,14 +2405,15 @@ class BoUpSLP {
24022405
}
24032406

24042407
/// Go through the instructions in VL and append their operands.
2405-
void appendOperandsOfVL(ArrayRef<Value *> VL) {
2408+
void appendOperandsOfVL(ArrayRef<Value *> VL, Instruction *VL0) {
24062409
assert(!VL.empty() && "Bad VL");
24072410
assert((empty() || VL.size() == getNumLanes()) &&
24082411
"Expected same number of lanes");
2412+
// IntrinsicInst::isCommutative returns true if swapping the first "two"
2413+
// arguments to the intrinsic produces the same result.
24092414
constexpr unsigned IntrinsicNumOperands = 2;
2410-
auto *VL0 = cast<Instruction>(*find_if(VL, IsaPred<Instruction>));
2411-
unsigned NumOperands = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands
2412-
: VL0->getNumOperands();
2415+
unsigned NumOperands = VL0->getNumOperands();
2416+
ArgSize = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands : NumOperands;
24132417
OpsVec.resize(NumOperands);
24142418
unsigned NumLanes = VL.size();
24152419
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
@@ -2442,7 +2446,7 @@ class BoUpSLP {
24422446
}
24432447

24442448
/// \returns the number of operands.
2445-
unsigned getNumOperands() const { return OpsVec.size(); }
2449+
unsigned getNumOperands() const { return ArgSize; }
24462450

24472451
/// \returns the number of lanes.
24482452
unsigned getNumLanes() const { return OpsVec[0].size(); }
@@ -2543,13 +2547,11 @@ class BoUpSLP {
25432547

25442548
public:
25452549
/// Initialize with all the operands of the instruction vector \p RootVL.
2546-
VLOperands(ArrayRef<Value *> RootVL, const BoUpSLP &R)
2550+
VLOperands(ArrayRef<Value *> RootVL, Instruction *VL0, const BoUpSLP &R)
25472551
: TLI(*R.TLI), DL(*R.DL), SE(*R.SE), R(R),
2548-
L(R.LI->getLoopFor(
2549-
(cast<Instruction>(*find_if(RootVL, IsaPred<Instruction>))
2550-
->getParent()))) {
2552+
L(R.LI->getLoopFor((VL0->getParent()))) {
25512553
// Append all the operands of RootVL.
2552-
appendOperandsOfVL(RootVL);
2554+
appendOperandsOfVL(RootVL, VL0);
25532555
}
25542556

25552557
/// \Returns a value vector with the operands across all lanes for the
@@ -2623,7 +2625,8 @@ class BoUpSLP {
26232625
ArrayRef<OperandData> Op0 = OpsVec.front();
26242626
for (const OperandData &Data : Op0)
26252627
UniqueValues.insert(Data.V);
2626-
for (ArrayRef<OperandData> Op : drop_begin(OpsVec, 1)) {
2628+
for (ArrayRef<OperandData> Op :
2629+
ArrayRef(OpsVec).slice(1, getNumOperands() - 1)) {
26272630
if (any_of(Op, [&UniqueValues](const OperandData &Data) {
26282631
return !UniqueValues.contains(Data.V);
26292632
}))
@@ -3144,13 +3147,6 @@ class BoUpSLP {
31443147
SmallVector<SmallVector<std::pair<LoadInst *, int>>>,
31453148
8> &GatheredLoads);
31463149

3147-
/// Reorder commutative or alt operands to get better probability of
3148-
/// generating vectorized code.
3149-
static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
3150-
SmallVectorImpl<Value *> &Left,
3151-
SmallVectorImpl<Value *> &Right,
3152-
const BoUpSLP &R);
3153-
31543150
/// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
31553151
/// users of \p TE and collects the stores. It returns the map from the store
31563152
/// pointers to the collected stores.
@@ -3345,27 +3341,13 @@ class BoUpSLP {
33453341
copy(OpVL, Operands[OpIdx].begin());
33463342
}
33473343

3348-
/// Set the operands of this bundle in their original order.
3349-
void setOperandsInOrder() {
3350-
assert(Operands.empty() && "Already initialized?");
3351-
auto *I0 = cast<Instruction>(*find_if(Scalars, IsaPred<Instruction>));
3352-
Operands.resize(I0->getNumOperands());
3353-
unsigned NumLanes = Scalars.size();
3354-
for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
3355-
OpIdx != NumOperands; ++OpIdx) {
3356-
Operands[OpIdx].resize(NumLanes);
3357-
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3358-
if (isa<PoisonValue>(Scalars[Lane])) {
3359-
Operands[OpIdx][Lane] =
3360-
PoisonValue::get(I0->getOperand(OpIdx)->getType());
3361-
continue;
3362-
}
3363-
auto *I = cast<Instruction>(Scalars[Lane]);
3364-
assert(I->getNumOperands() == NumOperands &&
3365-
"Expected same number of operands");
3366-
Operands[OpIdx][Lane] = I->getOperand(OpIdx);
3367-
}
3368-
}
3344+
/// Set this bundle's operand from Scalars.
3345+
void setOperand(const BoUpSLP &R, bool RequireReorder = false) {
3346+
VLOperands Ops(Scalars, MainOp, R);
3347+
if (RequireReorder)
3348+
Ops.reorder();
3349+
for (unsigned I : seq<unsigned>(MainOp->getNumOperands()))
3350+
setOperand(I, Ops.getVL(I));
33693351
}
33703352

33713353
/// Reorders operands of the node to the given mask \p Mask.
@@ -8471,7 +8453,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
84718453
{}, CurrentOrder);
84728454
LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
84738455

8474-
TE->setOperandsInOrder();
8456+
TE->setOperand(*this);
84758457
buildTree_rec(TE->getOperand(1), Depth + 1, {TE, 1});
84768458
return;
84778459
}
@@ -8492,27 +8474,26 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
84928474
LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
84938475
else
84948476
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
8495-
TE->setOperandsInOrder();
84968477
break;
84978478
case TreeEntry::StridedVectorize:
84988479
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
84998480
TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S,
85008481
UserTreeIdx, ReuseShuffleIndices, CurrentOrder);
8501-
TE->setOperandsInOrder();
85028482
LLVM_DEBUG(dbgs() << "SLP: added a vector of strided loads.\n");
85038483
break;
85048484
case TreeEntry::ScatterVectorize:
85058485
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
85068486
TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
85078487
UserTreeIdx, ReuseShuffleIndices);
8508-
TE->setOperandsInOrder();
8509-
buildTree_rec(PointerOps, Depth + 1, {TE, 0});
85108488
LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
85118489
break;
85128490
case TreeEntry::CombinedVectorize:
85138491
case TreeEntry::NeedToGather:
85148492
llvm_unreachable("Unexpected loads state.");
85158493
}
8494+
TE->setOperand(*this);
8495+
if (State == TreeEntry::ScatterVectorize)
8496+
buildTree_rec(PointerOps, Depth + 1, {TE, 0});
85168497
return;
85178498
}
85188499
case Instruction::ZExt:
@@ -8550,8 +8531,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
85508531
ReuseShuffleIndices);
85518532
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
85528533

8553-
TE->setOperandsInOrder();
8554-
for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8534+
TE->setOperand(*this);
8535+
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
85558536
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
85568537
if (ShuffleOrOp == Instruction::Trunc) {
85578538
ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
@@ -8578,12 +8559,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
85788559
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
85798560

85808561
ValueList Left, Right;
8562+
VLOperands Ops(VL, VL0, *this);
85818563
if (cast<CmpInst>(VL0)->isCommutative()) {
85828564
// Commutative predicate - collect + sort operands of the instructions
85838565
// so that each side is more likely to have the same opcode.
85848566
assert(P0 == CmpInst::getSwappedPredicate(P0) &&
85858567
"Commutative Predicate mismatch");
8586-
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8568+
Ops.reorder();
8569+
Left = Ops.getVL(0);
8570+
Right = Ops.getVL(1);
85878571
} else {
85888572
// Collect operands - commute if it uses the swapped predicate.
85898573
for (Value *V : VL) {
@@ -8644,20 +8628,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
86448628
ReuseShuffleIndices);
86458629
LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
86468630

8647-
// Sort operands of the instructions so that each side is more likely to
8648-
// have the same opcode.
8649-
if (isa<BinaryOperator>(VL0) && isCommutative(VL0)) {
8650-
ValueList Left, Right;
8651-
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8652-
TE->setOperand(0, Left);
8653-
TE->setOperand(1, Right);
8654-
buildTree_rec(Left, Depth + 1, {TE, 0});
8655-
buildTree_rec(Right, Depth + 1, {TE, 1});
8656-
return;
8657-
}
8658-
8659-
TE->setOperandsInOrder();
8660-
for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8631+
TE->setOperand(*this, isa<BinaryOperator>(VL0) && isCommutative(VL0));
8632+
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
86618633
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
86628634
return;
86638635
}
@@ -8722,7 +8694,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
87228694
fixupOrderingIndices(CurrentOrder);
87238695
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
87248696
ReuseShuffleIndices, CurrentOrder);
8725-
TE->setOperandsInOrder();
8697+
TE->setOperand(*this);
87268698
buildTree_rec(TE->getOperand(0), Depth + 1, {TE, 0});
87278699
if (Consecutive)
87288700
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
@@ -8738,46 +8710,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
87388710

87398711
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
87408712
ReuseShuffleIndices);
8741-
// Sort operands of the instructions so that each side is more likely to
8742-
// have the same opcode.
8743-
if (isCommutative(VL0)) {
8744-
ValueList Left, Right;
8745-
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8746-
TE->setOperand(0, Left);
8747-
TE->setOperand(1, Right);
8748-
SmallVector<ValueList> Operands;
8749-
for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
8750-
Operands.emplace_back();
8751-
if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
8752-
continue;
8753-
for (Value *V : VL) {
8754-
auto *CI2 = cast<CallInst>(V);
8755-
Operands.back().push_back(CI2->getArgOperand(I));
8756-
}
8757-
TE->setOperand(I, Operands.back());
8758-
}
8759-
buildTree_rec(Left, Depth + 1, {TE, 0});
8760-
buildTree_rec(Right, Depth + 1, {TE, 1});
8761-
for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
8762-
if (Operands[I - 2].empty())
8763-
continue;
8764-
buildTree_rec(Operands[I - 2], Depth + 1, {TE, I});
8765-
}
8766-
return;
8767-
}
8768-
TE->setOperandsInOrder();
8769-
for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
8713+
TE->setOperand(*this, isCommutative(VL0));
8714+
for (unsigned I : seq<unsigned>(CI->arg_size())) {
87708715
// For scalar operands no need to create an entry since no need to
87718716
// vectorize it.
87728717
if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
87738718
continue;
8774-
ValueList Operands;
8775-
// Prepare the operand vector.
8776-
for (Value *V : VL) {
8777-
auto *CI2 = cast<CallInst>(V);
8778-
Operands.push_back(CI2->getArgOperand(I));
8779-
}
8780-
buildTree_rec(Operands, Depth + 1, {TE, I});
8719+
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
87818720
}
87828721
return;
87838722
}
@@ -8788,43 +8727,37 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
87888727

87898728
// Reorder operands if reordering would enable vectorization.
87908729
auto *CI = dyn_cast<CmpInst>(VL0);
8791-
if (isa<BinaryOperator>(VL0) || CI) {
8730+
if (CI && any_of(VL, [](Value *V) {
8731+
return !isa<PoisonValue>(V) && !cast<CmpInst>(V)->isCommutative();
8732+
})) {
8733+
auto *MainCI = cast<CmpInst>(S.getMainOp());
8734+
auto *AltCI = cast<CmpInst>(S.getAltOp());
8735+
CmpInst::Predicate MainP = MainCI->getPredicate();
8736+
CmpInst::Predicate AltP = AltCI->getPredicate();
8737+
assert(MainP != AltP &&
8738+
"Expected different main/alternate predicates.");
87928739
ValueList Left, Right;
8793-
if (!CI || all_of(VL, [](Value *V) {
8794-
return isa<PoisonValue>(V) || cast<CmpInst>(V)->isCommutative();
8795-
})) {
8796-
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8797-
} else {
8798-
auto *MainCI = cast<CmpInst>(S.getMainOp());
8799-
auto *AltCI = cast<CmpInst>(S.getAltOp());
8800-
CmpInst::Predicate MainP = MainCI->getPredicate();
8801-
CmpInst::Predicate AltP = AltCI->getPredicate();
8802-
assert(MainP != AltP &&
8803-
"Expected different main/alternate predicates.");
8804-
// Collect operands - commute if it uses the swapped predicate or
8805-
// alternate operation.
8806-
for (Value *V : VL) {
8807-
if (isa<PoisonValue>(V)) {
8808-
Left.push_back(
8809-
PoisonValue::get(MainCI->getOperand(0)->getType()));
8810-
Right.push_back(
8811-
PoisonValue::get(MainCI->getOperand(1)->getType()));
8812-
continue;
8813-
}
8814-
auto *Cmp = cast<CmpInst>(V);
8815-
Value *LHS = Cmp->getOperand(0);
8816-
Value *RHS = Cmp->getOperand(1);
8740+
// Collect operands - commute if it uses the swapped predicate or
8741+
// alternate operation.
8742+
for (Value *V : VL) {
8743+
if (isa<PoisonValue>(V)) {
8744+
Left.push_back(PoisonValue::get(MainCI->getOperand(0)->getType()));
8745+
Right.push_back(PoisonValue::get(MainCI->getOperand(1)->getType()));
8746+
continue;
8747+
}
8748+
auto *Cmp = cast<CmpInst>(V);
8749+
Value *LHS = Cmp->getOperand(0);
8750+
Value *RHS = Cmp->getOperand(1);
88178751

8818-
if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
8819-
if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8820-
std::swap(LHS, RHS);
8821-
} else {
8822-
if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8823-
std::swap(LHS, RHS);
8824-
}
8825-
Left.push_back(LHS);
8826-
Right.push_back(RHS);
8752+
if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
8753+
if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8754+
std::swap(LHS, RHS);
8755+
} else {
8756+
if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8757+
std::swap(LHS, RHS);
88278758
}
8759+
Left.push_back(LHS);
8760+
Right.push_back(RHS);
88288761
}
88298762
TE->setOperand(0, Left);
88308763
TE->setOperand(1, Right);
@@ -8833,8 +8766,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
88338766
return;
88348767
}
88358768

8836-
TE->setOperandsInOrder();
8837-
for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8769+
TE->setOperand(*this, isa<BinaryOperator>(VL0) || CI);
8770+
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
88388771
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
88398772
return;
88408773
}
@@ -13539,21 +13472,6 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
1353913472
return Cost;
1354013473
}
1354113474

13542-
// Perform operand reordering on the instructions in VL and return the reordered
13543-
// operands in Left and Right.
13544-
void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
13545-
SmallVectorImpl<Value *> &Left,
13546-
SmallVectorImpl<Value *> &Right,
13547-
const BoUpSLP &R) {
13548-
if (VL.empty())
13549-
return;
13550-
VLOperands Ops(VL, R);
13551-
// Reorder the operands in place.
13552-
Ops.reorder();
13553-
Left = Ops.getVL(0);
13554-
Right = Ops.getVL(1);
13555-
}
13556-
1355713475
Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
1355813476
auto &Res = EntryToLastInstruction.try_emplace(E).first->second;
1355913477
if (Res)
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -passes=slp-vectorizer -S -slp-max-reg-size=1024 %s | FileCheck %s
3+
4+
define ptr @test() {
5+
; CHECK-LABEL: @test(
6+
; CHECK-NEXT: store <4 x double> <double poison, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, ptr null, align 8
7+
; CHECK-NEXT: ret ptr null
8+
;
9+
store double poison, ptr null, align 8
10+
%1 = getelementptr i8, ptr null, i64 8
11+
%2 = fmul double 0.000000e+00, 0.000000e+00
12+
store double %2, ptr %1, align 8
13+
%3 = getelementptr i8, ptr null, i64 16
14+
%4 = fmul double 0.000000e+00, 0.000000e+00
15+
store double %4, ptr %3, align 8
16+
%5 = getelementptr i8, ptr null, i64 24
17+
store double %2, ptr %5, align 8
18+
ret ptr null
19+
}

0 commit comments

Comments
 (0)