@@ -2019,6 +2019,9 @@ class BoUpSLP {
    /// A vector of operand vectors.
    SmallVector<OperandDataVec, 4> OpsVec;
+   /// When VL[0] is IntrinsicInst, ArgSize is CallBase::arg_size. When VL[0]
+   /// is not IntrinsicInst, ArgSize is User::getNumOperands.
+   unsigned ArgSize = 0;

    const TargetLibraryInfo &TLI;
    const DataLayout &DL;
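Note: ArgSize can differ from the raw operand count because a CallInst's operand list also carries the callee, and IntrinsicInst::isCommutative only covers swapping the first two arguments. A minimal sketch of the distinction against LLVM's public API (operandCountForReordering is a hypothetical helper, not part of this patch):

    // Hypothetical helper mirroring how ArgSize is computed in this patch.
    static unsigned operandCountForReordering(const llvm::Instruction &I) {
      // IntrinsicInst::isCommutative only promises that swapping the first
      // two arguments preserves the result, so reordering stops there.
      if (llvm::isa<llvm::IntrinsicInst>(I))
        return 2;
      return I.getNumOperands(); // every operand participates otherwise
    }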
@@ -2402,14 +2405,15 @@ class BoUpSLP {
  }

  /// Go through the instructions in VL and append their operands.
- void appendOperandsOfVL(ArrayRef<Value *> VL) {
+ void appendOperandsOfVL(ArrayRef<Value *> VL, Instruction *VL0) {
    assert(!VL.empty() && "Bad VL");
    assert((empty() || VL.size() == getNumLanes()) &&
           "Expected same number of lanes");
+   // IntrinsicInst::isCommutative returns true if swapping the first "two"
+   // arguments to the intrinsic produces the same result.
    constexpr unsigned IntrinsicNumOperands = 2;
-   auto *VL0 = cast<Instruction>(*find_if(VL, IsaPred<Instruction>));
-   unsigned NumOperands = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands
-                                                  : VL0->getNumOperands();
+   unsigned NumOperands = VL0->getNumOperands();
+   ArgSize = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands : NumOperands;
    OpsVec.resize(NumOperands);
    unsigned NumLanes = VL.size();
    for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
@@ -2442,7 +2446,7 @@ class BoUpSLP {
  }

  /// \returns the number of operands.
- unsigned getNumOperands() const { return OpsVec.size(); }
+ unsigned getNumOperands() const { return ArgSize; }

  /// \returns the number of lanes.
  unsigned getNumLanes() const { return OpsVec[0].size(); }
@@ -2543,13 +2547,11 @@ class BoUpSLP {

public:
  /// Initialize with all the operands of the instruction vector \p RootVL.
- VLOperands(ArrayRef<Value *> RootVL, const BoUpSLP &R)
+ VLOperands(ArrayRef<Value *> RootVL, Instruction *VL0, const BoUpSLP &R)
      : TLI(*R.TLI), DL(*R.DL), SE(*R.SE), R(R),
-       L(R.LI->getLoopFor(
-           (cast<Instruction>(*find_if(RootVL, IsaPred<Instruction>))
-                ->getParent()))) {
+       L(R.LI->getLoopFor((VL0->getParent()))) {
    // Append all the operands of RootVL.
-   appendOperandsOfVL(RootVL);
+   appendOperandsOfVL(RootVL, VL0);
  }

  /// \Returns a value vector with the operands across all lanes for the
@@ -2623,7 +2625,8 @@ class BoUpSLP {
      ArrayRef<OperandData> Op0 = OpsVec.front();
      for (const OperandData &Data : Op0)
        UniqueValues.insert(Data.V);
-     for (ArrayRef<OperandData> Op : drop_begin(OpsVec, 1)) {
+     for (ArrayRef<OperandData> Op :
+          ArrayRef(OpsVec).slice(1, getNumOperands() - 1)) {
        if (any_of(Op, [&UniqueValues](const OperandData &Data) {
              return !UniqueValues.contains(Data.V);
            }))
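Note: the drop_begin-to-slice change above is load-bearing, not cosmetic: getNumOperands() now returns ArgSize rather than OpsVec.size(), so the scan stops at the last reorderable operand row. A worked example, assuming a hypothetical bundle of llvm.smax calls:

    //   %r = call i32 @llvm.smax.i32(i32 %a, i32 %b)
    // OpsVec holds three rows (%a, %b, and the callee operand), but
    // getNumOperands() == ArgSize == 2, so the loop visits only row 1;
    // the callee row is never considered for reordering.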
@@ -3144,13 +3147,6 @@ class BoUpSLP {
                      SmallVector<SmallVector<std::pair<LoadInst *, int>>>,
                  8> &GatheredLoads);

- /// Reorder commutative or alt operands to get better probability of
- /// generating vectorized code.
- static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
-                                            SmallVectorImpl<Value *> &Left,
-                                            SmallVectorImpl<Value *> &Right,
-                                            const BoUpSLP &R);
-
  /// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
  /// users of \p TE and collects the stores. It returns the map from the store
  /// pointers to the collected stores.
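Note: with this declaration removed (its definition is deleted at the bottom of this commit), former callers construct VLOperands directly; the replacement pattern, as it appears in the compare case below, is roughly:

    VLOperands Ops(VL, VL0, *this); // VL0 is now passed in explicitly
    Ops.reorder();                  // reorder the operands in place
    ValueList Left = Ops.getVL(0);  // lane-wise first operands
    ValueList Right = Ops.getVL(1); // lane-wise second operands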
@@ -3345,27 +3341,13 @@ class BoUpSLP {
      copy(OpVL, Operands[OpIdx].begin());
    }

-   /// Set the operands of this bundle in their original order.
-   void setOperandsInOrder() {
-     assert(Operands.empty() && "Already initialized?");
-     auto *I0 = cast<Instruction>(*find_if(Scalars, IsaPred<Instruction>));
-     Operands.resize(I0->getNumOperands());
-     unsigned NumLanes = Scalars.size();
-     for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
-          OpIdx != NumOperands; ++OpIdx) {
-       Operands[OpIdx].resize(NumLanes);
-       for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
-         if (isa<PoisonValue>(Scalars[Lane])) {
-           Operands[OpIdx][Lane] =
-               PoisonValue::get(I0->getOperand(OpIdx)->getType());
-           continue;
-         }
-         auto *I = cast<Instruction>(Scalars[Lane]);
-         assert(I->getNumOperands() == NumOperands &&
-                "Expected same number of operands");
-         Operands[OpIdx][Lane] = I->getOperand(OpIdx);
-       }
-     }
+   /// Set this bundle's operand from Scalars.
+   void setOperand(const BoUpSLP &R, bool RequireReorder = false) {
+     VLOperands Ops(Scalars, MainOp, R);
+     if (RequireReorder)
+       Ops.reorder();
+     for (unsigned I : seq<unsigned>(MainOp->getNumOperands()))
+       setOperand(I, Ops.getVL(I));
    }

    /// Reorders operands of the node to the given mask \p Mask.
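Note: the per-lane copying that setOperandsInOrder did by hand now routes through VLOperands, with the node's MainOp standing in for the old find_if over Scalars. The intended call-site shape, taken from the buildTree_rec changes below, is:

    // Commutative nodes opt into reordering; everything else keeps
    // the original operand order.
    TE->setOperand(*this, /*RequireReorder=*/isCommutative(VL0));
    for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
      buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});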
@@ -8471,7 +8453,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
                        {}, CurrentOrder);
      LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");

-     TE->setOperandsInOrder();
+     TE->setOperand(*this);
      buildTree_rec(TE->getOperand(1), Depth + 1, {TE, 1});
      return;
    }
@@ -8492,27 +8474,26 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
        LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
      else
        LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
-     TE->setOperandsInOrder();
      break;
    case TreeEntry::StridedVectorize:
      // Vectorizing non-consecutive loads with `llvm.masked.gather`.
      TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S,
                        UserTreeIdx, ReuseShuffleIndices, CurrentOrder);
-     TE->setOperandsInOrder();
      LLVM_DEBUG(dbgs() << "SLP: added a vector of strided loads.\n");
      break;
    case TreeEntry::ScatterVectorize:
      // Vectorizing non-consecutive loads with `llvm.masked.gather`.
      TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
                        UserTreeIdx, ReuseShuffleIndices);
-     TE->setOperandsInOrder();
-     buildTree_rec(PointerOps, Depth + 1, {TE, 0});
      LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
      break;
    case TreeEntry::CombinedVectorize:
    case TreeEntry::NeedToGather:
      llvm_unreachable("Unexpected loads state.");
    }
+   TE->setOperand(*this);
+   if (State == TreeEntry::ScatterVectorize)
+     buildTree_rec(PointerOps, Depth + 1, {TE, 0});
    return;
  }
  case Instruction::ZExt:
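Note: the load cases now share one operand-setup path: the three per-state setOperandsInOrder calls collapse into a single setOperand after the switch, and only ScatterVectorize still recurses into PointerOps. Condensed control flow (a sketch: newTreeEntry details elided, the unreachable cases folded into a default):

    switch (State) {
    case TreeEntry::Vectorize:        /* (jumbled) consecutive loads */ break;
    case TreeEntry::StridedVectorize: /* strided loads */               break;
    case TreeEntry::ScatterVectorize: /* masked-gather loads */         break;
    default:                          llvm_unreachable("Unexpected loads state.");
    }
    TE->setOperand(*this);                           // shared for all load shapes
    if (State == TreeEntry::ScatterVectorize)        // only gathers need the
      buildTree_rec(PointerOps, Depth + 1, {TE, 0}); // pointer operand node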
@@ -8550,8 +8531,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
                        ReuseShuffleIndices);
      LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");

-     TE->setOperandsInOrder();
-     for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
+     TE->setOperand(*this);
+     for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
        buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
      if (ShuffleOrOp == Instruction::Trunc) {
        ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
@@ -8578,12 +8559,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
      LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");

      ValueList Left, Right;
+     VLOperands Ops(VL, VL0, *this);
      if (cast<CmpInst>(VL0)->isCommutative()) {
        // Commutative predicate - collect + sort operands of the instructions
        // so that each side is more likely to have the same opcode.
        assert(P0 == CmpInst::getSwappedPredicate(P0) &&
               "Commutative Predicate mismatch");
-       reorderInputsAccordingToOpcode(VL, Left, Right, *this);
+       Ops.reorder();
+       Left = Ops.getVL(0);
+       Right = Ops.getVL(1);
      } else {
        // Collect operands - commute if it uses the swapped predicate.
        for (Value *V : VL) {
@@ -8644,20 +8628,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
                        ReuseShuffleIndices);
      LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");

-     // Sort operands of the instructions so that each side is more likely to
-     // have the same opcode.
-     if (isa<BinaryOperator>(VL0) && isCommutative(VL0)) {
-       ValueList Left, Right;
-       reorderInputsAccordingToOpcode(VL, Left, Right, *this);
-       TE->setOperand(0, Left);
-       TE->setOperand(1, Right);
-       buildTree_rec(Left, Depth + 1, {TE, 0});
-       buildTree_rec(Right, Depth + 1, {TE, 1});
-       return;
-     }
-
-     TE->setOperandsInOrder();
-     for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
+     TE->setOperand(*this, isa<BinaryOperator>(VL0) && isCommutative(VL0));
+     for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
        buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
      return;
    }
@@ -8722,7 +8694,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
      fixupOrderingIndices(CurrentOrder);
      TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
                                   ReuseShuffleIndices, CurrentOrder);
-     TE->setOperandsInOrder();
+     TE->setOperand(*this);
      buildTree_rec(TE->getOperand(0), Depth + 1, {TE, 0});
      if (Consecutive)
        LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
@@ -8738,46 +8710,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,

      TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
                                   ReuseShuffleIndices);
-     // Sort operands of the instructions so that each side is more likely to
-     // have the same opcode.
-     if (isCommutative(VL0)) {
-       ValueList Left, Right;
-       reorderInputsAccordingToOpcode(VL, Left, Right, *this);
-       TE->setOperand(0, Left);
-       TE->setOperand(1, Right);
-       SmallVector<ValueList> Operands;
-       for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
-         Operands.emplace_back();
-         if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
-           continue;
-         for (Value *V : VL) {
-           auto *CI2 = cast<CallInst>(V);
-           Operands.back().push_back(CI2->getArgOperand(I));
-         }
-         TE->setOperand(I, Operands.back());
-       }
-       buildTree_rec(Left, Depth + 1, {TE, 0});
-       buildTree_rec(Right, Depth + 1, {TE, 1});
-       for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
-         if (Operands[I - 2].empty())
-           continue;
-         buildTree_rec(Operands[I - 2], Depth + 1, {TE, I});
-       }
-       return;
-     }
-     TE->setOperandsInOrder();
-     for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
+     TE->setOperand(*this, isCommutative(VL0));
+     for (unsigned I : seq<unsigned>(CI->arg_size())) {
        // For scalar operands no need to create an entry since no need to
        // vectorize it.
        if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
          continue;
-       ValueList Operands;
-       // Prepare the operand vector.
-       for (Value *V : VL) {
-         auto *CI2 = cast<CallInst>(V);
-         Operands.push_back(CI2->getArgOperand(I));
-       }
-       buildTree_rec(Operands, Depth + 1, {TE, I});
+       buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
      }
      return;
    }
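Note: the call case no longer rebuilds each argument list from the scalar CallInsts; setOperand has already captured every lane, so recursion simply reads TE->getOperand(I), and scalar intrinsic arguments are skipped exactly as before. Rough equivalence (sketch):

    // Old: per lane, Operands.push_back(cast<CallInst>(V)->getArgOperand(I));
    // New: the lanes were captured once by TE->setOperand(*this, ...):
    for (unsigned I : seq<unsigned>(CI->arg_size())) {
      if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
        continue; // scalar operand, nothing to vectorize
      buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
    }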
@@ -8788,43 +8727,37 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,

      // Reorder operands if reordering would enable vectorization.
      auto *CI = dyn_cast<CmpInst>(VL0);
-     if (isa<BinaryOperator>(VL0) || CI) {
+     if (CI && any_of(VL, [](Value *V) {
+           return !isa<PoisonValue>(V) && !cast<CmpInst>(V)->isCommutative();
+         })) {
+       auto *MainCI = cast<CmpInst>(S.getMainOp());
+       auto *AltCI = cast<CmpInst>(S.getAltOp());
+       CmpInst::Predicate MainP = MainCI->getPredicate();
+       CmpInst::Predicate AltP = AltCI->getPredicate();
+       assert(MainP != AltP &&
+              "Expected different main/alternate predicates.");
        ValueList Left, Right;
-       if (!CI || all_of(VL, [](Value *V) {
-             return isa<PoisonValue>(V) || cast<CmpInst>(V)->isCommutative();
-           })) {
-         reorderInputsAccordingToOpcode(VL, Left, Right, *this);
-       } else {
-         auto *MainCI = cast<CmpInst>(S.getMainOp());
-         auto *AltCI = cast<CmpInst>(S.getAltOp());
-         CmpInst::Predicate MainP = MainCI->getPredicate();
-         CmpInst::Predicate AltP = AltCI->getPredicate();
-         assert(MainP != AltP &&
-                "Expected different main/alternate predicates.");
-         // Collect operands - commute if it uses the swapped predicate or
-         // alternate operation.
-         for (Value *V : VL) {
-           if (isa<PoisonValue>(V)) {
-             Left.push_back(
-                 PoisonValue::get(MainCI->getOperand(0)->getType()));
-             Right.push_back(
-                 PoisonValue::get(MainCI->getOperand(1)->getType()));
-             continue;
-           }
-           auto *Cmp = cast<CmpInst>(V);
-           Value *LHS = Cmp->getOperand(0);
-           Value *RHS = Cmp->getOperand(1);
+       // Collect operands - commute if it uses the swapped predicate or
+       // alternate operation.
+       for (Value *V : VL) {
+         if (isa<PoisonValue>(V)) {
+           Left.push_back(PoisonValue::get(MainCI->getOperand(0)->getType()));
+           Right.push_back(PoisonValue::get(MainCI->getOperand(1)->getType()));
+           continue;
+         }
+         auto *Cmp = cast<CmpInst>(V);
+         Value *LHS = Cmp->getOperand(0);
+         Value *RHS = Cmp->getOperand(1);

-           if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
-             if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
-               std::swap(LHS, RHS);
-           } else {
-             if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
-               std::swap(LHS, RHS);
-           }
-           Left.push_back(LHS);
-           Right.push_back(RHS);
+         if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
+           if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
+             std::swap(LHS, RHS);
+         } else {
+           if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
+             std::swap(LHS, RHS);
          }
+         Left.push_back(LHS);
+         Right.push_back(RHS);
        }
        TE->setOperand(0, Left);
        TE->setOperand(1, Right);
@@ -8833,8 +8766,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
        return;
      }

-     TE->setOperandsInOrder();
-     for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
+     TE->setOperand(*this, isa<BinaryOperator>(VL0) || CI);
+     for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
        buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
      return;
    }
@@ -13539,21 +13472,6 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
  return Cost;
}

-// Perform operand reordering on the instructions in VL and return the reordered
-// operands in Left and Right.
-void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
-                                             SmallVectorImpl<Value *> &Left,
-                                             SmallVectorImpl<Value *> &Right,
-                                             const BoUpSLP &R) {
-  if (VL.empty())
-    return;
-  VLOperands Ops(VL, R);
-  // Reorder the operands in place.
-  Ops.reorder();
-  Left = Ops.getVL(0);
-  Right = Ops.getVL(1);
-}
-
Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
  auto &Res = EntryToLastInstruction.try_emplace(E).first->second;
  if (Res)