@@ -2019,6 +2019,9 @@ class BoUpSLP {
2019
2019
2020
2020
/// A vector of operand vectors.
2021
2021
SmallVector<OperandDataVec, 4> OpsVec;
2022
+ /// Operand count reported by getNumOperands(). When the first instruction in
2023
+ /// VL is an IntrinsicInst this is 2 (IntrinsicNumOperands), not CallBase::arg_size; otherwise it is User::getNumOperands.
2024
+ unsigned ArgSize = 0;
2022
2025
2023
2026
const TargetLibraryInfo &TLI;
2024
2027
const DataLayout &DL;
@@ -2406,10 +2409,12 @@ class BoUpSLP {
2406
2409
assert(!VL.empty() && "Bad VL");
2407
2410
assert((empty() || VL.size() == getNumLanes()) &&
2408
2411
"Expected same number of lanes");
2412
+ // IntrinsicInst::isCommutative returns true if swapping the first "two"
2413
+ // arguments to the intrinsic produces the same result.
2409
2414
constexpr unsigned IntrinsicNumOperands = 2;
2410
2415
auto *VL0 = cast<Instruction>(*find_if(VL, IsaPred<Instruction>));
2411
- unsigned NumOperands = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands
2412
- : VL0->getNumOperands() ;
2416
+ unsigned NumOperands = VL0->getNumOperands();
2417
+ ArgSize = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands : NumOperands ;
2413
2418
OpsVec.resize(NumOperands);
2414
2419
unsigned NumLanes = VL.size();
2415
2420
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
@@ -2442,7 +2447,7 @@ class BoUpSLP {
2442
2447
}
2443
2448
2444
2449
/// \returns the operand count stored in ArgSize: 2 for intrinsics, otherwise the instruction's operand count.
2445
- unsigned getNumOperands() const { return OpsVec.size() ; }
2450
+ unsigned getNumOperands() const { return ArgSize ; }
2446
2451
2447
2452
/// \returns the number of lanes.
2448
2453
unsigned getNumLanes() const {
  // The lane count is read from the first operand vector, so OpsVec must
  // already hold at least one entry when this is called.
  return OpsVec.front().size();
}
@@ -2623,7 +2628,8 @@ class BoUpSLP {
2623
2628
ArrayRef<OperandData> Op0 = OpsVec.front();
2624
2629
for (const OperandData &Data : Op0)
2625
2630
UniqueValues.insert(Data.V);
2626
- for (ArrayRef<OperandData> Op : drop_begin(OpsVec, 1)) {
2631
+ for (ArrayRef<OperandData> Op :
2632
+ ArrayRef(OpsVec).slice(1, getNumOperands() - 1)) {
2627
2633
if (any_of(Op, [&UniqueValues](const OperandData &Data) {
2628
2634
return !UniqueValues.contains(Data.V);
2629
2635
}))
@@ -3144,13 +3150,6 @@ class BoUpSLP {
3144
3150
SmallVector<SmallVector<std::pair<LoadInst *, int>>>,
3145
3151
8> &GatheredLoads);
3146
3152
3147
- /// Reorder commutative or alt operands to get better probability of
3148
- /// generating vectorized code.
3149
- static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
3150
- SmallVectorImpl<Value *> &Left,
3151
- SmallVectorImpl<Value *> &Right,
3152
- const BoUpSLP &R);
3153
-
3154
3153
/// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
3155
3154
/// users of \p TE and collects the stores. It returns the map from the store
3156
3155
/// pointers to the collected stores.
@@ -3345,27 +3344,15 @@ class BoUpSLP {
3345
3344
copy(OpVL, Operands[OpIdx].begin());
3346
3345
}
3347
3346
3348
- /// Set the operands of this bundle in their original order.
3349
- void setOperandsInOrder() {
3350
- assert(Operands.empty() && "Already initialized?");
3351
- auto *I0 = cast<Instruction>(*find_if(Scalars, IsaPred<Instruction>));
3352
- Operands.resize(I0->getNumOperands());
3353
- unsigned NumLanes = Scalars.size();
3354
- for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
3355
- OpIdx != NumOperands; ++OpIdx) {
3356
- Operands[OpIdx].resize(NumLanes);
3357
- for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3358
- if (isa<PoisonValue>(Scalars[Lane])) {
3359
- Operands[OpIdx][Lane] =
3360
- PoisonValue::get(I0->getOperand(OpIdx)->getType());
3361
- continue;
3362
- }
3363
- auto *I = cast<Instruction>(Scalars[Lane]);
3364
- assert(I->getNumOperands() == NumOperands &&
3365
- "Expected same number of operands");
3366
- Operands[OpIdx][Lane] = I->getOperand(OpIdx);
3367
- }
3368
- }
3347
+ /// Set this bundle's operand from \p VL.
3348
+ void setOperand(ArrayRef<Value *> VL, const BoUpSLP &R,
3349
+ bool RequireReorder = false) {
3350
+ VLOperands Ops(VL, R);
3351
+ if (RequireReorder)
3352
+ Ops.reorder();
3353
+ for (unsigned I :
3354
+ seq<unsigned>(cast<Instruction>(VL[0])->getNumOperands()))
3355
+ setOperand(I, Ops.getVL(I));
3369
3356
}
3370
3357
3371
3358
/// Reorders operands of the node to the given mask \p Mask.
@@ -8471,7 +8458,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8471
8458
{}, CurrentOrder);
8472
8459
LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
8473
8460
8474
- TE->setOperandsInOrder( );
8461
+ TE->setOperand(VL, *this );
8475
8462
buildTree_rec(TE->getOperand(1), Depth + 1, {TE, 1});
8476
8463
return;
8477
8464
}
@@ -8492,27 +8479,26 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8492
8479
LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
8493
8480
else
8494
8481
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
8495
- TE->setOperandsInOrder();
8496
8482
break;
8497
8483
case TreeEntry::StridedVectorize:
8498
8484
// Vectorizing non-consecutive loads as strided loads.
8499
8485
TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S,
8500
8486
UserTreeIdx, ReuseShuffleIndices, CurrentOrder);
8501
- TE->setOperandsInOrder();
8502
8487
LLVM_DEBUG(dbgs() << "SLP: added a vector of strided loads.\n");
8503
8488
break;
8504
8489
case TreeEntry::ScatterVectorize:
8505
8490
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
8506
8491
TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
8507
8492
UserTreeIdx, ReuseShuffleIndices);
8508
- TE->setOperandsInOrder();
8509
- buildTree_rec(PointerOps, Depth + 1, {TE, 0});
8510
8493
LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
8511
8494
break;
8512
8495
case TreeEntry::CombinedVectorize:
8513
8496
case TreeEntry::NeedToGather:
8514
8497
llvm_unreachable("Unexpected loads state.");
8515
8498
}
8499
+ TE->setOperand(VL, *this);
8500
+ if (State == TreeEntry::ScatterVectorize)
8501
+ buildTree_rec(PointerOps, Depth + 1, {TE, 0});
8516
8502
return;
8517
8503
}
8518
8504
case Instruction::ZExt:
@@ -8550,8 +8536,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8550
8536
ReuseShuffleIndices);
8551
8537
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
8552
8538
8553
- TE->setOperandsInOrder( );
8554
- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8539
+ TE->setOperand(VL, *this );
8540
+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
8555
8541
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
8556
8542
if (ShuffleOrOp == Instruction::Trunc) {
8557
8543
ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
@@ -8578,12 +8564,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8578
8564
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
8579
8565
8580
8566
ValueList Left, Right;
8567
+ VLOperands Ops(VL, *this);
8581
8568
if (cast<CmpInst>(VL0)->isCommutative()) {
8582
8569
// Commutative predicate - collect + sort operands of the instructions
8583
8570
// so that each side is more likely to have the same opcode.
8584
8571
assert(P0 == CmpInst::getSwappedPredicate(P0) &&
8585
8572
"Commutative Predicate mismatch");
8586
- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8573
+ Ops.reorder();
8574
+ Left = Ops.getVL(0);
8575
+ Right = Ops.getVL(1);
8587
8576
} else {
8588
8577
// Collect operands - commute if it uses the swapped predicate.
8589
8578
for (Value *V : VL) {
@@ -8644,20 +8633,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8644
8633
ReuseShuffleIndices);
8645
8634
LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
8646
8635
8647
- // Sort operands of the instructions so that each side is more likely to
8648
- // have the same opcode.
8649
- if (isa<BinaryOperator>(VL0) && isCommutative(VL0)) {
8650
- ValueList Left, Right;
8651
- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8652
- TE->setOperand(0, Left);
8653
- TE->setOperand(1, Right);
8654
- buildTree_rec(Left, Depth + 1, {TE, 0});
8655
- buildTree_rec(Right, Depth + 1, {TE, 1});
8656
- return;
8657
- }
8658
-
8659
- TE->setOperandsInOrder();
8660
- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8636
+ TE->setOperand(VL, *this, isa<BinaryOperator>(VL0) && isCommutative(VL0));
8637
+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
8661
8638
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
8662
8639
return;
8663
8640
}
@@ -8722,7 +8699,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8722
8699
fixupOrderingIndices(CurrentOrder);
8723
8700
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
8724
8701
ReuseShuffleIndices, CurrentOrder);
8725
- TE->setOperandsInOrder( );
8702
+ TE->setOperand(VL, *this );
8726
8703
buildTree_rec(TE->getOperand(0), Depth + 1, {TE, 0});
8727
8704
if (Consecutive)
8728
8705
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
@@ -8738,46 +8715,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8738
8715
8739
8716
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
8740
8717
ReuseShuffleIndices);
8741
- // Sort operands of the instructions so that each side is more likely to
8742
- // have the same opcode.
8743
- if (isCommutative(VL0)) {
8744
- ValueList Left, Right;
8745
- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8746
- TE->setOperand(0, Left);
8747
- TE->setOperand(1, Right);
8748
- SmallVector<ValueList> Operands;
8749
- for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
8750
- Operands.emplace_back();
8751
- if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
8752
- continue;
8753
- for (Value *V : VL) {
8754
- auto *CI2 = cast<CallInst>(V);
8755
- Operands.back().push_back(CI2->getArgOperand(I));
8756
- }
8757
- TE->setOperand(I, Operands.back());
8758
- }
8759
- buildTree_rec(Left, Depth + 1, {TE, 0});
8760
- buildTree_rec(Right, Depth + 1, {TE, 1});
8761
- for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
8762
- if (Operands[I - 2].empty())
8763
- continue;
8764
- buildTree_rec(Operands[I - 2], Depth + 1, {TE, I});
8765
- }
8766
- return;
8767
- }
8768
- TE->setOperandsInOrder();
8769
- for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
8718
+ TE->setOperand(VL, *this, isCommutative(VL0));
8719
+ for (unsigned I : seq<unsigned>(CI->arg_size())) {
8770
8720
// For scalar operands no need to create an entry since no need to
8771
8721
// vectorize it.
8772
8722
if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
8773
8723
continue;
8774
- ValueList Operands;
8775
- // Prepare the operand vector.
8776
- for (Value *V : VL) {
8777
- auto *CI2 = cast<CallInst>(V);
8778
- Operands.push_back(CI2->getArgOperand(I));
8779
- }
8780
- buildTree_rec(Operands, Depth + 1, {TE, I});
8724
+ buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
8781
8725
}
8782
8726
return;
8783
8727
}
@@ -8788,43 +8732,37 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8788
8732
8789
8733
// Reorder operands if reordering would enable vectorization.
8790
8734
auto *CI = dyn_cast<CmpInst>(VL0);
8791
- if (isa<BinaryOperator>(VL0) || CI) {
8735
+ if (CI && any_of(VL, [](Value *V) {
8736
+ return !isa<PoisonValue>(V) && !cast<CmpInst>(V)->isCommutative();
8737
+ })) {
8738
+ auto *MainCI = cast<CmpInst>(S.getMainOp());
8739
+ auto *AltCI = cast<CmpInst>(S.getAltOp());
8740
+ CmpInst::Predicate MainP = MainCI->getPredicate();
8741
+ CmpInst::Predicate AltP = AltCI->getPredicate();
8742
+ assert(MainP != AltP &&
8743
+ "Expected different main/alternate predicates.");
8792
8744
ValueList Left, Right;
8793
- if (!CI || all_of(VL, [](Value *V) {
8794
- return isa<PoisonValue>(V) || cast<CmpInst>(V)->isCommutative();
8795
- })) {
8796
- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8797
- } else {
8798
- auto *MainCI = cast<CmpInst>(S.getMainOp());
8799
- auto *AltCI = cast<CmpInst>(S.getAltOp());
8800
- CmpInst::Predicate MainP = MainCI->getPredicate();
8801
- CmpInst::Predicate AltP = AltCI->getPredicate();
8802
- assert(MainP != AltP &&
8803
- "Expected different main/alternate predicates.");
8804
- // Collect operands - commute if it uses the swapped predicate or
8805
- // alternate operation.
8806
- for (Value *V : VL) {
8807
- if (isa<PoisonValue>(V)) {
8808
- Left.push_back(
8809
- PoisonValue::get(MainCI->getOperand(0)->getType()));
8810
- Right.push_back(
8811
- PoisonValue::get(MainCI->getOperand(1)->getType()));
8812
- continue;
8813
- }
8814
- auto *Cmp = cast<CmpInst>(V);
8815
- Value *LHS = Cmp->getOperand(0);
8816
- Value *RHS = Cmp->getOperand(1);
8745
+ // Collect operands - commute if it uses the swapped predicate or
8746
+ // alternate operation.
8747
+ for (Value *V : VL) {
8748
+ if (isa<PoisonValue>(V)) {
8749
+ Left.push_back(PoisonValue::get(MainCI->getOperand(0)->getType()));
8750
+ Right.push_back(PoisonValue::get(MainCI->getOperand(1)->getType()));
8751
+ continue;
8752
+ }
8753
+ auto *Cmp = cast<CmpInst>(V);
8754
+ Value *LHS = Cmp->getOperand(0);
8755
+ Value *RHS = Cmp->getOperand(1);
8817
8756
8818
- if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
8819
- if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8820
- std::swap(LHS, RHS);
8821
- } else {
8822
- if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8823
- std::swap(LHS, RHS);
8824
- }
8825
- Left.push_back(LHS);
8826
- Right.push_back(RHS);
8757
+ if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
8758
+ if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8759
+ std::swap(LHS, RHS);
8760
+ } else {
8761
+ if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8762
+ std::swap(LHS, RHS);
8827
8763
}
8764
+ Left.push_back(LHS);
8765
+ Right.push_back(RHS);
8828
8766
}
8829
8767
TE->setOperand(0, Left);
8830
8768
TE->setOperand(1, Right);
@@ -8833,8 +8771,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8833
8771
return;
8834
8772
}
8835
8773
8836
- TE->setOperandsInOrder( );
8837
- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8774
+ TE->setOperand(VL, *this, isa<BinaryOperator>(VL0) || CI );
8775
+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
8838
8776
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
8839
8777
return;
8840
8778
}
@@ -13539,21 +13477,6 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
13539
13477
return Cost;
13540
13478
}
13541
13479
13542
- // Perform operand reordering on the instructions in VL and return the reordered
13543
- // operands in Left and Right.
13544
- void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
13545
- SmallVectorImpl<Value *> &Left,
13546
- SmallVectorImpl<Value *> &Right,
13547
- const BoUpSLP &R) {
13548
- if (VL.empty())
13549
- return;
13550
- VLOperands Ops(VL, R);
13551
- // Reorder the operands in place.
13552
- Ops.reorder();
13553
- Left = Ops.getVL(0);
13554
- Right = Ops.getVL(1);
13555
- }
13556
-
13557
13480
Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
13558
13481
auto &Res = EntryToLastInstruction.try_emplace(E).first->second;
13559
13482
if (Res)
0 commit comments