@@ -1949,6 +1949,9 @@ class BoUpSLP {
1949
1949
1950
1950
/// A vector of operand vectors.
1951
1951
SmallVector<OperandDataVec, 4> OpsVec;
1952
+ /// When VL[0] is IntrinsicInst, ArgSize is 2 (only the first two arguments
1953
+ /// are treated as commutable). Otherwise ArgSize is User::getNumOperands.
1954
+ unsigned ArgSize;
1952
1955
1953
1956
const TargetLibraryInfo &TLI;
1954
1957
const DataLayout &DL;
@@ -2337,10 +2340,11 @@ class BoUpSLP {
2337
2340
assert((empty() || VL.size() == getNumLanes()) &&
2338
2341
"Expected same number of lanes");
2339
2342
assert(isa<Instruction>(VL[0]) && "Expected instruction");
2343
+ unsigned NumOperands = cast<Instruction>(VL[0])->getNumOperands();
2344
+ // IntrinsicInst::isCommutative returns true if swapping the first "two"
2345
+ // arguments to the intrinsic produces the same result.
2340
2346
constexpr unsigned IntrinsicNumOperands = 2;
2341
- unsigned NumOperands = isa<IntrinsicInst>(VL[0])
2342
- ? IntrinsicNumOperands
2343
- : cast<Instruction>(VL[0])->getNumOperands();
2347
+ ArgSize = isa<IntrinsicInst>(VL[0]) ? IntrinsicNumOperands : NumOperands;
2344
2348
OpsVec.resize(NumOperands);
2345
2349
unsigned NumLanes = VL.size();
2346
2350
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
@@ -2366,7 +2370,7 @@ class BoUpSLP {
2366
2370
}
2367
2371
2368
2372
/// \returns the number of operands (ArgSize; for intrinsics this can differ from OpsVec.size()).
2369
- unsigned getNumOperands() const { return OpsVec.size() ; }
2373
+ unsigned getNumOperands() const { return ArgSize ; }
2370
2374
2371
2375
/// \returns the number of lanes.
2372
2376
unsigned getNumLanes() const { return OpsVec[0].size(); }
@@ -2542,7 +2546,8 @@ class BoUpSLP {
2542
2546
ArrayRef<OperandData> Op0 = OpsVec.front();
2543
2547
for (const OperandData &Data : Op0)
2544
2548
UniqueValues.insert(Data.V);
2545
- for (ArrayRef<OperandData> Op : drop_begin(OpsVec, 1)) {
2549
+ for (ArrayRef<OperandData> Op : make_range(
2550
+ OpsVec.begin() + 1, OpsVec.begin() + getNumOperands())) {
2546
2551
if (any_of(Op, [&UniqueValues](const OperandData &Data) {
2547
2552
return !UniqueValues.contains(Data.V);
2548
2553
}))
@@ -3064,13 +3069,6 @@ class BoUpSLP {
3064
3069
SmallVector<SmallVector<std::pair<LoadInst *, int>>>,
3065
3070
8> &GatheredLoads);
3066
3071
3067
- /// Reorder commutative or alt operands to get better probability of
3068
- /// generating vectorized code.
3069
- static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
3070
- SmallVectorImpl<Value *> &Left,
3071
- SmallVectorImpl<Value *> &Right,
3072
- const BoUpSLP &R);
3073
-
3074
3072
/// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
3075
3073
/// users of \p TE and collects the stores. It returns the map from the store
3076
3074
/// pointers to the collected stores.
@@ -3265,22 +3263,10 @@ class BoUpSLP {
3265
3263
copy(OpVL, Operands[OpIdx].begin());
3266
3264
}
3267
3265
3268
- /// Set the operands of this bundle in their original order.
3269
- void setOperandsInOrder() {
3270
- assert(Operands.empty() && "Already initialized?");
3271
- auto *I0 = cast<Instruction>(Scalars[0]);
3272
- Operands.resize(I0->getNumOperands());
3273
- unsigned NumLanes = Scalars.size();
3274
- for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
3275
- OpIdx != NumOperands; ++OpIdx) {
3276
- Operands[OpIdx].resize(NumLanes);
3277
- for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3278
- auto *I = cast<Instruction>(Scalars[Lane]);
3279
- assert(I->getNumOperands() == NumOperands &&
3280
- "Expected same number of operands");
3281
- Operands[OpIdx][Lane] = I->getOperand(OpIdx);
3282
- }
3283
- }
3266
+ /// Set the first \p NumOperands operands of this bundle from \p Ops.
3267
+ void setOperand(const VLOperands &Ops, unsigned NumOperands) {
3268
+ for (unsigned I : seq(NumOperands))
3269
+ setOperand(I, Ops.getVL(I));
3284
3270
}
3285
3271
3286
3272
/// Reorders operands of the node to the given mask \p Mask.
@@ -8329,7 +8315,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8329
8315
{}, CurrentOrder);
8330
8316
LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
8331
8317
8332
- TE->setOperandsInOrder();
8318
+ VLOperands Ops(VL, *this);
8319
+ TE->setOperand(Ops, VL0->getNumOperands());
8333
8320
buildTree_rec(TE->getOperand(1), Depth + 1, {TE, 1});
8334
8321
return;
8335
8322
}
@@ -8350,27 +8337,27 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8350
8337
LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
8351
8338
else
8352
8339
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
8353
- TE->setOperandsInOrder();
8354
8340
break;
8355
8341
case TreeEntry::StridedVectorize:
8356
8342
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
8357
8343
TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S,
8358
8344
UserTreeIdx, ReuseShuffleIndices, CurrentOrder);
8359
- TE->setOperandsInOrder();
8360
8345
LLVM_DEBUG(dbgs() << "SLP: added a vector of strided loads.\n");
8361
8346
break;
8362
8347
case TreeEntry::ScatterVectorize:
8363
8348
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
8364
8349
TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
8365
8350
UserTreeIdx, ReuseShuffleIndices);
8366
- TE->setOperandsInOrder();
8367
- buildTree_rec(PointerOps, Depth + 1, {TE, 0});
8368
8351
LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
8369
8352
break;
8370
8353
case TreeEntry::CombinedVectorize:
8371
8354
case TreeEntry::NeedToGather:
8372
8355
llvm_unreachable("Unexpected loads state.");
8373
8356
}
8357
+ VLOperands Ops(VL, *this);
8358
+ TE->setOperand(Ops, VL0->getNumOperands());
8359
+ if (State == TreeEntry::ScatterVectorize)
8360
+ buildTree_rec(PointerOps, Depth + 1, {TE, 0});
8374
8361
return;
8375
8362
}
8376
8363
case Instruction::ZExt:
@@ -8408,8 +8395,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8408
8395
ReuseShuffleIndices);
8409
8396
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
8410
8397
8411
- TE->setOperandsInOrder();
8412
- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8398
+ VLOperands Ops(VL, *this);
8399
+ TE->setOperand(Ops, VL0->getNumOperands());
8400
+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
8413
8401
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
8414
8402
if (ShuffleOrOp == Instruction::Trunc) {
8415
8403
ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
@@ -8436,12 +8424,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8436
8424
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
8437
8425
8438
8426
ValueList Left, Right;
8427
+ VLOperands Ops(VL, *this);
8439
8428
if (cast<CmpInst>(VL0)->isCommutative()) {
8440
8429
// Commutative predicate - collect + sort operands of the instructions
8441
8430
// so that each side is more likely to have the same opcode.
8442
8431
assert(P0 == CmpInst::getSwappedPredicate(P0) &&
8443
8432
"Commutative Predicate mismatch");
8444
- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8433
+ Ops.reorder();
8434
+ Left = Ops.getVL(0);
8435
+ Right = Ops.getVL(1);
8445
8436
} else {
8446
8437
// Collect operands - commute if it uses the swapped predicate.
8447
8438
for (Value *V : VL) {
@@ -8497,20 +8488,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8497
8488
ReuseShuffleIndices);
8498
8489
LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
8499
8490
8491
+ VLOperands Ops(VL, *this);
8500
8492
// Sort operands of the instructions so that each side is more likely to
8501
8493
// have the same opcode.
8502
- if (isa<BinaryOperator>(VL0) && isCommutative(VL0)) {
8503
- ValueList Left, Right;
8504
- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8505
- TE->setOperand(0, Left);
8506
- TE->setOperand(1, Right);
8507
- buildTree_rec(Left, Depth + 1, {TE, 0});
8508
- buildTree_rec(Right, Depth + 1, {TE, 1});
8509
- return;
8510
- }
8511
-
8512
- TE->setOperandsInOrder();
8513
- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8494
+ if (isa<BinaryOperator>(VL0) && isCommutative(VL0))
8495
+ Ops.reorder();
8496
+ TE->setOperand(Ops, VL0->getNumOperands());
8497
+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
8514
8498
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
8515
8499
return;
8516
8500
}
@@ -8575,7 +8559,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8575
8559
fixupOrderingIndices(CurrentOrder);
8576
8560
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
8577
8561
ReuseShuffleIndices, CurrentOrder);
8578
- TE->setOperandsInOrder();
8562
+ VLOperands Ops(VL, *this);
8563
+ TE->setOperand(Ops, VL0->getNumOperands());
8579
8564
buildTree_rec(TE->getOperand(0), Depth + 1, {TE, 0});
8580
8565
if (Consecutive)
8581
8566
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
@@ -8591,46 +8576,18 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8591
8576
8592
8577
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
8593
8578
ReuseShuffleIndices);
8579
+ VLOperands Ops(VL, *this);
8594
8580
// Sort operands of the instructions so that each side is more likely to
8595
8581
// have the same opcode.
8596
- if (isCommutative(VL0)) {
8597
- ValueList Left, Right;
8598
- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8599
- TE->setOperand(0, Left);
8600
- TE->setOperand(1, Right);
8601
- SmallVector<ValueList> Operands;
8602
- for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
8603
- Operands.emplace_back();
8604
- if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
8605
- continue;
8606
- for (Value *V : VL) {
8607
- auto *CI2 = cast<CallInst>(V);
8608
- Operands.back().push_back(CI2->getArgOperand(I));
8609
- }
8610
- TE->setOperand(I, Operands.back());
8611
- }
8612
- buildTree_rec(Left, Depth + 1, {TE, 0});
8613
- buildTree_rec(Right, Depth + 1, {TE, 1});
8614
- for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
8615
- if (Operands[I - 2].empty())
8616
- continue;
8617
- buildTree_rec(Operands[I - 2], Depth + 1, {TE, I});
8618
- }
8619
- return;
8620
- }
8621
- TE->setOperandsInOrder();
8622
- for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
8582
+ if (isCommutative(VL0))
8583
+ Ops.reorder();
8584
+ TE->setOperand(Ops, VL0->getNumOperands());
8585
+ for (unsigned I : seq<unsigned>(CI->arg_size())) {
8623
8586
// For scalar operands no need to create an entry since no need to
8624
8587
// vectorize it.
8625
8588
if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
8626
8589
continue;
8627
- ValueList Operands;
8628
- // Prepare the operand vector.
8629
- for (Value *V : VL) {
8630
- auto *CI2 = cast<CallInst>(V);
8631
- Operands.push_back(CI2->getArgOperand(I));
8632
- }
8633
- buildTree_rec(Operands, Depth + 1, {TE, I});
8590
+ buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
8634
8591
}
8635
8592
return;
8636
8593
}
@@ -8639,21 +8596,22 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8639
8596
ReuseShuffleIndices);
8640
8597
LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
8641
8598
8599
+ VLOperands Ops(VL, *this);
8642
8600
// Reorder operands if reordering would enable vectorization.
8643
8601
auto *CI = dyn_cast<CmpInst>(VL0);
8644
8602
if (isa<BinaryOperator>(VL0) || CI) {
8645
- ValueList Left, Right;
8646
8603
if (!CI || all_of(VL, [](Value *V) {
8647
8604
return cast<CmpInst>(V)->isCommutative();
8648
8605
})) {
8649
- reorderInputsAccordingToOpcode(VL, Left, Right, *this );
8606
+ Ops.reorder( );
8650
8607
} else {
8651
8608
auto *MainCI = cast<CmpInst>(S.MainOp);
8652
8609
auto *AltCI = cast<CmpInst>(S.AltOp);
8653
8610
CmpInst::Predicate MainP = MainCI->getPredicate();
8654
8611
CmpInst::Predicate AltP = AltCI->getPredicate();
8655
8612
assert(MainP != AltP &&
8656
8613
"Expected different main/alternate predicates.");
8614
+ ValueList Left, Right;
8657
8615
// Collect operands - commute if it uses the swapped predicate or
8658
8616
// alternate operation.
8659
8617
for (Value *V : VL) {
@@ -8671,16 +8629,16 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8671
8629
Left.push_back(LHS);
8672
8630
Right.push_back(RHS);
8673
8631
}
8632
+ TE->setOperand(0, Left);
8633
+ TE->setOperand(1, Right);
8634
+ buildTree_rec(Left, Depth + 1, {TE, 0});
8635
+ buildTree_rec(Right, Depth + 1, {TE, 1});
8636
+ return;
8674
8637
}
8675
- TE->setOperand(0, Left);
8676
- TE->setOperand(1, Right);
8677
- buildTree_rec(Left, Depth + 1, {TE, 0});
8678
- buildTree_rec(Right, Depth + 1, {TE, 1});
8679
- return;
8680
8638
}
8681
8639
8682
- TE->setOperandsInOrder( );
8683
- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8640
+ TE->setOperand(Ops, VL0->getNumOperands() );
8641
+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
8684
8642
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
8685
8643
return;
8686
8644
}
@@ -13300,21 +13258,6 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
13300
13258
return Cost;
13301
13259
}
13302
13260
13303
- // Perform operand reordering on the instructions in VL and return the reordered
13304
- // operands in Left and Right.
13305
- void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
13306
- SmallVectorImpl<Value *> &Left,
13307
- SmallVectorImpl<Value *> &Right,
13308
- const BoUpSLP &R) {
13309
- if (VL.empty())
13310
- return;
13311
- VLOperands Ops(VL, R);
13312
- // Reorder the operands in place.
13313
- Ops.reorder();
13314
- Left = Ops.getVL(0);
13315
- Right = Ops.getVL(1);
13316
- }
13317
-
13318
13261
Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
13319
13262
auto &Res = EntryToLastInstruction.try_emplace(E).first->second;
13320
13263
if (Res)
0 commit comments