@@ -3913,6 +3913,14 @@ class BoUpSLP {
3913
3913
bool areAltOperandsProfitable(const InstructionsState &S,
3914
3914
ArrayRef<Value *> VL) const;
3915
3915
3916
+ /// Runs the preliminary checks on the scalars \p VL and returns true only if
3917
+ /// none of them rejects the bundle; \p S and both bool references are outputs.
3918
+ bool isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
3919
+ const EdgeInfo &UserTreeIdx,
3920
+ InstructionsState &S, // Out: overwritten with getSameOpcode(VL, *TLI).
3921
+ bool &TryToFindDuplicates, // Out: starts as true; cleared on bail-outs where the caller must not pack duplicates.
3922
+ bool &TrySplitVectorize) const; // Out: starts as false; set when the caller should try a split node before gathering.
3923
+
3916
3924
/// Checks if the specified list of the instructions/values can be vectorized
3917
3925
/// and fills required data before actual scheduling of the instructions.
3918
3926
TreeEntry::EntryState
@@ -9329,35 +9337,25 @@ bool BoUpSLP::canBuildSplitNode(ArrayRef<Value *> VL,
9329
9337
return true;
9330
9338
}
9331
9339
9332
- void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
9333
- const EdgeInfo &UserTreeIdx,
9334
- unsigned InterleaveFactor) {
9340
+ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
9341
+ const EdgeInfo &UserTreeIdx,
9342
+ InstructionsState &S,
9343
+ bool &TryToFindDuplicates,
9344
+ bool &TrySplitVectorize) const {
9335
9345
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
9336
9346
9337
- SmallVector<int> ReuseShuffleIndices;
9338
- SmallVector<Value *> NonUniqueValueVL(VL.begin(), VL.end());
9339
- auto TryToFindDuplicates = [&](const InstructionsState &S,
9340
- bool DoNotFail = false) {
9341
- if (tryToFindDuplicates(NonUniqueValueVL, ReuseShuffleIndices, *TTI, *TLI,
9342
- S, UserTreeIdx, DoNotFail)) {
9343
- VL = NonUniqueValueVL;
9344
- return true;
9345
- }
9346
- auto Invalid = ScheduleBundle::invalid();
9347
- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx);
9348
- return false;
9349
- };
9350
-
9351
- InstructionsState S = getSameOpcode(VL, *TLI);
9347
+ S = getSameOpcode(VL, *TLI);
9348
+ TryToFindDuplicates = true;
9349
+ TrySplitVectorize = false;
9352
9350
9353
9351
// Don't go into catchswitch blocks, which can happen with PHIs.
9354
9352
// Such blocks can only have PHIs and the catchswitch. There is no
9355
9353
// place to insert a shuffle if we need to, so just avoid that issue.
9356
9354
if (S && isa<CatchSwitchInst>(S.getMainOp()->getParent()->getTerminator())) {
9357
9355
LLVM_DEBUG(dbgs() << "SLP: bundle in catchswitch block.\n");
9358
- auto Invalid = ScheduleBundle::invalid();
9359
- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx) ;
9360
- return;
9356
+ // Do not try to pack to avoid extra instructions here.
9357
+ TryToFindDuplicates = false ;
9358
+ return false ;
9361
9359
}
9362
9360
9363
9361
// Check if this is a duplicate of another entry.
@@ -9367,24 +9365,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
9367
9365
if (E->isSame(VL)) {
9368
9366
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.getMainOp()
9369
9367
<< ".\n");
9370
- if (TryToFindDuplicates(S)) {
9371
- auto Invalid = ScheduleBundle::invalid();
9372
- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
9373
- ReuseShuffleIndices);
9374
- }
9375
- return;
9368
+ return false;
9376
9369
}
9377
9370
SmallPtrSet<Value *, 8> Values(llvm::from_range, E->Scalars);
9378
9371
if (all_of(VL, [&](Value *V) {
9379
9372
return isa<PoisonValue>(V) || Values.contains(V);
9380
9373
})) {
9381
9374
LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
9382
- if (TryToFindDuplicates(S)) {
9383
- auto Invalid = ScheduleBundle::invalid();
9384
- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
9385
- ReuseShuffleIndices);
9386
- }
9387
- return;
9375
+ return false;
9388
9376
}
9389
9377
}
9390
9378
}
@@ -9401,75 +9389,31 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
9401
9389
cast<Instruction>(I)->getOpcode() == S.getOpcode();
9402
9390
})))) {
9403
9391
LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
9404
- if (TryToFindDuplicates(S)) {
9405
- auto Invalid = ScheduleBundle::invalid();
9406
- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
9407
- ReuseShuffleIndices);
9408
- }
9409
- return;
9392
+ return false;
9410
9393
}
9411
9394
9412
9395
// Don't handle scalable vectors
9413
9396
if (S && S.getOpcode() == Instruction::ExtractElement &&
9414
9397
isa<ScalableVectorType>(
9415
9398
cast<ExtractElementInst>(S.getMainOp())->getVectorOperandType())) {
9416
9399
LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
9417
- if (TryToFindDuplicates(S)) {
9418
- auto Invalid = ScheduleBundle::invalid();
9419
- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
9420
- ReuseShuffleIndices);
9421
- }
9422
- return;
9400
+ return false;
9423
9401
}
9424
9402
9425
9403
// Don't handle vectors.
9426
9404
if (!SLPReVec && getValueType(VL.front())->isVectorTy()) {
9427
9405
LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
9428
- auto Invalid = ScheduleBundle::invalid();
9429
- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx) ;
9430
- return;
9406
+ // Do not try to pack to avoid extra instructions here.
9407
+ TryToFindDuplicates = false ;
9408
+ return false ;
9431
9409
}
9432
9410
9433
- // Tries to build split node.
9434
- auto TrySplitNode = [&](const InstructionsState &LocalState) {
9435
- SmallVector<Value *> Op1, Op2;
9436
- OrdersType ReorderIndices;
9437
- if (!canBuildSplitNode(VL, LocalState, Op1, Op2, ReorderIndices))
9438
- return false;
9439
-
9440
- SmallVector<Value *> NewVL(VL.size());
9441
- copy(Op1, NewVL.begin());
9442
- copy(Op2, std::next(NewVL.begin(), Op1.size()));
9443
- auto Invalid = ScheduleBundle::invalid();
9444
- auto *TE = newTreeEntry(VL, TreeEntry::SplitVectorize, Invalid, LocalState,
9445
- UserTreeIdx, {}, ReorderIndices);
9446
- LLVM_DEBUG(dbgs() << "SLP: split alternate node.\n"; TE->dump());
9447
- auto AddNode = [&](ArrayRef<Value *> Op, unsigned Idx) {
9448
- InstructionsState S = getSameOpcode(Op, *TLI);
9449
- if (S && (isa<LoadInst>(S.getMainOp()) ||
9450
- getSameValuesTreeEntry(S.getMainOp(), Op, /*SameVF=*/true))) {
9451
- // Build gather node for loads, they will be gathered later.
9452
- TE->CombinedEntriesWithIndices.emplace_back(VectorizableTree.size(),
9453
- Idx == 0 ? 0 : Op1.size());
9454
- (void)newTreeEntry(Op, TreeEntry::NeedToGather, Invalid, S, {TE, Idx});
9455
- } else {
9456
- TE->CombinedEntriesWithIndices.emplace_back(VectorizableTree.size(),
9457
- Idx == 0 ? 0 : Op1.size());
9458
- buildTree_rec(Op, Depth, {TE, Idx});
9459
- }
9460
- };
9461
- AddNode(Op1, 0);
9462
- AddNode(Op2, 1);
9463
- return true;
9464
- };
9465
-
9466
9411
// If all of the operands are identical or constant we have a simple solution.
9467
9412
// If we deal with insert/extract instructions, they all must have constant
9468
9413
// indices, otherwise we should gather them, not try to vectorize.
9469
9414
// If alternate op node with 2 elements with gathered operands - do not
9470
9415
// vectorize.
9471
- auto &&NotProfitableForVectorization = [&S, this,
9472
- Depth](ArrayRef<Value *> VL) {
9416
+ auto NotProfitableForVectorization = [&S, this, Depth](ArrayRef<Value *> VL) {
9473
9417
if (!S || !S.isAltShuffle() || VL.size() > 2)
9474
9418
return false;
9475
9419
if (VectorizableTree.size() < MinTreeSize)
@@ -9549,18 +9493,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
9549
9493
!all_of(VL, isVectorLikeInstWithConstOps)) ||
9550
9494
NotProfitableForVectorization(VL)) {
9551
9495
if (!S) {
9552
- auto [MainOp, AltOp] = getMainAltOpsNoStateVL(VL);
9553
- // Last chance to try to vectorize alternate node.
9554
- if (MainOp && AltOp && TrySplitNode(InstructionsState(MainOp, AltOp)))
9555
- return;
9496
+ LLVM_DEBUG(dbgs() << "SLP: Try split and if failed, gathering due to "
9497
+ "C,S,B,O, small shuffle. \n");
9498
+ TrySplitVectorize = true;
9499
+ return false ;
9556
9500
}
9557
9501
LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O, small shuffle. \n");
9558
- if (TryToFindDuplicates(S)) {
9559
- auto Invalid = ScheduleBundle::invalid();
9560
- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
9561
- ReuseShuffleIndices);
9562
- }
9563
- return;
9502
+ return false;
9564
9503
}
9565
9504
9566
9505
// Don't vectorize ephemeral values.
@@ -9569,9 +9508,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
9569
9508
if (EphValues.count(V)) {
9570
9509
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
9571
9510
<< ") is ephemeral.\n");
9572
- auto Invalid = ScheduleBundle::invalid();
9573
- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx) ;
9574
- return;
9511
+ // Do not try to pack to avoid extra instructions here.
9512
+ TryToFindDuplicates = false ;
9513
+ return false ;
9575
9514
}
9576
9515
}
9577
9516
}
@@ -9620,12 +9559,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
9620
9559
if (PreferScalarize) {
9621
9560
LLVM_DEBUG(dbgs() << "SLP: The instructions are in tree and alternate "
9622
9561
"node is not profitable.\n");
9623
- if (TryToFindDuplicates(S)) {
9624
- auto Invalid = ScheduleBundle::invalid();
9625
- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
9626
- ReuseShuffleIndices);
9627
- }
9628
- return;
9562
+ return false;
9629
9563
}
9630
9564
}
9631
9565
@@ -9634,12 +9568,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
9634
9568
for (Value *V : VL) {
9635
9569
if (UserIgnoreList->contains(V)) {
9636
9570
LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
9637
- if (TryToFindDuplicates(S)) {
9638
- auto Invalid = ScheduleBundle::invalid();
9639
- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
9640
- ReuseShuffleIndices);
9641
- }
9642
- return;
9571
+ return false;
9643
9572
}
9644
9573
}
9645
9574
}
@@ -9669,8 +9598,79 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
9669
9598
// Do not vectorize EH and non-returning blocks, not profitable in most
9670
9599
// cases.
9671
9600
LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
9601
+ return false;
9602
+ }
9603
+ return true;
9604
+ }
9605
+
9606
+ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
9607
+ const EdgeInfo &UserTreeIdx,
9608
+ unsigned InterleaveFactor) {
9609
+ assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
9610
+
9611
+ SmallVector<int> ReuseShuffleIndices;
9612
+ SmallVector<Value *> NonUniqueValueVL(VL.begin(), VL.end());
9613
+ auto TryToFindDuplicates = [&](const InstructionsState &S,
9614
+ bool DoNotFail = false) {
9615
+ if (tryToFindDuplicates(NonUniqueValueVL, ReuseShuffleIndices, *TTI, *TLI,
9616
+ S, UserTreeIdx, DoNotFail)) {
9617
+ VL = NonUniqueValueVL;
9618
+ return true;
9619
+ }
9672
9620
auto Invalid = ScheduleBundle::invalid();
9673
9621
newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx);
9622
+ return false;
9623
+ };
9624
+
9625
+ InstructionsState S = InstructionsState::invalid();
9626
+ // Tries to build split node.
9627
+ auto TrySplitNode = [&](const InstructionsState &LocalState) {
9628
+ SmallVector<Value *> Op1, Op2;
9629
+ OrdersType ReorderIndices;
9630
+ if (!canBuildSplitNode(VL, LocalState, Op1, Op2, ReorderIndices))
9631
+ return false;
9632
+
9633
+ SmallVector<Value *> NewVL(VL.size());
9634
+ copy(Op1, NewVL.begin());
9635
+ copy(Op2, std::next(NewVL.begin(), Op1.size()));
9636
+ auto Invalid = ScheduleBundle::invalid();
9637
+ auto *TE = newTreeEntry(VL, TreeEntry::SplitVectorize, Invalid, LocalState,
9638
+ UserTreeIdx, {}, ReorderIndices);
9639
+ LLVM_DEBUG(dbgs() << "SLP: split alternate node.\n"; TE->dump());
9640
+ auto AddNode = [&](ArrayRef<Value *> Op, unsigned Idx) {
9641
+ InstructionsState S = getSameOpcode(Op, *TLI);
9642
+ if (S && (isa<LoadInst>(S.getMainOp()) ||
9643
+ getSameValuesTreeEntry(S.getMainOp(), Op, /*SameVF=*/true))) {
9644
+ // Build gather node for loads, they will be gathered later.
9645
+ TE->CombinedEntriesWithIndices.emplace_back(VectorizableTree.size(),
9646
+ Idx == 0 ? 0 : Op1.size());
9647
+ (void)newTreeEntry(Op, TreeEntry::NeedToGather, Invalid, S, {TE, Idx});
9648
+ } else {
9649
+ TE->CombinedEntriesWithIndices.emplace_back(VectorizableTree.size(),
9650
+ Idx == 0 ? 0 : Op1.size());
9651
+ buildTree_rec(Op, Depth, {TE, Idx});
9652
+ }
9653
+ };
9654
+ AddNode(Op1, 0);
9655
+ AddNode(Op2, 1);
9656
+ return true;
9657
+ };
9658
+
9659
+ bool TryToPackDuplicates;
9660
+ bool TrySplitVectorize;
9661
+ if (!isLegalToVectorizeScalars(VL, Depth, UserTreeIdx, S, TryToPackDuplicates,
9662
+ TrySplitVectorize)) {
9663
+ if (TrySplitVectorize) {
9664
+ auto [MainOp, AltOp] = getMainAltOpsNoStateVL(VL);
9665
+ // Last chance to try to vectorize alternate node.
9666
+ if (MainOp && AltOp && TrySplitNode(InstructionsState(MainOp, AltOp)))
9667
+ return;
9668
+ }
9669
+ if (!TryToPackDuplicates || TryToFindDuplicates(S)) {
9670
+ auto Invalid = ScheduleBundle::invalid();
9671
+ newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
9672
+ ReuseShuffleIndices);
9673
+ }
9674
9674
return;
9675
9675
}
9676
9676
@@ -9683,6 +9683,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
9683
9683
return;
9684
9684
9685
9685
// Perform specific checks for each particular instruction kind.
9686
+ bool IsScatterVectorizeUserTE =
9687
+ UserTreeIdx.UserTE &&
9688
+ UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize;
9686
9689
OrdersType CurrentOrder;
9687
9690
SmallVector<Value *> PointerOps;
9688
9691
TreeEntry::EntryState State = getScalarsVectorizationState(
@@ -9694,6 +9697,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
9694
9697
return;
9695
9698
}
9696
9699
9700
+ Instruction *VL0 = S.getMainOp();
9701
+ BasicBlock *BB = VL0->getParent();
9697
9702
auto &BSRef = BlocksSchedules[BB];
9698
9703
if (!BSRef)
9699
9704
BSRef = std::make_unique<BlockScheduling>(BB);
0 commit comments