@@ -1707,6 +1707,18 @@ class BoUpSLP {
1707
1707
return true ;
1708
1708
}
1709
1709
1710
+ /// Returns the lane for \p V in this entry. When ReuseShuffleIndices is empty
1711
+ /// this is the position of \p V within Scalars; otherwise that position is remapped to the index at which it is found in ReuseShuffleIndices.
1712
+ int findLaneForValue (Value *V) const {
1713
+ unsigned FoundLane = std::distance (Scalars.begin (), find (Scalars, V)); // Position of V in Scalars; presumably Scalars.size () when V is absent.
1714
+ assert (FoundLane < Scalars.size () && " Couldn't find extract lane" );
1715
+ if (!ReuseShuffleIndices.empty ()) { // Remap only when reuse shuffle indices are present.
1716
+ FoundLane = std::distance (ReuseShuffleIndices.begin (),
1717
+ find (ReuseShuffleIndices, FoundLane)); // NOTE(review): no assert that FoundLane occurs in ReuseShuffleIndices - confirm callers guarantee it.
1718
+ }
1719
+ return FoundLane; // NOTE(review): FoundLane is unsigned but the declared return type is int.
1720
+ }
1721
+
1710
1722
#ifndef NDEBUG
1711
1723
/// Debug printer.
1712
1724
LLVM_DUMP_METHOD void dump () const {
@@ -4268,13 +4280,7 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL) {
4268
4280
// Add to our 'need-to-extract' list.
4269
4281
if (TreeEntry *Entry = getTreeEntry (Val)) {
4270
4282
// Find which lane we need to extract.
4271
- unsigned FoundLane = std::distance (Entry->Scalars .begin (),
4272
- find (Entry->Scalars , Val));
4273
- assert (FoundLane < Entry->Scalars .size () && " Couldn't find extract lane" );
4274
- if (!Entry->ReuseShuffleIndices .empty ()) {
4275
- FoundLane = std::distance (Entry->ReuseShuffleIndices .begin (),
4276
- find (Entry->ReuseShuffleIndices , FoundLane));
4277
- }
4283
+ unsigned FoundLane = Entry->findLaneForValue (Val);
4278
4284
ExternalUses.push_back (ExternalUser (Val, InsElt, FoundLane));
4279
4285
}
4280
4286
}
@@ -4602,8 +4608,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
4602
4608
// The pointer operand uses an in-tree scalar so we add the new BitCast
4603
4609
// to ExternalUses list to make sure that an extract will be generated
4604
4610
// in the future.
4605
- if (getTreeEntry (PO))
4606
- ExternalUses.emplace_back (PO, cast<User>(VecPtr), 0 );
4611
+ if (TreeEntry *Entry = getTreeEntry (PO)) {
4612
+ // Find which lane we need to extract.
4613
+ unsigned FoundLane = Entry->findLaneForValue (PO);
4614
+ ExternalUses.emplace_back (PO, cast<User>(VecPtr), FoundLane);
4615
+ }
4607
4616
4608
4617
NewLI = Builder.CreateAlignedLoad (VecTy, VecPtr, LI->getAlign ());
4609
4618
} else {
@@ -4654,8 +4663,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
4654
4663
// The pointer operand uses an in-tree scalar, so add the new BitCast to
4655
4664
// ExternalUses to make sure that an extract will be generated in the
4656
4665
// future.
4657
- if (getTreeEntry (ScalarPtr))
4658
- ExternalUses.push_back (ExternalUser (ScalarPtr, cast<User>(VecPtr), 0 ));
4666
+ if (TreeEntry *Entry = getTreeEntry (ScalarPtr)) {
4667
+ // Find which lane we need to extract.
4668
+ unsigned FoundLane = Entry->findLaneForValue (ScalarPtr);
4669
+ ExternalUses.push_back (
4670
+ ExternalUser (ScalarPtr, cast<User>(VecPtr), FoundLane));
4671
+ }
4659
4672
4660
4673
Value *V = propagateMetadata (ST, E->Scalars );
4661
4674
if (NeedToShuffleReuses)
@@ -4756,8 +4769,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
4756
4769
// The scalar argument uses an in-tree scalar so we add the new vectorized
4757
4770
// call to ExternalUses list to make sure that an extract will be
4758
4771
// generated in the future.
4759
- if (ScalarArg && getTreeEntry (ScalarArg))
4760
- ExternalUses.push_back (ExternalUser (ScalarArg, cast<User>(V), 0 ));
4772
+ if (ScalarArg) {
4773
+ if (TreeEntry *Entry = getTreeEntry (ScalarArg)) {
4774
+ // Find which lane we need to extract.
4775
+ unsigned FoundLane = Entry->findLaneForValue (ScalarArg);
4776
+ ExternalUses.push_back (
4777
+ ExternalUser (ScalarArg, cast<User>(V), FoundLane));
4778
+ }
4779
+ }
4761
4780
4762
4781
propagateIRFlags (V, E->Scalars , VL0);
4763
4782
if (NeedToShuffleReuses)
0 commit comments