@@ -1326,6 +1326,9 @@ class BoUpSLP {
1326
1326
}
1327
1327
LLVM_DUMP_METHOD void dump() const { dump(dbgs()); }
1328
1328
#endif
1329
/// Equality comparison added by this hunk: two EdgeInfo values compare equal
/// only when both their UserTE and their EdgeIdx members match.
// NOTE(review): presumably this is what allows EdgeInfo values to be searched
// for with is_contained(...) elsewhere in the patch - confirm against callers.
+ bool operator == (const EdgeInfo &Other) const {
1330
+ return UserTE == Other.UserTE && EdgeIdx == Other.EdgeIdx;
1331
+ }
1329
1332
};
1330
1333
1331
1334
/// A helper class used for scoring candidates for two consecutive lanes.
@@ -2412,12 +2415,25 @@ class BoUpSLP {
2412
2415
/// Looks up the tree entry that corresponds to operand \p OpIdx of \p UserTE.
/// The operand's scalars are fetched via UserTE->getOperand(OpIdx) and scanned
/// with find_if; the entry recorded in TE by the predicate is returned when a
/// scalar matches, otherwise nullptr is returned.
///
/// Lines prefixed with '-' are the old implementation (first scalar with any
/// tree entry wins, accepted only if TE->isSame(VL)); lines prefixed with '+'
/// are the new implementation (an entry is accepted only if its
/// UserTreeIndices contains the edge EdgeInfo(UserTE, OpIdx)).
TreeEntry *getVectorizedOperand(TreeEntry *UserTE, unsigned OpIdx) {
2413
2416
ArrayRef<Value *> VL = UserTE->getOperand(OpIdx);
2414
2417
TreeEntry *TE = nullptr;
2415
// Old predicate: captured `this` and TE explicitly.
- const auto *It = find_if(VL, [this, &TE ](Value *V) {
2418
// New predicate: captures everything by reference because it now also reads
// UserTE and OpIdx in addition to writing TE.
+ const auto *It = find_if(VL, [& ](Value *V) {
2416
2419
// TE is overwritten for every scalar visited, so it may be left non-null even
// when the predicate ends up returning false; it is only returned to the
// caller when find_if reports a match.
TE = getTreeEntry(V);
2417
- return TE;
2420
// First choice: the entry returned by getTreeEntry(V), provided it lists this
// exact (UserTE, OpIdx) edge among its users.
+ if (TE && is_contained(TE->UserTreeIndices, EdgeInfo(UserTE, OpIdx)))
2421
+ return true;
2422
// Otherwise consult MultiNodeScalars for further entries registered for V.
// This `It` is local to the lambda and is a different variable from the outer
// `It` that receives the find_if result.
+ auto It = MultiNodeScalars.find(V);
2423
+ if (It != MultiNodeScalars.end()) {
2424
+ for (TreeEntry *E : It->second) {
2425
+ if (is_contained(E->UserTreeIndices, EdgeInfo(UserTE, OpIdx))) {
2426
// Publish the matching entry through TE before stopping the search.
+ TE = E;
2427
+ return true;
2428
+ }
2429
+ }
2430
+ }
2431
+ return false;
2418
2432
});
2419
- if (It != VL.end() && TE->isSame(VL))
2433
+ if (It != VL.end()) {
2434
// The isSame(VL) test is no longer part of the condition; it is only asserted.
// NOTE(review): the assertion message misspells "Expected" as "Expedted".
+ assert(TE->isSame(VL) && "Expedted same scalars.");
2420
2435
return TE;
2436
+ }
2421
2437
// No scalar of the operand produced an accepted tree entry.
return nullptr;
2422
2438
}
2423
2439
@@ -5806,18 +5822,21 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
5806
5822
if (TreeEntry *E = getTreeEntry(S.OpValue)) {
5807
5823
LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n");
5808
5824
if (!E->isSame(VL)) {
5809
- LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
5810
- if (TryToFindDuplicates(S))
5811
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
5812
- ReuseShuffleIndicies);
5825
+ if (!doesNotNeedToBeScheduled(S.OpValue)) {
5826
+ LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
5827
+ if (TryToFindDuplicates(S))
5828
+ newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
5829
+ ReuseShuffleIndicies);
5830
+ return;
5831
+ }
5832
+ } else {
5833
+ // Record the reuse of the tree node. FIXME, currently this is only used
5834
+ // to properly draw the graph rather than for the actual vectorization.
5835
+ E->UserTreeIndices.push_back(UserTreeIdx);
5836
+ LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
5837
+ << ".\n");
5813
5838
return;
5814
5839
}
5815
- // Record the reuse of the tree node. FIXME, currently this is only used to
5816
- // properly draw the graph rather than for the actual vectorization.
5817
- E->UserTreeIndices.push_back(UserTreeIdx);
5818
- LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
5819
- << ".\n");
5820
- return;
5821
5840
}
5822
5841
5823
5842
// Check that none of the instructions in the bundle are already in the tree.
0 commit comments