@@ -540,15 +540,20 @@ namespace {
540
540
541
541
/// This is a helper function for MergeConsecutiveStores. Stores
542
542
/// that potentially may be merged with St are placed in
543
- /// StoreNodes.
543
+ /// StoreNodes. RootNode is a chain predecessor to all store
544
+ /// candidates.
544
545
void getStoreMergeCandidates(StoreSDNode *St,
545
- SmallVectorImpl<MemOpLink> &StoreNodes);
546
+ SmallVectorImpl<MemOpLink> &StoreNodes,
547
+ SDNode *&Root);
546
548
547
549
/// Helper function for MergeConsecutiveStores. Checks if
548
550
/// candidate stores have indirect dependency through their
549
- /// operands. \return True if safe to merge.
551
+ /// operands. RootNode is the predecessor to all stores calculated
552
+ /// by getStoreMergeCandidates and is used to prune the dependency check.
553
+ /// \return True if safe to merge.
550
554
bool checkMergeStoreCandidatesForDependencies(
551
- SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores);
555
+ SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
556
+ SDNode *RootNode);
552
557
553
558
/// Merge consecutive store operations into a wide store.
554
559
/// This optimization uses wide integers or vectors when possible.
@@ -13229,7 +13234,8 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
13229
13234
}
13230
13235
13231
13236
void DAGCombiner::getStoreMergeCandidates(
13232
- StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
13237
+ StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
13238
+ SDNode *&RootNode) {
13233
13239
// This holds the base pointer, index, and the offset in bytes from the base
13234
13240
// pointer.
13235
13241
BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
@@ -13328,7 +13334,7 @@ void DAGCombiner::getStoreMergeCandidates(
13328
13334
// FIXME: We should be able to climb and
13329
13335
// descend TokenFactors to find candidates as well.
13330
13336
13331
- SDNode * RootNode = ( St->getChain() ).getNode();
13337
+ RootNode = St->getChain().getNode();
13332
13338
13333
13339
if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
13334
13340
RootNode = Ldn->getChain().getNode();
@@ -13359,21 +13365,48 @@ void DAGCombiner::getStoreMergeCandidates(
13359
13365
// through the chain). Check in parallel by searching up from
13360
13366
// non-chain operands of candidates.
13361
13367
bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
13362
- SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) {
13368
+ SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
13369
+ SDNode *RootNode) {
13363
13370
// FIXME: We should be able to truncate a full search of
13364
13371
// predecessors by doing a BFS and keeping tabs on the originating
13365
13372
// stores from which worklist nodes come in a similar way to
13366
13373
// TokenFactor simplification.
13367
13374
13368
- SmallPtrSet<const SDNode *, 16 > Visited;
13375
+ SmallPtrSet<const SDNode *, 32 > Visited;
13369
13376
SmallVector<const SDNode *, 8> Worklist;
13370
- unsigned int Max = 1024;
13377
+
13378
+ // RootNode is a predecessor to all candidates so we need not search
13379
+ // past it. Add RootNode (peeking through TokenFactors). Do not count
13380
+ // these towards size check.
13381
+
13382
+ Worklist.push_back(RootNode);
13383
+ while (!Worklist.empty()) {
13384
+ auto N = Worklist.pop_back_val();
13385
+ if (N->getOpcode() == ISD::TokenFactor) {
13386
+ for (SDValue Op : N->ops())
13387
+ Worklist.push_back(Op.getNode());
13388
+ }
13389
+ Visited.insert(N);
13390
+ }
13391
+
13392
+ // Don't count pruning nodes towards max.
13393
+ unsigned int Max = 1024 + Visited.size();
13371
13394
// Search Ops of store candidates.
13372
13395
for (unsigned i = 0; i < NumStores; ++i) {
13373
- SDNode *n = StoreNodes[i].MemNode;
13374
- // Potential loops may happen only through non-chain operands
13375
- for (unsigned j = 1; j < n->getNumOperands(); ++j)
13376
- Worklist.push_back(n->getOperand(j).getNode());
13396
+ SDNode *N = StoreNodes[i].MemNode;
13397
+ // Of the 4 Store Operands:
13398
+ // * Chain (Op 0) -> We have already considered these
13399
+ // in candidate selection and can be
13400
+ // safely ignored
13401
+ // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
13402
+ // * Address (Op 2) -> Merged addresses may only vary by a fixed constant
13403
+ // and so no cycles are possible.
13404
+ // * (Op 3) -> appears to always be undef. Cannot be source of cycle.
13405
+ //
13406
+ // Thus we need only check predecessors of the value operands.
13407
+ auto *Op = N->getOperand(1).getNode();
13408
+ if (Visited.insert(Op).second)
13409
+ Worklist.push_back(Op);
13377
13410
}
13378
13411
// Search through DAG. We can stop early if we find a store node.
13379
13412
for (unsigned i = 0; i < NumStores; ++i)
@@ -13417,8 +13450,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
13417
13450
return false;
13418
13451
13419
13452
SmallVector<MemOpLink, 8> StoreNodes;
13453
+ SDNode *RootNode;
13420
13454
// Find potential store merge candidates by searching through chain sub-DAG
13421
- getStoreMergeCandidates(St, StoreNodes);
13455
+ getStoreMergeCandidates(St, StoreNodes, RootNode );
13422
13456
13423
13457
// Check if there is anything to merge.
13424
13458
if (StoreNodes.size() < 2)
@@ -13569,7 +13603,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
13569
13603
}
13570
13604
13571
13605
// Check that we can merge these candidates without causing a cycle.
13572
- if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem)) {
13606
+ if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
13607
+ RootNode)) {
13573
13608
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
13574
13609
continue;
13575
13610
}
@@ -13633,8 +13668,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
13633
13668
}
13634
13669
13635
13670
// Check that we can merge these candidates without causing a cycle.
13636
- if (!checkMergeStoreCandidatesForDependencies(StoreNodes,
13637
- NumStoresToMerge)) {
13671
+ if (!checkMergeStoreCandidatesForDependencies(
13672
+ StoreNodes, NumStoresToMerge, RootNode )) {
13638
13673
StoreNodes.erase(StoreNodes.begin(),
13639
13674
StoreNodes.begin() + NumStoresToMerge);
13640
13675
continue;
@@ -13810,7 +13845,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
13810
13845
}
13811
13846
13812
13847
// Check that we can merge these candidates without causing a cycle.
13813
- if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem)) {
13848
+ if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
13849
+ RootNode)) {
13814
13850
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
13815
13851
continue;
13816
13852
}
0 commit comments