Skip to content

Commit ac00726

Browse files
committed
Reapply "Revert "[MemCpyOpt] implement multi BB stack-move optimization""
This reverts commit 3bb32c6. Use InsertionPt for DT to handle non-memory access dominators. Differential Revision: https://reviews.llvm.org/D155406
1 parent a6986f6 commit ac00726

File tree

7 files changed

+199
-115
lines changed

7 files changed

+199
-115
lines changed

llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ class MemMoveInst;
3434
class MemorySSA;
3535
class MemorySSAUpdater;
3636
class MemSetInst;
37+
class PostDominatorTree;
3738
class StoreInst;
3839
class TargetLibraryInfo;
3940
class Value;
@@ -43,6 +44,7 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
4344
AAResults *AA = nullptr;
4445
AssumptionCache *AC = nullptr;
4546
DominatorTree *DT = nullptr;
47+
PostDominatorTree *PDT = nullptr;
4648
MemorySSA *MSSA = nullptr;
4749
MemorySSAUpdater *MSSAU = nullptr;
4850

@@ -53,7 +55,8 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
5355

5456
// Glue for the old PM.
5557
bool runImpl(Function &F, TargetLibraryInfo *TLI, AAResults *AA,
56-
AssumptionCache *AC, DominatorTree *DT, MemorySSA *MSSA);
58+
AssumptionCache *AC, DominatorTree *DT, PostDominatorTree *PDT,
59+
MemorySSA *MSSA);
5760

5861
private:
5962
// Helper functions

llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Lines changed: 161 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,14 @@
1919
#include "llvm/ADT/iterator_range.h"
2020
#include "llvm/Analysis/AliasAnalysis.h"
2121
#include "llvm/Analysis/AssumptionCache.h"
22+
#include "llvm/Analysis/CFG.h"
2223
#include "llvm/Analysis/CaptureTracking.h"
2324
#include "llvm/Analysis/GlobalsModRef.h"
2425
#include "llvm/Analysis/Loads.h"
2526
#include "llvm/Analysis/MemoryLocation.h"
2627
#include "llvm/Analysis/MemorySSA.h"
2728
#include "llvm/Analysis/MemorySSAUpdater.h"
29+
#include "llvm/Analysis/PostDominators.h"
2830
#include "llvm/Analysis/TargetLibraryInfo.h"
2931
#include "llvm/Analysis/ValueTracking.h"
3032
#include "llvm/IR/BasicBlock.h"
@@ -1415,6 +1417,66 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
14151417
return true;
14161418
}
14171419

1420+
using InsertionPt = PointerUnion<Instruction *, BasicBlock *>;
1421+
/// Find the nearest Instruction or BasicBlock that dominates both I1 and
1422+
/// I2.
1423+
static InsertionPt findNearestCommonDominator(InsertionPt I1, InsertionPt I2,
1424+
DominatorTree *DT) {
1425+
auto GetParent = [](InsertionPt I) {
1426+
if (auto *BB = dyn_cast<BasicBlock *>(I))
1427+
return BB;
1428+
return cast<Instruction *>(I)->getParent();
1429+
};
1430+
BasicBlock *BB1 = GetParent(I1);
1431+
BasicBlock *BB2 = GetParent(I2);
1432+
if (BB1 == BB2) {
1433+
// BasicBlock InsertionPt means the terminator.
1434+
if (isa<BasicBlock *>(I1))
1435+
return I2;
1436+
if (isa<BasicBlock *>(I2))
1437+
return I1;
1438+
return cast<Instruction *>(I1)->comesBefore(cast<Instruction *>(I2)) ? I1
1439+
: I2;
1440+
}
1441+
BasicBlock *DomBB = DT->findNearestCommonDominator(BB1, BB2);
1442+
if (BB2 == DomBB)
1443+
return I2;
1444+
if (BB1 == DomBB)
1445+
return I1;
1446+
return DomBB;
1447+
}
1448+
1449+
/// Find the nearest Instruction or BasicBlock that post-dominates both I1 and
1450+
/// I2.
1451+
static InsertionPt findNearestCommonPostDominator(InsertionPt I1,
1452+
InsertionPt I2,
1453+
PostDominatorTree *PDT) {
1454+
auto GetParent = [](InsertionPt I) {
1455+
if (auto *BB = dyn_cast<BasicBlock *>(I))
1456+
return BB;
1457+
return cast<Instruction *>(I)->getParent();
1458+
};
1459+
BasicBlock *BB1 = GetParent(I1);
1460+
BasicBlock *BB2 = GetParent(I2);
1461+
if (BB1 == BB2) {
1462+
// BasicBlock InsertionPt means the first non-phi instruction.
1463+
if (isa<BasicBlock *>(I1))
1464+
return I2;
1465+
if (isa<BasicBlock *>(I2))
1466+
return I1;
1467+
return cast<Instruction *>(I1)->comesBefore(cast<Instruction *>(I2)) ? I2
1468+
: I1;
1469+
}
1470+
BasicBlock *PDomBB = PDT->findNearestCommonDominator(BB1, BB2);
1471+
if (!PDomBB)
1472+
return nullptr;
1473+
if (BB2 == PDomBB)
1474+
return I2;
1475+
if (BB1 == PDomBB)
1476+
return I1;
1477+
return PDomBB;
1478+
}
1479+
14181480
// Attempts to optimize the pattern whereby memory is copied from an alloca to
14191481
// another alloca, where the two allocas don't have conflicting mod/ref. If
14201482
// successful, the two allocas can be merged into one and the transfer can be
@@ -1440,8 +1502,7 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
14401502
return false;
14411503
}
14421504

1443-
// 1. Check that copy is full. Calculate the static size of the allocas to be
1444-
// merged, bail out if we can't.
1505+
// Check that copy is full with static size.
14451506
const DataLayout &DL = DestAlloca->getModule()->getDataLayout();
14461507
std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
14471508
if (!SrcSize || SrcSize->isScalable() || Size != SrcSize->getFixedValue()) {
@@ -1455,19 +1516,16 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
14551516
return false;
14561517
}
14571518

1458-
// 2-1. Check that src and dest are static allocas, which are not affected by
1459-
// stacksave/stackrestore.
1460-
if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca() ||
1461-
SrcAlloca->getParent() != Load->getParent() ||
1462-
SrcAlloca->getParent() != Store->getParent())
1519+
if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca())
14631520
return false;
14641521

1465-
// 2-2. Check that src and dest are never captured, unescaped allocas. Also
1466-
// collect lifetime markers first/last users in order to shrink wrap the
1467-
// lifetimes, and instructions with noalias metadata to remove them.
1522+
// Check that src and dest are never captured, unescaped allocas. Also
1523+
// find the nearest common dominator and postdominator for all users in
1524+
// order to shrink wrap the lifetimes, and instructions with noalias metadata
1525+
// to remove them.
14681526

14691527
SmallVector<Instruction *, 4> LifetimeMarkers;
1470-
Instruction *FirstUser = nullptr, *LastUser = nullptr;
1528+
InsertionPt Dom = nullptr, PDom = nullptr;
14711529
SmallSet<Instruction *, 4> NoAliasInstrs;
14721530

14731531
// Recursively track the user and check whether modified alias exist.
@@ -1505,12 +1563,13 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
15051563
continue;
15061564
case UseCaptureKind::NO_CAPTURE: {
15071565
auto *UI = cast<Instruction>(U.getUser());
1508-
if (DestAlloca->getParent() != UI->getParent())
1509-
return false;
1510-
if (!FirstUser || UI->comesBefore(FirstUser))
1511-
FirstUser = UI;
1512-
if (!LastUser || LastUser->comesBefore(UI))
1513-
LastUser = UI;
1566+
if (!Dom) {
1567+
PDom = Dom = UI;
1568+
} else {
1569+
Dom = findNearestCommonDominator(Dom, UI, DT);
1570+
if (PDom)
1571+
PDom = findNearestCommonPostDominator(PDom, UI, PDT);
1572+
}
15141573
if (UI->isLifetimeStartOrEnd()) {
15151574
// We note the locations of these intrinsic calls so that we can
15161575
// delete them later if the optimization succeeds, this is safe
@@ -1534,37 +1593,64 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
15341593
return true;
15351594
};
15361595

1537-
// 3. Check that dest has no Mod/Ref, except full size lifetime intrinsics,
1538-
// from the alloca to the Store.
1596+
// Check that dest has no Mod/Ref, from the alloca to the Store, except full
1597+
// size lifetime intrinsics. And collect modref inst for the reachability
1598+
// check.
15391599
ModRefInfo DestModRef = ModRefInfo::NoModRef;
15401600
MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
1601+
SmallVector<BasicBlock *, 8> ReachabilityWorklist;
15411602
auto DestModRefCallback = [&](Instruction *UI) -> bool {
15421603
// We don't care about the store itself.
15431604
if (UI == Store)
15441605
return true;
15451606
ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
1546-
// FIXME: For multi-BB cases, we need to see reachability from it to
1547-
// store.
1548-
// Bailout if Dest may have any ModRef before Store.
1549-
if (UI->comesBefore(Store) && isModOrRefSet(Res))
1550-
return false;
1551-
DestModRef |= BAA.getModRefInfo(UI, DestLoc);
1607+
DestModRef |= Res;
1608+
if (isModOrRefSet(Res)) {
1609+
// Instructions reachability checks.
1610+
// FIXME: adding the Instruction version isPotentiallyReachableFromMany on
1611+
// lib/Analysis/CFG.cpp (currently only for BasicBlocks) might be helpful.
1612+
if (UI->getParent() == Store->getParent()) {
1613+
// The same block case is special because it's the only time we're
1614+
// looking within a single block to see which instruction comes first.
1615+
// Once we start looking at multiple blocks, the first instruction of
1616+
// the block is reachable, so we only need to determine reachability
1617+
// between whole blocks.
1618+
BasicBlock *BB = UI->getParent();
1619+
1620+
// If A comes before B, then B is definitively reachable from A.
1621+
if (UI->comesBefore(Store))
1622+
return false;
1623+
1624+
// If the user's parent block is entry, no predecessor exists.
1625+
if (BB->isEntryBlock())
1626+
return true;
15521627

1628+
// Otherwise, continue doing the normal per-BB CFG walk.
1629+
ReachabilityWorklist.append(succ_begin(BB), succ_end(BB));
1630+
} else {
1631+
ReachabilityWorklist.push_back(UI->getParent());
1632+
}
1633+
}
15531634
return true;
15541635
};
15551636

15561637
if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
15571638
return false;
1639+
// Bailout if Dest may have any ModRef before Store.
1640+
if (!ReachabilityWorklist.empty() &&
1641+
isPotentiallyReachableFromMany(ReachabilityWorklist, Store->getParent(),
1642+
nullptr, DT, nullptr))
1643+
return false;
15581644

1559-
// 3. Check that, from after the Load to the end of the BB,
1560-
// 3-1. if the dest has any Mod, src has no Ref, and
1561-
// 3-2. if the dest has any Ref, src has no Mod except full-sized lifetimes.
1645+
// Check that, from after the Load to the end of the BB,
1646+
// - if the dest has any Mod, src has no Ref, and
1647+
// - if the dest has any Ref, src has no Mod except full-sized lifetimes.
15621648
MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));
15631649

15641650
auto SrcModRefCallback = [&](Instruction *UI) -> bool {
1565-
// Any ModRef before Load doesn't matter, also Load and Store can be
1566-
// ignored.
1567-
if (UI->comesBefore(Load) || UI == Load || UI == Store)
1651+
// Any ModRef post-dominated by Load doesn't matter, also Load and Store
1652+
// themselves can be ignored.
1653+
if (PDT->dominates(Load, UI) || UI == Load || UI == Store)
15681654
return true;
15691655
ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
15701656
if ((isModSet(DestModRef) && isRefSet(Res)) ||
@@ -1596,22 +1682,48 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
15961682
ConstantInt *AllocaSize = ConstantInt::get(Type::getInt64Ty(C), Size);
15971683
// Create a new lifetime start marker before the first user of src or alloca
15981684
// users.
1599-
Builder.SetInsertPoint(FirstUser->getParent(), FirstUser->getIterator());
1600-
auto *Start = Builder.CreateLifetimeStart(SrcAlloca, AllocaSize);
1601-
auto *FirstMA = MSSA->getMemoryAccess(FirstUser);
1602-
auto *StartMA = MSSAU->createMemoryAccessBefore(Start, nullptr, FirstMA);
1685+
MemoryAccess *StartMA;
1686+
if (auto *DomI = dyn_cast_if_present<Instruction *>(Dom)) {
1687+
Builder.SetInsertPoint(DomI->getParent(), DomI->getIterator());
1688+
auto *Start = Builder.CreateLifetimeStart(SrcAlloca, AllocaSize);
1689+
StartMA = MSSAU->createMemoryAccessBefore(Start, nullptr,
1690+
MSSA->getMemoryAccess(DomI));
1691+
} else {
1692+
auto *DomB = cast<BasicBlock *>(Dom);
1693+
Builder.SetInsertPoint(DomB->getTerminator());
1694+
auto *Start = Builder.CreateLifetimeStart(SrcAlloca, AllocaSize);
1695+
StartMA = MSSAU->createMemoryAccessInBB(
1696+
Start, nullptr, Start->getParent(), MemorySSA::BeforeTerminator);
1697+
}
16031698
MSSAU->insertDef(cast<MemoryDef>(StartMA), /*RenameUses=*/true);
16041699

16051700
// Create a new lifetime end marker after the last user of src or alloca
1606-
// users.
1607-
// FIXME: If the last user is the terminator for the bb, we can insert
1608-
// lifetime.end marker to the immidiate post-dominator, but currently do
1609-
// nothing.
1610-
if (!LastUser->isTerminator()) {
1611-
Builder.SetInsertPoint(LastUser->getParent(), ++LastUser->getIterator());
1612-
auto *End = Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize);
1613-
auto *LastMA = MSSA->getMemoryAccess(LastUser);
1614-
auto *EndMA = MSSAU->createMemoryAccessAfter(End, nullptr, LastMA);
1701+
// users. If there's no such postdominator, just don't bother; we could
1702+
// create one at each exit block, but that'd be essentially semantically
1703+
// meaningless.
1704+
// If the PDom is the terminator (e.g. invoke), see the next immediate post
1705+
// dominator.
1706+
if (auto *PDomI = dyn_cast_if_present<Instruction *>(PDom);
1707+
PDomI && PDomI->isTerminator()) {
1708+
auto *IPDomNode = (*PDT)[PDomI->getParent()]->getIDom();
1709+
PDom = IPDomNode ? IPDomNode->getBlock() : nullptr;
1710+
}
1711+
if (PDom) {
1712+
MemoryAccess *EndMA;
1713+
if (auto *PDomI = dyn_cast<Instruction *>(PDom)) {
1714+
// If PDom is Instruction ptr, insert after it, because it's a user of
1715+
// SrcAlloca.
1716+
Builder.SetInsertPoint(PDomI->getParent(), ++PDomI->getIterator());
1717+
auto *End = Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize);
1718+
EndMA = MSSAU->createMemoryAccessAfter(End, nullptr,
1719+
MSSA->getMemoryAccess(PDomI));
1720+
} else {
1721+
auto *PDomB = cast<BasicBlock *>(PDom);
1722+
Builder.SetInsertPoint(PDomB, PDomB->getFirstInsertionPt());
1723+
auto *End = Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize);
1724+
EndMA = MSSAU->createMemoryAccessInBB(End, nullptr, End->getParent(),
1725+
MemorySSA::Beginning);
1726+
}
16151727
MSSAU->insertDef(cast<MemoryDef>(EndMA), /*RenameUses=*/true);
16161728
}
16171729

@@ -1999,9 +2111,10 @@ PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
19992111
auto *AA = &AM.getResult<AAManager>(F);
20002112
auto *AC = &AM.getResult<AssumptionAnalysis>(F);
20012113
auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
2114+
auto *PDT = &AM.getResult<PostDominatorTreeAnalysis>(F);
20022115
auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
20032116

2004-
bool MadeChange = runImpl(F, &TLI, AA, AC, DT, &MSSA->getMSSA());
2117+
bool MadeChange = runImpl(F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA());
20052118
if (!MadeChange)
20062119
return PreservedAnalyses::all();
20072120

@@ -2013,12 +2126,14 @@ PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
20132126

20142127
bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
20152128
AliasAnalysis *AA_, AssumptionCache *AC_,
2016-
DominatorTree *DT_, MemorySSA *MSSA_) {
2129+
DominatorTree *DT_, PostDominatorTree *PDT_,
2130+
MemorySSA *MSSA_) {
20172131
bool MadeChange = false;
20182132
TLI = TLI_;
20192133
AA = AA_;
20202134
AC = AC_;
20212135
DT = DT_;
2136+
PDT = PDT_;
20222137
MSSA = MSSA_;
20232138
MemorySSAUpdater MSSAU_(MSSA_);
20242139
MSSAU = &MSSAU_;

llvm/test/Other/new-pm-defaults.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@
190190
; CHECK-O23SZ-NEXT: Running pass: GVNPass
191191
; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
192192
; CHECK-O1-NEXT: Running pass: MemCpyOptPass
193+
; CHECK-O1-NEXT: Running analysis: PostDominatorTreeAnalysis
193194
; CHECK-O-NEXT: Running pass: SCCPPass
194195
; CHECK-O-NEXT: Running pass: BDCEPass
195196
; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis
@@ -201,7 +202,7 @@
201202
; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis
202203
; CHECK-O1-NEXT: Running pass: CoroElidePass
203204
; CHECK-O-NEXT: Running pass: ADCEPass
204-
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis
205+
; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis
205206
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
206207
; CHECK-O23SZ-NEXT: Running pass: DSEPass
207208
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo

llvm/test/Other/new-pm-lto-defaults.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,8 @@
103103
; CHECK-O23SZ-NEXT: Running pass: GVNPass on foo
104104
; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis on foo
105105
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass on foo
106-
; CHECK-O23SZ-NEXT: Running pass: DSEPass on foo
107106
; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
107+
; CHECK-O23SZ-NEXT: Running pass: DSEPass on foo
108108
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
109109
; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass on foo
110110
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass on foo

llvm/test/Other/new-pm-thinlto-postlink-defaults.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@
125125
; CHECK-O23SZ-NEXT: Running pass: GVNPass
126126
; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
127127
; CHECK-O1-NEXT: Running pass: MemCpyOptPass
128+
; CHECK-O1-NEXT: Running analysis: PostDominatorTreeAnalysis
128129
; CHECK-O-NEXT: Running pass: SCCPPass
129130
; CHECK-O-NEXT: Running pass: BDCEPass
130131
; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis
@@ -135,7 +136,7 @@
135136
; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis
136137
; CHECK-O1-NEXT: Running pass: CoroElidePass
137138
; CHECK-O-NEXT: Running pass: ADCEPass
138-
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis
139+
; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis
139140
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
140141
; CHECK-O23SZ-NEXT: Running pass: DSEPass
141142
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo

llvm/test/Other/new-pm-thinlto-prelink-defaults.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@
157157
; CHECK-O23SZ-NEXT: Running pass: GVNPass
158158
; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
159159
; CHECK-O1-NEXT: Running pass: MemCpyOptPass
160+
; CHECK-O1-NEXT: Running analysis: PostDominatorTreeAnalysis
160161
; CHECK-O-NEXT: Running pass: SCCPPass
161162
; CHECK-O-NEXT: Running pass: BDCEPass
162163
; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis
@@ -167,7 +168,7 @@
167168
; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis
168169
; CHECK-O1-NEXT: Running pass: CoroElidePass
169170
; CHECK-O-NEXT: Running pass: ADCEPass
170-
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis
171+
; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis
171172
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
172173
; CHECK-O23SZ-NEXT: Running pass: DSEPass
173174
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass

0 commit comments

Comments
 (0)