Skip to content

Commit e0f9cc7

Browse files
committed
Revert "Reapply "Revert "[MemCpyOpt] implement multi BB stack-move optimization"""
Breaks multiple bots. e.g. https://lab.llvm.org/buildbot/#/builders/19/builds/18856 This reverts commit ac00726.
1 parent fbcce33 commit e0f9cc7

File tree

7 files changed

+115
-199
lines changed

7 files changed

+115
-199
lines changed

llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ class MemMoveInst;
3434
class MemorySSA;
3535
class MemorySSAUpdater;
3636
class MemSetInst;
37-
class PostDominatorTree;
3837
class StoreInst;
3938
class TargetLibraryInfo;
4039
class Value;
@@ -44,7 +43,6 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
4443
AAResults *AA = nullptr;
4544
AssumptionCache *AC = nullptr;
4645
DominatorTree *DT = nullptr;
47-
PostDominatorTree *PDT = nullptr;
4846
MemorySSA *MSSA = nullptr;
4947
MemorySSAUpdater *MSSAU = nullptr;
5048

@@ -55,8 +53,7 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
5553

5654
// Glue for the old PM.
5755
bool runImpl(Function &F, TargetLibraryInfo *TLI, AAResults *AA,
58-
AssumptionCache *AC, DominatorTree *DT, PostDominatorTree *PDT,
59-
MemorySSA *MSSA);
56+
AssumptionCache *AC, DominatorTree *DT, MemorySSA *MSSA);
6057

6158
private:
6259
// Helper functions

llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Lines changed: 46 additions & 161 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,12 @@
1919
#include "llvm/ADT/iterator_range.h"
2020
#include "llvm/Analysis/AliasAnalysis.h"
2121
#include "llvm/Analysis/AssumptionCache.h"
22-
#include "llvm/Analysis/CFG.h"
2322
#include "llvm/Analysis/CaptureTracking.h"
2423
#include "llvm/Analysis/GlobalsModRef.h"
2524
#include "llvm/Analysis/Loads.h"
2625
#include "llvm/Analysis/MemoryLocation.h"
2726
#include "llvm/Analysis/MemorySSA.h"
2827
#include "llvm/Analysis/MemorySSAUpdater.h"
29-
#include "llvm/Analysis/PostDominators.h"
3028
#include "llvm/Analysis/TargetLibraryInfo.h"
3129
#include "llvm/Analysis/ValueTracking.h"
3230
#include "llvm/IR/BasicBlock.h"
@@ -1417,66 +1415,6 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
14171415
return true;
14181416
}
14191417

1420-
using InsertionPt = PointerUnion<Instruction *, BasicBlock *>;
1421-
/// Find the nearest Instruction or BasicBlock that dominates both I1 and
1422-
/// I2.
1423-
static InsertionPt findNearestCommonDominator(InsertionPt I1, InsertionPt I2,
1424-
DominatorTree *DT) {
1425-
auto GetParent = [](InsertionPt I) {
1426-
if (auto *BB = dyn_cast<BasicBlock *>(I))
1427-
return BB;
1428-
return cast<Instruction *>(I)->getParent();
1429-
};
1430-
BasicBlock *BB1 = GetParent(I1);
1431-
BasicBlock *BB2 = GetParent(I2);
1432-
if (BB1 == BB2) {
1433-
// BasicBlock InsertionPt means the terminator.
1434-
if (isa<BasicBlock *>(I1))
1435-
return I2;
1436-
if (isa<BasicBlock *>(I2))
1437-
return I1;
1438-
return cast<Instruction *>(I1)->comesBefore(cast<Instruction *>(I2)) ? I1
1439-
: I2;
1440-
}
1441-
BasicBlock *DomBB = DT->findNearestCommonDominator(BB1, BB2);
1442-
if (BB2 == DomBB)
1443-
return I2;
1444-
if (BB1 == DomBB)
1445-
return I1;
1446-
return DomBB;
1447-
}
1448-
1449-
/// Find the nearest Instruction or BasicBlock that post-dominates both I1 and
1450-
/// I2.
1451-
static InsertionPt findNearestCommonPostDominator(InsertionPt I1,
1452-
InsertionPt I2,
1453-
PostDominatorTree *PDT) {
1454-
auto GetParent = [](InsertionPt I) {
1455-
if (auto *BB = dyn_cast<BasicBlock *>(I))
1456-
return BB;
1457-
return cast<Instruction *>(I)->getParent();
1458-
};
1459-
BasicBlock *BB1 = GetParent(I1);
1460-
BasicBlock *BB2 = GetParent(I2);
1461-
if (BB1 == BB2) {
1462-
// BasicBlock InsertionPt means the first non-phi instruction.
1463-
if (isa<BasicBlock *>(I1))
1464-
return I2;
1465-
if (isa<BasicBlock *>(I2))
1466-
return I1;
1467-
return cast<Instruction *>(I1)->comesBefore(cast<Instruction *>(I2)) ? I2
1468-
: I1;
1469-
}
1470-
BasicBlock *PDomBB = PDT->findNearestCommonDominator(BB1, BB2);
1471-
if (!PDomBB)
1472-
return nullptr;
1473-
if (BB2 == PDomBB)
1474-
return I2;
1475-
if (BB1 == PDomBB)
1476-
return I1;
1477-
return PDomBB;
1478-
}
1479-
14801418
// Attempts to optimize the pattern whereby memory is copied from an alloca to
14811419
// another alloca, where the two allocas don't have conflicting mod/ref. If
14821420
// successful, the two allocas can be merged into one and the transfer can be
@@ -1502,7 +1440,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
15021440
return false;
15031441
}
15041442

1505-
// Check that copy is full with static size.
1443+
// 1. Check that copy is full. Calculate the static size of the allocas to be
1444+
// merged, bail out if we can't.
15061445
const DataLayout &DL = DestAlloca->getModule()->getDataLayout();
15071446
std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
15081447
if (!SrcSize || SrcSize->isScalable() || Size != SrcSize->getFixedValue()) {
@@ -1516,16 +1455,19 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
15161455
return false;
15171456
}
15181457

1519-
if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca())
1458+
// 2-1. Check that src and dest are static allocas, which are not affected by
1459+
// stacksave/stackrestore.
1460+
if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca() ||
1461+
SrcAlloca->getParent() != Load->getParent() ||
1462+
SrcAlloca->getParent() != Store->getParent())
15201463
return false;
15211464

1522-
// Check that src and dest are never captured, unescaped allocas. Also
1523-
// find the nearest common dominator and postdominator for all users in
1524-
// order to shrink wrap the lifetimes, and instructions with noalias metadata
1525-
// to remove them.
1465+
// 2-2. Check that src and dest are never captured, unescaped allocas. Also
1466+
// collect lifetime markers first/last users in order to shrink wrap the
1467+
// lifetimes, and instructions with noalias metadata to remove them.
15261468

15271469
SmallVector<Instruction *, 4> LifetimeMarkers;
1528-
InsertionPt Dom = nullptr, PDom = nullptr;
1470+
Instruction *FirstUser = nullptr, *LastUser = nullptr;
15291471
SmallSet<Instruction *, 4> NoAliasInstrs;
15301472

15311473
// Recursively track the user and check whether modified alias exist.
@@ -1563,13 +1505,12 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
15631505
continue;
15641506
case UseCaptureKind::NO_CAPTURE: {
15651507
auto *UI = cast<Instruction>(U.getUser());
1566-
if (!Dom) {
1567-
PDom = Dom = UI;
1568-
} else {
1569-
Dom = findNearestCommonDominator(Dom, UI, DT);
1570-
if (PDom)
1571-
PDom = findNearestCommonPostDominator(PDom, UI, PDT);
1572-
}
1508+
if (DestAlloca->getParent() != UI->getParent())
1509+
return false;
1510+
if (!FirstUser || UI->comesBefore(FirstUser))
1511+
FirstUser = UI;
1512+
if (!LastUser || LastUser->comesBefore(UI))
1513+
LastUser = UI;
15731514
if (UI->isLifetimeStartOrEnd()) {
15741515
// We note the locations of these intrinsic calls so that we can
15751516
// delete them later if the optimization succeeds, this is safe
@@ -1593,64 +1534,37 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
15931534
return true;
15941535
};
15951536

1596-
// Check that dest has no Mod/Ref, from the alloca to the Store, except full
1597-
// size lifetime intrinsics. And collect modref inst for the reachability
1598-
// check.
1537+
// 3. Check that dest has no Mod/Ref, except full size lifetime intrinsics,
1538+
// from the alloca to the Store.
15991539
ModRefInfo DestModRef = ModRefInfo::NoModRef;
16001540
MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
1601-
SmallVector<BasicBlock *, 8> ReachabilityWorklist;
16021541
auto DestModRefCallback = [&](Instruction *UI) -> bool {
16031542
// We don't care about the store itself.
16041543
if (UI == Store)
16051544
return true;
16061545
ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
1607-
DestModRef |= Res;
1608-
if (isModOrRefSet(Res)) {
1609-
// Instructions reachability checks.
1610-
// FIXME: adding the Instruction version isPotentiallyReachableFromMany on
1611-
// lib/Analysis/CFG.cpp (currently only for BasicBlocks) might be helpful.
1612-
if (UI->getParent() == Store->getParent()) {
1613-
// The same block case is special because it's the only time we're
1614-
// looking within a single block to see which instruction comes first.
1615-
// Once we start looking at multiple blocks, the first instruction of
1616-
// the block is reachable, so we only need to determine reachability
1617-
// between whole blocks.
1618-
BasicBlock *BB = UI->getParent();
1619-
1620-
// If A comes before B, then B is definitively reachable from A.
1621-
if (UI->comesBefore(Store))
1622-
return false;
1623-
1624-
// If the user's parent block is entry, no predecessor exists.
1625-
if (BB->isEntryBlock())
1626-
return true;
1546+
// FIXME: For multi-BB cases, we need to see reachability from it to
1547+
// store.
1548+
// Bailout if Dest may have any ModRef before Store.
1549+
if (UI->comesBefore(Store) && isModOrRefSet(Res))
1550+
return false;
1551+
DestModRef |= BAA.getModRefInfo(UI, DestLoc);
16271552

1628-
// Otherwise, continue doing the normal per-BB CFG walk.
1629-
ReachabilityWorklist.append(succ_begin(BB), succ_end(BB));
1630-
} else {
1631-
ReachabilityWorklist.push_back(UI->getParent());
1632-
}
1633-
}
16341553
return true;
16351554
};
16361555

16371556
if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
16381557
return false;
1639-
// Bailout if Dest may have any ModRef before Store.
1640-
if (!ReachabilityWorklist.empty() &&
1641-
isPotentiallyReachableFromMany(ReachabilityWorklist, Store->getParent(),
1642-
nullptr, DT, nullptr))
1643-
return false;
16441558

1645-
// Check that, from after the Load to the end of the BB,
1646-
// - if the dest has any Mod, src has no Ref, and
1647-
// - if the dest has any Ref, src has no Mod except full-sized lifetimes.
1559+
// 3. Check that, from after the Load to the end of the BB,
1560+
// 3-1. if the dest has any Mod, src has no Ref, and
1561+
// 3-2. if the dest has any Ref, src has no Mod except full-sized lifetimes.
16481562
MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));
16491563

16501564
auto SrcModRefCallback = [&](Instruction *UI) -> bool {
1651-
// Any ModRef post-dominated by Load doesn't matter, also Load and Store
1652-
// themselves can be ignored.
1653-
if (PDT->dominates(Load, UI) || UI == Load || UI == Store)
1565+
// Any ModRef before Load doesn't matter, also Load and Store can be
1566+
// ignored.
1567+
if (UI->comesBefore(Load) || UI == Load || UI == Store)
16541568
return true;
16551569
ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
16561570
if ((isModSet(DestModRef) && isRefSet(Res)) ||
@@ -1682,48 +1596,22 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
16821596
ConstantInt *AllocaSize = ConstantInt::get(Type::getInt64Ty(C), Size);
16831597
// Create a new lifetime start marker before the first user of src or alloca
16841598
// users.
1685-
MemoryAccess *StartMA;
1686-
if (auto *DomI = dyn_cast_if_present<Instruction *>(Dom)) {
1687-
Builder.SetInsertPoint(DomI->getParent(), DomI->getIterator());
1688-
auto *Start = Builder.CreateLifetimeStart(SrcAlloca, AllocaSize);
1689-
StartMA = MSSAU->createMemoryAccessBefore(Start, nullptr,
1690-
MSSA->getMemoryAccess(DomI));
1691-
} else {
1692-
auto *DomB = cast<BasicBlock *>(Dom);
1693-
Builder.SetInsertPoint(DomB->getTerminator());
1694-
auto *Start = Builder.CreateLifetimeStart(SrcAlloca, AllocaSize);
1695-
StartMA = MSSAU->createMemoryAccessInBB(
1696-
Start, nullptr, Start->getParent(), MemorySSA::BeforeTerminator);
1697-
}
1599+
Builder.SetInsertPoint(FirstUser->getParent(), FirstUser->getIterator());
1600+
auto *Start = Builder.CreateLifetimeStart(SrcAlloca, AllocaSize);
1601+
auto *FirstMA = MSSA->getMemoryAccess(FirstUser);
1602+
auto *StartMA = MSSAU->createMemoryAccessBefore(Start, nullptr, FirstMA);
16981603
MSSAU->insertDef(cast<MemoryDef>(StartMA), /*RenameUses=*/true);
16991604

17001605
// Create a new lifetime end marker after the last user of src or alloca
1701-
// users. If there's no such postdominator, just don't bother; we could
1702-
// create one at each exit block, but that'd be essentially semantically
1703-
// meaningless.
1704-
// If the PDom is the terminator (e.g. invoke), see the next immediate post
1705-
// dominator.
1706-
if (auto *PDomI = dyn_cast_if_present<Instruction *>(PDom);
1707-
PDomI && PDomI->isTerminator()) {
1708-
auto *IPDomNode = (*PDT)[PDomI->getParent()]->getIDom();
1709-
PDom = IPDomNode ? IPDomNode->getBlock() : nullptr;
1710-
}
1711-
if (PDom) {
1712-
MemoryAccess *EndMA;
1713-
if (auto *PDomI = dyn_cast<Instruction *>(PDom)) {
1714-
// If PDom is Instruction ptr, insert after it, because it's a user of
1715-
// SrcAlloca.
1716-
Builder.SetInsertPoint(PDomI->getParent(), ++PDomI->getIterator());
1717-
auto *End = Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize);
1718-
EndMA = MSSAU->createMemoryAccessAfter(End, nullptr,
1719-
MSSA->getMemoryAccess(PDomI));
1720-
} else {
1721-
auto *PDomB = cast<BasicBlock *>(PDom);
1722-
Builder.SetInsertPoint(PDomB, PDomB->getFirstInsertionPt());
1723-
auto *End = Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize);
1724-
EndMA = MSSAU->createMemoryAccessInBB(End, nullptr, End->getParent(),
1725-
MemorySSA::Beginning);
1726-
}
1606+
// users.
1607+
// FIXME: If the last user is the terminator for the bb, we can insert
1608+
// lifetime.end marker to the immidiate post-dominator, but currently do
1609+
// nothing.
1610+
if (!LastUser->isTerminator()) {
1611+
Builder.SetInsertPoint(LastUser->getParent(), ++LastUser->getIterator());
1612+
auto *End = Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize);
1613+
auto *LastMA = MSSA->getMemoryAccess(LastUser);
1614+
auto *EndMA = MSSAU->createMemoryAccessAfter(End, nullptr, LastMA);
17271615
MSSAU->insertDef(cast<MemoryDef>(EndMA), /*RenameUses=*/true);
17281616
}
17291617

@@ -2111,10 +1999,9 @@ PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
21111999
auto *AA = &AM.getResult<AAManager>(F);
21122000
auto *AC = &AM.getResult<AssumptionAnalysis>(F);
21132001
auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
2114-
auto *PDT = &AM.getResult<PostDominatorTreeAnalysis>(F);
21152002
auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
21162003

2117-
bool MadeChange = runImpl(F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA());
2004+
bool MadeChange = runImpl(F, &TLI, AA, AC, DT, &MSSA->getMSSA());
21182005
if (!MadeChange)
21192006
return PreservedAnalyses::all();
21202007

@@ -2126,14 +2013,12 @@ PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
21262013

21272014
bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
21282015
AliasAnalysis *AA_, AssumptionCache *AC_,
2129-
DominatorTree *DT_, PostDominatorTree *PDT_,
2130-
MemorySSA *MSSA_) {
2016+
DominatorTree *DT_, MemorySSA *MSSA_) {
21312017
bool MadeChange = false;
21322018
TLI = TLI_;
21332019
AA = AA_;
21342020
AC = AC_;
21352021
DT = DT_;
2136-
PDT = PDT_;
21372022
MSSA = MSSA_;
21382023
MemorySSAUpdater MSSAU_(MSSA_);
21392024
MSSAU = &MSSAU_;

llvm/test/Other/new-pm-defaults.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,6 @@
190190
; CHECK-O23SZ-NEXT: Running pass: GVNPass
191191
; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
192192
; CHECK-O1-NEXT: Running pass: MemCpyOptPass
193-
; CHECK-O1-NEXT: Running analysis: PostDominatorTreeAnalysis
194193
; CHECK-O-NEXT: Running pass: SCCPPass
195194
; CHECK-O-NEXT: Running pass: BDCEPass
196195
; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis
@@ -202,7 +201,7 @@
202201
; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis
203202
; CHECK-O1-NEXT: Running pass: CoroElidePass
204203
; CHECK-O-NEXT: Running pass: ADCEPass
205-
; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis
204+
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis
206205
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
207206
; CHECK-O23SZ-NEXT: Running pass: DSEPass
208207
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo

llvm/test/Other/new-pm-lto-defaults.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,8 @@
103103
; CHECK-O23SZ-NEXT: Running pass: GVNPass on foo
104104
; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis on foo
105105
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass on foo
106-
; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
107106
; CHECK-O23SZ-NEXT: Running pass: DSEPass on foo
107+
; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
108108
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo
109109
; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass on foo
110110
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass on foo

llvm/test/Other/new-pm-thinlto-postlink-defaults.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,6 @@
125125
; CHECK-O23SZ-NEXT: Running pass: GVNPass
126126
; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
127127
; CHECK-O1-NEXT: Running pass: MemCpyOptPass
128-
; CHECK-O1-NEXT: Running analysis: PostDominatorTreeAnalysis
129128
; CHECK-O-NEXT: Running pass: SCCPPass
130129
; CHECK-O-NEXT: Running pass: BDCEPass
131130
; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis
@@ -136,7 +135,7 @@
136135
; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis
137136
; CHECK-O1-NEXT: Running pass: CoroElidePass
138137
; CHECK-O-NEXT: Running pass: ADCEPass
139-
; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis
138+
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis
140139
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
141140
; CHECK-O23SZ-NEXT: Running pass: DSEPass
142141
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo

llvm/test/Other/new-pm-thinlto-prelink-defaults.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,6 @@
157157
; CHECK-O23SZ-NEXT: Running pass: GVNPass
158158
; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
159159
; CHECK-O1-NEXT: Running pass: MemCpyOptPass
160-
; CHECK-O1-NEXT: Running analysis: PostDominatorTreeAnalysis
161160
; CHECK-O-NEXT: Running pass: SCCPPass
162161
; CHECK-O-NEXT: Running pass: BDCEPass
163162
; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis
@@ -168,7 +167,7 @@
168167
; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis
169168
; CHECK-O1-NEXT: Running pass: CoroElidePass
170169
; CHECK-O-NEXT: Running pass: ADCEPass
171-
; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis
170+
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis
172171
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass
173172
; CHECK-O23SZ-NEXT: Running pass: DSEPass
174173
; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass

0 commit comments

Comments
 (0)