19
19
#include " llvm/ADT/iterator_range.h"
20
20
#include " llvm/Analysis/AliasAnalysis.h"
21
21
#include " llvm/Analysis/AssumptionCache.h"
22
+ #include " llvm/Analysis/CFG.h"
22
23
#include " llvm/Analysis/CaptureTracking.h"
23
24
#include " llvm/Analysis/GlobalsModRef.h"
24
25
#include " llvm/Analysis/Loads.h"
25
26
#include " llvm/Analysis/MemoryLocation.h"
26
27
#include " llvm/Analysis/MemorySSA.h"
27
28
#include " llvm/Analysis/MemorySSAUpdater.h"
29
+ #include " llvm/Analysis/PostDominators.h"
28
30
#include " llvm/Analysis/TargetLibraryInfo.h"
29
31
#include " llvm/Analysis/ValueTracking.h"
30
32
#include " llvm/IR/BasicBlock.h"
@@ -1415,6 +1417,66 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1415
1417
return true ;
1416
1418
}
1417
1419
1420
+ using InsertionPt = PointerUnion<Instruction *, BasicBlock *>;
1421
+ // / Find the nearest Instruction or BasicBlock that dominates both I1 and
1422
+ // / I2.
1423
+ static InsertionPt findNearestCommonDominator (InsertionPt I1, InsertionPt I2,
1424
+ DominatorTree *DT) {
1425
+ auto GetParent = [](InsertionPt I) {
1426
+ if (auto *BB = dyn_cast<BasicBlock *>(I))
1427
+ return BB;
1428
+ return cast<Instruction *>(I)->getParent ();
1429
+ };
1430
+ BasicBlock *BB1 = GetParent (I1);
1431
+ BasicBlock *BB2 = GetParent (I2);
1432
+ if (BB1 == BB2) {
1433
+ // BasicBlock InsertionPt means the terminator.
1434
+ if (isa<BasicBlock *>(I1))
1435
+ return I2;
1436
+ if (isa<BasicBlock *>(I2))
1437
+ return I1;
1438
+ return cast<Instruction *>(I1)->comesBefore (cast<Instruction *>(I2)) ? I1
1439
+ : I2;
1440
+ }
1441
+ BasicBlock *DomBB = DT->findNearestCommonDominator (BB1, BB2);
1442
+ if (BB2 == DomBB)
1443
+ return I2;
1444
+ if (BB1 == DomBB)
1445
+ return I1;
1446
+ return DomBB;
1447
+ }
1448
+
1449
+ // / Find the nearest Instruction or BasicBlock that post-dominates both I1 and
1450
+ // / I2.
1451
+ static InsertionPt findNearestCommonPostDominator (InsertionPt I1,
1452
+ InsertionPt I2,
1453
+ PostDominatorTree *PDT) {
1454
+ auto GetParent = [](InsertionPt I) {
1455
+ if (auto *BB = dyn_cast<BasicBlock *>(I))
1456
+ return BB;
1457
+ return cast<Instruction *>(I)->getParent ();
1458
+ };
1459
+ BasicBlock *BB1 = GetParent (I1);
1460
+ BasicBlock *BB2 = GetParent (I2);
1461
+ if (BB1 == BB2) {
1462
+ // BasicBlock InsertionPt means the first non-phi instruction.
1463
+ if (isa<BasicBlock *>(I1))
1464
+ return I2;
1465
+ if (isa<BasicBlock *>(I2))
1466
+ return I1;
1467
+ return cast<Instruction *>(I1)->comesBefore (cast<Instruction *>(I2)) ? I2
1468
+ : I1;
1469
+ }
1470
+ BasicBlock *PDomBB = PDT->findNearestCommonDominator (BB1, BB2);
1471
+ if (!PDomBB)
1472
+ return nullptr ;
1473
+ if (BB2 == PDomBB)
1474
+ return I2;
1475
+ if (BB1 == PDomBB)
1476
+ return I1;
1477
+ return PDomBB;
1478
+ }
1479
+
1418
1480
// Attempts to optimize the pattern whereby memory is copied from an alloca to
1419
1481
// another alloca, where the two allocas don't have conflicting mod/ref. If
1420
1482
// successful, the two allocas can be merged into one and the transfer can be
@@ -1440,8 +1502,7 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1440
1502
return false ;
1441
1503
}
1442
1504
1443
- // 1. Check that copy is full. Calculate the static size of the allocas to be
1444
- // merged, bail out if we can't.
1505
+ // Check that copy is full with static size.
1445
1506
const DataLayout &DL = DestAlloca->getModule ()->getDataLayout ();
1446
1507
std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize (DL);
1447
1508
if (!SrcSize || SrcSize->isScalable () || Size != SrcSize->getFixedValue ()) {
@@ -1455,19 +1516,16 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1455
1516
return false ;
1456
1517
}
1457
1518
1458
- // 2-1. Check that src and dest are static allocas, which are not affected by
1459
- // stacksave/stackrestore.
1460
- if (!SrcAlloca->isStaticAlloca () || !DestAlloca->isStaticAlloca () ||
1461
- SrcAlloca->getParent () != Load->getParent () ||
1462
- SrcAlloca->getParent () != Store->getParent ())
1519
+ if (!SrcAlloca->isStaticAlloca () || !DestAlloca->isStaticAlloca ())
1463
1520
return false ;
1464
1521
1465
- // 2-2. Check that src and dest are never captured, unescaped allocas. Also
1466
- // collect lifetime markers first/last users in order to shrink wrap the
1467
- // lifetimes, and instructions with noalias metadata to remove them.
1522
+ // Check that src and dest are never captured, unescaped allocas. Also
1523
+ // find the nearest common dominator and postdominator for all users in
1524
+ // order to shrink wrap the lifetimes, and instructions with noalias metadata
1525
+ // to remove them.
1468
1526
1469
1527
SmallVector<Instruction *, 4 > LifetimeMarkers;
1470
- Instruction *FirstUser = nullptr , *LastUser = nullptr ;
1528
+ InsertionPt Dom = nullptr , PDom = nullptr ;
1471
1529
SmallSet<Instruction *, 4 > NoAliasInstrs;
1472
1530
1473
1531
// Recursively track the user and check whether modified alias exist.
@@ -1505,12 +1563,13 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1505
1563
continue ;
1506
1564
case UseCaptureKind::NO_CAPTURE: {
1507
1565
auto *UI = cast<Instruction>(U.getUser ());
1508
- if (DestAlloca->getParent () != UI->getParent ())
1509
- return false ;
1510
- if (!FirstUser || UI->comesBefore (FirstUser))
1511
- FirstUser = UI;
1512
- if (!LastUser || LastUser->comesBefore (UI))
1513
- LastUser = UI;
1566
+ if (!Dom) {
1567
+ PDom = Dom = UI;
1568
+ } else {
1569
+ Dom = findNearestCommonDominator (Dom, UI, DT);
1570
+ if (PDom)
1571
+ PDom = findNearestCommonPostDominator (PDom, UI, PDT);
1572
+ }
1514
1573
if (UI->isLifetimeStartOrEnd ()) {
1515
1574
// We note the locations of these intrinsic calls so that we can
1516
1575
// delete them later if the optimization succeeds, this is safe
@@ -1534,37 +1593,64 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1534
1593
return true ;
1535
1594
};
1536
1595
1537
- // 3. Check that dest has no Mod/Ref, except full size lifetime intrinsics,
1538
- // from the alloca to the Store.
1596
+ // Check that dest has no Mod/Ref, from the alloca to the Store, except full
1597
+ // size lifetime intrinsics. And collect modref inst for the reachability
1598
+ // check.
1539
1599
ModRefInfo DestModRef = ModRefInfo::NoModRef;
1540
1600
MemoryLocation DestLoc (DestAlloca, LocationSize::precise (Size));
1601
+ SmallVector<BasicBlock *, 8 > ReachabilityWorklist;
1541
1602
auto DestModRefCallback = [&](Instruction *UI) -> bool {
1542
1603
// We don't care about the store itself.
1543
1604
if (UI == Store)
1544
1605
return true ;
1545
1606
ModRefInfo Res = BAA.getModRefInfo (UI, DestLoc);
1546
- // FIXME: For multi-BB cases, we need to see reachability from it to
1547
- // store.
1548
- // Bailout if Dest may have any ModRef before Store.
1549
- if (UI->comesBefore (Store) && isModOrRefSet (Res))
1550
- return false ;
1551
- DestModRef |= BAA.getModRefInfo (UI, DestLoc);
1607
+ DestModRef |= Res;
1608
+ if (isModOrRefSet (Res)) {
1609
+ // Instructions reachability checks.
1610
+ // FIXME: adding the Instruction version isPotentiallyReachableFromMany on
1611
+ // lib/Analysis/CFG.cpp (currently only for BasicBlocks) might be helpful.
1612
+ if (UI->getParent () == Store->getParent ()) {
1613
+ // The same block case is special because it's the only time we're
1614
+ // looking within a single block to see which instruction comes first.
1615
+ // Once we start looking at multiple blocks, the first instruction of
1616
+ // the block is reachable, so we only need to determine reachability
1617
+ // between whole blocks.
1618
+ BasicBlock *BB = UI->getParent ();
1619
+
1620
+ // If A comes before B, then B is definitively reachable from A.
1621
+ if (UI->comesBefore (Store))
1622
+ return false ;
1623
+
1624
+ // If the user's parent block is entry, no predecessor exists.
1625
+ if (BB->isEntryBlock ())
1626
+ return true ;
1552
1627
1628
+ // Otherwise, continue doing the normal per-BB CFG walk.
1629
+ ReachabilityWorklist.append (succ_begin (BB), succ_end (BB));
1630
+ } else {
1631
+ ReachabilityWorklist.push_back (UI->getParent ());
1632
+ }
1633
+ }
1553
1634
return true ;
1554
1635
};
1555
1636
1556
1637
if (!CaptureTrackingWithModRef (DestAlloca, DestModRefCallback))
1557
1638
return false ;
1639
+ // Bailout if Dest may have any ModRef before Store.
1640
+ if (!ReachabilityWorklist.empty () &&
1641
+ isPotentiallyReachableFromMany (ReachabilityWorklist, Store->getParent (),
1642
+ nullptr , DT, nullptr ))
1643
+ return false ;
1558
1644
1559
- // 3. Check that, from after the Load to the end of the BB,
1560
- // 3-1. if the dest has any Mod, src has no Ref, and
1561
- // 3-2. if the dest has any Ref, src has no Mod except full-sized lifetimes.
1645
+ // Check that, from after the Load to the end of the BB,
1646
+ // - if the dest has any Mod, src has no Ref, and
1647
+ // - if the dest has any Ref, src has no Mod except full-sized lifetimes.
1562
1648
MemoryLocation SrcLoc (SrcAlloca, LocationSize::precise (Size));
1563
1649
1564
1650
auto SrcModRefCallback = [&](Instruction *UI) -> bool {
1565
- // Any ModRef before Load doesn't matter, also Load and Store can be
1566
- // ignored.
1567
- if (UI-> comesBefore (Load) || UI == Load || UI == Store)
1651
+ // Any ModRef post-dominated by Load doesn't matter, also Load and Store
1652
+ // themselves can be ignored.
1653
+ if (PDT-> dominates (Load, UI ) || UI == Load || UI == Store)
1568
1654
return true ;
1569
1655
ModRefInfo Res = BAA.getModRefInfo (UI, SrcLoc);
1570
1656
if ((isModSet (DestModRef) && isRefSet (Res)) ||
@@ -1596,22 +1682,48 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1596
1682
ConstantInt *AllocaSize = ConstantInt::get (Type::getInt64Ty (C), Size);
1597
1683
// Create a new lifetime start marker before the first user of src or alloca
1598
1684
// users.
1599
- Builder.SetInsertPoint (FirstUser->getParent (), FirstUser->getIterator ());
1600
- auto *Start = Builder.CreateLifetimeStart (SrcAlloca, AllocaSize);
1601
- auto *FirstMA = MSSA->getMemoryAccess (FirstUser);
1602
- auto *StartMA = MSSAU->createMemoryAccessBefore (Start, nullptr , FirstMA);
1685
+ MemoryAccess *StartMA;
1686
+ if (auto *DomI = dyn_cast_if_present<Instruction *>(Dom)) {
1687
+ Builder.SetInsertPoint (DomI->getParent (), DomI->getIterator ());
1688
+ auto *Start = Builder.CreateLifetimeStart (SrcAlloca, AllocaSize);
1689
+ StartMA = MSSAU->createMemoryAccessBefore (Start, nullptr ,
1690
+ MSSA->getMemoryAccess (DomI));
1691
+ } else {
1692
+ auto *DomB = cast<BasicBlock *>(Dom);
1693
+ Builder.SetInsertPoint (DomB->getTerminator ());
1694
+ auto *Start = Builder.CreateLifetimeStart (SrcAlloca, AllocaSize);
1695
+ StartMA = MSSAU->createMemoryAccessInBB (
1696
+ Start, nullptr , Start->getParent (), MemorySSA::BeforeTerminator);
1697
+ }
1603
1698
MSSAU->insertDef (cast<MemoryDef>(StartMA), /* RenameUses=*/ true );
1604
1699
1605
1700
// Create a new lifetime end marker after the last user of src or alloca
1606
- // users.
1607
- // FIXME: If the last user is the terminator for the bb, we can insert
1608
- // lifetime.end marker to the immidiate post-dominator, but currently do
1609
- // nothing.
1610
- if (!LastUser->isTerminator ()) {
1611
- Builder.SetInsertPoint (LastUser->getParent (), ++LastUser->getIterator ());
1612
- auto *End = Builder.CreateLifetimeEnd (SrcAlloca, AllocaSize);
1613
- auto *LastMA = MSSA->getMemoryAccess (LastUser);
1614
- auto *EndMA = MSSAU->createMemoryAccessAfter (End, nullptr , LastMA);
1701
+ // users. If there's no such postdominator, just don't bother; we could
1702
+ // create one at each exit block, but that'd be essentially semantically
1703
+ // meaningless.
1704
+ // If the PDom is the terminator (e.g. invoke), see the next immediate post
1705
+ // dominator.
1706
+ if (auto *PDomI = dyn_cast_if_present<Instruction *>(PDom);
1707
+ PDomI && PDomI->isTerminator ()) {
1708
+ auto *IPDomNode = (*PDT)[PDomI->getParent ()]->getIDom ();
1709
+ PDom = IPDomNode ? IPDomNode->getBlock () : nullptr ;
1710
+ }
1711
+ if (PDom) {
1712
+ MemoryAccess *EndMA;
1713
+ if (auto *PDomI = dyn_cast<Instruction *>(PDom)) {
1714
+ // If PDom is Instruction ptr, insert after it, because it's a user of
1715
+ // SrcAlloca.
1716
+ Builder.SetInsertPoint (PDomI->getParent (), ++PDomI->getIterator ());
1717
+ auto *End = Builder.CreateLifetimeEnd (SrcAlloca, AllocaSize);
1718
+ EndMA = MSSAU->createMemoryAccessAfter (End, nullptr ,
1719
+ MSSA->getMemoryAccess (PDomI));
1720
+ } else {
1721
+ auto *PDomB = cast<BasicBlock *>(PDom);
1722
+ Builder.SetInsertPoint (PDomB, PDomB->getFirstInsertionPt ());
1723
+ auto *End = Builder.CreateLifetimeEnd (SrcAlloca, AllocaSize);
1724
+ EndMA = MSSAU->createMemoryAccessInBB (End, nullptr , End->getParent (),
1725
+ MemorySSA::Beginning);
1726
+ }
1615
1727
MSSAU->insertDef (cast<MemoryDef>(EndMA), /* RenameUses=*/ true );
1616
1728
}
1617
1729
@@ -1999,9 +2111,10 @@ PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
1999
2111
auto *AA = &AM.getResult <AAManager>(F);
2000
2112
auto *AC = &AM.getResult <AssumptionAnalysis>(F);
2001
2113
auto *DT = &AM.getResult <DominatorTreeAnalysis>(F);
2114
+ auto *PDT = &AM.getResult <PostDominatorTreeAnalysis>(F);
2002
2115
auto *MSSA = &AM.getResult <MemorySSAAnalysis>(F);
2003
2116
2004
- bool MadeChange = runImpl (F, &TLI, AA, AC, DT, &MSSA->getMSSA ());
2117
+ bool MadeChange = runImpl (F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA ());
2005
2118
if (!MadeChange)
2006
2119
return PreservedAnalyses::all ();
2007
2120
@@ -2013,12 +2126,14 @@ PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
2013
2126
2014
2127
bool MemCpyOptPass::runImpl (Function &F, TargetLibraryInfo *TLI_,
2015
2128
AliasAnalysis *AA_, AssumptionCache *AC_,
2016
- DominatorTree *DT_, MemorySSA *MSSA_) {
2129
+ DominatorTree *DT_, PostDominatorTree *PDT_,
2130
+ MemorySSA *MSSA_) {
2017
2131
bool MadeChange = false ;
2018
2132
TLI = TLI_;
2019
2133
AA = AA_;
2020
2134
AC = AC_;
2021
2135
DT = DT_;
2136
+ PDT = PDT_;
2022
2137
MSSA = MSSA_;
2023
2138
MemorySSAUpdater MSSAU_ (MSSA_);
2024
2139
MSSAU = &MSSAU_;
0 commit comments