19
19
#include " llvm/ADT/iterator_range.h"
20
20
#include " llvm/Analysis/AliasAnalysis.h"
21
21
#include " llvm/Analysis/AssumptionCache.h"
22
+ #include " llvm/Analysis/CFG.h"
22
23
#include " llvm/Analysis/CaptureTracking.h"
23
24
#include " llvm/Analysis/GlobalsModRef.h"
24
25
#include " llvm/Analysis/Loads.h"
25
26
#include " llvm/Analysis/MemoryLocation.h"
26
27
#include " llvm/Analysis/MemorySSA.h"
27
28
#include " llvm/Analysis/MemorySSAUpdater.h"
29
+ #include " llvm/Analysis/PostDominators.h"
28
30
#include " llvm/Analysis/TargetLibraryInfo.h"
29
31
#include " llvm/Analysis/ValueTracking.h"
30
32
#include " llvm/IR/BasicBlock.h"
@@ -1415,6 +1417,74 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1415
1417
return true ;
1416
1418
}
1417
1419
1420
+ using InsertionPt = PointerUnion<Instruction *, BasicBlock *>;
1421
+ // / Find the nearest Instruction or BasicBlock that dominates both I1 and
1422
+ // / I2.
1423
+ static InsertionPt findNearestCommonDominator (InsertionPt I1, InsertionPt I2,
1424
+ DominatorTree *DT) {
1425
+ auto GetParent = [](InsertionPt I) {
1426
+ if (auto *BB = dyn_cast<BasicBlock *>(I))
1427
+ return BB;
1428
+ return cast<Instruction *>(I)->getParent ();
1429
+ };
1430
+ BasicBlock *BB1 = GetParent (I1);
1431
+ BasicBlock *BB2 = GetParent (I2);
1432
+ if (BB1 == BB2) {
1433
+ // BasicBlock InsertionPt means the terminator.
1434
+ if (isa<BasicBlock *>(I1))
1435
+ return I2;
1436
+ if (isa<BasicBlock *>(I2))
1437
+ return I1;
1438
+ return cast<Instruction *>(I1)->comesBefore (cast<Instruction *>(I2)) ? I1
1439
+ : I2;
1440
+ }
1441
+
1442
+ // These checks are necessary, because findNearestCommonDominator for NodeT
1443
+ // doesn't handle these.
1444
+ if (!DT->isReachableFromEntry (BB2))
1445
+ return I1;
1446
+ if (!DT->isReachableFromEntry (BB1))
1447
+ return I2;
1448
+
1449
+ BasicBlock *DomBB = DT->findNearestCommonDominator (BB1, BB2);
1450
+ if (BB2 == DomBB)
1451
+ return I2;
1452
+ if (BB1 == DomBB)
1453
+ return I1;
1454
+ return DomBB;
1455
+ }
1456
+
1457
+ // / Find the nearest Instruction or BasicBlock that post-dominates both I1 and
1458
+ // / I2.
1459
+ static InsertionPt findNearestCommonPostDominator (InsertionPt I1,
1460
+ InsertionPt I2,
1461
+ PostDominatorTree *PDT) {
1462
+ auto GetParent = [](InsertionPt I) {
1463
+ if (auto *BB = dyn_cast<BasicBlock *>(I))
1464
+ return BB;
1465
+ return cast<Instruction *>(I)->getParent ();
1466
+ };
1467
+ BasicBlock *BB1 = GetParent (I1);
1468
+ BasicBlock *BB2 = GetParent (I2);
1469
+ if (BB1 == BB2) {
1470
+ // BasicBlock InsertionPt means the first non-phi instruction.
1471
+ if (isa<BasicBlock *>(I1))
1472
+ return I2;
1473
+ if (isa<BasicBlock *>(I2))
1474
+ return I1;
1475
+ return cast<Instruction *>(I1)->comesBefore (cast<Instruction *>(I2)) ? I2
1476
+ : I1;
1477
+ }
1478
+ BasicBlock *PDomBB = PDT->findNearestCommonDominator (BB1, BB2);
1479
+ if (!PDomBB)
1480
+ return nullptr ;
1481
+ if (BB2 == PDomBB)
1482
+ return I2;
1483
+ if (BB1 == PDomBB)
1484
+ return I1;
1485
+ return PDomBB;
1486
+ }
1487
+
1418
1488
// Attempts to optimize the pattern whereby memory is copied from an alloca to
1419
1489
// another alloca, where the two allocas don't have conflicting mod/ref. If
1420
1490
// successful, the two allocas can be merged into one and the transfer can be
@@ -1440,8 +1510,7 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1440
1510
return false ;
1441
1511
}
1442
1512
1443
- // 1. Check that copy is full. Calculate the static size of the allocas to be
1444
- // merged, bail out if we can't.
1513
+ // Check that copy is full with static size.
1445
1514
const DataLayout &DL = DestAlloca->getModule ()->getDataLayout ();
1446
1515
std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize (DL);
1447
1516
if (!SrcSize || SrcSize->isScalable () || Size != SrcSize->getFixedValue ()) {
@@ -1455,19 +1524,16 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1455
1524
return false ;
1456
1525
}
1457
1526
1458
- // 2-1. Check that src and dest are static allocas, which are not affected by
1459
- // stacksave/stackrestore.
1460
- if (!SrcAlloca->isStaticAlloca () || !DestAlloca->isStaticAlloca () ||
1461
- SrcAlloca->getParent () != Load->getParent () ||
1462
- SrcAlloca->getParent () != Store->getParent ())
1527
+ if (!SrcAlloca->isStaticAlloca () || !DestAlloca->isStaticAlloca ())
1463
1528
return false ;
1464
1529
1465
- // 2-2. Check that src and dest are never captured, unescaped allocas. Also
1466
- // collect lifetime markers first/last users in order to shrink wrap the
1467
- // lifetimes, and instructions with noalias metadata to remove them.
1530
+ // Check that src and dest are never captured, unescaped allocas. Also
1531
+ // find the nearest common dominator and postdominator for all users in
1532
+ // order to shrink wrap the lifetimes, and instructions with noalias metadata
1533
+ // to remove them.
1468
1534
1469
1535
SmallVector<Instruction *, 4 > LifetimeMarkers;
1470
- Instruction *FirstUser = nullptr , *LastUser = nullptr ;
1536
+ InsertionPt Dom = nullptr , PDom = nullptr ;
1471
1537
SmallSet<Instruction *, 4 > NoAliasInstrs;
1472
1538
1473
1539
// Recursively track the user and check whether modified alias exist.
@@ -1505,12 +1571,13 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1505
1571
continue ;
1506
1572
case UseCaptureKind::NO_CAPTURE: {
1507
1573
auto *UI = cast<Instruction>(U.getUser ());
1508
- if (DestAlloca->getParent () != UI->getParent ())
1509
- return false ;
1510
- if (!FirstUser || UI->comesBefore (FirstUser))
1511
- FirstUser = UI;
1512
- if (!LastUser || LastUser->comesBefore (UI))
1513
- LastUser = UI;
1574
+ if (!Dom) {
1575
+ PDom = Dom = UI;
1576
+ } else {
1577
+ Dom = findNearestCommonDominator (Dom, UI, DT);
1578
+ if (PDom)
1579
+ PDom = findNearestCommonPostDominator (PDom, UI, PDT);
1580
+ }
1514
1581
if (UI->isLifetimeStartOrEnd ()) {
1515
1582
// We note the locations of these intrinsic calls so that we can
1516
1583
// delete them later if the optimization succeeds, this is safe
@@ -1534,37 +1601,64 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1534
1601
return true ;
1535
1602
};
1536
1603
1537
- // 3. Check that dest has no Mod/Ref, except full size lifetime intrinsics,
1538
- // from the alloca to the Store.
1604
+ // Check that dest has no Mod/Ref, from the alloca to the Store, except full
1605
+ // size lifetime intrinsics. And collect modref inst for the reachability
1606
+ // check.
1539
1607
ModRefInfo DestModRef = ModRefInfo::NoModRef;
1540
1608
MemoryLocation DestLoc (DestAlloca, LocationSize::precise (Size));
1609
+ SmallVector<BasicBlock *, 8 > ReachabilityWorklist;
1541
1610
auto DestModRefCallback = [&](Instruction *UI) -> bool {
1542
1611
// We don't care about the store itself.
1543
1612
if (UI == Store)
1544
1613
return true ;
1545
1614
ModRefInfo Res = BAA.getModRefInfo (UI, DestLoc);
1546
- // FIXME: For multi-BB cases, we need to see reachability from it to
1547
- // store.
1548
- // Bailout if Dest may have any ModRef before Store.
1549
- if (UI->comesBefore (Store) && isModOrRefSet (Res))
1550
- return false ;
1551
- DestModRef |= BAA.getModRefInfo (UI, DestLoc);
1615
+ DestModRef |= Res;
1616
+ if (isModOrRefSet (Res)) {
1617
+ // Instructions reachability checks.
1618
+ // FIXME: adding the Instruction version isPotentiallyReachableFromMany on
1619
+ // lib/Analysis/CFG.cpp (currently only for BasicBlocks) might be helpful.
1620
+ if (UI->getParent () == Store->getParent ()) {
1621
+ // The same block case is special because it's the only time we're
1622
+ // looking within a single block to see which instruction comes first.
1623
+ // Once we start looking at multiple blocks, the first instruction of
1624
+ // the block is reachable, so we only need to determine reachability
1625
+ // between whole blocks.
1626
+ BasicBlock *BB = UI->getParent ();
1627
+
1628
+ // If A comes before B, then B is definitively reachable from A.
1629
+ if (UI->comesBefore (Store))
1630
+ return false ;
1631
+
1632
+ // If the user's parent block is entry, no predecessor exists.
1633
+ if (BB->isEntryBlock ())
1634
+ return true ;
1552
1635
1636
+ // Otherwise, continue doing the normal per-BB CFG walk.
1637
+ ReachabilityWorklist.append (succ_begin (BB), succ_end (BB));
1638
+ } else {
1639
+ ReachabilityWorklist.push_back (UI->getParent ());
1640
+ }
1641
+ }
1553
1642
return true ;
1554
1643
};
1555
1644
1556
1645
if (!CaptureTrackingWithModRef (DestAlloca, DestModRefCallback))
1557
1646
return false ;
1647
+ // Bailout if Dest may have any ModRef before Store.
1648
+ if (!ReachabilityWorklist.empty () &&
1649
+ isPotentiallyReachableFromMany (ReachabilityWorklist, Store->getParent (),
1650
+ nullptr , DT, nullptr ))
1651
+ return false ;
1558
1652
1559
- // 3. Check that, from after the Load to the end of the BB,
1560
- // 3-1. if the dest has any Mod, src has no Ref, and
1561
- // 3-2. if the dest has any Ref, src has no Mod except full-sized lifetimes.
1653
+ // Check that, from after the Load to the end of the BB,
1654
+ // - if the dest has any Mod, src has no Ref, and
1655
+ // - if the dest has any Ref, src has no Mod except full-sized lifetimes.
1562
1656
MemoryLocation SrcLoc (SrcAlloca, LocationSize::precise (Size));
1563
1657
1564
1658
auto SrcModRefCallback = [&](Instruction *UI) -> bool {
1565
- // Any ModRef before Load doesn't matter, also Load and Store can be
1566
- // ignored.
1567
- if (UI-> comesBefore (Load) || UI == Load || UI == Store)
1659
+ // Any ModRef post-dominated by Load doesn't matter, also Load and Store
1660
+ // themselves can be ignored.
1661
+ if (PDT-> dominates (Load, UI ) || UI == Load || UI == Store)
1568
1662
return true ;
1569
1663
ModRefInfo Res = BAA.getModRefInfo (UI, SrcLoc);
1570
1664
if ((isModSet (DestModRef) && isRefSet (Res)) ||
@@ -1596,22 +1690,48 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1596
1690
ConstantInt *AllocaSize = ConstantInt::get (Type::getInt64Ty (C), Size);
1597
1691
// Create a new lifetime start marker before the first user of src or alloca
1598
1692
// users.
1599
- Builder.SetInsertPoint (FirstUser->getParent (), FirstUser->getIterator ());
1600
- auto *Start = Builder.CreateLifetimeStart (SrcAlloca, AllocaSize);
1601
- auto *FirstMA = MSSA->getMemoryAccess (FirstUser);
1602
- auto *StartMA = MSSAU->createMemoryAccessBefore (Start, nullptr , FirstMA);
1693
+ MemoryAccess *StartMA;
1694
+ if (auto *DomI = dyn_cast_if_present<Instruction *>(Dom)) {
1695
+ Builder.SetInsertPoint (DomI->getParent (), DomI->getIterator ());
1696
+ auto *Start = Builder.CreateLifetimeStart (SrcAlloca, AllocaSize);
1697
+ StartMA = MSSAU->createMemoryAccessBefore (Start, nullptr ,
1698
+ MSSA->getMemoryAccess (DomI));
1699
+ } else {
1700
+ auto *DomB = cast<BasicBlock *>(Dom);
1701
+ Builder.SetInsertPoint (DomB->getTerminator ());
1702
+ auto *Start = Builder.CreateLifetimeStart (SrcAlloca, AllocaSize);
1703
+ StartMA = MSSAU->createMemoryAccessInBB (
1704
+ Start, nullptr , Start->getParent (), MemorySSA::BeforeTerminator);
1705
+ }
1603
1706
MSSAU->insertDef (cast<MemoryDef>(StartMA), /* RenameUses=*/ true );
1604
1707
1605
1708
// Create a new lifetime end marker after the last user of src or alloca
1606
- // users.
1607
- // FIXME: If the last user is the terminator for the bb, we can insert
1608
- // lifetime.end marker to the immidiate post-dominator, but currently do
1609
- // nothing.
1610
- if (!LastUser->isTerminator ()) {
1611
- Builder.SetInsertPoint (LastUser->getParent (), ++LastUser->getIterator ());
1612
- auto *End = Builder.CreateLifetimeEnd (SrcAlloca, AllocaSize);
1613
- auto *LastMA = MSSA->getMemoryAccess (LastUser);
1614
- auto *EndMA = MSSAU->createMemoryAccessAfter (End, nullptr , LastMA);
1709
+ // users. If there's no such postdominator, just don't bother; we could
1710
+ // create one at each exit block, but that'd be essentially semantically
1711
+ // meaningless.
1712
+ // If the PDom is the terminator (e.g. invoke), see the next immediate post
1713
+ // dominator.
1714
+ if (auto *PDomI = dyn_cast_if_present<Instruction *>(PDom);
1715
+ PDomI && PDomI->isTerminator ()) {
1716
+ auto *IPDomNode = (*PDT)[PDomI->getParent ()]->getIDom ();
1717
+ PDom = IPDomNode ? IPDomNode->getBlock () : nullptr ;
1718
+ }
1719
+ if (PDom) {
1720
+ MemoryAccess *EndMA;
1721
+ if (auto *PDomI = dyn_cast<Instruction *>(PDom)) {
1722
+ // If PDom is Instruction ptr, insert after it, because it's a user of
1723
+ // SrcAlloca.
1724
+ Builder.SetInsertPoint (PDomI->getParent (), ++PDomI->getIterator ());
1725
+ auto *End = Builder.CreateLifetimeEnd (SrcAlloca, AllocaSize);
1726
+ EndMA = MSSAU->createMemoryAccessAfter (End, nullptr ,
1727
+ MSSA->getMemoryAccess (PDomI));
1728
+ } else {
1729
+ auto *PDomB = cast<BasicBlock *>(PDom);
1730
+ Builder.SetInsertPoint (PDomB, PDomB->getFirstInsertionPt ());
1731
+ auto *End = Builder.CreateLifetimeEnd (SrcAlloca, AllocaSize);
1732
+ EndMA = MSSAU->createMemoryAccessInBB (End, nullptr , End->getParent (),
1733
+ MemorySSA::Beginning);
1734
+ }
1615
1735
MSSAU->insertDef (cast<MemoryDef>(EndMA), /* RenameUses=*/ true );
1616
1736
}
1617
1737
@@ -1999,9 +2119,10 @@ PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
1999
2119
auto *AA = &AM.getResult <AAManager>(F);
2000
2120
auto *AC = &AM.getResult <AssumptionAnalysis>(F);
2001
2121
auto *DT = &AM.getResult <DominatorTreeAnalysis>(F);
2122
+ auto *PDT = &AM.getResult <PostDominatorTreeAnalysis>(F);
2002
2123
auto *MSSA = &AM.getResult <MemorySSAAnalysis>(F);
2003
2124
2004
- bool MadeChange = runImpl (F, &TLI, AA, AC, DT, &MSSA->getMSSA ());
2125
+ bool MadeChange = runImpl (F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA ());
2005
2126
if (!MadeChange)
2006
2127
return PreservedAnalyses::all ();
2007
2128
@@ -2013,12 +2134,14 @@ PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
2013
2134
2014
2135
bool MemCpyOptPass::runImpl (Function &F, TargetLibraryInfo *TLI_,
2015
2136
AliasAnalysis *AA_, AssumptionCache *AC_,
2016
- DominatorTree *DT_, MemorySSA *MSSA_) {
2137
+ DominatorTree *DT_, PostDominatorTree *PDT_,
2138
+ MemorySSA *MSSA_) {
2017
2139
bool MadeChange = false ;
2018
2140
TLI = TLI_;
2019
2141
AA = AA_;
2020
2142
AC = AC_;
2021
2143
DT = DT_;
2144
+ PDT = PDT_;
2022
2145
MSSA = MSSA_;
2023
2146
MemorySSAUpdater MSSAU_ (MSSA_);
2024
2147
MSSAU = &MSSAU_;
0 commit comments