19
19
#include " llvm/ADT/iterator_range.h"
20
20
#include " llvm/Analysis/AliasAnalysis.h"
21
21
#include " llvm/Analysis/AssumptionCache.h"
22
- #include " llvm/Analysis/CFG.h"
23
22
#include " llvm/Analysis/CaptureTracking.h"
24
23
#include " llvm/Analysis/GlobalsModRef.h"
25
24
#include " llvm/Analysis/Loads.h"
26
25
#include " llvm/Analysis/MemoryLocation.h"
27
26
#include " llvm/Analysis/MemorySSA.h"
28
27
#include " llvm/Analysis/MemorySSAUpdater.h"
29
- #include " llvm/Analysis/PostDominators.h"
30
28
#include " llvm/Analysis/TargetLibraryInfo.h"
31
29
#include " llvm/Analysis/ValueTracking.h"
32
30
#include " llvm/IR/BasicBlock.h"
@@ -1417,74 +1415,6 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1417
1415
return true ;
1418
1416
}
1419
1417
1420
- using InsertionPt = PointerUnion<Instruction *, BasicBlock *>;
1421
- // / Find the nearest Instruction or BasicBlock that dominates both I1 and
1422
- // / I2.
1423
- static InsertionPt findNearestCommonDominator (InsertionPt I1, InsertionPt I2,
1424
- DominatorTree *DT) {
1425
- auto GetParent = [](InsertionPt I) {
1426
- if (auto *BB = dyn_cast<BasicBlock *>(I))
1427
- return BB;
1428
- return cast<Instruction *>(I)->getParent ();
1429
- };
1430
- BasicBlock *BB1 = GetParent (I1);
1431
- BasicBlock *BB2 = GetParent (I2);
1432
- if (BB1 == BB2) {
1433
- // BasicBlock InsertionPt means the terminator.
1434
- if (isa<BasicBlock *>(I1))
1435
- return I2;
1436
- if (isa<BasicBlock *>(I2))
1437
- return I1;
1438
- return cast<Instruction *>(I1)->comesBefore (cast<Instruction *>(I2)) ? I1
1439
- : I2;
1440
- }
1441
-
1442
- // These checks are necessary, because findNearestCommonDominator for NodeT
1443
- // doesn't handle these.
1444
- if (!DT->isReachableFromEntry (BB2))
1445
- return I1;
1446
- if (!DT->isReachableFromEntry (BB1))
1447
- return I2;
1448
-
1449
- BasicBlock *DomBB = DT->findNearestCommonDominator (BB1, BB2);
1450
- if (BB2 == DomBB)
1451
- return I2;
1452
- if (BB1 == DomBB)
1453
- return I1;
1454
- return DomBB;
1455
- }
1456
-
1457
- // / Find the nearest Instruction or BasicBlock that post-dominates both I1 and
1458
- // / I2.
1459
- static InsertionPt findNearestCommonPostDominator (InsertionPt I1,
1460
- InsertionPt I2,
1461
- PostDominatorTree *PDT) {
1462
- auto GetParent = [](InsertionPt I) {
1463
- if (auto *BB = dyn_cast<BasicBlock *>(I))
1464
- return BB;
1465
- return cast<Instruction *>(I)->getParent ();
1466
- };
1467
- BasicBlock *BB1 = GetParent (I1);
1468
- BasicBlock *BB2 = GetParent (I2);
1469
- if (BB1 == BB2) {
1470
- // BasicBlock InsertionPt means the first non-phi instruction.
1471
- if (isa<BasicBlock *>(I1))
1472
- return I2;
1473
- if (isa<BasicBlock *>(I2))
1474
- return I1;
1475
- return cast<Instruction *>(I1)->comesBefore (cast<Instruction *>(I2)) ? I2
1476
- : I1;
1477
- }
1478
- BasicBlock *PDomBB = PDT->findNearestCommonDominator (BB1, BB2);
1479
- if (!PDomBB)
1480
- return nullptr ;
1481
- if (BB2 == PDomBB)
1482
- return I2;
1483
- if (BB1 == PDomBB)
1484
- return I1;
1485
- return PDomBB;
1486
- }
1487
-
1488
1418
// Attempts to optimize the pattern whereby memory is copied from an alloca to
1489
1419
// another alloca, where the two allocas don't have conflicting mod/ref. If
1490
1420
// successful, the two allocas can be merged into one and the transfer can be
@@ -1510,7 +1440,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1510
1440
return false ;
1511
1441
}
1512
1442
1513
- // Check that copy is full with static size.
1443
+ // 1. Check that copy is full. Calculate the static size of the allocas to be
1444
+ // merged, bail out if we can't.
1514
1445
const DataLayout &DL = DestAlloca->getModule ()->getDataLayout ();
1515
1446
std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize (DL);
1516
1447
if (!SrcSize || SrcSize->isScalable () || Size != SrcSize->getFixedValue ()) {
@@ -1524,16 +1455,19 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1524
1455
return false ;
1525
1456
}
1526
1457
1527
- if (!SrcAlloca->isStaticAlloca () || !DestAlloca->isStaticAlloca ())
1458
+ // 2-1. Check that src and dest are static allocas, which are not affected by
1459
+ // stacksave/stackrestore.
1460
+ if (!SrcAlloca->isStaticAlloca () || !DestAlloca->isStaticAlloca () ||
1461
+ SrcAlloca->getParent () != Load->getParent () ||
1462
+ SrcAlloca->getParent () != Store->getParent ())
1528
1463
return false ;
1529
1464
1530
- // Check that src and dest are never captured, unescaped allocas. Also
1531
- // find the nearest common dominator and postdominator for all users in
1532
- // order to shrink wrap the lifetimes, and instructions with noalias metadata
1533
- // to remove them.
1465
+ // 2-2. Check that src and dest are never captured, unescaped allocas. Also
1466
+ // collect lifetime markers and the first/last users in order to shrink wrap the
1467
+ // lifetimes, and instructions with noalias metadata to remove them.
1534
1468
1535
1469
SmallVector<Instruction *, 4 > LifetimeMarkers;
1536
- InsertionPt Dom = nullptr , PDom = nullptr ;
1470
+ Instruction *FirstUser = nullptr , *LastUser = nullptr ;
1537
1471
SmallSet<Instruction *, 4 > NoAliasInstrs;
1538
1472
1539
1473
// Recursively track the user and check whether modified alias exist.
@@ -1571,13 +1505,12 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1571
1505
continue ;
1572
1506
case UseCaptureKind::NO_CAPTURE: {
1573
1507
auto *UI = cast<Instruction>(U.getUser ());
1574
- if (!Dom) {
1575
- PDom = Dom = UI;
1576
- } else {
1577
- Dom = findNearestCommonDominator (Dom, UI, DT);
1578
- if (PDom)
1579
- PDom = findNearestCommonPostDominator (PDom, UI, PDT);
1580
- }
1508
+ if (DestAlloca->getParent () != UI->getParent ())
1509
+ return false ;
1510
+ if (!FirstUser || UI->comesBefore (FirstUser))
1511
+ FirstUser = UI;
1512
+ if (!LastUser || LastUser->comesBefore (UI))
1513
+ LastUser = UI;
1581
1514
if (UI->isLifetimeStartOrEnd ()) {
1582
1515
// We note the locations of these intrinsic calls so that we can
1583
1516
// delete them later if the optimization succeeds, this is safe
@@ -1601,64 +1534,37 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1601
1534
return true ;
1602
1535
};
1603
1536
1604
- // Check that dest has no Mod/Ref, from the alloca to the Store, except full
1605
- // size lifetime intrinsics. And collect modref inst for the reachability
1606
- // check.
1537
+ // 3. Check that dest has no Mod/Ref, except full size lifetime intrinsics,
1538
+ // from the alloca to the Store.
1607
1539
ModRefInfo DestModRef = ModRefInfo::NoModRef;
1608
1540
MemoryLocation DestLoc (DestAlloca, LocationSize::precise (Size));
1609
- SmallVector<BasicBlock *, 8 > ReachabilityWorklist;
1610
1541
auto DestModRefCallback = [&](Instruction *UI) -> bool {
1611
1542
// We don't care about the store itself.
1612
1543
if (UI == Store)
1613
1544
return true ;
1614
1545
ModRefInfo Res = BAA.getModRefInfo (UI, DestLoc);
1615
- DestModRef |= Res;
1616
- if (isModOrRefSet (Res)) {
1617
- // Instructions reachability checks.
1618
- // FIXME: adding the Instruction version isPotentiallyReachableFromMany on
1619
- // lib/Analysis/CFG.cpp (currently only for BasicBlocks) might be helpful.
1620
- if (UI->getParent () == Store->getParent ()) {
1621
- // The same block case is special because it's the only time we're
1622
- // looking within a single block to see which instruction comes first.
1623
- // Once we start looking at multiple blocks, the first instruction of
1624
- // the block is reachable, so we only need to determine reachability
1625
- // between whole blocks.
1626
- BasicBlock *BB = UI->getParent ();
1627
-
1628
- // If A comes before B, then B is definitively reachable from A.
1629
- if (UI->comesBefore (Store))
1630
- return false ;
1631
-
1632
- // If the user's parent block is entry, no predecessor exists.
1633
- if (BB->isEntryBlock ())
1634
- return true ;
1546
+ // FIXME: For multi-BB cases, we need to see reachability from it to
1547
+ // store.
1548
+ // Bailout if Dest may have any ModRef before Store.
1549
+ if (UI->comesBefore (Store) && isModOrRefSet (Res))
1550
+ return false ;
1551
+ DestModRef |= BAA.getModRefInfo (UI, DestLoc);
1635
1552
1636
- // Otherwise, continue doing the normal per-BB CFG walk.
1637
- ReachabilityWorklist.append (succ_begin (BB), succ_end (BB));
1638
- } else {
1639
- ReachabilityWorklist.push_back (UI->getParent ());
1640
- }
1641
- }
1642
1553
return true ;
1643
1554
};
1644
1555
1645
1556
if (!CaptureTrackingWithModRef (DestAlloca, DestModRefCallback))
1646
1557
return false ;
1647
- // Bailout if Dest may have any ModRef before Store.
1648
- if (!ReachabilityWorklist.empty () &&
1649
- isPotentiallyReachableFromMany (ReachabilityWorklist, Store->getParent (),
1650
- nullptr , DT, nullptr ))
1651
- return false ;
1652
1558
1653
- // Check that, from after the Load to the end of the BB,
1654
- // - if the dest has any Mod, src has no Ref, and
1655
- // - if the dest has any Ref, src has no Mod except full-sized lifetimes.
1559
+ // 4. Check that, from after the Load to the end of the BB,
1560
+ // 4-1. if the dest has any Mod, src has no Ref, and
1561
+ // 4-2. if the dest has any Ref, src has no Mod except full-sized lifetimes.
1656
1562
MemoryLocation SrcLoc (SrcAlloca, LocationSize::precise (Size));
1657
1563
1658
1564
auto SrcModRefCallback = [&](Instruction *UI) -> bool {
1659
- // Any ModRef post-dominated by Load doesn't matter, also Load and Store
1660
- // themselves can be ignored.
1661
- if (PDT-> dominates (Load, UI ) || UI == Load || UI == Store)
1565
+ // Any ModRef before Load doesn't matter, also Load and Store can be
1566
+ // ignored.
1567
+ if (UI-> comesBefore (Load) || UI == Load || UI == Store)
1662
1568
return true ;
1663
1569
ModRefInfo Res = BAA.getModRefInfo (UI, SrcLoc);
1664
1570
if ((isModSet (DestModRef) && isRefSet (Res)) ||
@@ -1690,48 +1596,22 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1690
1596
ConstantInt *AllocaSize = ConstantInt::get (Type::getInt64Ty (C), Size);
1691
1597
// Create a new lifetime start marker before the first user of src or alloca
1692
1598
// users.
1693
- MemoryAccess *StartMA;
1694
- if (auto *DomI = dyn_cast_if_present<Instruction *>(Dom)) {
1695
- Builder.SetInsertPoint (DomI->getParent (), DomI->getIterator ());
1696
- auto *Start = Builder.CreateLifetimeStart (SrcAlloca, AllocaSize);
1697
- StartMA = MSSAU->createMemoryAccessBefore (Start, nullptr ,
1698
- MSSA->getMemoryAccess (DomI));
1699
- } else {
1700
- auto *DomB = cast<BasicBlock *>(Dom);
1701
- Builder.SetInsertPoint (DomB->getTerminator ());
1702
- auto *Start = Builder.CreateLifetimeStart (SrcAlloca, AllocaSize);
1703
- StartMA = MSSAU->createMemoryAccessInBB (
1704
- Start, nullptr , Start->getParent (), MemorySSA::BeforeTerminator);
1705
- }
1599
+ Builder.SetInsertPoint (FirstUser->getParent (), FirstUser->getIterator ());
1600
+ auto *Start = Builder.CreateLifetimeStart (SrcAlloca, AllocaSize);
1601
+ auto *FirstMA = MSSA->getMemoryAccess (FirstUser);
1602
+ auto *StartMA = MSSAU->createMemoryAccessBefore (Start, nullptr , FirstMA);
1706
1603
MSSAU->insertDef (cast<MemoryDef>(StartMA), /* RenameUses=*/ true );
1707
1604
1708
1605
// Create a new lifetime end marker after the last user of src or alloca
1709
- // users. If there's no such postdominator, just don't bother; we could
1710
- // create one at each exit block, but that'd be essentially semantically
1711
- // meaningless.
1712
- // If the PDom is the terminator (e.g. invoke), see the next immediate post
1713
- // dominator.
1714
- if (auto *PDomI = dyn_cast_if_present<Instruction *>(PDom);
1715
- PDomI && PDomI->isTerminator ()) {
1716
- auto *IPDomNode = (*PDT)[PDomI->getParent ()]->getIDom ();
1717
- PDom = IPDomNode ? IPDomNode->getBlock () : nullptr ;
1718
- }
1719
- if (PDom) {
1720
- MemoryAccess *EndMA;
1721
- if (auto *PDomI = dyn_cast<Instruction *>(PDom)) {
1722
- // If PDom is Instruction ptr, insert after it, because it's a user of
1723
- // SrcAlloca.
1724
- Builder.SetInsertPoint (PDomI->getParent (), ++PDomI->getIterator ());
1725
- auto *End = Builder.CreateLifetimeEnd (SrcAlloca, AllocaSize);
1726
- EndMA = MSSAU->createMemoryAccessAfter (End, nullptr ,
1727
- MSSA->getMemoryAccess (PDomI));
1728
- } else {
1729
- auto *PDomB = cast<BasicBlock *>(PDom);
1730
- Builder.SetInsertPoint (PDomB, PDomB->getFirstInsertionPt ());
1731
- auto *End = Builder.CreateLifetimeEnd (SrcAlloca, AllocaSize);
1732
- EndMA = MSSAU->createMemoryAccessInBB (End, nullptr , End->getParent (),
1733
- MemorySSA::Beginning);
1734
- }
1606
+ // users.
1607
+ // FIXME: If the last user is the terminator for the bb, we can insert
1608
+ // lifetime.end marker to the immediate post-dominator, but currently do
1609
+ // nothing.
1610
+ if (!LastUser->isTerminator ()) {
1611
+ Builder.SetInsertPoint (LastUser->getParent (), ++LastUser->getIterator ());
1612
+ auto *End = Builder.CreateLifetimeEnd (SrcAlloca, AllocaSize);
1613
+ auto *LastMA = MSSA->getMemoryAccess (LastUser);
1614
+ auto *EndMA = MSSAU->createMemoryAccessAfter (End, nullptr , LastMA);
1735
1615
MSSAU->insertDef (cast<MemoryDef>(EndMA), /* RenameUses=*/ true );
1736
1616
}
1737
1617
@@ -2119,10 +1999,9 @@ PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
2119
1999
auto *AA = &AM.getResult <AAManager>(F);
2120
2000
auto *AC = &AM.getResult <AssumptionAnalysis>(F);
2121
2001
auto *DT = &AM.getResult <DominatorTreeAnalysis>(F);
2122
- auto *PDT = &AM.getResult <PostDominatorTreeAnalysis>(F);
2123
2002
auto *MSSA = &AM.getResult <MemorySSAAnalysis>(F);
2124
2003
2125
- bool MadeChange = runImpl (F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA ());
2004
+ bool MadeChange = runImpl (F, &TLI, AA, AC, DT, &MSSA->getMSSA ());
2126
2005
if (!MadeChange)
2127
2006
return PreservedAnalyses::all ();
2128
2007
@@ -2134,14 +2013,12 @@ PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
2134
2013
2135
2014
bool MemCpyOptPass::runImpl (Function &F, TargetLibraryInfo *TLI_,
2136
2015
AliasAnalysis *AA_, AssumptionCache *AC_,
2137
- DominatorTree *DT_, PostDominatorTree *PDT_,
2138
- MemorySSA *MSSA_) {
2016
+ DominatorTree *DT_, MemorySSA *MSSA_) {
2139
2017
bool MadeChange = false ;
2140
2018
TLI = TLI_;
2141
2019
AA = AA_;
2142
2020
AC = AC_;
2143
2021
DT = DT_;
2144
- PDT = PDT_;
2145
2022
MSSA = MSSA_;
2146
2023
MemorySSAUpdater MSSAU_ (MSSA_);
2147
2024
MSSAU = &MSSAU_;
0 commit comments