23
23
#include " llvm/Analysis/CFG.h"
24
24
#include " llvm/Analysis/CaptureTracking.h"
25
25
#include " llvm/Analysis/GlobalsModRef.h"
26
+ #include " llvm/Analysis/InstSimplifyFolder.h"
26
27
#include " llvm/Analysis/InstructionSimplify.h"
27
28
#include " llvm/Analysis/Loads.h"
28
29
#include " llvm/Analysis/MemoryLocation.h"
@@ -1431,6 +1432,28 @@ static bool overreadUndefContents(MemorySSA *MSSA, MemCpyInst *MemCpy,
1431
1432
return false ;
1432
1433
}
1433
1434
1435
+ // If only the MemSrc instruction is known, a similar but slightly weaker
1436
+ // analysis can apply
1437
+ static bool anyOverreadUndefContents (MemorySSA *MSSA, Instruction *Store,
1438
+ BatchAAResults &BAA) {
1439
+ MemoryLocation Loc;
1440
+ Value *Ptr;
1441
+ if (auto SI = dyn_cast<StoreInst>(Store)) {
1442
+ Loc = MemoryLocation::get (SI);
1443
+ Ptr = SI->getPointerOperand ();
1444
+ } else if (auto MI = dyn_cast<MemCpyInst>(Store)) {
1445
+ Loc = MemoryLocation::getForDest (MI);
1446
+ Ptr = MI->getDest ();
1447
+ } else {
1448
+ llvm_unreachable (" performStackMoveOptzn must have a known store kind" );
1449
+ }
1450
+ MemoryAccess *MemAccess = MSSA->getMemoryAccess (Store)->getDefiningAccess ();
1451
+ if (hadUndefContentsBefore (MSSA, BAA, Ptr, MemAccess, Loc, nullptr ))
1452
+ return true ;
1453
+ return false ;
1454
+ }
1455
+
1456
+
1434
1457
/// Transform memcpy to memset when its source was just memset.
1435
1458
/// In other words, turn:
1436
1459
/// \code
@@ -1524,30 +1547,49 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1524
1547
return false ;
1525
1548
}
1526
1549
1527
- // Check that copy is full with static size.
1528
- const DataLayout &DL = DestAlloca->getDataLayout ();
1529
- std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize (DL);
1530
- if (!SrcSize || Size != *SrcSize) {
1531
- LLVM_DEBUG (dbgs () << " Stack Move: Source alloca size mismatch\n " );
1532
- return false ;
1533
- }
1534
- std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize (DL);
1535
- if (!DestSize || Size != *DestSize) {
1536
- LLVM_DEBUG (dbgs () << " Stack Move: Destination alloca size mismatch\n " );
1537
- return false ;
1538
- }
1539
-
1540
1550
if (!SrcAlloca->isStaticAlloca () || !DestAlloca->isStaticAlloca ())
1541
1551
return false ;
1542
1552
1553
+ Type *SrcType = SrcAlloca->getAllocatedType ();
1554
+ Type *DestType = DestAlloca->getAllocatedType ();
1555
+ // If they don't have common type, then they will need to be converted to a
1556
+ // common size at runtime
1557
+ const auto &DL = SrcAlloca->getDataLayout ();
1558
+ TypeSize SrcSize = DL.getTypeAllocSize (SrcType);
1559
+ TypeSize DestSize = DL.getTypeAllocSize (DestType);
1560
+ if (SrcType != DestType)
1561
+ if (SrcSize != DestSize)
1562
+ if (!SrcSize.isFixed () || !DestSize.isFixed ())
1563
+ return false ;
1564
+
1565
+ // Check that copy is full with dest size, either because it wrote every byte,
1566
+ // or it was fresh.
1567
+ std::optional<TypeSize> FullSize = DestAlloca->getAllocationSize (DL);
1568
+ if (!FullSize || Size != *FullSize)
1569
+ if (!anyOverreadUndefContents (MSSA, Store, BAA)) {
1570
+ LLVM_DEBUG (dbgs () << " Stack Move: Destination alloca size mismatch\n " );
1571
+ return false ;
1572
+ }
1573
+
1574
+ // Check if it will be legal to combine allocas without breaking dominator.
1575
+ // TODO: Try to hoist the arguments (recursively) instead of giving up
1576
+ // immediately.
1577
+ bool MoveSrc = !DT->dominates (SrcAlloca, DestAlloca);
1578
+ if (MoveSrc) {
1579
+ if (!DT->dominates (SrcAlloca->getArraySize (), DestAlloca))
1580
+ return false ;
1581
+ } else {
1582
+ if (!DT->dominates (DestAlloca->getArraySize (), SrcAlloca))
1583
+ return false ;
1584
+ }
1585
+
1543
1586
// Check that src and dest are never captured, unescaped allocas. Also
1544
1587
// find the nearest common dominator and postdominator for all users in
1545
1588
// order to shrink wrap the lifetimes, and instructions with noalias metadata
1546
1589
// to remove them.
1547
1590
1548
1591
SmallVector<Instruction *, 4 > LifetimeMarkers;
1549
1592
SmallSet<Instruction *, 4 > AAMetadataInstrs;
1550
- bool SrcNotDom = false ;
1551
1593
1552
1594
auto CaptureTrackingWithModRef =
1553
1595
[&](Instruction *AI,
@@ -1561,10 +1603,6 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1561
1603
Instruction *I = Worklist.pop_back_val ();
1562
1604
for (const Use &U : I->uses ()) {
1563
1605
auto *UI = cast<Instruction>(U.getUser ());
1564
- // If any use that isn't dominated by SrcAlloca exists, we move src
1565
- // alloca to the entry before the transformation.
1566
- if (!DT->dominates (SrcAlloca, UI))
1567
- SrcNotDom = true ;
1568
1606
1569
1607
if (Visited.size () >= MaxUsesToExplore) {
1570
1608
LLVM_DEBUG (
@@ -1678,15 +1716,43 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1678
1716
if (!CaptureTrackingWithModRef (SrcAlloca, SrcModRefCallback))
1679
1717
return false ;
1680
1718
1681
- // We can do the transformation. First, move the SrcAlloca to the start of the
1682
- // BB.
1683
- if (SrcNotDom)
1684
- SrcAlloca->moveBefore (*SrcAlloca->getParent (),
1685
- SrcAlloca->getParent ()->getFirstInsertionPt ());
1719
+ // We can now do the transformation. First move the Src if it was after Dest.
1720
+ if (MoveSrc)
1721
+ SrcAlloca->moveBefore (DestAlloca->getIterator ());
1722
+
1686
1723
// Align the allocas appropriately.
1687
1724
SrcAlloca->setAlignment (
1688
1725
std::max (SrcAlloca->getAlign (), DestAlloca->getAlign ()));
1689
1726
1727
+ // Size the allocas appropriately.
1728
+ Value *SrcArraySize = SrcAlloca->getArraySize ();
1729
+ Value *DestArraySize = DestAlloca->getArraySize ();
1730
+ IRBuilder<InstSimplifyFolder> Builder (SrcAlloca->getContext (),
1731
+ InstSimplifyFolder (DL));
1732
+ Builder.SetInsertPoint (SrcAlloca);
1733
+ Type *Int32Ty = Builder.getInt32Ty ();
1734
+ if (SrcType != DestType && SrcSize != DestSize) {
1735
+ SrcAlloca->setAllocatedType (Type::getInt8Ty (Load->getContext ()));
1736
+ if (SrcArraySize->getType () != Int32Ty)
1737
+ SrcArraySize = Builder.CreateZExtOrTrunc (SrcArraySize, Int32Ty);
1738
+ if (DestArraySize->getType () != Int32Ty)
1739
+ DestArraySize = Builder.CreateZExtOrTrunc (DestArraySize, Int32Ty);
1740
+ SrcArraySize = Builder.CreateMul (
1741
+ SrcArraySize, ConstantInt::get (Int32Ty, SrcSize.getFixedValue ()), " " ,
1742
+ true , true );
1743
+ DestArraySize = Builder.CreateMul (
1744
+ DestArraySize, ConstantInt::get (Int32Ty, DestSize.getFixedValue ()), " " ,
1745
+ true , true );
1746
+ }
1747
+ if (SrcArraySize != DestArraySize) {
1748
+ if (SrcArraySize->getType () != DestArraySize->getType ()) {
1749
+ SrcArraySize = Builder.CreateZExtOrTrunc (SrcArraySize, Int32Ty);
1750
+ DestArraySize = Builder.CreateZExtOrTrunc (DestArraySize, Int32Ty);
1751
+ }
1752
+ SrcAlloca->setOperand (0 , Builder.CreateBinaryIntrinsic (
1753
+ Intrinsic::umax, SrcArraySize, DestArraySize));
1754
+ }
1755
+
1690
1756
// Merge the two allocas.
1691
1757
DestAlloca->replaceAllUsesWith (SrcAlloca);
1692
1758
eraseInstruction (DestAlloca);
0 commit comments