@@ -1366,56 +1366,65 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1366
1366
1367
1367
/// Determine whether the pointer V had only undefined content (due to Def) up
1368
1368
/// to the given Size, either because it was freshly alloca'd or started its
1369
- // / lifetime.
1370
- static bool hasUndefContents (MemorySSA *MSSA, BatchAAResults &AA , Value *V,
1371
- MemoryDef *Def , Value *Size) {
1372
- if (MSSA-> isLiveOnEntryDef (Def))
1373
- return isa<AllocaInst>( getUnderlyingObject (V) );
1374
-
1375
- if ( auto *II = dyn_cast_or_null<IntrinsicInst>( Def-> getMemoryInst ())) {
1376
- if (II-> getIntrinsicID () == Intrinsic::lifetime_start) {
1377
- auto *LTSize = cast<ConstantInt>(II-> getArgOperand ( 0 ));
1378
-
1379
- if ( auto *CSize = dyn_cast<ConstantInt>(Size)) {
1380
- if (AA. isMustAlias (V, II-> getArgOperand ( 1 )) &&
1381
- LTSize-> getZExtValue () >= CSize-> getZExtValue ())
1382
- return true ;
1383
- }
1369
+ /// lifetime by walking the MSSA graph.
1370
+ static bool hadUndefContentsBefore (MemorySSA *MSSA, BatchAAResults &BAA , Value *V,
1371
+ MemoryAccess *Clobber, MemoryLocation Loc , Value *Size) {
1372
+ while ( 1 ) {
1373
+ Clobber = MSSA-> getWalker ()-> getClobberingMemoryAccess (Clobber, Loc, BAA );
1374
+ MemoryDef *Def = dyn_cast<MemoryDef>(Clobber);
1375
+ if (! Def)
1376
+ return false ;
1377
+
1378
+ if (MSSA-> isLiveOnEntryDef (Def))
1379
+ return isa<AllocaInst>( getUnderlyingObject (V));
1380
+
1381
+ if ( auto *II = dyn_cast_or_null<IntrinsicInst>(Def-> getMemoryInst ())) {
1382
+ if (II-> getIntrinsicID () == Intrinsic::lifetime_start) {
1383
+ auto *LTSize = cast<ConstantInt>(II-> getArgOperand ( 0 ));
1384
1384
1385
- // If the lifetime.start covers a whole alloca (as it almost always
1386
- // does) and we're querying a pointer based on that alloca, then we know
1387
- // the memory is definitely undef, regardless of how exactly we alias.
1388
- // The size also doesn't matter, as an out-of-bounds access would be UB.
1389
- if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject (V))) {
1390
- if (getUnderlyingObject (II->getArgOperand (1 )) == Alloca) {
1391
- const DataLayout &DL = Alloca->getDataLayout ();
1392
- if (std::optional<TypeSize> AllocaSize =
1393
- Alloca->getAllocationSize (DL))
1394
- if (*AllocaSize == LTSize->getValue ())
1385
+ if (Size)
1386
+ if (auto CSize = dyn_cast<ConstantInt>(Size))
1387
+ if (BAA.isMustAlias (V, II->getArgOperand (1 )) &&
1388
+ LTSize->getZExtValue () >= CSize->getZExtValue ())
1395
1389
return true ;
1390
+
1391
+ // If the lifetime.start covers a whole alloca (as it almost always
1392
+ // does) and we're querying a pointer based on that alloca, then we know
1393
+ // the memory is definitely undef, regardless of how exactly we alias.
1394
+ // The size also doesn't matter, as an out-of-bounds access would be UB.
1395
+ if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject (V))) {
1396
+ if (getUnderlyingObject (II->getArgOperand (1 )) == Alloca) {
1397
+ const DataLayout &DL = Alloca->getDataLayout ();
1398
+ if (std::optional<TypeSize> AllocaSize =
1399
+ Alloca->getAllocationSize (DL))
1400
+ if (*AllocaSize == LTSize->getValue ())
1401
+ return true ;
1402
+ }
1396
1403
}
1404
+ Clobber = Def->getDefiningAccess ();
1405
+ continue ;
1406
+ } else if (II->getIntrinsicID () == Intrinsic::lifetime_end) {
1407
+ Clobber = Def->getDefiningAccess ();
1408
+ continue ;
1397
1409
}
1398
1410
}
1399
- }
1400
1411
1401
- return false ;
1412
+ return false ;
1413
+ }
1402
1414
}
1403
1415
1404
1416
// If the memcpy is larger than the previous, but the memory was undef prior to
1405
1417
// that, we can just ignore the tail. Technically we're only interested in the
1406
1418
// bytes from 0..MemSrcOffset and MemSrcLength+MemSrcOffset..CopySize here, but
1407
- // as we can't easily represent this location (hasUndefContents uses mustAlias
1419
+ // as we can't easily represent this location (hadUndefContentsBefore uses mustAlias
1408
1420
// which cannot deal with offsets), we use the full 0..CopySize range.
1409
1421
static bool overreadUndefContents (MemorySSA *MSSA, MemCpyInst *MemCpy,
1410
1422
MemIntrinsic *MemSrc, BatchAAResults &BAA) {
1411
1423
Value *CopySize = MemCpy->getLength ();
1412
- MemoryLocation MemCpyLoc = MemoryLocation::getForSource (MemCpy);
1413
- MemoryUseOrDef *MemSrcAccess = MSSA->getMemoryAccess (MemSrc);
1414
- MemoryAccess *Clobber = MSSA->getWalker ()->getClobberingMemoryAccess (
1415
- MemSrcAccess->getDefiningAccess (), MemCpyLoc, BAA);
1416
- if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1417
- if (hasUndefContents (MSSA, BAA, MemCpy->getSource (), MD, CopySize))
1418
- return true ;
1424
+ MemoryLocation LoadLoc = MemoryLocation::getForSource (MemCpy);
1425
+ MemoryAccess *MemSrcAccess = MSSA->getMemoryAccess (MemSrc)->getDefiningAccess ();
1426
+ if (hadUndefContentsBefore (MSSA, BAA, MemCpy->getSource (), MemSrcAccess, LoadLoc, CopySize))
1427
+ return true ;
1419
1428
return false ;
1420
1429
}
1421
1430
@@ -1573,11 +1582,14 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1573
1582
// since both llvm.lifetime.start and llvm.lifetime.end intrinsics
1574
1583
// practically fill all the bytes of the alloca with an undefined
1575
1584
// value, although conceptually marked as alive/dead.
1576
- int64_t Size = cast<ConstantInt>(UI->getOperand (0 ))->getSExtValue ();
1577
- if (Size < 0 || Size == DestSize) {
1578
- LifetimeMarkers.push_back (UI);
1579
- continue ;
1580
- }
1585
+ // We don't currently track GEP offsets and sizes, so we don't have
1586
+ // a way to check whether this lifetime marker affects the relevant
1587
+ // memory regions.
1588
+ // While we only really need to delete lifetime.end from Src and
1589
+ // lifetime.start from Dst, those are often implied by the memcpy
1590
+ // anyway, so hopefully not much is lost by removing all of them.
1591
+ LifetimeMarkers.push_back (UI);
1592
+ continue ;
1581
1593
}
1582
1594
AAMetadataInstrs.insert (UI);
1583
1595
@@ -1594,9 +1606,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1594
1606
return true ;
1595
1607
};
1596
1608
1597
- // Check that dest has no Mod/Ref, from the alloca to the Store, except full
1598
- // size lifetime intrinsics. And collect modref inst for the reachability
1599
- // check.
1609
+ // Check that dest has no Mod/Ref, from the alloca to the Store. And collect
1610
+ // modref inst for the reachability check.
1600
1611
ModRefInfo DestModRef = ModRefInfo::NoModRef;
1601
1612
MemoryLocation DestLoc (DestAlloca, LocationSize::precise (Size));
1602
1613
SmallVector<BasicBlock *, 8 > ReachabilityWorklist;
@@ -1779,8 +1790,9 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
1779
1790
if (processMemSetMemCpyDependence (M, MDep, BAA))
1780
1791
return true ;
1781
1792
1793
+ MemoryLocation SrcLoc = MemoryLocation::getForSource (M);
1782
1794
MemoryAccess *SrcClobber = MSSA->getWalker ()->getClobberingMemoryAccess (
1783
- AnyClobber, MemoryLocation::getForSource (M) , BAA);
1795
+ AnyClobber, SrcLoc , BAA);
1784
1796
1785
1797
// There are five possible optimizations we can do for memcpy:
1786
1798
// a) memcpy-memcpy xform which exposes redundance for DSE.
@@ -1820,7 +1832,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
1820
1832
}
1821
1833
}
1822
1834
1823
- if (hasUndefContents (MSSA, BAA, M->getSource (), MD , M->getLength ())) {
1835
+ if (hadUndefContentsBefore (MSSA, BAA, M->getSource (), AnyClobber, SrcLoc , M->getLength ())) {
1824
1836
LLVM_DEBUG (dbgs () << " Removed memcpy from undef\n " );
1825
1837
eraseInstruction (M);
1826
1838
++NumMemCpyInstr;
0 commit comments