@@ -1366,56 +1366,68 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1366
1366
1367
1367
/// Determine whether the pointer V had only undefined content (due to Def) up
1368
1368
/// to the given Size, either because it was freshly alloca'd or started its
1369
- // / lifetime.
1370
- static bool hasUndefContents (MemorySSA *MSSA, BatchAAResults &AA, Value *V,
1371
- MemoryDef *Def, Value *Size) {
1372
- if (MSSA->isLiveOnEntryDef (Def))
1373
- return isa<AllocaInst>(getUnderlyingObject (V));
1374
-
1375
- if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst ())) {
1376
- if (II->getIntrinsicID () == Intrinsic::lifetime_start) {
1377
- auto *LTSize = cast<ConstantInt>(II->getArgOperand (0 ));
1378
-
1379
- if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
1380
- if (AA.isMustAlias (V, II->getArgOperand (1 )) &&
1381
- LTSize->getZExtValue () >= CSize->getZExtValue ())
1382
- return true ;
1383
- }
1369
+ /// lifetime; this is established by walking the MSSA graph.
1370
+ static bool hadUndefContentsBefore (MemorySSA *MSSA, BatchAAResults &BAA,
1371
+ Value *V, MemoryAccess *Clobber,
1372
+ MemoryLocation Loc, Value *Size) {
1373
+ while (1 ) {
1374
+ Clobber = MSSA->getWalker ()->getClobberingMemoryAccess (Clobber, Loc, BAA);
1375
+ MemoryDef *Def = dyn_cast<MemoryDef>(Clobber);
1376
+ if (!Def)
1377
+ return false ;
1378
+
1379
+ if (MSSA->isLiveOnEntryDef (Def))
1380
+ return isa<AllocaInst>(getUnderlyingObject (V));
1381
+
1382
+ if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst ())) {
1383
+ if (II->getIntrinsicID () == Intrinsic::lifetime_start) {
1384
+ auto *LTSize = cast<ConstantInt>(II->getArgOperand (0 ));
1384
1385
1385
- // If the lifetime.start covers a whole alloca (as it almost always
1386
- // does) and we're querying a pointer based on that alloca, then we know
1387
- // the memory is definitely undef, regardless of how exactly we alias.
1388
- // The size also doesn't matter, as an out-of-bounds access would be UB.
1389
- if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject (V))) {
1390
- if (getUnderlyingObject (II->getArgOperand (1 )) == Alloca) {
1391
- const DataLayout &DL = Alloca->getDataLayout ();
1392
- if (std::optional<TypeSize> AllocaSize =
1393
- Alloca->getAllocationSize (DL))
1394
- if (*AllocaSize == LTSize->getValue ())
1386
+ if (Size)
1387
+ if (auto CSize = dyn_cast<ConstantInt>(Size))
1388
+ if (BAA.isMustAlias (V, II->getArgOperand (1 )) &&
1389
+ LTSize->getZExtValue () >= CSize->getZExtValue ())
1395
1390
return true ;
1391
+
1392
+ // If the lifetime.start covers a whole alloca (as it almost always
1393
+ // does) and we're querying a pointer based on that alloca, then we know
1394
+ // the memory is definitely undef, regardless of how exactly we alias.
1395
+ // The size also doesn't matter, as an out-of-bounds access would be UB.
1396
+ if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject (V))) {
1397
+ if (getUnderlyingObject (II->getArgOperand (1 )) == Alloca) {
1398
+ const DataLayout &DL = Alloca->getDataLayout ();
1399
+ if (std::optional<TypeSize> AllocaSize =
1400
+ Alloca->getAllocationSize (DL))
1401
+ if (*AllocaSize == LTSize->getValue ())
1402
+ return true ;
1403
+ }
1396
1404
}
1405
+ Clobber = Def->getDefiningAccess ();
1406
+ continue ;
1407
+ } else if (II->getIntrinsicID () == Intrinsic::lifetime_end) {
1408
+ Clobber = Def->getDefiningAccess ();
1409
+ continue ;
1397
1410
}
1398
1411
}
1399
- }
1400
1412
1401
- return false ;
1413
+ return false ;
1414
+ }
1402
1415
}
1403
1416
1404
1417
// If the memcpy is larger than the previous, but the memory was undef prior to
1405
1418
// that, we can just ignore the tail. Technically we're only interested in the
1406
1419
// bytes from 0..MemSrcOffset and MemSrcLength+MemSrcOffset..CopySize here, but
1407
- // as we can't easily represent this location (hasUndefContents uses mustAlias
1408
- // which cannot deal with offsets), we use the full 0..CopySize range.
1420
+ // as we can't easily represent this location (hadUndefContentsBefore uses
1421
+ // mustAlias which cannot deal with offsets), we use the full 0..CopySize range.
1409
1422
static bool overreadUndefContents (MemorySSA *MSSA, MemCpyInst *MemCpy,
1410
1423
MemIntrinsic *MemSrc, BatchAAResults &BAA) {
1411
1424
Value *CopySize = MemCpy->getLength ();
1412
- MemoryLocation MemCpyLoc = MemoryLocation::getForSource (MemCpy);
1413
- MemoryUseOrDef *MemSrcAccess = MSSA->getMemoryAccess (MemSrc);
1414
- MemoryAccess *Clobber = MSSA->getWalker ()->getClobberingMemoryAccess (
1415
- MemSrcAccess->getDefiningAccess (), MemCpyLoc, BAA);
1416
- if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1417
- if (hasUndefContents (MSSA, BAA, MemCpy->getSource (), MD, CopySize))
1418
- return true ;
1425
+ MemoryLocation LoadLoc = MemoryLocation::getForSource (MemCpy);
1426
+ MemoryAccess *MemSrcAccess =
1427
+ MSSA->getMemoryAccess (MemSrc)->getDefiningAccess ();
1428
+ if (hadUndefContentsBefore (MSSA, BAA, MemCpy->getSource (), MemSrcAccess,
1429
+ LoadLoc, CopySize))
1430
+ return true ;
1419
1431
return false ;
1420
1432
}
1421
1433
@@ -1573,11 +1585,14 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1573
1585
// since both llvm.lifetime.start and llvm.lifetime.end intrinsics
1574
1586
// practically fill all the bytes of the alloca with an undefined
1575
1587
// value, although conceptually marked as alive/dead.
1576
- int64_t Size = cast<ConstantInt>(UI->getOperand (0 ))->getSExtValue ();
1577
- if (Size < 0 || Size == DestSize) {
1578
- LifetimeMarkers.push_back (UI);
1579
- continue ;
1580
- }
1588
+ // We don't currently track GEP offsets and sizes, so we don't have
1589
+ // a way to check whether this lifetime marker affects the relevant
1590
+ // memory regions.
1591
+ // While we only really need to delete lifetime.end from Src and
1592
+ // lifetime.start from Dst, those are often implied by the memcpy
1593
+ // anyway, so hopefully not much is lost by removing all of them.
1594
+ LifetimeMarkers.push_back (UI);
1595
+ continue ;
1581
1596
}
1582
1597
AAMetadataInstrs.insert (UI);
1583
1598
@@ -1594,9 +1609,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1594
1609
return true ;
1595
1610
};
1596
1611
1597
- // Check that dest has no Mod/Ref, from the alloca to the Store, except full
1598
- // size lifetime intrinsics. And collect modref inst for the reachability
1599
- // check.
1612
+ // Check that dest has no Mod/Ref, from the alloca to the Store. And collect
1613
+ // modref inst for the reachability check.
1600
1614
ModRefInfo DestModRef = ModRefInfo::NoModRef;
1601
1615
MemoryLocation DestLoc (DestAlloca, LocationSize::precise (Size));
1602
1616
SmallVector<BasicBlock *, 8 > ReachabilityWorklist;
@@ -1779,8 +1793,9 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
1779
1793
if (processMemSetMemCpyDependence (M, MDep, BAA))
1780
1794
return true ;
1781
1795
1782
- MemoryAccess *SrcClobber = MSSA->getWalker ()->getClobberingMemoryAccess (
1783
- AnyClobber, MemoryLocation::getForSource (M), BAA);
1796
+ MemoryLocation SrcLoc = MemoryLocation::getForSource (M);
1797
+ MemoryAccess *SrcClobber =
1798
+ MSSA->getWalker ()->getClobberingMemoryAccess (AnyClobber, SrcLoc, BAA);
1784
1799
1785
1800
// There are five possible optimizations we can do for memcpy:
1786
1801
// a) memcpy-memcpy xform which exposes redundance for DSE.
@@ -1820,7 +1835,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
1820
1835
}
1821
1836
}
1822
1837
1823
- if (hasUndefContents (MSSA, BAA, M->getSource (), MD, M->getLength ())) {
1838
+ if (hadUndefContentsBefore (MSSA, BAA, M->getSource (), AnyClobber, SrcLoc,
1839
+ M->getLength ())) {
1824
1840
LLVM_DEBUG (dbgs () << " Removed memcpy from undef\n " );
1825
1841
eraseInstruction (M);
1826
1842
++NumMemCpyInstr;
0 commit comments