Skip to content

Commit b1ec0fc

Browse files
committed
[memcpyopt] fix incorrect handling of lifetime markers
Having lifetime markers should only increase the information available to LLVM, but it would instead rely on the callback to entirely give up if it encountered a lifetime marker that wasn't full size, but sub-optimal lifetime markers are not supposed to be forbidding optimizations that would otherwise apply if they were either absent or optimal. This pass wasn't tracking GEP offsets either, so it wasn't quite correctly handled either, although earlier sub-optimal checks that this size is the same as the alloca test made this safe in the past, and unlikely to have encountered anything else in the past.
1 parent c19e900 commit b1ec0fc

File tree

3 files changed

+67
-60
lines changed

3 files changed

+67
-60
lines changed

llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Lines changed: 63 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1366,56 +1366,68 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
13661366

13671367
/// Determine whether the pointer V had only undefined content (due to Def) up
13681368
/// to the given Size, either because it was freshly alloca'd or started its
1369-
/// lifetime.
1370-
static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
1371-
MemoryDef *Def, Value *Size) {
1372-
if (MSSA->isLiveOnEntryDef(Def))
1373-
return isa<AllocaInst>(getUnderlyingObject(V));
1374-
1375-
if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
1376-
if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
1377-
auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));
1378-
1379-
if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
1380-
if (AA.isMustAlias(V, II->getArgOperand(1)) &&
1381-
LTSize->getZExtValue() >= CSize->getZExtValue())
1382-
return true;
1383-
}
1369+
/// lifetime by walking the MSSA graph.
1370+
static bool hadUndefContentsBefore(MemorySSA *MSSA, BatchAAResults &BAA,
1371+
Value *V, MemoryAccess *Clobber,
1372+
MemoryLocation Loc, Value *Size) {
1373+
while (1) {
1374+
Clobber = MSSA->getWalker()->getClobberingMemoryAccess(Clobber, Loc, BAA);
1375+
MemoryDef *Def = dyn_cast<MemoryDef>(Clobber);
1376+
if (!Def)
1377+
return false;
1378+
1379+
if (MSSA->isLiveOnEntryDef(Def))
1380+
return isa<AllocaInst>(getUnderlyingObject(V));
1381+
1382+
if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
1383+
if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
1384+
auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));
13841385

1385-
// If the lifetime.start covers a whole alloca (as it almost always
1386-
// does) and we're querying a pointer based on that alloca, then we know
1387-
// the memory is definitely undef, regardless of how exactly we alias.
1388-
// The size also doesn't matter, as an out-of-bounds access would be UB.
1389-
if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
1390-
if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
1391-
const DataLayout &DL = Alloca->getDataLayout();
1392-
if (std::optional<TypeSize> AllocaSize =
1393-
Alloca->getAllocationSize(DL))
1394-
if (*AllocaSize == LTSize->getValue())
1386+
if (Size)
1387+
if (auto CSize = dyn_cast<ConstantInt>(Size))
1388+
if (BAA.isMustAlias(V, II->getArgOperand(1)) &&
1389+
LTSize->getZExtValue() >= CSize->getZExtValue())
13951390
return true;
1391+
1392+
// If the lifetime.start covers a whole alloca (as it almost always
1393+
// does) and we're querying a pointer based on that alloca, then we know
1394+
// the memory is definitely undef, regardless of how exactly we alias.
1395+
// The size also doesn't matter, as an out-of-bounds access would be UB.
1396+
if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
1397+
if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
1398+
const DataLayout &DL = Alloca->getDataLayout();
1399+
if (std::optional<TypeSize> AllocaSize =
1400+
Alloca->getAllocationSize(DL))
1401+
if (*AllocaSize == LTSize->getValue())
1402+
return true;
1403+
}
13961404
}
1405+
Clobber = Def->getDefiningAccess();
1406+
continue;
1407+
} else if (II->getIntrinsicID() == Intrinsic::lifetime_end) {
1408+
Clobber = Def->getDefiningAccess();
1409+
continue;
13971410
}
13981411
}
1399-
}
14001412

1401-
return false;
1413+
return false;
1414+
}
14021415
}
14031416

14041417
// If the memcpy is larger than the previous, but the memory was undef prior to
14051418
// that, we can just ignore the tail. Technically we're only interested in the
14061419
// bytes from 0..MemSrcOffset and MemSrcLength+MemSrcOffset..CopySize here, but
1407-
// as we can't easily represent this location (hasUndefContents uses mustAlias
1408-
// which cannot deal with offsets), we use the full 0..CopySize range.
1420+
// as we can't easily represent this location (hadUndefContentsBefore uses
1421+
// mustAlias which cannot deal with offsets), we use the full 0..CopySize range.
14091422
static bool overreadUndefContents(MemorySSA *MSSA, MemCpyInst *MemCpy,
14101423
MemIntrinsic *MemSrc, BatchAAResults &BAA) {
14111424
Value *CopySize = MemCpy->getLength();
1412-
MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1413-
MemoryUseOrDef *MemSrcAccess = MSSA->getMemoryAccess(MemSrc);
1414-
MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1415-
MemSrcAccess->getDefiningAccess(), MemCpyLoc, BAA);
1416-
if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1417-
if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize))
1418-
return true;
1425+
MemoryLocation LoadLoc = MemoryLocation::getForSource(MemCpy);
1426+
MemoryAccess *MemSrcAccess =
1427+
MSSA->getMemoryAccess(MemSrc)->getDefiningAccess();
1428+
if (hadUndefContentsBefore(MSSA, BAA, MemCpy->getSource(), MemSrcAccess,
1429+
LoadLoc, CopySize))
1430+
return true;
14191431
return false;
14201432
}
14211433

@@ -1573,11 +1585,14 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
15731585
// since both llvm.lifetime.start and llvm.lifetime.end intrinsics
15741586
// practically fill all the bytes of the alloca with an undefined
15751587
// value, although conceptually marked as alive/dead.
1576-
int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
1577-
if (Size < 0 || Size == DestSize) {
1578-
LifetimeMarkers.push_back(UI);
1579-
continue;
1580-
}
1588+
// We don't currently track GEP offsets and sizes, so we don't have
1589+
// a way to check whether this lifetime marker affects the relevant
1590+
// memory regions.
1591+
// While we only really need to delete lifetime.end from Src and
1592+
// lifetime.begin from Dst, those are often implied by the memcpy
1593+
// anyways so hopefully not much is lost by removing all of them.
1594+
LifetimeMarkers.push_back(UI);
1595+
continue;
15811596
}
15821597
AAMetadataInstrs.insert(UI);
15831598

@@ -1594,9 +1609,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
15941609
return true;
15951610
};
15961611

1597-
// Check that dest has no Mod/Ref, from the alloca to the Store, except full
1598-
// size lifetime intrinsics. And collect modref inst for the reachability
1599-
// check.
1612+
// Check that dest has no Mod/Ref, from the alloca to the Store. And collect
1613+
// modref inst for the reachability check.
16001614
ModRefInfo DestModRef = ModRefInfo::NoModRef;
16011615
MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
16021616
SmallVector<BasicBlock *, 8> ReachabilityWorklist;
@@ -1779,8 +1793,9 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
17791793
if (processMemSetMemCpyDependence(M, MDep, BAA))
17801794
return true;
17811795

1782-
MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
1783-
AnyClobber, MemoryLocation::getForSource(M), BAA);
1796+
MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
1797+
MemoryAccess *SrcClobber =
1798+
MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, SrcLoc, BAA);
17841799

17851800
// There are five possible optimizations we can do for memcpy:
17861801
// a) memcpy-memcpy xform which exposes redundance for DSE.
@@ -1820,7 +1835,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
18201835
}
18211836
}
18221837

1823-
if (hasUndefContents(MSSA, BAA, M->getSource(), MD, M->getLength())) {
1838+
if (hadUndefContentsBefore(MSSA, BAA, M->getSource(), AnyClobber, SrcLoc,
1839+
M->getLength())) {
18241840
LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
18251841
eraseInstruction(M);
18261842
++NumMemCpyInstr;

llvm/test/Transforms/MemCpyOpt/memcpy-undef.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,6 @@ define void @test_lifetime_partial_alias_3(ptr noalias %dst) {
9696
; CHECK-NEXT: [[A:%.*]] = alloca [16 x i8], align 1
9797
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[A]])
9898
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 8
99-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST:%.*]], ptr [[GEP]], i64 4, i1 false)
10099
; CHECK-NEXT: ret void
101100
;
102101
%a = alloca [16 x i8]
@@ -112,7 +111,6 @@ define void @test_lifetime_partial_alias_4(ptr noalias %dst) {
112111
; CHECK-NEXT: [[A:%.*]] = alloca [16 x i8], align 1
113112
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[A]])
114113
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 8
115-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST:%.*]], ptr [[GEP]], i64 8, i1 false)
116114
; CHECK-NEXT: ret void
117115
;
118116
%a = alloca [16 x i8]

llvm/test/Transforms/MemCpyOpt/stack-move.ll

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1649,20 +1649,13 @@ loop_exit:
16491649
ret void
16501650
}
16511651

1652-
; Tests that failure because partial-sized lifetimes are counted as mod.
1652+
; Tests that partial-sized lifetimes are not inhibiting the optimizer
16531653
define void @partial_lifetime() {
16541654
; CHECK-LABEL: define void @partial_lifetime() {
1655-
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
1656-
; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
1657-
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr captures(none) [[SRC]])
1658-
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 3, ptr captures(none) [[DEST]])
1659-
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
1660-
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr captures(none) [[SRC]])
1661-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
1662-
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 3, ptr captures(none) [[SRC]])
1655+
; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
1656+
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[DEST]], align 4
1657+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr captures(none) [[DEST]])
16631658
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr captures(none) [[DEST]])
1664-
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr captures(none) [[SRC]])
1665-
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr captures(none) [[DEST]])
16661659
; CHECK-NEXT: ret void
16671660
;
16681661
%src = alloca %struct.Foo, align 4

0 commit comments

Comments
 (0)