Skip to content

Commit 26b2b0f

Browse files
vtjnash authored and tomtor committed
[MemCpyOpt] handle memcpy from memset in more cases (llvm#140954)
This aims to reduce the divergence between the initial checks in this function and those in processMemCpyMemCpyDependence (in particular, by adding handling of offsets), with the goal of eventually reducing duplication there and improving this pass in other ways.
1 parent 0506c6b commit 26b2b0f

File tree

5 files changed

+97
-31
lines changed

5 files changed

+97
-31
lines changed

llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Lines changed: 47 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1364,8 +1364,9 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
13641364
return true;
13651365
}
13661366

1367-
/// Determine whether the instruction has undefined content for the given Size,
1368-
/// either because it was freshly alloca'd or started its lifetime.
1367+
/// Determine whether the pointer V had only undefined content (due to Def) up
1368+
/// to the given Size, either because it was freshly alloca'd or started its
1369+
/// lifetime.
13691370
static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
13701371
MemoryDef *Def, Value *Size) {
13711372
if (MSSA->isLiveOnEntryDef(Def))
@@ -1400,6 +1401,24 @@ static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
14001401
return false;
14011402
}
14021403

1404+
// If the memcpy is larger than the previous, but the memory was undef prior to
1405+
// that, we can just ignore the tail. Technically we're only interested in the
1406+
// bytes from 0..MemSrcOffset and MemSrcLength+MemSrcOffset..CopySize here, but
1407+
// as we can't easily represent this location (hasUndefContents uses mustAlias
1408+
// which cannot deal with offsets), we use the full 0..CopySize range.
1409+
static bool overreadUndefContents(MemorySSA *MSSA, MemCpyInst *MemCpy,
1410+
MemIntrinsic *MemSrc, BatchAAResults &BAA) {
1411+
Value *CopySize = MemCpy->getLength();
1412+
MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1413+
MemoryUseOrDef *MemSrcAccess = MSSA->getMemoryAccess(MemSrc);
1414+
MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1415+
MemSrcAccess->getDefiningAccess(), MemCpyLoc, BAA);
1416+
if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1417+
if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize))
1418+
return true;
1419+
return false;
1420+
}
1421+
14031422
/// Transform memcpy to memset when its source was just memset.
14041423
/// In other words, turn:
14051424
/// \code
@@ -1415,19 +1434,25 @@ static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
14151434
bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
14161435
MemSetInst *MemSet,
14171436
BatchAAResults &BAA) {
1418-
// Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
1419-
// memcpying from the same address. Otherwise it is hard to reason about.
1420-
if (!BAA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
1421-
return false;
1422-
14231437
Value *MemSetSize = MemSet->getLength();
14241438
Value *CopySize = MemCpy->getLength();
14251439

1426-
if (MemSetSize != CopySize) {
1427-
// Make sure the memcpy doesn't read any more than what the memset wrote.
1428-
// Don't worry about sizes larger than i64.
1440+
int64_t MOffset = 0;
1441+
const DataLayout &DL = MemCpy->getModule()->getDataLayout();
1442+
// We can only transform memcpy's where the dest of one is the source of the
1443+
// other, or the memory transfer has a known offset from the memset.
1444+
if (MemCpy->getSource() != MemSet->getDest()) {
1445+
std::optional<int64_t> Offset =
1446+
MemCpy->getSource()->getPointerOffsetFrom(MemSet->getDest(), DL);
1447+
if (!Offset || *Offset < 0)
1448+
return false;
1449+
MOffset = *Offset;
1450+
}
14291451

1430-
// A known memset size is required.
1452+
if (MOffset != 0 || MemSetSize != CopySize) {
1453+
// Make sure the memcpy doesn't read any more than what the memset wrote,
1454+
// other than undef. Don't worry about sizes larger than i64. A known memset
1455+
// size is required.
14311456
auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
14321457
if (!CMemSetSize)
14331458
return false;
@@ -1436,23 +1461,18 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
14361461
auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
14371462
if (!CCopySize)
14381463
return false;
1439-
if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
1440-
// If the memcpy is larger than the memset, but the memory was undef prior
1441-
// to the memset, we can just ignore the tail. Technically we're only
1442-
// interested in the bytes from MemSetSize..CopySize here, but as we can't
1443-
// easily represent this location, we use the full 0..CopySize range.
1444-
MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1445-
bool CanReduceSize = false;
1446-
MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
1447-
MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1448-
MemSetAccess->getDefiningAccess(), MemCpyLoc, BAA);
1449-
if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1450-
if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize))
1451-
CanReduceSize = true;
1452-
1453-
if (!CanReduceSize)
1464+
if (CCopySize->getZExtValue() + MOffset > CMemSetSize->getZExtValue()) {
1465+
if (!overreadUndefContents(MSSA, MemCpy, MemSet, BAA))
14541466
return false;
1455-
CopySize = MemSetSize;
1467+
// Clip the memcpy to the bounds of the memset
1468+
if (MOffset == 0)
1469+
CopySize = MemSetSize;
1470+
else
1471+
CopySize =
1472+
ConstantInt::get(CopySize->getType(),
1473+
CMemSetSize->getZExtValue() <= (uint64_t)MOffset
1474+
? 0
1475+
: CMemSetSize->getZExtValue() - MOffset);
14561476
}
14571477
}
14581478

llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,53 @@ define void @test_write_before_memset_in_both_regions(ptr %result) {
187187
ret void
188188
}
189189

190+
define void @test_negative_offset_memset(ptr %result) {
191+
; CHECK-LABEL: @test_negative_offset_memset(
192+
; CHECK-NEXT: [[A1:%.*]] = alloca [16 x i8], align 8
193+
; CHECK-NEXT: [[A:%.*]] = getelementptr i8, ptr [[A1]], i32 4
194+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 12, i1 false)
195+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[RESULT:%.*]], ptr align 8 [[A1]], i64 12, i1 false)
196+
; CHECK-NEXT: ret void
197+
;
198+
%a = alloca [ 16 x i8 ], align 8
199+
%b = getelementptr i8, ptr %a, i32 4
200+
call void @llvm.memset.p0.i64(ptr align 8 %b, i8 0, i64 12, i1 false)
201+
call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 12, i1 false)
202+
ret void
203+
}
204+
205+
define void @test_offset_memsetcpy(ptr %result) {
206+
; CHECK-LABEL: @test_offset_memsetcpy(
207+
; CHECK-NEXT: [[A1:%.*]] = alloca [16 x i8], align 8
208+
; CHECK-NEXT: [[A:%.*]] = getelementptr i8, ptr [[A1]], i32 4
209+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[A1]], i8 0, i64 12, i1 false)
210+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[RESULT:%.*]], i8 0, i64 8, i1 false)
211+
; CHECK-NEXT: ret void
212+
;
213+
%a = alloca [ 16 x i8 ], align 8
214+
%b = getelementptr i8, ptr %a, i32 4
215+
call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 false)
216+
call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %b, i64 12, i1 false)
217+
ret void
218+
}
219+
220+
define void @test_two_memset(ptr %result) {
221+
; CHECK-LABEL: @test_two_memset(
222+
; CHECK-NEXT: [[A:%.*]] = alloca [16 x i8], align 8
223+
; CHECK-NEXT: [[B:%.*]] = getelementptr i8, ptr [[A]], i32 12
224+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 12, i1 false)
225+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[B]], i8 1, i64 4, i1 false)
226+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[RESULT:%.*]], ptr align 8 [[A]], i64 16, i1 false)
227+
; CHECK-NEXT: ret void
228+
;
229+
%a = alloca [ 16 x i8 ], align 8
230+
%b = getelementptr i8, ptr %a, i32 12
231+
call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 false)
232+
call void @llvm.memset.p0.i64(ptr align 8 %b, i8 1, i64 4, i1 false)
233+
call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 false)
234+
ret void
235+
}
236+
190237
declare ptr @malloc(i64)
191238
declare void @free(ptr)
192239

llvm/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,11 +73,10 @@ define void @test_different_source_gep(ptr %dst1, ptr %dst2, i8 %c) {
7373
; CHECK-LABEL: @test_different_source_gep(
7474
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[DST1:%.*]], i8 [[C:%.*]], i64 128, i1 false)
7575
; CHECK-NEXT: [[P:%.*]] = getelementptr i8, ptr [[DST1]], i64 64
76-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST2:%.*]], ptr [[P]], i64 64, i1 false)
76+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[DST2:%.*]], i8 [[C]], i64 64, i1 false)
7777
; CHECK-NEXT: ret void
7878
;
7979
call void @llvm.memset.p0.i64(ptr %dst1, i8 %c, i64 128, i1 false)
80-
; FIXME: We could optimize this as well.
8180
%p = getelementptr i8, ptr %dst1, i64 64
8281
call void @llvm.memcpy.p0.p0.i64(ptr %dst2, ptr %p, i64 64, i1 false)
8382
ret void

llvm/test/Transforms/MemCpyOpt/mixed-sizes.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ define i32 @foo(i1 %z) {
1919
; CHECK: for.body3.lr.ph:
2020
; CHECK-NEXT: br label [[FOR_INC7_1]]
2121
; CHECK: for.inc7.1:
22-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[A]], ptr align 4 [[SCEVGEP]], i64 4, i1 false)
22+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A]], i8 0, i64 4, i1 false)
2323
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
2424
; CHECK-NEXT: ret i32 [[TMP2]]
2525
;

llvm/test/Transforms/MemCpyOpt/variable-sized-memset-memcpy.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ define void @test(ptr %src, i8 %c, i64 %size) {
1818
ret void
1919
}
2020

21-
; Differing sizes, so left as it is.
21+
; Differing sizes, but would be UB if size1 < size2 since the memcpy would reference outside of the first alloca
2222
define void @negative_test(ptr %src, i8 %c, i64 %size1, i64 %size2) {
2323
; CHECK-LABEL: @negative_test(
2424
; CHECK-NEXT: [[DST1:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1

0 commit comments

Comments
 (0)