Skip to content

Commit 0d828e6

Browse files
committed
[memcpyopt] handle memcpy from memset in more cases
This aims to reduce the divergence between this function (performMemCpyToMemSetOptzn) and processMemCpyMemCpyDependence, with the goal of eventually combining the two to remove the duplication and improve this pass.
1 parent 2f66e5f commit 0d828e6

File tree

6 files changed

+124
-41
lines changed

6 files changed

+124
-41
lines changed

llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Lines changed: 67 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1367,8 +1367,9 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
13671367
return true;
13681368
}
13691369

1370-
/// Determine whether the instruction has undefined content for the given Size,
1371-
/// either because it was freshly alloca'd or started its lifetime.
1370+
/// Determine whether the pointer V had only undefined content from Def up to
1371+
/// the given Size, either because it was freshly alloca'd or started its
1372+
/// lifetime.
13721373
static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
13731374
MemoryDef *Def, Value *Size) {
13741375
if (MSSA->isLiveOnEntryDef(Def))
@@ -1403,6 +1404,24 @@ static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
14031404
return false;
14041405
}
14051406

1407+
static bool coversInputFully(MemorySSA *MSSA, MemCpyInst *MemCpy,
1408+
MemIntrinsic *MemSrc, BatchAAResults &BAA) {
1409+
// If the memcpy is larger than the previous write (MemSrc), but the memory was undef prior
1410+
// to that, we can just ignore the tail. Technically we're only
1411+
// interested in the bytes from 0..MemSrcOffset and
1412+
// MemSrcLength+MemSrcOffset..CopySize here, but as we can't easily represent
1413+
// this location, we use the full 0..CopySize range.
1414+
Value *CopySize = MemCpy->getLength();
1415+
MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1416+
MemoryUseOrDef *MemSrcAccess = MSSA->getMemoryAccess(MemSrc);
1417+
MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1418+
MemSrcAccess->getDefiningAccess(), MemCpyLoc, BAA);
1419+
if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1420+
if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize))
1421+
return true;
1422+
return false;
1423+
}
1424+
14061425
/// Transform memcpy to memset when its source was just memset.
14071426
/// In other words, turn:
14081427
/// \code
@@ -1418,51 +1437,63 @@ static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
14181437
bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
14191438
MemSetInst *MemSet,
14201439
BatchAAResults &BAA) {
1421-
// Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
1422-
// memcpying from the same address. Otherwise it is hard to reason about.
1423-
if (!BAA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
1424-
return false;
1425-
14261440
Value *MemSetSize = MemSet->getLength();
14271441
Value *CopySize = MemCpy->getLength();
14281442

1429-
if (MemSetSize != CopySize) {
1430-
// Make sure the memcpy doesn't read any more than what the memset wrote.
1431-
// Don't worry about sizes larger than i64.
1432-
1433-
// A known memset size is required.
1434-
auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1435-
if (!CMemSetSize)
1443+
int64_t MOffset = 0;
1444+
const DataLayout &DL = MemCpy->getModule()->getDataLayout();
1445+
// We can only transform memcpy's where the dest of one is the source of the
1446+
// other, or they have a known offset.
1447+
if (MemCpy->getSource() != MemSet->getDest()) {
1448+
std::optional<int64_t> Offset =
1449+
MemCpy->getSource()->getPointerOffsetFrom(MemSet->getDest(), DL);
1450+
if (!Offset)
14361451
return false;
1452+
MOffset = *Offset;
1453+
}
14371454

1438-
// A known memcpy size is also required.
1455+
MaybeAlign MDestAlign = MemCpy->getDestAlign();
1456+
int64_t MOffsetAligned = MDestAlign.valueOrOne().value() > 1 && MOffset < 0 ? -(-MOffset & ~(MDestAlign.valueOrOne().value() - 1)) : MOffset; // Compute the MOffset that keeps MDest aligned (truncate towards zero)
1457+
if (MOffset != 0 || MemSetSize != CopySize) {
1458+
// Make sure the memcpy doesn't read any more than what the memset wrote, other than undef.
1459+
auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
14391460
auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
1440-
if (!CCopySize)
1441-
return false;
1442-
if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
1443-
// If the memcpy is larger than the memset, but the memory was undef prior
1444-
// to the memset, we can just ignore the tail. Technically we're only
1445-
// interested in the bytes from MemSetSize..CopySize here, but as we can't
1446-
// easily represent this location, we use the full 0..CopySize range.
1447-
MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1448-
bool CanReduceSize = false;
1449-
MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
1450-
MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1451-
MemSetAccess->getDefiningAccess(), MemCpyLoc, BAA);
1452-
if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1453-
if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize))
1454-
CanReduceSize = true;
1455-
1456-
if (!CanReduceSize)
1461+
// Don't worry about sizes larger than i64.
1462+
if (!CMemSetSize || !CCopySize || MOffset < 0 ||
1463+
CCopySize->getZExtValue() + MOffset > CMemSetSize->getZExtValue()) {
1464+
if (!coversInputFully(MSSA, MemCpy, MemSet, BAA))
14571465
return false;
1458-
CopySize = MemSetSize;
1466+
1467+
if (CMemSetSize && CCopySize) {
1468+
// If both have constant sizes and offsets, clip the memcpy to the bounds of the memset if applicable.
1469+
if (CCopySize->getZExtValue() + std::abs(MOffset) > CMemSetSize->getZExtValue()) {
1470+
if (MOffsetAligned == 0 || (MOffset < 0 && CCopySize->getZExtValue() + MOffset > CMemSetSize->getZExtValue()))
1471+
CopySize = MemSetSize;
1472+
else
1473+
CopySize = ConstantInt::get(CopySize->getType(), std::max((int64_t)0, (int64_t)(CMemSetSize->getZExtValue() - std::abs(MOffsetAligned))));
1474+
}
1475+
else if (MOffsetAligned < 0) {
1476+
// Both sizes are known here and the copy length plus |MOffset| fits in the memset size; if MOffsetAligned is negative, still clip the new memset
1477+
CopySize = ConstantInt::get(CopySize->getType(), CCopySize->getZExtValue() + MOffsetAligned);
1478+
}
1479+
}
1480+
else if (CCopySize && MOffsetAligned < 0) {
1481+
// Even if CMemSetSize isn't known, if the MOffsetAligned is negative, can still clip the new memset
1482+
CopySize = ConstantInt::get(CopySize->getType(), CCopySize->getZExtValue() + MOffsetAligned);
1483+
}
1484+
else {
1485+
MOffsetAligned = 0;
1486+
}
14591487
}
14601488
}
14611489

14621490
IRBuilder<> Builder(MemCpy);
1491+
Value *MDest = MemCpy->getRawDest();
1492+
if (MOffsetAligned < 0)
1493+
MDest = Builder.CreateInBoundsPtrAdd(MDest, Builder.getInt64(-MOffsetAligned));
14631494
Instruction *NewM =
1464-
Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
1465-
CopySize, MemCpy->getDestAlign());
1495+
Builder.CreateMemSet(MDest, MemSet->getOperand(1),
1496+
CopySize, MDestAlign);
14661497
auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(MemCpy));
14671498
auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
14681499
MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
@@ -1683,7 +1714,7 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
16831714
I->setMetadata(LLVMContext::MD_tbaa_struct, nullptr);
16841715
}
16851716

1686-
LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
1717+
LLVM_DEBUG(dbgs() << "Stack Move: Performed stack-move optimization\n");
16871718
NumStackMove++;
16881719
return true;
16891720
}

llvm/test/Transforms/MemCpyOpt/lifetime-missing.ll

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,12 @@ define void @test() {
1414
; CHECK-LABEL: define void @test() {
1515
; CHECK-NEXT: entry:
1616
; CHECK-NEXT: [[AGG_TMP_SROA_14:%.*]] = alloca [20 x i8], align 4
17-
; CHECK-NEXT: [[AGG_TMP_SROA_14_128_SROA_IDX:%.*]] = getelementptr i8, ptr [[AGG_TMP_SROA_14]], i64 4
17+
; CHECK-NEXT: [[AGG_TMP_SROA_15:%.*]] = alloca [20 x i8], align 4
18+
; CHECK-NEXT: [[AGG_TMP_SROA_14_128_SROA_IDX:%.*]] = getelementptr i8, ptr [[AGG_TMP_SROA_15]], i64 4
1819
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[AGG_TMP_SROA_14_128_SROA_IDX]], i8 0, i64 1, i1 false)
20+
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 20, ptr [[AGG_TMP_SROA_14]])
21+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[AGG_TMP_SROA_14]], i64 4
22+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[TMP0]], i8 0, i64 1, i1 false)
1923
; CHECK-NEXT: [[AGG_TMP3_SROA_35_128_SROA_IDX:%.*]] = getelementptr i8, ptr [[AGG_TMP_SROA_14]], i64 4
2024
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr inttoptr (i64 4 to ptr), i8 0, i64 1, i1 false)
2125
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr null, i8 0, i64 1, i1 false)

llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,54 @@ define void @test_write_before_memset_in_both_regions(ptr %result) {
187187
ret void
188188
}
189189

190+
define void @test_offset_memset(ptr %result) {
191+
; CHECK-LABEL: @test_offset_memset(
192+
; CHECK-NEXT: [[A1:%.*]] = alloca [4 x i32], align 8
193+
; CHECK-NEXT: [[A:%.*]] = getelementptr i32, ptr [[A1]], i32 1
194+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 12, i1 false)
195+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[RESULT:%.*]], i64 4
196+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[TMP1]], i8 0, i64 8, i1 false)
197+
; CHECK-NEXT: ret void
198+
;
199+
%a = alloca [ 4 x i32 ], align 8
200+
%b = getelementptr i32, ptr %a, i32 1
201+
call void @llvm.memset.p0.i64(ptr align 8 %b, i8 0, i64 12, i1 false)
202+
call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 12, i1 false)
203+
ret void
204+
}
205+
206+
define void @test_offset_memsetcpy(ptr %result) {
207+
; CHECK-LABEL: @test_offset_memsetcpy(
208+
; CHECK-NEXT: [[A1:%.*]] = alloca [4 x i32], align 8
209+
; CHECK-NEXT: [[A:%.*]] = getelementptr i32, ptr [[A1]], i32 1
210+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[A1]], i8 0, i64 12, i1 false)
211+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[RESULT:%.*]], i8 0, i64 8, i1 false)
212+
; CHECK-NEXT: ret void
213+
;
214+
%a = alloca [ 4 x i32 ], align 8
215+
%b = getelementptr i32, ptr %a, i32 1
216+
call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 false)
217+
call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %b, i64 12, i1 false)
218+
ret void
219+
}
220+
221+
define void @test_two_memset(ptr %result) {
222+
; CHECK-LABEL: @test_two_memset(
223+
; CHECK-NEXT: [[A:%.*]] = alloca [4 x i32], align 8
224+
; CHECK-NEXT: [[B:%.*]] = getelementptr i32, ptr [[A]], i32 3
225+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 12, i1 false)
226+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[B]], i8 1, i64 4, i1 false)
227+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[RESULT:%.*]], ptr align 8 [[A]], i64 16, i1 false)
228+
; CHECK-NEXT: ret void
229+
;
230+
%a = alloca [ 4 x i32 ], align 8
231+
%b = getelementptr i32, ptr %a, i32 3
232+
call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 false)
233+
call void @llvm.memset.p0.i64(ptr align 8 %b, i8 1, i64 4, i1 false)
234+
call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 false)
235+
ret void
236+
}
237+
190238
declare ptr @malloc(i64)
191239
declare void @free(ptr)
192240

llvm/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ define void @test_different_source_gep(ptr %dst1, ptr %dst2, i8 %c) {
7373
; CHECK-LABEL: @test_different_source_gep(
7474
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[DST1:%.*]], i8 [[C:%.*]], i64 128, i1 false)
7575
; CHECK-NEXT: [[P:%.*]] = getelementptr i8, ptr [[DST1]], i64 64
76-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST2:%.*]], ptr [[P]], i64 64, i1 false)
76+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[DST2:%.*]], i8 [[C]], i64 64, i1 false)
7777
; CHECK-NEXT: ret void
7878
;
7979
call void @llvm.memset.p0.i64(ptr %dst1, i8 %c, i64 128, i1 false)

llvm/test/Transforms/MemCpyOpt/mixed-sizes.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ define i32 @foo(i1 %z) {
1919
; CHECK: for.body3.lr.ph:
2020
; CHECK-NEXT: br label [[FOR_INC7_1]]
2121
; CHECK: for.inc7.1:
22-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[A]], ptr align 4 [[SCEVGEP]], i64 4, i1 false)
22+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A]], i8 0, i64 4, i1 false)
2323
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4
2424
; CHECK-NEXT: ret i32 [[TMP2]]
2525
;

llvm/test/Transforms/MemCpyOpt/variable-sized-memset-memcpy.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,13 @@ define void @test(ptr %src, i8 %c, i64 %size) {
1818
ret void
1919
}
2020

21-
; Differing sizes, so left as it is.
21+
; Differing sizes, but would be UB if size2 > size1 (the memcpy would read past the end of the first alloca)
2222
define void @negative_test(ptr %src, i8 %c, i64 %size1, i64 %size2) {
2323
; CHECK-LABEL: @negative_test(
2424
; CHECK-NEXT: [[DST1:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
2525
; CHECK-NEXT: [[DST2:%.*]] = alloca i8, i64 [[SIZE2:%.*]], align 1
2626
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[DST1]], i8 [[C:%.*]], i64 [[SIZE1]], i1 false)
27-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[DST1]], i64 [[SIZE2]], i1 false)
27+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[DST2]], i8 [[C]], i64 [[SIZE2]], i1 false)
2828
; CHECK-NEXT: ret void
2929
;
3030
%dst1 = alloca i8, i64 %size1

0 commit comments

Comments
 (0)