Skip to content

[MemCpyOpt] allow some undef contents overread in processMemCpyMemCpyDependence #143745

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 36 additions & 16 deletions llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,9 @@ struct MemsetRange {

} // end anonymous namespace

static bool overreadUndefContents(MemorySSA *MSSA, MemCpyInst *MemCpy,
MemIntrinsic *MemSrc, BatchAAResults &BAA);

bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
// If we found more than 4 stores to merge or 16 bytes, use memset.
if (TheStores.size() >= 4 || End - Start >= 16)
Expand Down Expand Up @@ -1129,14 +1132,29 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
MForwardOffset = *Offset;
}

// The length of the memcpy's must be the same, or the preceding one
// must be larger than the following one.
if (MForwardOffset != 0 || MDep->getLength() != M->getLength()) {
Value *CopyLength = M->getLength();

// The length of the memcpy's must be the same, or the preceding one must be
// larger than the following one, or the contents of the overread must be
// undefined bytes of a defined size.
if (MForwardOffset != 0 || MDep->getLength() != CopyLength) {
auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
auto *MLen = dyn_cast<ConstantInt>(M->getLength());
if (!MDepLen || !MLen ||
MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset)
auto *MLen = dyn_cast<ConstantInt>(CopyLength);
// This could be converted to a runtime test (%CopyLength =
// min(max(0, MDepLen - MForwardOffset), MLen)), but it is
// unclear if that is useful
if (!MDepLen || !MLen)
return false;
if (MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset) {
if (!overreadUndefContents(MSSA, M, MDep, BAA))
return false;
if (MDepLen->getZExtValue() <= (uint64_t)MForwardOffset)
return false; // Should not reach here (there is obviously no aliasing
// with MDep), so just bail in case it had incomplete info
// somehow
CopyLength = ConstantInt::get(CopyLength->getType(),
MDepLen->getZExtValue() - MForwardOffset);
}
}

IRBuilder<> Builder(M);
Expand All @@ -1152,9 +1170,13 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
eraseInstruction(NewCopySource);
});
MaybeAlign CopySourceAlign = MDep->getSourceAlign();
// We just need to calculate the actual size of the copy.
auto MCopyLoc = MemoryLocation::getForSource(MDep).getWithNewSize(
MemoryLocation::getForSource(M).Size);
auto MCopyLoc = MemoryLocation::getForSource(MDep);
// Truncate the size of the MDep access to just the bytes read
if (MDep->getLength() != CopyLength) {
auto *ConstLength = cast<ConstantInt>(CopyLength);
MCopyLoc = MCopyLoc.getWithNewSize(
LocationSize::precise(ConstLength->getZExtValue()));
}

// When the forwarding offset is greater than 0, we transform
// memcpy(d1 <- s1)
Expand Down Expand Up @@ -1223,20 +1245,18 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
// example we could be moving from movaps -> movq on x86.
Instruction *NewM;
if (UseMemMove)
NewM =
Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource,
CopySourceAlign, M->getLength(), M->isVolatile());
NewM = Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource,
CopySourceAlign, CopyLength, M->isVolatile());
else if (M->isForceInlined())
// llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
// never allowed since that would allow the latter to be lowered as a call
// to an external function.
NewM = Builder.CreateMemCpyInline(M->getDest(), M->getDestAlign(),
CopySource, CopySourceAlign,
M->getLength(), M->isVolatile());
CopySource, CopySourceAlign, CopyLength,
M->isVolatile());
else
NewM = Builder.CreateMemCpy(M->getDest(), M->getDestAlign(), CopySource,
CopySourceAlign, M->getLength(),
M->isVolatile());
CopySourceAlign, CopyLength, M->isVolatile());

NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);

Expand Down
33 changes: 26 additions & 7 deletions llvm/test/Transforms/MemCpyOpt/memcpy-memcpy-offset.ll
Original file line number Diff line number Diff line change
Expand Up @@ -134,14 +134,15 @@ define void @forward_offset_memcpy_inline(ptr %src, ptr %dest) {
ret void
}

; We cannot forward `memcpy` because it exceeds the size of `memcpy` it depends on.
define void @do_not_forward_oversize_offset(ptr %src, ptr %dest) {
; CHECK-LABEL: define void @do_not_forward_oversize_offset(
; We can forward `memcpy` by shrinking it to the size of the `memcpy` it depends on.
define void @forward_oversize_offset(ptr %src, ptr %dest) {
; CHECK-LABEL: define void @forward_oversize_offset(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
; CHECK-NEXT: [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 6, i1 false)
; CHECK-NEXT: [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP_OFFSET]], i64 6, i1 false)
; CHECK-NEXT: [[CPY_TMP:%.*]] = alloca [9 x i8], align 1
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[CPY_TMP]], ptr align 1 [[SRC]], i64 6, i1 false)
; CHECK-NEXT: [[CPY_TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[CPY_TMP]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 5, i1 false)
; CHECK-NEXT: ret void
;
%cpy_tmp = alloca %buf, align 1
Expand Down Expand Up @@ -214,6 +215,24 @@ define void @pr98675(ptr noalias %p1, ptr noalias %p2) {
ret void
}

define void @over_offset_cpy(ptr %src) {
; CHECK-LABEL: define void @over_offset_cpy(
; CHECK-SAME: ptr [[SRC:%.*]]) {
; CHECK-NEXT: [[TMP:%.*]] = alloca [2 x i8], align 1
; CHECK-NEXT: [[DST:%.*]] = alloca i8, align 1
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP]], ptr align 8 [[SRC]], i64 1, i1 false)
; CHECK-NEXT: [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 1
; CHECK-NEXT: ret void
;
%tmp = alloca [2 x i8]
%dst = alloca i8
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src, i64 1, i1 false)
%tmp_offset = getelementptr inbounds i8, ptr %tmp, i64 1
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp_offset, i64 1, i1 false)

ret void
}

declare void @use(ptr)

declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1)
Expand Down
37 changes: 36 additions & 1 deletion llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,42 @@ define void @test(ptr %src, i64 %size) {
ret void
}

; Differing sizes, so left as it is.
define void @dynalloca_test(ptr %src, i64 %size1) {
; CHECK-LABEL: @dynalloca_test(
; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
; CHECK-NEXT: [[DST:%.*]] = alloca i8, i64 [[SIZE1]], align 1
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP]], ptr align 8 [[SRC:%.*]], i64 31, i1 false)
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST]], ptr align 8 [[SRC]], i64 31, i1 false)
; CHECK-NEXT: ret void
;
%tmp = alloca i8, i64 %size1
%dst = alloca i8, i64 %size1
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src, i64 31, i1 false)
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp, i64 32, i1 false)

ret void
}

define void @dynalloca_offset_test(ptr %src, i64 %size1) {
; CHECK-LABEL: @dynalloca_offset_test(
; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
; CHECK-NEXT: [[DST:%.*]] = alloca i8, i64 [[SIZE1]], align 1
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP]], ptr align 8 [[SRC:%.*]], i64 31, i1 false)
; CHECK-NEXT: [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST]], ptr align 1 [[TMP1]], i64 30, i1 false)
; CHECK-NEXT: ret void
;
%tmp = alloca i8, i64 %size1
%dst = alloca i8, i64 %size1
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src, i64 31, i1 false)
%tmp_offset = getelementptr inbounds i8, ptr %tmp, i64 1
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp_offset, i64 31, i1 false)

ret void
}

; Dynamic sizes, so left as it is.
define void @negative_test(ptr %src, i64 %size1, i64 %size2) {
; CHECK-LABEL: @negative_test(
; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
Expand Down