-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[MemCpyOpt] allow some undef contents overread in processMemCpyMemCpyDependence #143745
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-transforms Author: Jameson Nash (vtjnash) ChangesRefs #140954 which laid some of the groundwork for this. Full diff: https://github.com/llvm/llvm-project/pull/143745.diff 3 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 960001bf880c6..8dd8a7dce614c 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -107,6 +107,9 @@ struct MemsetRange {
} // end anonymous namespace
+static bool overreadUndefContents(MemorySSA *MSSA, MemCpyInst *MemCpy,
+ MemIntrinsic *MemSrc, BatchAAResults &BAA);
+
bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
// If we found more than 4 stores to merge or 16 bytes, use memset.
if (TheStores.size() >= 4 || End - Start >= 16)
@@ -1129,14 +1132,28 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
MForwardOffset = *Offset;
}
- // The length of the memcpy's must be the same, or the preceding one
- // must be larger than the following one.
- if (MForwardOffset != 0 || MDep->getLength() != M->getLength()) {
+ Value *CopyLength = M->getLength();
+
+ // The length of the memcpy's must be the same, or the preceding one must be
+ // larger than the following one, or the contents of the overread must be
+ // undefined bytes of a defined size.
+ if (MForwardOffset != 0 || MDep->getLength() != CopyLength) {
auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
- auto *MLen = dyn_cast<ConstantInt>(M->getLength());
- if (!MDepLen || !MLen ||
- MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset)
- return false;
+ auto *MLen = dyn_cast<ConstantInt>(CopyLength);
+ if (!MDepLen || !MLen)
+ return false; // This could be converted to a runtime test (%CopyLength =
+ // min(max(0, MDepLen - MForwardOffset), MLen)), but it is
+ // unclear if that is useful
+ if (MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset) {
+ if (!overreadUndefContents(MSSA, M, MDep, BAA))
+ return false;
+ if (MDepLen->getZExtValue() <= (uint64_t)MForwardOffset)
+ return false; // Should not reach here (there is obviously no aliasing
+ // with MDep), so just bail in case it had incomplete info
+ // somehow
+ CopyLength = ConstantInt::get(CopyLength->getType(),
+ MDepLen->getZExtValue() - MForwardOffset);
+ }
}
IRBuilder<> Builder(M);
@@ -1152,9 +1169,13 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
eraseInstruction(NewCopySource);
});
MaybeAlign CopySourceAlign = MDep->getSourceAlign();
- // We just need to calculate the actual size of the copy.
- auto MCopyLoc = MemoryLocation::getForSource(MDep).getWithNewSize(
- MemoryLocation::getForSource(M).Size);
+ auto MCopyLoc = MemoryLocation::getForSource(MDep);
+ // Truncate the size of the MDep access to just the bytes read
+ if (MDep->getLength() != CopyLength) {
+ auto ConstLength = cast<ConstantInt>(CopyLength);
+ MCopyLoc = MCopyLoc.getWithNewSize(
+ LocationSize::precise(ConstLength->getZExtValue()));
+ }
// When the forwarding offset is greater than 0, we transform
// memcpy(d1 <- s1)
@@ -1223,20 +1244,18 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
// example we could be moving from movaps -> movq on x86.
Instruction *NewM;
if (UseMemMove)
- NewM =
- Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource,
- CopySourceAlign, M->getLength(), M->isVolatile());
+ NewM = Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource,
+ CopySourceAlign, CopyLength, M->isVolatile());
else if (M->isForceInlined())
// llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
// never allowed since that would allow the latter to be lowered as a call
// to an external function.
NewM = Builder.CreateMemCpyInline(M->getDest(), M->getDestAlign(),
- CopySource, CopySourceAlign,
- M->getLength(), M->isVolatile());
+ CopySource, CopySourceAlign, CopyLength,
+ M->isVolatile());
else
NewM = Builder.CreateMemCpy(M->getDest(), M->getDestAlign(), CopySource,
- CopySourceAlign, M->getLength(),
- M->isVolatile());
+ CopySourceAlign, CopyLength, M->isVolatile());
NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-memcpy-offset.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-memcpy-offset.ll
index da654438d7bd6..c5406c5c64340 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy-memcpy-offset.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy-memcpy-offset.ll
@@ -135,13 +135,14 @@ define void @forward_offset_memcpy_inline(ptr %src, ptr %dest) {
}
; We cannot forward `memcpy` because it exceeds the size of `memcpy` it depends on.
-define void @do_not_forward_oversize_offset(ptr %src, ptr %dest) {
-; CHECK-LABEL: define void @do_not_forward_oversize_offset(
+define void @forward_oversize_offset(ptr %src, ptr %dest) {
+; CHECK-LABEL: define void @forward_oversize_offset(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
-; CHECK-NEXT: [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 6, i1 false)
-; CHECK-NEXT: [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP_OFFSET]], i64 6, i1 false)
+; CHECK-NEXT: [[CPY_TMP:%.*]] = alloca [9 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[CPY_TMP]], ptr align 1 [[SRC]], i64 6, i1 false)
+; CHECK-NEXT: [[CPY_TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[CPY_TMP]], i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
+; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 5, i1 false)
; CHECK-NEXT: ret void
;
%cpy_tmp = alloca %buf, align 1
@@ -214,6 +215,24 @@ define void @pr98675(ptr noalias %p1, ptr noalias %p2) {
ret void
}
+define void @over_offset_cpy(ptr %src) {
+; CHECK-LABEL: define void @over_offset_cpy(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i64 2, align 1
+; CHECK-NEXT: [[DST:%.*]] = alloca i8, i64 1, align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP]], ptr align 8 [[SRC]], i64 1, i1 false)
+; CHECK-NEXT: [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 1
+; CHECK-NEXT: ret void
+;
+ %tmp = alloca i8, i64 2
+ %dst = alloca i8, i64 1
+ call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src, i64 1, i1 false)
+ %tmp_offset = getelementptr inbounds i8, ptr %tmp, i64 1
+ call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp_offset, i64 1, i1 false)
+
+ ret void
+}
+
declare void @use(ptr)
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1)
diff --git a/llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll b/llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll
index 4f6b734ec057d..95402a8ea686c 100644
--- a/llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll
@@ -18,7 +18,42 @@ define void @test(ptr %src, i64 %size) {
ret void
}
-; Differing sizes, so left as it is.
+define void @dynalloca_test(ptr %src, i64 %size1) {
+; CHECK-LABEL: @dynalloca_test(
+; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
+; CHECK-NEXT: [[DST:%.*]] = alloca i8, i64 [[SIZE1]], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP]], ptr align 8 [[SRC:%.*]], i64 31, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST]], ptr align 8 [[SRC]], i64 31, i1 false)
+; CHECK-NEXT: ret void
+;
+ %tmp = alloca i8, i64 %size1
+ %dst = alloca i8, i64 %size1
+ call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src, i64 31, i1 false)
+ call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp, i64 32, i1 false)
+
+ ret void
+}
+
+define void @dynalloca_offset_test(ptr %src, i64 %size1) {
+; CHECK-LABEL: @dynalloca_offset_test(
+; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
+; CHECK-NEXT: [[DST:%.*]] = alloca i8, i64 [[SIZE1]], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP]], ptr align 8 [[SRC:%.*]], i64 31, i1 false)
+; CHECK-NEXT: [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST]], ptr align 1 [[TMP1]], i64 30, i1 false)
+; CHECK-NEXT: ret void
+;
+ %tmp = alloca i8, i64 %size1
+ %dst = alloca i8, i64 %size1
+ call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src, i64 31, i1 false)
+ %tmp_offset = getelementptr inbounds i8, ptr %tmp, i64 1
+ call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp_offset, i64 31, i1 false)
+
+ ret void
+}
+
+; Dynamic sizes, so left as it is.
define void @negative_test(ptr %src, i64 %size1, i64 %size2) {
; CHECK-LABEL: @negative_test(
; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
|
6662fcc
to
645f247
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
Looks like you pushed the wrong changes to this branch? |
645f247
to
244e3c9
Compare
Oof, too many branches going on here trying to make each changes separate. Original commit is restored now |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
9da6c4e
to
72002f6
Compare
Allows memcpy to memcpy forwarding in cases where the second memcpy is larger, but the overread is known to be undef, by shrinking the memcpy size.
Refs #140954 which laid some of the groundwork for this.