Skip to content

Commit c04fc55

Browse files
authored
[MemCpyOpt] allow some undef contents overread in processMemCpyMemCpyDependence (#143745)
Allows memcpy-to-memcpy forwarding in cases where the second memcpy is larger than the first, but the overread is known to be undef, by shrinking the size of the forwarded memcpy. Refs #140954, which laid some of the groundwork for this.
1 parent fb06519 commit c04fc55

File tree

3 files changed

+98
-24
lines changed

3 files changed

+98
-24
lines changed

llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Lines changed: 36 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,9 @@ struct MemsetRange {
107107

108108
} // end anonymous namespace
109109

110+
static bool overreadUndefContents(MemorySSA *MSSA, MemCpyInst *MemCpy,
111+
MemIntrinsic *MemSrc, BatchAAResults &BAA);
112+
110113
bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
111114
// If we found more than 4 stores to merge or 16 bytes, use memset.
112115
if (TheStores.size() >= 4 || End - Start >= 16)
@@ -1129,14 +1132,29 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
11291132
MForwardOffset = *Offset;
11301133
}
11311134

1132-
// The length of the memcpy's must be the same, or the preceding one
1133-
// must be larger than the following one.
1134-
if (MForwardOffset != 0 || MDep->getLength() != M->getLength()) {
1135+
Value *CopyLength = M->getLength();
1136+
1137+
// The length of the memcpy's must be the same, or the preceding one must be
1138+
// larger than the following one, or the contents of the overread must be
1139+
// undefined bytes of a defined size.
1140+
if (MForwardOffset != 0 || MDep->getLength() != CopyLength) {
11351141
auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1136-
auto *MLen = dyn_cast<ConstantInt>(M->getLength());
1137-
if (!MDepLen || !MLen ||
1138-
MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset)
1142+
auto *MLen = dyn_cast<ConstantInt>(CopyLength);
1143+
// This could be converted to a runtime test (%CopyLength =
1144+
// min(max(0, MDepLen - MForwardOffset), MLen)), but it is
1145+
// unclear if that is useful
1146+
if (!MDepLen || !MLen)
11391147
return false;
1148+
if (MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset) {
1149+
if (!overreadUndefContents(MSSA, M, MDep, BAA))
1150+
return false;
1151+
if (MDepLen->getZExtValue() <= (uint64_t)MForwardOffset)
1152+
return false; // Should not reach here (there is obviously no aliasing
1153+
// with MDep), so just bail in case it had incomplete info
1154+
// somehow
1155+
CopyLength = ConstantInt::get(CopyLength->getType(),
1156+
MDepLen->getZExtValue() - MForwardOffset);
1157+
}
11401158
}
11411159

11421160
IRBuilder<> Builder(M);
@@ -1152,9 +1170,13 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
11521170
eraseInstruction(NewCopySource);
11531171
});
11541172
MaybeAlign CopySourceAlign = MDep->getSourceAlign();
1155-
// We just need to calculate the actual size of the copy.
1156-
auto MCopyLoc = MemoryLocation::getForSource(MDep).getWithNewSize(
1157-
MemoryLocation::getForSource(M).Size);
1173+
auto MCopyLoc = MemoryLocation::getForSource(MDep);
1174+
// Truncate the size of the MDep access to just the bytes read
1175+
if (MDep->getLength() != CopyLength) {
1176+
auto *ConstLength = cast<ConstantInt>(CopyLength);
1177+
MCopyLoc = MCopyLoc.getWithNewSize(
1178+
LocationSize::precise(ConstLength->getZExtValue()));
1179+
}
11581180

11591181
// When the forwarding offset is greater than 0, we transform
11601182
// memcpy(d1 <- s1)
@@ -1223,20 +1245,18 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
12231245
// example we could be moving from movaps -> movq on x86.
12241246
Instruction *NewM;
12251247
if (UseMemMove)
1226-
NewM =
1227-
Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource,
1228-
CopySourceAlign, M->getLength(), M->isVolatile());
1248+
NewM = Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource,
1249+
CopySourceAlign, CopyLength, M->isVolatile());
12291250
else if (M->isForceInlined())
12301251
// llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
12311252
// never allowed since that would allow the latter to be lowered as a call
12321253
// to an external function.
12331254
NewM = Builder.CreateMemCpyInline(M->getDest(), M->getDestAlign(),
1234-
CopySource, CopySourceAlign,
1235-
M->getLength(), M->isVolatile());
1255+
CopySource, CopySourceAlign, CopyLength,
1256+
M->isVolatile());
12361257
else
12371258
NewM = Builder.CreateMemCpy(M->getDest(), M->getDestAlign(), CopySource,
1238-
CopySourceAlign, M->getLength(),
1239-
M->isVolatile());
1259+
CopySourceAlign, CopyLength, M->isVolatile());
12401260

12411261
NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);
12421262

llvm/test/Transforms/MemCpyOpt/memcpy-memcpy-offset.ll

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -134,14 +134,15 @@ define void @forward_offset_memcpy_inline(ptr %src, ptr %dest) {
134134
ret void
135135
}
136136

137-
; We cannot forward `memcpy` because it exceeds the size of `memcpy` it depends on.
138-
define void @do_not_forward_oversize_offset(ptr %src, ptr %dest) {
139-
; CHECK-LABEL: define void @do_not_forward_oversize_offset(
137+
; We can forward `memcpy` by shrinking it to the size of the `memcpy` it depends on.
138+
define void @forward_oversize_offset(ptr %src, ptr %dest) {
139+
; CHECK-LABEL: define void @forward_oversize_offset(
140140
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
141-
; CHECK-NEXT: [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
142-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 6, i1 false)
143-
; CHECK-NEXT: [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
144-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP_OFFSET]], i64 6, i1 false)
141+
; CHECK-NEXT: [[CPY_TMP:%.*]] = alloca [9 x i8], align 1
142+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[CPY_TMP]], ptr align 1 [[SRC]], i64 6, i1 false)
143+
; CHECK-NEXT: [[CPY_TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[CPY_TMP]], i64 1
144+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
145+
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 5, i1 false)
145146
; CHECK-NEXT: ret void
146147
;
147148
%cpy_tmp = alloca %buf, align 1
@@ -214,6 +215,24 @@ define void @pr98675(ptr noalias %p1, ptr noalias %p2) {
214215
ret void
215216
}
216217

218+
define void @over_offset_cpy(ptr %src) {
219+
; CHECK-LABEL: define void @over_offset_cpy(
220+
; CHECK-SAME: ptr [[SRC:%.*]]) {
221+
; CHECK-NEXT: [[TMP:%.*]] = alloca [2 x i8], align 1
222+
; CHECK-NEXT: [[DST:%.*]] = alloca i8, align 1
223+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP]], ptr align 8 [[SRC]], i64 1, i1 false)
224+
; CHECK-NEXT: [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 1
225+
; CHECK-NEXT: ret void
226+
;
227+
%tmp = alloca [2 x i8]
228+
%dst = alloca i8
229+
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src, i64 1, i1 false)
230+
%tmp_offset = getelementptr inbounds i8, ptr %tmp, i64 1
231+
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp_offset, i64 1, i1 false)
232+
233+
ret void
234+
}
235+
217236
declare void @use(ptr)
218237

219238
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1)

llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,42 @@ define void @test(ptr %src, i64 %size) {
1818
ret void
1919
}
2020

21-
; Differing sizes, so left as it is.
21+
define void @dynalloca_test(ptr %src, i64 %size1) {
22+
; CHECK-LABEL: @dynalloca_test(
23+
; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
24+
; CHECK-NEXT: [[DST:%.*]] = alloca i8, i64 [[SIZE1]], align 1
25+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP]], ptr align 8 [[SRC:%.*]], i64 31, i1 false)
26+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST]], ptr align 8 [[SRC]], i64 31, i1 false)
27+
; CHECK-NEXT: ret void
28+
;
29+
%tmp = alloca i8, i64 %size1
30+
%dst = alloca i8, i64 %size1
31+
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src, i64 31, i1 false)
32+
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp, i64 32, i1 false)
33+
34+
ret void
35+
}
36+
37+
define void @dynalloca_offset_test(ptr %src, i64 %size1) {
38+
; CHECK-LABEL: @dynalloca_offset_test(
39+
; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
40+
; CHECK-NEXT: [[DST:%.*]] = alloca i8, i64 [[SIZE1]], align 1
41+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP]], ptr align 8 [[SRC:%.*]], i64 31, i1 false)
42+
; CHECK-NEXT: [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 1
43+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
44+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST]], ptr align 1 [[TMP1]], i64 30, i1 false)
45+
; CHECK-NEXT: ret void
46+
;
47+
%tmp = alloca i8, i64 %size1
48+
%dst = alloca i8, i64 %size1
49+
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src, i64 31, i1 false)
50+
%tmp_offset = getelementptr inbounds i8, ptr %tmp, i64 1
51+
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp_offset, i64 31, i1 false)
52+
53+
ret void
54+
}
55+
56+
; Dynamic sizes, so left as it is.
2257
define void @negative_test(ptr %src, i64 %size1, i64 %size2) {
2358
; CHECK-LABEL: @negative_test(
2459
; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1

0 commit comments

Comments
 (0)