Skip to content

Commit 6662fcc

Browse files
committed
[memcpyopt] allow some undef contents overread in processMemCpyMemCpyDependence
1 parent e64f8e0 commit 6662fcc

File tree

3 files changed

+97
-24
lines changed

3 files changed

+97
-24
lines changed

llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Lines changed: 36 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,9 @@ struct MemsetRange {
107107

108108
} // end anonymous namespace
109109

110+
static bool overreadUndefContents(MemorySSA *MSSA, MemCpyInst *MemCpy,
111+
MemIntrinsic *MemSrc, BatchAAResults &BAA);
112+
110113
bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
111114
// If we found more than 4 stores to merge or 16 bytes, use memset.
112115
if (TheStores.size() >= 4 || End - Start >= 16)
@@ -1129,14 +1132,28 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
11291132
MForwardOffset = *Offset;
11301133
}
11311134

1132-
// The length of the memcpy's must be the same, or the preceding one
1133-
// must be larger than the following one.
1134-
if (MForwardOffset != 0 || MDep->getLength() != M->getLength()) {
1135+
Value *CopyLength = M->getLength();
1136+
1137+
// The lengths of the memcpys must be the same, or the preceding one must be
1138+
// larger than the following one, or the contents of the overread must be
1139+
// undefined bytes of a defined size.
1140+
if (MForwardOffset != 0 || MDep->getLength() != CopyLength) {
11351141
auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1136-
auto *MLen = dyn_cast<ConstantInt>(M->getLength());
1137-
if (!MDepLen || !MLen ||
1138-
MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset)
1139-
return false;
1142+
auto *MLen = dyn_cast<ConstantInt>(CopyLength);
1143+
if (!MDepLen || !MLen)
1144+
return false; // This could be converted to a runtime test (%CopyLength =
1145+
// min(max(0, MDepLen - MForwardOffset), MLen)), but it is
1146+
// unclear if that is useful.
1147+
if (MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset) {
1148+
if (!overreadUndefContents(MSSA, M, MDep, BAA))
1149+
return false;
1150+
if (MDepLen->getZExtValue() <= (uint64_t)MForwardOffset)
1151+
return false; // Should not reach here (there is obviously no aliasing
1152+
// with MDep), so just bail in case it had incomplete info
1153+
// somehow
1154+
CopyLength = ConstantInt::get(CopyLength->getType(),
1155+
MDepLen->getZExtValue() - MForwardOffset);
1156+
}
11401157
}
11411158

11421159
IRBuilder<> Builder(M);
@@ -1152,9 +1169,13 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
11521169
eraseInstruction(NewCopySource);
11531170
});
11541171
MaybeAlign CopySourceAlign = MDep->getSourceAlign();
1155-
// We just need to calculate the actual size of the copy.
1156-
auto MCopyLoc = MemoryLocation::getForSource(MDep).getWithNewSize(
1157-
MemoryLocation::getForSource(M).Size);
1172+
auto MCopyLoc = MemoryLocation::getForSource(MDep);
1173+
// Truncate the size of the MDep access to just the bytes read.
1174+
if (MDep->getLength() != CopyLength) {
1175+
auto ConstLength = cast<ConstantInt>(CopyLength);
1176+
MCopyLoc = MCopyLoc.getWithNewSize(
1177+
LocationSize::precise(ConstLength->getZExtValue()));
1178+
}
11581179

11591180
// When the forwarding offset is greater than 0, we transform
11601181
// memcpy(d1 <- s1)
@@ -1223,20 +1244,18 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
12231244
// example we could be moving from movaps -> movq on x86.
12241245
Instruction *NewM;
12251246
if (UseMemMove)
1226-
NewM =
1227-
Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource,
1228-
CopySourceAlign, M->getLength(), M->isVolatile());
1247+
NewM = Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource,
1248+
CopySourceAlign, CopyLength, M->isVolatile());
12291249
else if (M->isForceInlined())
12301250
// llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
12311251
// never allowed since that would allow the latter to be lowered as a call
12321252
// to an external function.
12331253
NewM = Builder.CreateMemCpyInline(M->getDest(), M->getDestAlign(),
1234-
CopySource, CopySourceAlign,
1235-
M->getLength(), M->isVolatile());
1254+
CopySource, CopySourceAlign, CopyLength,
1255+
M->isVolatile());
12361256
else
12371257
NewM = Builder.CreateMemCpy(M->getDest(), M->getDestAlign(), CopySource,
1238-
CopySourceAlign, M->getLength(),
1239-
M->isVolatile());
1258+
CopySourceAlign, CopyLength, M->isVolatile());
12401259

12411260
NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);
12421261

llvm/test/Transforms/MemCpyOpt/memcpy-memcpy-offset.ll

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -135,13 +135,14 @@ define void @forward_offset_memcpy_inline(ptr %src, ptr %dest) {
135135
}
136136

137137
; The second `memcpy` reads past the end of the `memcpy` it depends on, but the overread bytes are undef alloca contents, so it is forwarded with a truncated length.
138-
define void @do_not_forward_oversize_offset(ptr %src, ptr %dest) {
139-
; CHECK-LABEL: define void @do_not_forward_oversize_offset(
138+
define void @forward_oversize_offset(ptr %src, ptr %dest) {
139+
; CHECK-LABEL: define void @forward_oversize_offset(
140140
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) {
141-
; CHECK-NEXT: [[DEP_DEST:%.*]] = alloca [9 x i8], align 1
142-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 6, i1 false)
143-
; CHECK-NEXT: [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1
144-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP_OFFSET]], i64 6, i1 false)
141+
; CHECK-NEXT: [[CPY_TMP:%.*]] = alloca [9 x i8], align 1
142+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[CPY_TMP]], ptr align 1 [[SRC]], i64 6, i1 false)
143+
; CHECK-NEXT: [[CPY_TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[CPY_TMP]], i64 1
144+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
145+
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 5, i1 false)
145146
; CHECK-NEXT: ret void
146147
;
147148
%cpy_tmp = alloca %buf, align 1
@@ -214,6 +215,24 @@ define void @pr98675(ptr noalias %p1, ptr noalias %p2) {
214215
ret void
215216
}
216217

218+
define void @over_offset_cpy(ptr %src) {
219+
; CHECK-LABEL: define void @over_offset_cpy(
220+
; CHECK-SAME: ptr [[SRC:%.*]]) {
221+
; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i64 2, align 1
222+
; CHECK-NEXT: [[DST:%.*]] = alloca i8, i64 1, align 1
223+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP]], ptr align 8 [[SRC]], i64 1, i1 false)
224+
; CHECK-NEXT: [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 1
225+
; CHECK-NEXT: ret void
226+
;
227+
%tmp = alloca i8, i64 2
228+
%dst = alloca i8, i64 1
229+
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src, i64 1, i1 false)
230+
%tmp_offset = getelementptr inbounds i8, ptr %tmp, i64 1
231+
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp_offset, i64 1, i1 false)
232+
233+
ret void
234+
}
235+
217236
declare void @use(ptr)
218237

219238
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1)

llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,42 @@ define void @test(ptr %src, i64 %size) {
1818
ret void
1919
}
2020

21-
; Differing sizes, so left as it is.
21+
define void @dynalloca_test(ptr %src, i64 %size1) {
22+
; CHECK-LABEL: @dynalloca_test(
23+
; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
24+
; CHECK-NEXT: [[DST:%.*]] = alloca i8, i64 [[SIZE1]], align 1
25+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP]], ptr align 8 [[SRC:%.*]], i64 31, i1 false)
26+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST]], ptr align 8 [[SRC]], i64 31, i1 false)
27+
; CHECK-NEXT: ret void
28+
;
29+
%tmp = alloca i8, i64 %size1
30+
%dst = alloca i8, i64 %size1
31+
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src, i64 31, i1 false)
32+
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp, i64 32, i1 false)
33+
34+
ret void
35+
}
36+
37+
define void @dynalloca_offset_test(ptr %src, i64 %size1) {
38+
; CHECK-LABEL: @dynalloca_offset_test(
39+
; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
40+
; CHECK-NEXT: [[DST:%.*]] = alloca i8, i64 [[SIZE1]], align 1
41+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP]], ptr align 8 [[SRC:%.*]], i64 31, i1 false)
42+
; CHECK-NEXT: [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 1
43+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
44+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST]], ptr align 1 [[TMP1]], i64 30, i1 false)
45+
; CHECK-NEXT: ret void
46+
;
47+
%tmp = alloca i8, i64 %size1
48+
%dst = alloca i8, i64 %size1
49+
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src, i64 31, i1 false)
50+
%tmp_offset = getelementptr inbounds i8, ptr %tmp, i64 1
51+
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp_offset, i64 31, i1 false)
52+
53+
ret void
54+
}
55+
56+
; Dynamic sizes, so left as it is.
2257
define void @negative_test(ptr %src, i64 %size1, i64 %size2) {
2358
; CHECK-LABEL: @negative_test(
2459
; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1

0 commit comments

Comments
 (0)