Skip to content

Commit bb5fdef

Browse files
committed
remove handling of undef input ranges (incl offsets < 0)
1 parent 0d828e6 commit bb5fdef

File tree

4 files changed

+39
-55
lines changed

4 files changed

+39
-55
lines changed

llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1443,57 +1443,46 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
14431443
int64_t MOffset = 0;
14441444
const DataLayout &DL = MemCpy->getModule()->getDataLayout();
14451445
// We can only transform memcpy's where the dest of one is the source of the
1446-
// other, or they have a known offset.
1446+
// other, or the memory transfer has a known offset from the memset.
14471447
if (MemCpy->getSource() != MemSet->getDest()) {
14481448
std::optional<int64_t> Offset =
14491449
MemCpy->getSource()->getPointerOffsetFrom(MemSet->getDest(), DL);
1450-
if (!Offset)
1450+
if (!Offset || *Offset < 0)
14511451
return false;
14521452
MOffset = *Offset;
14531453
}
14541454

14551455
MaybeAlign MDestAlign = MemCpy->getDestAlign();
1456-
int64_t MOffsetAligned = MDestAlign.valueOrOne().value() > 1 && MOffset < 0 ? -(-MOffset & ~(MDestAlign.valueOrOne().value() - 1)) : MOffset; // Compute the MOffset that keeps MDest aligned (truncate towards zero)
14571456
if (MOffset != 0 || MemSetSize != CopySize) {
1458-
// Make sure the memcpy doesn't read any more than what the memset wrote, other than undef.
1457+
// Make sure the memcpy doesn't read any more than what the memset wrote,
1458+
// other than undef. Don't worry about sizes larger than i64. A known memset
1459+
// size is required.
14591460
auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1461+
if (!CMemSetSize)
1462+
return false;
1463+
// A known memcpy size is required.
14601464
auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
1461-
// Don't worry about sizes larger than i64.
1462-
if (!CMemSetSize || !CCopySize || MOffset < 0 ||
1463-
CCopySize->getZExtValue() + MOffset > CMemSetSize->getZExtValue()) {
1465+
if (!CCopySize)
1466+
return false;
1467+
if (CCopySize->getZExtValue() + MOffset > CMemSetSize->getZExtValue()) {
14641468
if (!coversInputFully(MSSA, MemCpy, MemSet, BAA))
14651469
return false;
1466-
1467-
if (CMemSetSize && CCopySize) {
1468-
// If both have constant sizes and offsets, clip the memcpy to the bounds of the memset if applicable.
1469-
if (CCopySize->getZExtValue() + std::abs(MOffset) > CMemSetSize->getZExtValue()) {
1470-
if (MOffsetAligned == 0 || (MOffset < 0 && CCopySize->getZExtValue() + MOffset > CMemSetSize->getZExtValue()))
1471-
CopySize = MemSetSize;
1472-
else
1473-
CopySize = ConstantInt::get(CopySize->getType(), std::max((int64_t)0, (int64_t)(CMemSetSize->getZExtValue() - std::abs(MOffsetAligned))));
1474-
}
1475-
else if (MOffsetAligned < 0) {
1476-
// Even if CMemSetSize isn't known, if the MOffsetAligned is negative, make sure to clip the new memset
1477-
CopySize = ConstantInt::get(CopySize->getType(), CCopySize->getZExtValue() + MOffsetAligned);
1478-
}
1479-
}
1480-
else if (CCopySize && MOffsetAligned < 0) {
1481-
// Even if CMemSetSize isn't known, if the MOffsetAligned is negative, can still clip the new memset
1482-
CopySize = ConstantInt::get(CopySize->getType(), CCopySize->getZExtValue() + MOffsetAligned);
1483-
}
1484-
else {
1485-
MOffsetAligned = 0;
1486-
}
1470+
// Clip the memcpy to the bounds of the memset
1471+
if (MOffset == 0)
1472+
CopySize = MemSetSize;
1473+
else
1474+
CopySize =
1475+
ConstantInt::get(CopySize->getType(),
1476+
CMemSetSize->getZExtValue() <= (uint64_t)MOffset
1477+
? 0
1478+
: CMemSetSize->getZExtValue() - MOffset);
14871479
}
14881480
}
14891481

14901482
IRBuilder<> Builder(MemCpy);
14911483
Value *MDest = MemCpy->getRawDest();
1492-
if (MOffsetAligned < 0)
1493-
MDest = Builder.CreateInBoundsPtrAdd(MDest, Builder.getInt64(-MOffsetAligned));
14941484
Instruction *NewM =
1495-
Builder.CreateMemSet(MDest, MemSet->getOperand(1),
1496-
CopySize, MDestAlign);
1485+
Builder.CreateMemSet(MDest, MemSet->getOperand(1), CopySize, MDestAlign);
14971486
auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(MemCpy));
14981487
auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
14991488
MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

llvm/test/Transforms/MemCpyOpt/lifetime-missing.ll

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,8 @@ define void @test() {
1414
; CHECK-LABEL: define void @test() {
1515
; CHECK-NEXT: entry:
1616
; CHECK-NEXT: [[AGG_TMP_SROA_14:%.*]] = alloca [20 x i8], align 4
17-
; CHECK-NEXT: [[AGG_TMP_SROA_15:%.*]] = alloca [20 x i8], align 4
18-
; CHECK-NEXT: [[AGG_TMP_SROA_14_128_SROA_IDX:%.*]] = getelementptr i8, ptr [[AGG_TMP_SROA_15]], i64 4
17+
; CHECK-NEXT: [[AGG_TMP_SROA_14_128_SROA_IDX:%.*]] = getelementptr i8, ptr [[AGG_TMP_SROA_14]], i64 4
1918
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[AGG_TMP_SROA_14_128_SROA_IDX]], i8 0, i64 1, i1 false)
20-
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 20, ptr [[AGG_TMP_SROA_14]])
21-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[AGG_TMP_SROA_14]], i64 4
22-
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[TMP0]], i8 0, i64 1, i1 false)
2319
; CHECK-NEXT: [[AGG_TMP3_SROA_35_128_SROA_IDX:%.*]] = getelementptr i8, ptr [[AGG_TMP_SROA_14]], i64 4
2420
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr inttoptr (i64 4 to ptr), i8 0, i64 1, i1 false)
2521
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr null, i8 0, i64 1, i1 false)

llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -187,48 +187,47 @@ define void @test_write_before_memset_in_both_regions(ptr %result) {
187187
ret void
188188
}
189189

190-
define void @test_offset_memset(ptr %result) {
191-
; CHECK-LABEL: @test_offset_memset(
192-
; CHECK-NEXT: [[A1:%.*]] = alloca [4 x i32], align 8
193-
; CHECK-NEXT: [[A:%.*]] = getelementptr i32, ptr [[A1]], i32 1
190+
define void @test_negative_offset_memset(ptr %result) {
191+
; CHECK-LABEL: @test_negative_offset_memset(
192+
; CHECK-NEXT: [[A1:%.*]] = alloca [16 x i8], align 8
193+
; CHECK-NEXT: [[A:%.*]] = getelementptr i8, ptr [[A1]], i32 4
194194
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 12, i1 false)
195-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[RESULT:%.*]], i64 4
196-
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[TMP1]], i8 0, i64 8, i1 false)
195+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[RESULT:%.*]], ptr align 8 [[A1]], i64 12, i1 false)
197196
; CHECK-NEXT: ret void
198197
;
199-
%a = alloca [ 4 x i32 ], align 8
200-
%b = getelementptr i32, ptr %a, i32 1
198+
%a = alloca [ 16 x i8 ], align 8
199+
%b = getelementptr i8, ptr %a, i32 4
201200
call void @llvm.memset.p0.i64(ptr align 8 %b, i8 0, i64 12, i1 false)
202201
call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 12, i1 false)
203202
ret void
204203
}
205204

206205
define void @test_offset_memsetcpy(ptr %result) {
207206
; CHECK-LABEL: @test_offset_memsetcpy(
208-
; CHECK-NEXT: [[A1:%.*]] = alloca [4 x i32], align 8
209-
; CHECK-NEXT: [[A:%.*]] = getelementptr i32, ptr [[A1]], i32 1
207+
; CHECK-NEXT: [[A1:%.*]] = alloca [16 x i8], align 8
208+
; CHECK-NEXT: [[A:%.*]] = getelementptr i8, ptr [[A1]], i32 4
210209
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[A1]], i8 0, i64 12, i1 false)
211210
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[RESULT:%.*]], i8 0, i64 8, i1 false)
212211
; CHECK-NEXT: ret void
213212
;
214-
%a = alloca [ 4 x i32 ], align 8
215-
%b = getelementptr i32, ptr %a, i32 1
213+
%a = alloca [ 16 x i8 ], align 8
214+
%b = getelementptr i8, ptr %a, i32 4
216215
call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 false)
217216
call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %b, i64 12, i1 false)
218217
ret void
219218
}
220219

221220
define void @test_two_memset(ptr %result) {
222221
; CHECK-LABEL: @test_two_memset(
223-
; CHECK-NEXT: [[A:%.*]] = alloca [4 x i32], align 8
224-
; CHECK-NEXT: [[B:%.*]] = getelementptr i32, ptr [[A]], i32 3
222+
; CHECK-NEXT: [[A:%.*]] = alloca [16 x i8], align 8
223+
; CHECK-NEXT: [[B:%.*]] = getelementptr i8, ptr [[A]], i32 12
225224
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 12, i1 false)
226225
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[B]], i8 1, i64 4, i1 false)
227226
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[RESULT:%.*]], ptr align 8 [[A]], i64 16, i1 false)
228227
; CHECK-NEXT: ret void
229228
;
230-
%a = alloca [ 4 x i32 ], align 8
231-
%b = getelementptr i32, ptr %a, i32 3
229+
%a = alloca [ 16 x i8 ], align 8
230+
%b = getelementptr i8, ptr %a, i32 12
232231
call void @llvm.memset.p0.i64(ptr align 8 %a, i8 0, i64 12, i1 false)
233232
call void @llvm.memset.p0.i64(ptr align 8 %b, i8 1, i64 4, i1 false)
234233
call void @llvm.memcpy.p0.p0.i64(ptr %result, ptr align 8 %a, i64 16, i1 false)

llvm/test/Transforms/MemCpyOpt/variable-sized-memset-memcpy.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,13 @@ define void @test(ptr %src, i8 %c, i64 %size) {
1818
ret void
1919
}
2020

21-
; Differing sizes, but would be UB if size1 > size2
21+
; Differing sizes, but would be UB if size1 < size2 since the memcpy would reference outside of the first alloca
2222
define void @negative_test(ptr %src, i8 %c, i64 %size1, i64 %size2) {
2323
; CHECK-LABEL: @negative_test(
2424
; CHECK-NEXT: [[DST1:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
2525
; CHECK-NEXT: [[DST2:%.*]] = alloca i8, i64 [[SIZE2:%.*]], align 1
2626
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[DST1]], i8 [[C:%.*]], i64 [[SIZE1]], i1 false)
27-
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[DST2]], i8 [[C]], i64 [[SIZE2]], i1 false)
27+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[DST1]], i64 [[SIZE2]], i1 false)
2828
; CHECK-NEXT: ret void
2929
;
3030
%dst1 = alloca i8, i64 %size1

0 commit comments

Comments
 (0)