Skip to content

Commit a5e784c

Browse files
authored
Merge pull request #3163 from apple/eng/cherry-pick-calloc-0726
Cherry-pick malloc + memset -> calloc optimization
2 parents f4f64b0 + 01cf1dc commit a5e784c

File tree

5 files changed

+227
-71
lines changed

5 files changed

+227
-71
lines changed

llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,6 @@ class LibCallSimplifier {
132132
eraseFromParent(I);
133133
}
134134

135-
Value *foldMallocMemset(CallInst *Memset, IRBuilderBase &B);
136-
137135
public:
138136
LibCallSimplifier(
139137
const DataLayout &DL, const TargetLibraryInfo *TLI,

llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp

Lines changed: 68 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
#include "llvm/IR/DataLayout.h"
5757
#include "llvm/IR/Dominators.h"
5858
#include "llvm/IR/Function.h"
59+
#include "llvm/IR/IRBuilder.h"
5960
#include "llvm/IR/InstIterator.h"
6061
#include "llvm/IR/InstrTypes.h"
6162
#include "llvm/IR/Instruction.h"
@@ -78,6 +79,7 @@
7879
#include "llvm/Support/raw_ostream.h"
7980
#include "llvm/Transforms/Scalar.h"
8081
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
82+
#include "llvm/Transforms/Utils/BuildLibCalls.h"
8183
#include "llvm/Transforms/Utils/Local.h"
8284
#include <algorithm>
8385
#include <cassert>
@@ -505,7 +507,12 @@ memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
505507
BasicBlock::iterator SecondBBI(SecondI);
506508
BasicBlock *FirstBB = FirstI->getParent();
507509
BasicBlock *SecondBB = SecondI->getParent();
508-
MemoryLocation MemLoc = MemoryLocation::get(SecondI);
510+
MemoryLocation MemLoc;
511+
if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
512+
MemLoc = MemoryLocation::getForDest(MemSet);
513+
else
514+
MemLoc = MemoryLocation::get(SecondI);
515+
509516
auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
510517

511518
// Start checking the SecondBB.
@@ -819,14 +826,17 @@ bool isNoopIntrinsic(Instruction *I) {
819826
}
820827

821828
// Check if we can ignore \p D for DSE.
822-
bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
829+
bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller,
830+
const TargetLibraryInfo &TLI) {
823831
Instruction *DI = D->getMemoryInst();
824832
// Calls that only access inaccessible memory cannot read or write any memory
825833
// locations we consider for elimination.
826834
if (auto *CB = dyn_cast<CallBase>(DI))
827-
if (CB->onlyAccessesInaccessibleMemory())
835+
if (CB->onlyAccessesInaccessibleMemory()) {
836+
if (isAllocLikeFn(DI, &TLI))
837+
return false;
828838
return true;
829-
839+
}
830840
// We can eliminate stores to locations not visible to the caller across
831841
// throwing instructions.
832842
if (DI->mayThrow() && !DefVisibleToCaller)
@@ -841,7 +851,7 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
841851
return true;
842852

843853
// Skip intrinsics that do not really read or modify memory.
844-
if (isNoopIntrinsic(D->getMemoryInst()))
854+
if (isNoopIntrinsic(DI))
845855
return true;
846856

847857
return false;
@@ -1389,7 +1399,7 @@ struct DSEState {
13891399
MemoryDef *CurrentDef = cast<MemoryDef>(Current);
13901400
Instruction *CurrentI = CurrentDef->getMemoryInst();
13911401

1392-
if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO)))
1402+
if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO), TLI))
13931403
continue;
13941404

13951405
// Before we try to remove anything, check for any extra throwing
@@ -1816,13 +1826,58 @@ struct DSEState {
18161826

18171827
if (StoredConstant && StoredConstant->isNullValue()) {
18181828
auto *DefUOInst = dyn_cast<Instruction>(DefUO);
1819-
if (DefUOInst && isCallocLikeFn(DefUOInst, &TLI)) {
1820-
auto *UnderlyingDef = cast<MemoryDef>(MSSA.getMemoryAccess(DefUOInst));
1821-
// If UnderlyingDef is the clobbering access of Def, no instructions
1822-
// between them can modify the memory location.
1823-
auto *ClobberDef =
1824-
MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def);
1825-
return UnderlyingDef == ClobberDef;
1829+
if (DefUOInst) {
1830+
if (isCallocLikeFn(DefUOInst, &TLI)) {
1831+
auto *UnderlyingDef =
1832+
cast<MemoryDef>(MSSA.getMemoryAccess(DefUOInst));
1833+
// If UnderlyingDef is the clobbering access of Def, no instructions
1834+
// between them can modify the memory location.
1835+
auto *ClobberDef =
1836+
MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def);
1837+
return UnderlyingDef == ClobberDef;
1838+
}
1839+
1840+
if (MemSet) {
1841+
if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
1842+
F.hasFnAttribute(Attribute::SanitizeAddress) ||
1843+
F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
1844+
F.getName() == "calloc")
1845+
return false;
1846+
auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUOInst));
1847+
if (!Malloc)
1848+
return false;
1849+
auto *InnerCallee = Malloc->getCalledFunction();
1850+
if (!InnerCallee)
1851+
return false;
1852+
LibFunc Func;
1853+
if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
1854+
Func != LibFunc_malloc)
1855+
return false;
1856+
if (Malloc->getOperand(0) == MemSet->getLength()) {
1857+
if (DT.dominates(Malloc, MemSet) &&
1858+
memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT)) {
1859+
IRBuilder<> IRB(Malloc);
1860+
const auto &DL = Malloc->getModule()->getDataLayout();
1861+
AttributeList EmptyList;
1862+
if (auto *Calloc = emitCalloc(
1863+
ConstantInt::get(IRB.getIntPtrTy(DL), 1),
1864+
Malloc->getArgOperand(0), EmptyList, IRB, TLI)) {
1865+
MemorySSAUpdater Updater(&MSSA);
1866+
auto *LastDef = cast<MemoryDef>(
1867+
Updater.getMemorySSA()->getMemoryAccess(Malloc));
1868+
auto *NewAccess = Updater.createMemoryAccessAfter(
1869+
cast<Instruction>(Calloc), LastDef, LastDef);
1870+
auto *NewAccessMD = cast<MemoryDef>(NewAccess);
1871+
Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
1872+
Updater.removeMemoryAccess(Malloc);
1873+
Malloc->replaceAllUsesWith(Calloc);
1874+
Malloc->eraseFromParent();
1875+
return true;
1876+
}
1877+
return false;
1878+
}
1879+
}
1880+
}
18261881
}
18271882
}
18281883

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

Lines changed: 0 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1156,59 +1156,12 @@ Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilderBase &B) {
11561156
return CI->getArgOperand(0);
11571157
}
11581158

1159-
/// Fold memset[_chk](malloc(n), 0, n) --> calloc(1, n).
1160-
Value *LibCallSimplifier::foldMallocMemset(CallInst *Memset, IRBuilderBase &B) {
1161-
// This has to be a memset of zeros (bzero).
1162-
auto *FillValue = dyn_cast<ConstantInt>(Memset->getArgOperand(1));
1163-
if (!FillValue || FillValue->getZExtValue() != 0)
1164-
return nullptr;
1165-
1166-
// TODO: We should handle the case where the malloc has more than one use.
1167-
// This is necessary to optimize common patterns such as when the result of
1168-
// the malloc is checked against null or when a memset intrinsic is used in
1169-
// place of a memset library call.
1170-
auto *Malloc = dyn_cast<CallInst>(Memset->getArgOperand(0));
1171-
if (!Malloc || !Malloc->hasOneUse())
1172-
return nullptr;
1173-
1174-
// Is the inner call really malloc()?
1175-
Function *InnerCallee = Malloc->getCalledFunction();
1176-
if (!InnerCallee)
1177-
return nullptr;
1178-
1179-
LibFunc Func;
1180-
if (!TLI->getLibFunc(*InnerCallee, Func) || !TLI->has(Func) ||
1181-
Func != LibFunc_malloc)
1182-
return nullptr;
1183-
1184-
// The memset must cover the same number of bytes that are malloc'd.
1185-
if (Memset->getArgOperand(2) != Malloc->getArgOperand(0))
1186-
return nullptr;
1187-
1188-
// Replace the malloc with a calloc. We need the data layout to know what the
1189-
// actual size of a 'size_t' parameter is.
1190-
B.SetInsertPoint(Malloc->getParent(), ++Malloc->getIterator());
1191-
const DataLayout &DL = Malloc->getModule()->getDataLayout();
1192-
IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext());
1193-
if (Value *Calloc = emitCalloc(ConstantInt::get(SizeType, 1),
1194-
Malloc->getArgOperand(0),
1195-
Malloc->getAttributes(), B, *TLI)) {
1196-
substituteInParent(Malloc, Calloc);
1197-
return Calloc;
1198-
}
1199-
1200-
return nullptr;
1201-
}
1202-
12031159
Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilderBase &B) {
12041160
Value *Size = CI->getArgOperand(2);
12051161
annotateNonNullAndDereferenceable(CI, 0, Size, DL);
12061162
if (isa<IntrinsicInst>(CI))
12071163
return nullptr;
12081164

1209-
if (auto *Calloc = foldMallocMemset(CI, B))
1210-
return Calloc;
1211-
12121165
// memset(p, v, n) -> llvm.memset(align 1 p, v, n)
12131166
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
12141167
CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, Size, Align(1));
@@ -3066,7 +3019,6 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) {
30663019
return optimizeLog(CI, Builder);
30673020
case Intrinsic::sqrt:
30683021
return optimizeSqrt(CI, Builder);
3069-
// TODO: Use foldMallocMemset() with memset intrinsic.
30703022
case Intrinsic::memset:
30713023
return optimizeMemSet(CI, Builder);
30723024
case Intrinsic::memcpy:
@@ -3289,8 +3241,6 @@ Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
32893241

32903242
Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
32913243
IRBuilderBase &B) {
3292-
// TODO: Try foldMallocMemset() here.
3293-
32943244
if (isFortifiedCallFoldable(CI, 3, 2)) {
32953245
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
32963246
CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val,

llvm/test/Transforms/DeadStoreElimination/noop-stores.ll

Lines changed: 151 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
22
; RUN: opt < %s -basic-aa -dse -S | FileCheck %s
3-
; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s
3+
; RUN: opt < %s -aa-pipeline=basic-aa -passes='dse,verify<memoryssa>' -S | FileCheck %s
44
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
55

6-
declare i8* @calloc(i64, i64)
76
declare void @memset_pattern16(i8*, i8*, i64)
87

98
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
@@ -309,6 +308,156 @@ entry:
309308
ret void
310309
}
311310

311+
declare noalias i8* @malloc(i64)
312+
declare noalias i8* @_Znwm(i64)
313+
declare void @clobber_memory(float*)
314+
315+
; based on pr25892_lite
316+
define i8* @zero_memset_after_malloc(i64 %size) {
317+
; CHECK-LABEL: @zero_memset_after_malloc(
318+
; CHECK-NEXT: [[CALL:%.*]] = call i8* @calloc(i64 1, i64 [[SIZE:%.*]])
319+
; CHECK-NEXT: ret i8* [[CALL]]
320+
;
321+
%call = call i8* @malloc(i64 %size) inaccessiblememonly
322+
call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size, i1 false)
323+
ret i8* %call
324+
}
325+
326+
; based on pr25892_lite
327+
define i8* @zero_memset_after_malloc_with_intermediate_clobbering(i64 %size) {
328+
; CHECK-LABEL: @zero_memset_after_malloc_with_intermediate_clobbering(
329+
; CHECK-NEXT: [[CALL:%.*]] = call i8* @malloc(i64 [[SIZE:%.*]])
330+
; CHECK-NEXT: [[BC:%.*]] = bitcast i8* [[CALL]] to float*
331+
; CHECK-NEXT: call void @clobber_memory(float* [[BC]])
332+
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 0, i64 [[SIZE]], i1 false)
333+
; CHECK-NEXT: ret i8* [[CALL]]
334+
;
335+
%call = call i8* @malloc(i64 %size) inaccessiblememonly
336+
%bc = bitcast i8* %call to float*
337+
call void @clobber_memory(float* %bc)
338+
call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size, i1 false)
339+
ret i8* %call
340+
}
341+
342+
; based on pr25892_lite
343+
define i8* @zero_memset_after_malloc_with_different_sizes(i64 %size) {
344+
; CHECK-LABEL: @zero_memset_after_malloc_with_different_sizes(
345+
; CHECK-NEXT: [[CALL:%.*]] = call i8* @malloc(i64 [[SIZE:%.*]])
346+
; CHECK-NEXT: [[SIZE2:%.*]] = add nsw i64 [[SIZE]], -1
347+
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 0, i64 [[SIZE2]], i1 false)
348+
; CHECK-NEXT: ret i8* [[CALL]]
349+
;
350+
%call = call i8* @malloc(i64 %size) inaccessiblememonly
351+
%size2 = add nsw i64 %size, -1
352+
call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size2, i1 false)
353+
ret i8* %call
354+
}
355+
356+
; based on pr25892_lite
357+
define i8* @zero_memset_after_new(i64 %size) {
358+
; CHECK-LABEL: @zero_memset_after_new(
359+
; CHECK-NEXT: [[CALL:%.*]] = call i8* @_Znwm(i64 [[SIZE:%.*]])
360+
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 0, i64 [[SIZE]], i1 false)
361+
; CHECK-NEXT: ret i8* [[CALL]]
362+
;
363+
%call = call i8* @_Znwm(i64 %size)
364+
call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size, i1 false)
365+
ret i8* %call
366+
}
367+
368+
; This should not create a calloc and should not crash the compiler.
369+
define i8* @notmalloc_memset(i64 %size, i8*(i64)* %notmalloc) {
370+
; CHECK-LABEL: @notmalloc_memset(
371+
; CHECK-NEXT: [[CALL1:%.*]] = call i8* [[NOTMALLOC:%.*]](i64 [[SIZE:%.*]])
372+
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CALL1]], i8 0, i64 [[SIZE]], i1 false)
373+
; CHECK-NEXT: ret i8* [[CALL1]]
374+
;
375+
%call1 = call i8* %notmalloc(i64 %size)
376+
call void @llvm.memset.p0i8.i64(i8* %call1, i8 0, i64 %size, i1 false)
377+
ret i8* %call1
378+
}
379+
380+
; This should not create recursive call to calloc.
381+
define i8* @calloc(i64 %nmemb, i64 %size) {
382+
; CHECK-LABEL: @calloc(
383+
; CHECK: entry:
384+
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[SIZE:%.*]], [[NMEMB:%.*]]
385+
; CHECK-NEXT: [[CALL:%.*]] = tail call noalias align 16 i8* @malloc(i64 [[MUL]])
386+
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8* [[CALL]], null
387+
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
388+
; CHECK: if.then:
389+
; CHECK-NEXT: tail call void @llvm.memset.p0i8.i64(i8* nonnull align 16 [[CALL]], i8 0, i64 [[MUL]], i1 false)
390+
; CHECK-NEXT: br label [[IF_END]]
391+
; CHECK: if.end:
392+
; CHECK-NEXT: ret i8* [[CALL]]
393+
;
394+
entry:
395+
%mul = mul i64 %size, %nmemb
396+
%call = tail call noalias align 16 i8* @malloc(i64 %mul)
397+
%tobool.not = icmp eq i8* %call, null
398+
br i1 %tobool.not, label %if.end, label %if.then
399+
400+
if.then: ; preds = %entry
401+
tail call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %call, i8 0, i64 %mul, i1 false)
402+
br label %if.end
403+
404+
if.end: ; preds = %if.then, %entry
405+
ret i8* %call
406+
}
407+
408+
define float* @pr25892(i64 %size) {
409+
; CHECK-LABEL: @pr25892(
410+
; CHECK: entry:
411+
; CHECK-NEXT: [[CALL:%.*]] = call i8* @calloc(i64 1, i64 [[SIZE:%.*]])
412+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[CALL]], null
413+
; CHECK-NEXT: br i1 [[CMP]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
414+
; CHECK: if.end:
415+
; CHECK-NEXT: [[BC:%.*]] = bitcast i8* [[CALL]] to float*
416+
; CHECK-NEXT: br label [[CLEANUP]]
417+
; CHECK: cleanup:
418+
; CHECK-NEXT: [[RETVAL_0:%.*]] = phi float* [ [[BC]], [[IF_END]] ], [ null, [[ENTRY:%.*]] ]
419+
; CHECK-NEXT: ret float* [[RETVAL_0]]
420+
;
421+
entry:
422+
%call = call i8* @malloc(i64 %size) inaccessiblememonly
423+
%cmp = icmp eq i8* %call, null
424+
br i1 %cmp, label %cleanup, label %if.end
425+
if.end:
426+
%bc = bitcast i8* %call to float*
427+
call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size, i1 false)
428+
br label %cleanup
429+
cleanup:
430+
%retval.0 = phi float* [ %bc, %if.end ], [ null, %entry ]
431+
ret float* %retval.0
432+
}
433+
434+
define float* @pr25892_with_extra_store(i64 %size) {
435+
; CHECK-LABEL: @pr25892_with_extra_store(
436+
; CHECK: entry:
437+
; CHECK-NEXT: [[CALL:%.*]] = call i8* @calloc(i64 1, i64 [[SIZE:%.*]])
438+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[CALL]], null
439+
; CHECK-NEXT: br i1 [[CMP]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
440+
; CHECK: if.end:
441+
; CHECK-NEXT: [[BC:%.*]] = bitcast i8* [[CALL]] to float*
442+
; CHECK-NEXT: br label [[CLEANUP]]
443+
; CHECK: cleanup:
444+
; CHECK-NEXT: [[RETVAL_0:%.*]] = phi float* [ [[BC]], [[IF_END]] ], [ null, [[ENTRY:%.*]] ]
445+
; CHECK-NEXT: ret float* [[RETVAL_0]]
446+
;
447+
entry:
448+
%call = call i8* @malloc(i64 %size) inaccessiblememonly
449+
%cmp = icmp eq i8* %call, null
450+
br i1 %cmp, label %cleanup, label %if.end
451+
if.end:
452+
%bc = bitcast i8* %call to float*
453+
call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 %size, i1 false)
454+
store i8 0, i8* %call, align 1
455+
br label %cleanup
456+
cleanup:
457+
%retval.0 = phi float* [ %bc, %if.end ], [ null, %entry ]
458+
ret float* %retval.0
459+
}
460+
312461
; PR50143
313462
define i8* @store_zero_after_calloc_inaccessiblememonly() {
314463
; CHECK-LABEL: @store_zero_after_calloc_inaccessiblememonly(

0 commit comments

Comments (0)