Skip to content

Commit d2301f9

Browse files
committed
[Inliner] Don't count a call penalty for foldable __memcpy_chk
When the copy length is known to fit within the object size, calls to __memcpy_chk (and likewise __memmove_chk, __mempcpy_chk and __memset_chk) will eventually be replaced by inline stores. Therefore, this patch avoids counting these as calls for the purposes of inlining costs. This is only really relevant on platforms whose headers redirect memcpy to __memcpy_chk (such as Darwin). On platforms that use intrinsics, memcpy and similar functions are already exempt from call penalties.
1 parent fe92cd6 commit d2301f9

File tree

3 files changed

+52
-6
lines changed

3 files changed

+52
-6
lines changed

llvm/lib/Analysis/InlineCost.cpp

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
436436
bool simplifyIntrinsicCallIsConstant(CallBase &CB);
437437
bool simplifyIntrinsicCallObjectSize(CallBase &CB);
438438
ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
439+
bool isLoweredToCall(Function *F, CallBase &Call);
439440

440441
/// Return true if the given argument to the function being considered for
441442
/// inlining has the given attribute set either at the call site or the
@@ -2270,6 +2271,44 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
22702271
return false;
22712272
}
22722273

2274+
/// Decide whether the call \p Call to \p F should be costed as a real call.
///
/// Returns false for a __memcpy_chk / __memmove_chk / __mempcpy_chk /
/// __memset_chk call whose length and object-size operands are both known
/// constants (directly, or through SimplifiedValues) with length <= object
/// size. Every other case defers to TTI.isLoweredToCall(F).
bool CallAnalyzer::isLoweredToCall(Function *F, CallBase &Call) {
2275+
const TargetLibraryInfo *TLI = GetTLI ? &GetTLI(*F) : nullptr;
2276+
LibFunc LF;
2277+
// No TLI available, or F is not a library function that TLI recognizes and
// reports as present: use the target's answer unchanged.
if (!TLI || !TLI->getLibFunc(*F, LF) || !TLI->has(LF))
2278+
return TTI.isLoweredToCall(F);
2279+
2280+
switch (LF) {
2281+
case LibFunc_memcpy_chk:
2282+
case LibFunc_memmove_chk:
2283+
case LibFunc_mempcpy_chk:
2284+
case LibFunc_memset_chk: {
2285+
// Calls to these __mem*_chk functions whose length is known to fit within
2286+
// the object size will eventually be replaced by inline stores. Therefore,
2287+
// they should not incur a call penalty. This is only really relevant on
2288+
// platforms whose headers redirect memcpy to __memcpy_chk (e.g. Darwin), as
2289+
// other platforms use memcpy intrinsics, which are already exempt from the
2290+
// call penalty.
2291+
// Operand 2 is treated as the length: take it if it is already a constant,
// otherwise try the value recorded for it in SimplifiedValues.
auto *LenOp = dyn_cast<ConstantInt>(Call.getOperand(2));
2292+
if (!LenOp)
2293+
LenOp = dyn_cast_or_null<ConstantInt>(
2294+
SimplifiedValues.lookup(Call.getOperand(2)));
2295+
// Operand 3 is treated as the object size, resolved the same way.
auto *ObjSizeOp = dyn_cast<ConstantInt>(Call.getOperand(3));
2296+
if (!ObjSizeOp)
2297+
ObjSizeOp = dyn_cast_or_null<ConstantInt>(
2298+
SimplifiedValues.lookup(Call.getOperand(3)));
2299+
// Only when both are constants and the length fits is the call reported as
// not lowered to a call; otherwise fall through to the target hook below.
if (LenOp && ObjSizeOp &&
2300+
LenOp->getLimitedValue() <= ObjSizeOp->getLimitedValue()) {
2301+
return false;
2302+
}
2303+
break;
2304+
}
2305+
default:
2306+
break;
2307+
}
2308+
2309+
return TTI.isLoweredToCall(F);
2310+
}
2311+
22732312
bool CallAnalyzer::visitCallBase(CallBase &Call) {
22742313
if (!onCallBaseVisitStart(Call))
22752314
return true;
@@ -2351,7 +2390,7 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
23512390
return false;
23522391
}
23532392

2354-
if (TTI.isLoweredToCall(F)) {
2393+
if (isLoweredToCall(F, Call)) {
23552394
onLoweredCall(F, Call, IsIndirectCall);
23562395
}
23572396

llvm/test/Transforms/Inline/AArch64/memcpy-constant-size.ll

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,11 @@ define void @callee(ptr %dst, ptr %src, i64 %size) {
2828
define void @caller(ptr %dst, ptr %src) {
2929
; CHECK-LABEL: define void @caller
3030
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) {
31-
; CHECK-NEXT: call void @callee(ptr [[DST]], ptr [[SRC]], i64 4)
31+
; CHECK-NEXT: [[OBJSIZE_I:%.*]] = call i64 @llvm.objectsize.i64.p0(ptr [[DST]], i1 false, i1 true, i1 false)
32+
; CHECK-NEXT: [[CALL_MEMCPY_I:%.*]] = call ptr @__memcpy_chk(ptr [[DST]], ptr [[SRC]], i64 4, i64 [[OBJSIZE_I]])
33+
; CHECK-NEXT: [[CALL_MEMMOVE_I:%.*]] = call ptr @__memmove_chk(ptr [[DST]], ptr [[SRC]], i64 4, i64 [[OBJSIZE_I]])
34+
; CHECK-NEXT: [[CALL_MEMPCPY_I:%.*]] = call ptr @__mempcpy_chk(ptr [[DST]], ptr [[SRC]], i64 4, i64 [[OBJSIZE_I]])
35+
; CHECK-NEXT: [[CALL_MEMSET_I:%.*]] = call ptr @__memset_chk(ptr [[DST]], i32 0, i64 4, i64 [[OBJSIZE_I]])
3236
; CHECK-NEXT: ret void
3337
;
3438
call void @callee(ptr %dst, ptr %src, i64 4)

llvm/test/Transforms/PhaseOrdering/AArch64/memcpy-constant-size.ll

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@ define void @callee_memset(ptr %dst, i64 %size) {
5454
define void @caller_memcpy(ptr %dst, ptr %src) {
5555
; CHECK-LABEL: define void @caller_memcpy
5656
; CHECK-SAME: (ptr [[DST:%.*]], ptr nocapture readonly [[SRC:%.*]]) local_unnamed_addr #[[ATTR0]] {
57-
; CHECK-NEXT: tail call void @callee_memcpy(ptr [[DST]], ptr [[SRC]], i64 4)
57+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC]], align 1
58+
; CHECK-NEXT: store i32 [[TMP1]], ptr [[DST]], align 1
5859
; CHECK-NEXT: ret void
5960
;
6061
call void @callee_memcpy(ptr %dst, ptr %src, i64 4)
@@ -64,7 +65,8 @@ define void @caller_memcpy(ptr %dst, ptr %src) {
6465
define void @caller_memmove(ptr %dst, ptr %src) {
6566
; CHECK-LABEL: define void @caller_memmove
6667
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) local_unnamed_addr #[[ATTR1]] {
67-
; CHECK-NEXT: tail call void @callee_memmove(ptr [[DST]], ptr [[SRC]], i64 4)
68+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC]], align 1
69+
; CHECK-NEXT: store i32 [[TMP1]], ptr [[DST]], align 1
6870
; CHECK-NEXT: ret void
6971
;
7072
call void @callee_memmove(ptr %dst, ptr %src, i64 4)
@@ -74,7 +76,8 @@ define void @caller_memmove(ptr %dst, ptr %src) {
7476
define void @caller_mempcpy(ptr %dst, ptr %src) {
7577
; CHECK-LABEL: define void @caller_mempcpy
7678
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) local_unnamed_addr #[[ATTR1]] {
77-
; CHECK-NEXT: tail call void @callee_mempcpy(ptr [[DST]], ptr [[SRC]], i64 4)
79+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC]], align 1
80+
; CHECK-NEXT: store i32 [[TMP1]], ptr [[DST]], align 1
7881
; CHECK-NEXT: ret void
7982
;
8083
call void @callee_mempcpy(ptr %dst, ptr %src, i64 4)
@@ -84,7 +87,7 @@ define void @caller_mempcpy(ptr %dst, ptr %src) {
8487
define void @caller_memset(ptr %dst) {
8588
; CHECK-LABEL: define void @caller_memset
8689
; CHECK-SAME: (ptr [[DST:%.*]]) local_unnamed_addr #[[ATTR0]] {
87-
; CHECK-NEXT: tail call void @callee_memset(ptr [[DST]], i64 4)
90+
; CHECK-NEXT: store i32 0, ptr [[DST]], align 1
8891
; CHECK-NEXT: ret void
8992
;
9093
call void @callee_memset(ptr %dst, i64 4)

0 commit comments

Comments
 (0)