Skip to content

Commit 02e3e95

Browse files
committed
[Inliner] Don't count a call penalty for foldable __memcpy_chk and similar
When the length is a suitable constant (1, 2, 4, or 8 bytes, and no larger than the constant object size), a call to __memcpy_chk is turned into a memcpy that InstCombine then folds away. This patch therefore avoids counting such calls as calls when computing inlining costs. This is only really relevant on platforms whose headers redirect memcpy to __memcpy_chk (such as macOS). On platforms that use intrinsics, memcpy and similar functions are already exempt from call penalties.
1 parent f0df849 commit 02e3e95

File tree

3 files changed

+56
-6
lines changed

3 files changed

+56
-6
lines changed

llvm/lib/Analysis/InlineCost.cpp

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
436436
bool simplifyIntrinsicCallIsConstant(CallBase &CB);
437437
bool simplifyIntrinsicCallObjectSize(CallBase &CB);
438438
ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
439+
bool isLoweredToCall(Function *F, CallBase &Call);
439440

440441
/// Return true if the given argument to the function being considered for
441442
/// inlining has the given attribute set either at the call site or the
@@ -2270,6 +2271,48 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
22702271
return false;
22712272
}
22722273

2274+
/// Decide whether the call \p Call to \p F should be charged as a real call
/// when computing the inline cost.
///
/// The fortified memory routines (__memcpy_chk, __memmove_chk, __mempcpy_chk
/// and __memset_chk) are reported as *not* lowered to a call when
///   - the length (operand 2) and the object size (operand 3) are both
///     constant integers, either directly or via SimplifiedValues,
///   - the length is 1, 2, 4 or 8, and
///   - the length does not exceed the object size.
/// Such calls are expected to be folded away by InstCombine, so they should
/// not incur a call penalty. This mostly matters on platforms whose headers
/// redirect memcpy to __memcpy_chk (e.g. Mac); elsewhere the memcpy intrinsics
/// are used, and those are already exempt from the penalty.
///
/// Every other case defers to TTI.isLoweredToCall(F).
bool CallAnalyzer::isLoweredToCall(Function *F, CallBase &Call) {
  // No library info provider: the target's answer is all we have.
  if (!GetTLI)
    return TTI.isLoweredToCall(F);

  auto LibInfo = GetTLI(*F);
  LibFunc Func;
  if (!LibInfo.getLibFunc(*F, Func) || !LibInfo.has(Func))
    return TTI.isLoweredToCall(F);

  const bool IsFortifiedMemRoutine =
      Func == LibFunc_memcpy_chk || Func == LibFunc_memmove_chk ||
      Func == LibFunc_mempcpy_chk || Func == LibFunc_memset_chk;
  if (!IsFortifiedMemRoutine)
    return TTI.isLoweredToCall(F);

  // Resolve an operand to a ConstantInt, looking first at the operand itself
  // and then at whatever the analysis has already simplified it to.
  auto AsConstantInt = [this](Value *Operand) {
    auto *CI = dyn_cast_or_null<ConstantInt>(Operand);
    if (!CI)
      CI = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Operand));
    return CI;
  };

  auto *LengthCI = AsConstantInt(Call.getOperand(2));
  auto *ObjectSizeCI = AsConstantInt(Call.getOperand(3));
  if (LengthCI && ObjectSizeCI) {
    const auto Length = LengthCI->getLimitedValue();
    const auto ObjectSize = ObjectSizeCI->getLimitedValue();
    const bool IsFoldableLength =
        Length == 1 || Length == 2 || Length == 4 || Length == 8;
    // The copy must fit in the destination object for the check to fold.
    if (IsFoldableLength && Length <= ObjectSize)
      return false;
  }

  return TTI.isLoweredToCall(F);
}
}
2315+
22732316
bool CallAnalyzer::visitCallBase(CallBase &Call) {
22742317
if (!onCallBaseVisitStart(Call))
22752318
return true;
@@ -2351,7 +2394,7 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
23512394
return false;
23522395
}
23532396

2354-
if (TTI.isLoweredToCall(F)) {
2397+
if (isLoweredToCall(F, Call)) {
23552398
onLoweredCall(F, Call, IsIndirectCall);
23562399
}
23572400

llvm/test/Transforms/Inline/AArch64/memcpy-constant-size.ll

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,11 @@ define void @caller3(ptr %dst, ptr %src) {
3838
define void @caller4(ptr %dst, ptr %src) {
3939
; CHECK-LABEL: define void @caller4
4040
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) {
41-
; CHECK-NEXT: call void @callee(ptr [[DST]], ptr [[SRC]], i64 4)
41+
; CHECK-NEXT: [[OBJSIZE_I:%.*]] = call i64 @llvm.objectsize.i64.p0(ptr [[DST]], i1 false, i1 true, i1 false)
42+
; CHECK-NEXT: [[CALL_MEMCPY_I:%.*]] = call ptr @__memcpy_chk(ptr [[DST]], ptr [[SRC]], i64 4, i64 [[OBJSIZE_I]])
43+
; CHECK-NEXT: [[CALL_MEMMOVE_I:%.*]] = call ptr @__memmove_chk(ptr [[DST]], ptr [[SRC]], i64 4, i64 [[OBJSIZE_I]])
44+
; CHECK-NEXT: [[CALL_MEMPCPY_I:%.*]] = call ptr @__mempcpy_chk(ptr [[DST]], ptr [[SRC]], i64 4, i64 [[OBJSIZE_I]])
45+
; CHECK-NEXT: [[CALL_MEMSET_I:%.*]] = call ptr @__memset_chk(ptr [[DST]], i32 0, i64 4, i64 [[OBJSIZE_I]])
4246
; CHECK-NEXT: ret void
4347
;
4448
call void @callee(ptr %dst, ptr %src, i64 4)

llvm/test/Transforms/PhaseOrdering/AArch64/memcpy-constant-size.ll

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@ define void @callee_memset(ptr %dst, i64 %size) {
5454
define void @caller_memcpy(ptr %dst, ptr %src) {
5555
; CHECK-LABEL: define void @caller_memcpy
5656
; CHECK-SAME: (ptr [[DST:%.*]], ptr nocapture readonly [[SRC:%.*]]) local_unnamed_addr #[[ATTR0]] {
57-
; CHECK-NEXT: tail call void @callee_memcpy(ptr [[DST]], ptr [[SRC]], i64 4)
57+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC]], align 1
58+
; CHECK-NEXT: store i32 [[TMP1]], ptr [[DST]], align 1
5859
; CHECK-NEXT: ret void
5960
;
6061
call void @callee_memcpy(ptr %dst, ptr %src, i64 4)
@@ -64,7 +65,8 @@ define void @caller_memcpy(ptr %dst, ptr %src) {
6465
define void @caller_memmove(ptr %dst, ptr %src) {
6566
; CHECK-LABEL: define void @caller_memmove
6667
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) local_unnamed_addr #[[ATTR1]] {
67-
; CHECK-NEXT: tail call void @callee_memmove(ptr [[DST]], ptr [[SRC]], i64 4)
68+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC]], align 1
69+
; CHECK-NEXT: store i32 [[TMP1]], ptr [[DST]], align 1
6870
; CHECK-NEXT: ret void
6971
;
7072
call void @callee_memmove(ptr %dst, ptr %src, i64 4)
@@ -74,7 +76,8 @@ define void @caller_memmove(ptr %dst, ptr %src) {
7476
define void @caller_mempcpy(ptr %dst, ptr %src) {
7577
; CHECK-LABEL: define void @caller_mempcpy
7678
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) local_unnamed_addr #[[ATTR1]] {
77-
; CHECK-NEXT: tail call void @callee_mempcpy(ptr [[DST]], ptr [[SRC]], i64 4)
79+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC]], align 1
80+
; CHECK-NEXT: store i32 [[TMP1]], ptr [[DST]], align 1
7881
; CHECK-NEXT: ret void
7982
;
8083
call void @callee_mempcpy(ptr %dst, ptr %src, i64 4)
@@ -84,7 +87,7 @@ define void @caller_mempcpy(ptr %dst, ptr %src) {
8487
define void @caller_memset(ptr %dst) {
8588
; CHECK-LABEL: define void @caller_memset
8689
; CHECK-SAME: (ptr [[DST:%.*]]) local_unnamed_addr #[[ATTR0]] {
87-
; CHECK-NEXT: tail call void @callee_memset(ptr [[DST]], i64 4)
90+
; CHECK-NEXT: store i32 0, ptr [[DST]], align 1
8891
; CHECK-NEXT: ret void
8992
;
9093
call void @callee_memset(ptr %dst, i64 4)

0 commit comments

Comments
 (0)