Skip to content

Commit 7172dd4

Browse files
committed
[Inliner] Don't count a call penalty for foldable __memcpy_chk
When the copy length is known to fit within the object size, calls to __memcpy_chk will eventually be replaced by inline stores. Therefore this patch avoids counting these as calls for purposes of inlining costs. This is only really relevant on platforms whose headers redirect memcpy to __memcpy_chk (such as Mac). On platforms that use intrinsics, memcpy and similar functions are already exempt from call penalties.
1 parent fe92cd6 commit 7172dd4

File tree

3 files changed

+53
-6
lines changed

3 files changed

+53
-6
lines changed

llvm/lib/Analysis/InlineCost.cpp

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
436436
bool simplifyIntrinsicCallIsConstant(CallBase &CB);
437437
bool simplifyIntrinsicCallObjectSize(CallBase &CB);
438438
ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
439+
bool isLoweredToCall(Function *F, CallBase &Call);
439440

440441
/// Return true if the given argument to the function being considered for
441442
/// inlining has the given attribute set either at the call site or the
@@ -2270,6 +2271,45 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
22702271
return false;
22712272
}
22722273

2274+
bool CallAnalyzer::isLoweredToCall(Function *F, CallBase &Call) {
  // Without TargetLibraryInfo, or when the callee is not a recognized and
  // available libcall, fall back to the target's generic answer.
  const TargetLibraryInfo *TLI = GetTLI ? &GetTLI(*F) : nullptr;
  LibFunc LF;
  if (!TLI || !TLI->getLibFunc(*F, LF) || !TLI->has(LF))
    return TTI.isLoweredToCall(F);

  switch (LF) {
  case LibFunc_memcpy_chk:
  case LibFunc_memmove_chk:
  case LibFunc_mempcpy_chk:
  case LibFunc_memset_chk: {
    // Calls to __memcpy_chk whose length is known to fit within the object
    // size will eventually be replaced by inline stores. Therefore, these
    // should not incur a call penalty. This is only really relevant on
    // platforms whose headers redirect memcpy to __memcpy_chk (e.g. Mac), as
    // other platforms use memcpy intrinsics, which are already exempt from
    // the call penalty.
    //
    // For all four *_chk variants the length is operand 2 and the object
    // size is operand 3. Consult SimplifiedValues when the operand is not
    // already a literal constant.
    auto ConstOperand = [&](unsigned Idx) -> ConstantInt * {
      Value *Op = Call.getOperand(Idx);
      if (auto *CI = dyn_cast<ConstantInt>(Op))
        return CI;
      return dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Op));
    };
    ConstantInt *Len = ConstOperand(2);
    ConstantInt *ObjSize = ConstOperand(3);
    if (Len && ObjSize && Len->getLimitedValue() <= ObjSize->getLimitedValue())
      return false;
    break;
  }
  default:
    break;
  }

  return TTI.isLoweredToCall(F);
}
2312+
22732313
bool CallAnalyzer::visitCallBase(CallBase &Call) {
22742314
if (!onCallBaseVisitStart(Call))
22752315
return true;
@@ -2351,7 +2391,7 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
23512391
return false;
23522392
}
23532393

2354-
if (TTI.isLoweredToCall(F)) {
2394+
if (isLoweredToCall(F, Call)) {
23552395
onLoweredCall(F, Call, IsIndirectCall);
23562396
}
23572397

llvm/test/Transforms/Inline/AArch64/memcpy-constant-size.ll

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,11 @@ define void @callee(ptr %dst, ptr %src, i64 %size) {
2828
define void @caller(ptr %dst, ptr %src) {
2929
; CHECK-LABEL: define void @caller
3030
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) {
31-
; CHECK-NEXT: call void @callee(ptr [[DST]], ptr [[SRC]], i64 4)
31+
; CHECK-NEXT: [[OBJSIZE_I:%.*]] = call i64 @llvm.objectsize.i64.p0(ptr [[DST]], i1 false, i1 true, i1 false)
32+
; CHECK-NEXT: [[CALL_MEMCPY_I:%.*]] = call ptr @__memcpy_chk(ptr [[DST]], ptr [[SRC]], i64 4, i64 [[OBJSIZE_I]])
33+
; CHECK-NEXT: [[CALL_MEMMOVE_I:%.*]] = call ptr @__memmove_chk(ptr [[DST]], ptr [[SRC]], i64 4, i64 [[OBJSIZE_I]])
34+
; CHECK-NEXT: [[CALL_MEMPCPY_I:%.*]] = call ptr @__mempcpy_chk(ptr [[DST]], ptr [[SRC]], i64 4, i64 [[OBJSIZE_I]])
35+
; CHECK-NEXT: [[CALL_MEMSET_I:%.*]] = call ptr @__memset_chk(ptr [[DST]], i32 0, i64 4, i64 [[OBJSIZE_I]])
3236
; CHECK-NEXT: ret void
3337
;
3438
call void @callee(ptr %dst, ptr %src, i64 4)

llvm/test/Transforms/PhaseOrdering/AArch64/memcpy-constant-size.ll

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@ define void @callee_memset(ptr %dst, i64 %size) {
5454
define void @caller_memcpy(ptr %dst, ptr %src) {
5555
; CHECK-LABEL: define void @caller_memcpy
5656
; CHECK-SAME: (ptr [[DST:%.*]], ptr nocapture readonly [[SRC:%.*]]) local_unnamed_addr #[[ATTR0]] {
57-
; CHECK-NEXT: tail call void @callee_memcpy(ptr [[DST]], ptr [[SRC]], i64 4)
57+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC]], align 1
58+
; CHECK-NEXT: store i32 [[TMP1]], ptr [[DST]], align 1
5859
; CHECK-NEXT: ret void
5960
;
6061
call void @callee_memcpy(ptr %dst, ptr %src, i64 4)
@@ -64,7 +65,8 @@ define void @caller_memcpy(ptr %dst, ptr %src) {
6465
define void @caller_memmove(ptr %dst, ptr %src) {
6566
; CHECK-LABEL: define void @caller_memmove
6667
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) local_unnamed_addr #[[ATTR1]] {
67-
; CHECK-NEXT: tail call void @callee_memmove(ptr [[DST]], ptr [[SRC]], i64 4)
68+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC]], align 1
69+
; CHECK-NEXT: store i32 [[TMP1]], ptr [[DST]], align 1
6870
; CHECK-NEXT: ret void
6971
;
7072
call void @callee_memmove(ptr %dst, ptr %src, i64 4)
@@ -74,7 +76,8 @@ define void @caller_memmove(ptr %dst, ptr %src) {
7476
define void @caller_mempcpy(ptr %dst, ptr %src) {
7577
; CHECK-LABEL: define void @caller_mempcpy
7678
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) local_unnamed_addr #[[ATTR1]] {
77-
; CHECK-NEXT: tail call void @callee_mempcpy(ptr [[DST]], ptr [[SRC]], i64 4)
79+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC]], align 1
80+
; CHECK-NEXT: store i32 [[TMP1]], ptr [[DST]], align 1
7881
; CHECK-NEXT: ret void
7982
;
8083
call void @callee_mempcpy(ptr %dst, ptr %src, i64 4)
@@ -84,7 +87,7 @@ define void @caller_mempcpy(ptr %dst, ptr %src) {
8487
define void @caller_memset(ptr %dst) {
8588
; CHECK-LABEL: define void @caller_memset
8689
; CHECK-SAME: (ptr [[DST:%.*]]) local_unnamed_addr #[[ATTR0]] {
87-
; CHECK-NEXT: tail call void @callee_memset(ptr [[DST]], i64 4)
90+
; CHECK-NEXT: store i32 0, ptr [[DST]], align 1
8891
; CHECK-NEXT: ret void
8992
;
9093
call void @callee_memset(ptr %dst, i64 4)

0 commit comments

Comments (0)