Skip to content

Commit 8fb748b

Browse files
authored
[Inliner] Don't count a call penalty for foldable __memcpy_chk and similar (llvm#117876)
When the size is an appropriate constant, a call to __memcpy_chk will turn into a memcpy that gets folded away by InstCombine. Therefore this patch avoids counting such calls as real calls for the purposes of inlining cost. This is only really relevant on platforms whose headers redirect memcpy to __memcpy_chk (such as Darwin); on platforms that use memcpy intrinsics instead, memcpy and similar functions are already exempt from the call penalty.
1 parent fe04290 commit 8fb748b

File tree

4 files changed

+258
-17
lines changed

4 files changed

+258
-17
lines changed

llvm/include/llvm/Analysis/InlineCost.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,7 @@ std::optional<int> getInliningCostEstimate(
318318
CallBase &Call, TargetTransformInfo &CalleeTTI,
319319
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
320320
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
321+
function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,
321322
ProfileSummaryInfo *PSI = nullptr,
322323
OptimizationRemarkEmitter *ORE = nullptr);
323324

@@ -327,6 +328,7 @@ std::optional<InlineCostFeatures> getInliningCostFeatures(
327328
CallBase &Call, TargetTransformInfo &CalleeTTI,
328329
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
329330
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
331+
function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,
330332
ProfileSummaryInfo *PSI = nullptr,
331333
OptimizationRemarkEmitter *ORE = nullptr);
332334

llvm/lib/Analysis/InlineCost.cpp

Lines changed: 69 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
249249
/// Getter for BlockFrequencyInfo
250250
function_ref<BlockFrequencyInfo &(Function &)> GetBFI;
251251

252+
/// Getter for TargetLibraryInfo
253+
function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
254+
252255
/// Profile summary information.
253256
ProfileSummaryInfo *PSI;
254257

@@ -433,6 +436,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
433436
bool simplifyIntrinsicCallIsConstant(CallBase &CB);
434437
bool simplifyIntrinsicCallObjectSize(CallBase &CB);
435438
ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
439+
bool isLoweredToCall(Function *F, CallBase &Call);
436440

437441
/// Return true if the given argument to the function being considered for
438442
/// inlining has the given attribute set either at the call site or the
@@ -492,13 +496,15 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
492496
bool visitUnreachableInst(UnreachableInst &I);
493497

494498
public:
495-
CallAnalyzer(Function &Callee, CallBase &Call, const TargetTransformInfo &TTI,
496-
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
497-
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
498-
ProfileSummaryInfo *PSI = nullptr,
499-
OptimizationRemarkEmitter *ORE = nullptr)
499+
CallAnalyzer(
500+
Function &Callee, CallBase &Call, const TargetTransformInfo &TTI,
501+
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
502+
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
503+
function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,
504+
ProfileSummaryInfo *PSI = nullptr,
505+
OptimizationRemarkEmitter *ORE = nullptr)
500506
: TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
501-
PSI(PSI), F(Callee), DL(F.getDataLayout()), ORE(ORE),
507+
GetTLI(GetTLI), PSI(PSI), F(Callee), DL(F.getDataLayout()), ORE(ORE),
502508
CandidateCall(Call) {}
503509

504510
InlineResult analyze();
@@ -688,7 +694,8 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
688694
/// FIXME: if InlineCostCallAnalyzer is derived from, this may need
689695
/// to instantiate the derived class.
690696
InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
691-
GetAssumptionCache, GetBFI, PSI, ORE, false);
697+
GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
698+
false);
692699
if (CA.analyze().isSuccess()) {
693700
// We were able to inline the indirect call! Subtract the cost from the
694701
// threshold to get the bonus we want to apply, but don't go below zero.
@@ -1106,10 +1113,12 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
11061113
const TargetTransformInfo &TTI,
11071114
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
11081115
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
1116+
function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,
11091117
ProfileSummaryInfo *PSI = nullptr,
11101118
OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true,
11111119
bool IgnoreThreshold = false)
1112-
: CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI, ORE),
1120+
: CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, GetTLI, PSI,
1121+
ORE),
11131122
ComputeFullInlineCost(OptComputeFullInlineCost ||
11141123
Params.ComputeFullInlineCost || ORE ||
11151124
isCostBenefitAnalysisEnabled()),
@@ -1228,8 +1237,8 @@ class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
12281237
InlineConstants::IndirectCallThreshold;
12291238

12301239
InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
1231-
GetAssumptionCache, GetBFI, PSI, ORE, false,
1232-
true);
1240+
GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
1241+
false, true);
12331242
if (CA.analyze().isSuccess()) {
12341243
increment(InlineCostFeatureIndex::nested_inline_cost_estimate,
12351244
CA.getCost());
@@ -1355,9 +1364,11 @@ class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
13551364
const TargetTransformInfo &TTI,
13561365
function_ref<AssumptionCache &(Function &)> &GetAssumptionCache,
13571366
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
1367+
function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
13581368
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee,
13591369
CallBase &Call)
1360-
: CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI) {}
1370+
: CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, GetTLI,
1371+
PSI) {}
13611372

13621373
const InlineCostFeatures &features() const { return Cost; }
13631374
};
@@ -2260,6 +2271,44 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
22602271
return false;
22612272
}
22622273

2274+
bool CallAnalyzer::isLoweredToCall(Function *F, CallBase &Call) {
2275+
const TargetLibraryInfo *TLI = GetTLI ? &GetTLI(*F) : nullptr;
2276+
LibFunc LF;
2277+
if (!TLI || !TLI->getLibFunc(*F, LF) || !TLI->has(LF))
2278+
return TTI.isLoweredToCall(F);
2279+
2280+
switch (LF) {
2281+
case LibFunc_memcpy_chk:
2282+
case LibFunc_memmove_chk:
2283+
case LibFunc_mempcpy_chk:
2284+
case LibFunc_memset_chk: {
2285+
// Calls to __memcpy_chk whose length is known to fit within the object
2286+
// size will eventually be replaced by inline stores. Therefore, these
2287+
// should not incur a call penalty. This is only really relevant on
2288+
// platforms whose headers redirect memcpy to __memcpy_chk (e.g. Darwin), as
2289+
// other platforms use memcpy intrinsics, which are already exempt from the
2290+
// call penalty.
2291+
auto *LenOp = dyn_cast<ConstantInt>(Call.getOperand(2));
2292+
if (!LenOp)
2293+
LenOp = dyn_cast_or_null<ConstantInt>(
2294+
SimplifiedValues.lookup(Call.getOperand(2)));
2295+
auto *ObjSizeOp = dyn_cast<ConstantInt>(Call.getOperand(3));
2296+
if (!ObjSizeOp)
2297+
ObjSizeOp = dyn_cast_or_null<ConstantInt>(
2298+
SimplifiedValues.lookup(Call.getOperand(3)));
2299+
if (LenOp && ObjSizeOp &&
2300+
LenOp->getLimitedValue() <= ObjSizeOp->getLimitedValue()) {
2301+
return false;
2302+
}
2303+
break;
2304+
}
2305+
default:
2306+
break;
2307+
}
2308+
2309+
return TTI.isLoweredToCall(F);
2310+
}
2311+
22632312
bool CallAnalyzer::visitCallBase(CallBase &Call) {
22642313
if (!onCallBaseVisitStart(Call))
22652314
return true;
@@ -2341,7 +2390,7 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
23412390
return false;
23422391
}
23432392

2344-
if (TTI.isLoweredToCall(F)) {
2393+
if (isLoweredToCall(F, Call)) {
23452394
onLoweredCall(F, Call, IsIndirectCall);
23462395
}
23472396

@@ -2945,6 +2994,7 @@ std::optional<int> llvm::getInliningCostEstimate(
29452994
CallBase &Call, TargetTransformInfo &CalleeTTI,
29462995
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
29472996
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2997+
function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
29482998
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
29492999
const InlineParams Params = {/* DefaultThreshold*/ 0,
29503000
/*HintThreshold*/ {},
@@ -2958,7 +3008,7 @@ std::optional<int> llvm::getInliningCostEstimate(
29583008
/*EnableDeferral*/ true};
29593009

29603010
InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI,
2961-
GetAssumptionCache, GetBFI, PSI, ORE, true,
3011+
GetAssumptionCache, GetBFI, GetTLI, PSI, ORE, true,
29623012
/*IgnoreThreshold*/ true);
29633013
auto R = CA.analyze();
29643014
if (!R.isSuccess())
@@ -2970,9 +3020,10 @@ std::optional<InlineCostFeatures> llvm::getInliningCostFeatures(
29703020
CallBase &Call, TargetTransformInfo &CalleeTTI,
29713021
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
29723022
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
3023+
function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
29733024
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2974-
InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, PSI,
2975-
ORE, *Call.getCalledFunction(), Call);
3025+
InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, GetTLI,
3026+
PSI, ORE, *Call.getCalledFunction(), Call);
29763027
auto R = CFA.analyze();
29773028
if (!R.isSuccess())
29783029
return std::nullopt;
@@ -3072,7 +3123,7 @@ InlineCost llvm::getInlineCost(
30723123
<< ")\n");
30733124

30743125
InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
3075-
GetAssumptionCache, GetBFI, PSI, ORE);
3126+
GetAssumptionCache, GetBFI, GetTLI, PSI, ORE);
30763127
InlineResult ShouldInline = CA.analyze();
30773128

30783129
LLVM_DEBUG(CA.dump());
@@ -3263,7 +3314,8 @@ InlineCostAnnotationPrinterPass::run(Function &F,
32633314
continue;
32643315
OptimizationRemarkEmitter ORE(CalledFunction);
32653316
InlineCostCallAnalyzer ICCA(*CalledFunction, *CB, Params, TTI,
3266-
GetAssumptionCache, nullptr, &PSI, &ORE);
3317+
GetAssumptionCache, nullptr, nullptr, &PSI,
3318+
&ORE);
32673319
ICCA.analyze();
32683320
OS << " Analyzing call of " << CalledFunction->getName()
32693321
<< "... (caller:" << CB->getCaller()->getName() << ")\n";
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2+
; RUN: opt %s -mtriple=arm64-apple-macosx -passes=inline -inline-threshold=2 -inline-call-penalty=5 -S | FileCheck %s
3+
4+
declare i64 @llvm.objectsize.i64.p0(ptr, i1, i1, i1)
5+
declare ptr @__memcpy_chk(ptr, ptr, i64, i64)
6+
declare ptr @__memmove_chk(ptr, ptr, i64, i64)
7+
declare ptr @__mempcpy_chk(ptr, ptr, i64, i64)
8+
declare ptr @__memset_chk(ptr, i32, i64, i64)
9+
10+
define void @callee(ptr %dst, ptr %src, i64 %size) {
11+
; CHECK-LABEL: define void @callee
12+
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) {
13+
; CHECK-NEXT: [[OBJSIZE:%.*]] = call i64 @llvm.objectsize.i64.p0(ptr [[DST]], i1 false, i1 true, i1 false)
14+
; CHECK-NEXT: [[CALL_MEMCPY:%.*]] = call ptr @__memcpy_chk(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i64 [[OBJSIZE]])
15+
; CHECK-NEXT: [[CALL_MEMMOVE:%.*]] = call ptr @__memmove_chk(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i64 [[OBJSIZE]])
16+
; CHECK-NEXT: [[CALL_MEMPCPY:%.*]] = call ptr @__mempcpy_chk(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i64 [[OBJSIZE]])
17+
; CHECK-NEXT: [[CALL_MEMSET:%.*]] = call ptr @__memset_chk(ptr [[DST]], i32 0, i64 [[SIZE]], i64 [[OBJSIZE]])
18+
; CHECK-NEXT: ret void
19+
;
20+
%objsize = call i64 @llvm.objectsize.i64.p0(ptr %dst, i1 false, i1 true, i1 false)
21+
%call.memcpy = call ptr @__memcpy_chk(ptr %dst, ptr %src, i64 %size, i64 %objsize)
22+
%call.memmove = call ptr @__memmove_chk(ptr %dst, ptr %src, i64 %size, i64 %objsize)
23+
%call.mempcpy = call ptr @__mempcpy_chk(ptr %dst, ptr %src, i64 %size, i64 %objsize)
24+
%call.memset = call ptr @__memset_chk(ptr %dst, i32 0, i64 %size, i64 %objsize)
25+
ret void
26+
}
27+
28+
define void @caller(ptr %dst, ptr %src) {
29+
; CHECK-LABEL: define void @caller
30+
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) {
31+
; CHECK-NEXT: [[OBJSIZE_I:%.*]] = call i64 @llvm.objectsize.i64.p0(ptr [[DST]], i1 false, i1 true, i1 false)
32+
; CHECK-NEXT: [[CALL_MEMCPY_I:%.*]] = call ptr @__memcpy_chk(ptr [[DST]], ptr [[SRC]], i64 4, i64 [[OBJSIZE_I]])
33+
; CHECK-NEXT: [[CALL_MEMMOVE_I:%.*]] = call ptr @__memmove_chk(ptr [[DST]], ptr [[SRC]], i64 4, i64 [[OBJSIZE_I]])
34+
; CHECK-NEXT: [[CALL_MEMPCPY_I:%.*]] = call ptr @__mempcpy_chk(ptr [[DST]], ptr [[SRC]], i64 4, i64 [[OBJSIZE_I]])
35+
; CHECK-NEXT: [[CALL_MEMSET_I:%.*]] = call ptr @__memset_chk(ptr [[DST]], i32 0, i64 4, i64 [[OBJSIZE_I]])
36+
; CHECK-NEXT: ret void
37+
;
38+
call void @callee(ptr %dst, ptr %src, i64 4)
39+
ret void
40+
}
41+
42+
define void @objsize_toosmall_callee(ptr %dst, ptr %src, i64 %size) {
43+
; CHECK-LABEL: define void @objsize_toosmall_callee
44+
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) {
45+
; CHECK-NEXT: [[CALL_MEMCPY:%.*]] = call ptr @__memcpy_chk(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i64 1)
46+
; CHECK-NEXT: [[CALL_MEMMOVE:%.*]] = call ptr @__memmove_chk(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i64 1)
47+
; CHECK-NEXT: [[CALL_MEMPCPY:%.*]] = call ptr @__mempcpy_chk(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i64 1)
48+
; CHECK-NEXT: [[CALL_MEMSET:%.*]] = call ptr @__memset_chk(ptr [[DST]], i32 0, i64 [[SIZE]], i64 1)
49+
; CHECK-NEXT: ret void
50+
;
51+
%call.memcpy = call ptr @__memcpy_chk(ptr %dst, ptr %src, i64 %size, i64 1)
52+
%call.memmove = call ptr @__memmove_chk(ptr %dst, ptr %src, i64 %size, i64 1)
53+
%call.mempcpy = call ptr @__mempcpy_chk(ptr %dst, ptr %src, i64 %size, i64 1)
54+
%call.memset = call ptr @__memset_chk(ptr %dst, i32 0, i64 %size, i64 1)
55+
ret void
56+
}
57+
58+
define void @objsize_toosmall_caller(ptr %dst, ptr %src) {
59+
; CHECK-LABEL: define void @objsize_toosmall_caller
60+
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) {
61+
; CHECK-NEXT: call void @objsize_toosmall_callee(ptr [[DST]], ptr [[SRC]], i64 4)
62+
; CHECK-NEXT: ret void
63+
;
64+
call void @objsize_toosmall_callee(ptr %dst, ptr %src, i64 4)
65+
ret void
66+
}
67+
68+
define void @intrinsics_callee(ptr %dst, ptr %src, i64 %size) {
69+
; CHECK-LABEL: define void @intrinsics_callee
70+
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) {
71+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i1 false)
72+
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i1 false)
73+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[DST]], i8 0, i64 [[SIZE]], i1 false)
74+
; CHECK-NEXT: ret void
75+
;
76+
call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %size, i1 false)
77+
call void @llvm.memmove.p0.p0.i64(ptr %dst, ptr %src, i64 %size, i1 false)
78+
call void @llvm.memset.p0.i64(ptr %dst, i8 0, i64 %size, i1 false)
79+
ret void
80+
}
81+
82+
define void @intrinsics_caller(ptr %dst, ptr %src) {
83+
; CHECK-LABEL: define void @intrinsics_caller
84+
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) {
85+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 4, i1 false)
86+
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 4, i1 false)
87+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[DST]], i8 0, i64 4, i1 false)
88+
; CHECK-NEXT: ret void
89+
;
90+
call void @intrinsics_callee(ptr %dst, ptr %src, i64 4)
91+
ret void
92+
}
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2+
; RUN: opt %s -mtriple=arm64-apple-macosx -passes='default<O3>' -inline-threshold=2 -inline-call-penalty=5 -S | FileCheck %s
3+
4+
declare i64 @llvm.objectsize.i64.p0(ptr, i1, i1, i1)
5+
declare ptr @__memcpy_chk(ptr, ptr, i64, i64)
6+
declare ptr @__memmove_chk(ptr, ptr, i64, i64)
7+
declare ptr @__mempcpy_chk(ptr, ptr, i64, i64)
8+
declare ptr @__memset_chk(ptr, i32, i64, i64)
9+
10+
define void @callee_memcpy(ptr %dst, ptr %src, i64 %size) {
11+
; CHECK-LABEL: define void @callee_memcpy
12+
; CHECK-SAME: (ptr [[DST:%.*]], ptr nocapture readonly [[SRC:%.*]], i64 [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
13+
; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DST]], ptr align 1 [[SRC]], i64 [[SIZE]], i1 false)
14+
; CHECK-NEXT: ret void
15+
;
16+
%objsize = call i64 @llvm.objectsize.i64.p0(ptr %dst, i1 false, i1 true, i1 false)
17+
%call.memcpy = call ptr @__memcpy_chk(ptr %dst, ptr %src, i64 %size, i64 %objsize)
18+
ret void
19+
}
20+
21+
define void @callee_memmove(ptr %dst, ptr %src, i64 %size) {
22+
; CHECK-LABEL: define void @callee_memmove
23+
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
24+
; CHECK-NEXT: tail call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DST]], ptr align 1 [[SRC]], i64 [[SIZE]], i1 false)
25+
; CHECK-NEXT: ret void
26+
;
27+
%objsize = call i64 @llvm.objectsize.i64.p0(ptr %dst, i1 false, i1 true, i1 false)
28+
%call.memmove = call ptr @__memmove_chk(ptr %dst, ptr %src, i64 %size, i64 %objsize)
29+
ret void
30+
}
31+
32+
define void @callee_mempcpy(ptr %dst, ptr %src, i64 %size) {
33+
; CHECK-LABEL: define void @callee_mempcpy
34+
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) local_unnamed_addr #[[ATTR1]] {
35+
; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DST]], ptr align 1 [[SRC]], i64 [[SIZE]], i1 false)
36+
; CHECK-NEXT: ret void
37+
;
38+
%objsize = call i64 @llvm.objectsize.i64.p0(ptr %dst, i1 false, i1 true, i1 false)
39+
%call.mempcpy = call ptr @__mempcpy_chk(ptr %dst, ptr %src, i64 %size, i64 %objsize)
40+
ret void
41+
}
42+
43+
define void @callee_memset(ptr %dst, i64 %size) {
44+
; CHECK-LABEL: define void @callee_memset
45+
; CHECK-SAME: (ptr [[DST:%.*]], i64 [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] {
46+
; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 1 [[DST]], i8 0, i64 [[SIZE]], i1 false)
47+
; CHECK-NEXT: ret void
48+
;
49+
%objsize = call i64 @llvm.objectsize.i64.p0(ptr %dst, i1 false, i1 true, i1 false)
50+
%call.mempcpy = call ptr @__memset_chk(ptr %dst, i32 0, i64 %size, i64 %objsize)
51+
ret void
52+
}
53+
54+
define void @caller_memcpy(ptr %dst, ptr %src) {
55+
; CHECK-LABEL: define void @caller_memcpy
56+
; CHECK-SAME: (ptr [[DST:%.*]], ptr nocapture readonly [[SRC:%.*]]) local_unnamed_addr #[[ATTR0]] {
57+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC]], align 1
58+
; CHECK-NEXT: store i32 [[TMP1]], ptr [[DST]], align 1
59+
; CHECK-NEXT: ret void
60+
;
61+
call void @callee_memcpy(ptr %dst, ptr %src, i64 4)
62+
ret void
63+
}
64+
65+
define void @caller_memmove(ptr %dst, ptr %src) {
66+
; CHECK-LABEL: define void @caller_memmove
67+
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) local_unnamed_addr #[[ATTR1]] {
68+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC]], align 1
69+
; CHECK-NEXT: store i32 [[TMP1]], ptr [[DST]], align 1
70+
; CHECK-NEXT: ret void
71+
;
72+
call void @callee_memmove(ptr %dst, ptr %src, i64 4)
73+
ret void
74+
}
75+
76+
define void @caller_mempcpy(ptr %dst, ptr %src) {
77+
; CHECK-LABEL: define void @caller_mempcpy
78+
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) local_unnamed_addr #[[ATTR1]] {
79+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC]], align 1
80+
; CHECK-NEXT: store i32 [[TMP1]], ptr [[DST]], align 1
81+
; CHECK-NEXT: ret void
82+
;
83+
call void @callee_mempcpy(ptr %dst, ptr %src, i64 4)
84+
ret void
85+
}
86+
87+
define void @caller_memset(ptr %dst) {
88+
; CHECK-LABEL: define void @caller_memset
89+
; CHECK-SAME: (ptr [[DST:%.*]]) local_unnamed_addr #[[ATTR0]] {
90+
; CHECK-NEXT: store i32 0, ptr [[DST]], align 1
91+
; CHECK-NEXT: ret void
92+
;
93+
call void @callee_memset(ptr %dst, i64 4)
94+
ret void
95+
}

0 commit comments

Comments
 (0)