Commit 0c6b5e1

Merge pull request #9700 from citymarina/cherry-pick-inline-memcpy
[Inliner] Don't count a call penalty for foldable __memcpy_chk and similar
2 parents be8c96d + 2572b89 commit 0c6b5e1
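
For context, the fortified mem* routines this change targets come from header-level rewriting: on Darwin, string.h redirects plain memcpy/memmove/memset to their __*_chk variants, which carry the destination's object size as an extra argument. A minimal, purely illustrative C++ sketch of that pattern (not part of this commit; the exact lowering depends on the SDK headers and fortification settings):

    #include <string.h>

    void copy4(char *dst, const char *src) {
      // With fortification enabled this call is compiled roughly as
      //   __memcpy_chk(dst, src, 4, __builtin_object_size(dst, 0));
      // When the length is a constant that provably fits in the object, the
      // optimizer folds the checked call back into plain loads and stores,
      // so the inliner should not charge it a call penalty.
      memcpy(dst, src, 4);
    }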

4 files changed: +258 −17 lines

llvm/include/llvm/Analysis/InlineCost.h

Lines changed: 2 additions & 0 deletions
@@ -319,6 +319,7 @@ std::optional<int> getInliningCostEstimate(
     CallBase &Call, TargetTransformInfo &CalleeTTI,
     function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
     function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
+    function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,
     ProfileSummaryInfo *PSI = nullptr,
     OptimizationRemarkEmitter *ORE = nullptr);

@@ -328,6 +329,7 @@ std::optional<InlineCostFeatures> getInliningCostFeatures(
     CallBase &Call, TargetTransformInfo &CalleeTTI,
     function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
     function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
+    function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,
     ProfileSummaryInfo *PSI = nullptr,
     OptimizationRemarkEmitter *ORE = nullptr);

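The new GetTLI parameter threads a TargetLibraryInfo getter through the inline-cost entry points. A minimal sketch of how a caller might supply it (assumed setup: a new-pass-manager FunctionAnalysisManager named FAM; this helper is illustrative and not part of the commit):

    #include "llvm/Analysis/InlineCost.h"
    #include "llvm/Analysis/TargetLibraryInfo.h"
    #include "llvm/IR/PassManager.h"
    using namespace llvm;

    // Hypothetical wrapper: forwards a TargetLibraryInfo getter so the cost
    // model can recognize __memcpy_chk-style library calls.
    static std::optional<int> estimateCostWithTLI(
        FunctionAnalysisManager &FAM, CallBase &Call,
        TargetTransformInfo &CalleeTTI,
        function_ref<AssumptionCache &(Function &)> GetAssumptionCache) {
      auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
        return FAM.getResult<TargetLibraryAnalysis>(F);
      };
      return getInliningCostEstimate(Call, CalleeTTI, GetAssumptionCache,
                                     /*GetBFI=*/nullptr, GetTLI);
    }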
llvm/lib/Analysis/InlineCost.cpp

Lines changed: 69 additions & 17 deletions
@@ -249,6 +249,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
   /// Getter for BlockFrequencyInfo
   function_ref<BlockFrequencyInfo &(Function &)> GetBFI;

+  /// Getter for TargetLibraryInfo
+  function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
+
   /// Profile summary information.
   ProfileSummaryInfo *PSI;

@@ -433,6 +436,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
   bool simplifyIntrinsicCallIsConstant(CallBase &CB);
   bool simplifyIntrinsicCallObjectSize(CallBase &CB);
   ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
+  bool isLoweredToCall(Function *F, CallBase &Call);

   /// Return true if the given argument to the function being considered for
   /// inlining has the given attribute set either at the call site or the

@@ -492,13 +496,15 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
   bool visitUnreachableInst(UnreachableInst &I);

 public:
-  CallAnalyzer(Function &Callee, CallBase &Call, const TargetTransformInfo &TTI,
-               function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
-               function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
-               ProfileSummaryInfo *PSI = nullptr,
-               OptimizationRemarkEmitter *ORE = nullptr)
+  CallAnalyzer(
+      Function &Callee, CallBase &Call, const TargetTransformInfo &TTI,
+      function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
+      function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
+      function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,
+      ProfileSummaryInfo *PSI = nullptr,
+      OptimizationRemarkEmitter *ORE = nullptr)
       : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
-        PSI(PSI), F(Callee), DL(F.getDataLayout()), ORE(ORE),
+        GetTLI(GetTLI), PSI(PSI), F(Callee), DL(F.getDataLayout()), ORE(ORE),
         CandidateCall(Call) {}

   InlineResult analyze();

@@ -687,7 +693,8 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
     /// FIXME: if InlineCostCallAnalyzer is derived from, this may need
     /// to instantiate the derived class.
     InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
-                              GetAssumptionCache, GetBFI, PSI, ORE, false);
+                              GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
+                              false);
     if (CA.analyze().isSuccess()) {
       // We were able to inline the indirect call! Subtract the cost from the
       // threshold to get the bonus we want to apply, but don't go below zero.

@@ -1105,10 +1112,12 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
       const TargetTransformInfo &TTI,
       function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
       function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
+      function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,
       ProfileSummaryInfo *PSI = nullptr,
       OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true,
       bool IgnoreThreshold = false)
-      : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI, ORE),
+      : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, GetTLI, PSI,
+                     ORE),
         ComputeFullInlineCost(OptComputeFullInlineCost ||
                               Params.ComputeFullInlineCost || ORE ||
                               isCostBenefitAnalysisEnabled()),

@@ -1226,8 +1235,8 @@ class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
         InlineConstants::IndirectCallThreshold;

     InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
-                              GetAssumptionCache, GetBFI, PSI, ORE, false,
-                              true);
+                              GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
+                              false, true);
     if (CA.analyze().isSuccess()) {
       increment(InlineCostFeatureIndex::nested_inline_cost_estimate,
                 CA.getCost());

@@ -1353,9 +1362,11 @@ class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
       const TargetTransformInfo &TTI,
       function_ref<AssumptionCache &(Function &)> &GetAssumptionCache,
       function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
+      function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
       ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee,
       CallBase &Call)
-      : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI) {}
+      : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, GetTLI,
+                     PSI) {}

   const InlineCostFeatures &features() const { return Cost; }
 };
@@ -2258,6 +2269,44 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
   return false;
 }

+bool CallAnalyzer::isLoweredToCall(Function *F, CallBase &Call) {
+  const TargetLibraryInfo *TLI = GetTLI ? &GetTLI(*F) : nullptr;
+  LibFunc LF;
+  if (!TLI || !TLI->getLibFunc(*F, LF) || !TLI->has(LF))
+    return TTI.isLoweredToCall(F);
+
+  switch (LF) {
+  case LibFunc_memcpy_chk:
+  case LibFunc_memmove_chk:
+  case LibFunc_mempcpy_chk:
+  case LibFunc_memset_chk: {
+    // Calls to __memcpy_chk whose length is known to fit within the object
+    // size will eventually be replaced by inline stores. Therefore, these
+    // should not incur a call penalty. This is only really relevant on
+    // platforms whose headers redirect memcpy to __memcpy_chk (e.g. Darwin), as
+    // other platforms use memcpy intrinsics, which are already exempt from the
+    // call penalty.
+    auto *LenOp = dyn_cast<ConstantInt>(Call.getOperand(2));
+    if (!LenOp)
+      LenOp = dyn_cast_or_null<ConstantInt>(
+          SimplifiedValues.lookup(Call.getOperand(2)));
+    auto *ObjSizeOp = dyn_cast<ConstantInt>(Call.getOperand(3));
+    if (!ObjSizeOp)
+      ObjSizeOp = dyn_cast_or_null<ConstantInt>(
+          SimplifiedValues.lookup(Call.getOperand(3)));
+    if (LenOp && ObjSizeOp &&
+        LenOp->getLimitedValue() <= ObjSizeOp->getLimitedValue()) {
+      return false;
+    }
+    break;
+  }
+  default:
+    break;
+  }
+
+  return TTI.isLoweredToCall(F);
+}
+
 bool CallAnalyzer::visitCallBase(CallBase &Call) {
   if (!onCallBaseVisitStart(Call))
     return true;

@@ -2339,7 +2388,7 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
     return false;
   }

-  if (TTI.isLoweredToCall(F)) {
+  if (isLoweredToCall(F, Call)) {
     onLoweredCall(F, Call, IsIndirectCall);
   }

@@ -2943,6 +2992,7 @@ std::optional<int> llvm::getInliningCostEstimate(
     CallBase &Call, TargetTransformInfo &CalleeTTI,
     function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
     function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
+    function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
     ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
   const InlineParams Params = {/* DefaultThreshold*/ 0,
                                /*HintThreshold*/ {},

@@ -2956,7 +3006,7 @@ std::optional<int> llvm::getInliningCostEstimate(
                                /*EnableDeferral*/ true};

   InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI,
-                            GetAssumptionCache, GetBFI, PSI, ORE, true,
+                            GetAssumptionCache, GetBFI, GetTLI, PSI, ORE, true,
                             /*IgnoreThreshold*/ true);
   auto R = CA.analyze();
   if (!R.isSuccess())

@@ -2968,9 +3018,10 @@ std::optional<InlineCostFeatures> llvm::getInliningCostFeatures(
     CallBase &Call, TargetTransformInfo &CalleeTTI,
     function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
     function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
+    function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
     ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
-  InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, PSI,
-                                 ORE, *Call.getCalledFunction(), Call);
+  InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, GetTLI,
+                                 PSI, ORE, *Call.getCalledFunction(), Call);
   auto R = CFA.analyze();
   if (!R.isSuccess())
     return std::nullopt;

@@ -3070,7 +3121,7 @@ InlineCost llvm::getInlineCost(
                   << ")\n");

   InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
-                            GetAssumptionCache, GetBFI, PSI, ORE);
+                            GetAssumptionCache, GetBFI, GetTLI, PSI, ORE);
   InlineResult ShouldInline = CA.analyze();

   LLVM_DEBUG(CA.dump());

@@ -3262,7 +3313,8 @@ InlineCostAnnotationPrinterPass::run(Function &F,
       continue;
     OptimizationRemarkEmitter ORE(CalledFunction);
     InlineCostCallAnalyzer ICCA(*CalledFunction, *CI, Params, TTI,
-                                GetAssumptionCache, nullptr, &PSI, &ORE);
+                                GetAssumptionCache, nullptr, nullptr, &PSI,
+                                &ORE);
     ICCA.analyze();
     OS << " Analyzing call of " << CalledFunction->getName()
        << "... (caller:" << CI->getCaller()->getName() << ")\n";
Lines changed: 92 additions & 0 deletions
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt %s -mtriple=arm64-apple-macosx -passes=inline -inline-threshold=2 -inline-call-penalty=5 -S | FileCheck %s
+
+declare i64 @llvm.objectsize.i64.p0(ptr, i1, i1, i1)
+declare ptr @__memcpy_chk(ptr, ptr, i64, i64)
+declare ptr @__memmove_chk(ptr, ptr, i64, i64)
+declare ptr @__mempcpy_chk(ptr, ptr, i64, i64)
+declare ptr @__memset_chk(ptr, i32, i64, i64)
+
+define void @callee(ptr %dst, ptr %src, i64 %size) {
+; CHECK-LABEL: define void @callee
+; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) {
+; CHECK-NEXT: [[OBJSIZE:%.*]] = call i64 @llvm.objectsize.i64.p0(ptr [[DST]], i1 false, i1 true, i1 false)
+; CHECK-NEXT: [[CALL_MEMCPY:%.*]] = call ptr @__memcpy_chk(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i64 [[OBJSIZE]])
+; CHECK-NEXT: [[CALL_MEMMOVE:%.*]] = call ptr @__memmove_chk(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i64 [[OBJSIZE]])
+; CHECK-NEXT: [[CALL_MEMPCPY:%.*]] = call ptr @__mempcpy_chk(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i64 [[OBJSIZE]])
+; CHECK-NEXT: [[CALL_MEMSET:%.*]] = call ptr @__memset_chk(ptr [[DST]], i32 0, i64 [[SIZE]], i64 [[OBJSIZE]])
+; CHECK-NEXT: ret void
+;
+  %objsize = call i64 @llvm.objectsize.i64.p0(ptr %dst, i1 false, i1 true, i1 false)
+  %call.memcpy = call ptr @__memcpy_chk(ptr %dst, ptr %src, i64 %size, i64 %objsize)
+  %call.memmove = call ptr @__memmove_chk(ptr %dst, ptr %src, i64 %size, i64 %objsize)
+  %call.mempcpy = call ptr @__mempcpy_chk(ptr %dst, ptr %src, i64 %size, i64 %objsize)
+  %call.memset = call ptr @__memset_chk(ptr %dst, i32 0, i64 %size, i64 %objsize)
+  ret void
+}
+
+define void @caller(ptr %dst, ptr %src) {
+; CHECK-LABEL: define void @caller
+; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[OBJSIZE_I:%.*]] = call i64 @llvm.objectsize.i64.p0(ptr [[DST]], i1 false, i1 true, i1 false)
+; CHECK-NEXT: [[CALL_MEMCPY_I:%.*]] = call ptr @__memcpy_chk(ptr [[DST]], ptr [[SRC]], i64 4, i64 [[OBJSIZE_I]])
+; CHECK-NEXT: [[CALL_MEMMOVE_I:%.*]] = call ptr @__memmove_chk(ptr [[DST]], ptr [[SRC]], i64 4, i64 [[OBJSIZE_I]])
+; CHECK-NEXT: [[CALL_MEMPCPY_I:%.*]] = call ptr @__mempcpy_chk(ptr [[DST]], ptr [[SRC]], i64 4, i64 [[OBJSIZE_I]])
+; CHECK-NEXT: [[CALL_MEMSET_I:%.*]] = call ptr @__memset_chk(ptr [[DST]], i32 0, i64 4, i64 [[OBJSIZE_I]])
+; CHECK-NEXT: ret void
+;
+  call void @callee(ptr %dst, ptr %src, i64 4)
+  ret void
+}
+
+define void @objsize_toosmall_callee(ptr %dst, ptr %src, i64 %size) {
+; CHECK-LABEL: define void @objsize_toosmall_callee
+; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) {
+; CHECK-NEXT: [[CALL_MEMCPY:%.*]] = call ptr @__memcpy_chk(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i64 1)
+; CHECK-NEXT: [[CALL_MEMMOVE:%.*]] = call ptr @__memmove_chk(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i64 1)
+; CHECK-NEXT: [[CALL_MEMPCPY:%.*]] = call ptr @__mempcpy_chk(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i64 1)
+; CHECK-NEXT: [[CALL_MEMSET:%.*]] = call ptr @__memset_chk(ptr [[DST]], i32 0, i64 [[SIZE]], i64 1)
+; CHECK-NEXT: ret void
+;
+  %call.memcpy = call ptr @__memcpy_chk(ptr %dst, ptr %src, i64 %size, i64 1)
+  %call.memmove = call ptr @__memmove_chk(ptr %dst, ptr %src, i64 %size, i64 1)
+  %call.mempcpy = call ptr @__mempcpy_chk(ptr %dst, ptr %src, i64 %size, i64 1)
+  %call.memset = call ptr @__memset_chk(ptr %dst, i32 0, i64 %size, i64 1)
+  ret void
+}
+
+define void @objsize_toosmall_caller(ptr %dst, ptr %src) {
+; CHECK-LABEL: define void @objsize_toosmall_caller
+; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) {
+; CHECK-NEXT: call void @objsize_toosmall_callee(ptr [[DST]], ptr [[SRC]], i64 4)
+; CHECK-NEXT: ret void
+;
+  call void @objsize_toosmall_callee(ptr %dst, ptr %src, i64 4)
+  ret void
+}
+
+define void @intrinsics_callee(ptr %dst, ptr %src, i64 %size) {
+; CHECK-LABEL: define void @intrinsics_callee
+; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) {
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[DST]], i8 0, i64 [[SIZE]], i1 false)
+; CHECK-NEXT: ret void
+;
+  call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %size, i1 false)
+  call void @llvm.memmove.p0.p0.i64(ptr %dst, ptr %src, i64 %size, i1 false)
+  call void @llvm.memset.p0.i64(ptr %dst, i8 0, i64 %size, i1 false)
+  ret void
+}
+
+define void @intrinsics_caller(ptr %dst, ptr %src) {
+; CHECK-LABEL: define void @intrinsics_caller
+; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) {
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 4, i1 false)
+; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 4, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[DST]], i8 0, i64 4, i1 false)
+; CHECK-NEXT: ret void
+;
+  call void @intrinsics_callee(ptr %dst, ptr %src, i64 4)
+  ret void
+}
Lines changed: 95 additions & 0 deletions
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt %s -mtriple=arm64-apple-macosx -passes='default<O3>' -inline-threshold=2 -inline-call-penalty=5 -S | FileCheck %s
+
+declare i64 @llvm.objectsize.i64.p0(ptr, i1, i1, i1)
+declare ptr @__memcpy_chk(ptr, ptr, i64, i64)
+declare ptr @__memmove_chk(ptr, ptr, i64, i64)
+declare ptr @__mempcpy_chk(ptr, ptr, i64, i64)
+declare ptr @__memset_chk(ptr, i32, i64, i64)
+
+define void @callee_memcpy(ptr %dst, ptr %src, i64 %size) {
+; CHECK-LABEL: define void @callee_memcpy
+; CHECK-SAME: (ptr [[DST:%.*]], ptr nocapture readonly [[SRC:%.*]], i64 [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DST]], ptr align 1 [[SRC]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT: ret void
+;
+  %objsize = call i64 @llvm.objectsize.i64.p0(ptr %dst, i1 false, i1 true, i1 false)
+  %call.memcpy = call ptr @__memcpy_chk(ptr %dst, ptr %src, i64 %size, i64 %objsize)
+  ret void
+}
+
+define void @callee_memmove(ptr %dst, ptr %src, i64 %size) {
+; CHECK-LABEL: define void @callee_memmove
+; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: tail call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DST]], ptr align 1 [[SRC]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT: ret void
+;
+  %objsize = call i64 @llvm.objectsize.i64.p0(ptr %dst, i1 false, i1 true, i1 false)
+  %call.memmove = call ptr @__memmove_chk(ptr %dst, ptr %src, i64 %size, i64 %objsize)
+  ret void
+}
+
+define void @callee_mempcpy(ptr %dst, ptr %src, i64 %size) {
+; CHECK-LABEL: define void @callee_mempcpy
+; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) local_unnamed_addr #[[ATTR1]] {
+; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DST]], ptr align 1 [[SRC]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT: ret void
+;
+  %objsize = call i64 @llvm.objectsize.i64.p0(ptr %dst, i1 false, i1 true, i1 false)
+  %call.mempcpy = call ptr @__mempcpy_chk(ptr %dst, ptr %src, i64 %size, i64 %objsize)
+  ret void
+}
+
+define void @callee_memset(ptr %dst, i64 %size) {
+; CHECK-LABEL: define void @callee_memset
+; CHECK-SAME: (ptr [[DST:%.*]], i64 [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 1 [[DST]], i8 0, i64 [[SIZE]], i1 false)
+; CHECK-NEXT: ret void
+;
+  %objsize = call i64 @llvm.objectsize.i64.p0(ptr %dst, i1 false, i1 true, i1 false)
+  %call.mempcpy = call ptr @__memset_chk(ptr %dst, i32 0, i64 %size, i64 %objsize)
+  ret void
+}
+
+define void @caller_memcpy(ptr %dst, ptr %src) {
+; CHECK-LABEL: define void @caller_memcpy
+; CHECK-SAME: (ptr [[DST:%.*]], ptr nocapture readonly [[SRC:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC]], align 1
+; CHECK-NEXT: store i32 [[TMP1]], ptr [[DST]], align 1
+; CHECK-NEXT: ret void
+;
+  call void @callee_memcpy(ptr %dst, ptr %src, i64 4)
+  ret void
+}
+
+define void @caller_memmove(ptr %dst, ptr %src) {
+; CHECK-LABEL: define void @caller_memmove
+; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) local_unnamed_addr #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC]], align 1
+; CHECK-NEXT: store i32 [[TMP1]], ptr [[DST]], align 1
+; CHECK-NEXT: ret void
+;
+  call void @callee_memmove(ptr %dst, ptr %src, i64 4)
+  ret void
+}
+
+define void @caller_mempcpy(ptr %dst, ptr %src) {
+; CHECK-LABEL: define void @caller_mempcpy
+; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) local_unnamed_addr #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC]], align 1
+; CHECK-NEXT: store i32 [[TMP1]], ptr [[DST]], align 1
+; CHECK-NEXT: ret void
+;
+  call void @callee_mempcpy(ptr %dst, ptr %src, i64 4)
+  ret void
+}
+
+define void @caller_memset(ptr %dst) {
+; CHECK-LABEL: define void @caller_memset
+; CHECK-SAME: (ptr [[DST:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: store i32 0, ptr [[DST]], align 1
+; CHECK-NEXT: ret void
+;
+  call void @callee_memset(ptr %dst, i64 4)
+  ret void
+}
