Skip to content

Commit b7feccb

Browse files
[memprof] Dump call site matching information (#125130)
MemProfiler.cpp annotates the IR with the memory profile so that we can later duplicate context. This patch dumps the entire inline call stack for each call site match.
1 parent 35afd02 commit b7feccb

File tree

3 files changed

+144
-1
lines changed

3 files changed

+144
-1
lines changed

llvm/lib/Transforms/Instrumentation/MemProfiler.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -970,6 +970,7 @@ static void
970970
readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
971971
const TargetLibraryInfo &TLI,
972972
std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo,
973+
std::set<std::vector<uint64_t>> &MatchedCallSites,
973974
DenseMap<uint64_t, LocToLocMap> &UndriftMaps) {
974975
auto &Ctx = M.getContext();
975976
// Previously we used getIRPGOFuncName() here. If F is local linkage,
@@ -1210,6 +1211,13 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
12101211
addCallsiteMetadata(I, InlinedCallStack, Ctx);
12111212
// Only need to find one with a matching call stack and add a single
12121213
// callsite metadata.
1214+
1215+
// Accumulate call site matching information upon request.
1216+
if (ClPrintMemProfMatchInfo) {
1217+
std::vector<uint64_t> CallStack;
1218+
append_range(CallStack, InlinedCallStack);
1219+
MatchedCallSites.insert(std::move(CallStack));
1220+
}
12131221
break;
12141222
}
12151223
}
@@ -1266,13 +1274,17 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
12661274
// it to an allocation in the IR.
12671275
std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;
12681276

1277+
// Set of the matched call sites, each expressed as the sequence of stack IDs
1278+
// that makes up its inline call stack.
1279+
std::set<std::vector<uint64_t>> MatchedCallSites;
1280+
12691281
for (auto &F : M) {
12701282
if (F.isDeclaration())
12711283
continue;
12721284

12731285
const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
12741286
readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
1275-
UndriftMaps);
1287+
MatchedCallSites, UndriftMaps);
12761288
}
12771289

12781290
if (ClPrintMemProfMatchInfo) {
@@ -1281,6 +1293,13 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
12811293
<< " context with id " << Id << " has total profiled size "
12821294
<< Info.TotalSize << (Info.Matched ? " is" : " not")
12831295
<< " matched\n";
1296+
1297+
for (const auto &CallStack : MatchedCallSites) {
1298+
errs() << "MemProf callsite match for inline call stack";
1299+
for (uint64_t StackId : CallStack)
1300+
errs() << " " << StackId;
1301+
errs() << "\n";
1302+
}
12841303
}
12851304

12861305
return PreservedAnalyses::none();
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
; Tests that the compiler dumps call site matches upon request.
2+
;
3+
; The test case is generated from:
4+
;
5+
; // main
6+
; // |
7+
; // f1 (noinline)
8+
; // |
9+
; // f2
10+
; // |
11+
; // f3 (noinline)
12+
; // |
13+
; // new
14+
;
15+
; __attribute__((noinline)) char *f3() { return ::new char[4]; }
16+
;
17+
; static char *f2() { return f3(); }
18+
;
19+
; __attribute__((noinline)) static char *f1() { return f2(); }
20+
;
21+
; int main() {
22+
; f1();
23+
; return 0;
24+
; }
25+
;
26+
; Here we expect to match two inline call stacks:
27+
;
28+
; - [main]
29+
; - [f1, f2]
30+
;
31+
; Note that f3 is considered to be an allocation site, not a call site, because
32+
; it directly calls new after inlining.
33+
34+
; REQUIRES: x86_64-linux
35+
; RUN: split-file %s %t
36+
; RUN: llvm-profdata merge %t/memprof-dump-matched-call-site.yaml -o %t/memprof-dump-matched-call-site.memprofdata
37+
; RUN: opt < %t/memprof-dump-matched-call-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-call-site.memprofdata>' -memprof-print-match-info -S 2>&1 | FileCheck %s
38+
39+
;--- memprof-dump-matched-call-site.yaml
40+
---
41+
HeapProfileRecords:
42+
- GUID: main
43+
AllocSites: []
44+
CallSites:
45+
- - { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false }
46+
- GUID: _ZL2f1v
47+
AllocSites: []
48+
CallSites:
49+
- - { Function: _ZL2f2v, LineOffset: 0, Column: 28, IsInlineFrame: true }
50+
- { Function: _ZL2f1v, LineOffset: 0, Column: 54, IsInlineFrame: false }
51+
- GUID: _ZL2f2v
52+
AllocSites: []
53+
CallSites:
54+
- - { Function: _ZL2f2v, LineOffset: 0, Column: 28, IsInlineFrame: true }
55+
- { Function: _ZL2f1v, LineOffset: 0, Column: 54, IsInlineFrame: false }
56+
- GUID: _Z2f3v
57+
AllocSites:
58+
- Callstack:
59+
- { Function: _Z2f3v, LineOffset: 0, Column: 47, IsInlineFrame: false }
60+
- { Function: _ZL2f2v, LineOffset: 0, Column: 28, IsInlineFrame: true }
61+
- { Function: _ZL2f1v, LineOffset: 0, Column: 54, IsInlineFrame: false }
62+
- { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false }
63+
MemInfoBlock:
64+
AllocCount: 1
65+
TotalSize: 4
66+
TotalLifetime: 0
67+
TotalLifetimeAccessDensity: 0
68+
CallSites: []
69+
...
70+
;--- memprof-dump-matched-call-site.ll
71+
; CHECK: MemProf notcold context with id 3894143216621363392 has total profiled size 4 is matched
72+
; CHECK: MemProf callsite match for inline call stack 4745611964195289084 10616861955219347331
73+
; CHECK: MemProf callsite match for inline call stack 5401059281181789382
74+
75+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
76+
target triple = "x86_64-unknown-linux-gnu"
77+
78+
define ptr @_Z2f3v() {
79+
entry:
80+
%call = call ptr @_Znam(i64 0), !dbg !3
81+
ret ptr null
82+
}
83+
84+
declare ptr @_Znam(i64)
85+
86+
define i32 @main() {
87+
entry:
88+
call void @_ZL2f1v(), !dbg !7
89+
ret i32 0
90+
}
91+
92+
define void @_ZL2f1v() {
93+
entry:
94+
%call.i = call ptr @_Z2f3v(), !dbg !9
95+
ret void
96+
}
97+
98+
!llvm.dbg.cu = !{!0}
99+
!llvm.module.flags = !{!2}
100+
101+
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1)
102+
!1 = !DIFile(filename: "match.cc", directory: "/")
103+
!2 = !{i32 2, !"Debug Info Version", i32 3}
104+
!3 = !DILocation(line: 11, column: 47, scope: !4)
105+
!4 = distinct !DISubprogram(name: "f3", linkageName: "_Z2f3v", scope: !1, file: !1, line: 11, type: !5, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
106+
!5 = !DISubroutineType(types: !6)
107+
!6 = !{}
108+
!7 = !DILocation(line: 18, column: 3, scope: !8)
109+
!8 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 17, type: !5, scopeLine: 17, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
110+
!9 = !DILocation(line: 13, column: 28, scope: !10, inlinedAt: !11)
111+
!10 = distinct !DISubprogram(name: "f2", linkageName: "_ZL2f2v", scope: !1, file: !1, line: 13, type: !5, scopeLine: 13, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0)
112+
!11 = distinct !DILocation(line: 15, column: 54, scope: !12)
113+
!12 = distinct !DISubprogram(name: "f1", linkageName: "_ZL2f1v", scope: !1, file: !1, line: 15, type: !13, scopeLine: 15, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0)
114+
!13 = !DISubroutineType(cc: DW_CC_nocall, types: !6)

llvm/test/Transforms/PGOProfile/memprof.ll

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,16 @@
101101
; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched
102102
; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched
103103
; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched
104+
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 748269490701775343
105+
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 1544787832369987002
106+
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 2061451396820446691
107+
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 2104812325165620841
108+
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 6281715513834610934
109+
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 8467819354083268568
110+
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 8690657650969109624
111+
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 9086428284934609951
112+
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 12481870273128938184
113+
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 12699492813229484831
104114

105115
; ModuleID = 'memprof.cc'
106116
source_filename = "memprof.cc"

0 commit comments

Comments
 (0)