Skip to content

Commit c7b421d

Browse files
authored
[MemProf] Attach value profile metadata to the IR using CalleeGuids. (#141164)
Use the newly introduced CalleeGuids in CallSiteInfo to annotate the IR where necessary with value profile metadata. Use a synthetic count of 1 since we don't have actual counts in the profile collection.
1 parent 0f00a96 commit c7b421d

File tree

2 files changed

+171
-10
lines changed

2 files changed

+171
-10
lines changed

llvm/lib/Transforms/Instrumentation/MemProfiler.cpp

Lines changed: 83 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,12 @@ static cl::opt<bool>
178178
cl::desc("Salvage stale MemProf profile"),
179179
cl::init(false), cl::Hidden);
180180

181+
static cl::opt<bool> ClMemProfAttachCalleeGuids(
182+
"memprof-attach-calleeguids",
183+
cl::desc(
184+
"Attach calleeguids as value profile metadata for indirect calls."),
185+
cl::init(true), cl::Hidden);
186+
181187
extern cl::opt<bool> MemProfReportHintedSizes;
182188
extern cl::opt<unsigned> MinClonedColdBytePercent;
183189
extern cl::opt<unsigned> MinCallsiteColdBytePercent;
@@ -952,6 +958,46 @@ undriftMemProfRecord(const DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
952958
UndriftCallStack(CS.Frames);
953959
}
954960

961+
// Helper function to process CalleeGuids and create value profile metadata
962+
static void addVPMetadata(Module &M, Instruction &I,
963+
ArrayRef<GlobalValue::GUID> CalleeGuids) {
964+
if (!ClMemProfAttachCalleeGuids || CalleeGuids.empty())
965+
return;
966+
967+
if (I.getMetadata(LLVMContext::MD_prof)) {
968+
uint64_t Unused;
969+
// TODO: When merging is implemented, increase this to a typical ICP value
970+
// (e.g., 3-6) For now, we only need to check if existing data exists, so 1
971+
// is sufficient
972+
auto ExistingVD = getValueProfDataFromInst(I, IPVK_IndirectCallTarget,
973+
/*MaxNumValueData=*/1, Unused);
974+
// We don't know how to merge value profile data yet.
975+
if (!ExistingVD.empty()) {
976+
return;
977+
}
978+
}
979+
980+
SmallVector<InstrProfValueData, 4> VDs;
981+
uint64_t TotalCount = 0;
982+
983+
for (const GlobalValue::GUID CalleeGUID : CalleeGuids) {
984+
InstrProfValueData VD;
985+
VD.Value = CalleeGUID;
986+
// For MemProf, we don't have actual call counts, so we assign
987+
// a weight of 1 to each potential target.
988+
// TODO: Consider making this weight configurable or increasing it to
989+
// improve effectiveness for ICP.
990+
VD.Count = 1;
991+
VDs.push_back(VD);
992+
TotalCount += VD.Count;
993+
}
994+
995+
if (!VDs.empty()) {
996+
annotateValueSite(M, I, VDs, TotalCount, IPVK_IndirectCallTarget,
997+
VDs.size());
998+
}
999+
}
1000+
9551001
static void
9561002
readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
9571003
const TargetLibraryInfo &TLI,
@@ -1020,15 +1066,35 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
10201066
// Build maps of the location hash to all profile data with that leaf location
10211067
// (allocation info and the callsites).
10221068
std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
1023-
// A hash function for std::unordered_set<ArrayRef<Frame>> to work.
1024-
struct CallStackHash {
1025-
size_t operator()(ArrayRef<Frame> CS) const {
1026-
return computeFullStackId(CS);
1069+
1070+
// Helper struct for maintaining refs to callsite data. As an alternative we
1071+
// could store a pointer to the CallSiteInfo struct but we also need the frame
1072+
// index. Using ArrayRefs instead makes it a little easier to read.
1073+
struct CallSiteEntry {
1074+
// Subset of frames for the corresponding CallSiteInfo.
1075+
ArrayRef<Frame> Frames;
1076+
// Potential targets for indirect calls.
1077+
ArrayRef<GlobalValue::GUID> CalleeGuids;
1078+
1079+
// Only compare Frame contents.
1080+
// Use pointer-based equality instead of ArrayRef's operator== which does
1081+
// element-wise comparison. We want to check if it's the same slice of the
1082+
// underlying array, not just equivalent content.
1083+
bool operator==(const CallSiteEntry &Other) const {
1084+
return Frames.data() == Other.Frames.data() &&
1085+
Frames.size() == Other.Frames.size();
1086+
}
1087+
};
1088+
1089+
struct CallSiteEntryHash {
1090+
size_t operator()(const CallSiteEntry &Entry) const {
1091+
return computeFullStackId(Entry.Frames);
10271092
}
10281093
};
1094+
10291095
// For the callsites we need to record slices of the frame array (see comments
1030-
// below where the map entries are added).
1031-
std::map<uint64_t, std::unordered_set<ArrayRef<Frame>, CallStackHash>>
1096+
// below where the map entries are added) along with their CalleeGuids.
1097+
std::map<uint64_t, std::unordered_set<CallSiteEntry, CallSiteEntryHash>>
10321098
LocHashToCallSites;
10331099
for (auto &AI : MemProfRec->AllocSites) {
10341100
NumOfMemProfAllocContextProfiles++;
@@ -1046,8 +1112,10 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
10461112
unsigned Idx = 0;
10471113
for (auto &StackFrame : CS.Frames) {
10481114
uint64_t StackId = computeStackId(StackFrame);
1049-
LocHashToCallSites[StackId].insert(
1050-
ArrayRef<Frame>(CS.Frames).drop_front(Idx++));
1115+
ArrayRef<Frame> FrameSlice = ArrayRef<Frame>(CS.Frames).drop_front(Idx++);
1116+
ArrayRef<GlobalValue::GUID> CalleeGuids(CS.CalleeGuids);
1117+
LocHashToCallSites[StackId].insert({FrameSlice, CalleeGuids});
1118+
10511119
ProfileHasColumns |= StackFrame.Column;
10521120
// Once we find this function, we can stop recording.
10531121
if (StackFrame.Function == FuncGUID)
@@ -1191,13 +1259,18 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
11911259
// Otherwise, add callsite metadata. If we reach here then we found the
11921260
// instruction's leaf location in the callsites map and not the allocation
11931261
// map.
1194-
for (auto CallStackIdx : CallSitesIter->second) {
1262+
for (const auto &CallSiteEntry : CallSitesIter->second) {
11951263
// If we found and thus matched all frames on the call, create and
11961264
// attach call stack metadata.
1197-
if (stackFrameIncludesInlinedCallStack(CallStackIdx,
1265+
if (stackFrameIncludesInlinedCallStack(CallSiteEntry.Frames,
11981266
InlinedCallStack)) {
11991267
NumOfMemProfMatchedCallSites++;
12001268
addCallsiteMetadata(I, InlinedCallStack, Ctx);
1269+
1270+
// Try to attach indirect call metadata if possible.
1271+
if (!CalledFunction)
1272+
addVPMetadata(M, I, CallSiteEntry.CalleeGuids);
1273+
12011274
// Only need to find one with a matching call stack and add a single
12021275
// callsite metadata.
12031276

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
; RUN: split-file %s %t
2+
3+
;; Basic functionality with flag toggle
4+
; RUN: llvm-profdata merge --memprof-version=4 %t/basic.yaml -o %t/basic.memprofdata
5+
; RUN: opt < %t/basic.ll -passes='memprof-use<profile-filename=%t/basic.memprofdata>' -memprof-attach-calleeguids=false -S 2>&1 | FileCheck %s --check-prefix=CHECK-DISABLE
6+
; RUN: opt < %t/basic.ll -passes='memprof-use<profile-filename=%t/basic.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-ENABLE
7+
8+
;; FDO conflict handling
9+
; RUN: llvm-profdata merge --memprof-version=4 %t/fdo_conflict.yaml -o %t/fdo_conflict.memprofdata
10+
; RUN: opt < %t/fdo_conflict.ll -passes='memprof-use<profile-filename=%t/fdo_conflict.memprofdata>' -memprof-attach-calleeguids=true -S 2>&1 | FileCheck %s --check-prefix=CHECK-FDO
11+
12+
;--- basic.yaml
13+
---
14+
HeapProfileRecords:
15+
- GUID: _Z3barv
16+
AllocSites: []
17+
CallSites:
18+
- Frames:
19+
- { Function: _Z3barv, LineOffset: 3, Column: 5, IsInlineFrame: false }
20+
CalleeGuids: [0x123456789abcdef0, 0x23456789abcdef01]
21+
...
22+
23+
;--- basic.ll
24+
define dso_local void @_Z3barv() !dbg !4 {
25+
entry:
26+
%fp = alloca ptr, align 8
27+
%0 = load ptr, ptr %fp, align 8
28+
call void %0(), !dbg !5
29+
; CHECK-ENABLE: call void %0(), {{.*}} !prof !6
30+
; CHECK-DISABLE-NOT: !prof
31+
ret void
32+
}
33+
34+
; CHECK-ENABLE: !6 = !{!"VP", i32 0, i64 2, i64 1311768467463790320, i64 1, i64 2541551405711093505, i64 1}
35+
36+
!llvm.module.flags = !{!2, !3}
37+
38+
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1)
39+
!1 = !DIFile(filename: "t", directory: "/")
40+
!2 = !{i32 7, !"Dwarf Version", i32 5}
41+
!3 = !{i32 2, !"Debug Info Version", i32 3}
42+
!4 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 1, unit: !0)
43+
!5 = !DILocation(line: 4, column: 5, scope: !4)
44+
45+
;--- fdo_conflict.yaml
46+
---
47+
HeapProfileRecords:
48+
- GUID: _Z3foov
49+
AllocSites: []
50+
CallSites:
51+
- Frames:
52+
- { Function: _Z3foov, LineOffset: 3, Column: 5, IsInlineFrame: false }
53+
CalleeGuids: [0x123456789abcdef0, 0x23456789abcdef01]
54+
- Frames:
55+
- { Function: _Z3foov, LineOffset: 5, Column: 5, IsInlineFrame: false }
56+
CalleeGuids: [0x555556789abcdef0, 0x666656789abcdef1]
57+
...
58+
59+
;--- fdo_conflict.ll
60+
define dso_local void @_Z3foov() !dbg !14 {
61+
entry:
62+
%fp = alloca ptr, align 8
63+
%0 = load ptr, ptr %fp, align 8
64+
; This call already has FDO value profile metadata - should NOT be modified
65+
; CHECK-FDO: call void %0(), {{.*}} !prof !6
66+
call void %0(), !dbg !15, !prof !16
67+
68+
%1 = load ptr, ptr %fp, align 8
69+
; This call does NOT have existing metadata - should get MemProf annotation
70+
; CHECK-FDO: call void %1(), {{.*}} !prof !9
71+
call void %1(), !dbg !17
72+
ret void
73+
}
74+
75+
!16 = !{!"VP", i32 0, i64 100, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20}
76+
77+
; CHECK-FDO: !6 = !{!"VP", i32 0, i64 100, i64 9191153033785521275, i64 80, i64 -1069303473483922844, i64 20}
78+
; CHECK-FDO: !9 = !{!"VP", i32 0, i64 2, i64 6148915942236413680, i64 1, i64 7378680115485269745, i64 1}
79+
80+
!llvm.module.flags = !{!12, !13}
81+
82+
!10 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !11)
83+
!11 = !DIFile(filename: "t", directory: "/")
84+
!12 = !{i32 7, !"Dwarf Version", i32 5}
85+
!13 = !{i32 2, !"Debug Info Version", i32 3}
86+
!14 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !11, file: !11, line: 1, unit: !10)
87+
!15 = !DILocation(line: 4, column: 5, scope: !14)
88+
!17 = !DILocation(line: 6, column: 5, scope: !14)

0 commit comments

Comments
 (0)