Skip to content

Commit c261bb7

Browse files
[memprof] Deduplicate alloc site matches (#142334)
With commit 2425626 (Author: Kazu Hirata <[email protected]>, Date: Sun Jun 1 08:09:58 2025 -0700), we print out a lot of duplicate alloc site matches. This patch partially reverts that commit. The core idea of using a map to deduplicate entries remains the same, but the details are different. Specifically:
- This PR uses the pair [FullStackID, MatchLength] as the key, where MatchLength is the length of an alloc site match.
- AllocMatchInfo in this PR no longer has Matched, because we always report matches.
- AllocMatchInfo in this PR no longer has NumFramesMatched, because it has become part of the key.
This deduplication roughly halves the number of messages printed out.
1 parent 77e2e3f commit c261bb7

File tree

2 files changed

+29
-11
lines changed

2 files changed

+29
-11
lines changed

llvm/lib/Transforms/Instrumentation/MemProfiler.cpp

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -816,6 +816,11 @@ static bool isAllocationWithHotColdVariant(const Function *Callee,
816816
}
817817
}
818818

819+
// Value type of the alloc-site match map, which is keyed by the pair
// (full stack id, number of matched frames). One record is stored per key
// while reading the profile, and the records are printed afterwards when
// ClPrintMemProfMatchInfo is set.
struct AllocMatchInfo {
820+
// Total profiled size of the matched allocation context, as reported by
// AllocInfo->Info.getTotalSize().
uint64_t TotalSize = 0;
821+
// Allocation type of the matched context; printed via
// getAllocTypeAttributeString(). Defaults to AllocationType::None.
AllocationType AllocType = AllocationType::None;
822+
};
823+
819824
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
820825
memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI,
821826
function_ref<bool(uint64_t)> IsPresentInProfile) {
@@ -994,6 +999,8 @@ static void addVPMetadata(Module &M, Instruction &I,
994999
static void readMemprof(Module &M, Function &F,
9951000
IndexedInstrProfReader *MemProfReader,
9961001
const TargetLibraryInfo &TLI,
1002+
std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
1003+
&FullStackIdToAllocMatchInfo,
9971004
std::set<std::vector<uint64_t>> &MatchedCallSites,
9981005
DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
9991006
OptimizationRemarkEmitter &ORE) {
@@ -1206,11 +1213,9 @@ static void readMemprof(Module &M, Function &F,
12061213
// was requested.
12071214
if (ClPrintMemProfMatchInfo) {
12081215
assert(FullStackId != 0);
1209-
errs() << "MemProf " << getAllocTypeAttributeString(AllocType)
1210-
<< " context with id " << FullStackId
1211-
<< " has total profiled size "
1212-
<< AllocInfo->Info.getTotalSize() << " is matched with "
1213-
<< InlinedCallStack.size() << " frames\n";
1216+
FullStackIdToAllocMatchInfo[std::make_pair(
1217+
FullStackId, InlinedCallStack.size())] = {
1218+
AllocInfo->Info.getTotalSize(), AllocType};
12141219
}
12151220
}
12161221
}
@@ -1325,6 +1330,12 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
13251330
if (SalvageStaleProfile)
13261331
UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI);
13271332

1333+
// Map from the stack hash and matched frame count of each allocation context
1334+
// in the function profiles to the total profiled size (bytes) and allocation
1335+
// type.
1336+
std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
1337+
FullStackIdToAllocMatchInfo;
1338+
13281339
// Set of the matched call sites, each expressed as a sequence of an inline
13291340
// call stack.
13301341
std::set<std::vector<uint64_t>> MatchedCallSites;
@@ -1335,11 +1346,18 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
13351346

13361347
const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
13371348
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1338-
readMemprof(M, F, MemProfReader.get(), TLI, MatchedCallSites, UndriftMaps,
1339-
ORE);
1349+
readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
1350+
MatchedCallSites, UndriftMaps, ORE);
13401351
}
13411352

13421353
if (ClPrintMemProfMatchInfo) {
1354+
for (const auto &[IdLengthPair, Info] : FullStackIdToAllocMatchInfo) {
1355+
auto [Id, Length] = IdLengthPair;
1356+
errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
1357+
<< " context with id " << Id << " has total profiled size "
1358+
<< Info.TotalSize << " is matched with " << Length << " frames\n";
1359+
}
1360+
13431361
for (const auto &CallStack : MatchedCallSites) {
13441362
errs() << "MemProf callsite match for inline call stack";
13451363
for (uint64_t StackId : CallStack)

llvm/test/Transforms/PGOProfile/memprof.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -111,13 +111,13 @@
111111
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-min-ave-lifetime-access-density-hot-threshold=0 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL
112112

113113
; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched with 1 frames
114-
; MEMPROFMATCHINFO: MemProf cold context with id 8525406123785421946 has total profiled size 10 is matched with 1 frames
115-
; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched with 1 frames
116-
; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched with 1 frames
117-
; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched with 1 frames
118114
; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched with 1 frames
119115
; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched with 1 frames
116+
; MEMPROFMATCHINFO: MemProf cold context with id 8525406123785421946 has total profiled size 10 is matched with 1 frames
117+
; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched with 1 frames
120118
; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched with 1 frames
119+
; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched with 1 frames
120+
; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched with 1 frames
121121
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 748269490701775343
122122
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 1544787832369987002
123123
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 2061451396820446691

0 commit comments

Comments
 (0)