Skip to content

Commit 74799f4

Browse files
[memprof] Add call stack IDs to IndexedAllocationInfo (#85888)
The indexed MemProf file has a huge amount of redundancy. In a large internal application, 82% of call stacks, stored in IndexedAllocationInfo::CallStack, are duplicates. We should work toward deduplicating call stacks by referring to them with unique IDs, with the actual call stacks stored in a separate data structure, much like we refer to memprof::Frame with memprof::FrameId. At the same time, we need to facilitate a graceful transition from the current version of the MemProf format to the next. We should be able to read (but not write) the current version of the MemProf file even after we move on to the next one. With those goals in mind, I propose to have an integer ID next to CallStack in IndexedAllocationInfo to refer to a call stack in a succinct manner. We'll gradually increase the areas of the compiler where IDs and call stacks have a one-to-one correspondence and eventually remove the existing CallStack field. This patch adds a call stack ID, named CSId, to IndexedAllocationInfo and teaches the raw profile reader to compute unique call stack IDs and store them in the new field. It does not introduce any user of the call stack IDs yet, except in verifyFunctionProfileData.
1 parent 7c9b522 commit 74799f4

File tree

5 files changed

+58
-6
lines changed

5 files changed

+58
-6
lines changed

llvm/include/llvm/ProfileData/MemProf.h

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#ifndef LLVM_PROFILEDATA_MEMPROF_H_
22
#define LLVM_PROFILEDATA_MEMPROF_H_
33

4+
#include "llvm/ADT/MapVector.h"
45
#include "llvm/ADT/STLFunctionalExtras.h"
56
#include "llvm/ADT/SmallVector.h"
67
#include "llvm/IR/GlobalValue.h"
@@ -252,18 +253,26 @@ struct Frame {
252253
}
253254
};
254255

256+
// A type representing the index into the table of call stacks.
257+
using CallStackId = uint64_t;
258+
255259
// Holds allocation information in a space efficient format where frames are
256260
// represented using unique identifiers.
257261
struct IndexedAllocationInfo {
258262
// The dynamic calling context for the allocation in bottom-up (leaf-to-root)
259263
// order. Frame contents are stored out-of-line.
264+
// TODO: Remove once we fully transition to CSId.
260265
llvm::SmallVector<FrameId> CallStack;
266+
// Conceptually the same as above. We are going to keep both CallStack and
267+
// CSId while we are transitioning from CallStack to CSId.
268+
CallStackId CSId = 0;
261269
// The statistics obtained from the runtime for the allocation.
262270
PortableMemInfoBlock Info;
263271

264272
IndexedAllocationInfo() = default;
265-
IndexedAllocationInfo(ArrayRef<FrameId> CS, const MemInfoBlock &MB)
266-
: CallStack(CS.begin(), CS.end()), Info(MB) {}
273+
IndexedAllocationInfo(ArrayRef<FrameId> CS, CallStackId CSId,
274+
const MemInfoBlock &MB)
275+
: CallStack(CS.begin(), CS.end()), CSId(CSId), Info(MB) {}
267276

268277
// Returns the size in bytes when this allocation info struct is serialized.
269278
size_t serializedSize() const {
@@ -622,6 +631,16 @@ class FrameLookupTrait {
622631
return Frame::deserialize(D);
623632
}
624633
};
634+
635+
// Compute a CallStackId for a given call stack.
636+
CallStackId hashCallStack(ArrayRef<FrameId> CS);
637+
638+
// Verify that each CallStackId is computed with hashCallStack. This function
639+
// is intended to help transition from CallStack to CSId in
640+
// IndexedAllocationInfo.
641+
void verifyFunctionProfileData(
642+
const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>
643+
&FunctionProfileData);
625644
} // namespace memprof
626645
} // namespace llvm
627646

llvm/lib/ProfileData/MemProf.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@
33
#include "llvm/IR/Function.h"
44
#include "llvm/ProfileData/InstrProf.h"
55
#include "llvm/ProfileData/SampleProf.h"
6+
#include "llvm/Support/BLAKE3.h"
67
#include "llvm/Support/Endian.h"
78
#include "llvm/Support/EndianStream.h"
9+
#include "llvm/Support/HashBuilder.h"
810

911
namespace llvm {
1012
namespace memprof {
@@ -117,5 +119,28 @@ Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {
117119
return Result;
118120
}
119121

122+
// Compute a CallStackId for a given call stack.
//
// Every FrameId in CS is fed, in order, into a little-endian HashBuilder over
// an 8-byte truncated BLAKE3. The 8 resulting digest bytes are then copied
// verbatim into the returned CallStackId.
CallStackId hashCallStack(ArrayRef<FrameId> CS) {
  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
      HashBuilder;
  for (FrameId F : CS)
    HashBuilder.add(F);
  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
  // The memcpy below copies sizeof(Hash) bytes into CSId. Pin the two sizes
  // together so that a future change to either type cannot silently overflow
  // CSId or leave part of it uninitialized.
  static_assert(sizeof(CallStackId) == sizeof(llvm::BLAKE3Result<8>),
                "CallStackId must be exactly as wide as the truncated hash");
  CallStackId CSId;
  std::memcpy(&CSId, Hash.data(), sizeof(Hash));
  return CSId;
}
132+
133+
void verifyFunctionProfileData(
134+
const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>
135+
&FunctionProfileData) {
136+
for (const auto &[GUID, Record] : FunctionProfileData) {
137+
(void)GUID;
138+
for (const auto &AS : Record.AllocSites) {
139+
assert(AS.CSId == hashCallStack(AS.CallStack));
140+
(void)AS;
141+
}
142+
}
143+
}
144+
120145
} // namespace memprof
121146
} // namespace llvm

llvm/lib/ProfileData/RawMemProfReader.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -446,14 +446,16 @@ Error RawMemProfReader::mapRawProfileToRecords() {
446446
Callstack.append(Frames.begin(), Frames.end());
447447
}
448448

449+
CallStackId CSId = hashCallStack(Callstack);
450+
449451
// We attach the memprof record to each function bottom-up including the
450452
// first non-inline frame.
451453
for (size_t I = 0; /*Break out using the condition below*/; I++) {
452454
const Frame &F = idToFrame(Callstack[I]);
453455
auto Result =
454456
FunctionProfileData.insert({F.Function, IndexedMemProfRecord()});
455457
IndexedMemProfRecord &Record = Result.first->second;
456-
Record.AllocSites.emplace_back(Callstack, Entry.second);
458+
Record.AllocSites.emplace_back(Callstack, CSId, Entry.second);
457459

458460
if (!F.IsInlineFrame)
459461
break;
@@ -471,6 +473,8 @@ Error RawMemProfReader::mapRawProfileToRecords() {
471473
}
472474
}
473475

476+
verifyFunctionProfileData(FunctionProfileData);
477+
474478
return Error::success();
475479
}
476480

llvm/unittests/ProfileData/InstrProfTest.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,8 @@ IndexedMemProfRecord makeRecord(
366366
const MemInfoBlock &Block = MemInfoBlock()) {
367367
llvm::memprof::IndexedMemProfRecord MR;
368368
for (const auto &Frames : AllocFrames)
369-
MR.AllocSites.emplace_back(Frames, Block);
369+
MR.AllocSites.emplace_back(Frames, llvm::memprof::hashCallStack(Frames),
370+
Block);
370371
for (const auto &Frames : CallSiteFrames)
371372
MR.CallSites.push_back(Frames);
372373
return MR;

llvm/unittests/ProfileData/MemProfTest.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,8 @@ TEST(MemProf, RecordSerializationRoundTrip) {
280280
IndexedMemProfRecord Record;
281281
for (const auto &ACS : AllocCallStacks) {
282282
// Use the same info block for both allocation sites.
283-
Record.AllocSites.emplace_back(ACS, Info);
283+
Record.AllocSites.emplace_back(ACS, llvm::memprof::hashCallStack(ACS),
284+
Info);
284285
}
285286
Record.CallSites.assign(CallSites);
286287

@@ -376,7 +377,9 @@ TEST(MemProf, BaseMemProfReader) {
376377
Block.AllocCount = 1U, Block.TotalAccessDensity = 4,
377378
Block.TotalLifetime = 200001;
378379
std::array<FrameId, 2> CallStack{F1.hash(), F2.hash()};
379-
FakeRecord.AllocSites.emplace_back(/*CS=*/CallStack, /*MB=*/Block);
380+
FakeRecord.AllocSites.emplace_back(
381+
/*CS=*/CallStack, /*CSId=*/llvm::memprof::hashCallStack(CallStack),
382+
/*MB=*/Block);
380383
ProfData.insert({F1.hash(), FakeRecord});
381384

382385
MemProfReader Reader(FrameIdMap, ProfData);

0 commit comments

Comments
 (0)