Skip to content

[memprof] Add call stack IDs to IndexedAllocationInfo #85888

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions llvm/include/llvm/ProfileData/MemProf.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#ifndef LLVM_PROFILEDATA_MEMPROF_H_
#define LLVM_PROFILEDATA_MEMPROF_H_

#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/GlobalValue.h"
Expand Down Expand Up @@ -252,18 +253,26 @@ struct Frame {
}
};

// A type representing the index into the table of call stacks.
using CallStackId = uint64_t;

// Holds allocation information in a space efficient format where frames are
// represented using unique identifiers.
struct IndexedAllocationInfo {
// The dynamic calling context for the allocation in bottom-up (leaf-to-root)
// order. Frame contents are stored out-of-line.
// TODO: Remove once we fully transition to CSId.
llvm::SmallVector<FrameId> CallStack;
// Conceptually the same as above. We are going to keep both CallStack and
// CallStackId while we are transitioning from CallStack to CallStackId.
CallStackId CSId = 0;
// The statistics obtained from the runtime for the allocation.
PortableMemInfoBlock Info;

IndexedAllocationInfo() = default;
IndexedAllocationInfo(ArrayRef<FrameId> CS, const MemInfoBlock &MB)
: CallStack(CS.begin(), CS.end()), Info(MB) {}
IndexedAllocationInfo(ArrayRef<FrameId> CS, CallStackId CSId,
const MemInfoBlock &MB)
: CallStack(CS.begin(), CS.end()), CSId(CSId), Info(MB) {}

// Returns the size in bytes when this allocation info struct is serialized.
size_t serializedSize() const {
Expand Down Expand Up @@ -622,6 +631,16 @@ class FrameLookupTrait {
return Frame::deserialize(D);
}
};

// Compute a CallStackId for a given call stack.
//
// CS is the sequence of frame IDs that make up the call stack. The ID is
// derived by hashing those frame IDs in the order given, so recomputing it
// from the same sequence yields the same CallStackId.
CallStackId hashCallStack(ArrayRef<FrameId> CS);

// Verify that each CallStackId is computed with hashCallStack. This function
// is intended to help transition from CallStack to CSId in
// IndexedAllocationInfo.
//
// The check is an assert() on every allocation site of every record in
// FunctionProfileData, so it only has an effect in builds where assertions
// are enabled.
void verifyFunctionProfileData(
const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>
&FunctionProfileData);
} // namespace memprof
} // namespace llvm

Expand Down
25 changes: 25 additions & 0 deletions llvm/lib/ProfileData/MemProf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
#include "llvm/IR/Function.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/BLAKE3.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/HashBuilder.h"

namespace llvm {
namespace memprof {
Expand Down Expand Up @@ -117,5 +119,28 @@ Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {
return Result;
}

// Compute the CallStackId for the call stack CS.
//
// Every FrameId in CS is fed, in order, into a HashBuilder configured with a
// BLAKE3 hash truncated to 8 bytes and little-endian input encoding. The
// resulting digest bytes are then copied verbatim into a CallStackId
// (a uint64_t) with memcpy and returned.
//
// NOTE(review): the memcpy length is sizeof(Hash), not sizeof(CSId). This
// presumably relies on BLAKE3Result<8> occupying exactly 8 bytes -- confirm,
// or guard it with a static_assert.
// NOTE(review): because the digest bytes are copied as-is, the numeric value
// of the returned ID looks like it depends on host byte order -- confirm
// whether that matters for serialized profiles.
CallStackId hashCallStack(ArrayRef<FrameId> CS) {
llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
HashBuilder;
for (FrameId F : CS)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the plan to move the FrameId to also use BLAKE3? (Doesn't need to be in this patch, but wondering if we can do that so we get more consistency with the compiler's hashes.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wasn't thinking about that, but yes, we could move the FrameID to also use BLAKE3.

HashBuilder.add(F);
llvm::BLAKE3Result<8> Hash = HashBuilder.final();
CallStackId CSId;
std::memcpy(&CSId, Hash.data(), sizeof(Hash));
return CSId;
}

// Sanity-check the call stack IDs stored in FunctionProfileData.
//
// For every record in the map, each allocation site's CSId is compared with
// the value hashCallStack produces for that site's CallStack. The comparison
// is an assert(), so this function does no checking when assertions are
// compiled out.
void verifyFunctionProfileData(
    const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>
        &FunctionProfileData) {
  for (const auto &Entry : FunctionProfileData) {
    // Only the record matters here; the GUID key is not inspected.
    const auto &Record = Entry.second;
    for (const auto &AllocSite : Record.AllocSites) {
      assert(AllocSite.CSId == hashCallStack(AllocSite.CallStack));
      // Keeps AllocSite "used" when assert() expands to nothing.
      (void)AllocSite;
    }
  }
}

} // namespace memprof
} // namespace llvm
6 changes: 5 additions & 1 deletion llvm/lib/ProfileData/RawMemProfReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -445,14 +445,16 @@ Error RawMemProfReader::mapRawProfileToRecords() {
Callstack.append(Frames.begin(), Frames.end());
}

CallStackId CSId = hashCallStack(Callstack);

// We attach the memprof record to each function bottom-up including the
// first non-inline frame.
for (size_t I = 0; /*Break out using the condition below*/; I++) {
const Frame &F = idToFrame(Callstack[I]);
auto Result =
FunctionProfileData.insert({F.Function, IndexedMemProfRecord()});
IndexedMemProfRecord &Record = Result.first->second;
Record.AllocSites.emplace_back(Callstack, Entry.second);
Record.AllocSites.emplace_back(Callstack, CSId, Entry.second);

if (!F.IsInlineFrame)
break;
Expand All @@ -470,6 +472,8 @@ Error RawMemProfReader::mapRawProfileToRecords() {
}
}

verifyFunctionProfileData(FunctionProfileData);

return Error::success();
}

Expand Down
3 changes: 2 additions & 1 deletion llvm/unittests/ProfileData/InstrProfTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,8 @@ IndexedMemProfRecord makeRecord(
const MemInfoBlock &Block = MemInfoBlock()) {
llvm::memprof::IndexedMemProfRecord MR;
for (const auto &Frames : AllocFrames)
MR.AllocSites.emplace_back(Frames, Block);
MR.AllocSites.emplace_back(Frames, llvm::memprof::hashCallStack(Frames),
Block);
for (const auto &Frames : CallSiteFrames)
MR.CallSites.push_back(Frames);
return MR;
Expand Down
7 changes: 5 additions & 2 deletions llvm/unittests/ProfileData/MemProfTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,8 @@ TEST(MemProf, RecordSerializationRoundTrip) {
IndexedMemProfRecord Record;
for (const auto &ACS : AllocCallStacks) {
// Use the same info block for both allocation sites.
Record.AllocSites.emplace_back(ACS, Info);
Record.AllocSites.emplace_back(ACS, llvm::memprof::hashCallStack(ACS),
Info);
}
Record.CallSites.assign(CallSites);

Expand Down Expand Up @@ -376,7 +377,9 @@ TEST(MemProf, BaseMemProfReader) {
Block.AllocCount = 1U, Block.TotalAccessDensity = 4,
Block.TotalLifetime = 200001;
std::array<FrameId, 2> CallStack{F1.hash(), F2.hash()};
FakeRecord.AllocSites.emplace_back(/*CS=*/CallStack, /*MB=*/Block);
FakeRecord.AllocSites.emplace_back(
/*CS=*/CallStack, /*CSId=*/llvm::memprof::hashCallStack(CallStack),
/*MB=*/Block);
ProfData.insert({F1.hash(), FakeRecord});

MemProfReader Reader(FrameIdMap, ProfData);
Expand Down