Skip to content

[memprof] Add Version2 of the indexed MemProf format #89100

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/include/llvm/ProfileData/InstrProfReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,8 @@ using MemProfRecordHashTable =
OnDiskIterableChainedHashTable<memprof::RecordLookupTrait>;
using MemProfFrameHashTable =
OnDiskIterableChainedHashTable<memprof::FrameLookupTrait>;
using MemProfCallStackHashTable =
OnDiskIterableChainedHashTable<memprof::CallStackLookupTrait>;

template <typename HashTableImpl>
class InstrProfReaderItaniumRemapper;
Expand Down Expand Up @@ -666,6 +668,8 @@ class IndexedInstrProfReader : public InstrProfReader {
std::unique_ptr<MemProfRecordHashTable> MemProfRecordTable;
/// MemProf frame profile data on-disk indexed via frame id.
std::unique_ptr<MemProfFrameHashTable> MemProfFrameTable;
/// MemProf call stack data on-disk indexed via call stack id.
std::unique_ptr<MemProfCallStackHashTable> MemProfCallStackTable;
/// VTableNamePtr points to the beginning of compressed vtable names.
/// When a symtab is constructed from profiles by llvm-profdata, the list of
/// names could be decompressed based on `VTableNamePtr` and
Expand Down
15 changes: 14 additions & 1 deletion llvm/include/llvm/ProfileData/InstrProfWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ class InstrProfWriter {
// inline.
llvm::MapVector<memprof::FrameId, memprof::Frame> MemProfFrameData;

// A map to hold call stack id to call stacks.
llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
MemProfCallStackData;

// List of binary ids.
std::vector<llvm::object::BuildID> BinaryIds;

Expand Down Expand Up @@ -114,6 +118,12 @@ class InstrProfWriter {
bool addMemProfFrame(const memprof::FrameId, const memprof::Frame &F,
function_ref<void(Error)> Warn);

/// Add a call stack identified by the hash of the contents of the call stack
/// in \p CallStack.
bool addMemProfCallStack(const memprof::CallStackId CSId,
const llvm::SmallVector<memprof::FrameId> &CallStack,
function_ref<void(Error)> Warn);

// Add a binary id to the binary ids list.
void addBinaryIds(ArrayRef<llvm::object::BuildID> BIs);

Expand Down Expand Up @@ -187,9 +197,12 @@ class InstrProfWriter {
return static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage);
}

// Internal interface for testing purpose only.
// Internal interfaces for testing purpose only.
void setValueProfDataEndianness(llvm::endianness Endianness);
void setOutputSparse(bool Sparse);
// Overwrite the stored MemProfVersionRequested with \p Version.
void setMemProfVersionRequested(memprof::IndexedVersion Version) {
  this->MemProfVersionRequested = Version;
}
// Compute the overlap b/w this object and Other. Program level result is
// stored in Overlap and function level result is stored in FuncLevelOverlap.
void overlapRecord(NamedInstrProfRecord &&Other, OverlapStats &Overlap,
Expand Down
92 changes: 91 additions & 1 deletion llvm/include/llvm/ProfileData/MemProf.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ enum IndexedVersion : uint64_t {
};

constexpr uint64_t MinimumSupportedVersion = Version0;
constexpr uint64_t MaximumSupportedVersion = Version1;
constexpr uint64_t MaximumSupportedVersion = Version2;

// Verify that the minimum and maximum satisfy the obvious constraint.
static_assert(MinimumSupportedVersion <= MaximumSupportedVersion);
Expand Down Expand Up @@ -633,6 +633,96 @@ class FrameLookupTrait {
}
};

// Trait for writing call stacks to the on-disk hash table.
class CallStackWriterTrait {
public:
using key_type = CallStackId;
using key_type_ref = CallStackId;

using data_type = llvm::SmallVector<FrameId>;
using data_type_ref = llvm::SmallVector<FrameId> &;

using hash_value_type = CallStackId;
using offset_type = uint64_t;

static hash_value_type ComputeHash(key_type_ref K) { return K; }

static std::pair<offset_type, offset_type>
EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
using namespace support;
endian::Writer LE(Out, llvm::endianness::little);
offset_type N = sizeof(K);
LE.write<offset_type>(N);
offset_type M = sizeof(FrameId) * V.size();
LE.write<offset_type>(M);
return std::make_pair(N, M);
}

void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
using namespace support;
endian::Writer LE(Out, llvm::endianness::little);
LE.write<key_type>(K);
}

void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
offset_type /*Unused*/) {
using namespace support;
endian::Writer LE(Out, llvm::endianness::little);
// Emit the frames. We do not explicitly emit the length of the vector
// because it can be inferred from the data length.
for (FrameId F : V)
LE.write<FrameId>(F);
}
};

// Trait for reading call stack mappings from the on-disk hash table.
class CallStackLookupTrait {
public:
using data_type = const llvm::SmallVector<FrameId>;
using internal_key_type = CallStackId;
using external_key_type = CallStackId;
using hash_value_type = CallStackId;
using offset_type = uint64_t;

static bool EqualKey(internal_key_type A, internal_key_type B) {
return A == B;
}
static uint64_t GetInternalKey(internal_key_type K) { return K; }
static uint64_t GetExternalKey(external_key_type K) { return K; }

hash_value_type ComputeHash(internal_key_type K) { return K; }

static std::pair<offset_type, offset_type>
ReadKeyDataLength(const unsigned char *&D) {
using namespace support;

offset_type KeyLen =
endian::readNext<offset_type, llvm::endianness::little>(D);
offset_type DataLen =
endian::readNext<offset_type, llvm::endianness::little>(D);
return std::make_pair(KeyLen, DataLen);
}

uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
using namespace support;
return endian::readNext<external_key_type, llvm::endianness::little>(D);
}

data_type ReadData(uint64_t K, const unsigned char *D, offset_type Length) {
using namespace support;
llvm::SmallVector<FrameId> CS;
// Derive the number of frames from the data length.
uint64_t NumFrames = Length / sizeof(FrameId);
assert(Length % sizeof(FrameId) == 0);
CS.reserve(NumFrames);
for (size_t I = 0; I != NumFrames; ++I) {
FrameId F = endian::readNext<FrameId, llvm::endianness::little>(D);
CS.push_back(F);
}
return CS;
}
};

// Compute a CallStackId for a given call stack.
CallStackId hashCallStack(ArrayRef<FrameId> CS);

Expand Down
6 changes: 6 additions & 0 deletions llvm/include/llvm/ProfileData/MemProfReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ class MemProfReader {
return IdToFrame;
}

// Read-only access to the table that maps each CallStackId to its call stack,
// stored as a list of FrameIds.
const llvm::DenseMap<CallStackId, llvm::SmallVector<FrameId>> &
getCallStacks() const {
  const auto &CallStacks = CSIdToCallStack;
  return CallStacks;
}

// Return a const reference to the internal function profile data.
const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> &
getProfileData() const {
Expand Down
66 changes: 59 additions & 7 deletions llvm/lib/ProfileData/InstrProfReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1249,14 +1249,14 @@ Error IndexedInstrProfReader::readHeader() {

// Read the first 64-bit word, which may be RecordTableOffset in
// memprof::MemProfVersion0 or the MemProf version number in
// memprof::MemProfVersion1.
// memprof::MemProfVersion1 and above.
const uint64_t FirstWord =
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);

memprof::IndexedVersion Version = memprof::Version0;
if (FirstWord == memprof::Version1) {
if (FirstWord == memprof::Version1 || FirstWord == memprof::Version2) {
// Everything is good. We can proceed to deserialize the rest.
Version = memprof::Version1;
Version = static_cast<memprof::IndexedVersion>(FirstWord);
} else if (FirstWord >= 24) {
// This is a heuristic/hack to detect memprof::MemProfVersion0,
// which does not have a version field in the header.
Expand Down Expand Up @@ -1286,6 +1286,18 @@ Error IndexedInstrProfReader::readHeader() {
const uint64_t FrameTableOffset =
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);

// The offset in the stream right before invoking
// CallStackTableGenerator.Emit.
uint64_t CallStackPayloadOffset = 0;
// The value returned from CallStackTableGenerator.Emit.
uint64_t CallStackTableOffset = 0;
if (Version >= memprof::Version2) {
CallStackPayloadOffset =
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
CallStackTableOffset =
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
}

// Read the schema.
auto SchemaOr = memprof::readMemProfSchema(Ptr);
if (!SchemaOr)
Expand All @@ -1296,20 +1308,30 @@ Error IndexedInstrProfReader::readHeader() {
MemProfRecordTable.reset(MemProfRecordHashTable::Create(
/*Buckets=*/Start + RecordTableOffset,
/*Payload=*/Ptr,
/*Base=*/Start, memprof::RecordLookupTrait(memprof::Version1, Schema)));
/*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));

// Initialize the frame table reader with the payload and bucket offsets.
MemProfFrameTable.reset(MemProfFrameHashTable::Create(
/*Buckets=*/Start + FrameTableOffset,
/*Payload=*/Start + FramePayloadOffset,
/*Base=*/Start, memprof::FrameLookupTrait()));

if (Version >= memprof::Version2)
MemProfCallStackTable.reset(MemProfCallStackHashTable::Create(
/*Buckets=*/Start + CallStackTableOffset,
/*Payload=*/Start + CallStackPayloadOffset,
/*Base=*/Start, memprof::CallStackLookupTrait()));

#ifdef EXPENSIVE_CHECKS
// Go through all the records and verify that CSId has been correctly
// populated. Do this only under EXPENSIVE_CHECKS. Otherwise, we
// would defeat the purpose of OnDiskIterableChainedHashTable.
for (const auto &Record : MemProfRecordTable->data())
verifyIndexedMemProfRecord(Record);
// Note that we can compare CSId against actual call stacks only for
// Version0 and Version1 because IndexedAllocationInfo::CallStack and
// IndexedMemProfRecord::CallSites are not populated in Version2.
if (Version <= memprof::Version1)
for (const auto &Record : MemProfRecordTable->data())
verifyIndexedMemProfRecord(Record);
#endif
}

Expand Down Expand Up @@ -1502,14 +1524,44 @@ IndexedInstrProfReader::getMemProfRecord(const uint64_t FuncNameHash) {
return *FrIter;
};

memprof::MemProfRecord Record(*Iter, IdToFrameCallback);
// Setup a callback to convert call stack ids to call stacks using the on-disk
// hash table.
std::optional<memprof::CallStackId> LastUnmappedCSId;
auto CSIdToCallStackCallback = [&](memprof::CallStackId CSId) {
llvm::SmallVector<memprof::Frame> Frames;
auto CSIter = MemProfCallStackTable->find(CSId);
if (CSIter == MemProfCallStackTable->end()) {
LastUnmappedCSId = CSId;
} else {
const llvm::SmallVector<memprof::FrameId> &CS = *CSIter;
Frames.reserve(CS.size());
for (memprof::FrameId Id : CS)
Frames.push_back(IdToFrameCallback(Id));
}
return Frames;
};

const memprof::IndexedMemProfRecord IndexedRecord = *Iter;
memprof::MemProfRecord Record;
if (MemProfCallStackTable)
Record = IndexedRecord.toMemProfRecord(CSIdToCallStackCallback);
else
Record = memprof::MemProfRecord(IndexedRecord, IdToFrameCallback);

// Check that all frame ids were successfully converted to frames.
if (LastUnmappedFrameId) {
return make_error<InstrProfError>(instrprof_error::hash_mismatch,
"memprof frame not found for frame id " +
Twine(*LastUnmappedFrameId));
}

// Check that all call stack ids were successfully converted to call stacks.
if (LastUnmappedCSId) {
return make_error<InstrProfError>(
instrprof_error::hash_mismatch,
"memprof call stack not found for call stack id " +
Twine(*LastUnmappedCSId));
}
return Record;
}

Expand Down
Loading