Skip to content

Commit 172f6dd

Browse files
[memprof] Add Version2 of the indexed MemProf format (#89100)
This patch adds Version2 of the indexed MemProf format. The new format comes with a hash table from CallStackId to the actual call stacks (llvm::SmallVector<FrameId>). The rest of the format refers to call stacks by CallStackId. This "values + references" model effectively deduplicates call stacks. With this patch, a large indexed memprof file of mine shrinks from 4.4GB to 1.6GB, a 64% reduction. This patch does not make Version2 generally available yet as I am planning to make a few more changes to the format.
1 parent 7aad1ee commit 172f6dd

File tree

9 files changed

+362
-18
lines changed

9 files changed

+362
-18
lines changed

llvm/include/llvm/ProfileData/InstrProfReader.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,8 @@ using MemProfRecordHashTable =
560560
OnDiskIterableChainedHashTable<memprof::RecordLookupTrait>;
561561
using MemProfFrameHashTable =
562562
OnDiskIterableChainedHashTable<memprof::FrameLookupTrait>;
563+
using MemProfCallStackHashTable =
564+
OnDiskIterableChainedHashTable<memprof::CallStackLookupTrait>;
563565

564566
template <typename HashTableImpl>
565567
class InstrProfReaderItaniumRemapper;
@@ -666,6 +668,8 @@ class IndexedInstrProfReader : public InstrProfReader {
666668
std::unique_ptr<MemProfRecordHashTable> MemProfRecordTable;
667669
/// MemProf frame profile data on-disk indexed via frame id.
668670
std::unique_ptr<MemProfFrameHashTable> MemProfFrameTable;
671+
/// MemProf call stack data on-disk indexed via call stack id.
672+
std::unique_ptr<MemProfCallStackHashTable> MemProfCallStackTable;
669673
/// VTableNamePtr points to the beginning of compressed vtable names.
670674
/// When a symtab is constructed from profiles by llvm-profdata, the list of
671675
/// names could be decompressed based on `VTableNamePtr` and

llvm/include/llvm/ProfileData/InstrProfWriter.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,10 @@ class InstrProfWriter {
6060
// inline.
6161
llvm::MapVector<memprof::FrameId, memprof::Frame> MemProfFrameData;
6262

63+
// A map to hold call stack id to call stacks.
64+
llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
65+
MemProfCallStackData;
66+
6367
// List of binary ids.
6468
std::vector<llvm::object::BuildID> BinaryIds;
6569

@@ -114,6 +118,12 @@ class InstrProfWriter {
114118
bool addMemProfFrame(const memprof::FrameId, const memprof::Frame &F,
115119
function_ref<void(Error)> Warn);
116120

121+
/// Add a call stack identified by the hash of the contents of the call stack
122+
/// in \p CallStack.
123+
bool addMemProfCallStack(const memprof::CallStackId CSId,
124+
const llvm::SmallVector<memprof::FrameId> &CallStack,
125+
function_ref<void(Error)> Warn);
126+
117127
// Add a binary id to the binary ids list.
118128
void addBinaryIds(ArrayRef<llvm::object::BuildID> BIs);
119129

@@ -187,9 +197,12 @@ class InstrProfWriter {
187197
return static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage);
188198
}
189199

190-
// Internal interface for testing purpose only.
200+
// Internal interfaces for testing purpose only.
191201
void setValueProfDataEndianness(llvm::endianness Endianness);
192202
void setOutputSparse(bool Sparse);
203+
// Testing hook: overwrites MemProfVersionRequested with \p Version.
// NOTE(review): presumably this selects which indexed MemProf format version
// the writer emits -- confirm against the code that reads the member.
void setMemProfVersionRequested(memprof::IndexedVersion Version) {
204+
MemProfVersionRequested = Version;
205+
}
193206
// Compute the overlap b/w this object and Other. Program level result is
194207
// stored in Overlap and function level result is stored in FuncLevelOverlap.
195208
void overlapRecord(NamedInstrProfRecord &&Other, OverlapStats &Overlap,

llvm/include/llvm/ProfileData/MemProf.h

Lines changed: 91 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ enum IndexedVersion : uint64_t {
2929
};
3030

3131
constexpr uint64_t MinimumSupportedVersion = Version0;
32-
constexpr uint64_t MaximumSupportedVersion = Version1;
32+
constexpr uint64_t MaximumSupportedVersion = Version2;
3333

3434
// Verify that the minimum and maximum satisfy the obvious constraint.
3535
static_assert(MinimumSupportedVersion <= MaximumSupportedVersion);
@@ -633,6 +633,96 @@ class FrameLookupTrait {
633633
}
634634
};
635635

636+
// Trait for writing call stacks to the on-disk hash table.
637+
class CallStackWriterTrait {
638+
public:
639+
using key_type = CallStackId;
640+
using key_type_ref = CallStackId;
641+
642+
using data_type = llvm::SmallVector<FrameId>;
643+
using data_type_ref = llvm::SmallVector<FrameId> &;
644+
645+
using hash_value_type = CallStackId;
646+
using offset_type = uint64_t;
647+
648+
static hash_value_type ComputeHash(key_type_ref K) { return K; }
649+
650+
static std::pair<offset_type, offset_type>
651+
EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
652+
using namespace support;
653+
endian::Writer LE(Out, llvm::endianness::little);
654+
offset_type N = sizeof(K);
655+
LE.write<offset_type>(N);
656+
offset_type M = sizeof(FrameId) * V.size();
657+
LE.write<offset_type>(M);
658+
return std::make_pair(N, M);
659+
}
660+
661+
void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
662+
using namespace support;
663+
endian::Writer LE(Out, llvm::endianness::little);
664+
LE.write<key_type>(K);
665+
}
666+
667+
void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
668+
offset_type /*Unused*/) {
669+
using namespace support;
670+
endian::Writer LE(Out, llvm::endianness::little);
671+
// Emit the frames. We do not explicitly emit the length of the vector
672+
// because it can be inferred from the data length.
673+
for (FrameId F : V)
674+
LE.write<FrameId>(F);
675+
}
676+
};
677+
678+
// Trait for reading call stack mappings from the on-disk hash table.
679+
class CallStackLookupTrait {
680+
public:
681+
using data_type = const llvm::SmallVector<FrameId>;
682+
using internal_key_type = CallStackId;
683+
using external_key_type = CallStackId;
684+
using hash_value_type = CallStackId;
685+
using offset_type = uint64_t;
686+
687+
static bool EqualKey(internal_key_type A, internal_key_type B) {
688+
return A == B;
689+
}
690+
static uint64_t GetInternalKey(internal_key_type K) { return K; }
691+
static uint64_t GetExternalKey(external_key_type K) { return K; }
692+
693+
hash_value_type ComputeHash(internal_key_type K) { return K; }
694+
695+
static std::pair<offset_type, offset_type>
696+
ReadKeyDataLength(const unsigned char *&D) {
697+
using namespace support;
698+
699+
offset_type KeyLen =
700+
endian::readNext<offset_type, llvm::endianness::little>(D);
701+
offset_type DataLen =
702+
endian::readNext<offset_type, llvm::endianness::little>(D);
703+
return std::make_pair(KeyLen, DataLen);
704+
}
705+
706+
uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
707+
using namespace support;
708+
return endian::readNext<external_key_type, llvm::endianness::little>(D);
709+
}
710+
711+
data_type ReadData(uint64_t K, const unsigned char *D, offset_type Length) {
712+
using namespace support;
713+
llvm::SmallVector<FrameId> CS;
714+
// Derive the number of frames from the data length.
715+
uint64_t NumFrames = Length / sizeof(FrameId);
716+
assert(Length % sizeof(FrameId) == 0);
717+
CS.reserve(NumFrames);
718+
for (size_t I = 0; I != NumFrames; ++I) {
719+
FrameId F = endian::readNext<FrameId, llvm::endianness::little>(D);
720+
CS.push_back(F);
721+
}
722+
return CS;
723+
}
724+
};
725+
636726
// Compute a CallStackId for a given call stack.
637727
CallStackId hashCallStack(ArrayRef<FrameId> CS);
638728

llvm/include/llvm/ProfileData/MemProfReader.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,12 @@ class MemProfReader {
5151
return IdToFrame;
5252
}
5353

54+
// Return a const reference to the internal map from CallStackId to call stack
// (CSIdToCallStack), where each call stack is a list of FrameIds. No copy is
// made; the returned reference aliases the reader's own map.
55+
const llvm::DenseMap<CallStackId, llvm::SmallVector<FrameId>> &
56+
getCallStacks() const {
57+
return CSIdToCallStack;
58+
}
59+
5460
// Return a const reference to the internal function profile data.
5561
const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> &
5662
getProfileData() const {

llvm/lib/ProfileData/InstrProfReader.cpp

Lines changed: 59 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1249,14 +1249,14 @@ Error IndexedInstrProfReader::readHeader() {
12491249

12501250
// Read the first 64-bit word, which may be RecordTableOffset in
12511251
// memprof::MemProfVersion0 or the MemProf version number in
1252-
// memprof::MemProfVersion1.
1252+
// memprof::MemProfVersion1 and above.
12531253
const uint64_t FirstWord =
12541254
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
12551255

12561256
memprof::IndexedVersion Version = memprof::Version0;
1257-
if (FirstWord == memprof::Version1) {
1257+
if (FirstWord == memprof::Version1 || FirstWord == memprof::Version2) {
12581258
// Everything is good. We can proceed to deserialize the rest.
1259-
Version = memprof::Version1;
1259+
Version = static_cast<memprof::IndexedVersion>(FirstWord);
12601260
} else if (FirstWord >= 24) {
12611261
// This is a heuristic/hack to detect memprof::MemProfVersion0,
12621262
// which does not have a version field in the header.
@@ -1286,6 +1286,18 @@ Error IndexedInstrProfReader::readHeader() {
12861286
const uint64_t FrameTableOffset =
12871287
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
12881288

1289+
// The offset in the stream right before invoking
1290+
// CallStackTableGenerator.Emit.
1291+
uint64_t CallStackPayloadOffset = 0;
1292+
// The value returned from CallStackTableGenerator.Emit.
1293+
uint64_t CallStackTableOffset = 0;
1294+
if (Version >= memprof::Version2) {
1295+
CallStackPayloadOffset =
1296+
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
1297+
CallStackTableOffset =
1298+
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
1299+
}
1300+
12891301
// Read the schema.
12901302
auto SchemaOr = memprof::readMemProfSchema(Ptr);
12911303
if (!SchemaOr)
@@ -1296,20 +1308,30 @@ Error IndexedInstrProfReader::readHeader() {
12961308
MemProfRecordTable.reset(MemProfRecordHashTable::Create(
12971309
/*Buckets=*/Start + RecordTableOffset,
12981310
/*Payload=*/Ptr,
1299-
/*Base=*/Start, memprof::RecordLookupTrait(memprof::Version1, Schema)));
1311+
/*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));
13001312

13011313
// Initialize the frame table reader with the payload and bucket offsets.
13021314
MemProfFrameTable.reset(MemProfFrameHashTable::Create(
13031315
/*Buckets=*/Start + FrameTableOffset,
13041316
/*Payload=*/Start + FramePayloadOffset,
13051317
/*Base=*/Start, memprof::FrameLookupTrait()));
13061318

1319+
if (Version >= memprof::Version2)
1320+
MemProfCallStackTable.reset(MemProfCallStackHashTable::Create(
1321+
/*Buckets=*/Start + CallStackTableOffset,
1322+
/*Payload=*/Start + CallStackPayloadOffset,
1323+
/*Base=*/Start, memprof::CallStackLookupTrait()));
1324+
13071325
#ifdef EXPENSIVE_CHECKS
13081326
// Go through all the records and verify that CSId has been correctly
13091327
// populated. Do this only under EXPENSIVE_CHECKS. Otherwise, we
13101328
// would defeat the purpose of OnDiskIterableChainedHashTable.
1311-
for (const auto &Record : MemProfRecordTable->data())
1312-
verifyIndexedMemProfRecord(Record);
1329+
// Note that we can compare CSId against actual call stacks only for
1330+
// Version0 and Version1 because IndexedAllocationInfo::CallStack and
1331+
// IndexedMemProfRecord::CallSites are not populated in Version2.
1332+
if (Version <= memprof::Version1)
1333+
for (const auto &Record : MemProfRecordTable->data())
1334+
verifyIndexedMemProfRecord(Record);
13131335
#endif
13141336
}
13151337

@@ -1502,14 +1524,44 @@ IndexedInstrProfReader::getMemProfRecord(const uint64_t FuncNameHash) {
15021524
return *FrIter;
15031525
};
15041526

1505-
memprof::MemProfRecord Record(*Iter, IdToFrameCallback);
1527+
// Setup a callback to convert call stack ids to call stacks using the on-disk
1528+
// hash table.
1529+
std::optional<memprof::CallStackId> LastUnmappedCSId;
1530+
auto CSIdToCallStackCallback = [&](memprof::CallStackId CSId) {
1531+
llvm::SmallVector<memprof::Frame> Frames;
1532+
auto CSIter = MemProfCallStackTable->find(CSId);
1533+
if (CSIter == MemProfCallStackTable->end()) {
1534+
LastUnmappedCSId = CSId;
1535+
} else {
1536+
const llvm::SmallVector<memprof::FrameId> &CS = *CSIter;
1537+
Frames.reserve(CS.size());
1538+
for (memprof::FrameId Id : CS)
1539+
Frames.push_back(IdToFrameCallback(Id));
1540+
}
1541+
return Frames;
1542+
};
1543+
1544+
const memprof::IndexedMemProfRecord IndexedRecord = *Iter;
1545+
memprof::MemProfRecord Record;
1546+
if (MemProfCallStackTable)
1547+
Record = IndexedRecord.toMemProfRecord(CSIdToCallStackCallback);
1548+
else
1549+
Record = memprof::MemProfRecord(IndexedRecord, IdToFrameCallback);
15061550

15071551
// Check that all frame ids were successfully converted to frames.
15081552
if (LastUnmappedFrameId) {
15091553
return make_error<InstrProfError>(instrprof_error::hash_mismatch,
15101554
"memprof frame not found for frame id " +
15111555
Twine(*LastUnmappedFrameId));
15121556
}
1557+
1558+
// Check that all call stack ids were successfully converted to call stacks.
1559+
if (LastUnmappedCSId) {
1560+
return make_error<InstrProfError>(
1561+
instrprof_error::hash_mismatch,
1562+
"memprof call stack not found for call stack id " +
1563+
Twine(*LastUnmappedCSId));
1564+
}
15131565
return Record;
15141566
}
15151567

0 commit comments

Comments
 (0)