Skip to content

Commit 4c8ec8f

Browse files
[memprof] Reduce schema for Version2 (#89876)
Currently, the compiler only uses several fields of MemoryInfoBlock. Serializing all fields into the indexed MemProf file simply wastes storage. This patch limits the schema down to four fields for Version2 by default. It retains the old behavior of serializing all fields via: `llvm-profdata merge --memprof-version=2 --memprof-full-schema`. This patch reduces the size of the indexed MemProf profile I have by 40% (1.6GB down to 1.0GB).
1 parent a10d67f commit 4c8ec8f

File tree

6 files changed

+145
-45
lines changed

6 files changed

+145
-45
lines changed

llvm/include/llvm/ProfileData/InstrProfWriter.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,11 +85,15 @@ class InstrProfWriter {
8585
// The MemProf version we should write.
8686
memprof::IndexedVersion MemProfVersionRequested;
8787

88+
// Whether to serialize the full schema.
89+
bool MemProfFullSchema;
90+
8891
public:
8992
InstrProfWriter(
9093
bool Sparse = false, uint64_t TemporalProfTraceReservoirSize = 0,
9194
uint64_t MaxTemporalProfTraceLength = 0, bool WritePrevVersion = false,
92-
memprof::IndexedVersion MemProfVersionRequested = memprof::Version0);
95+
memprof::IndexedVersion MemProfVersionRequested = memprof::Version0,
96+
bool MemProfFullSchema = false);
9397
~InstrProfWriter();
9498

9599
StringMap<ProfilingData> &getProfileData() { return FunctionData; }
@@ -203,6 +207,7 @@ class InstrProfWriter {
203207
void setMemProfVersionRequested(memprof::IndexedVersion Version) {
204208
MemProfVersionRequested = Version;
205209
}
210+
void setMemProfFullSchema(bool Full) { MemProfFullSchema = Full; }
206211
// Compute the overlap b/w this object and Other. Program level result is
207212
// stored in Overlap and function level result is stored in FuncLevelOverlap.
208213
void overlapRecord(NamedInstrProfRecord &&Other, OverlapStats &Overlap,

llvm/include/llvm/ProfileData/MemProf.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,14 +117,21 @@ struct PortableMemInfoBlock {
117117
void clear() { *this = PortableMemInfoBlock(); }
118118

119119
// Returns the full schema currently in use.
120-
static MemProfSchema getSchema() {
120+
static MemProfSchema getFullSchema() {
121121
MemProfSchema List;
122122
#define MIBEntryDef(NameTag, Name, Type) List.push_back(Meta::Name);
123123
#include "llvm/ProfileData/MIBEntryDef.inc"
124124
#undef MIBEntryDef
125125
return List;
126126
}
127127

128+
// Returns the schema consisting of the fields currently consumed by the
129+
// compiler.
130+
static MemProfSchema getHotColdSchema() {
131+
return {Meta::AllocCount, Meta::TotalSize, Meta::TotalLifetime,
132+
Meta::TotalLifetimeAccessDensity};
133+
}
134+
128135
bool operator==(const PortableMemInfoBlock &Other) const {
129136
#define MIBEntryDef(NameTag, Name, Type) \
130137
if (Other.get##Name() != get##Name()) \

llvm/lib/ProfileData/InstrProfWriter.cpp

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -184,12 +184,13 @@ class InstrProfRecordWriterTrait {
184184
InstrProfWriter::InstrProfWriter(
185185
bool Sparse, uint64_t TemporalProfTraceReservoirSize,
186186
uint64_t MaxTemporalProfTraceLength, bool WritePrevVersion,
187-
memprof::IndexedVersion MemProfVersionRequested)
187+
memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema)
188188
: Sparse(Sparse), MaxTemporalProfTraceLength(MaxTemporalProfTraceLength),
189189
TemporalProfTraceReservoirSize(TemporalProfTraceReservoirSize),
190190
InfoObj(new InstrProfRecordWriterTrait()),
191191
WritePrevVersion(WritePrevVersion),
192-
MemProfVersionRequested(MemProfVersionRequested) {}
192+
MemProfVersionRequested(MemProfVersionRequested),
193+
MemProfFullSchema(MemProfFullSchema) {}
193194

194195
InstrProfWriter::~InstrProfWriter() { delete InfoObj; }
195196

@@ -507,7 +508,7 @@ static Error writeMemProfV0(
507508
OS.write(0ULL); // Reserve space for the memprof frame payload offset.
508509
OS.write(0ULL); // Reserve space for the memprof frame table offset.
509510

510-
auto Schema = memprof::PortableMemInfoBlock::getSchema();
511+
auto Schema = memprof::PortableMemInfoBlock::getFullSchema();
511512
writeMemProfSchema(OS, Schema);
512513

513514
uint64_t RecordTableOffset =
@@ -533,7 +534,7 @@ static Error writeMemProfV1(
533534
OS.write(0ULL); // Reserve space for the memprof frame payload offset.
534535
OS.write(0ULL); // Reserve space for the memprof frame table offset.
535536

536-
auto Schema = memprof::PortableMemInfoBlock::getSchema();
537+
auto Schema = memprof::PortableMemInfoBlock::getFullSchema();
537538
writeMemProfSchema(OS, Schema);
538539

539540
uint64_t RecordTableOffset =
@@ -554,7 +555,8 @@ static Error writeMemProfV2(
554555
&MemProfRecordData,
555556
llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
556557
llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
557-
&MemProfCallStackData) {
558+
&MemProfCallStackData,
559+
bool MemProfFullSchema) {
558560
OS.write(memprof::Version2);
559561
uint64_t HeaderUpdatePos = OS.tell();
560562
OS.write(0ULL); // Reserve space for the memprof record table offset.
@@ -563,7 +565,9 @@ static Error writeMemProfV2(
563565
OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
564566
OS.write(0ULL); // Reserve space for the memprof call stack table offset.
565567

566-
auto Schema = memprof::PortableMemInfoBlock::getSchema();
568+
auto Schema = memprof::PortableMemInfoBlock::getHotColdSchema();
569+
if (MemProfFullSchema)
570+
Schema = memprof::PortableMemInfoBlock::getFullSchema();
567571
writeMemProfSchema(OS, Schema);
568572

569573
uint64_t RecordTableOffset =
@@ -605,7 +609,7 @@ static Error writeMemProf(
605609
llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
606610
llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
607611
&MemProfCallStackData,
608-
memprof::IndexedVersion MemProfVersionRequested) {
612+
memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema) {
609613

610614
switch (MemProfVersionRequested) {
611615
case memprof::Version0:
@@ -614,7 +618,7 @@ static Error writeMemProf(
614618
return writeMemProfV1(OS, MemProfRecordData, MemProfFrameData);
615619
case memprof::Version2:
616620
return writeMemProfV2(OS, MemProfRecordData, MemProfFrameData,
617-
MemProfCallStackData);
621+
MemProfCallStackData, MemProfFullSchema);
618622
}
619623

620624
return make_error<InstrProfError>(
@@ -733,7 +737,8 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
733737
if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) {
734738
MemProfSectionStart = OS.tell();
735739
if (auto E = writeMemProf(OS, MemProfRecordData, MemProfFrameData,
736-
MemProfCallStackData, MemProfVersionRequested))
740+
MemProfCallStackData, MemProfVersionRequested,
741+
MemProfFullSchema))
737742
return E;
738743
}
739744

llvm/test/tools/llvm-profdata/memprof-merge-v0.test

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ RUN: llvm-profdata show %t.prof.v1 | FileCheck %s
1616
RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --memprof-version=2 --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof.v2
1717
RUN: llvm-profdata show %t.prof.v2 | FileCheck %s
1818

19+
RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --memprof-version=2 --memprof-full-schema --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof.v2
20+
RUN: llvm-profdata show %t.prof.v2 | FileCheck %s
21+
1922
For now we only check the validity of the instrumented profile since we don't
2023
have a way to display the contents of the memprof indexed format yet.
2124

llvm/tools/llvm-profdata/llvm-profdata.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,10 @@ cl::opt<memprof::IndexedVersion> MemProfVersionRequested(
308308
clEnumValN(memprof::Version1, "1", "version 1"),
309309
clEnumValN(memprof::Version2, "2", "version 2")));
310310

311+
cl::opt<bool> MemProfFullSchema(
312+
"memprof-full-schema", cl::Hidden, cl::sub(MergeSubcommand),
313+
cl::desc("Use the full schema for serialization"), cl::init(false));
314+
311315
// Options specific to overlap subcommand.
312316
cl::opt<std::string> BaseFilename(cl::Positional, cl::Required,
313317
cl::desc("<base profile file>"),
@@ -600,7 +604,7 @@ struct WriterContext {
600604
SmallSet<instrprof_error, 4> &WriterErrorCodes,
601605
uint64_t ReservoirSize = 0, uint64_t MaxTraceLength = 0)
602606
: Writer(IsSparse, ReservoirSize, MaxTraceLength, DoWritePrevVersion,
603-
MemProfVersionRequested),
607+
MemProfVersionRequested, MemProfFullSchema),
604608
ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) {}
605609
};
606610

llvm/unittests/ProfileData/InstrProfTest.cpp

Lines changed: 109 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -370,12 +370,31 @@ static CallStackIdMapTy getCallStackMapping() {
370370
return Mapping;
371371
}
372372

373+
// Populate all of the fields of MIB.
374+
MemInfoBlock makeFullMIB() {
375+
MemInfoBlock MIB;
376+
#define MIBEntryDef(NameTag, Name, Type) MIB.NameTag;
377+
#include "llvm/ProfileData/MIBEntryDef.inc"
378+
#undef MIBEntryDef
379+
return MIB;
380+
}
381+
382+
// Populate those fields returned by getHotColdSchema.
383+
MemInfoBlock makePartialMIB() {
384+
MemInfoBlock MIB;
385+
MIB.AllocCount = 1;
386+
MIB.TotalSize = 5;
387+
MIB.TotalLifetime = 10;
388+
MIB.TotalLifetimeAccessDensity = 23;
389+
return MIB;
390+
}
391+
373392
IndexedMemProfRecord makeRecord(
374393
std::initializer_list<std::initializer_list<::llvm::memprof::FrameId>>
375394
AllocFrames,
376395
std::initializer_list<std::initializer_list<::llvm::memprof::FrameId>>
377396
CallSiteFrames,
378-
const MemInfoBlock &Block = MemInfoBlock()) {
397+
const MemInfoBlock &Block = makeFullMIB()) {
379398
llvm::memprof::IndexedMemProfRecord MR;
380399
for (const auto &Frames : AllocFrames)
381400
MR.AllocSites.emplace_back(Frames, llvm::memprof::hashCallStack(Frames),
@@ -388,7 +407,7 @@ IndexedMemProfRecord makeRecord(
388407
IndexedMemProfRecord
389408
makeRecordV2(std::initializer_list<::llvm::memprof::CallStackId> AllocFrames,
390409
std::initializer_list<::llvm::memprof::CallStackId> CallSiteFrames,
391-
const MemInfoBlock &Block = MemInfoBlock()) {
410+
const MemInfoBlock &Block) {
392411
llvm::memprof::IndexedMemProfRecord MR;
393412
for (const auto &CSId : AllocFrames)
394413
// We don't populate IndexedAllocationInfo::CallStack because we use it only
@@ -476,15 +495,56 @@ TEST_F(InstrProfTest, test_memprof_v0) {
476495
EXPECT_THAT(WantRecord, EqualsRecord(Record));
477496
}
478497

479-
TEST_F(InstrProfTest, test_memprof_v2) {
498+
struct CallStackIdConverter {
499+
std::optional<memprof::FrameId> LastUnmappedFrameId;
500+
std::optional<memprof::CallStackId> LastUnmappedCSId;
501+
502+
const FrameIdMapTy &IdToFrameMap;
503+
const CallStackIdMapTy &CSIdToCallStackMap;
504+
505+
CallStackIdConverter() = delete;
506+
CallStackIdConverter(const FrameIdMapTy &IdToFrameMap,
507+
const CallStackIdMapTy &CSIdToCallStackMap)
508+
: IdToFrameMap(IdToFrameMap), CSIdToCallStackMap(CSIdToCallStackMap) {}
509+
510+
llvm::SmallVector<memprof::Frame>
511+
operator()(::llvm::memprof::CallStackId CSId) {
512+
auto IdToFrameCallback = [&](const memprof::FrameId Id) {
513+
auto Iter = IdToFrameMap.find(Id);
514+
if (Iter == IdToFrameMap.end()) {
515+
LastUnmappedFrameId = Id;
516+
return memprof::Frame(0, 0, 0, false);
517+
}
518+
return Iter->second;
519+
};
520+
521+
llvm::SmallVector<memprof::Frame> Frames;
522+
auto CSIter = CSIdToCallStackMap.find(CSId);
523+
if (CSIter == CSIdToCallStackMap.end()) {
524+
LastUnmappedCSId = CSId;
525+
} else {
526+
const ::llvm::SmallVector<::llvm::memprof::FrameId> &CS =
527+
CSIter->getSecond();
528+
Frames.reserve(CS.size());
529+
for (::llvm::memprof::FrameId Id : CS)
530+
Frames.push_back(IdToFrameCallback(Id));
531+
}
532+
return Frames;
533+
}
534+
};
535+
536+
TEST_F(InstrProfTest, test_memprof_v2_full_schema) {
537+
const MemInfoBlock MIB = makeFullMIB();
538+
480539
Writer.setMemProfVersionRequested(memprof::Version2);
540+
Writer.setMemProfFullSchema(true);
481541

482542
ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf),
483543
Succeeded());
484544

485545
const IndexedMemProfRecord IndexedMR = makeRecordV2(
486546
/*AllocFrames=*/{0x111, 0x222},
487-
/*CallSiteFrames=*/{0x333});
547+
/*CallSiteFrames=*/{0x333}, MIB);
488548
const FrameIdMapTy IdToFrameMap = getFrameMapping();
489549
const auto CSIdToCallStackMap = getCallStackMapping();
490550
for (const auto &I : IdToFrameMap) {
@@ -502,38 +562,54 @@ TEST_F(InstrProfTest, test_memprof_v2) {
502562
ASSERT_THAT_ERROR(RecordOr.takeError(), Succeeded());
503563
const memprof::MemProfRecord &Record = RecordOr.get();
504564

505-
std::optional<memprof::FrameId> LastUnmappedFrameId;
506-
auto IdToFrameCallback = [&](const memprof::FrameId Id) {
507-
auto Iter = IdToFrameMap.find(Id);
508-
if (Iter == IdToFrameMap.end()) {
509-
LastUnmappedFrameId = Id;
510-
return memprof::Frame(0, 0, 0, false);
511-
}
512-
return Iter->second;
513-
};
565+
CallStackIdConverter CSIdConv(IdToFrameMap, CSIdToCallStackMap);
514566

515-
std::optional<::llvm::memprof::CallStackId> LastUnmappedCSId;
516-
auto CSIdToCallStackCallback = [&](::llvm::memprof::CallStackId CSId) {
517-
llvm::SmallVector<memprof::Frame> Frames;
518-
auto CSIter = CSIdToCallStackMap.find(CSId);
519-
if (CSIter == CSIdToCallStackMap.end()) {
520-
LastUnmappedCSId = CSId;
521-
} else {
522-
const ::llvm::SmallVector<::llvm::memprof::FrameId> &CS =
523-
CSIter->getSecond();
524-
Frames.reserve(CS.size());
525-
for (::llvm::memprof::FrameId Id : CS)
526-
Frames.push_back(IdToFrameCallback(Id));
527-
}
528-
return Frames;
529-
};
567+
const ::llvm::memprof::MemProfRecord WantRecord =
568+
IndexedMR.toMemProfRecord(CSIdConv);
569+
ASSERT_EQ(CSIdConv.LastUnmappedFrameId, std::nullopt)
570+
<< "could not map frame id: " << *CSIdConv.LastUnmappedFrameId;
571+
ASSERT_EQ(CSIdConv.LastUnmappedCSId, std::nullopt)
572+
<< "could not map call stack id: " << *CSIdConv.LastUnmappedCSId;
573+
EXPECT_THAT(WantRecord, EqualsRecord(Record));
574+
}
575+
576+
TEST_F(InstrProfTest, test_memprof_v2_partial_schema) {
577+
const MemInfoBlock MIB = makePartialMIB();
578+
579+
Writer.setMemProfVersionRequested(memprof::Version2);
580+
Writer.setMemProfFullSchema(false);
581+
582+
ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf),
583+
Succeeded());
584+
585+
const IndexedMemProfRecord IndexedMR = makeRecordV2(
586+
/*AllocFrames=*/{0x111, 0x222},
587+
/*CallSiteFrames=*/{0x333}, MIB);
588+
const FrameIdMapTy IdToFrameMap = getFrameMapping();
589+
const auto CSIdToCallStackMap = getCallStackMapping();
590+
for (const auto &I : IdToFrameMap) {
591+
Writer.addMemProfFrame(I.first, I.getSecond(), Err);
592+
}
593+
for (const auto &I : CSIdToCallStackMap) {
594+
Writer.addMemProfCallStack(I.first, I.getSecond(), Err);
595+
}
596+
Writer.addMemProfRecord(/*Id=*/0x9999, IndexedMR);
597+
598+
auto Profile = Writer.writeBuffer();
599+
readProfile(std::move(Profile));
600+
601+
auto RecordOr = Reader->getMemProfRecord(0x9999);
602+
ASSERT_THAT_ERROR(RecordOr.takeError(), Succeeded());
603+
const memprof::MemProfRecord &Record = RecordOr.get();
604+
605+
CallStackIdConverter CSIdConv(IdToFrameMap, CSIdToCallStackMap);
530606

531607
const ::llvm::memprof::MemProfRecord WantRecord =
532-
IndexedMR.toMemProfRecord(CSIdToCallStackCallback);
533-
ASSERT_EQ(LastUnmappedFrameId, std::nullopt)
534-
<< "could not map frame id: " << *LastUnmappedFrameId;
535-
ASSERT_EQ(LastUnmappedCSId, std::nullopt)
536-
<< "could not map call stack id: " << *LastUnmappedCSId;
608+
IndexedMR.toMemProfRecord(CSIdConv);
609+
ASSERT_EQ(CSIdConv.LastUnmappedFrameId, std::nullopt)
610+
<< "could not map frame id: " << *CSIdConv.LastUnmappedFrameId;
611+
ASSERT_EQ(CSIdConv.LastUnmappedCSId, std::nullopt)
612+
<< "could not map call stack id: " << *CSIdConv.LastUnmappedCSId;
537613
EXPECT_THAT(WantRecord, EqualsRecord(Record));
538614
}
539615

0 commit comments

Comments
 (0)