Skip to content

[memprof] Reduce schema for Version2 #89876

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion llvm/include/llvm/ProfileData/InstrProfWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,15 @@ class InstrProfWriter {
// The MemProf version we should write.
memprof::IndexedVersion MemProfVersionRequested;

// Whether to serialize the full schema.
bool MemProfFullSchema;

public:
InstrProfWriter(
bool Sparse = false, uint64_t TemporalProfTraceReservoirSize = 0,
uint64_t MaxTemporalProfTraceLength = 0, bool WritePrevVersion = false,
memprof::IndexedVersion MemProfVersionRequested = memprof::Version0);
memprof::IndexedVersion MemProfVersionRequested = memprof::Version0,
bool MemProfFullSchema = false);
~InstrProfWriter();

StringMap<ProfilingData> &getProfileData() { return FunctionData; }
Expand Down Expand Up @@ -203,6 +207,7 @@ class InstrProfWriter {
void setMemProfVersionRequested(memprof::IndexedVersion Version) {
MemProfVersionRequested = Version;
}
void setMemProfFullSchema(bool Full) { MemProfFullSchema = Full; }
// Compute the overlap b/w this object and Other. Program level result is
// stored in Overlap and function level result is stored in FuncLevelOverlap.
void overlapRecord(NamedInstrProfRecord &&Other, OverlapStats &Overlap,
Expand Down
9 changes: 8 additions & 1 deletion llvm/include/llvm/ProfileData/MemProf.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,14 +117,21 @@ struct PortableMemInfoBlock {
void clear() { *this = PortableMemInfoBlock(); }

// Returns the full schema currently in use.
static MemProfSchema getSchema() {
static MemProfSchema getFullSchema() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason that these are part of the PortableMemInfoBlock? IMO they should be peers of the Schema itself (under llvm::memprof).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll post a separate patch for that.

MemProfSchema List;
#define MIBEntryDef(NameTag, Name, Type) List.push_back(Meta::Name);
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
return List;
}

// Returns the schema consisting of the fields currently consumed by the
// compiler.
static MemProfSchema getHotColdSchema() {
return {Meta::AllocCount, Meta::TotalSize, Meta::TotalLifetime,
Meta::TotalLifetimeAccessDensity};
}

bool operator==(const PortableMemInfoBlock &Other) const {
#define MIBEntryDef(NameTag, Name, Type) \
if (Other.get##Name() != get##Name()) \
Expand Down
23 changes: 14 additions & 9 deletions llvm/lib/ProfileData/InstrProfWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,12 +184,13 @@ class InstrProfRecordWriterTrait {
InstrProfWriter::InstrProfWriter(
bool Sparse, uint64_t TemporalProfTraceReservoirSize,
uint64_t MaxTemporalProfTraceLength, bool WritePrevVersion,
memprof::IndexedVersion MemProfVersionRequested)
memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema)
: Sparse(Sparse), MaxTemporalProfTraceLength(MaxTemporalProfTraceLength),
TemporalProfTraceReservoirSize(TemporalProfTraceReservoirSize),
InfoObj(new InstrProfRecordWriterTrait()),
WritePrevVersion(WritePrevVersion),
MemProfVersionRequested(MemProfVersionRequested) {}
MemProfVersionRequested(MemProfVersionRequested),
MemProfFullSchema(MemProfFullSchema) {}

InstrProfWriter::~InstrProfWriter() { delete InfoObj; }

Expand Down Expand Up @@ -507,7 +508,7 @@ static Error writeMemProfV0(
OS.write(0ULL); // Reserve space for the memprof frame payload offset.
OS.write(0ULL); // Reserve space for the memprof frame table offset.

auto Schema = memprof::PortableMemInfoBlock::getSchema();
auto Schema = memprof::PortableMemInfoBlock::getFullSchema();
writeMemProfSchema(OS, Schema);

uint64_t RecordTableOffset =
Expand All @@ -533,7 +534,7 @@ static Error writeMemProfV1(
OS.write(0ULL); // Reserve space for the memprof frame payload offset.
OS.write(0ULL); // Reserve space for the memprof frame table offset.

auto Schema = memprof::PortableMemInfoBlock::getSchema();
auto Schema = memprof::PortableMemInfoBlock::getFullSchema();
writeMemProfSchema(OS, Schema);

uint64_t RecordTableOffset =
Expand All @@ -554,7 +555,8 @@ static Error writeMemProfV2(
&MemProfRecordData,
llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
&MemProfCallStackData) {
&MemProfCallStackData,
bool MemProfFullSchema) {
OS.write(memprof::Version2);
uint64_t HeaderUpdatePos = OS.tell();
OS.write(0ULL); // Reserve space for the memprof record table offset.
Expand All @@ -563,7 +565,9 @@ static Error writeMemProfV2(
OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
OS.write(0ULL); // Reserve space for the memprof call stack table offset.

auto Schema = memprof::PortableMemInfoBlock::getSchema();
auto Schema = memprof::PortableMemInfoBlock::getHotColdSchema();
if (MemProfFullSchema)
Schema = memprof::PortableMemInfoBlock::getFullSchema();
writeMemProfSchema(OS, Schema);

uint64_t RecordTableOffset =
Expand Down Expand Up @@ -605,7 +609,7 @@ static Error writeMemProf(
llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
&MemProfCallStackData,
memprof::IndexedVersion MemProfVersionRequested) {
memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema) {

switch (MemProfVersionRequested) {
case memprof::Version0:
Expand All @@ -614,7 +618,7 @@ static Error writeMemProf(
return writeMemProfV1(OS, MemProfRecordData, MemProfFrameData);
case memprof::Version2:
return writeMemProfV2(OS, MemProfRecordData, MemProfFrameData,
MemProfCallStackData);
MemProfCallStackData, MemProfFullSchema);
}

return make_error<InstrProfError>(
Expand Down Expand Up @@ -733,7 +737,8 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) {
MemProfSectionStart = OS.tell();
if (auto E = writeMemProf(OS, MemProfRecordData, MemProfFrameData,
MemProfCallStackData, MemProfVersionRequested))
MemProfCallStackData, MemProfVersionRequested,
MemProfFullSchema))
return E;
}

Expand Down
3 changes: 3 additions & 0 deletions llvm/test/tools/llvm-profdata/memprof-merge-v0.test
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ RUN: llvm-profdata show %t.prof.v1 | FileCheck %s
RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --memprof-version=2 --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof.v2
RUN: llvm-profdata show %t.prof.v2 | FileCheck %s

RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --memprof-version=2 --memprof-full-schema --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof.v2
RUN: llvm-profdata show %t.prof.v2 | FileCheck %s
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any way to check the output to confirm that we either serialized 4 fields or all fields?

Also, just noticed that this test has "v0" in the name, should the name be changed to reflect the fact that it is testing all versions? (not in this PR but separately)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any way to check the output to confirm that we either serialized 4 fields or all fields?

Yes, let me try to extend llvm/unittests/ProfileData/InstrProfTest.cpp to verify that the roundtrip works in each case -- the 4 fields or all fields.

Also, just noticed that this test has "v0" in the name, should the name be changed to reflect the fact that it is testing all versions? (not in this PR but separately)

Yes, I just borrowed a test memprof-merge.test and added -v0 to it. I'll come up with a better name in a separate patch.


For now we only check the validity of the instrumented profile since we don't
have a way to display the contents of the memprof indexed format yet.

Expand Down
6 changes: 5 additions & 1 deletion llvm/tools/llvm-profdata/llvm-profdata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,10 @@ cl::opt<memprof::IndexedVersion> MemProfVersionRequested(
clEnumValN(memprof::Version1, "1", "version 1"),
clEnumValN(memprof::Version2, "2", "version 2")));

cl::opt<bool> MemProfFullSchema(
"memprof-full-schema", cl::Hidden, cl::sub(MergeSubcommand),
cl::desc("Use the full schema for serialization"), cl::init(false));

// Options specific to overlap subcommand.
cl::opt<std::string> BaseFilename(cl::Positional, cl::Required,
cl::desc("<base profile file>"),
Expand Down Expand Up @@ -600,7 +604,7 @@ struct WriterContext {
SmallSet<instrprof_error, 4> &WriterErrorCodes,
uint64_t ReservoirSize = 0, uint64_t MaxTraceLength = 0)
: Writer(IsSparse, ReservoirSize, MaxTraceLength, DoWritePrevVersion,
MemProfVersionRequested),
MemProfVersionRequested, MemProfFullSchema),
ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) {}
};

Expand Down
142 changes: 109 additions & 33 deletions llvm/unittests/ProfileData/InstrProfTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -370,12 +370,31 @@ static CallStackIdMapTy getCallStackMapping() {
return Mapping;
}

// Populate all of the fields of MIB.
MemInfoBlock makeFullMIB() {
MemInfoBlock MIB;
#define MIBEntryDef(NameTag, Name, Type) MIB.NameTag;
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
return MIB;
}

// Populate those fields returned by getHotColdSchema.
MemInfoBlock makePartialMIB() {
MemInfoBlock MIB;
MIB.AllocCount = 1;
MIB.TotalSize = 5;
MIB.TotalLifetime = 10;
MIB.TotalLifetimeAccessDensity = 23;
return MIB;
}

IndexedMemProfRecord makeRecord(
std::initializer_list<std::initializer_list<::llvm::memprof::FrameId>>
AllocFrames,
std::initializer_list<std::initializer_list<::llvm::memprof::FrameId>>
CallSiteFrames,
const MemInfoBlock &Block = MemInfoBlock()) {
const MemInfoBlock &Block = makeFullMIB()) {
llvm::memprof::IndexedMemProfRecord MR;
for (const auto &Frames : AllocFrames)
MR.AllocSites.emplace_back(Frames, llvm::memprof::hashCallStack(Frames),
Expand All @@ -388,7 +407,7 @@ IndexedMemProfRecord makeRecord(
IndexedMemProfRecord
makeRecordV2(std::initializer_list<::llvm::memprof::CallStackId> AllocFrames,
std::initializer_list<::llvm::memprof::CallStackId> CallSiteFrames,
const MemInfoBlock &Block = MemInfoBlock()) {
const MemInfoBlock &Block) {
llvm::memprof::IndexedMemProfRecord MR;
for (const auto &CSId : AllocFrames)
// We don't populate IndexedAllocationInfo::CallStack because we use it only
Expand Down Expand Up @@ -476,15 +495,56 @@ TEST_F(InstrProfTest, test_memprof_v0) {
EXPECT_THAT(WantRecord, EqualsRecord(Record));
}

TEST_F(InstrProfTest, test_memprof_v2) {
struct CallStackIdConverter {
std::optional<memprof::FrameId> LastUnmappedFrameId;
std::optional<memprof::CallStackId> LastUnmappedCSId;

const FrameIdMapTy &IdToFrameMap;
const CallStackIdMapTy &CSIdToCallStackMap;

CallStackIdConverter() = delete;
CallStackIdConverter(const FrameIdMapTy &IdToFrameMap,
const CallStackIdMapTy &CSIdToCallStackMap)
: IdToFrameMap(IdToFrameMap), CSIdToCallStackMap(CSIdToCallStackMap) {}

llvm::SmallVector<memprof::Frame>
operator()(::llvm::memprof::CallStackId CSId) {
auto IdToFrameCallback = [&](const memprof::FrameId Id) {
auto Iter = IdToFrameMap.find(Id);
if (Iter == IdToFrameMap.end()) {
LastUnmappedFrameId = Id;
return memprof::Frame(0, 0, 0, false);
}
return Iter->second;
};

llvm::SmallVector<memprof::Frame> Frames;
auto CSIter = CSIdToCallStackMap.find(CSId);
if (CSIter == CSIdToCallStackMap.end()) {
LastUnmappedCSId = CSId;
} else {
const ::llvm::SmallVector<::llvm::memprof::FrameId> &CS =
CSIter->getSecond();
Frames.reserve(CS.size());
for (::llvm::memprof::FrameId Id : CS)
Frames.push_back(IdToFrameCallback(Id));
}
return Frames;
}
};

TEST_F(InstrProfTest, test_memprof_v2_full_schema) {
const MemInfoBlock MIB = makeFullMIB();

Writer.setMemProfVersionRequested(memprof::Version2);
Writer.setMemProfFullSchema(true);

ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf),
Succeeded());

const IndexedMemProfRecord IndexedMR = makeRecordV2(
/*AllocFrames=*/{0x111, 0x222},
/*CallSiteFrames=*/{0x333});
/*CallSiteFrames=*/{0x333}, MIB);
const FrameIdMapTy IdToFrameMap = getFrameMapping();
const auto CSIdToCallStackMap = getCallStackMapping();
for (const auto &I : IdToFrameMap) {
Expand All @@ -502,38 +562,54 @@ TEST_F(InstrProfTest, test_memprof_v2) {
ASSERT_THAT_ERROR(RecordOr.takeError(), Succeeded());
const memprof::MemProfRecord &Record = RecordOr.get();

std::optional<memprof::FrameId> LastUnmappedFrameId;
auto IdToFrameCallback = [&](const memprof::FrameId Id) {
auto Iter = IdToFrameMap.find(Id);
if (Iter == IdToFrameMap.end()) {
LastUnmappedFrameId = Id;
return memprof::Frame(0, 0, 0, false);
}
return Iter->second;
};
CallStackIdConverter CSIdConv(IdToFrameMap, CSIdToCallStackMap);

std::optional<::llvm::memprof::CallStackId> LastUnmappedCSId;
auto CSIdToCallStackCallback = [&](::llvm::memprof::CallStackId CSId) {
llvm::SmallVector<memprof::Frame> Frames;
auto CSIter = CSIdToCallStackMap.find(CSId);
if (CSIter == CSIdToCallStackMap.end()) {
LastUnmappedCSId = CSId;
} else {
const ::llvm::SmallVector<::llvm::memprof::FrameId> &CS =
CSIter->getSecond();
Frames.reserve(CS.size());
for (::llvm::memprof::FrameId Id : CS)
Frames.push_back(IdToFrameCallback(Id));
}
return Frames;
};
const ::llvm::memprof::MemProfRecord WantRecord =
IndexedMR.toMemProfRecord(CSIdConv);
ASSERT_EQ(CSIdConv.LastUnmappedFrameId, std::nullopt)
<< "could not map frame id: " << *CSIdConv.LastUnmappedFrameId;
ASSERT_EQ(CSIdConv.LastUnmappedCSId, std::nullopt)
<< "could not map call stack id: " << *CSIdConv.LastUnmappedCSId;
EXPECT_THAT(WantRecord, EqualsRecord(Record));
}

TEST_F(InstrProfTest, test_memprof_v2_partial_schema) {
const MemInfoBlock MIB = makePartialMIB();

Writer.setMemProfVersionRequested(memprof::Version2);
Writer.setMemProfFullSchema(false);

ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf),
Succeeded());

const IndexedMemProfRecord IndexedMR = makeRecordV2(
/*AllocFrames=*/{0x111, 0x222},
/*CallSiteFrames=*/{0x333}, MIB);
const FrameIdMapTy IdToFrameMap = getFrameMapping();
const auto CSIdToCallStackMap = getCallStackMapping();
for (const auto &I : IdToFrameMap) {
Writer.addMemProfFrame(I.first, I.getSecond(), Err);
}
for (const auto &I : CSIdToCallStackMap) {
Writer.addMemProfCallStack(I.first, I.getSecond(), Err);
}
Writer.addMemProfRecord(/*Id=*/0x9999, IndexedMR);

auto Profile = Writer.writeBuffer();
readProfile(std::move(Profile));

auto RecordOr = Reader->getMemProfRecord(0x9999);
ASSERT_THAT_ERROR(RecordOr.takeError(), Succeeded());
const memprof::MemProfRecord &Record = RecordOr.get();

CallStackIdConverter CSIdConv(IdToFrameMap, CSIdToCallStackMap);

const ::llvm::memprof::MemProfRecord WantRecord =
IndexedMR.toMemProfRecord(CSIdToCallStackCallback);
ASSERT_EQ(LastUnmappedFrameId, std::nullopt)
<< "could not map frame id: " << *LastUnmappedFrameId;
ASSERT_EQ(LastUnmappedCSId, std::nullopt)
<< "could not map call stack id: " << *LastUnmappedCSId;
IndexedMR.toMemProfRecord(CSIdConv);
ASSERT_EQ(CSIdConv.LastUnmappedFrameId, std::nullopt)
<< "could not map frame id: " << *CSIdConv.LastUnmappedFrameId;
ASSERT_EQ(CSIdConv.LastUnmappedCSId, std::nullopt)
<< "could not map call stack id: " << *CSIdConv.LastUnmappedCSId;
EXPECT_THAT(WantRecord, EqualsRecord(Record));
}

Expand Down