Skip to content

Commit c00e532

Browse files
[memprof] Add YAML-based deserialization for MemProf profile (#117829)
This patch adds YAML-based deserialization for MemProf profile. It's been painful to write tests for MemProf passes because we do not have a text format for the MemProf profile. We would write a test case in C++, run it to produce a binary MemProf profile, and then finally run a test written in LLVM IR with the binary profile. This patch paves the way toward a YAML-based MemProf profile. Specifically, it adds a new class, YAMLMemProfReader, derived from MemProfReader. For now, it only adds a function to parse a StringRef pointing to YAML data. Subsequent patches will wire it to llvm-profdata and read from a file. The field names are based on various printYAML functions in MemProf.h. I'm not aiming for compatibility with the format used in printYAML, but I don't see a point in changing the field names.
1 parent 50dfb07 commit c00e532

File tree

4 files changed

+225
-0
lines changed

4 files changed

+225
-0
lines changed

llvm/include/llvm/ProfileData/MemProf.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@
1919
#include <optional>
2020

2121
namespace llvm {
22+
namespace yaml {
23+
template <typename T> struct CustomMappingTraits;
24+
} // namespace yaml
25+
2226
namespace memprof {
2327

2428
struct MemProfRecord;
@@ -193,6 +197,9 @@ struct PortableMemInfoBlock {
193197
return Result;
194198
}
195199

200+
// Give YAML access to the individual MIB fields.
201+
friend struct yaml::CustomMappingTraits<memprof::PortableMemInfoBlock>;
202+
196203
private:
197204
// The set of available fields, indexed by Meta::Name.
198205
std::bitset<llvm::to_underlying(Meta::Size)> Schema;
@@ -362,6 +369,8 @@ struct IndexedAllocationInfo {
362369
IndexedAllocationInfo(CallStackId CSId, const MemInfoBlock &MB,
363370
const MemProfSchema &Schema = getFullSchema())
364371
: CSId(CSId), Info(MB, Schema) {}
372+
IndexedAllocationInfo(CallStackId CSId, const PortableMemInfoBlock &MB)
373+
: CSId(CSId), Info(MB) {}
365374

366375
// Returns the size in bytes when this allocation info struct is serialized.
367376
size_t serializedSize(const MemProfSchema &Schema,
@@ -498,6 +507,19 @@ struct MemProfRecord {
498507
}
499508
};
500509

510+
// Helper struct for AllMemProfData. In YAML, we treat the GUID and the fields
511+
// within MemProfRecord at the same level as if the GUID were part of
512+
// MemProfRecord.
513+
struct GUIDMemProfRecordPair {
514+
GlobalValue::GUID GUID;
515+
MemProfRecord Record;
516+
};
517+
518+
// The top-level data structure, only used with YAML for now.
519+
struct AllMemProfData {
520+
std::vector<GUIDMemProfRecordPair> HeapProfileRecords;
521+
};
522+
501523
// Reads a memprof schema from a buffer. All entries in the buffer are
502524
// interpreted as uint64_t. The first entry in the buffer denotes the number of
503525
// ids in the schema. Subsequent entries are integers which map to memprof::Meta

llvm/include/llvm/ProfileData/MemProfReader.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,12 @@ class RawMemProfReader final : public MemProfReader {
209209
// A mapping of the hash to symbol name, only used if KeepSymbolName is true.
210210
llvm::DenseMap<uint64_t, std::string> GuidToSymbolName;
211211
};
212+
213+
class YAMLMemProfReader final : public MemProfReader {
214+
public:
215+
YAMLMemProfReader() = default;
216+
void parse(StringRef YAMLData);
217+
};
212218
} // namespace memprof
213219
} // namespace llvm
214220

llvm/lib/ProfileData/MemProfReader.cpp

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,85 @@
4040
#include "llvm/Support/Path.h"
4141

4242
#define DEBUG_TYPE "memprof"
43+
44+
namespace llvm {
45+
namespace yaml {
46+
template <> struct MappingTraits<memprof::Frame> {
47+
static void mapping(IO &Io, memprof::Frame &F) {
48+
Io.mapRequired("Function", F.Function);
49+
Io.mapRequired("LineOffset", F.LineOffset);
50+
Io.mapRequired("Column", F.Column);
51+
Io.mapRequired("Inline", F.IsInlineFrame);
52+
53+
// Assert that the definition of Frame matches what we expect. The
54+
// structured bindings below detect changes to the number of fields.
55+
// static_assert checks the type of each field.
56+
const auto &[Function, SymbolName, LineOffset, Column, IsInlineFrame] = F;
57+
static_assert(
58+
std::is_same_v<remove_cvref_t<decltype(Function)>, GlobalValue::GUID>);
59+
static_assert(std::is_same_v<remove_cvref_t<decltype(SymbolName)>,
60+
std::unique_ptr<std::string>>);
61+
static_assert(
62+
std::is_same_v<remove_cvref_t<decltype(LineOffset)>, uint32_t>);
63+
static_assert(std::is_same_v<remove_cvref_t<decltype(Column)>, uint32_t>);
64+
static_assert(
65+
std::is_same_v<remove_cvref_t<decltype(IsInlineFrame)>, bool>);
66+
}
67+
};
68+
69+
// YAML mapping for PortableMemInfoBlock. A custom mapping is used because the
// set of keys present in the document determines which fields of the block
// are marked as available in its schema.
template <> struct CustomMappingTraits<memprof::PortableMemInfoBlock> {
  // Handles one key-value pair of the mapping. If KeyStr names a field listed
  // in MIBEntryDef.inc, the value is read into that field and the field is
  // flagged in MIB.Schema; otherwise an error is reported on Io.
  static void inputOne(IO &Io, StringRef KeyStr,
                       memprof::PortableMemInfoBlock &MIB) {
    // PortableMemInfoBlock keeps track of the set of fields that actually have
    // values. We update the set here as we receive a key-value pair from the
    // YAML document.
    //
    // Once KeyStr has matched #Name, the string literal itself is passed as
    // the key: it has the same contents and avoids a temporary std::string.
#define MIBEntryDef(NameTag, Name, Type)                                       \
  if (KeyStr == #Name) {                                                       \
    Io.mapRequired(#Name, MIB.Name);                                           \
    MIB.Schema.set(llvm::to_underlying(memprof::Meta::Name));                  \
    return;                                                                    \
  }
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
    // No MIB field carries this name.
    Io.setError("Key is not a valid MemInfoBlock field");
  }

  // Serialization to YAML is not supported yet.
  static void output(IO &Io, memprof::PortableMemInfoBlock &VI) {
    llvm_unreachable("To be implemented");
  }
};
90+
91+
template <> struct MappingTraits<memprof::AllocationInfo> {
92+
static void mapping(IO &Io, memprof::AllocationInfo &AI) {
93+
Io.mapRequired("Callstack", AI.CallStack);
94+
Io.mapRequired("MemInfoBlock", AI.Info);
95+
}
96+
};
97+
98+
// In YAML, we use GUIDMemProfRecordPair instead of MemProfRecord so that we can
99+
// treat the GUID and the fields within MemProfRecord at the same level as if
100+
// the GUID were part of MemProfRecord.
101+
template <> struct MappingTraits<memprof::GUIDMemProfRecordPair> {
102+
static void mapping(IO &Io, memprof::GUIDMemProfRecordPair &Pair) {
103+
Io.mapRequired("GUID", Pair.GUID);
104+
Io.mapRequired("AllocSites", Pair.Record.AllocSites);
105+
Io.mapRequired("CallSites", Pair.Record.CallSites);
106+
}
107+
};
108+
109+
template <> struct MappingTraits<memprof::AllMemProfData> {
110+
static void mapping(IO &Io, memprof::AllMemProfData &Data) {
111+
Io.mapRequired("HeapProfileRecords", Data.HeapProfileRecords);
112+
}
113+
};
114+
} // namespace yaml
115+
} // namespace llvm
116+
117+
// Let YAML I/O treat std::vector of each of these element types as a YAML
// sequence.
// A call stack is a sequence of frames.
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::Frame)
// "CallSites" is a sequence of call stacks, i.e. a sequence of sequences.
LLVM_YAML_IS_SEQUENCE_VECTOR(std::vector<memprof::Frame>)
// "AllocSites" is a sequence of allocation infos.
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo)
// "HeapProfileRecords" is a sequence of GUID/record pairs.
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair)
121+
43122
namespace llvm {
44123
namespace memprof {
45124
namespace {
@@ -756,5 +835,46 @@ Error RawMemProfReader::readNextRecord(
756835
};
757836
return MemProfReader::readNextRecord(GuidRecord, IdToFrameCallback);
758837
}
838+
839+
// Deserializes YAMLData into an AllMemProfData document and converts it to the
// indexed form stored in MemProfData: every frame and call stack is registered
// under its hash, and each GUID gets an IndexedMemProfRecord that refers to
// call stacks by id. If the YAML input reports an error, nothing is added.
void YAMLMemProfReader::parse(StringRef YAMLData) {
  yaml::Input Reader(YAMLData);
  memprof::AllMemProfData Parsed;

  Reader >> Parsed;
  if (Reader.error())
    return;

  // Registers every frame of Frames and the call stack they form in
  // MemProfData, then returns the id of that call stack. try_emplace leaves
  // entries that are already present untouched.
  auto InternCallStack = [this](ArrayRef<Frame> Frames) -> CallStackId {
    SmallVector<FrameId> FrameIds;
    FrameIds.reserve(Frames.size());
    for (const Frame &Fr : Frames) {
      FrameId FId = Fr.hash();
      MemProfData.Frames.try_emplace(FId, Fr);
      FrameIds.push_back(FId);
    }
    // The id must be computed before FrameIds is moved from.
    CallStackId StackId = hashCallStack(FrameIds);
    MemProfData.CallStacks.try_emplace(StackId, std::move(FrameIds));
    return StackId;
  };

  for (const memprof::GUIDMemProfRecordPair &Entry :
       Parsed.HeapProfileRecords) {
    IndexedMemProfRecord Indexed;

    // Each AllocationInfo becomes an IndexedAllocationInfo that keeps the
    // MemInfoBlock and refers to the call stack by id.
    for (const AllocationInfo &Alloc : Entry.Record.AllocSites)
      Indexed.AllocSites.emplace_back(InternCallStack(Alloc.CallStack),
                                      Alloc.Info);

    // Call sites are stored as call stack ids only.
    for (const auto &Site : Entry.Record.CallSites)
      Indexed.CallSiteIds.push_back(InternCallStack(Site));

    MemProfData.Records.try_emplace(Entry.GUID, std::move(Indexed));
  }
}
759879
} // namespace memprof
760880
} // namespace llvm

llvm/unittests/ProfileData/MemProfTest.cpp

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ using ::llvm::memprof::CallStackId;
3434
using ::llvm::memprof::CallStackMap;
3535
using ::llvm::memprof::Frame;
3636
using ::llvm::memprof::FrameId;
37+
using ::llvm::memprof::hashCallStack;
3738
using ::llvm::memprof::IndexedAllocationInfo;
3839
using ::llvm::memprof::IndexedMemProfRecord;
3940
using ::llvm::memprof::MemInfoBlock;
@@ -46,8 +47,11 @@ using ::llvm::memprof::RawMemProfReader;
4647
using ::llvm::memprof::SegmentEntry;
4748
using ::llvm::object::SectionedAddress;
4849
using ::llvm::symbolize::SymbolizableModule;
50+
using ::testing::ElementsAre;
51+
using ::testing::Pair;
4952
using ::testing::Return;
5053
using ::testing::SizeIs;
54+
using ::testing::UnorderedElementsAre;
5155

5256
class MockSymbolizer : public SymbolizableModule {
5357
public:
@@ -742,4 +746,77 @@ TEST(MemProf, RadixTreeBuilderSuccessiveJumps) {
742746
EXPECT_THAT(Mappings, testing::Contains(testing::Pair(
743747
llvm::memprof::hashCallStack(CS4), 10U)));
744748
}
749+
750+
// Verify that we can parse YAML and retrieve IndexedMemProfData as expected.
751+
TEST(MemProf, YAMLParser) {
752+
StringRef YAMLData = R"YAML(
753+
---
754+
HeapProfileRecords:
755+
- GUID: 0xdeadbeef12345678
756+
AllocSites:
757+
- Callstack:
758+
- {Function: 0x100, LineOffset: 11, Column: 10, Inline: true}
759+
- {Function: 0x200, LineOffset: 22, Column: 20, Inline: false}
760+
MemInfoBlock:
761+
AllocCount: 777
762+
TotalSize: 888
763+
- Callstack:
764+
- {Function: 0x300, LineOffset: 33, Column: 30, Inline: false}
765+
- {Function: 0x400, LineOffset: 44, Column: 40, Inline: true}
766+
MemInfoBlock:
767+
AllocCount: 666
768+
TotalSize: 555
769+
CallSites:
770+
- - {Function: 0x500, LineOffset: 55, Column: 50, Inline: true}
771+
- {Function: 0x600, LineOffset: 66, Column: 60, Inline: false}
772+
- - {Function: 0x700, LineOffset: 77, Column: 70, Inline: true}
773+
- {Function: 0x800, LineOffset: 88, Column: 80, Inline: false}
774+
)YAML";
775+
776+
llvm::memprof::YAMLMemProfReader YAMLReader;
777+
YAMLReader.parse(YAMLData);
778+
llvm::memprof::IndexedMemProfData MemProfData = YAMLReader.takeMemProfData();
779+
780+
Frame F1(0x100, 11, 10, true);
781+
Frame F2(0x200, 22, 20, false);
782+
Frame F3(0x300, 33, 30, false);
783+
Frame F4(0x400, 44, 40, true);
784+
Frame F5(0x500, 55, 50, true);
785+
Frame F6(0x600, 66, 60, false);
786+
Frame F7(0x700, 77, 70, true);
787+
Frame F8(0x800, 88, 80, false);
788+
789+
llvm::SmallVector<FrameId> CS1 = {F1.hash(), F2.hash()};
790+
llvm::SmallVector<FrameId> CS2 = {F3.hash(), F4.hash()};
791+
llvm::SmallVector<FrameId> CS3 = {F5.hash(), F6.hash()};
792+
llvm::SmallVector<FrameId> CS4 = {F7.hash(), F8.hash()};
793+
794+
// Verify the entire contents of MemProfData.Frames.
795+
EXPECT_THAT(MemProfData.Frames,
796+
UnorderedElementsAre(Pair(F1.hash(), F1), Pair(F2.hash(), F2),
797+
Pair(F3.hash(), F3), Pair(F4.hash(), F4),
798+
Pair(F5.hash(), F5), Pair(F6.hash(), F6),
799+
Pair(F7.hash(), F7), Pair(F8.hash(), F8)));
800+
801+
// Verify the entire contents of MemProfData.CallStacks.
802+
EXPECT_THAT(MemProfData.CallStacks,
803+
UnorderedElementsAre(Pair(hashCallStack(CS1), CS1),
804+
Pair(hashCallStack(CS2), CS2),
805+
Pair(hashCallStack(CS3), CS3),
806+
Pair(hashCallStack(CS4), CS4)));
807+
808+
// Verify the entire contents of MemProfData.Records.
809+
ASSERT_THAT(MemProfData.Records, SizeIs(1));
810+
const auto &[GUID, Record] = *MemProfData.Records.begin();
811+
EXPECT_EQ(GUID, 0xdeadbeef12345678ULL);
812+
ASSERT_THAT(Record.AllocSites, SizeIs(2));
813+
EXPECT_EQ(Record.AllocSites[0].CSId, hashCallStack(CS1));
814+
EXPECT_EQ(Record.AllocSites[0].Info.getAllocCount(), 777U);
815+
EXPECT_EQ(Record.AllocSites[0].Info.getTotalSize(), 888U);
816+
EXPECT_EQ(Record.AllocSites[1].CSId, hashCallStack(CS2));
817+
EXPECT_EQ(Record.AllocSites[1].Info.getAllocCount(), 666U);
818+
EXPECT_EQ(Record.AllocSites[1].Info.getTotalSize(), 555U);
819+
EXPECT_THAT(Record.CallSiteIds,
820+
ElementsAre(hashCallStack(CS3), hashCallStack(CS4)));
821+
}
745822
} // namespace

0 commit comments

Comments
 (0)