[memprof] Use uint32_t for linear call stack IDs #93924
Conversation
This patch switches to uint32_t for linear call stack IDs as uint32_t is sufficient to index into the call stack array.
@llvm/pr-subscribers-pgo

Author: Kazu Hirata (kazutakahirata)

Changes: This patch switches to uint32_t for linear call stack IDs as uint32_t is sufficient to index into the call stack array.

Full diff: https://github.com/llvm/llvm-project/pull/93924.diff

1 file affected: llvm/lib/ProfileData/MemProf.cpp
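For context, a minimal standalone sketch of the per-entry size difference this buys on disk (the typedef names follow the review discussion below; this snippet is illustrative, not LLVM code):

```cpp
#include <cstdint>
#include <cstdio>

// Illustrative typedefs only; the real definitions live in LLVM's MemProf headers.
using CallStackId = uint64_t;       // 64-bit hash identifying a call stack (used by the V2 format)
using LinearCallStackId = uint32_t; // position in the serialized call stack array (used by V3)

int main() {
  // Each call stack reference in a record shrinks from 8 to 4 bytes on disk.
  std::printf("V2 entry: %zu bytes, V3 entry: %zu bytes\n",
              sizeof(CallStackId), sizeof(LinearCallStackId));
  return 0;
}
```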
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 854b35b6924e1..65eacb136d720 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -45,6 +45,16 @@ static size_t serializedSizeV2(const IndexedAllocationInfo &IAI,
return Size;
}
+static size_t serializedSizeV3(const IndexedAllocationInfo &IAI,
+ const MemProfSchema &Schema) {
+ size_t Size = 0;
+ // The linear call stack ID.
+ Size += sizeof(uint32_t);
+ // The size of the payload.
+ Size += PortableMemInfoBlock::serializedSize(Schema);
+ return Size;
+}
+
size_t IndexedAllocationInfo::serializedSize(const MemProfSchema &Schema,
IndexedVersion Version) const {
switch (Version) {
@@ -52,8 +62,9 @@ size_t IndexedAllocationInfo::serializedSize(const MemProfSchema &Schema,
case Version1:
return serializedSizeV0(*this, Schema);
case Version2:
- case Version3:
return serializedSizeV2(*this, Schema);
+ case Version3:
+ return serializedSizeV3(*this, Schema);
}
llvm_unreachable("unsupported MemProf version");
}
@@ -89,6 +100,20 @@ static size_t serializedSizeV2(const IndexedMemProfRecord &Record,
return Result;
}
+static size_t serializedSizeV3(const IndexedMemProfRecord &Record,
+ const MemProfSchema &Schema) {
+ // The number of alloc sites to serialize.
+ size_t Result = sizeof(uint64_t);
+ for (const IndexedAllocationInfo &N : Record.AllocSites)
+ Result += N.serializedSize(Schema, Version3);
+
+ // The number of callsites we have information for.
+ Result += sizeof(uint64_t);
+ // The linear call stack ID.
+ Result += Record.CallSiteIds.size() * sizeof(uint32_t);
+ return Result;
+}
+
size_t IndexedMemProfRecord::serializedSize(const MemProfSchema &Schema,
IndexedVersion Version) const {
switch (Version) {
@@ -96,8 +121,9 @@ size_t IndexedMemProfRecord::serializedSize(const MemProfSchema &Schema,
case Version1:
return serializedSizeV0(*this, Schema);
case Version2:
- case Version3:
return serializedSizeV2(*this, Schema);
+ case Version3:
+ return serializedSizeV3(*this, Schema);
}
llvm_unreachable("unsupported MemProf version");
}
@@ -154,7 +180,7 @@ serializeV3(const IndexedMemProfRecord &Record, const MemProfSchema &Schema,
LE.write<uint64_t>(Record.AllocSites.size());
for (const IndexedAllocationInfo &N : Record.AllocSites) {
assert(MemProfCallStackIndexes.contains(N.CSId));
- LE.write<uint64_t>(MemProfCallStackIndexes[N.CSId]);
+ LE.write<uint32_t>(MemProfCallStackIndexes[N.CSId]);
N.Info.serialize(Schema, OS);
}
@@ -162,7 +188,7 @@ serializeV3(const IndexedMemProfRecord &Record, const MemProfSchema &Schema,
LE.write<uint64_t>(Record.CallSiteIds.size());
for (const auto &CSId : Record.CallSiteIds) {
assert(MemProfCallStackIndexes.contains(CSId));
- LE.write<uint64_t>(MemProfCallStackIndexes[CSId]);
+ LE.write<uint32_t>(MemProfCallStackIndexes[CSId]);
}
}
@@ -259,6 +285,37 @@ static IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema,
return Record;
}
+static IndexedMemProfRecord deserializeV3(const MemProfSchema &Schema,
+ const unsigned char *Ptr) {
+ using namespace support;
+
+ IndexedMemProfRecord Record;
+
+ // Read the meminfo nodes.
+ const uint64_t NumNodes =
+ endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+ Record.AllocSites.reserve(NumNodes);
+ for (uint64_t I = 0; I < NumNodes; I++) {
+ IndexedAllocationInfo Node;
+ Node.CSId = endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
+ Node.Info.deserialize(Schema, Ptr);
+ Ptr += PortableMemInfoBlock::serializedSize(Schema);
+ Record.AllocSites.push_back(Node);
+ }
+
+ // Read the callsite information.
+ const uint64_t NumCtxs =
+ endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+ Record.CallSiteIds.reserve(NumCtxs);
+ for (uint64_t J = 0; J < NumCtxs; J++) {
+ CallStackId CSId =
+ endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
+ Record.CallSiteIds.push_back(CSId);
+ }
+
+ return Record;
+}
+
IndexedMemProfRecord
IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
const unsigned char *Ptr,
@@ -268,8 +325,9 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
case Version1:
return deserializeV0(Schema, Ptr);
case Version2:
- case Version3:
return deserializeV2(Schema, Ptr);
+ case Version3:
+ return deserializeV3(Schema, Ptr);
}
llvm_unreachable("unsupported MemProf version");
}
llvm/lib/ProfileData/MemProf.cpp (Outdated)
endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
Record.CallSiteIds.reserve(NumCtxs);
for (uint64_t J = 0; J < NumCtxs; J++) {
  CallStackId CSId =
`CallStackId` is still a typedef to `uint64_t`. I guess we can't change this completely while we still have v2 around, but it is a little confusing, and presumably using more memory than needed in the in-memory data structures. Should there be a TODO to switch this to 32 bits?
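A compact sketch of the suggested TODO (hypothetical declarations, not the actual LLVM ones), assuming the narrowing would only happen once the V2 format, which serializes full 64-bit hashes, is retired:

```cpp
#include <cstdint>
#include <vector>

// TODO (suggested in review): narrow this to uint32_t once V2 support is dropped.
using CallStackId = uint64_t;

struct RecordSketch {
  // The in-memory record pays 8 bytes per ID today, even though a V3 reader
  // only ever needs a 32-bit linear index here.
  std::vector<CallStackId> CallSiteIds;
};
```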
llvm/lib/ProfileData/MemProf.cpp (Outdated)
// The number of callsites we have information for.
Result += sizeof(uint64_t);
// The linear call stack ID.
Result += Record.CallSiteIds.size() * sizeof(uint32_t);
v2 uses `sizeof(CallStackId)`, which is still 64 bits. Should we create a new typename and use it in these functions, with a TODO to remove the old one (see also my comment later on about this)?
I've added a new typename in the latest iteration.

I thought about the TODO you are suggesting, but it's a bit complicated. I think the source of confusion is that I am multi-purposing `IndexedAllocationInfo::CSId` and `IndexedMemProfRecord::CallSiteIds`:

- During serialization (like `llvm-profdata merge`), we store 64-bit call stack hash values there -- the original meaning of `CallStackId`.
- During deserialization (in the compiler proper), we temporarily store 32-bit linear IDs in `CSId` and `CallSiteIds` just before constructing `MemProfRecord`.

For now, I'm inclined to keep things the way they are, except that we use `LinearCallStackId` for readability where we don't need to store 64-bit call stack hash values.

I thought about putting true `CallStackId` values (that is, 64-bit call stack hash values) in these fields during deserialization, replacing each linear ID with a `CallStackId` as we deserialize -- the opposite of what we do during serialization. The problem is that computing the 64-bit call stack hash requires a fully reconstructed call stack consisting of `Frame`s, and once we have a fully reconstructed call stack of `Frame`s, why not go all the way to `MemProfRecord`? It seems wasteful to reconstruct a call stack, store it away in some hash table, and immediately retrieve it when we construct `MemProfRecord`.
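A rough sketch of the dual use described above (type and field names simplified; the real structures live in LLVM's MemProf headers):

```cpp
#include <cstdint>

using CallStackId = uint64_t;       // 64-bit call stack hash
using LinearCallStackId = uint32_t; // 32-bit index into the call stack array

// Simplified stand-in for IndexedAllocationInfo.
struct AllocInfoSketch {
  // During serialization (e.g. llvm-profdata merge): holds the 64-bit hash.
  // During deserialization in the compiler: temporarily holds a
  // LinearCallStackId, widened to 64 bits, until MemProfRecord is built.
  CallStackId CSId;
};

// Deserialization side: store the 32-bit linear ID in the 64-bit field.
inline void setLinearId(AllocInfoSketch &AI, LinearCallStackId Index) {
  AI.CSId = Index; // implicit widening; no information is lost
}
```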