[memprof] Use uint32_t for linear call stack IDs #93924


Merged

Conversation

kazutakahirata
Contributor

This patch switches to uint32_t for linear call stack IDs as uint32_t
is sufficient to index into the call stack array.

@llvmbot llvmbot added the PGO Profile Guided Optimizations label May 31, 2024
@llvmbot
Member

llvmbot commented May 31, 2024

@llvm/pr-subscribers-pgo

Author: Kazu Hirata (kazutakahirata)

Changes

This patch switches to uint32_t for linear call stack IDs as uint32_t
is sufficient to index into the call stack array.


Full diff: https://github.com/llvm/llvm-project/pull/93924.diff

1 file affected:

  • (modified) llvm/lib/ProfileData/MemProf.cpp (+63-5)
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 854b35b6924e1..65eacb136d720 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -45,6 +45,16 @@ static size_t serializedSizeV2(const IndexedAllocationInfo &IAI,
   return Size;
 }
 
+static size_t serializedSizeV3(const IndexedAllocationInfo &IAI,
+                               const MemProfSchema &Schema) {
+  size_t Size = 0;
+  // The linear call stack ID.
+  Size += sizeof(uint32_t);
+  // The size of the payload.
+  Size += PortableMemInfoBlock::serializedSize(Schema);
+  return Size;
+}
+
 size_t IndexedAllocationInfo::serializedSize(const MemProfSchema &Schema,
                                              IndexedVersion Version) const {
   switch (Version) {
@@ -52,8 +62,9 @@ size_t IndexedAllocationInfo::serializedSize(const MemProfSchema &Schema,
   case Version1:
     return serializedSizeV0(*this, Schema);
   case Version2:
-  case Version3:
     return serializedSizeV2(*this, Schema);
+  case Version3:
+    return serializedSizeV3(*this, Schema);
   }
   llvm_unreachable("unsupported MemProf version");
 }
@@ -89,6 +100,20 @@ static size_t serializedSizeV2(const IndexedMemProfRecord &Record,
   return Result;
 }
 
+static size_t serializedSizeV3(const IndexedMemProfRecord &Record,
+                               const MemProfSchema &Schema) {
+  // The number of alloc sites to serialize.
+  size_t Result = sizeof(uint64_t);
+  for (const IndexedAllocationInfo &N : Record.AllocSites)
+    Result += N.serializedSize(Schema, Version3);
+
+  // The number of callsites we have information for.
+  Result += sizeof(uint64_t);
+  // The linear call stack ID.
+  Result += Record.CallSiteIds.size() * sizeof(uint32_t);
+  return Result;
+}
+
 size_t IndexedMemProfRecord::serializedSize(const MemProfSchema &Schema,
                                             IndexedVersion Version) const {
   switch (Version) {
@@ -96,8 +121,9 @@ size_t IndexedMemProfRecord::serializedSize(const MemProfSchema &Schema,
   case Version1:
     return serializedSizeV0(*this, Schema);
   case Version2:
-  case Version3:
     return serializedSizeV2(*this, Schema);
+  case Version3:
+    return serializedSizeV3(*this, Schema);
   }
   llvm_unreachable("unsupported MemProf version");
 }
@@ -154,7 +180,7 @@ serializeV3(const IndexedMemProfRecord &Record, const MemProfSchema &Schema,
   LE.write<uint64_t>(Record.AllocSites.size());
   for (const IndexedAllocationInfo &N : Record.AllocSites) {
     assert(MemProfCallStackIndexes.contains(N.CSId));
-    LE.write<uint64_t>(MemProfCallStackIndexes[N.CSId]);
+    LE.write<uint32_t>(MemProfCallStackIndexes[N.CSId]);
     N.Info.serialize(Schema, OS);
   }
 
@@ -162,7 +188,7 @@ serializeV3(const IndexedMemProfRecord &Record, const MemProfSchema &Schema,
   LE.write<uint64_t>(Record.CallSiteIds.size());
   for (const auto &CSId : Record.CallSiteIds) {
     assert(MemProfCallStackIndexes.contains(CSId));
-    LE.write<uint64_t>(MemProfCallStackIndexes[CSId]);
+    LE.write<uint32_t>(MemProfCallStackIndexes[CSId]);
   }
 }
 
@@ -259,6 +285,37 @@ static IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema,
   return Record;
 }
 
+static IndexedMemProfRecord deserializeV3(const MemProfSchema &Schema,
+                                          const unsigned char *Ptr) {
+  using namespace support;
+
+  IndexedMemProfRecord Record;
+
+  // Read the meminfo nodes.
+  const uint64_t NumNodes =
+      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+  Record.AllocSites.reserve(NumNodes);
+  for (uint64_t I = 0; I < NumNodes; I++) {
+    IndexedAllocationInfo Node;
+    Node.CSId = endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
+    Node.Info.deserialize(Schema, Ptr);
+    Ptr += PortableMemInfoBlock::serializedSize(Schema);
+    Record.AllocSites.push_back(Node);
+  }
+
+  // Read the callsite information.
+  const uint64_t NumCtxs =
+      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+  Record.CallSiteIds.reserve(NumCtxs);
+  for (uint64_t J = 0; J < NumCtxs; J++) {
+    CallStackId CSId =
+        endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
+    Record.CallSiteIds.push_back(CSId);
+  }
+
+  return Record;
+}
+
 IndexedMemProfRecord
 IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
                                   const unsigned char *Ptr,
@@ -268,8 +325,9 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
   case Version1:
     return deserializeV0(Schema, Ptr);
   case Version2:
-  case Version3:
     return deserializeV2(Schema, Ptr);
+  case Version3:
+    return deserializeV3(Schema, Ptr);
   }
   llvm_unreachable("unsupported MemProf version");
 }
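
To put the change in concrete terms: V3 writes each linear call stack ID as a uint32_t where V2 writes a full uint64_t, so every alloc site and every call site ID saves four bytes on disk. A minimal standalone sketch of that arithmetic (the helper name is hypothetical, not part of the patch):

#include <cstddef>
#include <cstdint>

// Hypothetical helper: bytes saved per record by V3's 4-byte linear IDs
// relative to V2's 8-byte IDs, all other fields being unchanged.
constexpr size_t recordSavingsV3(size_t NumAllocSites, size_t NumCallSiteIds) {
  return (sizeof(uint64_t) - sizeof(uint32_t)) *
         (NumAllocSites + NumCallSiteIds);
}

// A record with 2 alloc sites and 3 call site IDs shrinks by 20 bytes.
static_assert(recordSavingsV3(2, 3) == 20, "4 bytes saved per ID");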

endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
Record.CallSiteIds.reserve(NumCtxs);
for (uint64_t J = 0; J < NumCtxs; J++) {
CallStackId CSId =
Contributor

CallStackId is still a typedef to uint64_t. I guess we can't change this completely while we still have v2 around, but it is a little confusing, and presumably using more memory than needed in the in-memory data structures. Should there be a TODO to switch this to 32-bits?

// The number of callsites we have information for.
Result += sizeof(uint64_t);
// The linear call stack ID.
Result += Record.CallSiteIds.size() * sizeof(uint32_t);
Contributor

v2 uses sizeof(CallStackId) which is still 64-bits. Should we create a new typename and use it in these functions with a TODO to remove the old one (see also my comment later on about this)?
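
A minimal sketch of the typedef split being discussed; the name LinearCallStackId comes from the author's reply below, while the declaration site and comments are assumptions:

#include <cstdint>

namespace llvm::memprof {
// 64-bit hash of a call stack; still needed while V2 is supported.
using CallStackId = uint64_t;
// 32-bit index into the call stack array, used by the V3 format.
using LinearCallStackId = uint32_t;
} // namespace llvm::memprof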

Contributor Author

I've added a new typename in the latest iteration.

I thought about the TODO that you are suggesting, but it's a bit complicated. I think the source of confusion is that I am multi-purposing IndexedAllocationInfo::CSId and IndexedMemProfRecord::CallSiteIds:

  • During serialization (like llvm-profdata merge), we store 64-bit call stack hash values there -- the original meaning of CallStackId.
  • During deserialization (in the compiler proper), we temporarily store 32-bit linear IDs in CSId and CallSiteIds just before constructing MemProfRecord.

For now, I'm inclined to keep it the way it is, except that we use LinearCallStackId for readability where we don't need to store 64-bit call stack hash values.

I thought about putting the true CallStackId (that is, the 64-bit call stack hash value) in these fields during deserialization, replacing each linear ID with a CallStackId as we deserialize -- the opposite of what we do during serialization. The problem is that computing the 64-bit call stack hash value requires a fully reconstructed call stack consisting of Frames. And once we have a fully reconstructed call stack of Frames, why not go all the way to MemProfRecord? It seems wasteful to reconstruct a call stack, store it away in some hash table, and immediately retrieve it when we construct MemProfRecord.
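
A small standalone illustration of the dual use described above (the variable stands in for IndexedAllocationInfo::CSId; the values are made up): during serialization the 64-bit field holds a call stack hash, while during V3 deserialization it briefly holds a 32-bit linear ID widened into the same field.

#include <cassert>
#include <cstdint>

int main() {
  uint64_t CSId; // stands in for IndexedAllocationInfo::CSId

  // Serialization (e.g. llvm-profdata merge): a 64-bit call stack hash.
  CSId = 0x9e3779b97f4a7c15ULL; // hypothetical hash value

  // V3 deserialization: a 32-bit linear ID read from disk, widened
  // losslessly into the same 64-bit field until MemProfRecord is built.
  uint32_t LinearId = 42;
  CSId = LinearId;
  assert(static_cast<uint32_t>(CSId) == LinearId);
  return 0;
}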

@kazutakahirata kazutakahirata merged commit 37f3023 into llvm:main May 31, 2024
5 of 7 checks passed
@kazutakahirata kazutakahirata deleted the memprof_v3_callstack_uint32_t branch May 31, 2024 17:29