llvm · kazutakahirata · Apr 19, 2024 · Apr 18, 2024
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -647,6 +647,26 @@ class InstrProfReaderRemapper {
                            ArrayRef<NamedInstrProfRecord> &Data) = 0;
 };
 
+class IndexedMemProfReader {
+private:
+  /// MemProf profile schema (if available).
+  memprof::MemProfSchema Schema;
+  /// MemProf record profile data on-disk indexed via llvm::md5(FunctionName).
+  std::unique_ptr<MemProfRecordHashTable> MemProfRecordTable;
+  /// MemProf frame profile data on-disk indexed via frame id.
+  std::unique_ptr<MemProfFrameHashTable> MemProfFrameTable;
+  /// MemProf call stack data on-disk indexed via call stack id.
+  std::unique_ptr<MemProfCallStackHashTable> MemProfCallStackTable;
+
+public:
+  IndexedMemProfReader() = default;
+
+  Error deserialize(const unsigned char *Start, uint64_t MemProfOffset);
+
+  Expected<memprof::MemProfRecord>
+  getMemProfRecord(const uint64_t FuncNameHash) const;
+};
+
 /// Reader for the indexed binary instrprof format.
 class IndexedInstrProfReader : public InstrProfReader {
 private:
@@ -662,14 +682,7 @@ class IndexedInstrProfReader : public InstrProfReader {
   std::unique_ptr<ProfileSummary> Summary;
   /// Context sensitive profile summary data.
   std::unique_ptr<ProfileSummary> CS_Summary;
-  /// MemProf profile schema (if available).
-  memprof::MemProfSchema Schema;
-  /// MemProf record profile data on-disk indexed via llvm::md5(FunctionName).
-  std::unique_ptr<MemProfRecordHashTable> MemProfRecordTable;
-  /// MemProf frame profile data on-disk indexed via frame id.
-  std::unique_ptr<MemProfFrameHashTable> MemProfFrameTable;
-  /// MemProf call stack data on-disk indexed via call stack id.
-  std::unique_ptr<MemProfCallStackHashTable> MemProfCallStackTable;
+  IndexedMemProfReader MemProfReader;
   /// VTableNamePtr points to the beginning of compressed vtable names.
   /// When a symtab is constructed from profiles by llvm-profdata, the list of
   /// names could be decompressed based on `VTableNamePtr` and
@@ -753,7 +766,9 @@ class IndexedInstrProfReader : public InstrProfReader {
 
   /// Return the memprof record for the function identified by
   /// llvm::md5(Name).
-  Expected<memprof::MemProfRecord> getMemProfRecord(uint64_t FuncNameHash);
+  Expected<memprof::MemProfRecord> getMemProfRecord(uint64_t FuncNameHash) {
+    return MemProfReader.getMemProfRecord(FuncNameHash);
+  }
 
   /// Fill Counts with the profile data for the given function name.
   Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,

diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1202,6 +1202,99 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
   }
 }
 
+Error IndexedMemProfReader::deserialize(const unsigned char *Start,
+                                        uint64_t MemProfOffset) {
+  const unsigned char *Ptr = Start + MemProfOffset;
+
+  // Read the first 64-bit word, which may be RecordTableOffset in
+  // memprof::MemProfVersion0 or the MemProf version number in
+  // memprof::MemProfVersion1 and above.
+  const uint64_t FirstWord =
+      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+
+  memprof::IndexedVersion Version = memprof::Version0;
+  if (FirstWord == memprof::Version1 || FirstWord == memprof::Version2) {
+    // Everything is good.  We can proceed to deserialize the rest.
+    Version = static_cast<memprof::IndexedVersion>(FirstWord);
+  } else if (FirstWord >= 24) {
+    // This is a heuristic/hack to detect memprof::MemProfVersion0,
+    // which does not have a version field in the header.
+    // In memprof::MemProfVersion0, FirstWord will be RecordTableOffset,
+    // which should be at least 24 because of the MemProf header size.
+    Version = memprof::Version0;
+  } else {
+    return make_error<InstrProfError>(
+        instrprof_error::unsupported_version,
+        formatv("MemProf version {} not supported; "
+                "requires version between {} and {}, inclusive",
+                FirstWord, memprof::MinimumSupportedVersion,
+                memprof::MaximumSupportedVersion));
+  }
+
+  // The value returned from RecordTableGenerator.Emit.
+  const uint64_t RecordTableOffset =
+      Version == memprof::Version0
+          ? FirstWord
+          : support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+  // The offset in the stream right before invoking
+  // FrameTableGenerator.Emit.
+  const uint64_t FramePayloadOffset =
+      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+  // The value returned from FrameTableGenerator.Emit.
+  const uint64_t FrameTableOffset =
+      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+
+  // The offset in the stream right before invoking
+  // CallStackTableGenerator.Emit.
+  uint64_t CallStackPayloadOffset = 0;
+  // The value returned from CallStackTableGenerator.Emit.
+  uint64_t CallStackTableOffset = 0;
+  if (Version >= memprof::Version2) {
+    CallStackPayloadOffset =
+        support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+    CallStackTableOffset =
+        support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+  }
+
+  // Read the schema.
+  auto SchemaOr = memprof::readMemProfSchema(Ptr);
+  if (!SchemaOr)
+    return SchemaOr.takeError();
+  Schema = SchemaOr.get();
+
+  // Now initialize the table reader with a pointer into data buffer.
+  MemProfRecordTable.reset(MemProfRecordHashTable::Create(
+      /*Buckets=*/Start + RecordTableOffset,
+      /*Payload=*/Ptr,
+      /*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));
+
+  // Initialize the frame table reader with the payload and bucket offsets.
+  MemProfFrameTable.reset(MemProfFrameHashTable::Create(
+      /*Buckets=*/Start + FrameTableOffset,
+      /*Payload=*/Start + FramePayloadOffset,
+      /*Base=*/Start, memprof::FrameLookupTrait()));
+
+  if (Version >= memprof::Version2)
+    MemProfCallStackTable.reset(MemProfCallStackHashTable::Create(
+        /*Buckets=*/Start + CallStackTableOffset,
+        /*Payload=*/Start + CallStackPayloadOffset,
+        /*Base=*/Start, memprof::CallStackLookupTrait()));
+
+#ifdef EXPENSIVE_CHECKS
+  // Go through all the records and verify that CSId has been correctly
+  // populated.  Do this only under EXPENSIVE_CHECKS.  Otherwise, we
+  // would defeat the purpose of OnDiskIterableChainedHashTable.
+  // Note that we can compare CSId against actual call stacks only for
+  // Version0 and Version1 because IndexedAllocationInfo::CallStack and
+  // IndexedMemProfRecord::CallSites are not populated in Version2.
+  if (Version <= memprof::Version1)
+    for (const auto &Record : MemProfRecordTable->data())
+      verifyIndexedMemProfRecord(Record);
+#endif
+
+  return Error::success();
+}
+
 Error IndexedInstrProfReader::readHeader() {
   using namespace support;
 
@@ -1244,95 +1337,8 @@ Error IndexedInstrProfReader::readHeader() {
     uint64_t MemProfOffset =
         endian::byte_swap<uint64_t, llvm::endianness::little>(
             Header->MemProfOffset);
-
-    const unsigned char *Ptr = Start + MemProfOffset;
-
-    // Read the first 64-bit word, which may be RecordTableOffset in
-    // memprof::MemProfVersion0 or the MemProf version number in
-    // memprof::MemProfVersion1 and above.
-    const uint64_t FirstWord =
-        support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
-
-    memprof::IndexedVersion Version = memprof::Version0;
-    if (FirstWord == memprof::Version1 || FirstWord == memprof::Version2) {
-      // Everything is good.  We can proceed to deserialize the rest.
-      Version = static_cast<memprof::IndexedVersion>(FirstWord);
-    } else if (FirstWord >= 24) {
-      // This is a heuristic/hack to detect memprof::MemProfVersion0,
-      // which does not have a version field in the header.
-      // In memprof::MemProfVersion0, FirstWord will be RecordTableOffset,
-      // which should be at least 24 because of the MemProf header size.
-      Version = memprof::Version0;
-    } else {
-      return make_error<InstrProfError>(
-          instrprof_error::unsupported_version,
-          formatv("MemProf version {} not supported; "
-                  "requires version between {} and {}, inclusive",
-                  FirstWord, memprof::MinimumSupportedVersion,
-                  memprof::MaximumSupportedVersion));
-    }
-
-    // The value returned from RecordTableGenerator.Emit.
-    const uint64_t RecordTableOffset =
-        Version == memprof::Version0
-            ? FirstWord
-            : support::endian::readNext<uint64_t, llvm::endianness::little>(
-                  Ptr);
-    // The offset in the stream right before invoking
-    // FrameTableGenerator.Emit.
-    const uint64_t FramePayloadOffset =
-        support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
-    // The value returned from FrameTableGenerator.Emit.
-    const uint64_t FrameTableOffset =
-        support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
-
-    // The offset in the stream right before invoking
-    // CallStackTableGenerator.Emit.
-    uint64_t CallStackPayloadOffset = 0;
-    // The value returned from CallStackTableGenerator.Emit.
-    uint64_t CallStackTableOffset = 0;
-    if (Version >= memprof::Version2) {
-      CallStackPayloadOffset =
-          support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
-      CallStackTableOffset =
-          support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
-    }
-
-    // Read the schema.
-    auto SchemaOr = memprof::readMemProfSchema(Ptr);
-    if (!SchemaOr)
-      return SchemaOr.takeError();
-    Schema = SchemaOr.get();
-
-    // Now initialize the table reader with a pointer into data buffer.
-    MemProfRecordTable.reset(MemProfRecordHashTable::Create(
-        /*Buckets=*/Start + RecordTableOffset,
-        /*Payload=*/Ptr,
-        /*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));
-
-    // Initialize the frame table reader with the payload and bucket offsets.
-    MemProfFrameTable.reset(MemProfFrameHashTable::Create(
-        /*Buckets=*/Start + FrameTableOffset,
-        /*Payload=*/Start + FramePayloadOffset,
-        /*Base=*/Start, memprof::FrameLookupTrait()));
-
-    if (Version >= memprof::Version2)
-      MemProfCallStackTable.reset(MemProfCallStackHashTable::Create(
-          /*Buckets=*/Start + CallStackTableOffset,
-          /*Payload=*/Start + CallStackPayloadOffset,
-          /*Base=*/Start, memprof::CallStackLookupTrait()));
-
-#ifdef EXPENSIVE_CHECKS
-    // Go through all the records and verify that CSId has been correctly
-    // populated.  Do this only under EXPENSIVE_CHECKS.  Otherwise, we
-    // would defeat the purpose of OnDiskIterableChainedHashTable.
-    // Note that we can compare CSId against actual call stacks only for
-    // Version0 and Version1 because IndexedAllocationInfo::CallStack and
-    // IndexedMemProfRecord::CallSites are not populated in Version2.
-    if (Version <= memprof::Version1)
-      for (const auto &Record : MemProfRecordTable->data())
-        verifyIndexedMemProfRecord(Record);
-#endif
+    if (Error E = MemProfReader.deserialize(Start, MemProfOffset))
+      return E;
   }
 
   // BinaryIdOffset field in the header is only valid when the format version
@@ -1501,7 +1507,7 @@ Expected<InstrProfRecord> IndexedInstrProfReader::getInstrProfRecord(
 }
 
 Expected<memprof::MemProfRecord>
-IndexedInstrProfReader::getMemProfRecord(const uint64_t FuncNameHash) {
+IndexedMemProfReader::getMemProfRecord(const uint64_t FuncNameHash) const {
   // TODO: Add memprof specific errors.
   if (MemProfRecordTable == nullptr)
     return make_error<InstrProfError>(instrprof_error::invalid_prof,