-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[MemProf] Change the STACK_ID record to fixed width values #116448
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[MemProf] Change the STACK_ID record to fixed width values #116448
Conversation
The stack ids are hashes that are close to 64 bits in size, so emitting as a pair of 32-bit fixed-width values is more efficient than a VBR. This reduced the summary bitcode size for a large target by about 1%. Bump the index version and ensure we can read the old format.
✅ With the latest revision this PR passed the C/C++ code formatter. |
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-lto Author: Teresa Johnson (teresajohnson) ChangesThe stack ids are hashes that are close to 64 bits in size, so emitting Bump the index version and ensure we can read the old format. Full diff: https://github.com/llvm/llvm-project/pull/116448.diff 6 Files Affected:
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
index 50def0eaf78867..39c60229aa1d81 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -1463,7 +1463,7 @@ class ModuleSummaryIndex {
// in the way some record are interpreted, like flags for instance.
// Note that incrementing this may require changes in both BitcodeReader.cpp
// and BitcodeWriter.cpp.
- static constexpr uint64_t BitcodeSummaryVersion = 11;
+ static constexpr uint64_t BitcodeSummaryVersion = 12;
// Regular LTO module name for ASM writer
static constexpr const char *getRegularLTOModuleName() {
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 9ca76b54a88d9d..3e6abacac27261 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -7997,7 +7997,16 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
case bitc::FS_STACK_IDS: { // [n x stackid]
// Save stack ids in the reader to consult when adding stack ids from the
// lists in the stack node and alloc node entries.
- StackIds = ArrayRef<uint64_t>(Record);
+ if (Version <= 11) {
+ StackIds = ArrayRef<uint64_t>(Record);
+ break;
+ }
+ // This is an array of 32-bit fixed-width values, holding each 64-bit
+ // context id as a pair of adjacent (most significant first) 32-bit words.
+ assert(Record.size() % 2 == 0);
+ StackIds.reserve(Record.size() / 2);
+ for (auto R = Record.begin(); R != Record.end(); R += 2)
+ StackIds.push_back(*R << 32 | *(R + 1));
break;
}
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 5829af39cf5e2c..24a4c2e8303d5a 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -4429,12 +4429,17 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
StackIdAbbv->Add(BitCodeAbbrevOp(bitc::FS_STACK_IDS));
// numids x stackid
StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- // FIXME: The stack ids are hashes that are close to 64 bits in size, so
- // emitting as a pair of 32-bit fixed-width values, as we do for context
- // ids, would be more efficient.
- StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ // The stack ids are hashes that are close to 64 bits in size, so emitting
+ // as a pair of 32-bit fixed-width values is more efficient than a VBR.
+ StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
unsigned StackIdAbbvId = Stream.EmitAbbrev(std::move(StackIdAbbv));
- Stream.EmitRecord(bitc::FS_STACK_IDS, Index->stackIds(), StackIdAbbvId);
+ SmallVector<uint32_t> Vals;
+ Vals.reserve(Index->stackIds().size() * 2);
+ for (auto Id : Index->stackIds()) {
+ Vals.push_back(static_cast<uint32_t>(Id >> 32));
+ Vals.push_back(static_cast<uint32_t>(Id));
+ }
+ Stream.EmitRecord(bitc::FS_STACK_IDS, Vals, StackIdAbbvId);
}
// n x context id
@@ -4624,9 +4629,17 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
StackIdAbbv->Add(BitCodeAbbrevOp(bitc::FS_STACK_IDS));
// numids x stackid
StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ // The stack ids are hashes that are close to 64 bits in size, so emitting
+ // as a pair of 32-bit fixed-width values is more efficient than a VBR.
+ StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
unsigned StackIdAbbvId = Stream.EmitAbbrev(std::move(StackIdAbbv));
- Stream.EmitRecord(bitc::FS_STACK_IDS, StackIds, StackIdAbbvId);
+ SmallVector<uint32_t> Vals;
+ Vals.reserve(StackIds.size() * 2);
+ for (auto Id : StackIds) {
+ Vals.push_back(static_cast<uint32_t>(Id >> 32));
+ Vals.push_back(static_cast<uint32_t>(Id));
+ }
+ Stream.EmitRecord(bitc::FS_STACK_IDS, Vals, StackIdAbbvId);
}
// Abbrev for FS_COMBINED_PROFILE.
diff --git a/llvm/test/Bitcode/summary_version.ll b/llvm/test/Bitcode/summary_version.ll
index c8d36f812c208a..c95c145a08788b 100644
--- a/llvm/test/Bitcode/summary_version.ll
+++ b/llvm/test/Bitcode/summary_version.ll
@@ -2,7 +2,7 @@
; RUN: opt -module-summary %s -o - | llvm-bcanalyzer -dump | FileCheck %s
; CHECK: <GLOBALVAL_SUMMARY_BLOCK
-; CHECK: <VERSION op0=11/>
+; CHECK: <VERSION op0=12/>
diff --git a/llvm/test/ThinLTO/X86/Inputs/memprof-old-stackid-summary.bc b/llvm/test/ThinLTO/X86/Inputs/memprof-old-stackid-summary.bc
new file mode 100644
index 00000000000000..78b134cb44216e
Binary files /dev/null and b/llvm/test/ThinLTO/X86/Inputs/memprof-old-stackid-summary.bc differ
diff --git a/llvm/test/ThinLTO/X86/memprof-old-stackid-summary.ll b/llvm/test/ThinLTO/X86/memprof-old-stackid-summary.ll
new file mode 100644
index 00000000000000..10048f8674a080
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/memprof-old-stackid-summary.ll
@@ -0,0 +1,20 @@
+;; Check that we can read the old STACK_ID summary format that encoded the id as
+;; a VBR8 instead of as a pair of 32-bit fixed-width values.
+;;
+;; The old bitcode was generated by the older compiler from `opt -thinlto-bc`
+;; on the following LLVM assembly:
+;;
+;; target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+;; target triple = "x86_64-unknown-linux-gnu"
+;;
+;; define void @bar() {
+;; call void @foo(), !callsite !0
+;; ret void
+;; }
+;;
+;; declare void @foo()
+;;
+;; !0 = !{i64 9086428284934609951}
+
+; RUN: llvm-dis %S/Inputs/memprof-old-stackid-summary.bc -o - | FileCheck %s
+; CHECK: stackIds: (9086428284934609951)
|
The stack ids are hashes that are close to 64 bits in size, so emitting
as a pair of 32-bit fixed-width values is more efficient than a VBR.
This reduced the summary bitcode size for a large target by about 1%.
Bump the index version and ensure we can read the old format.