Skip to content

[MemProf] Disable alloc context in combined summary for ndebug builds #139161

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions llvm/include/llvm/Bitcode/LLVMBitCodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,11 @@ enum GlobalValueSummarySymtabCodes {
// CallStackRadixTreeBuilder class in ProfileData/MemProf.h for format.
// [n x entry]
FS_CONTEXT_RADIX_TREE_ARRAY = 32,
// Summary of combined index allocation memprof metadata, without context.
// [nummib, numver,
// nummib x alloc type,
// numver x version]
FS_COMBINED_ALLOC_INFO_NO_CONTEXT = 33,
};

enum MetadataCodes {
Expand Down
15 changes: 9 additions & 6 deletions llvm/lib/AsmParser/LLParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10781,12 +10781,15 @@ bool LLParser::parseMemProfs(std::vector<MIBInfo> &MIBs) {
return true;

SmallVector<unsigned> StackIdIndices;
do {
uint64_t StackId = 0;
if (parseUInt64(StackId))
return true;
StackIdIndices.push_back(Index->addOrGetStackIdIndex(StackId));
} while (EatIfPresent(lltok::comma));
// Combined index alloc records may not have a stack id list.
if (Lex.getKind() != lltok::rparen) {
do {
uint64_t StackId = 0;
if (parseUInt64(StackId))
return true;
StackIdIndices.push_back(Index->addOrGetStackIdIndex(StackId));
} while (EatIfPresent(lltok::comma));
}

if (parseToken(lltok::rparen, "expected ')' in stackIds"))
return true;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,7 @@ GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(FS, STACK_IDS)
STRINGIFY_CODE(FS, ALLOC_CONTEXT_IDS)
STRINGIFY_CODE(FS, CONTEXT_RADIX_TREE_ARRAY)
STRINGIFY_CODE(FS, COMBINED_ALLOC_INFO_NO_CONTEXT)
}
case bitc::METADATA_ATTACHMENT_ID:
switch (CodeID) {
Expand Down
7 changes: 5 additions & 2 deletions llvm/lib/Bitcode/Reader/BitcodeReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8178,7 +8178,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
break;
}

case bitc::FS_COMBINED_ALLOC_INFO: {
case bitc::FS_COMBINED_ALLOC_INFO:
case bitc::FS_COMBINED_ALLOC_INFO_NO_CONTEXT: {
unsigned I = 0;
std::vector<MIBInfo> MIBs;
unsigned NumMIBs = Record[I++];
Expand All @@ -8187,7 +8188,9 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
while (MIBsRead++ < NumMIBs) {
assert(Record.size() - I >= 2);
AllocationType AllocType = (AllocationType)Record[I++];
auto StackIdList = parseAllocInfoContext(Record, I);
SmallVector<unsigned> StackIdList;
if (BitCode == bitc::FS_COMBINED_ALLOC_INFO)
StackIdList = parseAllocInfoContext(Record, I);
MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList)));
}
assert(Record.size() - I >= NumVersions);
Expand Down
135 changes: 83 additions & 52 deletions llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,22 @@ static cl::opt<bool> WriteRelBFToSummary(
"write-relbf-to-summary", cl::Hidden, cl::init(false),
cl::desc("Write relative block frequency to function summary "));

// Since we only use the context information in the memprof summary records in
// the LTO backends to do assertion checking, save time and space by only
// serializing the context for non-NDEBUG builds.
// TODO: Currently this controls writing context of the allocation info records,
// which are larger and more expensive, but we should do this for the callsite
// records as well.
// FIXME: Convert to a const once this has undergone more sigificant testing.
static cl::opt<bool>
CombinedIndexMemProfContext("combined-index-memprof-context", cl::Hidden,
#ifdef NDEBUG
cl::init(false),
#else
cl::init(true),
#endif
cl::desc(""));

namespace llvm {
extern FunctionSummary::ForceSummaryHotnessType ForceSummaryEdgesCold;
}
Expand Down Expand Up @@ -528,10 +544,12 @@ class IndexBitcodeWriter : public BitcodeWriterBase {
for (auto Idx : CI.StackIdIndices)
RecordStackIdReference(Idx);
}
for (auto &AI : FS->allocs())
for (auto &MIB : AI.MIBs)
for (auto Idx : MIB.StackIdIndices)
RecordStackIdReference(Idx);
if (CombinedIndexMemProfContext) {
for (auto &AI : FS->allocs())
for (auto &MIB : AI.MIBs)
for (auto Idx : MIB.StackIdIndices)
RecordStackIdReference(Idx);
}
});
}

Expand Down Expand Up @@ -4349,9 +4367,14 @@ static void writeFunctionHeapProfileRecords(
Record.push_back(AI.Versions.size());
for (auto &MIB : AI.MIBs) {
Record.push_back((uint8_t)MIB.AllocType);
// Record the index into the radix tree array for this context.
assert(CallStackCount <= CallStackPos.size());
Record.push_back(CallStackPos[CallStackCount++]);
// The per-module summary always needs to include the alloc context, as we
// use it during the thin link. For the combined index it is optional (see
// comments where CombinedIndexMemProfContext is defined).
if (PerModule || CombinedIndexMemProfContext) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explain a bit about the PerModule usage here and below?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a comment here. The below usage was pre-existing so I wasn't sure another comment was needed.

// Record the index into the radix tree array for this context.
assert(CallStackCount <= CallStackPos.size());
Record.push_back(CallStackPos[CallStackCount++]);
}
}
if (!PerModule)
llvm::append_range(Record, AI.Versions);
Expand Down Expand Up @@ -4384,8 +4407,11 @@ static void writeFunctionHeapProfileRecords(
Stream.EmitRecord(bitc::FS_ALLOC_CONTEXT_IDS, ContextIds,
ContextIdAbbvId);
}
Stream.EmitRecord(PerModule ? bitc::FS_PERMODULE_ALLOC_INFO
: bitc::FS_COMBINED_ALLOC_INFO,
Stream.EmitRecord(PerModule
? bitc::FS_PERMODULE_ALLOC_INFO
: (CombinedIndexMemProfContext
? bitc::FS_COMBINED_ALLOC_INFO
: bitc::FS_COMBINED_ALLOC_INFO_NO_CONTEXT),
Record, AllocAbbrev);
}
}
Expand Down Expand Up @@ -4847,7 +4873,9 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
unsigned CallsiteAbbrev = Stream.EmitAbbrev(std::move(Abbv));

Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_ALLOC_INFO));
Abbv->Add(BitCodeAbbrevOp(CombinedIndexMemProfContext
? bitc::FS_COMBINED_ALLOC_INFO
: bitc::FS_COMBINED_ALLOC_INFO_NO_CONTEXT));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver
// nummib x (alloc type, context radix tree index),
Expand All @@ -4857,13 +4885,6 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));

Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
// n x entry
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned RadixAbbrev = Stream.EmitAbbrev(std::move(Abbv));

auto shouldImportValueAsDecl = [&](GlobalValueSummary *GVS) -> bool {
if (DecSummaries == nullptr)
return false;
Expand Down Expand Up @@ -4900,44 +4921,54 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
NameVals.clear();
};

// First walk through all the functions and collect the allocation contexts in
// their associated summaries, for use in constructing a radix tree of
// contexts. Note that we need to do this in the same order as the functions
// are processed further below since the call stack positions in the resulting
// radix tree array are identified based on this order.
MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
forEachSummary([&](GVInfo I, bool IsAliasee) {
// Don't collect this when invoked for an aliasee, as it is not needed for
// the alias summary. If the aliasee is to be imported, we will invoke this
// separately with IsAliasee=false.
if (IsAliasee)
return;
GlobalValueSummary *S = I.second;
assert(S);
auto *FS = dyn_cast<FunctionSummary>(S);
if (!FS)
return;
collectMemProfCallStacks(
FS,
/*GetStackIndex*/
[&](unsigned I) {
// Get the corresponding index into the list of StackIds actually
// being written for this combined index (which may be a subset in
// the case of distributed indexes).
assert(StackIdIndicesToIndex.contains(I));
return StackIdIndicesToIndex[I];
},
CallStacks);
});
// Finalize the radix tree, write it out, and get the map of positions in the
// linearized tree array.
DenseMap<CallStackId, LinearCallStackId> CallStackPos;
if (!CallStacks.empty()) {
CallStackPos =
writeMemoryProfileRadixTree(std::move(CallStacks), Stream, RadixAbbrev);
if (CombinedIndexMemProfContext) {
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
// n x entry
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned RadixAbbrev = Stream.EmitAbbrev(std::move(Abbv));

// First walk through all the functions and collect the allocation contexts
// in their associated summaries, for use in constructing a radix tree of
// contexts. Note that we need to do this in the same order as the functions
// are processed further below since the call stack positions in the
// resulting radix tree array are identified based on this order.
MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
forEachSummary([&](GVInfo I, bool IsAliasee) {
// Don't collect this when invoked for an aliasee, as it is not needed for
// the alias summary. If the aliasee is to be imported, we will invoke
// this separately with IsAliasee=false.
if (IsAliasee)
return;
GlobalValueSummary *S = I.second;
assert(S);
auto *FS = dyn_cast<FunctionSummary>(S);
if (!FS)
return;
collectMemProfCallStacks(
FS,
/*GetStackIndex*/
[&](unsigned I) {
// Get the corresponding index into the list of StackIds actually
// being written for this combined index (which may be a subset in
// the case of distributed indexes).
assert(StackIdIndicesToIndex.contains(I));
return StackIdIndicesToIndex[I];
},
CallStacks);
});
// Finalize the radix tree, write it out, and get the map of positions in
// the linearized tree array.
if (!CallStacks.empty()) {
CallStackPos = writeMemoryProfileRadixTree(std::move(CallStacks), Stream,
RadixAbbrev);
}
}

// Keep track of the current index into the CallStackPos map.
// Keep track of the current index into the CallStackPos map. Not used if
// CombinedIndexMemProfContext is false.
CallStackId CallStackCount = 0;

DenseSet<GlobalValue::GUID> DefOrUseGUIDs;
Expand Down
15 changes: 12 additions & 3 deletions llvm/test/Assembler/thinlto-memprof-summary.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
;; Test memprof summary parsing (tests all types/fields in various combinations).
; RUN: llvm-as %s -o - | llvm-dis -o - | FileCheck %s

;; Force enable -combined-index-memprof-context to get the allocation context
;; stack ids even in release builds.
; RUN: llvm-as %s -o - -combined-index-memprof-context | llvm-dis -o - | FileCheck %s --check-prefixes=CHECK,CONTEXT

;; Force disable -combined-index-memprof-context to block the allocation context
;; stack ids even in non-release builds.
; RUN: llvm-as %s -o - -combined-index-memprof-context=false | llvm-dis -o - | FileCheck %s --check-prefixes=CHECK,NOCONTEXT

; ModuleID = 'thinlto-memprof-summary.thinlto.bc'

Expand All @@ -17,8 +24,10 @@

; Make sure we get back from llvm-dis what we put in via llvm-as.
; CHECK: ^0 = module: (path: "thinlto-memprof-summary.o", hash: (1369602428, 2747878711, 259090915, 2507395659, 1141468049))
; CHECK: ^1 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (8632435727821051414)), (type: cold, stackIds: (15025054523792398438, 12345678)), (type: hot, stackIds: (987654321))))))))
; CONTEXT: ^1 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (8632435727821051414)), (type: cold, stackIds: (15025054523792398438, 12345678)), (type: hot, stackIds: (987654321))))))))
; NOCONTEXT: ^1 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: ()), (type: cold, stackIds: ()), (type: hot, stackIds: ())))))))
; CHECK: ^2 = gv: (guid: 25, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^1)), callsites: ((callee: ^1, clones: (0), stackIds: (8632435727821051414)), (callee: ^1, clones: (0), stackIds: (15025054523792398438, 12345678)), (callee: ^1, clones: (0), stackIds: (23456789))))))
; CHECK: ^3 = gv: (guid: 26, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (cold, notcold), memProf: ((type: notcold, stackIds: (3456789)), (type: cold, stackIds: (456789)))), (versions: (notcold, cold), memProf: ((type: cold, stackIds: (3456789)), (type: notcold, stackIds: (456789))))))))
; CONTEXT: ^3 = gv: (guid: 26, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (cold, notcold), memProf: ((type: notcold, stackIds: (3456789)), (type: cold, stackIds: (456789)))), (versions: (notcold, cold), memProf: ((type: cold, stackIds: (3456789)), (type: notcold, stackIds: (456789))))))))
; NOCONTEXT: ^3 = gv: (guid: 26, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (cold, notcold), memProf: ((type: notcold, stackIds: ()), (type: cold, stackIds: ()))), (versions: (notcold, cold), memProf: ((type: cold, stackIds: ()), (type: notcold, stackIds: ())))))))
; CHECK: ^4 = gv: (guid: 27, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^3)), callsites: ((callee: ^3, clones: (0, 1), stackIds: (3456789)), (callee: ^3, clones: (1, 1), stackIds: (456789))))))
; CHECK: ^5 = gv: (guid: 28, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), callsites: ((callee: null, clones: (0), stackIds: (8632435727821051414))))))
1 change: 1 addition & 0 deletions llvm/test/ThinLTO/X86/memprof_direct_recursion.ll
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
; RUN: llvm-lto2 run %t/b.o %t/a.o -enable-memprof-context-disambiguation \
; RUN: -supports-hot-cold-new \
; RUN: -thinlto-distributed-indexes \
; RUN: -combined-index-memprof-context \
; RUN: -r=%t/b.o,_Z3fooi,plx \
; RUN: -r=%t/b.o,aliasee,plx \
; RUN: -r=%t/b.o,a \
Expand Down
Loading