Skip to content

Commit 6c1091e

Browse files
Revert "[MemProf] Optionally save context size info on largest cold allocations" (#142688)
Reverts #142507 due to buildbot failures that I will look into tomorrow.
1 parent 33fae08 commit 6c1091e

File tree

9 files changed

+40
-352
lines changed

9 files changed

+40
-352
lines changed

llvm/include/llvm/Analysis/MemoryProfileInfo.h

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -24,18 +24,6 @@ class OptimizationRemarkEmitter;
2424

2525
namespace memprof {
2626

27-
/// Whether the alloc memeprof metadata will include context size info for all
28-
/// MIBs.
29-
LLVM_ABI bool metadataIncludesAllContextSizeInfo();
30-
31-
/// Whether the alloc memprof metadata may include context size info for some
32-
/// MIBs (but possibly not all).
33-
LLVM_ABI bool metadataMayIncludeContextSizeInfo();
34-
35-
/// Whether we need to record the context size info in the alloc trie used to
36-
/// build metadata.
37-
LLVM_ABI bool recordContextSizeInfoForAnalysis();
38-
3927
/// Build callstack metadata from the provided list of call stack ids. Returns
4028
/// the resulting metadata node.
4129
LLVM_ABI MDNode *buildCallstackMetadata(ArrayRef<uint64_t> CallStack,
@@ -99,9 +87,6 @@ class CallStackTrie {
9987
// allocations for which we apply non-context sensitive allocation hints.
10088
OptimizationRemarkEmitter *ORE;
10189

102-
// The maximum size of a cold allocation context, from the profile summary.
103-
uint64_t MaxColdSize;
104-
10590
void deleteTrieNode(CallStackTrieNode *Node) {
10691
if (!Node)
10792
return;
@@ -128,9 +113,7 @@ class CallStackTrie {
128113
uint64_t &ColdBytes);
129114

130115
public:
131-
CallStackTrie(OptimizationRemarkEmitter *ORE = nullptr,
132-
uint64_t MaxColdSize = 0)
133-
: ORE(ORE), MaxColdSize(MaxColdSize) {}
116+
CallStackTrie(OptimizationRemarkEmitter *ORE = nullptr) : ORE(ORE) {}
134117
~CallStackTrie() { deleteTrieNode(Alloc); }
135118

136119
bool empty() const { return Alloc == nullptr; }

llvm/lib/Analysis/MemoryProfileInfo.cpp

Lines changed: 9 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -46,25 +46,6 @@ cl::opt<unsigned> MinCallsiteColdBytePercent(
4646
cl::desc("Min percent of cold bytes at a callsite to discard non-cold "
4747
"contexts"));
4848

49-
// Enable saving context size information for largest cold contexts, which can
50-
// be used to flag contexts for more aggressive cloning and reporting.
51-
cl::opt<unsigned> MinPercentMaxColdSize(
52-
"memprof-min-percent-max-cold-size", cl::init(100), cl::Hidden,
53-
cl::desc("Min percent of max cold bytes for critical cold context"));
54-
55-
bool llvm::memprof::metadataIncludesAllContextSizeInfo() {
56-
return MemProfReportHintedSizes || MinClonedColdBytePercent < 100;
57-
}
58-
59-
bool llvm::memprof::metadataMayIncludeContextSizeInfo() {
60-
return metadataIncludesAllContextSizeInfo() || MinPercentMaxColdSize < 100;
61-
}
62-
63-
bool llvm::memprof::recordContextSizeInfoForAnalysis() {
64-
return metadataMayIncludeContextSizeInfo() ||
65-
MinCallsiteColdBytePercent < 100;
66-
}
67-
6849
MDNode *llvm::memprof::buildCallstackMetadata(ArrayRef<uint64_t> CallStack,
6950
LLVMContext &Ctx) {
7051
SmallVector<Metadata *, 8> StackVals;
@@ -187,8 +168,7 @@ void CallStackTrie::addCallStack(MDNode *MIB) {
187168
static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack,
188169
AllocationType AllocType,
189170
ArrayRef<ContextTotalSize> ContextSizeInfo,
190-
const uint64_t MaxColdSize, uint64_t &TotalBytes,
191-
uint64_t &ColdBytes) {
171+
uint64_t &TotalBytes, uint64_t &ColdBytes) {
192172
SmallVector<Metadata *> MIBPayload(
193173
{buildCallstackMetadata(MIBCallStack, Ctx)});
194174
MIBPayload.push_back(
@@ -204,21 +184,12 @@ static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack,
204184

205185
for (const auto &[FullStackId, TotalSize] : ContextSizeInfo) {
206186
TotalBytes += TotalSize;
207-
bool LargeColdContext = false;
208-
if (AllocType == AllocationType::Cold) {
187+
if (AllocType == AllocationType::Cold)
209188
ColdBytes += TotalSize;
210-
// If we have the max cold context size from summary information and have
211-
// requested identification of contexts above a percentage of the max, see
212-
// if this context qualifies.
213-
if (MaxColdSize > 0 && MinPercentMaxColdSize < 100 &&
214-
TotalSize * 100 >= MaxColdSize * MinPercentMaxColdSize)
215-
LargeColdContext = true;
216-
}
217189
// Only add the context size info as metadata if we need it in the thin
218-
// link (currently if reporting of hinted sizes is enabled, we have
219-
// specified a threshold for marking allocations cold after cloning, or we
220-
// have identified this as a large cold context of interest above).
221-
if (metadataIncludesAllContextSizeInfo() || LargeColdContext) {
190+
// link (currently if reporting of hinted sizes is enabled or we have
191+
// specified a threshold for marking allocations cold after cloning).
192+
if (MemProfReportHintedSizes || MinClonedColdBytePercent < 100) {
222193
auto *FullStackIdMD = ValueAsMetadata::get(
223194
ConstantInt::get(Type::getInt64Ty(Ctx), FullStackId));
224195
auto *TotalSizeMD = ValueAsMetadata::get(
@@ -386,9 +357,9 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
386357
if (hasSingleAllocType(Node->AllocTypes)) {
387358
std::vector<ContextTotalSize> ContextSizeInfo;
388359
collectContextSizeInfo(Node, ContextSizeInfo);
389-
MIBNodes.push_back(
390-
createMIBNode(Ctx, MIBCallStack, (AllocationType)Node->AllocTypes,
391-
ContextSizeInfo, MaxColdSize, TotalBytes, ColdBytes));
360+
MIBNodes.push_back(createMIBNode(Ctx, MIBCallStack,
361+
(AllocationType)Node->AllocTypes,
362+
ContextSizeInfo, TotalBytes, ColdBytes));
392363
return true;
393364
}
394365

@@ -442,8 +413,7 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
442413
std::vector<ContextTotalSize> ContextSizeInfo;
443414
collectContextSizeInfo(Node, ContextSizeInfo);
444415
MIBNodes.push_back(createMIBNode(Ctx, MIBCallStack, AllocationType::NotCold,
445-
ContextSizeInfo, MaxColdSize, TotalBytes,
446-
ColdBytes));
416+
ContextSizeInfo, TotalBytes, ColdBytes));
447417
return true;
448418
}
449419

llvm/lib/Analysis/ModuleSummaryAnalysis.cpp

Lines changed: 3 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -525,7 +525,6 @@ static void computeFunctionSummary(
525525
if (MemProfMD) {
526526
std::vector<MIBInfo> MIBs;
527527
std::vector<std::vector<ContextTotalSize>> ContextSizeInfos;
528-
bool HasNonZeroContextSizeInfos = false;
529528
for (auto &MDOp : MemProfMD->operands()) {
530529
auto *MIBMD = cast<const MDNode>(MDOp);
531530
MDNode *StackNode = getMIBStackNode(MIBMD);
@@ -545,8 +544,7 @@ static void computeFunctionSummary(
545544
}
546545
// If we have context size information, collect it for inclusion in
547546
// the summary.
548-
assert(MIBMD->getNumOperands() > 2 ||
549-
!metadataIncludesAllContextSizeInfo());
547+
assert(MIBMD->getNumOperands() > 2 || !MemProfReportHintedSizes);
550548
if (MIBMD->getNumOperands() > 2) {
551549
std::vector<ContextTotalSize> ContextSizes;
552550
for (unsigned I = 2; I < MIBMD->getNumOperands(); I++) {
@@ -560,31 +558,14 @@ static void computeFunctionSummary(
560558
->getZExtValue();
561559
ContextSizes.push_back({FullStackId, TS});
562560
}
563-
// Flag that we need to keep the ContextSizeInfos array for this
564-
// alloc as it now contains non-zero context info sizes.
565-
HasNonZeroContextSizeInfos = true;
566561
ContextSizeInfos.push_back(std::move(ContextSizes));
567-
} else {
568-
// The ContextSizeInfos must be in the same relative position as the
569-
// associated MIB. In some cases we only include a ContextSizeInfo
570-
// for a subset of MIBs in an allocation. To handle that, eagerly
571-
// fill any MIB entries that don't have context size info metadata
572-
// with a pair of 0s. Later on we will only use this array if it
573-
// ends up containing any non-zero entries (see where we set
574-
// HasNonZeroContextSizeInfos above).
575-
ContextSizeInfos.push_back({{0, 0}});
576562
}
577563
MIBs.push_back(
578564
MIBInfo(getMIBAllocType(MIBMD), std::move(StackIdIndices)));
579565
}
580566
Allocs.push_back(AllocInfo(std::move(MIBs)));
581-
assert(HasNonZeroContextSizeInfos ||
582-
!metadataIncludesAllContextSizeInfo());
583-
// We eagerly build the ContextSizeInfos array, but it will be filled
584-
// with sub arrays of pairs of 0s if no MIBs on this alloc actually
585-
// contained context size info metadata. Only save it if any MIBs had
586-
// any such metadata.
587-
if (HasNonZeroContextSizeInfos) {
567+
assert(!ContextSizeInfos.empty() || !MemProfReportHintedSizes);
568+
if (!ContextSizeInfos.empty()) {
588569
assert(Allocs.back().MIBs.size() == ContextSizeInfos.size());
589570
Allocs.back().ContextSizeInfos = std::move(ContextSizeInfos);
590571
}

llvm/lib/Bitcode/Reader/BitcodeReader.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8164,14 +8164,6 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
81648164
ContextSizes.reserve(NumContextSizeInfoEntries);
81658165
for (unsigned J = 0; J < NumContextSizeInfoEntries; J++) {
81668166
assert(ContextIdIndex < PendingContextIds.size());
8167-
// Skip any 0 entries for MIBs without the context size info.
8168-
if (PendingContextIds[ContextIdIndex] == 0) {
8169-
// The size should also be 0 if the context was 0.
8170-
assert(!Record[I]);
8171-
ContextIdIndex++;
8172-
I++;
8173-
continue;
8174-
}
81758167
// PendingContextIds read from the preceding FS_ALLOC_CONTEXT_IDS
81768168
// should be in the same order as the total sizes.
81778169
ContextSizes.push_back(

llvm/lib/Bitcode/Writer/BitcodeWriter.cpp

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
#include "llvm/ADT/SmallVector.h"
2424
#include "llvm/ADT/StringMap.h"
2525
#include "llvm/ADT/StringRef.h"
26-
#include "llvm/Analysis/MemoryProfileInfo.h"
2726
#include "llvm/BinaryFormat/Dwarf.h"
2827
#include "llvm/Bitcode/BitcodeCommon.h"
2928
#include "llvm/Bitcode/BitcodeReader.h"
@@ -4586,23 +4585,14 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
45864585
Stream.EmitRecord(bitc::FS_STACK_IDS, Vals, StackIdAbbvId);
45874586
}
45884587

4589-
unsigned ContextIdAbbvId = 0;
4590-
if (metadataMayIncludeContextSizeInfo()) {
4591-
// n x context id
4592-
auto ContextIdAbbv = std::make_shared<BitCodeAbbrev>();
4593-
ContextIdAbbv->Add(BitCodeAbbrevOp(bitc::FS_ALLOC_CONTEXT_IDS));
4594-
ContextIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
4595-
// The context ids are hashes that are close to 64 bits in size, so emitting
4596-
// as a pair of 32-bit fixed-width values is more efficient than a VBR if we
4597-
// are emitting them for all MIBs. Otherwise we use VBR to better compress 0
4598-
// values that are expected to more frequently occur in an alloc's memprof
4599-
// summary.
4600-
if (metadataIncludesAllContextSizeInfo())
4601-
ContextIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
4602-
else
4603-
ContextIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
4604-
ContextIdAbbvId = Stream.EmitAbbrev(std::move(ContextIdAbbv));
4605-
}
4588+
// n x context id
4589+
auto ContextIdAbbv = std::make_shared<BitCodeAbbrev>();
4590+
ContextIdAbbv->Add(BitCodeAbbrevOp(bitc::FS_ALLOC_CONTEXT_IDS));
4591+
ContextIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
4592+
// The context ids are hashes that are close to 64 bits in size, so emitting
4593+
// as a pair of 32-bit fixed-width values is more efficient than a VBR.
4594+
ContextIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
4595+
unsigned ContextIdAbbvId = Stream.EmitAbbrev(std::move(ContextIdAbbv));
46064596

46074597
// Abbrev for FS_PERMODULE_PROFILE.
46084598
Abbv = std::make_shared<BitCodeAbbrev>();

llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2232,8 +2232,9 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
22322232
CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
22332233
EmptyContext;
22342234
unsigned I = 0;
2235-
assert(!metadataMayIncludeContextSizeInfo() ||
2236-
AN.ContextSizeInfos.size() == AN.MIBs.size());
2235+
assert(
2236+
(!MemProfReportHintedSizes && MinClonedColdBytePercent >= 100) ||
2237+
AN.ContextSizeInfos.size() == AN.MIBs.size());
22372238
// Now add all of the MIBs and their stack nodes.
22382239
for (auto &MIB : AN.MIBs) {
22392240
CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>

llvm/lib/Transforms/Instrumentation/MemProfiler.cpp

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,10 @@ static cl::opt<bool> ClMemProfAttachCalleeGuids(
184184
"Attach calleeguids as value profile metadata for indirect calls."),
185185
cl::init(true), cl::Hidden);
186186

187+
extern cl::opt<bool> MemProfReportHintedSizes;
188+
extern cl::opt<unsigned> MinClonedColdBytePercent;
189+
extern cl::opt<unsigned> MinCallsiteColdBytePercent;
190+
187191
static cl::opt<unsigned> MinMatchedColdBytePercent(
188192
"memprof-matching-cold-threshold", cl::init(100), cl::Hidden,
189193
cl::desc("Min percent of cold bytes matched to hint allocation cold"));
@@ -295,6 +299,13 @@ class ModuleMemProfiler {
295299
Function *MemProfCtorFunction = nullptr;
296300
};
297301

302+
// Options under which we need to record the context size info in the alloc trie
303+
// used to build metadata.
304+
bool recordContextSizeInfo() {
305+
return MemProfReportHintedSizes || MinClonedColdBytePercent < 100 ||
306+
MinCallsiteColdBytePercent < 100;
307+
}
308+
298309
} // end anonymous namespace
299310

300311
MemProfilerPass::MemProfilerPass() = default;
@@ -747,7 +758,7 @@ static AllocationType addCallStack(CallStackTrie &AllocTrie,
747758
AllocInfo->Info.getAllocCount(),
748759
AllocInfo->Info.getTotalLifetime());
749760
std::vector<ContextTotalSize> ContextSizeInfo;
750-
if (recordContextSizeInfoForAnalysis()) {
761+
if (recordContextSizeInfo()) {
751762
auto TotalSize = AllocInfo->Info.getTotalSize();
752763
assert(TotalSize);
753764
assert(FullStackId != 0);
@@ -992,7 +1003,7 @@ static void readMemprof(Module &M, Function &F,
9921003
&FullStackIdToAllocMatchInfo,
9931004
std::set<std::vector<uint64_t>> &MatchedCallSites,
9941005
DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
995-
OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize) {
1006+
OptimizationRemarkEmitter &ORE) {
9961007
auto &Ctx = M.getContext();
9971008
// Previously we used getIRPGOFuncName() here. If F is local linkage,
9981009
// getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
@@ -1181,7 +1192,7 @@ static void readMemprof(Module &M, Function &F,
11811192
// We may match this instruction's location list to multiple MIB
11821193
// contexts. Add them to a Trie specialized for trimming the contexts to
11831194
// the minimal needed to disambiguate contexts with unique behavior.
1184-
CallStackTrie AllocTrie(&ORE, MaxColdSize);
1195+
CallStackTrie AllocTrie(&ORE);
11851196
uint64_t TotalSize = 0;
11861197
uint64_t TotalColdSize = 0;
11871198
for (auto *AllocInfo : AllocInfoIter->second) {
@@ -1192,7 +1203,7 @@ static void readMemprof(Module &M, Function &F,
11921203
InlinedCallStack)) {
11931204
NumOfMemProfMatchedAllocContexts++;
11941205
uint64_t FullStackId = 0;
1195-
if (ClPrintMemProfMatchInfo || recordContextSizeInfoForAnalysis())
1206+
if (ClPrintMemProfMatchInfo || recordContextSizeInfo())
11961207
FullStackId = computeFullStackId(AllocInfo->CallStack);
11971208
auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId);
11981209
TotalSize += AllocInfo->Info.getTotalSize();
@@ -1329,18 +1340,14 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
13291340
// call stack.
13301341
std::set<std::vector<uint64_t>> MatchedCallSites;
13311342

1332-
uint64_t MaxColdSize = 0;
1333-
if (auto *MemProfSum = MemProfReader->getMemProfSummary())
1334-
MaxColdSize = MemProfSum->getMaxColdTotalSize();
1335-
13361343
for (auto &F : M) {
13371344
if (F.isDeclaration())
13381345
continue;
13391346

13401347
const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
13411348
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
13421349
readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
1343-
MatchedCallSites, UndriftMaps, ORE, MaxColdSize);
1350+
MatchedCallSites, UndriftMaps, ORE);
13441351
}
13451352

13461353
if (ClPrintMemProfMatchInfo) {

llvm/test/ThinLTO/X86/memprof-report-hinted-partial.ll

Lines changed: 0 additions & 73 deletions
This file was deleted.

0 commit comments

Comments
 (0)