13
13
#include " llvm/Analysis/MemoryProfileInfo.h"
14
14
#include " llvm/IR/Constants.h"
15
15
#include " llvm/Support/CommandLine.h"
16
+ #include " llvm/Support/Format.h"
16
17
17
18
using namespace llvm ;
18
19
using namespace llvm ::memprof;
@@ -58,6 +59,19 @@ cl::opt<bool> MemProfKeepAllNotColdContexts(
58
59
" memprof-keep-all-not-cold-contexts" , cl::init(false ), cl::Hidden,
59
60
cl::desc(" Keep all non-cold contexts (increases cloning overheads)" ));
60
61
62
// Minimum percentage of cold bytes needed to hint an allocation as cold during
// cloning (per the option description). With the default of 100,
// createMIBNode does not emit per-context size metadata on this option's
// behalf; it does so only when the value is below 100 (or when hinted-size
// reporting is enabled).
+ cl::opt<unsigned > MinClonedColdBytePercent (
63
+ " memprof-cloning-cold-threshold" , cl::init(100 ), cl::Hidden,
64
+ cl::desc(" Min percent of cold bytes to hint alloc cold during cloning" ));
65
+
66
+ // Discard non-cold contexts if they overlap with much larger cold contexts,
67
+ // specifically, if the contexts reaching a given callsite are in aggregate at
68
+ // least this percent cold byte allocations. This reduces the cloning required
69
+ // to expose the cold contexts when they greatly dominate non-cold contexts.
70
+ cl::opt<unsigned > MinCallsiteColdBytePercent (
71
+ " memprof-callsite-cold-threshold" , cl::init(100 ), cl::Hidden,
72
+ cl::desc(" Min percent of cold bytes at a callsite to discard non-cold "
73
+ " contexts" ));
74
+
61
75
AllocationType llvm::memprof::getAllocType (uint64_t TotalLifetimeAccessDensity,
62
76
uint64_t AllocCount,
63
77
uint64_t TotalLifetime) {
@@ -208,13 +222,29 @@ void CallStackTrie::addCallStack(MDNode *MIB) {
208
222
209
223
static MDNode *createMIBNode (LLVMContext &Ctx, ArrayRef<uint64_t > MIBCallStack,
210
224
AllocationType AllocType,
211
- ArrayRef<ContextTotalSize> ContextSizeInfo) {
225
+ ArrayRef<ContextTotalSize> ContextSizeInfo,
226
+ uint64_t &TotalBytes, uint64_t &ColdBytes) {
212
227
SmallVector<Metadata *> MIBPayload (
213
228
{buildCallstackMetadata (MIBCallStack, Ctx)});
214
229
MIBPayload.push_back (
215
230
MDString::get (Ctx, getAllocTypeAttributeString (AllocType)));
216
- if (!ContextSizeInfo.empty ()) {
217
- for (const auto &[FullStackId, TotalSize] : ContextSizeInfo) {
231
+
232
+ if (ContextSizeInfo.empty ()) {
233
+ // The profile matcher should have provided context size info if there was a
234
+ // MinCallsiteColdBytePercent < 100. Here we check >=100 to gracefully
235
+ // handle a user-provided percent larger than 100.
236
+ assert (MinCallsiteColdBytePercent >= 100 );
237
+ return MDNode::get (Ctx, MIBPayload);
238
+ }
239
+
240
+ for (const auto &[FullStackId, TotalSize] : ContextSizeInfo) {
241
+ TotalBytes += TotalSize;
242
+ if (AllocType == AllocationType::Cold)
243
+ ColdBytes += TotalSize;
244
+ // Only add the context size info as metadata if we need it in the thin
245
+ // link (currently if reporting of hinted sizes is enabled or we have
246
+ // specified a threshold for marking allocations cold after cloning).
247
+ if (MemProfReportHintedSizes || MinClonedColdBytePercent < 100 ) {
218
248
auto *FullStackIdMD = ValueAsMetadata::get (
219
249
ConstantInt::get (Type::getInt64Ty (Ctx), FullStackId));
220
250
auto *TotalSizeMD = ValueAsMetadata::get (
@@ -223,6 +253,7 @@ static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack,
223
253
MIBPayload.push_back (ContextSizeMD);
224
254
}
225
255
}
256
+ assert (TotalBytes > 0 );
226
257
return MDNode::get (Ctx, MIBPayload);
227
258
}
228
259
@@ -246,9 +277,14 @@ void CallStackTrie::convertHotToNotCold(CallStackTrieNode *Node) {
246
277
// on options that enable filtering out some NotCold contexts.
247
278
static void saveFilteredNewMIBNodes (std::vector<Metadata *> &NewMIBNodes,
248
279
std::vector<Metadata *> &SavedMIBNodes,
249
- unsigned CallerContextLength) {
280
+ unsigned CallerContextLength,
281
+ uint64_t TotalBytes, uint64_t ColdBytes) {
282
+ const bool MostlyCold =
283
+ MinCallsiteColdBytePercent < 100 &&
284
+ ColdBytes * 100 >= MinCallsiteColdBytePercent * TotalBytes;
285
+
250
286
// In the simplest case, with pruning disabled, keep all the new MIB nodes.
251
- if (MemProfKeepAllNotColdContexts) {
287
+ if (MemProfKeepAllNotColdContexts && !MostlyCold ) {
252
288
append_range (SavedMIBNodes, NewMIBNodes);
253
289
return ;
254
290
}
@@ -271,6 +307,30 @@ static void saveFilteredNewMIBNodes(std::vector<Metadata *> &NewMIBNodes,
271
307
}
272
308
};
273
309
310
+ // If the cold bytes at the current callsite reach the given threshold, we
311
+ // discard all non-cold contexts so do not need any of the later pruning
312
+ // handling. We can simply copy over all the cold contexts and return early.
313
+ if (MostlyCold) {
314
+ auto NewColdMIBNodes =
315
+ make_filter_range (NewMIBNodes, [&](const Metadata *M) {
316
+ auto MIBMD = cast<MDNode>(M);
317
+ // Only append cold contexts.
318
+ if (getMIBAllocType (MIBMD) == AllocationType::Cold)
319
+ return true ;
320
+ if (MemProfReportHintedSizes) {
321
+ const float PercentCold = ColdBytes * 100.0 / TotalBytes;
322
+ std::string PercentStr;
323
+ llvm::raw_string_ostream OS (PercentStr);
324
+ OS << format (" for %5.2f%% cold bytes" , PercentCold);
325
+ EmitMessageForRemovedContexts (MIBMD, " discarded" , OS.str ());
326
+ }
327
+ return false ;
328
+ });
329
+ for (auto *M : NewColdMIBNodes)
330
+ SavedMIBNodes.push_back (M);
331
+ return ;
332
+ }
333
+
274
334
// Prune unneeded NotCold contexts, taking advantage of the fact
275
335
// that we later will only clone Cold contexts, as NotCold is the allocation
276
336
// default. We only need to keep as metadata the NotCold contexts that
@@ -341,17 +401,20 @@ static void saveFilteredNewMIBNodes(std::vector<Metadata *> &NewMIBNodes,
341
401
// Recursive helper to trim contexts and create metadata nodes.
342
402
// Caller should have pushed Node's loc to MIBCallStack. Doing this in the
343
403
// caller makes it simpler to handle the many early returns in this method.
404
+ // Updates the total and cold profiled bytes in the subtrie rooted at this node.
344
405
bool CallStackTrie::buildMIBNodes (CallStackTrieNode *Node, LLVMContext &Ctx,
345
406
std::vector<uint64_t > &MIBCallStack,
346
407
std::vector<Metadata *> &MIBNodes,
347
- bool CalleeHasAmbiguousCallerContext) {
408
+ bool CalleeHasAmbiguousCallerContext,
409
+ uint64_t &TotalBytes, uint64_t &ColdBytes) {
348
410
// Trim context below the first node in a prefix with a single alloc type.
349
411
// Add an MIB record for the current call stack prefix.
350
412
if (hasSingleAllocType (Node->AllocTypes )) {
351
413
std::vector<ContextTotalSize> ContextSizeInfo;
352
414
collectContextSizeInfo (Node, ContextSizeInfo);
353
- MIBNodes.push_back (createMIBNode (
354
- Ctx, MIBCallStack, (AllocationType)Node->AllocTypes , ContextSizeInfo));
415
+ MIBNodes.push_back (createMIBNode (Ctx, MIBCallStack,
416
+ (AllocationType)Node->AllocTypes ,
417
+ ContextSizeInfo, TotalBytes, ColdBytes));
355
418
return true ;
356
419
}
357
420
@@ -364,17 +427,25 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
364
427
// that will later be filtered before adding to the caller's MIBNodes
365
428
// vector.
366
429
std::vector<Metadata *> NewMIBNodes;
430
+ // Determine the total and cold byte counts for all callers, then add to the
431
+ // caller's counts further below.
432
+ uint64_t CallerTotalBytes = 0 ;
433
+ uint64_t CallerColdBytes = 0 ;
367
434
for (auto &Caller : Node->Callers ) {
368
435
MIBCallStack.push_back (Caller.first );
369
- AddedMIBNodesForAllCallerContexts &=
370
- buildMIBNodes ( Caller.second , Ctx, MIBCallStack, NewMIBNodes,
371
- NodeHasAmbiguousCallerContext );
436
+ AddedMIBNodesForAllCallerContexts &= buildMIBNodes (
437
+ Caller.second , Ctx, MIBCallStack, NewMIBNodes,
438
+ NodeHasAmbiguousCallerContext, CallerTotalBytes, CallerColdBytes );
372
439
// Remove Caller.
373
440
MIBCallStack.pop_back ();
374
441
}
375
442
// Pass in the stack length of the MIB nodes added for the immediate caller,
376
443
// which is the current stack length plus 1.
377
- saveFilteredNewMIBNodes (NewMIBNodes, MIBNodes, MIBCallStack.size () + 1 );
444
+ saveFilteredNewMIBNodes (NewMIBNodes, MIBNodes, MIBCallStack.size () + 1 ,
445
+ CallerTotalBytes, CallerColdBytes);
446
+ TotalBytes += CallerTotalBytes;
447
+ ColdBytes += CallerColdBytes;
448
+
378
449
if (AddedMIBNodesForAllCallerContexts)
379
450
return true ;
380
451
// We expect that the callers should be forced to add MIBs to disambiguate
@@ -397,7 +468,7 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
397
468
std::vector<ContextTotalSize> ContextSizeInfo;
398
469
collectContextSizeInfo (Node, ContextSizeInfo);
399
470
MIBNodes.push_back (createMIBNode (Ctx, MIBCallStack, AllocationType::NotCold,
400
- ContextSizeInfo));
471
+ ContextSizeInfo, TotalBytes, ColdBytes ));
401
472
return true ;
402
473
}
403
474
@@ -444,12 +515,15 @@ bool CallStackTrie::buildAndAttachMIBMetadata(CallBase *CI) {
444
515
std::vector<uint64_t > MIBCallStack;
445
516
MIBCallStack.push_back (AllocStackId);
446
517
std::vector<Metadata *> MIBNodes;
518
+ uint64_t TotalBytes = 0 ;
519
+ uint64_t ColdBytes = 0 ;
447
520
assert (!Alloc->Callers .empty () && " addCallStack has not been called yet" );
448
521
// The CalleeHasAmbiguousCallerContext flag is meant to say whether the
449
522
// callee of the given node has more than one caller. Here the node being
450
523
// passed in is the alloc and it has no callees. So it's false.
451
524
if (buildMIBNodes (Alloc, Ctx, MIBCallStack, MIBNodes,
452
- /* CalleeHasAmbiguousCallerContext=*/ false )) {
525
+ /* CalleeHasAmbiguousCallerContext=*/ false , TotalBytes,
526
+ ColdBytes)) {
453
527
assert (MIBCallStack.size () == 1 &&
454
528
" Should only be left with Alloc's location in stack" );
455
529
CI->setMetadata (LLVMContext::MD_memprof, MDNode::get (Ctx, MIBNodes));
0 commit comments