// subsequently annotated with an attribute for later transformation.
//
// The transformations can be performed either directly on IR (regular LTO), or
- // (eventually) on a ThinLTO index (later applied to the IR during the ThinLTO
- // backend). Both types of LTO operate on a the same base graph representation,
- // which uses CRTP to support either IR or Index formats.
+ // on a ThinLTO index (and later applied to the IR during the ThinLTO backend).
+ // Both types of LTO operate on the same base graph representation, which
+ // uses CRTP to support either IR or Index formats.
//
//===----------------------------------------------------------------------===//
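As a quick illustration of the CRTP layering described above, the base graph can call into either the IR or the Index specialization without virtual dispatch. This is only a minimal sketch with made-up names (the real classes are CallsiteContextGraph and its Module/Index derivations shown below):

  // Minimal CRTP sketch; class and member names here are hypothetical.
  template <typename Derived> class GraphBase {
  public:
    void process() {
      // Statically dispatches to the derived class, so the same graph
      // algorithms work over IR (regular LTO) or a summary index (ThinLTO).
      uint64_t Id = static_cast<Derived *>(this)->getStackId(0);
      (void)Id;
    }
  };
  class IRGraph : public GraphBase<IRGraph> {
    friend GraphBase<IRGraph>;
    uint64_t getStackId(uint64_t Id) { return Id; } // IR stores ids directly
  };
  class IndexGraph : public GraphBase<IndexGraph> {
    friend GraphBase<IndexGraph>;
    uint64_t getStackId(uint64_t I) { return StackIdTable[I]; } // index -> id
    std::vector<uint64_t> StackIdTable;
  };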
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
+ #include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+ #include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
@@ -458,6 +460,56 @@ class ModuleCallsiteContextGraph
  const Module &Mod;
};

+ /// Represents a call in the summary index graph, which can either be an
+ /// allocation or an interior callsite node in an allocation's context.
+ /// Holds a pointer to the corresponding data structure in the index.
+ struct IndexCall : public PointerUnion<CallsiteInfo *, AllocInfo *> {
+   IndexCall() : PointerUnion() {}
+   IndexCall(std::nullptr_t) : IndexCall() {}
+   IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
+   IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
+
+   IndexCall *operator->() { return this; }
+
+   void print(raw_ostream &OS) const {
+     if (auto *AI = dyn_cast<AllocInfo *>())
+       OS << *AI;
+     else {
+       auto *CI = dyn_cast<CallsiteInfo *>();
+       assert(CI);
+       OS << *CI;
+     }
+   }
+ };
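Because IndexCall derives from PointerUnion, callers query which kind of record it wraps via the union's is<>/dyn_cast<> members, exactly as the new methods below do. A small usage sketch (the inspect helper is hypothetical, not part of the patch):

  // Hypothetical helper showing the IndexCall discriminated-union API.
  void inspect(IndexCall &Call, raw_ostream &OS) {
    if (Call.is<AllocInfo *>())
      OS << "allocation record\n"; // leaf allocation of a context
    else if (auto *CI = Call.dyn_cast<CallsiteInfo *>())
      OS << "callsite -> " << CI->Callee.name() << "\n"; // interior frame
  }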
+
+ /// CRTP derived class for graphs built from summary index (ThinLTO).
+ class IndexCallsiteContextGraph
+     : public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
+                                   IndexCall> {
+ public:
+   IndexCallsiteContextGraph(
+       ModuleSummaryIndex &Index,
+       function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+           isPrevailing);
+
+ private:
+   friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
+                               IndexCall>;
+
+   uint64_t getStackId(uint64_t IdOrIndex) const;
+   bool calleeMatchesFunc(IndexCall &Call, const FunctionSummary *Func);
+   uint64_t getLastStackId(IndexCall &Call);
+   std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call);
+   std::string getLabel(const FunctionSummary *Func, const IndexCall &Call,
+                        unsigned CloneNo) const;
+
+   // Saves a mapping from function summaries containing memprof records back
+   // to their VI, for use in checking and debugging.
+   std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
+
+   const ModuleSummaryIndex &Index;
+ };
+

namespace {

struct FieldSeparator {
@@ -475,6 +527,20 @@ raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) {
  return OS << FS.Sep;
}

+ // Map the uint8_t alloc types (which may contain NotCold|Cold) to the alloc
+ // type we should actually use on the corresponding allocation.
+ // If we can't clone a node that has NotCold+Cold alloc type, we will fall
+ // back to using NotCold. So don't bother cloning to distinguish NotCold+Cold
+ // from NotCold.
+ AllocationType allocTypeToUse(uint8_t AllocTypes) {
+   assert(AllocTypes != (uint8_t)AllocationType::None);
+   if (AllocTypes ==
+       ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
+     return AllocationType::NotCold;
+   else
+     return (AllocationType)AllocTypes;
+ }
+
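Concretely, since the alloc types are bit flags, the mapping behaves as follows (a behavior sketch restating the function above, not new logic):

  // allocTypeToUse(Cold)         == Cold     (single type: use it as-is)
  // allocTypeToUse(NotCold)      == NotCold
  // allocTypeToUse(NotCold|Cold) == NotCold  (mixed contexts fall back to
  //                                           the default, uncloned behavior)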
} // end anonymous namespace

template <typename DerivedCCG, typename FuncTy, typename CallTy>
@@ -1118,6 +1184,20 @@ uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) {
  return CallsiteContext.back();
}

+ uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) {
+   assert(Call.is<CallsiteInfo *>());
+   CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
+       CallsiteContext(Call.dyn_cast<CallsiteInfo *>());
+   // Need to convert index into stack id.
+   return Index.getStackIdAtIndex(CallsiteContext.back());
+ }
+
+ static std::string getMemProfFuncName(Twine Base, unsigned CloneNo) {
+   if (!CloneNo)
+     return Base.str();
+   return (Base + ".memprof." + Twine(CloneNo)).str();
+ }
+
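For example, getMemProfFuncName produces the following clone names (assuming a function named foo):

  // getMemProfFuncName("foo", 0) == "foo"            (original copy)
  // getMemProfFuncName("foo", 2) == "foo.memprof.2"  (its second clone)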
std::string ModuleCallsiteContextGraph::getLabel(const Function *Func,
                                                 const Instruction *Call,
                                                 unsigned CloneNo) const {
@@ -1126,6 +1206,22 @@ std::string ModuleCallsiteContextGraph::getLabel(const Function *Func,
      .str();
}

+ std::string IndexCallsiteContextGraph::getLabel(const FunctionSummary *Func,
+                                                 const IndexCall &Call,
+                                                 unsigned CloneNo) const {
+   auto VI = FSToVIMap.find(Func);
+   assert(VI != FSToVIMap.end());
+   if (Call.is<AllocInfo *>())
+     return (VI->second.name() + " -> alloc").str();
+   else {
+     auto *Callsite = Call.dyn_cast<CallsiteInfo *>();
+     return (VI->second.name() + " -> " +
+             getMemProfFuncName(Callsite->Callee.name(),
+                                Callsite->Clones[CloneNo]))
+         .str();
+   }
+ }
+

std::vector<uint64_t>
ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
    Instruction *Call) {
@@ -1135,6 +1231,16 @@ ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
      CallsiteContext);
}

+ std::vector<uint64_t>
+ IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) {
+   assert(Call.is<CallsiteInfo *>());
+   CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
+       CallsiteContext(Call.dyn_cast<CallsiteInfo *>());
+   return getStackIdsWithContextNodes<CallsiteInfo,
+                                      SmallVector<unsigned>::const_iterator>(
+       CallsiteContext);
+ }
+

template <typename DerivedCCG, typename FuncTy, typename CallTy>
template <class NodeT, class IteratorT>
std::vector<uint64_t>
@@ -1207,6 +1313,84 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(Module &M) : Mod(M) {
      Call.call()->setMetadata(LLVMContext::MD_callsite, nullptr);
}

+ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
+     ModuleSummaryIndex &Index,
+     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+         isPrevailing)
+     : Index(Index) {
+   for (auto &I : Index) {
+     auto VI = Index.getValueInfo(I);
+     for (auto &S : VI.getSummaryList()) {
+       // We should only add the prevailing nodes. Otherwise we may try to
+       // clone in a weak copy that won't be linked (and may be different from
+       // the prevailing version).
+       // We only keep the memprof summary on the prevailing copy now when
+       // building the combined index, as a space optimization; however, don't
+       // rely on this optimization. The linker doesn't resolve local linkage
+       // values so don't check whether those are prevailing.
+       if (!GlobalValue::isLocalLinkage(S->linkage()) &&
+           !isPrevailing(VI.getGUID(), S.get()))
+         continue;
+       auto *FS = dyn_cast<FunctionSummary>(S.get());
+       if (!FS)
+         continue;
+       std::vector<CallInfo> CallsWithMetadata;
+       if (!FS->allocs().empty()) {
+         for (auto &AN : FS->mutableAllocs()) {
+           // This can happen because of recursion elimination handling that
+           // currently exists in ModuleSummaryAnalysis. Skip these for now.
+           // We still added them to the summary because we need to be able to
+           // correlate properly in applyImport in the backends.
+           if (AN.MIBs.empty())
+             continue;
+           CallsWithMetadata.push_back({&AN});
+           auto *AllocNode = addAllocNode({&AN}, FS);
+           // Pass an empty CallStack to the CallsiteContext (second)
+           // parameter, since for ThinLTO we already collapsed out the inlined
+           // stack ids on the allocation call during ModuleSummaryAnalysis.
+           CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
+               EmptyContext;
+           // Now add all of the MIBs and their stack nodes.
+           for (auto &MIB : AN.MIBs) {
+             CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
+                 StackContext(&MIB);
+             addStackNodesForMIB<MIBInfo,
+                                 SmallVector<unsigned>::const_iterator>(
+                 AllocNode, StackContext, EmptyContext, MIB.AllocType);
+           }
+           assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
+           // Initialize version 0 on the summary alloc node to the current
+           // alloc type, unless it has both types, in which case make it the
+           // default, so that in the case where we aren't able to clone, the
+           // original version always ends up with the default allocation
+           // behavior.
+           AN.Versions[0] = (uint8_t)allocTypeToUse(AllocNode->AllocTypes);
+         }
+       }
+       // For callsite metadata, add to list for this function for later use.
+       if (!FS->callsites().empty())
+         for (auto &SN : FS->mutableCallsites())
+           CallsWithMetadata.push_back({&SN});
+
+       if (!CallsWithMetadata.empty())
+         FuncToCallsWithMetadata.push_back({FS, CallsWithMetadata});
+
+       if (!FS->allocs().empty() || !FS->callsites().empty())
+         FSToVIMap[FS] = VI;
+     }
+   }
+
+   if (DumpCCG) {
+     dbgs() << "CCG before updating call stack chains:\n";
+     dbgs() << *this;
+   }
+
+   if (ExportToDot)
+     exportToDot("prestackupdate");
+
+   updateStackNodes();
+
+   handleCallsitesWithMultipleTargets();
+ }
+
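Each MIB recovered from the summary stores its call-stack frames as indices into the combined index's stack id table, so the frames an alloc node contributes can be reconstructed as in this sketch (a hypothetical debugging loop added for illustration, not part of the patch):

  // Print each MIB's frames, innermost first; StackIdIndices holds indices
  // into the index's stack id table, resolved via getStackIdAtIndex().
  for (auto &MIB : AN.MIBs)
    for (unsigned Idx : MIB.StackIdIndices)
      dbgs() << Index.getStackIdAtIndex(Idx) << " ";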
template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy,
                          CallTy>::handleCallsitesWithMultipleTargets() {
@@ -1251,6 +1435,12 @@ uint64_t ModuleCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const {
  return IdOrIndex;
}

+ uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const {
+   // In the Index case this is an index into the stack id list in the summary
+   // index; convert it to an Id.
+   return Index.getStackIdAtIndex(IdOrIndex);
+ }
+
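A small worked example of the two overloads, with table contents assumed for illustration:

  // Assumed stack id table in the combined index: {0xAB12, 0xCD34}.
  // ModuleCallsiteContextGraph::getStackId(0xCD34) == 0xCD34  (already an id)
  // IndexCallsiteContextGraph::getStackId(1)       == 0xCD34  (index -> id)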
bool ModuleCallsiteContextGraph::calleeMatchesFunc(Instruction *Call,
                                                   const Function *Func) {
  auto *CB = dyn_cast<CallBase>(Call);
@@ -1264,6 +1454,23 @@ bool ModuleCallsiteContextGraph::calleeMatchesFunc(Instruction *Call,
  return Alias && Alias->getAliasee() == Func;
}

+ bool IndexCallsiteContextGraph::calleeMatchesFunc(IndexCall &Call,
+                                                   const FunctionSummary *Func) {
+   ValueInfo Callee = Call.dyn_cast<CallsiteInfo *>()->Callee;
+   // If there is no summary list then this is a call to an externally defined
+   // symbol.
+   AliasSummary *Alias =
+       Callee.getSummaryList().empty()
+           ? nullptr
+           : dyn_cast<AliasSummary>(Callee.getSummaryList()[0].get());
+   assert(FSToVIMap.count(Func));
+   return Callee == FSToVIMap[Func] ||
+          // If callee is an alias, check the aliasee, since only function
+          // summary base objects will contain the stack node summaries and
+          // thus get a context node.
+          (Alias && Alias->getAliaseeVI() == FSToVIMap[Func]);
+ }
+
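To make the alias case concrete (foo and bar are hypothetical names):

  // If @bar is an alias of @foo, a CallsiteInfo may record Callee = bar,
  // while the stack node summaries (and thus the context node) live on
  // foo's FunctionSummary. The aliasee check above ensures a call to bar
  // still matches foo's summary.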
static std::string getAllocTypeString(uint8_t AllocTypes) {
  if (!AllocTypes)
    return "None";
@@ -1581,3 +1788,11 @@ PreservedAnalyses MemProfContextDisambiguation::run(Module &M,
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
}
+
+ void MemProfContextDisambiguation::run(
+     ModuleSummaryIndex &Index,
+     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+         isPrevailing) {
+   IndexCallsiteContextGraph CCG(Index, isPrevailing);
+   CCG.process();
+ }
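A ThinLTO client would invoke this new entry point on the combined index roughly as follows. This is only a sketch: the driver context and the PrevailingCopy map are assumptions, since in-tree callers derive the callback from their own symbol resolutions.

  // Hypothetical driver snippet (PrevailingCopy maps each GUID to the
  // summary copy the linker chose to keep).
  MemProfContextDisambiguation MemProf;
  MemProf.run(CombinedIndex,
              [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) {
                return PrevailingCopy.lookup(GUID) == S;
              });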