Skip to content

Commit fe27495

Browse files
committed
[MemProf] Context disambiguation cloning pass [patch 1b/3]
Adds support for building the graph in ThinLTO from MemProf summaries. Follow-on patches will contain the support for cloning on the graph and in the IR. Depends on D140908. Differential Revision: https://reviews.llvm.org/D145836
1 parent fb8d894 commit fe27495

File tree

10 files changed

+1599
-7
lines changed

10 files changed

+1599
-7
lines changed

llvm/include/llvm/IR/ModuleSummaryIndex.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -988,12 +988,22 @@ class FunctionSummary : public GlobalValueSummary {
988988
return {};
989989
}
990990

991+
CallsitesTy &mutableCallsites() {
992+
assert(Callsites);
993+
return *Callsites;
994+
}
995+
991996
ArrayRef<AllocInfo> allocs() const {
992997
if (Allocs)
993998
return *Allocs;
994999
return {};
9951000
}
9961001

1002+
AllocsTy &mutableAllocs() {
1003+
assert(Allocs);
1004+
return *Allocs;
1005+
}
1006+
9971007
friend struct GraphTraits<ValueInfo>;
9981008
};
9991009

llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,12 @@
1919
#include "llvm/ADT/StringSet.h"
2020
#include "llvm/IR/GlobalValue.h"
2121
#include "llvm/IR/PassManager.h"
22+
#include <functional>
2223

2324
namespace llvm {
25+
class GlobalValueSummary;
2426
class Module;
27+
class ModuleSummaryIndex;
2528

2629
class MemProfContextDisambiguation
2730
: public PassInfoMixin<MemProfContextDisambiguation> {
@@ -32,6 +35,10 @@ class MemProfContextDisambiguation
3235
MemProfContextDisambiguation() {}
3336

3437
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
38+
39+
void run(ModuleSummaryIndex &Index,
40+
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
41+
isPrevailing);
3542
};
3643
} // end namespace llvm
3744

llvm/lib/LTO/LTO.cpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
#include "llvm/Support/raw_ostream.h"
5252
#include "llvm/Target/TargetOptions.h"
5353
#include "llvm/Transforms/IPO.h"
54+
#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
5455
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
5556
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
5657
#include "llvm/Transforms/Utils/SplitModule.h"
@@ -75,6 +76,9 @@ cl::opt<bool> EnableLTOInternalization(
7576
cl::desc("Enable global value internalization in LTO"));
7677
}
7778

79+
/// Enable MemProf context disambiguation for thin link.
80+
extern cl::opt<bool> EnableMemProfContextDisambiguation;
81+
7882
// Computes a unique hash for the Module considering the current list of
7983
// export/import and other global analysis results.
8084
// The hash is produced in \p Key.
@@ -1539,6 +1543,14 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
15391543
runWholeProgramDevirtOnIndex(ThinLTO.CombinedIndex, ExportedGUIDs,
15401544
LocalWPDTargetsMap);
15411545

1546+
auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) {
1547+
return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath();
1548+
};
1549+
if (EnableMemProfContextDisambiguation) {
1550+
MemProfContextDisambiguation ContextDisambiguation;
1551+
ContextDisambiguation.run(ThinLTO.CombinedIndex, isPrevailing);
1552+
}
1553+
15421554
if (Conf.OptLevel > 0)
15431555
ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
15441556
ImportLists, ExportLists);
@@ -1580,10 +1592,6 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
15801592
updateIndexWPDForExports(ThinLTO.CombinedIndex, isExported,
15811593
LocalWPDTargetsMap);
15821594

1583-
auto isPrevailing = [&](GlobalValue::GUID GUID,
1584-
const GlobalValueSummary *S) {
1585-
return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath();
1586-
};
15871595
thinLTOInternalizeAndPromoteInIndex(ThinLTO.CombinedIndex, isExported,
15881596
isPrevailing);
15891597

llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp

Lines changed: 218 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@
1414
// subsequently annotated with an attribute for later transformation.
1515
//
1616
// The transformations can be performed either directly on IR (regular LTO), or
17-
// (eventually) on a ThinLTO index (later applied to the IR during the ThinLTO
18-
// backend). Both types of LTO operate on a the same base graph representation,
19-
// which uses CRTP to support either IR or Index formats.
17+
// on a ThinLTO index (and later applied to the IR during the ThinLTO backend).
18+
// Both types of LTO operate on the same base graph representation, which
19+
// uses CRTP to support either IR or Index formats.
2020
//
2121
//===----------------------------------------------------------------------===//
2222

@@ -28,9 +28,11 @@
2828
#include "llvm/ADT/SmallSet.h"
2929
#include "llvm/ADT/SmallVector.h"
3030
#include "llvm/Analysis/MemoryProfileInfo.h"
31+
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
3132
#include "llvm/IR/Constants.h"
3233
#include "llvm/IR/Instructions.h"
3334
#include "llvm/IR/Module.h"
35+
#include "llvm/IR/ModuleSummaryIndex.h"
3436
#include "llvm/Pass.h"
3537
#include "llvm/Support/CommandLine.h"
3638
#include "llvm/Support/FileSystem.h"
@@ -458,6 +460,56 @@ class ModuleCallsiteContextGraph
458460
const Module &Mod;
459461
};
460462

463+
/// Represents a call in the summary index graph, which can either be an
464+
/// allocation or an interior callsite node in an allocation's context.
465+
/// Holds a pointer to the corresponding data structure in the index.
466+
struct IndexCall : public PointerUnion<CallsiteInfo *, AllocInfo *> {
467+
IndexCall() : PointerUnion() {}
468+
IndexCall(std::nullptr_t) : IndexCall() {}
469+
IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
470+
IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
471+
472+
IndexCall *operator->() { return this; }
473+
474+
void print(raw_ostream &OS) const {
475+
if (auto *AI = dyn_cast<AllocInfo *>())
476+
OS << *AI;
477+
else {
478+
auto *CI = dyn_cast<CallsiteInfo *>();
479+
assert(CI);
480+
OS << *CI;
481+
}
482+
}
483+
};
484+
485+
/// CRTP derived class for graphs built from summary index (ThinLTO).
486+
class IndexCallsiteContextGraph
487+
: public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
488+
IndexCall> {
489+
public:
490+
IndexCallsiteContextGraph(
491+
ModuleSummaryIndex &Index,
492+
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
493+
isPrevailing);
494+
495+
private:
496+
friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
497+
IndexCall>;
498+
499+
uint64_t getStackId(uint64_t IdOrIndex) const;
500+
bool calleeMatchesFunc(IndexCall &Call, const FunctionSummary *Func);
501+
uint64_t getLastStackId(IndexCall &Call);
502+
std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call);
503+
std::string getLabel(const FunctionSummary *Func, const IndexCall &Call,
504+
unsigned CloneNo) const;
505+
506+
// Saves mapping from function summaries containing memprof records back to
507+
// their VIs, for use in checking and debugging.
508+
std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
509+
510+
const ModuleSummaryIndex &Index;
511+
};
512+
461513
namespace {
462514

463515
struct FieldSeparator {
@@ -475,6 +527,20 @@ raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) {
475527
return OS << FS.Sep;
476528
}
477529

530+
// Map the uint8_t alloc types (which may contain NotCold|Cold) to the alloc
531+
// type we should actually use on the corresponding allocation.
532+
// If we can't clone a node that has NotCold+Cold alloc type, we will fall
533+
// back to using NotCold. So don't bother cloning to distinguish NotCold+Cold
534+
// from NotCold.
535+
AllocationType allocTypeToUse(uint8_t AllocTypes) {
536+
assert(AllocTypes != (uint8_t)AllocationType::None);
537+
if (AllocTypes ==
538+
((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
539+
return AllocationType::NotCold;
540+
else
541+
return (AllocationType)AllocTypes;
542+
}
543+
478544
} // end anonymous namespace
479545

480546
template <typename DerivedCCG, typename FuncTy, typename CallTy>
@@ -1118,6 +1184,20 @@ uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) {
11181184
return CallsiteContext.back();
11191185
}
11201186

1187+
uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) {
1188+
assert(Call.is<CallsiteInfo *>());
1189+
CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
1190+
CallsiteContext(Call.dyn_cast<CallsiteInfo *>());
1191+
// Need to convert index into stack id.
1192+
return Index.getStackIdAtIndex(CallsiteContext.back());
1193+
}
1194+
1195+
static std::string getMemProfFuncName(Twine Base, unsigned CloneNo) {
1196+
if (!CloneNo)
1197+
return Base.str();
1198+
return (Base + ".memprof." + Twine(CloneNo)).str();
1199+
}
1200+
11211201
std::string ModuleCallsiteContextGraph::getLabel(const Function *Func,
11221202
const Instruction *Call,
11231203
unsigned CloneNo) const {
@@ -1126,6 +1206,22 @@ std::string ModuleCallsiteContextGraph::getLabel(const Function *Func,
11261206
.str();
11271207
}
11281208

1209+
std::string IndexCallsiteContextGraph::getLabel(const FunctionSummary *Func,
1210+
const IndexCall &Call,
1211+
unsigned CloneNo) const {
1212+
auto VI = FSToVIMap.find(Func);
1213+
assert(VI != FSToVIMap.end());
1214+
if (Call.is<AllocInfo *>())
1215+
return (VI->second.name() + " -> alloc").str();
1216+
else {
1217+
auto *Callsite = Call.dyn_cast<CallsiteInfo *>();
1218+
return (VI->second.name() + " -> " +
1219+
getMemProfFuncName(Callsite->Callee.name(),
1220+
Callsite->Clones[CloneNo]))
1221+
.str();
1222+
}
1223+
}
1224+
11291225
std::vector<uint64_t>
11301226
ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
11311227
Instruction *Call) {
@@ -1135,6 +1231,16 @@ ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
11351231
CallsiteContext);
11361232
}
11371233

1234+
std::vector<uint64_t>
1235+
IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) {
1236+
assert(Call.is<CallsiteInfo *>());
1237+
CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
1238+
CallsiteContext(Call.dyn_cast<CallsiteInfo *>());
1239+
return getStackIdsWithContextNodes<CallsiteInfo,
1240+
SmallVector<unsigned>::const_iterator>(
1241+
CallsiteContext);
1242+
}
1243+
11381244
template <typename DerivedCCG, typename FuncTy, typename CallTy>
11391245
template <class NodeT, class IteratorT>
11401246
std::vector<uint64_t>
@@ -1207,6 +1313,84 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(Module &M) : Mod(M) {
12071313
Call.call()->setMetadata(LLVMContext::MD_callsite, nullptr);
12081314
}
12091315

1316+
IndexCallsiteContextGraph::IndexCallsiteContextGraph(
1317+
ModuleSummaryIndex &Index,
1318+
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1319+
isPrevailing)
1320+
: Index(Index) {
1321+
for (auto &I : Index) {
1322+
auto VI = Index.getValueInfo(I);
1323+
for (auto &S : VI.getSummaryList()) {
1324+
// We should only add the prevailing nodes. Otherwise we may try to clone
1325+
// in a weak copy that won't be linked (and may be different than the
1326+
// prevailing version).
1327+
// We only keep the memprof summary on the prevailing copy now when
1328+
// building the combined index, as a space optimization; however, don't
1329+
// rely on this optimization. The linker doesn't resolve local linkage
1330+
// values so don't check whether those are prevailing.
1331+
if (!GlobalValue::isLocalLinkage(S->linkage()) &&
1332+
!isPrevailing(VI.getGUID(), S.get()))
1333+
continue;
1334+
auto *FS = dyn_cast<FunctionSummary>(S.get());
1335+
if (!FS)
1336+
continue;
1337+
std::vector<CallInfo> CallsWithMetadata;
1338+
if (!FS->allocs().empty()) {
1339+
for (auto &AN : FS->mutableAllocs()) {
1340+
// This can happen because of recursion elimination handling that
1341+
// currently exists in ModuleSummaryAnalysis. Skip these for now.
1342+
// We still added them to the summary because we need to be able to
1343+
// correlate properly in applyImport in the backends.
1344+
if (AN.MIBs.empty())
1345+
continue;
1346+
CallsWithMetadata.push_back({&AN});
1347+
auto *AllocNode = addAllocNode({&AN}, FS);
1348+
// Pass an empty CallStack to the CallsiteContext (second)
1349+
// parameter, since for ThinLTO we already collapsed out the inlined
1350+
// stack ids on the allocation call during ModuleSummaryAnalysis.
1351+
CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
1352+
EmptyContext;
1353+
// Now add all of the MIBs and their stack nodes.
1354+
for (auto &MIB : AN.MIBs) {
1355+
CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
1356+
StackContext(&MIB);
1357+
addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
1358+
AllocNode, StackContext, EmptyContext, MIB.AllocType);
1359+
}
1360+
assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
1361+
// Initialize version 0 on the summary alloc node to the current alloc
1362+
// type, unless it has both types in which case make it default, so
1363+
// that in the case where we aren't able to clone, the original version
1364+
// always ends up with the default allocation behavior.
1365+
AN.Versions[0] = (uint8_t)allocTypeToUse(AllocNode->AllocTypes);
1366+
}
1367+
}
1368+
// For callsite metadata, add to list for this function for later use.
1369+
if (!FS->callsites().empty())
1370+
for (auto &SN : FS->mutableCallsites())
1371+
CallsWithMetadata.push_back({&SN});
1372+
1373+
if (!CallsWithMetadata.empty())
1374+
FuncToCallsWithMetadata.push_back({FS, CallsWithMetadata});
1375+
1376+
if (!FS->allocs().empty() || !FS->callsites().empty())
1377+
FSToVIMap[FS] = VI;
1378+
}
1379+
}
1380+
1381+
if (DumpCCG) {
1382+
dbgs() << "CCG before updating call stack chains:\n";
1383+
dbgs() << *this;
1384+
}
1385+
1386+
if (ExportToDot)
1387+
exportToDot("prestackupdate");
1388+
1389+
updateStackNodes();
1390+
1391+
handleCallsitesWithMultipleTargets();
1392+
}
1393+
12101394
template <typename DerivedCCG, typename FuncTy, typename CallTy>
12111395
void CallsiteContextGraph<DerivedCCG, FuncTy,
12121396
CallTy>::handleCallsitesWithMultipleTargets() {
@@ -1251,6 +1435,12 @@ uint64_t ModuleCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const {
12511435
return IdOrIndex;
12521436
}
12531437

1438+
uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const {
1439+
// In the Index case this is an index into the stack id list in the summary
1440+
// index, convert it to an Id.
1441+
return Index.getStackIdAtIndex(IdOrIndex);
1442+
}
1443+
12541444
bool ModuleCallsiteContextGraph::calleeMatchesFunc(Instruction *Call,
12551445
const Function *Func) {
12561446
auto *CB = dyn_cast<CallBase>(Call);
@@ -1264,6 +1454,23 @@ bool ModuleCallsiteContextGraph::calleeMatchesFunc(Instruction *Call,
12641454
return Alias && Alias->getAliasee() == Func;
12651455
}
12661456

1457+
bool IndexCallsiteContextGraph::calleeMatchesFunc(IndexCall &Call,
1458+
const FunctionSummary *Func) {
1459+
ValueInfo Callee = Call.dyn_cast<CallsiteInfo *>()->Callee;
1460+
// If there is no summary list then this is a call to an externally defined
1461+
// symbol.
1462+
AliasSummary *Alias =
1463+
Callee.getSummaryList().empty()
1464+
? nullptr
1465+
: dyn_cast<AliasSummary>(Callee.getSummaryList()[0].get());
1466+
assert(FSToVIMap.count(Func));
1467+
return Callee == FSToVIMap[Func] ||
1468+
// If callee is an alias, check the aliasee, since only function
1469+
// summary base objects will contain the stack node summaries and thus
1470+
// get a context node.
1471+
(Alias && Alias->getAliaseeVI() == FSToVIMap[Func]);
1472+
}
1473+
12671474
static std::string getAllocTypeString(uint8_t AllocTypes) {
12681475
if (!AllocTypes)
12691476
return "None";
@@ -1581,3 +1788,11 @@ PreservedAnalyses MemProfContextDisambiguation::run(Module &M,
15811788
return PreservedAnalyses::all();
15821789
return PreservedAnalyses::none();
15831790
}
1791+
1792+
void MemProfContextDisambiguation::run(
1793+
ModuleSummaryIndex &Index,
1794+
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1795+
isPrevailing) {
1796+
IndexCallsiteContextGraph CCG(Index, isPrevailing);
1797+
CCG.process();
1798+
}

0 commit comments

Comments
 (0)