-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[ThinLTO] Allow importing based on a workload definition #74545
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
7e256dc
[ThinLTO] Allow importing based on a workload definition
mtrofin 37bc16a
Fix test - wasn't `mkdir`-ing the second dir.
mtrofin f71bf9a
actually fix dir creation.
mtrofin 72cf3dc
addressed feedback in FunctionImport.cpp
mtrofin d090aa1
split tests, handling aliases
mtrofin 569ed19
Handling interposable and non-interposable the same way.
mtrofin 483e157
some formatting
mtrofin 54cc882
fix the test
mtrofin 71b5305
comment giving example of json content
mtrofin e428d7b
feedback
mtrofin df23812
asserts
mtrofin d017457
error checking
mtrofin File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,6 +37,7 @@ | |
#include "llvm/Support/Error.h" | ||
#include "llvm/Support/ErrorHandling.h" | ||
#include "llvm/Support/FileSystem.h" | ||
#include "llvm/Support/JSON.h" | ||
#include "llvm/Support/SourceMgr.h" | ||
#include "llvm/Support/raw_ostream.h" | ||
#include "llvm/Transforms/IPO/Internalize.h" | ||
|
@@ -138,6 +139,29 @@ static cl::opt<bool> | |
ImportAllIndex("import-all-index", | ||
cl::desc("Import all external functions in index.")); | ||
|
||
/// Pass a workload description file - an example of workload would be the | ||
/// functions executed to satisfy a RPC request. A workload is defined by a root | ||
/// function and the list of functions that are (frequently) needed to satisfy | ||
/// it. The module that defines the root will have all those functions imported. | ||
/// The file contains a JSON dictionary. The keys are root functions, the values | ||
/// are lists of functions to import in the module defining the root. It is | ||
/// assumed -funique-internal-linkage-names was used, thus ensuring function | ||
/// names are unique even for local linkage ones. | ||
static cl::opt<std::string> WorkloadDefinitions( | ||
"thinlto-workload-def", | ||
cl::desc("Pass a workload definition. This is a file containing a JSON " | ||
"dictionary. The keys are root functions, the values are lists of " | ||
"functions to import in the module defining the root. It is " | ||
"assumed -funique-internal-linkage-names was used, to ensure " | ||
"local linkage functions have unique names. For example: \n" | ||
"{\n" | ||
" \"rootFunction_1\": [\"function_to_import_1\", " | ||
"\"function_to_import_2\"], \n" | ||
" \"rootFunction_2\": [\"function_to_import_3\", " | ||
"\"function_to_import_4\"] \n" | ||
"}"), | ||
cl::Hidden); | ||
|
||
// Load lazily a module from \p FileName in \p Context. | ||
static std::unique_ptr<Module> loadFile(const std::string &FileName, | ||
LLVMContext &Context) { | ||
|
@@ -369,29 +393,264 @@ class GlobalsImporter final { | |
} | ||
}; | ||
|
||
static const char *getFailureName(FunctionImporter::ImportFailureReason Reason); | ||
|
||
/// Determine the list of imports and exports for each module. | ||
class ModuleImportsManager final { | ||
class ModuleImportsManager { | ||
protected: | ||
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> | ||
IsPrevailing; | ||
const ModuleSummaryIndex &Index; | ||
DenseMap<StringRef, FunctionImporter::ExportSetTy> *const ExportLists; | ||
|
||
public: | ||
ModuleImportsManager( | ||
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> | ||
IsPrevailing, | ||
const ModuleSummaryIndex &Index, | ||
DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists = nullptr) | ||
: IsPrevailing(IsPrevailing), Index(Index), ExportLists(ExportLists) {} | ||
|
||
public: | ||
virtual ~ModuleImportsManager() = default; | ||
|
||
/// Given the list of globals defined in a module, compute the list of imports | ||
/// as well as the list of "exports", i.e. the list of symbols referenced from | ||
/// another module (that may require promotion). | ||
void computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries, | ||
StringRef ModName, | ||
FunctionImporter::ImportMapTy &ImportList); | ||
virtual void | ||
computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries, | ||
StringRef ModName, | ||
FunctionImporter::ImportMapTy &ImportList); | ||
|
||
static std::unique_ptr<ModuleImportsManager> | ||
create(function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> | ||
IsPrevailing, | ||
const ModuleSummaryIndex &Index, | ||
DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists = | ||
nullptr); | ||
}; | ||
|
||
/// A ModuleImportsManager that operates based on a workload definition (see | ||
/// -thinlto-workload-def). For modules that do not define workload roots, it | ||
/// applies the base ModuleImportsManager import policy. | ||
class WorkloadImportsManager : public ModuleImportsManager { | ||
// Keep a module name -> value infos to import association. We use it to | ||
// determine if a module's import list should be done by the base | ||
// ModuleImportsManager or by us. | ||
StringMap<DenseSet<ValueInfo>> Workloads; | ||
|
||
void | ||
computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries, | ||
StringRef ModName, | ||
FunctionImporter::ImportMapTy &ImportList) override { | ||
auto SetIter = Workloads.find(ModName); | ||
if (SetIter == Workloads.end()) { | ||
LLVM_DEBUG(dbgs() << "[Workload] " << ModName | ||
<< " does not contain the root of any context.\n"); | ||
return ModuleImportsManager::computeImportForModule(DefinedGVSummaries, | ||
ModName, ImportList); | ||
} | ||
LLVM_DEBUG(dbgs() << "[Workload] " << ModName | ||
<< " contains the root(s) of context(s).\n"); | ||
|
||
GlobalsImporter GVI(Index, DefinedGVSummaries, IsPrevailing, ImportList, | ||
ExportLists); | ||
auto &ValueInfos = SetIter->second; | ||
SmallVector<EdgeInfo, 128> GlobWorklist; | ||
for (auto &VI : llvm::make_early_inc_range(ValueInfos)) { | ||
auto It = DefinedGVSummaries.find(VI.getGUID()); | ||
if (It != DefinedGVSummaries.end() && | ||
IsPrevailing(VI.getGUID(), It->second)) { | ||
LLVM_DEBUG( | ||
dbgs() << "[Workload] " << VI.name() | ||
<< " has the prevailing variant already in the module " | ||
<< ModName << ". No need to import\n"); | ||
continue; | ||
} | ||
auto Candidates = | ||
qualifyCalleeCandidates(Index, VI.getSummaryList(), ModName); | ||
|
||
const GlobalValueSummary *GVS = nullptr; | ||
auto PotentialCandidates = llvm::map_range( | ||
llvm::make_filter_range( | ||
mtrofin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Candidates, | ||
[&](const auto &Candidate) { | ||
LLVM_DEBUG(dbgs() << "[Workflow] Candidate for " << VI.name() | ||
<< " from " << Candidate.second->modulePath() | ||
<< " ImportFailureReason: " | ||
<< getFailureName(Candidate.first) << "\n"); | ||
return Candidate.first == | ||
FunctionImporter::ImportFailureReason::None; | ||
}), | ||
[](const auto &Candidate) { return Candidate.second; }); | ||
if (PotentialCandidates.empty()) { | ||
LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name() | ||
<< " because can't find eligible Callee. Guid is: " | ||
<< Function::getGUID(VI.name()) << "\n"); | ||
continue; | ||
} | ||
/// We will prefer importing the prevailing candidate, if not, we'll | ||
/// still pick the first available candidate. The reason we want to make | ||
/// sure we do import the prevailing candidate is because the goal of | ||
/// workload-awareness is to enable optimizations specializing the call | ||
/// graph of that workload. Suppose a function is already defined in the | ||
/// module, but it's not the prevailing variant. Suppose also we do not | ||
/// inline it (in fact, if it were interposable, we can't inline it), | ||
/// but we could specialize it to the workload in other ways. However, | ||
/// the linker would drop it in the favor of the prevailing copy. | ||
/// Instead, by importing the prevailing variant (assuming also the use | ||
/// of `-avail-extern-to-local`), we keep the specialization. We could | ||
/// alteranatively make the non-prevailing variant local, but the | ||
/// prevailing one is also the one for which we would have previously | ||
/// collected profiles, making it preferrable. | ||
auto PrevailingCandidates = llvm::make_filter_range( | ||
PotentialCandidates, [&](const auto *Candidate) { | ||
return IsPrevailing(VI.getGUID(), Candidate); | ||
mtrofin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
}); | ||
if (PrevailingCandidates.empty()) { | ||
GVS = *PotentialCandidates.begin(); | ||
if (!llvm::hasSingleElement(PotentialCandidates) && | ||
GlobalValue::isLocalLinkage(GVS->linkage())) | ||
LLVM_DEBUG( | ||
dbgs() | ||
<< "[Workload] Found multiple non-prevailing candidates for " | ||
<< VI.name() | ||
<< ". This is unexpected. Are module paths passed to the " | ||
"compiler unique for the modules passed to the linker?"); | ||
// We could in theory have multiple (interposable) copies of a symbol | ||
// when there is no prevailing candidate, if say the prevailing copy was | ||
// in a native object being linked in. However, we should in theory be | ||
// marking all of these non-prevailing IR copies dead in that case, in | ||
// which case they won't be candidates. | ||
assert(GVS->isLive()); | ||
} else { | ||
assert(llvm::hasSingleElement(PrevailingCandidates)); | ||
mtrofin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
GVS = *PrevailingCandidates.begin(); | ||
} | ||
|
||
auto ExportingModule = GVS->modulePath(); | ||
// We checked that for the prevailing case, but if we happen to have for | ||
// example an internal that's defined in this module, it'd have no | ||
// PrevailingCandidates. | ||
if (ExportingModule == ModName) { | ||
mtrofin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name() | ||
<< " because its defining module is the same as the " | ||
"current module\n"); | ||
continue; | ||
} | ||
LLVM_DEBUG(dbgs() << "[Workload][Including]" << VI.name() << " from " | ||
<< ExportingModule << " : " | ||
<< Function::getGUID(VI.name()) << "\n"); | ||
ImportList[ExportingModule].insert(VI.getGUID()); | ||
GVI.onImportingSummary(*GVS); | ||
if (ExportLists) | ||
(*ExportLists)[ExportingModule].insert(VI); | ||
} | ||
LLVM_DEBUG(dbgs() << "[Workload] Done\n"); | ||
} | ||
|
||
public: | ||
WorkloadImportsManager( | ||
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> | ||
IsPrevailing, | ||
const ModuleSummaryIndex &Index, | ||
DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists) | ||
: ModuleImportsManager(IsPrevailing, Index, ExportLists) { | ||
// Since the workload def uses names, we need a quick lookup | ||
// name->ValueInfo. | ||
StringMap<ValueInfo> NameToValueInfo; | ||
StringSet<> AmbiguousNames; | ||
for (auto &I : Index) { | ||
ValueInfo VI = Index.getValueInfo(I); | ||
if (!NameToValueInfo.insert(std::make_pair(VI.name(), VI)).second) | ||
LLVM_DEBUG(AmbiguousNames.insert(VI.name())); | ||
} | ||
auto DbgReportIfAmbiguous = [&](StringRef Name) { | ||
LLVM_DEBUG(if (AmbiguousNames.count(Name) > 0) { | ||
dbgs() << "[Workload] Function name " << Name | ||
<< " present in the workload definition is ambiguous. Consider " | ||
"compiling with -funique-internal-linkage-names."; | ||
}); | ||
}; | ||
std::error_code EC; | ||
auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(WorkloadDefinitions); | ||
if (std::error_code EC = BufferOrErr.getError()) { | ||
report_fatal_error("Failed to open context file"); | ||
return; | ||
} | ||
auto Buffer = std::move(BufferOrErr.get()); | ||
std::map<std::string, std::vector<std::string>> WorkloadDefs; | ||
json::Path::Root NullRoot; | ||
// The JSON is supposed to contain a dictionary matching the type of | ||
// WorkloadDefs. For example: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Suggest moving this example to the option definition. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
// { | ||
// "rootFunction_1": ["function_to_import_1", "function_to_import_2"], | ||
// "rootFunction_2": ["function_to_import_3", "function_to_import_4"] | ||
// } | ||
auto Parsed = json::parse(Buffer->getBuffer()); | ||
mtrofin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (!Parsed) | ||
report_fatal_error(Parsed.takeError()); | ||
if (!json::fromJSON(*Parsed, WorkloadDefs, NullRoot)) | ||
report_fatal_error("Invalid thinlto contextual profile format."); | ||
for (const auto &Workload : WorkloadDefs) { | ||
const auto &Root = Workload.first; | ||
DbgReportIfAmbiguous(Root); | ||
LLVM_DEBUG(dbgs() << "[Workload] Root: " << Root << "\n"); | ||
const auto &AllCallees = Workload.second; | ||
auto RootIt = NameToValueInfo.find(Root); | ||
if (RootIt == NameToValueInfo.end()) { | ||
LLVM_DEBUG(dbgs() << "[Workload] Root " << Root | ||
<< " not found in this linkage unit.\n"); | ||
continue; | ||
} | ||
auto RootVI = RootIt->second; | ||
if (RootVI.getSummaryList().size() != 1) { | ||
LLVM_DEBUG(dbgs() << "[Workload] Root " << Root | ||
<< " should have exactly one summary, but has " | ||
<< RootVI.getSummaryList().size() << ". Skipping.\n"); | ||
continue; | ||
} | ||
StringRef RootDefiningModule = | ||
RootVI.getSummaryList().front()->modulePath(); | ||
LLVM_DEBUG(dbgs() << "[Workload] Root defining module for " << Root | ||
<< " is : " << RootDefiningModule << "\n"); | ||
auto &Set = Workloads[RootDefiningModule]; | ||
for (const auto &Callee : AllCallees) { | ||
LLVM_DEBUG(dbgs() << "[Workload] " << Callee << "\n"); | ||
DbgReportIfAmbiguous(Callee); | ||
auto ElemIt = NameToValueInfo.find(Callee); | ||
if (ElemIt == NameToValueInfo.end()) { | ||
LLVM_DEBUG(dbgs() << "[Workload] " << Callee << " not found\n"); | ||
continue; | ||
} | ||
Set.insert(ElemIt->second); | ||
} | ||
LLVM_DEBUG({ | ||
dbgs() << "[Workload] Root: " << Root << " we have " << Set.size() | ||
<< " distinct callees.\n"; | ||
for (const auto &VI : Set) { | ||
dbgs() << "[Workload] Root: " << Root | ||
<< " Would include: " << VI.getGUID() << "\n"; | ||
} | ||
}); | ||
} | ||
} | ||
}; | ||
|
||
std::unique_ptr<ModuleImportsManager> ModuleImportsManager::create( | ||
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> | ||
IsPrevailing, | ||
const ModuleSummaryIndex &Index, | ||
DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists) { | ||
if (WorkloadDefinitions.empty()) { | ||
LLVM_DEBUG(dbgs() << "[Workload] Using the regular imports manager.\n"); | ||
return std::unique_ptr<ModuleImportsManager>( | ||
new ModuleImportsManager(IsPrevailing, Index, ExportLists)); | ||
} | ||
LLVM_DEBUG(dbgs() << "[Workload] Using the contextual imports manager.\n"); | ||
return std::make_unique<WorkloadImportsManager>(IsPrevailing, Index, | ||
ExportLists); | ||
} | ||
|
||
static const char * | ||
getFailureName(FunctionImporter::ImportFailureReason Reason) { | ||
switch (Reason) { | ||
|
@@ -732,14 +991,14 @@ void llvm::ComputeCrossModuleImport( | |
isPrevailing, | ||
DenseMap<StringRef, FunctionImporter::ImportMapTy> &ImportLists, | ||
DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) { | ||
ModuleImportsManager MIS(isPrevailing, Index, &ExportLists); | ||
auto MIS = ModuleImportsManager::create(isPrevailing, Index, &ExportLists); | ||
// For each module that has function defined, compute the import/export lists. | ||
for (const auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) { | ||
auto &ImportList = ImportLists[DefinedGVSummaries.first]; | ||
LLVM_DEBUG(dbgs() << "Computing import for Module '" | ||
<< DefinedGVSummaries.first << "'\n"); | ||
MIS.computeImportForModule(DefinedGVSummaries.second, | ||
DefinedGVSummaries.first, ImportList); | ||
MIS->computeImportForModule(DefinedGVSummaries.second, | ||
DefinedGVSummaries.first, ImportList); | ||
} | ||
|
||
// When computing imports we only added the variables and functions being | ||
|
@@ -855,8 +1114,8 @@ static void ComputeCrossModuleImportForModuleForTest( | |
|
||
// Compute the import list for this module. | ||
LLVM_DEBUG(dbgs() << "Computing import for Module '" << ModulePath << "'\n"); | ||
ModuleImportsManager MIS(isPrevailing, Index); | ||
MIS.computeImportForModule(FunctionSummaryMap, ModulePath, ImportList); | ||
auto MIS = ModuleImportsManager::create(isPrevailing, Index); | ||
MIS->computeImportForModule(FunctionSummaryMap, ModulePath, ImportList); | ||
|
||
#ifndef NDEBUG | ||
dumpImportListForModule(Index, ModulePath, ImportList); | ||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.