|
37 | 37 | #include "llvm/Support/Error.h"
|
38 | 38 | #include "llvm/Support/ErrorHandling.h"
|
39 | 39 | #include "llvm/Support/FileSystem.h"
|
| 40 | +#include "llvm/Support/JSON.h" |
40 | 41 | #include "llvm/Support/SourceMgr.h"
|
41 | 42 | #include "llvm/Support/raw_ostream.h"
|
42 | 43 | #include "llvm/Transforms/IPO/Internalize.h"
|
@@ -138,6 +139,29 @@ static cl::opt<bool>
|
138 | 139 | ImportAllIndex("import-all-index",
|
139 | 140 | cl::desc("Import all external functions in index."));
|
140 | 141 |
|
| 142 | +/// Pass a workload description file - an example of workload would be the |
| 143 | +/// functions executed to satisfy a RPC request. A workload is defined by a root |
| 144 | +/// function and the list of functions that are (frequently) needed to satisfy |
| 145 | +/// it. The module that defines the root will have all those functions imported. |
| 146 | +/// The file contains a JSON dictionary. The keys are root functions, the values |
| 147 | +/// are lists of functions to import in the module defining the root. It is |
| 148 | +/// assumed -funique-internal-linkage-names was used, thus ensuring function |
| 149 | +/// names are unique even for local linkage ones. |
| 150 | +static cl::opt<std::string> WorkloadDefinitions( |
| 151 | + "thinlto-workload-def", |
| 152 | + cl::desc("Pass a workload definition. This is a file containing a JSON " |
| 153 | + "dictionary. The keys are root functions, the values are lists of " |
| 154 | + "functions to import in the module defining the root. It is " |
| 155 | + "assumed -funique-internal-linkage-names was used, to ensure " |
| 156 | + "local linkage functions have unique names. For example: \n" |
| 157 | + "{\n" |
| 158 | + " \"rootFunction_1\": [\"function_to_import_1\", " |
| 159 | + "\"function_to_import_2\"], \n" |
| 160 | + " \"rootFunction_2\": [\"function_to_import_3\", " |
| 161 | + "\"function_to_import_4\"] \n" |
| 162 | + "}"), |
| 163 | + cl::Hidden); |
| 164 | + |
141 | 165 | // Load lazily a module from \p FileName in \p Context.
|
142 | 166 | static std::unique_ptr<Module> loadFile(const std::string &FileName,
|
143 | 167 | LLVMContext &Context) {
|
@@ -369,29 +393,264 @@ class GlobalsImporter final {
|
369 | 393 | }
|
370 | 394 | };
|
371 | 395 |
|
| 396 | +static const char *getFailureName(FunctionImporter::ImportFailureReason Reason); |
| 397 | + |
372 | 398 | /// Determine the list of imports and exports for each module.
|
373 |
| -class ModuleImportsManager final { |
| 399 | +class ModuleImportsManager { |
| 400 | +protected: |
374 | 401 | function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
|
375 | 402 | IsPrevailing;
|
376 | 403 | const ModuleSummaryIndex &Index;
|
377 | 404 | DenseMap<StringRef, FunctionImporter::ExportSetTy> *const ExportLists;
|
378 | 405 |
|
379 |
| -public: |
380 | 406 | ModuleImportsManager(
|
381 | 407 | function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
|
382 | 408 | IsPrevailing,
|
383 | 409 | const ModuleSummaryIndex &Index,
|
384 | 410 | DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists = nullptr)
|
385 | 411 | : IsPrevailing(IsPrevailing), Index(Index), ExportLists(ExportLists) {}
|
386 | 412 |
|
| 413 | +public: |
| 414 | + virtual ~ModuleImportsManager() = default; |
| 415 | + |
387 | 416 | /// Given the list of globals defined in a module, compute the list of imports
|
388 | 417 | /// as well as the list of "exports", i.e. the list of symbols referenced from
|
389 | 418 | /// another module (that may require promotion).
|
390 |
| - void computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries, |
391 |
| - StringRef ModName, |
392 |
| - FunctionImporter::ImportMapTy &ImportList); |
| 419 | + virtual void |
| 420 | + computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries, |
| 421 | + StringRef ModName, |
| 422 | + FunctionImporter::ImportMapTy &ImportList); |
| 423 | + |
| 424 | + static std::unique_ptr<ModuleImportsManager> |
| 425 | + create(function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> |
| 426 | + IsPrevailing, |
| 427 | + const ModuleSummaryIndex &Index, |
| 428 | + DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists = |
| 429 | + nullptr); |
| 430 | +}; |
| 431 | + |
| 432 | +/// A ModuleImportsManager that operates based on a workload definition (see |
| 433 | +/// -thinlto-workload-def). For modules that do not define workload roots, it |
| 434 | +/// applies the base ModuleImportsManager import policy. |
| 435 | +class WorkloadImportsManager : public ModuleImportsManager { |
| 436 | + // Keep a module name -> value infos to import association. We use it to |
| 437 | + // determine if a module's import list should be done by the base |
| 438 | + // ModuleImportsManager or by us. |
| 439 | + StringMap<DenseSet<ValueInfo>> Workloads; |
| 440 | + |
| 441 | + void |
| 442 | + computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries, |
| 443 | + StringRef ModName, |
| 444 | + FunctionImporter::ImportMapTy &ImportList) override { |
| 445 | + auto SetIter = Workloads.find(ModName); |
| 446 | + if (SetIter == Workloads.end()) { |
| 447 | + LLVM_DEBUG(dbgs() << "[Workload] " << ModName |
| 448 | + << " does not contain the root of any context.\n"); |
| 449 | + return ModuleImportsManager::computeImportForModule(DefinedGVSummaries, |
| 450 | + ModName, ImportList); |
| 451 | + } |
| 452 | + LLVM_DEBUG(dbgs() << "[Workload] " << ModName |
| 453 | + << " contains the root(s) of context(s).\n"); |
| 454 | + |
| 455 | + GlobalsImporter GVI(Index, DefinedGVSummaries, IsPrevailing, ImportList, |
| 456 | + ExportLists); |
| 457 | + auto &ValueInfos = SetIter->second; |
| 458 | + SmallVector<EdgeInfo, 128> GlobWorklist; |
| 459 | + for (auto &VI : llvm::make_early_inc_range(ValueInfos)) { |
| 460 | + auto It = DefinedGVSummaries.find(VI.getGUID()); |
| 461 | + if (It != DefinedGVSummaries.end() && |
| 462 | + IsPrevailing(VI.getGUID(), It->second)) { |
| 463 | + LLVM_DEBUG( |
| 464 | + dbgs() << "[Workload] " << VI.name() |
| 465 | + << " has the prevailing variant already in the module " |
| 466 | + << ModName << ". No need to import\n"); |
| 467 | + continue; |
| 468 | + } |
| 469 | + auto Candidates = |
| 470 | + qualifyCalleeCandidates(Index, VI.getSummaryList(), ModName); |
| 471 | + |
| 472 | + const GlobalValueSummary *GVS = nullptr; |
| 473 | + auto PotentialCandidates = llvm::map_range( |
| 474 | + llvm::make_filter_range( |
| 475 | + Candidates, |
| 476 | + [&](const auto &Candidate) { |
| 477 | + LLVM_DEBUG(dbgs() << "[Workflow] Candidate for " << VI.name() |
| 478 | + << " from " << Candidate.second->modulePath() |
| 479 | + << " ImportFailureReason: " |
| 480 | + << getFailureName(Candidate.first) << "\n"); |
| 481 | + return Candidate.first == |
| 482 | + FunctionImporter::ImportFailureReason::None; |
| 483 | + }), |
| 484 | + [](const auto &Candidate) { return Candidate.second; }); |
| 485 | + if (PotentialCandidates.empty()) { |
| 486 | + LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name() |
| 487 | + << " because can't find eligible Callee. Guid is: " |
| 488 | + << Function::getGUID(VI.name()) << "\n"); |
| 489 | + continue; |
| 490 | + } |
| 491 | + /// We will prefer importing the prevailing candidate, if not, we'll |
| 492 | + /// still pick the first available candidate. The reason we want to make |
| 493 | + /// sure we do import the prevailing candidate is because the goal of |
| 494 | + /// workload-awareness is to enable optimizations specializing the call |
| 495 | + /// graph of that workload. Suppose a function is already defined in the |
| 496 | + /// module, but it's not the prevailing variant. Suppose also we do not |
| 497 | + /// inline it (in fact, if it were interposable, we can't inline it), |
| 498 | + /// but we could specialize it to the workload in other ways. However, |
| 499 | + /// the linker would drop it in the favor of the prevailing copy. |
| 500 | + /// Instead, by importing the prevailing variant (assuming also the use |
| 501 | + /// of `-avail-extern-to-local`), we keep the specialization. We could |
| 502 | + /// alteranatively make the non-prevailing variant local, but the |
| 503 | + /// prevailing one is also the one for which we would have previously |
| 504 | + /// collected profiles, making it preferrable. |
| 505 | + auto PrevailingCandidates = llvm::make_filter_range( |
| 506 | + PotentialCandidates, [&](const auto *Candidate) { |
| 507 | + return IsPrevailing(VI.getGUID(), Candidate); |
| 508 | + }); |
| 509 | + if (PrevailingCandidates.empty()) { |
| 510 | + GVS = *PotentialCandidates.begin(); |
| 511 | + if (!llvm::hasSingleElement(PotentialCandidates) && |
| 512 | + GlobalValue::isLocalLinkage(GVS->linkage())) |
| 513 | + LLVM_DEBUG( |
| 514 | + dbgs() |
| 515 | + << "[Workload] Found multiple non-prevailing candidates for " |
| 516 | + << VI.name() |
| 517 | + << ". This is unexpected. Are module paths passed to the " |
| 518 | + "compiler unique for the modules passed to the linker?"); |
| 519 | + // We could in theory have multiple (interposable) copies of a symbol |
| 520 | + // when there is no prevailing candidate, if say the prevailing copy was |
| 521 | + // in a native object being linked in. However, we should in theory be |
| 522 | + // marking all of these non-prevailing IR copies dead in that case, in |
| 523 | + // which case they won't be candidates. |
| 524 | + assert(GVS->isLive()); |
| 525 | + } else { |
| 526 | + assert(llvm::hasSingleElement(PrevailingCandidates)); |
| 527 | + GVS = *PrevailingCandidates.begin(); |
| 528 | + } |
| 529 | + |
| 530 | + auto ExportingModule = GVS->modulePath(); |
| 531 | + // We checked that for the prevailing case, but if we happen to have for |
| 532 | + // example an internal that's defined in this module, it'd have no |
| 533 | + // PrevailingCandidates. |
| 534 | + if (ExportingModule == ModName) { |
| 535 | + LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name() |
| 536 | + << " because its defining module is the same as the " |
| 537 | + "current module\n"); |
| 538 | + continue; |
| 539 | + } |
| 540 | + LLVM_DEBUG(dbgs() << "[Workload][Including]" << VI.name() << " from " |
| 541 | + << ExportingModule << " : " |
| 542 | + << Function::getGUID(VI.name()) << "\n"); |
| 543 | + ImportList[ExportingModule].insert(VI.getGUID()); |
| 544 | + GVI.onImportingSummary(*GVS); |
| 545 | + if (ExportLists) |
| 546 | + (*ExportLists)[ExportingModule].insert(VI); |
| 547 | + } |
| 548 | + LLVM_DEBUG(dbgs() << "[Workload] Done\n"); |
| 549 | + } |
| 550 | + |
| 551 | +public: |
| 552 | + WorkloadImportsManager( |
| 553 | + function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> |
| 554 | + IsPrevailing, |
| 555 | + const ModuleSummaryIndex &Index, |
| 556 | + DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists) |
| 557 | + : ModuleImportsManager(IsPrevailing, Index, ExportLists) { |
| 558 | + // Since the workload def uses names, we need a quick lookup |
| 559 | + // name->ValueInfo. |
| 560 | + StringMap<ValueInfo> NameToValueInfo; |
| 561 | + StringSet<> AmbiguousNames; |
| 562 | + for (auto &I : Index) { |
| 563 | + ValueInfo VI = Index.getValueInfo(I); |
| 564 | + if (!NameToValueInfo.insert(std::make_pair(VI.name(), VI)).second) |
| 565 | + LLVM_DEBUG(AmbiguousNames.insert(VI.name())); |
| 566 | + } |
| 567 | + auto DbgReportIfAmbiguous = [&](StringRef Name) { |
| 568 | + LLVM_DEBUG(if (AmbiguousNames.count(Name) > 0) { |
| 569 | + dbgs() << "[Workload] Function name " << Name |
| 570 | + << " present in the workload definition is ambiguous. Consider " |
| 571 | + "compiling with -funique-internal-linkage-names."; |
| 572 | + }); |
| 573 | + }; |
| 574 | + std::error_code EC; |
| 575 | + auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(WorkloadDefinitions); |
| 576 | + if (std::error_code EC = BufferOrErr.getError()) { |
| 577 | + report_fatal_error("Failed to open context file"); |
| 578 | + return; |
| 579 | + } |
| 580 | + auto Buffer = std::move(BufferOrErr.get()); |
| 581 | + std::map<std::string, std::vector<std::string>> WorkloadDefs; |
| 582 | + json::Path::Root NullRoot; |
| 583 | + // The JSON is supposed to contain a dictionary matching the type of |
| 584 | + // WorkloadDefs. For example: |
| 585 | + // { |
| 586 | + // "rootFunction_1": ["function_to_import_1", "function_to_import_2"], |
| 587 | + // "rootFunction_2": ["function_to_import_3", "function_to_import_4"] |
| 588 | + // } |
| 589 | + auto Parsed = json::parse(Buffer->getBuffer()); |
| 590 | + if (!Parsed) |
| 591 | + report_fatal_error(Parsed.takeError()); |
| 592 | + if (!json::fromJSON(*Parsed, WorkloadDefs, NullRoot)) |
| 593 | + report_fatal_error("Invalid thinlto contextual profile format."); |
| 594 | + for (const auto &Workload : WorkloadDefs) { |
| 595 | + const auto &Root = Workload.first; |
| 596 | + DbgReportIfAmbiguous(Root); |
| 597 | + LLVM_DEBUG(dbgs() << "[Workload] Root: " << Root << "\n"); |
| 598 | + const auto &AllCallees = Workload.second; |
| 599 | + auto RootIt = NameToValueInfo.find(Root); |
| 600 | + if (RootIt == NameToValueInfo.end()) { |
| 601 | + LLVM_DEBUG(dbgs() << "[Workload] Root " << Root |
| 602 | + << " not found in this linkage unit.\n"); |
| 603 | + continue; |
| 604 | + } |
| 605 | + auto RootVI = RootIt->second; |
| 606 | + if (RootVI.getSummaryList().size() != 1) { |
| 607 | + LLVM_DEBUG(dbgs() << "[Workload] Root " << Root |
| 608 | + << " should have exactly one summary, but has " |
| 609 | + << RootVI.getSummaryList().size() << ". Skipping.\n"); |
| 610 | + continue; |
| 611 | + } |
| 612 | + StringRef RootDefiningModule = |
| 613 | + RootVI.getSummaryList().front()->modulePath(); |
| 614 | + LLVM_DEBUG(dbgs() << "[Workload] Root defining module for " << Root |
| 615 | + << " is : " << RootDefiningModule << "\n"); |
| 616 | + auto &Set = Workloads[RootDefiningModule]; |
| 617 | + for (const auto &Callee : AllCallees) { |
| 618 | + LLVM_DEBUG(dbgs() << "[Workload] " << Callee << "\n"); |
| 619 | + DbgReportIfAmbiguous(Callee); |
| 620 | + auto ElemIt = NameToValueInfo.find(Callee); |
| 621 | + if (ElemIt == NameToValueInfo.end()) { |
| 622 | + LLVM_DEBUG(dbgs() << "[Workload] " << Callee << " not found\n"); |
| 623 | + continue; |
| 624 | + } |
| 625 | + Set.insert(ElemIt->second); |
| 626 | + } |
| 627 | + LLVM_DEBUG({ |
| 628 | + dbgs() << "[Workload] Root: " << Root << " we have " << Set.size() |
| 629 | + << " distinct callees.\n"; |
| 630 | + for (const auto &VI : Set) { |
| 631 | + dbgs() << "[Workload] Root: " << Root |
| 632 | + << " Would include: " << VI.getGUID() << "\n"; |
| 633 | + } |
| 634 | + }); |
| 635 | + } |
| 636 | + } |
393 | 637 | };
|
394 | 638 |
|
| 639 | +std::unique_ptr<ModuleImportsManager> ModuleImportsManager::create( |
| 640 | + function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> |
| 641 | + IsPrevailing, |
| 642 | + const ModuleSummaryIndex &Index, |
| 643 | + DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists) { |
| 644 | + if (WorkloadDefinitions.empty()) { |
| 645 | + LLVM_DEBUG(dbgs() << "[Workload] Using the regular imports manager.\n"); |
| 646 | + return std::unique_ptr<ModuleImportsManager>( |
| 647 | + new ModuleImportsManager(IsPrevailing, Index, ExportLists)); |
| 648 | + } |
| 649 | + LLVM_DEBUG(dbgs() << "[Workload] Using the contextual imports manager.\n"); |
| 650 | + return std::make_unique<WorkloadImportsManager>(IsPrevailing, Index, |
| 651 | + ExportLists); |
| 652 | +} |
| 653 | + |
395 | 654 | static const char *
|
396 | 655 | getFailureName(FunctionImporter::ImportFailureReason Reason) {
|
397 | 656 | switch (Reason) {
|
@@ -732,14 +991,14 @@ void llvm::ComputeCrossModuleImport(
|
732 | 991 | isPrevailing,
|
733 | 992 | DenseMap<StringRef, FunctionImporter::ImportMapTy> &ImportLists,
|
734 | 993 | DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
|
735 |
| - ModuleImportsManager MIS(isPrevailing, Index, &ExportLists); |
| 994 | + auto MIS = ModuleImportsManager::create(isPrevailing, Index, &ExportLists); |
736 | 995 | // For each module that has function defined, compute the import/export lists.
|
737 | 996 | for (const auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
|
738 | 997 | auto &ImportList = ImportLists[DefinedGVSummaries.first];
|
739 | 998 | LLVM_DEBUG(dbgs() << "Computing import for Module '"
|
740 | 999 | << DefinedGVSummaries.first << "'\n");
|
741 |
| - MIS.computeImportForModule(DefinedGVSummaries.second, |
742 |
| - DefinedGVSummaries.first, ImportList); |
| 1000 | + MIS->computeImportForModule(DefinedGVSummaries.second, |
| 1001 | + DefinedGVSummaries.first, ImportList); |
743 | 1002 | }
|
744 | 1003 |
|
745 | 1004 | // When computing imports we only added the variables and functions being
|
@@ -855,8 +1114,8 @@ static void ComputeCrossModuleImportForModuleForTest(
|
855 | 1114 |
|
856 | 1115 | // Compute the import list for this module.
|
857 | 1116 | LLVM_DEBUG(dbgs() << "Computing import for Module '" << ModulePath << "'\n");
|
858 |
| - ModuleImportsManager MIS(isPrevailing, Index); |
859 |
| - MIS.computeImportForModule(FunctionSummaryMap, ModulePath, ImportList); |
| 1117 | + auto MIS = ModuleImportsManager::create(isPrevailing, Index); |
| 1118 | + MIS->computeImportForModule(FunctionSummaryMap, ModulePath, ImportList); |
860 | 1119 |
|
861 | 1120 | #ifndef NDEBUG
|
862 | 1121 | dumpImportListForModule(Index, ModulePath, ImportList);
|
|
0 commit comments