|
21 | 21 | #include "llvm/Analysis/TargetTransformInfo.h"
|
22 | 22 | #include "llvm/Bitcode/BitcodeReader.h"
|
23 | 23 | #include "llvm/Bitcode/BitcodeWriter.h"
|
| 24 | +#include "llvm/CGData/CodeGenData.h" |
24 | 25 | #include "llvm/CodeGen/Analysis.h"
|
25 | 26 | #include "llvm/Config/llvm-config.h"
|
26 | 27 | #include "llvm/IR/AutoUpgrade.h"
|
@@ -70,6 +71,8 @@ static cl::opt<bool>
|
70 | 71 | DumpThinCGSCCs("dump-thin-cg-sccs", cl::init(false), cl::Hidden,
|
71 | 72 | cl::desc("Dump the SCCs in the ThinLTO index's callgraph"));
|
72 | 73 |
|
| 74 | +extern cl::opt<bool> CodeGenDataThinLTOTwoRounds; |
| 75 | + |
73 | 76 | namespace llvm {
|
74 | 77 | /// Enable global value internalization in LTO.
|
75 | 78 | cl::opt<bool> EnableLTOInternalization(
|
@@ -1424,7 +1427,7 @@ class InProcessThinBackend : public ThinBackendProc {
|
1424 | 1427 | GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Name)));
|
1425 | 1428 | }
|
1426 | 1429 |
|
1427 |
| - Error runThinLTOBackendThread( |
| 1430 | + virtual Error runThinLTOBackendThread( |
1428 | 1431 | AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,
|
1429 | 1432 | ModuleSummaryIndex &CombinedIndex,
|
1430 | 1433 | const FunctionImporter::ImportMapTy &ImportList,
|
@@ -1513,6 +1516,60 @@ class InProcessThinBackend : public ThinBackendProc {
|
1513 | 1516 | return Error::success();
|
1514 | 1517 | }
|
1515 | 1518 | };
|
| 1519 | + |
| 1520 | +/// This Backend will run ThinBackend process but throw away all the output from |
| 1521 | +/// the codegen. This class facilitates the first codegen round. |
| 1522 | +class NoOutputThinBackend : public InProcessThinBackend { |
| 1523 | +public: |
| 1524 | + NoOutputThinBackend( |
| 1525 | + const Config &Conf, ModuleSummaryIndex &CombinedIndex, |
| 1526 | + ThreadPoolStrategy ThinLTOParallelism, |
| 1527 | + const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries, |
| 1528 | + std::unique_ptr<std::vector<llvm::SmallString<0>>> Scratch) |
| 1529 | + : InProcessThinBackend( |
| 1530 | + Conf, CombinedIndex, ThinLTOParallelism, ModuleToDefinedGVSummaries, |
| 1531 | + // Allocate a scratch buffer for each task to write output to. |
| 1532 | + [Allocation = &*Scratch](unsigned Task, const Twine &ModuleName) { |
| 1533 | + return std::make_unique<CachedFileStream>( |
| 1534 | + std::make_unique<raw_svector_ostream>((*Allocation)[Task])); |
| 1535 | + }, |
| 1536 | + FileCache(), nullptr, false, false), |
| 1537 | + Scratch(std::move(Scratch)) {} |
| 1538 | + |
| 1539 | + /// Scratch space for writing output during the codegen. |
| 1540 | + std::unique_ptr<std::vector<llvm::SmallString<0>>> Scratch; |
| 1541 | +}; |
| 1542 | + |
| 1543 | +/// This Backend performs codegen on bitcode that was previously saved after |
| 1544 | +/// going through optimization. This class facilitates the second codegen round. |
| 1545 | +class OptimizedBitcodeThinBackend : public InProcessThinBackend { |
| 1546 | +public: |
| 1547 | + OptimizedBitcodeThinBackend( |
| 1548 | + const Config &Conf, ModuleSummaryIndex &CombinedIndex, |
| 1549 | + ThreadPoolStrategy ThinLTOParallelism, |
| 1550 | + const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries, |
| 1551 | + AddStreamFn AddStream) |
| 1552 | + : InProcessThinBackend(Conf, CombinedIndex, ThinLTOParallelism, |
| 1553 | + ModuleToDefinedGVSummaries, AddStream, FileCache(), |
| 1554 | + nullptr, false, false) {} |
| 1555 | + |
| 1556 | + virtual Error runThinLTOBackendThread( |
| 1557 | + AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM, |
| 1558 | + ModuleSummaryIndex &CombinedIndex, |
| 1559 | + const FunctionImporter::ImportMapTy &ImportList, |
| 1560 | + const FunctionImporter::ExportSetTy &ExportList, |
| 1561 | + const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR, |
| 1562 | + const GVSummaryMapTy &DefinedGlobals, |
| 1563 | + MapVector<StringRef, BitcodeModule> &ModuleMap) override { |
| 1564 | + LTOLLVMContext BackendContext(Conf); |
| 1565 | + std::unique_ptr<Module> LoadedModule = |
| 1566 | + cgdata::loadModuleForTwoRounds(BM, Task, BackendContext); |
| 1567 | + |
| 1568 | + return thinBackend(Conf, Task, AddStream, *LoadedModule, CombinedIndex, |
| 1569 | + ImportList, DefinedGlobals, &ModuleMap, |
| 1570 | + /*CodeGenOnly=*/true); |
| 1571 | + } |
| 1572 | +}; |
1516 | 1573 | } // end anonymous namespace
|
1517 | 1574 |
|
1518 | 1575 | ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism,
|
@@ -1855,10 +1912,46 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
|
1855 | 1912 | return BackendProcess->wait();
|
1856 | 1913 | };
|
1857 | 1914 |
|
1858 |
| - std::unique_ptr<ThinBackendProc> BackendProc = |
1859 |
| - ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, |
1860 |
| - AddStream, Cache); |
1861 |
| - return RunBackends(BackendProc.get()); |
| 1915 | + if (!CodeGenDataThinLTOTwoRounds) { |
| 1916 | + std::unique_ptr<ThinBackendProc> BackendProc = |
| 1917 | + ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, |
| 1918 | + AddStream, Cache); |
| 1919 | + return RunBackends(BackendProc.get()); |
| 1920 | + } |
| 1921 | + |
| 1922 | + // Perform two rounds of code generation for ThinLTO: |
| 1923 | + // 1. First round: Run optimization and code generation with a scratch output. |
| 1924 | + // 2. Merge codegen data extracted from the scratch output. |
| 1925 | + // 3. Second round: Run code generation again using the merged data. |
| 1926 | + LLVM_DEBUG(dbgs() << "Running ThinLTO two-codegen rounds\n"); |
| 1927 | + |
| 1928 | + // Initialize a temporary path to store and retrieve optimized IRs for |
| 1929 | + // two-round code generation. |
| 1930 | + cgdata::initializeTwoCodegenRounds(); |
| 1931 | + |
| 1932 | + // Create a scratch output to hold intermediate results. |
| 1933 | + auto Outputs = |
| 1934 | + std::make_unique<std::vector<llvm::SmallString<0>>>(getMaxTasks()); |
| 1935 | + auto FirstRoundLTO = std::make_unique<NoOutputThinBackend>( |
| 1936 | + Conf, ThinLTO.CombinedIndex, llvm::heavyweight_hardware_concurrency(), |
| 1937 | + ModuleToDefinedGVSummaries, std::move(Outputs)); |
| 1938 | + // First round: Run optimization and code generation with a scratch output. |
| 1939 | + // Before code generation, serialize modules. |
| 1940 | + if (Error E = RunBackends(FirstRoundLTO.get())) |
| 1941 | + return E; |
| 1942 | + |
| 1943 | + // Merge codegen data extracted from the scratch output. |
| 1944 | + if (Error E = cgdata::mergeCodeGenData(std::move(FirstRoundLTO->Scratch))) |
| 1945 | + return E; |
| 1946 | + |
| 1947 | + // Second round: Run code generation by reading IRs. |
| 1948 | + std::unique_ptr<ThinBackendProc> SecondRoundLTO = |
| 1949 | + std::make_unique<OptimizedBitcodeThinBackend>( |
| 1950 | + Conf, ThinLTO.CombinedIndex, llvm::heavyweight_hardware_concurrency(), |
| 1951 | + ModuleToDefinedGVSummaries, AddStream); |
| 1952 | + Error E = RunBackends(SecondRoundLTO.get()); |
| 1953 | + |
| 1954 | + return E; |
1862 | 1955 | }
|
1863 | 1956 |
|
1864 | 1957 | Expected<std::unique_ptr<ToolOutputFile>> lto::setupLLVMOptimizationRemarks(
|
|
0 commit comments