Skip to content

Commit 411fc45

Browse files
committed
[CGData][ThinLTO] Global Outlining with Two-CodeGen Rounds
1 parent 75611ca commit 411fc45

File tree

7 files changed

+302
-6
lines changed

7 files changed

+302
-6
lines changed

llvm/include/llvm/CGData/CodeGenData.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,22 @@ publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
164164
CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree));
165165
}
166166

167+
/// Initialize the two-codegen rounds.
168+
void initializeTwoCodegenRounds();
169+
170+
/// Save the current module before the first codegen round.
171+
void saveModuleForTwoRounds(const Module &TheModule, unsigned Task);
172+
173+
/// Load the current module before the second codegen round.
174+
std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
175+
unsigned Task,
176+
LLVMContext &Context);
177+
178+
/// Merge the codegen data from the input files in scratch vector in ThinLTO
179+
/// two-codegen rounds.
180+
Error mergeCodeGenData(
181+
const std::unique_ptr<std::vector<llvm::SmallString<0>>> InputFiles);
182+
167183
void warn(Error E, StringRef Whence = "");
168184
void warn(Twine Message, std::string Whence = "", std::string Hint = "");
169185

llvm/lib/CGData/CodeGenData.cpp

Lines changed: 80 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "llvm/Object/ObjectFile.h"
1818
#include "llvm/Support/CommandLine.h"
1919
#include "llvm/Support/FileSystem.h"
20+
#include "llvm/Support/Path.h"
2021
#include "llvm/Support/WithColor.h"
2122

2223
#define DEBUG_TYPE "cg-data"
@@ -30,6 +31,14 @@ cl::opt<bool>
3031
cl::opt<std::string>
3132
CodeGenDataUsePath("codegen-data-use-path", cl::init(""), cl::Hidden,
3233
cl::desc("File path to where .cgdata file is read"));
34+
// Hidden command-line switch selecting the ThinLTO two-codegen-round mode.
cl::opt<bool> CodeGenDataThinLTOTwoRounds(
    "codegen-data-thinlto-two-rounds", cl::Hidden, cl::init(false),
    cl::desc("Enable two-round ThinLTO code generation. The first round emits "
             "codegen data, while the second round uses the emitted codegen "
             "data for further optimizations."));
39+
40+
// Path to where the optimized bitcodes are saved and restored for ThinLTO.
41+
static SmallString<128> CodeGenDataThinLTOTwoRoundsPath;
3342

3443
static std::string getCGDataErrString(cgdata_error Err,
3544
const std::string &ErrMsg = "") {
@@ -139,7 +148,7 @@ CodeGenData &CodeGenData::getInstance() {
139148
std::call_once(CodeGenData::OnceFlag, []() {
140149
Instance = std::unique_ptr<CodeGenData>(new CodeGenData());
141150

142-
if (CodeGenDataGenerate)
151+
if (CodeGenDataGenerate || CodeGenDataThinLTOTwoRounds)
143152
Instance->EmitCGData = true;
144153
else if (!CodeGenDataUsePath.empty()) {
145154
// Initialize the global CGData if the input file name is given.
@@ -215,6 +224,76 @@ void warn(Error E, StringRef Whence) {
215224
}
216225
}
217226

227+
/// Compose the file name of the saved bitcode copy that belongs to \p Task
/// inside directory \p Dir, i.e. "<Dir>/<Task>.saved_copy.bc".
static std::string getPath(StringRef Dir, unsigned Task) {
  std::string SavedCopy = Dir.str();
  SavedCopy += "/";
  SavedCopy += std::to_string(Task);
  SavedCopy += ".saved_copy.bc";
  return SavedCopy;
}
230+
231+
void initializeTwoCodegenRounds() {
232+
assert(CodeGenDataThinLTOTwoRounds);
233+
if (auto EC = llvm::sys::fs::createUniqueDirectory(
234+
"cgdata", CodeGenDataThinLTOTwoRoundsPath))
235+
report_fatal_error(Twine("Failed to create directory: ") + EC.message());
236+
}
237+
238+
void saveModuleForTwoRounds(const Module &TheModule, unsigned Task) {
239+
assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath));
240+
std::string Path = getPath(CodeGenDataThinLTOTwoRoundsPath, Task);
241+
std::error_code EC;
242+
raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None);
243+
if (EC)
244+
report_fatal_error(Twine("Failed to open ") + Path +
245+
" to save optimized bitcode: " + EC.message());
246+
WriteBitcodeToFile(TheModule, OS, /* ShouldPreserveUseListOrder */ true);
247+
}
248+
249+
/// Read back the bitcode file saved for \p Task and parse it into \p Context.
///
/// \param OrigModule Supplies the module identifier that is put back on the
///        restored module.
/// \param Task Index used to locate the saved file.
/// \param Context Context in which the restored module is created.
/// \returns The restored module. Failing to open or to parse the saved file is
///          reported through report_fatal_error.
std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
                                               unsigned Task,
                                               LLVMContext &Context) {
  assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath));
  std::string Path = getPath(CodeGenDataThinLTOTwoRoundsPath, Task);
  auto FileOrError = MemoryBuffer::getFile(Path);
  if (auto EC = FileOrError.getError())
    report_fatal_error(Twine("Failed to open ") + Path +
                       " to load optimized bitcode: " + EC.message());

  std::unique_ptr<MemoryBuffer> FileBuffer = std::move(*FileOrError);
  auto RestoredModule = llvm::parseBitcodeFile(*FileBuffer, Context);
  if (!RestoredModule)
    // Take the error out of the Expected<> so that its description is part of
    // the diagnostic instead of being dropped.
    report_fatal_error(Twine("Failed to parse optimized bitcode loaded from ") +
                       Path + ": " + toString(RestoredModule.takeError()));

  // Restore the original module identifier.
  (*RestoredModule)->setModuleIdentifier(OrigModule.getModuleIdentifier());
  return std::move(*RestoredModule);
}
269+
270+
/// Merge the codegen data found in each non-empty buffer of \p InputFiles,
/// treating every buffer as an in-memory object file, and publish the merged
/// outlined hash tree when the result is not empty.
///
/// \returns The first error hit while creating an object file or merging its
///          data; success otherwise.
Error mergeCodeGenData(
    const std::unique_ptr<std::vector<llvm::SmallString<0>>> InputFiles) {

  OutlinedHashTreeRecord MergedRecord;
  for (auto &ObjectBytes : *InputFiles) {
    // Skip buffers that hold no data.
    if (ObjectBytes.empty())
      continue;

    // Wrap the raw bytes in a MemoryBuffer so they can be read as an object.
    std::unique_ptr<MemoryBuffer> ObjectBuffer = MemoryBuffer::getMemBuffer(
        StringRef(ObjectBytes.data(), ObjectBytes.size()),
        "in-memory object file", /*RequiresNullTerminator=*/false);
    Expected<std::unique_ptr<object::ObjectFile>> ObjOrErr =
        object::ObjectFile::createObjectFile(ObjectBuffer->getMemBufferRef());
    if (!ObjOrErr)
      return ObjOrErr.takeError();

    if (Error MergeErr = CodeGenDataReader::mergeFromObjectFile(
            ObjOrErr->get(), MergedRecord))
      return MergeErr;
  }

  if (MergedRecord.empty())
    return Error::success();

  cgdata::publishOutlinedHashTree(std::move(MergedRecord.HashTree));
  return Error::success();
}
296+
218297
} // end namespace cgdata
219298

220299
} // end namespace llvm

llvm/lib/LTO/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ add_llvm_component_library(LLVMLTO
2121
BinaryFormat
2222
BitReader
2323
BitWriter
24+
CGData
2425
CodeGen
2526
CodeGenTypes
2627
Core

llvm/lib/LTO/LTO.cpp

Lines changed: 98 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/Analysis/TargetTransformInfo.h"
2222
#include "llvm/Bitcode/BitcodeReader.h"
2323
#include "llvm/Bitcode/BitcodeWriter.h"
24+
#include "llvm/CGData/CodeGenData.h"
2425
#include "llvm/CodeGen/Analysis.h"
2526
#include "llvm/Config/llvm-config.h"
2627
#include "llvm/IR/AutoUpgrade.h"
@@ -70,6 +71,8 @@ static cl::opt<bool>
7071
DumpThinCGSCCs("dump-thin-cg-sccs", cl::init(false), cl::Hidden,
7172
cl::desc("Dump the SCCs in the ThinLTO index's callgraph"));
7273

74+
extern cl::opt<bool> CodeGenDataThinLTOTwoRounds;
75+
7376
namespace llvm {
7477
/// Enable global value internalization in LTO.
7578
cl::opt<bool> EnableLTOInternalization(
@@ -1424,7 +1427,7 @@ class InProcessThinBackend : public ThinBackendProc {
14241427
GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Name)));
14251428
}
14261429

1427-
Error runThinLTOBackendThread(
1430+
virtual Error runThinLTOBackendThread(
14281431
AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,
14291432
ModuleSummaryIndex &CombinedIndex,
14301433
const FunctionImporter::ImportMapTy &ImportList,
@@ -1513,6 +1516,60 @@ class InProcessThinBackend : public ThinBackendProc {
15131516
return Error::success();
15141517
}
15151518
};
1519+
1520+
/// This Backend will run ThinBackend process but throw away all the output from
1521+
/// the codegen. This class facilitates the first codegen round.
1522+
class NoOutputThinBackend : public InProcessThinBackend {
1523+
public:
1524+
NoOutputThinBackend(
1525+
const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1526+
ThreadPoolStrategy ThinLTOParallelism,
1527+
const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1528+
std::unique_ptr<std::vector<llvm::SmallString<0>>> Scratch)
1529+
: InProcessThinBackend(
1530+
Conf, CombinedIndex, ThinLTOParallelism, ModuleToDefinedGVSummaries,
1531+
// Allocate a scratch buffer for each task to write output to.
1532+
[Allocation = &*Scratch](unsigned Task, const Twine &ModuleName) {
1533+
return std::make_unique<CachedFileStream>(
1534+
std::make_unique<raw_svector_ostream>((*Allocation)[Task]));
1535+
},
1536+
FileCache(), nullptr, false, false),
1537+
Scratch(std::move(Scratch)) {}
1538+
1539+
/// Scratch space for writing output during the codegen.
1540+
std::unique_ptr<std::vector<llvm::SmallString<0>>> Scratch;
1541+
};
1542+
1543+
/// This Backend performs codegen on bitcode that was previously saved after
1544+
/// going through optimization. This class facilitates the second codegen round.
1545+
class OptimizedBitcodeThinBackend : public InProcessThinBackend {
1546+
public:
1547+
OptimizedBitcodeThinBackend(
1548+
const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1549+
ThreadPoolStrategy ThinLTOParallelism,
1550+
const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1551+
AddStreamFn AddStream)
1552+
: InProcessThinBackend(Conf, CombinedIndex, ThinLTOParallelism,
1553+
ModuleToDefinedGVSummaries, AddStream, FileCache(),
1554+
nullptr, false, false) {}
1555+
1556+
virtual Error runThinLTOBackendThread(
1557+
AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,
1558+
ModuleSummaryIndex &CombinedIndex,
1559+
const FunctionImporter::ImportMapTy &ImportList,
1560+
const FunctionImporter::ExportSetTy &ExportList,
1561+
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
1562+
const GVSummaryMapTy &DefinedGlobals,
1563+
MapVector<StringRef, BitcodeModule> &ModuleMap) override {
1564+
LTOLLVMContext BackendContext(Conf);
1565+
std::unique_ptr<Module> LoadedModule =
1566+
cgdata::loadModuleForTwoRounds(BM, Task, BackendContext);
1567+
1568+
return thinBackend(Conf, Task, AddStream, *LoadedModule, CombinedIndex,
1569+
ImportList, DefinedGlobals, &ModuleMap,
1570+
/*CodeGenOnly=*/true);
1571+
}
1572+
};
15161573
} // end anonymous namespace
15171574

15181575
ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism,
@@ -1855,10 +1912,46 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
18551912
return BackendProcess->wait();
18561913
};
18571914

1858-
std::unique_ptr<ThinBackendProc> BackendProc =
1859-
ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
1860-
AddStream, Cache);
1861-
return RunBackends(BackendProc.get());
1915+
if (!CodeGenDataThinLTOTwoRounds) {
1916+
std::unique_ptr<ThinBackendProc> BackendProc =
1917+
ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
1918+
AddStream, Cache);
1919+
return RunBackends(BackendProc.get());
1920+
}
1921+
1922+
// Perform two rounds of code generation for ThinLTO:
1923+
// 1. First round: Run optimization and code generation with a scratch output.
1924+
// 2. Merge codegen data extracted from the scratch output.
1925+
// 3. Second round: Run code generation again using the merged data.
1926+
LLVM_DEBUG(dbgs() << "Running ThinLTO two-codegen rounds\n");
1927+
1928+
// Initialize a temporary path to store and retrieve optimized IRs for
1929+
// two-round code generation.
1930+
cgdata::initializeTwoCodegenRounds();
1931+
1932+
// Create a scratch output to hold intermediate results.
1933+
auto Outputs =
1934+
std::make_unique<std::vector<llvm::SmallString<0>>>(getMaxTasks());
1935+
auto FirstRoundLTO = std::make_unique<NoOutputThinBackend>(
1936+
Conf, ThinLTO.CombinedIndex, llvm::heavyweight_hardware_concurrency(),
1937+
ModuleToDefinedGVSummaries, std::move(Outputs));
1938+
// First round: Run optimization and code generation with a scratch output.
1939+
// Before code generation, serialize modules.
1940+
if (Error E = RunBackends(FirstRoundLTO.get()))
1941+
return E;
1942+
1943+
// Merge codegen data extracted from the scratch output.
1944+
if (Error E = cgdata::mergeCodeGenData(std::move(FirstRoundLTO->Scratch)))
1945+
return E;
1946+
1947+
// Second round: Run code generation by reading IRs.
1948+
std::unique_ptr<ThinBackendProc> SecondRoundLTO =
1949+
std::make_unique<OptimizedBitcodeThinBackend>(
1950+
Conf, ThinLTO.CombinedIndex, llvm::heavyweight_hardware_concurrency(),
1951+
ModuleToDefinedGVSummaries, AddStream);
1952+
Error E = RunBackends(SecondRoundLTO.get());
1953+
1954+
return E;
18621955
}
18631956

18641957
Expected<std::unique_ptr<ToolOutputFile>> lto::setupLLVMOptimizationRemarks(

llvm/lib/LTO/LTOBackend.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "llvm/Analysis/TargetLibraryInfo.h"
2121
#include "llvm/Bitcode/BitcodeReader.h"
2222
#include "llvm/Bitcode/BitcodeWriter.h"
23+
#include "llvm/CGData/CodeGenData.h"
2324
#include "llvm/IR/LLVMRemarkStreamer.h"
2425
#include "llvm/IR/LegacyPassManager.h"
2526
#include "llvm/IR/PassManager.h"
@@ -74,6 +75,8 @@ static cl::opt<bool> ThinLTOAssumeMerged(
7475
cl::desc("Assume the input has already undergone ThinLTO function "
7576
"importing and the other pre-optimization pipeline changes."));
7677

78+
extern cl::opt<bool> CodeGenDataThinLTOTwoRounds;
79+
7780
namespace llvm {
7881
extern cl::opt<bool> NoPGOWarnMismatch;
7982
}
@@ -599,11 +602,19 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
599602
auto OptimizeAndCodegen =
600603
[&](Module &Mod, TargetMachine *TM,
601604
std::unique_ptr<ToolOutputFile> DiagnosticOutputFile) {
605+
// Perform optimization and code generation for ThinLTO.
602606
if (!opt(Conf, TM, Task, Mod, /*IsThinLTO=*/true,
603607
/*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex,
604608
CmdArgs))
605609
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
606610

611+
// Save the current module before the first codegen round.
612+
// Note that the second codegen round runs only `codegen()` without
613+
// running `opt()`. We're not reaching here as it's bailed out earlier
614+
// with CodeGenOnly which has been set in `OptimizedBitcodeThinBackend`.
615+
if (CodeGenDataThinLTOTwoRounds)
616+
cgdata::saveModuleForTwoRounds(Mod, Task);
617+
607618
codegen(Conf, TM, AddStream, Task, Mod, CombinedIndex);
608619
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
609620
};

0 commit comments

Comments
 (0)