Skip to content

Commit 6ab1664

Browse files
committed
Address comments from teresajohnson
1 parent 56569a7 commit 6ab1664

File tree

11 files changed

+457
-120
lines changed

11 files changed

+457
-120
lines changed

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1321,10 +1321,11 @@ static void runThinLTOBackend(
13211321
Conf.CGFileType = getCodeGenFileType(Action);
13221322
break;
13231323
}
1324-
if (Error E = thinBackend(
1325-
Conf, -1, AddStream, *M, *CombinedIndex, ImportList,
1326-
ModuleToDefinedGVSummaries[M->getModuleIdentifier()],
1327-
/* ModuleMap */ nullptr, Conf.CodeGenOnly, CGOpts.CmdArgs)) {
1324+
if (Error E =
1325+
thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList,
1326+
ModuleToDefinedGVSummaries[M->getModuleIdentifier()],
1327+
/*ModuleMap=*/nullptr, Conf.CodeGenOnly,
1328+
/*IRAddStream=*/nullptr, CGOpts.CmdArgs)) {
13281329
handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
13291330
errs() << "Error running ThinLTO backend: " << EIB.message() << '\n';
13301331
});

llvm/include/llvm/CGData/CodeGenData.h

Lines changed: 63 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,13 @@
1515
#define LLVM_CGDATA_CODEGENDATA_H
1616

1717
#include "llvm/ADT/BitmaskEnum.h"
18+
#include "llvm/ADT/StableHashing.h"
1819
#include "llvm/Bitcode/BitcodeReader.h"
1920
#include "llvm/CGData/OutlinedHashTree.h"
2021
#include "llvm/CGData/OutlinedHashTreeRecord.h"
2122
#include "llvm/IR/Module.h"
2223
#include "llvm/Object/ObjectFile.h"
24+
#include "llvm/Support/Caching.h"
2325
#include "llvm/Support/ErrorHandling.h"
2426
#include "llvm/TargetParser/Triple.h"
2527
#include <mutex>
@@ -164,22 +166,73 @@ publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
164166
CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree));
165167
}
166168

167-
void initializeTwoCodegenRounds();
169+
struct StreamCacheData {
170+
/// Backing buffer for serialized data stream.
171+
SmallVector<SmallString<0>> Outputs;
172+
/// Callback function to add serialized data to the stream.
173+
AddStreamFn AddStream;
174+
/// Backing buffer for cached data.
175+
SmallVector<std::unique_ptr<MemoryBuffer>> Files;
176+
/// Cache mechanism for storing data.
177+
FileCache Cache;
178+
179+
StreamCacheData(unsigned Size, const FileCache &OrigCache,
180+
const Twine &CachePrefix)
181+
: Outputs(Size), Files(Size) {
182+
AddStream = [&](size_t Task, const Twine &ModuleName) {
183+
return std::make_unique<CachedFileStream>(
184+
std::make_unique<raw_svector_ostream>(Outputs[Task]));
185+
};
186+
187+
if (OrigCache.isValid()) {
188+
auto CGCacheOrErr =
189+
localCache("ThinLTO", CachePrefix, OrigCache.getCacheDirectoryPath(),
190+
[&](size_t Task, const Twine &ModuleName,
191+
std::unique_ptr<MemoryBuffer> MB) {
192+
Files[Task] = std::move(MB);
193+
});
194+
if (Error Err = CGCacheOrErr.takeError())
195+
report_fatal_error(std::move(Err));
196+
Cache = std::move(*CGCacheOrErr);
197+
}
198+
}
199+
StreamCacheData() = delete;
200+
201+
/// Retrieve results from either the cache or the stream.
202+
std::unique_ptr<SmallVector<StringRef>> getResult() {
203+
unsigned NumOutputs = Outputs.size();
204+
auto Result = std::make_unique<SmallVector<StringRef>>(NumOutputs);
205+
for (unsigned I = 0; I < NumOutputs; ++I)
206+
if (Files[I])
207+
(*Result)[I] = Files[I]->getBuffer();
208+
else
209+
(*Result)[I] = Outputs[I];
210+
return Result;
211+
}
212+
};
168213

169214
/// Save \p TheModule before the first codegen round.
170215
/// \p Task represents the partition number in the parallel code generation
171-
/// process.
172-
void saveModuleForTwoRounds(const Module &TheModule, unsigned Task);
173-
174-
/// Load the optimized module before the second codegen round.
216+
/// process. \p AddStream is the callback used to add the serialized module to
217+
/// the stream.
218+
void saveModuleForTwoRounds(const Module &TheModule, unsigned Task,
219+
AddStreamFn AddStream);
220+
221+
/// Load the optimized bitcode module for the second codegen round.
222+
/// \p OrigModule is the original bitcode module.
223+
/// \p Task identifies the partition number in the parallel code generation
224+
/// process. \p Context provides the environment settings for module operations.
225+
/// \p IRFiles contains optimized bitcode module files needed for loading.
226+
/// \return A unique_ptr to the loaded Module, or nullptr if loading fails.
175227
std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
176228
unsigned Task,
177-
LLVMContext &Context);
229+
LLVMContext &Context,
230+
ArrayRef<StringRef> IRFiles);
178231

179-
/// Merge the codegen data from the input files in scratch vector in ThinLTO
180-
/// two-codegen rounds.
181-
Error mergeCodeGenData(
182-
const std::unique_ptr<std::vector<llvm::SmallString<0>>> InputFiles);
232+
/// Merge the codegen data from the scratch objects \p ObjectFiles from the
233+
/// first codegen round.
234+
/// \return the combined hash of the merged codegen data.
235+
Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjectFiles);
183236

184237
void warn(Error E, StringRef Whence = "");
185238
void warn(Twine Message, std::string Whence = "", std::string Hint = "");

llvm/include/llvm/CGData/CodeGenDataReader.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,11 @@ class CodeGenDataReader {
5454
/// Extract the cgdata embedded in sections from the given object file and
5555
/// merge them into the GlobalOutlineRecord. This is a static helper that
5656
/// is used by `llvm-cgdata --merge` or ThinLTO's two-codegen rounds.
57+
/// Optionally, \p CombinedHash can be used to compute the combined hash of
58+
/// the merged data.
5759
static Error mergeFromObjectFile(const object::ObjectFile *Obj,
58-
OutlinedHashTreeRecord &GlobalOutlineRecord);
60+
OutlinedHashTreeRecord &GlobalOutlineRecord,
61+
stable_hash *CombinedHash = nullptr);
5962

6063
protected:
6164
/// The outlined hash tree that has been read. When it's released by

llvm/include/llvm/LTO/LTO.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,15 +65,17 @@ void thinLTOInternalizeAndPromoteInIndex(
6565
isPrevailing);
6666

6767
/// Computes a unique hash for the Module considering the current list of
68-
/// export/import and other global analysis results.
68+
/// export/import and other global analysis results. Optionally, \p ExtraID
69+
/// can be used to add an extra identifier to the hash.
6970
std::string computeLTOCacheKey(
7071
const lto::Config &Conf, const ModuleSummaryIndex &Index,
7172
StringRef ModuleID, const FunctionImporter::ImportMapTy &ImportList,
7273
const FunctionImporter::ExportSetTy &ExportList,
7374
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
7475
const GVSummaryMapTy &DefinedGlobals,
7576
const DenseSet<GlobalValue::GUID> &CfiFunctionDefs = {},
76-
const DenseSet<GlobalValue::GUID> &CfiFunctionDecls = {});
77+
const DenseSet<GlobalValue::GUID> &CfiFunctionDecls = {},
78+
StringRef ExtraID = {});
7779

7880
namespace lto {
7981

llvm/include/llvm/LTO/LTOBackend.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,15 @@ Error backend(const Config &C, AddStreamFn AddStream,
5151
/// are saved in the ModuleMap. If \p ModuleMap is nullptr, module files will
5252
/// be mapped to memory on demand and at any given time during importing, only
5353
/// one source module will be kept open at the most. If \p CodeGenOnly is true,
54-
/// the backend will skip optimization and only perform code generation.
54+
/// the backend will skip optimization and only perform code generation. If
55+
/// \p IRAddStream is not nullptr, it will be called just before code generation
56+
/// to serialize the optimized IR.
5557
Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream,
5658
Module &M, const ModuleSummaryIndex &CombinedIndex,
5759
const FunctionImporter::ImportMapTy &ImportList,
5860
const GVSummaryMapTy &DefinedGlobals,
5961
MapVector<StringRef, BitcodeModule> *ModuleMap,
60-
bool CodeGenOnly,
62+
bool CodeGenOnly, AddStreamFn IRAddStream = nullptr,
6163
const std::vector<uint8_t> &CmdArgs = std::vector<uint8_t>());
6264

6365
Error finalizeOptimizationRemarks(

llvm/lib/CGData/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ add_llvm_component_library(LLVMCGData
1212
intrinsics_gen
1313

1414
LINK_COMPONENTS
15+
BitReader
16+
BitWriter
1517
Core
1618
Support
1719
Object

llvm/lib/CGData/CodeGenData.cpp

Lines changed: 29 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "llvm/CGData/CodeGenDataReader.h"
1616
#include "llvm/CGData/OutlinedHashTreeRecord.h"
1717
#include "llvm/Object/ObjectFile.h"
18+
#include "llvm/Support/Caching.h"
1819
#include "llvm/Support/CommandLine.h"
1920
#include "llvm/Support/FileSystem.h"
2021
#include "llvm/Support/Path.h"
@@ -37,9 +38,6 @@ cl::opt<bool> CodeGenDataThinLTOTwoRounds(
3738
"emits codegen data, while the second round uses the emitted "
3839
"codegen data for further optimizations."));
3940

40-
// Path to where the optimized bitcodes are saved and restored for ThinLTO.
41-
static SmallString<128> CodeGenDataThinLTOTwoRoundsPath;
42-
4341
static std::string getCGDataErrString(cgdata_error Err,
4442
const std::string &ErrMsg = "") {
4543
std::string Msg;
@@ -224,59 +222,45 @@ void warn(Error E, StringRef Whence) {
224222
}
225223
}
226224

227-
static std::string getPath(StringRef Dir, unsigned Task) {
228-
llvm::SmallString<128> Path(Dir);
229-
llvm::sys::path::append(Path, llvm::Twine(Task) + ".saved_copy.bc");
230-
return std::string(Path);
231-
}
232-
233-
void initializeTwoCodegenRounds() {
234-
assert(CodeGenDataThinLTOTwoRounds);
235-
if (auto EC = llvm::sys::fs::createUniqueDirectory(
236-
"cgdata", CodeGenDataThinLTOTwoRoundsPath))
237-
report_fatal_error(Twine("Failed to create directory: ") + EC.message());
238-
}
239-
240-
void saveModuleForTwoRounds(const Module &TheModule, unsigned Task) {
241-
assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath));
242-
std::string Path = getPath(CodeGenDataThinLTOTwoRoundsPath, Task);
243-
std::error_code EC;
244-
raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None);
245-
if (EC)
246-
report_fatal_error(Twine("Failed to open ") + Path +
247-
" to save optimized bitcode: " + EC.message());
248-
WriteBitcodeToFile(TheModule, OS, /*ShouldPreserveUseListOrder=*/true);
225+
void saveModuleForTwoRounds(const Module &TheModule, unsigned Task,
226+
AddStreamFn AddStream) {
227+
LLVM_DEBUG(dbgs() << "Saving module: " << TheModule.getModuleIdentifier()
228+
<< " in Task " << Task << "\n");
229+
Expected<std::unique_ptr<CachedFileStream>> StreamOrErr =
230+
AddStream(Task, TheModule.getModuleIdentifier());
231+
if (Error Err = StreamOrErr.takeError())
232+
report_fatal_error(std::move(Err));
233+
std::unique_ptr<CachedFileStream> &Stream = *StreamOrErr;
234+
235+
WriteBitcodeToFile(TheModule, *Stream->OS,
236+
/*ShouldPreserveUseListOrder=*/true);
249237
}
250238

251239
std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
252240
unsigned Task,
253-
LLVMContext &Context) {
254-
assert(sys::fs::is_directory(CodeGenDataThinLTOTwoRoundsPath));
255-
std::string Path = getPath(CodeGenDataThinLTOTwoRoundsPath, Task);
256-
auto FileOrError = MemoryBuffer::getFile(Path);
257-
if (auto EC = FileOrError.getError())
258-
report_fatal_error(Twine("Failed to open ") + Path +
259-
" to load optimized bitcode: " + EC.message());
260-
261-
std::unique_ptr<MemoryBuffer> FileBuffer = std::move(*FileOrError);
241+
LLVMContext &Context,
242+
ArrayRef<StringRef> IRFiles) {
243+
LLVM_DEBUG(dbgs() << "Loading module: " << OrigModule.getModuleIdentifier()
244+
<< " in Task " << Task << "\n");
245+
auto FileBuffer = MemoryBuffer::getMemBuffer(
246+
IRFiles[Task], "in-memory IR file", /*RequiresNullTerminator=*/false);
262247
auto RestoredModule = parseBitcodeFile(*FileBuffer, Context);
263248
if (!RestoredModule)
264-
report_fatal_error(Twine("Failed to parse optimized bitcode loaded from ") +
265-
Path + "\n");
249+
report_fatal_error(
250+
Twine("Failed to parse optimized bitcode loaded for Task: ") +
251+
Twine(Task) + "\n");
266252

267253
// Restore the original module identifier.
268254
(*RestoredModule)->setModuleIdentifier(OrigModule.getModuleIdentifier());
269255
return std::move(*RestoredModule);
270256
}
271257

272-
Error mergeCodeGenData(
273-
const std::unique_ptr<std::vector<llvm::SmallString<0>>> InputFiles) {
274-
258+
Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) {
275259
OutlinedHashTreeRecord GlobalOutlineRecord;
276-
for (auto &InputFile : *(InputFiles)) {
277-
if (InputFile.empty())
260+
stable_hash CombinedHash = 0;
261+
for (auto File : ObjFiles) {
262+
if (File.empty())
278263
continue;
279-
StringRef File = StringRef(InputFile.data(), InputFile.size());
280264
std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer(
281265
File, "in-memory object file", /*RequiresNullTerminator=*/false);
282266
Expected<std::unique_ptr<object::ObjectFile>> BinOrErr =
@@ -285,15 +269,15 @@ Error mergeCodeGenData(
285269
return BinOrErr.takeError();
286270

287271
std::unique_ptr<object::ObjectFile> &Obj = BinOrErr.get();
288-
if (auto E = CodeGenDataReader::mergeFromObjectFile(Obj.get(),
289-
GlobalOutlineRecord))
272+
if (auto E = CodeGenDataReader::mergeFromObjectFile(
273+
Obj.get(), GlobalOutlineRecord, &CombinedHash))
290274
return E;
291275
}
292276

293277
if (!GlobalOutlineRecord.empty())
294278
cgdata::publishOutlinedHashTree(std::move(GlobalOutlineRecord.HashTree));
295279

296-
return Error::success();
280+
return CombinedHash;
297281
}
298282

299283
} // end namespace cgdata

llvm/lib/CGData/CodeGenDataReader.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
3131
}
3232

3333
Error CodeGenDataReader::mergeFromObjectFile(
34-
const object::ObjectFile *Obj,
35-
OutlinedHashTreeRecord &GlobalOutlineRecord) {
34+
const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord,
35+
stable_hash *CombinedHash) {
3636
Triple TT = Obj->makeTriple();
3737
auto CGOutLineName =
3838
getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false);
@@ -48,6 +48,9 @@ Error CodeGenDataReader::mergeFromObjectFile(
4848
auto *EndData = Data + ContentsOrErr->size();
4949

5050
if (*NameOrErr == CGOutLineName) {
51+
if (CombinedHash)
52+
*CombinedHash =
53+
stable_hash_combine(*CombinedHash, xxh3_64bits(*ContentsOrErr));
5154
// In case dealing with an executable that has concatenated cgdata,
5255
// we want to merge them into a single cgdata.
5356
// Although it's not a typical workflow, we support this scenario.

0 commit comments

Comments
 (0)